From 6baa0741dccd1a00fac8fd753b0afd3dbfae6f75 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Thu, 22 Jun 2017 10:32:12 +0000 Subject: [PATCH 0001/4095] Python3 compatibility fixes --- .../data_generators/generator_utils.py | 2 +- tensor2tensor/data_generators/image.py | 3 +- tensor2tensor/data_generators/text_encoder.py | 87 ++++++++++++------- tensor2tensor/data_generators/tokenizer.py | 17 ++-- 4 files changed, 67 insertions(+), 42 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/text_encoder.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 487546e16..1e59e3e50 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -22,12 +22,12 @@ import io import os import tarfile -import urllib # Dependency imports import six from six.moves import xrange # pylint: disable=redefined-builtin +import six.moves.urllib_request from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 55b5f2fc7..ee0ad26d5 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function -import cPickle import gzip import io import json @@ -32,6 +31,8 @@ import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin +from six.moves import cPickle + from tensor2tensor.data_generators import generator_utils import tensorflow as tf diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100644 new mode 100755 index b170013ea..14ca9306e --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -27,6 +27,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin +from collections import defaultdict from tensor2tensor.data_generators import tokenizer import tensorflow as tf @@ -36,6 +37,10 @@ EOS = '' RESERVED_TOKENS = [PAD, EOS] +# Character that will be used instead of those with +# ordinal numbers > 255 +UNK_CHAR = '×' # ordinal 215 +ORD_UNK_CHAR = ord(UNK_CHAR) class TextEncoder(object): """Base class for converting from ints to/from human readable strings.""" @@ -73,7 +78,7 @@ def decode(self, ids): decoded_ids = [] for id_ in ids: if 0 <= id_ < self._num_reserved_ids: - decoded_ids.append(RESERVED_TOKENS[int(id_)]) + decoded_ids.append(RESERVED_TOKENS[id_]) else: decoded_ids.append(id_ - self._num_reserved_ids) return ' '.join([str(d) for d in decoded_ids]) @@ -86,22 +91,28 @@ def vocab_size(self): class ByteTextEncoder(TextEncoder): """Encodes each byte to an id. For 8-bit strings only.""" + BYTE_MAX = 2 ** 8 # 256 + def encode(self, s): - return [ord(c) + self._num_reserved_ids for c in s] + numres = self._num_reserved_ids + byte_max = self.BYTE_MAX + # Characters with ordinals >= BYTE_MAX (256) will be mapped to UNK_CHAR + return [ord(c) + numres if ord(c) < byte_max else ORD_UNK_CHAR + numres for c in s] def decode(self, ids): + numres = self._num_reserved_ids decoded_ids = [] for id_ in ids: - if 0 <= id_ < self._num_reserved_ids: - decoded_ids.append(RESERVED_TOKENS[int(id_)]) + if 0 <= id_ < numres: + decoded_ids.append(RESERVED_TOKENS[id_]) else: - decoded_ids.append(chr(id_)) + decoded_ids.append(chr(id_ - numres)) return ''.join(decoded_ids) @property def vocab_size(self): - return 2**8 + self._num_reserved_ids + return self.BYTE_MAX + self._num_reserved_ids class TokenTextEncoder(TextEncoder): @@ -113,18 +124,18 @@ def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): self._reverse = reverse if vocab_filename is not None: self._load_vocab_from_file(vocab_filename) + else: + self._token_to_id = {} + self._id_to_token = {} def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" ret = [self._token_to_id[tok] for tok in sentence.strip().split()] - if self._reverse: - ret = ret[::-1] - return ret + return ret[::-1] if self._reverse else ret def decode(self, ids): - if self._reverse: - ids = ids[::-1] - return ' '.join([self._safe_id_to_token(i) for i in ids]) + seq = reversed(ids) if self._reverse else ids + return ' '.join([self._safe_id_to_token(i) for i in seq]) @property def vocab_size(self): @@ -243,15 +254,21 @@ def _escaped_token_to_subtokens(self, escaped_token): """ ret = [] pos = 0 - while pos < len(escaped_token): - end = len(escaped_token) - while True: + lesc = len(escaped_token) + while pos < lesc: + end = lesc + while end > pos: subtoken = self._subtoken_string_to_id.get(escaped_token[pos:end], -1) if subtoken != -1: break end -= 1 ret.append(subtoken) - pos = end + if end > pos: + pos = end + else: + # This kinda should not happen + print("Unable to find subtoken in string '{0}'".format(escaped_token)) + pos += 1 return ret @classmethod @@ -322,13 +339,14 @@ def build_from_token_counts(self, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. for i in xrange(num_iterations): - counts = {} + print("build_from_token_counts(min_count={0}): iteration {1}".format(min_count, i)) + counts = defaultdict(int) for token, count in six.iteritems(token_counts): escaped_token = self._escape_token(token) # we will count all tails of the escaped_token, starting from boundaries # determined by our current segmentation. if i == 0: - starts = list(range(len(escaped_token))) + starts = xrange(len(escaped_token)) else: subtokens = self._escaped_token_to_subtokens(escaped_token) pos = 0 @@ -337,31 +355,36 @@ def build_from_token_counts(self, starts.append(pos) pos += len(self.subtoken_to_subtoken_string(subtoken)) for start in starts: - for end in xrange(start + 1, len(escaped_token) + 1): + for end in xrange(start + 1, len(escaped_token)): subtoken_string = escaped_token[start:end] - counts[subtoken_string] = counts.get(subtoken_string, 0) + count + counts[subtoken_string] += count + print("{0} counts constructed".format(len(counts))) # array of lists of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): - if count < min_count or len(subtoken_string) <= 1: + lsub = len(subtoken_string) + # all subtoken strings of length 1 are included regardless of count + if count < min_count and lsub != 1: continue - while len(len_to_subtoken_strings) <= len(subtoken_string): + while len(len_to_subtoken_strings) <= lsub: len_to_subtoken_strings.append([]) - len_to_subtoken_strings[len(subtoken_string)].append(subtoken_string) + len_to_subtoken_strings[lsub].append(subtoken_string) new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. + print("starting identification of subtoken strings, list length is {0}" + .format(len(len_to_subtoken_strings))) for subtoken_strings in len_to_subtoken_strings[::-1]: for subtoken_string in subtoken_strings: count = counts[subtoken_string] - if count < min_count: + if count < min_count and len(subtoken_string) != 1: + # subtoken strings of length 1 are included regardless of count continue new_subtoken_strings.append((-count, subtoken_string)) for l in xrange(1, len(subtoken_string)): counts[subtoken_string[:l]] -= count - # make sure we have all single characters. - new_subtoken_strings.extend([(-counts.get(chr(i), 0), chr(i)) - for i in xrange(2**8)]) + # Make sure to include the underscore as a subtoken string + new_subtoken_strings.append((0, '_')) new_subtoken_strings.sort() self._init_from_list([''] * self._num_reserved_ids + [p[1] for p in new_subtoken_strings]) @@ -390,13 +413,19 @@ def _load_from_file(self, filename): subtoken_strings = [] with tf.gfile.Open(filename) as f: for line in f: - subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) + if six.PY2: + subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) + else: + subtoken_strings.append(line.strip()[1:-1]) self._init_from_list(subtoken_strings) def _store_to_file(self, filename): with tf.gfile.Open(filename, 'w') as f: for subtoken_string in self._all_subtoken_strings: - f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') + if six.PY2: + f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') + else: + f.write('\'' + subtoken_string + '\'\n') def _escape_token(self, token): r"""Translate '\'->'\\' and '_'->'\u', then append '_'. diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 15b199907..e6b83ba8f 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -45,13 +45,12 @@ from __future__ import division from __future__ import print_function -import array import string # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin - +from collections import defaultdict class Tokenizer(object): """Vocab for breaking words into wordpieces. @@ -59,15 +58,11 @@ class Tokenizer(object): def __init__(self): self._separator_chars = string.punctuation + string.whitespace - self._separator_char_mask = array.array( - "l", [chr(i) in self._separator_chars for i in xrange(256)]) - self.token_counts = dict() + self._separator_char_set = set(self._separator_chars) + self.token_counts = defaultdict(int) def _increment_token_count(self, token): - if token in self.token_counts: - self.token_counts[token] += 1 - else: - self.token_counts[token] = 1 + self.token_counts[token] += 1 def encode(self, raw_text): """Encode a raw string as a list of tokens. @@ -111,7 +106,7 @@ def decode(self, tokens): return ret def _is_separator_char(self, c): - return self._separator_char_mask[ord(c)] + return c in self._separator_char_set def _is_word_char(self, c): - return not self._is_separator_char(c) + return c not in self._separator_char_set From b3f8c3d148d79787faafa6516e2a6fa5d71620a5 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Thu, 22 Jun 2017 10:43:23 +0000 Subject: [PATCH 0002/4095] Removed print statements --- tensor2tensor/data_generators/text_encoder.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 14ca9306e..88ee425c9 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -266,8 +266,9 @@ def _escaped_token_to_subtokens(self, escaped_token): if end > pos: pos = end else: - # This kinda should not happen - print("Unable to find subtoken in string '{0}'".format(escaped_token)) + # This kinda should not happen, but it does. Cop out by skipping the + # nonexistent subtoken from the returned list. + # print("Unable to find subtoken in string '{0}'".format(escaped_token)) pos += 1 return ret @@ -339,7 +340,6 @@ def build_from_token_counts(self, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. for i in xrange(num_iterations): - print("build_from_token_counts(min_count={0}): iteration {1}".format(min_count, i)) counts = defaultdict(int) for token, count in six.iteritems(token_counts): escaped_token = self._escape_token(token) @@ -358,7 +358,6 @@ def build_from_token_counts(self, for end in xrange(start + 1, len(escaped_token)): subtoken_string = escaped_token[start:end] counts[subtoken_string] += count - print("{0} counts constructed".format(len(counts))) # array of lists of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): @@ -372,8 +371,6 @@ def build_from_token_counts(self, new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. - print("starting identification of subtoken strings, list length is {0}" - .format(len(len_to_subtoken_strings))) for subtoken_strings in len_to_subtoken_strings[::-1]: for subtoken_string in subtoken_strings: count = counts[subtoken_string] From 69cf7278ae641cb4e4dec85ceb84af3b73f91930 Mon Sep 17 00:00:00 2001 From: adgitate1 Date: Thu, 22 Jun 2017 11:15:51 -0700 Subject: [PATCH 0003/4095] Update README.md --- tensor2tensor/models/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/models/README.md b/tensor2tensor/models/README.md index 29b88484f..69050b9b2 100644 --- a/tensor2tensor/models/README.md +++ b/tensor2tensor/models/README.md @@ -12,5 +12,5 @@ To add a model to the built-in set, create a new file (see, e.g., `neural_gpu.py`) and write your model class inheriting from `T2TModel` there and decorate it with `registry.register_model`. Import it in `models.py`. -It is now avaialable to use with the trainer binary (`t2t-trainer`) using the +It is now available to use with the trainer binary (`t2t-trainer`) using the `--model=model_name` flag. From ccf4986286e8e04b976c7cbb5110adb3b4f5ab4a Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Thu, 22 Jun 2017 20:40:58 +0000 Subject: [PATCH 0004/4095] Fixes suggested by @rsepassi --- tensor2tensor/data_generators/text_encoder.py | 8 ++------ tensor2tensor/data_generators/tokenizer.py | 15 ++++++--------- 2 files changed, 8 insertions(+), 15 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/tokenizer.py diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 88ee425c9..9d4e64f63 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -78,7 +78,7 @@ def decode(self, ids): decoded_ids = [] for id_ in ids: if 0 <= id_ < self._num_reserved_ids: - decoded_ids.append(RESERVED_TOKENS[id_]) + decoded_ids.append(RESERVED_TOKENS[int(id_)]) else: decoded_ids.append(id_ - self._num_reserved_ids) return ' '.join([str(d) for d in decoded_ids]) @@ -122,11 +122,7 @@ def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): """Initialize from a file, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse - if vocab_filename is not None: - self._load_vocab_from_file(vocab_filename) - else: - self._token_to_id = {} - self._id_to_token = {} + self._load_vocab_from_file(vocab_filename) def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py old mode 100644 new mode 100755 index e6b83ba8f..09b60ff1f --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -56,14 +56,11 @@ class Tokenizer(object): """Vocab for breaking words into wordpieces. """ + _SEPARATOR_CHAR_SET = set(string.punctuation + string.whitespace) + def __init__(self): - self._separator_chars = string.punctuation + string.whitespace - self._separator_char_set = set(self._separator_chars) self.token_counts = defaultdict(int) - def _increment_token_count(self, token): - self.token_counts[token] += 1 - def encode(self, raw_text): """Encode a raw string as a list of tokens. @@ -82,11 +79,11 @@ def encode(self, raw_text): token = raw_text[token_start:pos] if token != " " or token_start == 0: ret.append(token) - self._increment_token_count(token) + self.token_counts[token] += 1 token_start = pos final_token = raw_text[token_start:] ret.append(final_token) - self._increment_token_count(final_token) + self.token_counts[final_token] += 1 return ret def decode(self, tokens): @@ -106,7 +103,7 @@ def decode(self, tokens): return ret def _is_separator_char(self, c): - return c in self._separator_char_set + return c in self._SEPARATOR_CHAR_SET def _is_word_char(self, c): - return c not in self._separator_char_set + return c not in self._SEPARATOR_CHAR_SET From 6ad92c30ea6a28e6823a6517565c348469de06d9 Mon Sep 17 00:00:00 2001 From: Stefan Schweter Date: Thu, 22 Jun 2017 23:58:29 +0200 Subject: [PATCH 0005/4095] data-generators: show nice progress bar for download process --- .../data_generators/generator_utils.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 487546e16..673eefd25 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -126,6 +126,18 @@ def generate_files(generator, return output_files +def download_report_hook(count, block_size, total_size): + """Report hook for download progress + + Args: + count: current block number + block_size: block size + total_size: total size + """ + percent = int(count*block_size*100/total_size) + print("\r%d%%" % percent + ' completed', end='\r') + + def maybe_download(directory, filename, url): """Download filename from url unless it's already in directory. @@ -143,7 +155,11 @@ def maybe_download(directory, filename, url): filepath = os.path.join(directory, filename) if not tf.gfile.Exists(filepath): tf.logging.info("Downloading %s to %s" % (url, filepath)) - filepath, _ = urllib.urlretrieve(url, filepath) + filepath, _ = urllib.urlretrieve(url, filepath, + reporthook=download_report_hook) + + # Print newline to clear the carriage return from the download progress + print() statinfo = os.stat(filepath) tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, statinfo.st_size)) From 0e7d44e4f2ef0893df27c398db03334fc6927f97 Mon Sep 17 00:00:00 2001 From: Ashish Vaswani Date: Wed, 21 Jun 2017 17:36:21 -0700 Subject: [PATCH 0006/4095] fixed a small bug in transformer_parsing_base PiperOrigi-RevId: 159769630 --- .../data_generators/generator_utils.py | 18 +----------------- tensor2tensor/models/README.md | 2 +- tensor2tensor/models/transformer.py | 1 - 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 673eefd25..487546e16 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -126,18 +126,6 @@ def generate_files(generator, return output_files -def download_report_hook(count, block_size, total_size): - """Report hook for download progress - - Args: - count: current block number - block_size: block size - total_size: total size - """ - percent = int(count*block_size*100/total_size) - print("\r%d%%" % percent + ' completed', end='\r') - - def maybe_download(directory, filename, url): """Download filename from url unless it's already in directory. @@ -155,11 +143,7 @@ def maybe_download(directory, filename, url): filepath = os.path.join(directory, filename) if not tf.gfile.Exists(filepath): tf.logging.info("Downloading %s to %s" % (url, filepath)) - filepath, _ = urllib.urlretrieve(url, filepath, - reporthook=download_report_hook) - - # Print newline to clear the carriage return from the download progress - print() + filepath, _ = urllib.urlretrieve(url, filepath) statinfo = os.stat(filepath) tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, statinfo.st_size)) diff --git a/tensor2tensor/models/README.md b/tensor2tensor/models/README.md index 69050b9b2..29b88484f 100644 --- a/tensor2tensor/models/README.md +++ b/tensor2tensor/models/README.md @@ -12,5 +12,5 @@ To add a model to the built-in set, create a new file (see, e.g., `neural_gpu.py`) and write your model class inheriting from `T2TModel` there and decorate it with `registry.register_model`. Import it in `models.py`. -It is now available to use with the trainer binary (`t2t-trainer`) using the +It is now avaialable to use with the trainer binary (`t2t-trainer`) using the `--model=model_name` flag. diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 264e0570d..4fb53fe1c 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -342,7 +342,6 @@ def transformer_parsing_base(): hparams.learning_rate_warmup_steps = 16000 hparams.hidden_size = 1024 hparams.learning_rate = 0.05 - hparams.residual_dropout = 0.1 hparams.shared_embedding_and_softmax_weights = int(False) return hparams From e58639869e86ce553ed43228ac10f27456551dfc Mon Sep 17 00:00:00 2001 From: Noam Shazeer Date: Wed, 21 Jun 2017 21:17:58 -0700 Subject: [PATCH 0007/4095] Cap number of parallel data readers at the number of data shards. Otherwise, insufficient shuffling can lead to multiple instances of the same example in a batch, and worse quality. This is a problem in particular for single-sharded datasets. --- tensor2tensor/utils/data_reader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 0080ecaa6..0ba62ec9f 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -100,6 +100,8 @@ def examples_queue(data_sources, with tf.name_scope("examples_queue"): # Read serialized examples using slim parallel_reader. num_epochs = None if training else 1 + data_files = tf.contrib.slim.parallel_reader.get_data_files(data_sources) + num_readers = min(4 if training else 1, len(data_files)) _, example_serialized = tf.contrib.slim.parallel_reader.parallel_read( data_sources, tf.TFRecordReader, @@ -107,7 +109,7 @@ def examples_queue(data_sources, shuffle=training, capacity=2 * capacity, min_after_dequeue=capacity, - num_readers=4 if training else 1) + num_readers=num_readers) if data_items_to_decoders is None: data_items_to_decoders = { From e3e27725d4560f25e1f81077cb0200b91273c706 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Thu, 22 Jun 2017 17:33:22 -0700 Subject: [PATCH 0008/4095] add readme toc, improve data downloads PiperOrigin-RevId: 159900539 --- README.md | 14 ++++++++++++++ tensor2tensor/data_generators/generator_utils.py | 4 +++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 69ad66ddc..f49fb20df 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,20 @@ issues](https://github.com/tensorflow/tensor2tensor/issues). And chat with us and other users on [Gitter](https://gitter.im/tensor2tensor/Lobby). +### Contents + +* [Walkthrough](#walkthrough) +* [Installation](#installation) +* [Features](#features) +* [T2T Overview](#t2t-overview) + * [Datasets](#datasets) + * [Problems and Modalities](#problems-and-modalities) + * [Models](#models) + * [Hyperparameter Sets](#hyperparameter-sets) + * [Trainer](#trainer) +* [Adding your own components](#adding-your-own-components) +* [Adding a dataset](#adding-a-dataset) + --- ## Walkthrough diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 487546e16..7b6534ad2 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -143,7 +143,9 @@ def maybe_download(directory, filename, url): filepath = os.path.join(directory, filename) if not tf.gfile.Exists(filepath): tf.logging.info("Downloading %s to %s" % (url, filepath)) - filepath, _ = urllib.urlretrieve(url, filepath) + inprogress_filepath = filepath + ".incomplete" + inprogress_filepath, _ = urllib.urlretrieve(url, inprogress_filepath) + tf.gfile.Rename(inprogress_filepath, filepath) statinfo = os.stat(filepath) tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, statinfo.st_size)) From 854480da6d36afe53ed320ceae393eb47a8574e3 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Thu, 22 Jun 2017 18:43:42 -0700 Subject: [PATCH 0009/4095] internal merge PiperOrigin-RevId: 159906200 --- .../data_generators/generator_utils.py | 17 ++++++++++++++++- tensor2tensor/models/README.md | 2 +- tensor2tensor/models/common_layers.py | 7 +++++-- tensor2tensor/models/transformer.py | 9 +++++++++ 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 7b6534ad2..35e61d7cc 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -126,6 +126,18 @@ def generate_files(generator, return output_files +def download_report_hook(count, block_size, total_size): + """Report hook for download progress. + + Args: + count: current block number + block_size: block size + total_size: total size + """ + percent = int(count * block_size * 100 / total_size) + print("\r%d%%" % percent + " completed", end="\r") + + def maybe_download(directory, filename, url): """Download filename from url unless it's already in directory. @@ -144,7 +156,10 @@ def maybe_download(directory, filename, url): if not tf.gfile.Exists(filepath): tf.logging.info("Downloading %s to %s" % (url, filepath)) inprogress_filepath = filepath + ".incomplete" - inprogress_filepath, _ = urllib.urlretrieve(url, inprogress_filepath) + inprogress_filepath, _ = urllib.urlretrieve(url, inprogress_filepath, + reporthook=download_report_hook) + # Print newline to clear the carriage return from the download progress + print() tf.gfile.Rename(inprogress_filepath, filepath) statinfo = os.stat(filepath) tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, diff --git a/tensor2tensor/models/README.md b/tensor2tensor/models/README.md index 29b88484f..69050b9b2 100644 --- a/tensor2tensor/models/README.md +++ b/tensor2tensor/models/README.md @@ -12,5 +12,5 @@ To add a model to the built-in set, create a new file (see, e.g., `neural_gpu.py`) and write your model class inheriting from `T2TModel` there and decorate it with `registry.register_model`. Import it in `models.py`. -It is now avaialable to use with the trainer binary (`t2t-trainer`) using the +It is now available to use with the trainer binary (`t2t-trainer`) using the `--model=model_name` flag. diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index ef6559f9e..30215e889 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1079,6 +1079,7 @@ def conv_hidden_relu(inputs, hidden_size, output_size, kernel_size=(1, 1), + second_kernel_size=(1, 1), summaries=True, dropout=0.0, **kwargs): @@ -1090,7 +1091,8 @@ def conv_hidden_relu(inputs, inputs = tf.expand_dims(inputs, 2) else: is_3d = False - h = conv( + conv_f1 = conv if kernel_size == (1, 1) else separable_conv + h = conv_f1( inputs, hidden_size, kernel_size, @@ -1103,7 +1105,8 @@ def conv_hidden_relu(inputs, tf.summary.histogram("hidden_density_logit", relu_density_logit( h, list(range(inputs.shape.ndims - 1)))) - ret = conv(h, output_size, (1, 1), name="conv2", **kwargs) + conv_f2 = conv if second_kernel_size == (1, 1) else separable_conv + ret = conv_f2(h, output_size, second_kernel_size, name="conv2", **kwargs) if is_3d: ret = tf.squeeze(ret, 2) return ret diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 4fb53fe1c..94fb0776c 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -255,6 +255,15 @@ def transformer_ffn_layer(x, hparams): hparams.filter_size, hparams.num_heads, hparams.attention_dropout) + elif hparams.ffn_layer == "conv_hidden_relu_with_sepconv": + return common_layers.conv_hidden_relu( + x, + hparams.filter_size, + hparams.hidden_size, + kernel_size=(3, 1), + second_kernel_size=(31, 1), + padding="LEFT", + dropout=hparams.relu_dropout) else: assert hparams.ffn_layer == "none" return x From 2f4d5b7f4c7894d7def087d28cb8ba707dd24d6d Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Thu, 22 Jun 2017 19:36:01 -0700 Subject: [PATCH 0010/4095] Bump version to 1.0.6 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a2d541a30..0bf0c8739 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.5', + version='1.0.6', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 522d51c5b6baa809e1880e747ab9811589f33ea0 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Fri, 23 Jun 2017 20:57:06 +0000 Subject: [PATCH 0011/4095] Made Python3 ByteTextEncoder compatible with Python2 --- tensor2tensor/data_generators/text_encoder.py | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 9d4e64f63..74d2b73cb 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -36,11 +36,10 @@ PAD = '' EOS = '' RESERVED_TOKENS = [PAD, EOS] - -# Character that will be used instead of those with -# ordinal numbers > 255 -UNK_CHAR = '×' # ordinal 215 -ORD_UNK_CHAR = ord(UNK_CHAR) +if six.PY2: + RESERVED_TOKENS_BYTES = RESERVED_TOKENS +else: + RESERVED_TOKENS_BYTES = [bytes(PAD, 'ascii'), bytes(EOS, 'ascii')] class TextEncoder(object): """Base class for converting from ints to/from human readable strings.""" @@ -91,28 +90,30 @@ def vocab_size(self): class ByteTextEncoder(TextEncoder): """Encodes each byte to an id. For 8-bit strings only.""" - BYTE_MAX = 2 ** 8 # 256 - def encode(self, s): numres = self._num_reserved_ids - byte_max = self.BYTE_MAX - # Characters with ordinals >= BYTE_MAX (256) will be mapped to UNK_CHAR - return [ord(c) + numres if ord(c) < byte_max else ORD_UNK_CHAR + numres for c in s] + if six.PY2: + return [ord(c) + numres for c in s] + # Python3: explicitly convert to UTF-8 + return [c + numres for c in s.encode("utf-8")] def decode(self, ids): numres = self._num_reserved_ids decoded_ids = [] + int2byte = six.int2byte for id_ in ids: if 0 <= id_ < numres: - decoded_ids.append(RESERVED_TOKENS[id_]) + decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) else: - decoded_ids.append(chr(id_ - numres)) - - return ''.join(decoded_ids) + decoded_ids.append(int2byte(id_ - numres)) + if six.PY2: + return ''.join(decoded_ids) + # Python3: join byte arrays and then decode string + return b''.join(decoded_ids).decode("utf-8") @property def vocab_size(self): - return self.BYTE_MAX + self._num_reserved_ids + return 2**8 + self._num_reserved_ids class TokenTextEncoder(TextEncoder): From 848d074013c9c3031cb309282bfcad1b4858db89 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Thu, 22 Jun 2017 10:32:12 +0000 Subject: [PATCH 0012/4095] Python3 compatibility fixes --- .../data_generators/generator_utils.py | 2 +- tensor2tensor/data_generators/image.py | 3 +- tensor2tensor/data_generators/text_encoder.py | 87 ++++++++++++------- tensor2tensor/data_generators/tokenizer.py | 24 ++--- 4 files changed, 66 insertions(+), 50 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/text_encoder.py mode change 100644 => 100755 tensor2tensor/data_generators/tokenizer.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 487546e16..1e59e3e50 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -22,12 +22,12 @@ import io import os import tarfile -import urllib # Dependency imports import six from six.moves import xrange # pylint: disable=redefined-builtin +import six.moves.urllib_request from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 55b5f2fc7..ee0ad26d5 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function -import cPickle import gzip import io import json @@ -32,6 +31,8 @@ import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin +from six.moves import cPickle + from tensor2tensor.data_generators import generator_utils import tensorflow as tf diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100644 new mode 100755 index b170013ea..74d2b73cb --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -27,6 +27,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin +from collections import defaultdict from tensor2tensor.data_generators import tokenizer import tensorflow as tf @@ -35,7 +36,10 @@ PAD = '' EOS = '' RESERVED_TOKENS = [PAD, EOS] - +if six.PY2: + RESERVED_TOKENS_BYTES = RESERVED_TOKENS +else: + RESERVED_TOKENS_BYTES = [bytes(PAD, 'ascii'), bytes(EOS, 'ascii')] class TextEncoder(object): """Base class for converting from ints to/from human readable strings.""" @@ -87,17 +91,25 @@ class ByteTextEncoder(TextEncoder): """Encodes each byte to an id. For 8-bit strings only.""" def encode(self, s): - return [ord(c) + self._num_reserved_ids for c in s] + numres = self._num_reserved_ids + if six.PY2: + return [ord(c) + numres for c in s] + # Python3: explicitly convert to UTF-8 + return [c + numres for c in s.encode("utf-8")] def decode(self, ids): + numres = self._num_reserved_ids decoded_ids = [] + int2byte = six.int2byte for id_ in ids: - if 0 <= id_ < self._num_reserved_ids: - decoded_ids.append(RESERVED_TOKENS[int(id_)]) + if 0 <= id_ < numres: + decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) else: - decoded_ids.append(chr(id_)) - - return ''.join(decoded_ids) + decoded_ids.append(int2byte(id_ - numres)) + if six.PY2: + return ''.join(decoded_ids) + # Python3: join byte arrays and then decode string + return b''.join(decoded_ids).decode("utf-8") @property def vocab_size(self): @@ -111,20 +123,16 @@ def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): """Initialize from a file, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse - if vocab_filename is not None: - self._load_vocab_from_file(vocab_filename) + self._load_vocab_from_file(vocab_filename) def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" ret = [self._token_to_id[tok] for tok in sentence.strip().split()] - if self._reverse: - ret = ret[::-1] - return ret + return ret[::-1] if self._reverse else ret def decode(self, ids): - if self._reverse: - ids = ids[::-1] - return ' '.join([self._safe_id_to_token(i) for i in ids]) + seq = reversed(ids) if self._reverse else ids + return ' '.join([self._safe_id_to_token(i) for i in seq]) @property def vocab_size(self): @@ -243,15 +251,22 @@ def _escaped_token_to_subtokens(self, escaped_token): """ ret = [] pos = 0 - while pos < len(escaped_token): - end = len(escaped_token) - while True: + lesc = len(escaped_token) + while pos < lesc: + end = lesc + while end > pos: subtoken = self._subtoken_string_to_id.get(escaped_token[pos:end], -1) if subtoken != -1: break end -= 1 ret.append(subtoken) - pos = end + if end > pos: + pos = end + else: + # This kinda should not happen, but it does. Cop out by skipping the + # nonexistent subtoken from the returned list. + # print("Unable to find subtoken in string '{0}'".format(escaped_token)) + pos += 1 return ret @classmethod @@ -322,13 +337,13 @@ def build_from_token_counts(self, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. for i in xrange(num_iterations): - counts = {} + counts = defaultdict(int) for token, count in six.iteritems(token_counts): escaped_token = self._escape_token(token) # we will count all tails of the escaped_token, starting from boundaries # determined by our current segmentation. if i == 0: - starts = list(range(len(escaped_token))) + starts = xrange(len(escaped_token)) else: subtokens = self._escaped_token_to_subtokens(escaped_token) pos = 0 @@ -337,31 +352,33 @@ def build_from_token_counts(self, starts.append(pos) pos += len(self.subtoken_to_subtoken_string(subtoken)) for start in starts: - for end in xrange(start + 1, len(escaped_token) + 1): + for end in xrange(start + 1, len(escaped_token)): subtoken_string = escaped_token[start:end] - counts[subtoken_string] = counts.get(subtoken_string, 0) + count + counts[subtoken_string] += count # array of lists of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): - if count < min_count or len(subtoken_string) <= 1: + lsub = len(subtoken_string) + # all subtoken strings of length 1 are included regardless of count + if count < min_count and lsub != 1: continue - while len(len_to_subtoken_strings) <= len(subtoken_string): + while len(len_to_subtoken_strings) <= lsub: len_to_subtoken_strings.append([]) - len_to_subtoken_strings[len(subtoken_string)].append(subtoken_string) + len_to_subtoken_strings[lsub].append(subtoken_string) new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. for subtoken_strings in len_to_subtoken_strings[::-1]: for subtoken_string in subtoken_strings: count = counts[subtoken_string] - if count < min_count: + if count < min_count and len(subtoken_string) != 1: + # subtoken strings of length 1 are included regardless of count continue new_subtoken_strings.append((-count, subtoken_string)) for l in xrange(1, len(subtoken_string)): counts[subtoken_string[:l]] -= count - # make sure we have all single characters. - new_subtoken_strings.extend([(-counts.get(chr(i), 0), chr(i)) - for i in xrange(2**8)]) + # Make sure to include the underscore as a subtoken string + new_subtoken_strings.append((0, '_')) new_subtoken_strings.sort() self._init_from_list([''] * self._num_reserved_ids + [p[1] for p in new_subtoken_strings]) @@ -390,13 +407,19 @@ def _load_from_file(self, filename): subtoken_strings = [] with tf.gfile.Open(filename) as f: for line in f: - subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) + if six.PY2: + subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) + else: + subtoken_strings.append(line.strip()[1:-1]) self._init_from_list(subtoken_strings) def _store_to_file(self, filename): with tf.gfile.Open(filename, 'w') as f: for subtoken_string in self._all_subtoken_strings: - f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') + if six.PY2: + f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') + else: + f.write('\'' + subtoken_string + '\'\n') def _escape_token(self, token): r"""Translate '\'->'\\' and '_'->'\u', then append '_'. diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py old mode 100644 new mode 100755 index 15b199907..09b60ff1f --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -45,29 +45,21 @@ from __future__ import division from __future__ import print_function -import array import string # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin - +from collections import defaultdict class Tokenizer(object): """Vocab for breaking words into wordpieces. """ - def __init__(self): - self._separator_chars = string.punctuation + string.whitespace - self._separator_char_mask = array.array( - "l", [chr(i) in self._separator_chars for i in xrange(256)]) - self.token_counts = dict() + _SEPARATOR_CHAR_SET = set(string.punctuation + string.whitespace) - def _increment_token_count(self, token): - if token in self.token_counts: - self.token_counts[token] += 1 - else: - self.token_counts[token] = 1 + def __init__(self): + self.token_counts = defaultdict(int) def encode(self, raw_text): """Encode a raw string as a list of tokens. @@ -87,11 +79,11 @@ def encode(self, raw_text): token = raw_text[token_start:pos] if token != " " or token_start == 0: ret.append(token) - self._increment_token_count(token) + self.token_counts[token] += 1 token_start = pos final_token = raw_text[token_start:] ret.append(final_token) - self._increment_token_count(final_token) + self.token_counts[final_token] += 1 return ret def decode(self, tokens): @@ -111,7 +103,7 @@ def decode(self, tokens): return ret def _is_separator_char(self, c): - return self._separator_char_mask[ord(c)] + return c in self._SEPARATOR_CHAR_SET def _is_word_char(self, c): - return not self._is_separator_char(c) + return c not in self._SEPARATOR_CHAR_SET From 3410bea303c0e561c5d8d52a06db1528ff20c158 Mon Sep 17 00:00:00 2001 From: Vilhjalmur Thorsteinsson Date: Sat, 24 Jun 2017 00:42:00 +0000 Subject: [PATCH 0013/4095] Python3 compatibility; better Unicode support (#22) * Python3 compatibility fixes * Removed print statements * Fixes suggested by @rsepassi * Made Python3 ByteTextEncoder compatible with Python2 * Python3 compatibility fixes --- .../data_generators/generator_utils.py | 2 +- tensor2tensor/data_generators/image.py | 3 +- tensor2tensor/data_generators/text_encoder.py | 87 ++++++++++++------- tensor2tensor/data_generators/tokenizer.py | 24 ++--- 4 files changed, 66 insertions(+), 50 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/text_encoder.py mode change 100644 => 100755 tensor2tensor/data_generators/tokenizer.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 35e61d7cc..11788df45 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -22,12 +22,12 @@ import io import os import tarfile -import urllib # Dependency imports import six from six.moves import xrange # pylint: disable=redefined-builtin +import six.moves.urllib_request from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 55b5f2fc7..ee0ad26d5 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function -import cPickle import gzip import io import json @@ -32,6 +31,8 @@ import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin +from six.moves import cPickle + from tensor2tensor.data_generators import generator_utils import tensorflow as tf diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100644 new mode 100755 index b170013ea..74d2b73cb --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -27,6 +27,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin +from collections import defaultdict from tensor2tensor.data_generators import tokenizer import tensorflow as tf @@ -35,7 +36,10 @@ PAD = '' EOS = '' RESERVED_TOKENS = [PAD, EOS] - +if six.PY2: + RESERVED_TOKENS_BYTES = RESERVED_TOKENS +else: + RESERVED_TOKENS_BYTES = [bytes(PAD, 'ascii'), bytes(EOS, 'ascii')] class TextEncoder(object): """Base class for converting from ints to/from human readable strings.""" @@ -87,17 +91,25 @@ class ByteTextEncoder(TextEncoder): """Encodes each byte to an id. For 8-bit strings only.""" def encode(self, s): - return [ord(c) + self._num_reserved_ids for c in s] + numres = self._num_reserved_ids + if six.PY2: + return [ord(c) + numres for c in s] + # Python3: explicitly convert to UTF-8 + return [c + numres for c in s.encode("utf-8")] def decode(self, ids): + numres = self._num_reserved_ids decoded_ids = [] + int2byte = six.int2byte for id_ in ids: - if 0 <= id_ < self._num_reserved_ids: - decoded_ids.append(RESERVED_TOKENS[int(id_)]) + if 0 <= id_ < numres: + decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) else: - decoded_ids.append(chr(id_)) - - return ''.join(decoded_ids) + decoded_ids.append(int2byte(id_ - numres)) + if six.PY2: + return ''.join(decoded_ids) + # Python3: join byte arrays and then decode string + return b''.join(decoded_ids).decode("utf-8") @property def vocab_size(self): @@ -111,20 +123,16 @@ def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): """Initialize from a file, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse - if vocab_filename is not None: - self._load_vocab_from_file(vocab_filename) + self._load_vocab_from_file(vocab_filename) def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" ret = [self._token_to_id[tok] for tok in sentence.strip().split()] - if self._reverse: - ret = ret[::-1] - return ret + return ret[::-1] if self._reverse else ret def decode(self, ids): - if self._reverse: - ids = ids[::-1] - return ' '.join([self._safe_id_to_token(i) for i in ids]) + seq = reversed(ids) if self._reverse else ids + return ' '.join([self._safe_id_to_token(i) for i in seq]) @property def vocab_size(self): @@ -243,15 +251,22 @@ def _escaped_token_to_subtokens(self, escaped_token): """ ret = [] pos = 0 - while pos < len(escaped_token): - end = len(escaped_token) - while True: + lesc = len(escaped_token) + while pos < lesc: + end = lesc + while end > pos: subtoken = self._subtoken_string_to_id.get(escaped_token[pos:end], -1) if subtoken != -1: break end -= 1 ret.append(subtoken) - pos = end + if end > pos: + pos = end + else: + # This kinda should not happen, but it does. Cop out by skipping the + # nonexistent subtoken from the returned list. + # print("Unable to find subtoken in string '{0}'".format(escaped_token)) + pos += 1 return ret @classmethod @@ -322,13 +337,13 @@ def build_from_token_counts(self, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. for i in xrange(num_iterations): - counts = {} + counts = defaultdict(int) for token, count in six.iteritems(token_counts): escaped_token = self._escape_token(token) # we will count all tails of the escaped_token, starting from boundaries # determined by our current segmentation. if i == 0: - starts = list(range(len(escaped_token))) + starts = xrange(len(escaped_token)) else: subtokens = self._escaped_token_to_subtokens(escaped_token) pos = 0 @@ -337,31 +352,33 @@ def build_from_token_counts(self, starts.append(pos) pos += len(self.subtoken_to_subtoken_string(subtoken)) for start in starts: - for end in xrange(start + 1, len(escaped_token) + 1): + for end in xrange(start + 1, len(escaped_token)): subtoken_string = escaped_token[start:end] - counts[subtoken_string] = counts.get(subtoken_string, 0) + count + counts[subtoken_string] += count # array of lists of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): - if count < min_count or len(subtoken_string) <= 1: + lsub = len(subtoken_string) + # all subtoken strings of length 1 are included regardless of count + if count < min_count and lsub != 1: continue - while len(len_to_subtoken_strings) <= len(subtoken_string): + while len(len_to_subtoken_strings) <= lsub: len_to_subtoken_strings.append([]) - len_to_subtoken_strings[len(subtoken_string)].append(subtoken_string) + len_to_subtoken_strings[lsub].append(subtoken_string) new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. for subtoken_strings in len_to_subtoken_strings[::-1]: for subtoken_string in subtoken_strings: count = counts[subtoken_string] - if count < min_count: + if count < min_count and len(subtoken_string) != 1: + # subtoken strings of length 1 are included regardless of count continue new_subtoken_strings.append((-count, subtoken_string)) for l in xrange(1, len(subtoken_string)): counts[subtoken_string[:l]] -= count - # make sure we have all single characters. - new_subtoken_strings.extend([(-counts.get(chr(i), 0), chr(i)) - for i in xrange(2**8)]) + # Make sure to include the underscore as a subtoken string + new_subtoken_strings.append((0, '_')) new_subtoken_strings.sort() self._init_from_list([''] * self._num_reserved_ids + [p[1] for p in new_subtoken_strings]) @@ -390,13 +407,19 @@ def _load_from_file(self, filename): subtoken_strings = [] with tf.gfile.Open(filename) as f: for line in f: - subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) + if six.PY2: + subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) + else: + subtoken_strings.append(line.strip()[1:-1]) self._init_from_list(subtoken_strings) def _store_to_file(self, filename): with tf.gfile.Open(filename, 'w') as f: for subtoken_string in self._all_subtoken_strings: - f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') + if six.PY2: + f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') + else: + f.write('\'' + subtoken_string + '\'\n') def _escape_token(self, token): r"""Translate '\'->'\\' and '_'->'\u', then append '_'. diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py old mode 100644 new mode 100755 index 15b199907..09b60ff1f --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -45,29 +45,21 @@ from __future__ import division from __future__ import print_function -import array import string # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin - +from collections import defaultdict class Tokenizer(object): """Vocab for breaking words into wordpieces. """ - def __init__(self): - self._separator_chars = string.punctuation + string.whitespace - self._separator_char_mask = array.array( - "l", [chr(i) in self._separator_chars for i in xrange(256)]) - self.token_counts = dict() + _SEPARATOR_CHAR_SET = set(string.punctuation + string.whitespace) - def _increment_token_count(self, token): - if token in self.token_counts: - self.token_counts[token] += 1 - else: - self.token_counts[token] = 1 + def __init__(self): + self.token_counts = defaultdict(int) def encode(self, raw_text): """Encode a raw string as a list of tokens. @@ -87,11 +79,11 @@ def encode(self, raw_text): token = raw_text[token_start:pos] if token != " " or token_start == 0: ret.append(token) - self._increment_token_count(token) + self.token_counts[token] += 1 token_start = pos final_token = raw_text[token_start:] ret.append(final_token) - self._increment_token_count(final_token) + self.token_counts[final_token] += 1 return ret def decode(self, tokens): @@ -111,7 +103,7 @@ def decode(self, tokens): return ret def _is_separator_char(self, c): - return self._separator_char_mask[ord(c)] + return c in self._SEPARATOR_CHAR_SET def _is_word_char(self, c): - return not self._is_separator_char(c) + return c not in self._SEPARATOR_CHAR_SET From b09291df5e8741c373da0b51ab5a4ad66c9a49e4 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Fri, 23 Jun 2017 11:14:41 -0700 Subject: [PATCH 0014/4095] Bump version to 1.0.6 PiperOrigin-RevId: 159970178 --- .../data_generators/generator_utils.py | 2 +- tensor2tensor/data_generators/image.py | 3 +- tensor2tensor/data_generators/text_encoder.py | 87 +++++++------------ tensor2tensor/data_generators/tokenizer.py | 24 +++-- 4 files changed, 50 insertions(+), 66 deletions(-) mode change 100755 => 100644 tensor2tensor/data_generators/text_encoder.py mode change 100755 => 100644 tensor2tensor/data_generators/tokenizer.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 11788df45..35e61d7cc 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -22,12 +22,12 @@ import io import os import tarfile +import urllib # Dependency imports import six from six.moves import xrange # pylint: disable=redefined-builtin -import six.moves.urllib_request from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index ee0ad26d5..55b5f2fc7 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function +import cPickle import gzip import io import json @@ -31,8 +32,6 @@ import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin -from six.moves import cPickle - from tensor2tensor.data_generators import generator_utils import tensorflow as tf diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100755 new mode 100644 index 74d2b73cb..b170013ea --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -27,7 +27,6 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin -from collections import defaultdict from tensor2tensor.data_generators import tokenizer import tensorflow as tf @@ -36,10 +35,7 @@ PAD = '' EOS = '' RESERVED_TOKENS = [PAD, EOS] -if six.PY2: - RESERVED_TOKENS_BYTES = RESERVED_TOKENS -else: - RESERVED_TOKENS_BYTES = [bytes(PAD, 'ascii'), bytes(EOS, 'ascii')] + class TextEncoder(object): """Base class for converting from ints to/from human readable strings.""" @@ -91,25 +87,17 @@ class ByteTextEncoder(TextEncoder): """Encodes each byte to an id. For 8-bit strings only.""" def encode(self, s): - numres = self._num_reserved_ids - if six.PY2: - return [ord(c) + numres for c in s] - # Python3: explicitly convert to UTF-8 - return [c + numres for c in s.encode("utf-8")] + return [ord(c) + self._num_reserved_ids for c in s] def decode(self, ids): - numres = self._num_reserved_ids decoded_ids = [] - int2byte = six.int2byte for id_ in ids: - if 0 <= id_ < numres: - decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) + if 0 <= id_ < self._num_reserved_ids: + decoded_ids.append(RESERVED_TOKENS[int(id_)]) else: - decoded_ids.append(int2byte(id_ - numres)) - if six.PY2: - return ''.join(decoded_ids) - # Python3: join byte arrays and then decode string - return b''.join(decoded_ids).decode("utf-8") + decoded_ids.append(chr(id_)) + + return ''.join(decoded_ids) @property def vocab_size(self): @@ -123,16 +111,20 @@ def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): """Initialize from a file, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse - self._load_vocab_from_file(vocab_filename) + if vocab_filename is not None: + self._load_vocab_from_file(vocab_filename) def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" ret = [self._token_to_id[tok] for tok in sentence.strip().split()] - return ret[::-1] if self._reverse else ret + if self._reverse: + ret = ret[::-1] + return ret def decode(self, ids): - seq = reversed(ids) if self._reverse else ids - return ' '.join([self._safe_id_to_token(i) for i in seq]) + if self._reverse: + ids = ids[::-1] + return ' '.join([self._safe_id_to_token(i) for i in ids]) @property def vocab_size(self): @@ -251,22 +243,15 @@ def _escaped_token_to_subtokens(self, escaped_token): """ ret = [] pos = 0 - lesc = len(escaped_token) - while pos < lesc: - end = lesc - while end > pos: + while pos < len(escaped_token): + end = len(escaped_token) + while True: subtoken = self._subtoken_string_to_id.get(escaped_token[pos:end], -1) if subtoken != -1: break end -= 1 ret.append(subtoken) - if end > pos: - pos = end - else: - # This kinda should not happen, but it does. Cop out by skipping the - # nonexistent subtoken from the returned list. - # print("Unable to find subtoken in string '{0}'".format(escaped_token)) - pos += 1 + pos = end return ret @classmethod @@ -337,13 +322,13 @@ def build_from_token_counts(self, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. for i in xrange(num_iterations): - counts = defaultdict(int) + counts = {} for token, count in six.iteritems(token_counts): escaped_token = self._escape_token(token) # we will count all tails of the escaped_token, starting from boundaries # determined by our current segmentation. if i == 0: - starts = xrange(len(escaped_token)) + starts = list(range(len(escaped_token))) else: subtokens = self._escaped_token_to_subtokens(escaped_token) pos = 0 @@ -352,33 +337,31 @@ def build_from_token_counts(self, starts.append(pos) pos += len(self.subtoken_to_subtoken_string(subtoken)) for start in starts: - for end in xrange(start + 1, len(escaped_token)): + for end in xrange(start + 1, len(escaped_token) + 1): subtoken_string = escaped_token[start:end] - counts[subtoken_string] += count + counts[subtoken_string] = counts.get(subtoken_string, 0) + count # array of lists of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): - lsub = len(subtoken_string) - # all subtoken strings of length 1 are included regardless of count - if count < min_count and lsub != 1: + if count < min_count or len(subtoken_string) <= 1: continue - while len(len_to_subtoken_strings) <= lsub: + while len(len_to_subtoken_strings) <= len(subtoken_string): len_to_subtoken_strings.append([]) - len_to_subtoken_strings[lsub].append(subtoken_string) + len_to_subtoken_strings[len(subtoken_string)].append(subtoken_string) new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. for subtoken_strings in len_to_subtoken_strings[::-1]: for subtoken_string in subtoken_strings: count = counts[subtoken_string] - if count < min_count and len(subtoken_string) != 1: - # subtoken strings of length 1 are included regardless of count + if count < min_count: continue new_subtoken_strings.append((-count, subtoken_string)) for l in xrange(1, len(subtoken_string)): counts[subtoken_string[:l]] -= count - # Make sure to include the underscore as a subtoken string - new_subtoken_strings.append((0, '_')) + # make sure we have all single characters. + new_subtoken_strings.extend([(-counts.get(chr(i), 0), chr(i)) + for i in xrange(2**8)]) new_subtoken_strings.sort() self._init_from_list([''] * self._num_reserved_ids + [p[1] for p in new_subtoken_strings]) @@ -407,19 +390,13 @@ def _load_from_file(self, filename): subtoken_strings = [] with tf.gfile.Open(filename) as f: for line in f: - if six.PY2: - subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) - else: - subtoken_strings.append(line.strip()[1:-1]) + subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) self._init_from_list(subtoken_strings) def _store_to_file(self, filename): with tf.gfile.Open(filename, 'w') as f: for subtoken_string in self._all_subtoken_strings: - if six.PY2: - f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') - else: - f.write('\'' + subtoken_string + '\'\n') + f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') def _escape_token(self, token): r"""Translate '\'->'\\' and '_'->'\u', then append '_'. diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py old mode 100755 new mode 100644 index 09b60ff1f..15b199907 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -45,21 +45,29 @@ from __future__ import division from __future__ import print_function +import array import string # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin -from collections import defaultdict + class Tokenizer(object): """Vocab for breaking words into wordpieces. """ - _SEPARATOR_CHAR_SET = set(string.punctuation + string.whitespace) - def __init__(self): - self.token_counts = defaultdict(int) + self._separator_chars = string.punctuation + string.whitespace + self._separator_char_mask = array.array( + "l", [chr(i) in self._separator_chars for i in xrange(256)]) + self.token_counts = dict() + + def _increment_token_count(self, token): + if token in self.token_counts: + self.token_counts[token] += 1 + else: + self.token_counts[token] = 1 def encode(self, raw_text): """Encode a raw string as a list of tokens. @@ -79,11 +87,11 @@ def encode(self, raw_text): token = raw_text[token_start:pos] if token != " " or token_start == 0: ret.append(token) - self.token_counts[token] += 1 + self._increment_token_count(token) token_start = pos final_token = raw_text[token_start:] ret.append(final_token) - self.token_counts[final_token] += 1 + self._increment_token_count(final_token) return ret def decode(self, tokens): @@ -103,7 +111,7 @@ def decode(self, tokens): return ret def _is_separator_char(self, c): - return c in self._SEPARATOR_CHAR_SET + return self._separator_char_mask[ord(c)] def _is_word_char(self, c): - return c not in self._SEPARATOR_CHAR_SET + return not self._is_separator_char(c) From 95942139b825ba19f18e3b740e2d5c9928411668 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Fri, 23 Jun 2017 11:15:13 -0700 Subject: [PATCH 0015/4095] gitignore update PiperOrigin-RevId: 159970261 --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 09f934869..dd84837dd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,7 @@ # Python egg metadata, regenerated from source files by setuptools. /*.egg-info + +# PyPI distribution artificats +build/ +dist/ From c2ce7a6bdf79f05524b6c07cad1762899371ec3d Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Fri, 23 Jun 2017 17:37:10 -0700 Subject: [PATCH 0016/4095] Play with CIFAR models and shake-shake a little. PiperOrigin-RevId: 160016542 --- tensor2tensor/data_generators/image.py | 4 - tensor2tensor/models/bluenet.py | 150 +++++++++++++++++++++ tensor2tensor/models/bluenet_test.py | 54 ++++++++ tensor2tensor/models/common_layers.py | 46 +++++++ tensor2tensor/models/common_layers_test.py | 9 ++ tensor2tensor/models/models.py | 1 + tensor2tensor/models/xception.py | 10 ++ tensor2tensor/models/xception_test.py | 2 +- 8 files changed, 271 insertions(+), 5 deletions(-) create mode 100644 tensor2tensor/models/bluenet.py create mode 100644 tensor2tensor/models/bluenet_test.py diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 55b5f2fc7..88bfef4e6 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -200,10 +200,6 @@ def cifar10_generator(tmp_dir, training, how_many, start_from=0): ]) labels = data["labels"] all_labels.extend([labels[j] for j in xrange(num_images)]) - # Shuffle the data to make sure classes are well distributed. - data = zip(all_images, all_labels) - random.shuffle(data) - all_images, all_labels = zip(*data) return image_generator(all_images[start_from:start_from + how_many], all_labels[start_from:start_from + how_many]) diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py new file mode 100644 index 000000000..bb7119a15 --- /dev/null +++ b/tensor2tensor/models/bluenet.py @@ -0,0 +1,150 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""BlueNet: and out of the blue network to experiment with shake-shake.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.models import common_hparams +from tensor2tensor.models import common_layers +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +def residual_module(x, hparams, train, n, sep): + """A stack of convolution blocks with residual connection.""" + k = (hparams.kernel_height, hparams.kernel_width) + dilations_and_kernels = [((1, 1), k) for _ in xrange(n)] + with tf.variable_scope("residual_module%d_sep%d" % (n, sep)): + y = common_layers.subseparable_conv_block( + x, + hparams.hidden_size, + dilations_and_kernels, + padding="SAME", + separability=sep, + name="block") + x = common_layers.layer_norm(x + y, hparams.hidden_size, name="lnorm") + return tf.nn.dropout(x, 1.0 - hparams.dropout * tf.to_float(train)) + + +def residual_module1(x, hparams, train): + return residual_module(x, hparams, train, 1, 1) + + +def residual_module1_sep(x, hparams, train): + return residual_module(x, hparams, train, 1, 0) + + +def residual_module2(x, hparams, train): + return residual_module(x, hparams, train, 2, 1) + + +def residual_module2_sep(x, hparams, train): + return residual_module(x, hparams, train, 2, 0) + + +def residual_module3(x, hparams, train): + return residual_module(x, hparams, train, 3, 1) + + +def residual_module3_sep(x, hparams, train): + return residual_module(x, hparams, train, 3, 0) + + +def norm_module(x, hparams, train): + del train # Unused. + return common_layers.layer_norm(x, hparams.hidden_size, name="norm_module") + + +def identity_module(x, hparams, train): + del hparams, train # Unused. + return x + + +def run_modules(blocks, cur, hparams, train, dp): + """Run blocks in parallel using dp as data_parallelism.""" + assert len(blocks) % dp.n == 0 + res = [] + for i in xrange(len(blocks) // dp.n): + res.extend(dp(blocks[i * dp.n:(i + 1) * dp.n], cur, hparams, train)) + return res + + +@registry.register_model +class BlueNet(t2t_model.T2TModel): + + def model_fn_body_sharded(self, sharded_features, train): + dp = self._data_parallelism + dp._reuse = False # pylint:disable=protected-access + hparams = self._hparams + blocks = [identity_module, norm_module, + residual_module1, residual_module1_sep, + residual_module2, residual_module2_sep, + residual_module3, residual_module3_sep] + inputs = sharded_features["inputs"] + + cur = tf.concat(inputs, axis=0) + cur_shape = cur.get_shape() + for i in xrange(hparams.num_hidden_layers): + with tf.variable_scope("layer_%d" % i): + processed = run_modules(blocks, cur, hparams, train, dp) + cur = common_layers.shakeshake(processed) + cur.set_shape(cur_shape) + + return list(tf.split(cur, len(inputs), axis=0)), 0.0 + + +@registry.register_hparams +def bluenet_base(): + """Set of hyperparameters.""" + hparams = common_hparams.basic_params1() + hparams.batch_size = 4096 + hparams.hidden_size = 768 + hparams.dropout = 0.2 + hparams.symbol_dropout = 0.2 + hparams.label_smoothing = 0.1 + hparams.clip_grad_norm = 2.0 + hparams.num_hidden_layers = 8 + hparams.kernel_height = 3 + hparams.kernel_width = 3 + hparams.learning_rate_decay_scheme = "exp50k" + hparams.learning_rate = 0.05 + hparams.learning_rate_warmup_steps = 3000 + hparams.initializer_gain = 1.0 + hparams.weight_decay = 3.0 + hparams.num_sampled_classes = 0 + hparams.sampling_method = "argmax" + hparams.optimizer_adam_epsilon = 1e-6 + hparams.optimizer_adam_beta1 = 0.85 + hparams.optimizer_adam_beta2 = 0.997 + hparams.add_hparam("imagenet_use_2d", True) + return hparams + + +@registry.register_hparams +def bluenet_tiny(): + hparams = bluenet_base() + hparams.batch_size = 1024 + hparams.hidden_size = 128 + hparams.num_hidden_layers = 4 + hparams.learning_rate_decay_scheme = "none" + return hparams diff --git a/tensor2tensor/models/bluenet_test.py b/tensor2tensor/models/bluenet_test.py new file mode 100644 index 000000000..70996ab02 --- /dev/null +++ b/tensor2tensor/models/bluenet_test.py @@ -0,0 +1,54 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""BlueNet tests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +import numpy as np + +from tensor2tensor.data_generators import problem_hparams +from tensor2tensor.models import bluenet + +import tensorflow as tf + + +class BlueNetTest(tf.test.TestCase): + + def testBlueNet(self): + vocab_size = 9 + x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) + y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 1, 1, 1)) + hparams = bluenet.bluenet_tiny() + p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, + vocab_size) + with self.test_session() as session: + features = { + "inputs": tf.constant(x, dtype=tf.int32), + "targets": tf.constant(y, dtype=tf.int32), + } + model = bluenet.BlueNet(hparams, p_hparams) + sharded_logits, _, _ = model.model_fn(features, True) + logits = tf.concat(sharded_logits, 0) + session.run(tf.global_variables_initializer()) + res = session.run(logits) + self.assertEqual(res.shape, (3, 5, 1, 1, vocab_size)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 30215e889..f9d63a464 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -58,6 +58,52 @@ def inverse_exp_decay(max_step, min_value=0.01): return inv_base**tf.maximum(float(max_step) - step, 0.0) +def shakeshake2_py(x, y, equal=False): + """The shake-shake sum of 2 tensors, python version.""" + alpha = 0.5 if equal else tf.random_uniform([]) + return alpha * x + (1.0 - alpha) * y + + +@function.Defun() +def shakeshake2_grad(x1, x2, dy): + """Overriding gradient for shake-shake of 2 tensors.""" + y = shakeshake2_py(x1, x2) + dx = tf.gradients(ys=[y], xs=[x1, x2], grad_ys=[dy]) + return dx + + +@function.Defun() +def shakeshake2_equal_grad(x1, x2, dy): + """Overriding gradient for shake-shake of 2 tensors.""" + y = shakeshake2_py(x1, x2, equal=True) + dx = tf.gradients(ys=[y], xs=[x1, x2], grad_ys=[dy]) + return dx + + +@function.Defun(grad_func=shakeshake2_grad) +def shakeshake2(x1, x2): + """The shake-shake function with a different alpha for forward/backward.""" + return shakeshake2_py(x1, x2) + + +@function.Defun(grad_func=shakeshake2_equal_grad) +def shakeshake2_eqgrad(x1, x2): + """The shake-shake function with a different alpha for forward/backward.""" + return shakeshake2_py(x1, x2) + + +def shakeshake(xs, equal_grad=False): + """Multi-argument shake-shake, currently approximated by sums of 2.""" + if len(xs) == 1: + return xs[0] + div = (len(xs) + 1) // 2 + arg1 = shakeshake(xs[:div], equal_grad=equal_grad) + arg2 = shakeshake(xs[div:], equal_grad=equal_grad) + if equal_grad: + return shakeshake2_eqgrad(arg1, arg2) + return shakeshake2(arg1, arg2) + + def standardize_images(x): """Image standardization on batches (tf.image.per_image_standardization).""" with tf.name_scope("standardize_images", [x]): diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index 2bd6a53ad..3839b9d36 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -65,6 +65,15 @@ def testEmbedding(self): res = session.run(y) self.assertEqual(res.shape, (3, 5, 16)) + def testShakeShake(self): + x = np.random.rand(5, 7) + with self.test_session() as session: + x = tf.constant(x, dtype=tf.float32) + y = common_layers.shakeshake([x, x, x, x, x]) + session.run(tf.global_variables_initializer()) + inp, res = session.run([x, y]) + self.assertAllClose(res, inp) + def testConv(self): x = np.random.rand(5, 7, 1, 11) with self.test_session() as session: diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index 536a58966..b8f0811e5 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -24,6 +24,7 @@ from tensor2tensor.models import attention_lm from tensor2tensor.models import attention_lm_moe +from tensor2tensor.models import bluenet from tensor2tensor.models import bytenet from tensor2tensor.models import lstm from tensor2tensor.models import modalities diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index b6e271c36..01b5adb78 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -87,3 +87,13 @@ def xception_base(): hparams.optimizer_adam_beta2 = 0.997 hparams.add_hparam("imagenet_use_2d", True) return hparams + + +@registry.register_hparams +def xception_tiny(): + hparams = xception_base() + hparams.batch_size = 1024 + hparams.hidden_size = 128 + hparams.num_hidden_layers = 4 + hparams.learning_rate_decay_scheme = "none" + return hparams diff --git a/tensor2tensor/models/xception_test.py b/tensor2tensor/models/xception_test.py index 106604659..4eabb387a 100644 --- a/tensor2tensor/models/xception_test.py +++ b/tensor2tensor/models/xception_test.py @@ -34,7 +34,7 @@ def testXception(self): vocab_size = 9 x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 1, 1, 1)) - hparams = xception.xception_base() + hparams = xception.xception_tiny() p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, vocab_size) with self.test_session() as session: From b53d6df93418628096a09e203c6fe3b0daafbd62 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Fri, 23 Jun 2017 18:00:22 -0700 Subject: [PATCH 0017/4095] Bump version to 1.0.7 PiperOrigin-RevId: 160018021 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0bf0c8739..5b2d423f8 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.6', + version='1.0.7', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From d578f5210e4f0ba345a5e83bd111b4c3b2f2ed57 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Fri, 23 Jun 2017 18:06:47 -0700 Subject: [PATCH 0018/4095] internal merge PiperOrigin-RevId: 160018490 --- .../data_generators/generator_utils.py | 1 + tensor2tensor/data_generators/image.py | 2 +- tensor2tensor/data_generators/text_encoder.py | 87 ++++++++++++------- tensor2tensor/data_generators/tokenizer.py | 23 ++--- 4 files changed, 66 insertions(+), 47 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 35e61d7cc..75d319cd8 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -28,6 +28,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin +import six.moves.urllib_request from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 88bfef4e6..e7e740192 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function -import cPickle import gzip import io import json @@ -30,6 +29,7 @@ # Dependency imports import numpy as np +from six.moves import cPickle from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin from tensor2tensor.data_generators import generator_utils diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index b170013ea..a219a6b8d 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -23,6 +23,8 @@ from __future__ import division from __future__ import print_function +from collections import defaultdict + # Dependency imports import six @@ -35,6 +37,10 @@ PAD = '' EOS = '' RESERVED_TOKENS = [PAD, EOS] +if six.PY2: + RESERVED_TOKENS_BYTES = RESERVED_TOKENS +else: + RESERVED_TOKENS_BYTES = [bytes(PAD, 'ascii'), bytes(EOS, 'ascii')] class TextEncoder(object): @@ -87,17 +93,25 @@ class ByteTextEncoder(TextEncoder): """Encodes each byte to an id. For 8-bit strings only.""" def encode(self, s): - return [ord(c) + self._num_reserved_ids for c in s] + numres = self._num_reserved_ids + if six.PY2: + return [ord(c) + numres for c in s] + # Python3: explicitly convert to UTF-8 + return [c + numres for c in s.encode('utf-8')] def decode(self, ids): + numres = self._num_reserved_ids decoded_ids = [] + int2byte = six.int2byte for id_ in ids: - if 0 <= id_ < self._num_reserved_ids: - decoded_ids.append(RESERVED_TOKENS[int(id_)]) + if 0 <= id_ < numres: + decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) else: - decoded_ids.append(chr(id_)) - - return ''.join(decoded_ids) + decoded_ids.append(int2byte(id_ - numres)) + if six.PY2: + return ''.join(decoded_ids) + # Python3: join byte arrays and then decode string + return b''.join(decoded_ids).decode('utf-8') @property def vocab_size(self): @@ -111,20 +125,16 @@ def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): """Initialize from a file, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse - if vocab_filename is not None: - self._load_vocab_from_file(vocab_filename) + self._load_vocab_from_file(vocab_filename) def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" ret = [self._token_to_id[tok] for tok in sentence.strip().split()] - if self._reverse: - ret = ret[::-1] - return ret + return ret[::-1] if self._reverse else ret def decode(self, ids): - if self._reverse: - ids = ids[::-1] - return ' '.join([self._safe_id_to_token(i) for i in ids]) + seq = reversed(ids) if self._reverse else ids + return ' '.join([self._safe_id_to_token(i) for i in seq]) @property def vocab_size(self): @@ -243,15 +253,22 @@ def _escaped_token_to_subtokens(self, escaped_token): """ ret = [] pos = 0 - while pos < len(escaped_token): - end = len(escaped_token) - while True: + lesc = len(escaped_token) + while pos < lesc: + end = lesc + while end > pos: subtoken = self._subtoken_string_to_id.get(escaped_token[pos:end], -1) if subtoken != -1: break end -= 1 ret.append(subtoken) - pos = end + if end > pos: + pos = end + else: + # This kinda should not happen, but it does. Cop out by skipping the + # nonexistent subtoken from the returned list. + # print("Unable to find subtoken in string '{0}'".format(escaped_token)) + pos += 1 return ret @classmethod @@ -322,13 +339,13 @@ def build_from_token_counts(self, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. for i in xrange(num_iterations): - counts = {} + counts = defaultdict(int) for token, count in six.iteritems(token_counts): escaped_token = self._escape_token(token) # we will count all tails of the escaped_token, starting from boundaries # determined by our current segmentation. if i == 0: - starts = list(range(len(escaped_token))) + starts = xrange(len(escaped_token)) else: subtokens = self._escaped_token_to_subtokens(escaped_token) pos = 0 @@ -337,31 +354,33 @@ def build_from_token_counts(self, starts.append(pos) pos += len(self.subtoken_to_subtoken_string(subtoken)) for start in starts: - for end in xrange(start + 1, len(escaped_token) + 1): + for end in xrange(start + 1, len(escaped_token)): subtoken_string = escaped_token[start:end] - counts[subtoken_string] = counts.get(subtoken_string, 0) + count + counts[subtoken_string] += count # array of lists of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): - if count < min_count or len(subtoken_string) <= 1: + lsub = len(subtoken_string) + # all subtoken strings of length 1 are included regardless of count + if count < min_count and lsub != 1: continue - while len(len_to_subtoken_strings) <= len(subtoken_string): + while len(len_to_subtoken_strings) <= lsub: len_to_subtoken_strings.append([]) - len_to_subtoken_strings[len(subtoken_string)].append(subtoken_string) + len_to_subtoken_strings[lsub].append(subtoken_string) new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. for subtoken_strings in len_to_subtoken_strings[::-1]: for subtoken_string in subtoken_strings: count = counts[subtoken_string] - if count < min_count: + if count < min_count and len(subtoken_string) != 1: + # subtoken strings of length 1 are included regardless of count continue new_subtoken_strings.append((-count, subtoken_string)) for l in xrange(1, len(subtoken_string)): counts[subtoken_string[:l]] -= count - # make sure we have all single characters. - new_subtoken_strings.extend([(-counts.get(chr(i), 0), chr(i)) - for i in xrange(2**8)]) + # Make sure to include the underscore as a subtoken string + new_subtoken_strings.append((0, '_')) new_subtoken_strings.sort() self._init_from_list([''] * self._num_reserved_ids + [p[1] for p in new_subtoken_strings]) @@ -390,13 +409,19 @@ def _load_from_file(self, filename): subtoken_strings = [] with tf.gfile.Open(filename) as f: for line in f: - subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) + if six.PY2: + subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) + else: + subtoken_strings.append(line.strip()[1:-1]) self._init_from_list(subtoken_strings) def _store_to_file(self, filename): with tf.gfile.Open(filename, 'w') as f: for subtoken_string in self._all_subtoken_strings: - f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') + if six.PY2: + f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') + else: + f.write('\'' + subtoken_string + '\'\n') def _escape_token(self, token): r"""Translate '\'->'\\' and '_'->'\u', then append '_'. diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 15b199907..3564aee2e 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -45,7 +45,7 @@ from __future__ import division from __future__ import print_function -import array +from collections import defaultdict import string # Dependency imports @@ -57,17 +57,10 @@ class Tokenizer(object): """Vocab for breaking words into wordpieces. """ - def __init__(self): - self._separator_chars = string.punctuation + string.whitespace - self._separator_char_mask = array.array( - "l", [chr(i) in self._separator_chars for i in xrange(256)]) - self.token_counts = dict() + _SEPARATOR_CHAR_SET = set(string.punctuation + string.whitespace) - def _increment_token_count(self, token): - if token in self.token_counts: - self.token_counts[token] += 1 - else: - self.token_counts[token] = 1 + def __init__(self): + self.token_counts = defaultdict(int) def encode(self, raw_text): """Encode a raw string as a list of tokens. @@ -87,11 +80,11 @@ def encode(self, raw_text): token = raw_text[token_start:pos] if token != " " or token_start == 0: ret.append(token) - self._increment_token_count(token) + self.token_counts[token] += 1 token_start = pos final_token = raw_text[token_start:] ret.append(final_token) - self._increment_token_count(final_token) + self.token_counts[final_token] += 1 return ret def decode(self, tokens): @@ -111,7 +104,7 @@ def decode(self, tokens): return ret def _is_separator_char(self, c): - return self._separator_char_mask[ord(c)] + return c in self._SEPARATOR_CHAR_SET def _is_word_char(self, c): - return not self._is_separator_char(c) + return c not in self._SEPARATOR_CHAR_SET From 3884fa05cd0c472eb4c8ea8682095d7c7d21a171 Mon Sep 17 00:00:00 2001 From: jekbradbury Date: Fri, 23 Jun 2017 19:17:12 -0700 Subject: [PATCH 0019/4095] fix broken reference in inference The class `modality.ClassLabelModality` does not appear to exist (any more?); this change makes inference work for me. --- tensor2tensor/utils/t2t_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 8d9117694..5ebb74280 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -276,8 +276,8 @@ def infer_step(recent_output, _): # input shape, so we confuse it about the input shape. initial_output = tf.slice(initial_output, [0, 0, 0, 0], tf.shape(initial_output)) - if isinstance(self._hparams.problems[self._problem_idx].target_modality, - modality.ClassLabelModality): + if (self._hparams.problems[self._problem_idx].target_modality is + registry.Modalities.CLASS_LABEL): decode_length = 1 else: decode_length = tf.shape(features["inputs"])[1] + decode_length From 9e2eae1ac5f8b5201c189553234b1ea8d6e7e866 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Sat, 24 Jun 2017 00:46:10 -0500 Subject: [PATCH 0020/4095] update README: 'data-set' inconsistent with 'dataset' everywhere else in the file. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f49fb20df..6932dab3a 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ it](https://research.googleblog.com/2017/06/accelerating-deep-learning-research. We're eager to collaborate with you on extending T2T, so please feel free to [open an issue on GitHub](https://github.com/tensorflow/tensor2tensor/issues) or -send along a pull request to add your data-set or model. +send along a pull request to add your dataset or model. See [our contribution doc](CONTRIBUTING.md) for details and our [open issues](https://github.com/tensorflow/tensor2tensor/issues). From 9905796587156c4ad5e73a39b730ca5ba28669fa Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Sat, 24 Jun 2017 01:01:15 -0500 Subject: [PATCH 0021/4095] fix 'artificats' to 'artifacts' --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index dd84837dd..e610f29ba 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,6 @@ # Python egg metadata, regenerated from source files by setuptools. /*.egg-info -# PyPI distribution artificats +# PyPI distribution artifacts build/ dist/ From 5840a71e1b49c43e8208a3c484c1903d7f46ee45 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sat, 24 Jun 2017 10:45:43 +0000 Subject: [PATCH 0022/4095] Sync with upstream --- tensor2tensor/data_generators/generator_utils.py | 2 +- tensor2tensor/data_generators/image.py | 2 -- tensor2tensor/data_generators/text_encoder.py | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/generator_utils.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100644 new mode 100755 index 11788df45..4993f136a --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -27,7 +27,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin -import six.moves.urllib_request +import six.moves.urllib_request # Imports urllib on Python2, urllib.request on Python3 from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 29a76da76..e7e740192 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -32,8 +32,6 @@ from six.moves import cPickle from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin -from six.moves import cPickle - from tensor2tensor.data_generators import generator_utils import tensorflow as tf diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 59ccbe3c8..a219a6b8d 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -29,7 +29,6 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin -from collections import defaultdict from tensor2tensor.data_generators import tokenizer import tensorflow as tf From 6e9dedb3f186e05fce90d70c685dc526dfb175cc Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sat, 24 Jun 2017 10:45:43 +0000 Subject: [PATCH 0023/4095] Sync with upstream --- tensor2tensor/data_generators/generator_utils.py | 2 +- tensor2tensor/data_generators/image.py | 2 -- tensor2tensor/data_generators/text_encoder.py | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/generator_utils.py mode change 100644 => 100755 tensor2tensor/data_generators/image.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100644 new mode 100755 index 11788df45..4993f136a --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -27,7 +27,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin -import six.moves.urllib_request +import six.moves.urllib_request # Imports urllib on Python2, urllib.request on Python3 from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py old mode 100644 new mode 100755 index 29a76da76..e7e740192 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -32,8 +32,6 @@ from six.moves import cPickle from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin -from six.moves import cPickle - from tensor2tensor.data_generators import generator_utils import tensorflow as tf diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 59ccbe3c8..a219a6b8d 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -29,7 +29,6 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin -from collections import defaultdict from tensor2tensor.data_generators import tokenizer import tensorflow as tf From 9fbfa3ddafaa77d258f2af70a5868de46b659901 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sat, 24 Jun 2017 14:26:43 +0000 Subject: [PATCH 0024/4095] Fixed file modes --- tensor2tensor/data_generators/image.py | 0 tensor2tensor/data_generators/text_encoder.py | 0 tensor2tensor/data_generators/tokenizer.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tensor2tensor/data_generators/image.py mode change 100755 => 100644 tensor2tensor/data_generators/text_encoder.py mode change 100755 => 100644 tensor2tensor/data_generators/tokenizer.py diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py old mode 100755 new mode 100644 diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100755 new mode 100644 diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py old mode 100755 new mode 100644 From bbff1ec1e22e7bc9f86a81784bce7ccb088a1da6 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sun, 25 Jun 2017 00:22:48 +0000 Subject: [PATCH 0025/4095] Fixed import --- tensor2tensor/data_generators/generator_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 4993f136a..c50d19afa 100755 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -27,7 +27,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin -import six.moves.urllib_request # Imports urllib on Python2, urllib.request on Python3 +import six.moves.urllib_request as urllib # Imports urllib on Python2, urllib.request on Python3 from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer From 4f1d5e25b64bbc6e205f19c4c437ff75d3ec9550 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sun, 25 Jun 2017 00:52:39 +0000 Subject: [PATCH 0026/4095] Fixed file permissions --- tensor2tensor/data_generators/generator_utils.py | 2 +- tensor2tensor/data_generators/image.py | 0 tensor2tensor/data_generators/text_encoder.py | 0 tensor2tensor/data_generators/tokenizer.py | 0 4 files changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 tensor2tensor/data_generators/generator_utils.py mode change 100755 => 100644 tensor2tensor/data_generators/image.py mode change 100755 => 100644 tensor2tensor/data_generators/text_encoder.py mode change 100755 => 100644 tensor2tensor/data_generators/tokenizer.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100755 new mode 100644 index 4993f136a..11788df45 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -27,7 +27,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin -import six.moves.urllib_request # Imports urllib on Python2, urllib.request on Python3 +import six.moves.urllib_request from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py old mode 100755 new mode 100644 diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100755 new mode 100644 diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py old mode 100755 new mode 100644 From a0ffe450fe97ab442ca64debf935bd063a985d99 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sun, 25 Jun 2017 00:54:05 +0000 Subject: [PATCH 0027/4095] Reverted change --- tensor2tensor/data_generators/generator_utils.py | 1 + 1 file changed, 1 insertion(+) mode change 100644 => 100755 tensor2tensor/data_generators/generator_utils.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100644 new mode 100755 index 11788df45..75d319cd8 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -22,6 +22,7 @@ import io import os import tarfile +import urllib # Dependency imports From d8aab8a219dab81b07242ce6a6309b65c83babb0 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sun, 25 Jun 2017 00:55:00 +0000 Subject: [PATCH 0028/4095] File attribute --- tensor2tensor/data_generators/generator_utils.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tensor2tensor/data_generators/generator_utils.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100755 new mode 100644 From f38b052644cfdf5ec1f5464e8042bbaac3fd8486 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sun, 25 Jun 2017 00:59:31 +0000 Subject: [PATCH 0029/4095] Use iter.__next__ for Python3 --- tensor2tensor/utils/trainer_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) mode change 100644 => 100755 tensor2tensor/utils/trainer_utils.py diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py old mode 100644 new mode 100755 index 69accdc44..d901b4241 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -642,7 +642,9 @@ def _save_until_eos(hyp): # pylint: disable=missing-docstring decodes = [] for _ in range(num_decode_batches): - result_iter = estimator.predict(input_fn=input_fn.next, as_iterable=True) + result_iter = estimator.predict( + input_fn=input_fn.next if six.PY2 else input_fn.__next__, + as_iterable=True) for result in result_iter: def log_fn(inputs, outputs): From 792860b6e4e0b6c4d3b3fa7bc1f96209cddcf462 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sun, 25 Jun 2017 01:00:42 +0000 Subject: [PATCH 0030/4095] File mode change --- tensor2tensor/utils/trainer_utils.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tensor2tensor/utils/trainer_utils.py diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py old mode 100755 new mode 100644 From 5f490a17667b84cd894a50424d621962742ee4a8 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sun, 25 Jun 2017 01:12:34 +0000 Subject: [PATCH 0031/4095] Import urllib fix --- tensor2tensor/data_generators/generator_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index f63b43870..c50d19afa 100755 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -22,7 +22,6 @@ import io import os import tarfile -import urllib # Dependency imports From eee5b1cb8e75665d578e41286bda7fac7258042a Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sun, 25 Jun 2017 15:40:32 +0000 Subject: [PATCH 0032/4095] Use ByteTextEncoding() for encoding source and target strings in character based models --- tensor2tensor/data_generators/wmt.py | 35 +++++++++++++++++++++------- 1 file changed, 26 insertions(+), 9 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/wmt.py diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py old mode 100644 new mode 100755 index 0be28ab73..5c54d008c --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -28,8 +28,12 @@ import tensorflow as tf +# End-of-sentence marker (should correspond to the position of EOS in the +# RESERVED_TOKENS list in text_encoder.py) +EOS = 1 -def character_generator(source_path, target_path, eos=None): + +def character_generator(source_path, target_path, token_vocab=None, eos=None): """Generator for sequence-to-sequence tasks that just uses characters. This generator assumes the files at source_path and target_path have @@ -51,8 +55,15 @@ def character_generator(source_path, target_path, eos=None): with tf.gfile.GFile(target_path, mode="r") as target_file: source, target = source_file.readline(), target_file.readline() while source and target: - source_ints = [ord(c) for c in source.strip()] + eos_list - target_ints = [ord(c) for c in target.strip()] + eos_list + if token_vocab is None: + # Straight-through encoding of characters + # If using this, be careful about potential clashes between + # character ordinals, pad bytes (0) and EOS markers + source_ints = [ord(c) for c in source.strip()] + eos_list + target_ints = [ord(c) for c in target.strip()] + eos_list + else: + source_ints = token_vocab.encode(source.strip()) + eos_list + target_ints = token_vocab.encode(target.strip()) + eos_list yield {"inputs": source_ints, "targets": target_ints} source, target = source_file.readline(), target_file.readline() @@ -226,14 +237,16 @@ def ende_wordpiece_token_generator(tmp_dir, train, vocab_size): tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, 1) + symbolizer_vocab, EOS) def ende_character_generator(tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", 1) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): @@ -244,22 +257,26 @@ def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, 1) + symbolizer_vocab, EOS) def enfr_character_generator(tmp_dir, train): """Instance of character generator for the WMT en->fr task.""" + character_vocab = text_encoder.ByteTextEncoder() datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", 1) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) def parsing_character_generator(tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() filename = "parsing_%s" % ("train" if train else "dev") text_filepath = os.path.join(tmp_dir, filename + ".text") tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return character_generator(text_filepath, tags_filepath, 1) + return character_generator(text_filepath, tags_filepath, + character_vocab, EOS) def parsing_token_generator(tmp_dir, train, vocab_size): @@ -268,4 +285,4 @@ def parsing_token_generator(tmp_dir, train, vocab_size): filename = "parsing_%s" % ("train" if train else "dev") text_filepath = os.path.join(tmp_dir, filename + ".text") tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return token_generator(text_filepath, tags_filepath, symbolizer_vocab, 1) + return token_generator(text_filepath, tags_filepath, symbolizer_vocab, EOS) From a43c4c7a49534005d8dbd80d5d94cace7c691838 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Mon, 26 Jun 2017 00:57:18 +0000 Subject: [PATCH 0033/4095] Made token_vocab required in character_generator(); updated wmt_test.py --- tensor2tensor/data_generators/wmt.py | 16 ++------ tensor2tensor/data_generators/wmt_test.py | 45 ++++++++++++++++------- 2 files changed, 36 insertions(+), 25 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/wmt_test.py diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 5c54d008c..3feca69d3 100755 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -33,7 +33,7 @@ EOS = 1 -def character_generator(source_path, target_path, token_vocab=None, eos=None): +def character_generator(source_path, target_path, token_vocab, eos=None): """Generator for sequence-to-sequence tasks that just uses characters. This generator assumes the files at source_path and target_path have @@ -55,15 +55,8 @@ def character_generator(source_path, target_path, token_vocab=None, eos=None): with tf.gfile.GFile(target_path, mode="r") as target_file: source, target = source_file.readline(), target_file.readline() while source and target: - if token_vocab is None: - # Straight-through encoding of characters - # If using this, be careful about potential clashes between - # character ordinals, pad bytes (0) and EOS markers - source_ints = [ord(c) for c in source.strip()] + eos_list - target_ints = [ord(c) for c in target.strip()] + eos_list - else: - source_ints = token_vocab.encode(source.strip()) + eos_list - target_ints = token_vocab.encode(target.strip()) + eos_list + source_ints = token_vocab.encode(source.strip()) + eos_list + target_ints = token_vocab.encode(target.strip()) + eos_list yield {"inputs": source_ints, "targets": target_ints} source, target = source_file.readline(), target_file.readline() @@ -275,8 +268,7 @@ def parsing_character_generator(tmp_dir, train): filename = "parsing_%s" % ("train" if train else "dev") text_filepath = os.path.join(tmp_dir, filename + ".text") tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return character_generator(text_filepath, tags_filepath, - character_vocab, EOS) + return character_generator(text_filepath, tags_filepath, character_vocab, EOS) def parsing_token_generator(tmp_dir, train, vocab_size): diff --git a/tensor2tensor/data_generators/wmt_test.py b/tensor2tensor/data_generators/wmt_test.py old mode 100644 new mode 100755 index 7121e3d8a..0366fdfb0 --- a/tensor2tensor/data_generators/wmt_test.py +++ b/tensor2tensor/data_generators/wmt_test.py @@ -26,6 +26,7 @@ import six from tensor2tensor.data_generators import wmt +from tensor2tensor.data_generators import text_encoder import tensorflow as tf @@ -36,31 +37,49 @@ def testCharacterGenerator(self): # Generate a trivial source and target file. tmp_dir = self.get_temp_dir() (_, tmp_file_path) = tempfile.mkstemp(dir=tmp_dir) + if six.PY2: + enc_f = lambda s: s + else: + enc_f = lambda s: s.encode('utf-8') with io.open(tmp_file_path + ".src", "wb") as src_file: - src_file.write("source1\n") - src_file.write("source2\n") + src_file.write(enc_f("source1\n")) + src_file.write(enc_f("source2\n")) with io.open(tmp_file_path + ".tgt", "wb") as tgt_file: - tgt_file.write("target1\n") - tgt_file.write("target2\n") + tgt_file.write(enc_f("target1\n")) + tgt_file.write(enc_f("target2\n")) # Call character generator on the generated files. results_src, results_tgt = [], [] + character_vocab = text_encoder.ByteTextEncoder() for dictionary in wmt.character_generator(tmp_file_path + ".src", - tmp_file_path + ".tgt"): + tmp_file_path + ".tgt", + character_vocab): self.assertEqual(sorted(list(dictionary)), ["inputs", "targets"]) results_src.append(dictionary["inputs"]) results_tgt.append(dictionary["targets"]) # Check that the results match the files. + # First check that the results match the encoded original strings; + # this is a comparison of integer arrays self.assertEqual(len(results_src), 2) - self.assertEqual("".join([six.int2byte(i) - for i in results_src[0]]), "source1") - self.assertEqual("".join([six.int2byte(i) - for i in results_src[1]]), "source2") - self.assertEqual("".join([six.int2byte(i) - for i in results_tgt[0]]), "target1") - self.assertEqual("".join([six.int2byte(i) - for i in results_tgt[1]]), "target2") + self.assertEqual(results_src[0], + character_vocab.encode("source1")) + self.assertEqual(results_src[1], + character_vocab.encode("source2")) + self.assertEqual(results_tgt[0], + character_vocab.encode("target1")) + self.assertEqual(results_tgt[1], + character_vocab.encode("target2")) + # Then decode the results and compare with the original strings; + # this is a comparison of strings + self.assertEqual(character_vocab.decode(results_src[0]), + "source1") + self.assertEqual(character_vocab.decode(results_src[1]), + "source2") + self.assertEqual(character_vocab.decode(results_tgt[0]), + "target1") + self.assertEqual(character_vocab.decode(results_tgt[1]), + "target2") # Clean up. os.remove(tmp_file_path + ".src") From 0e84cc900333a9ebcfc487defbbcafad4ef3c0d5 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Mon, 26 Jun 2017 00:59:23 +0000 Subject: [PATCH 0034/4095] Changed parameter name to character_vocab --- tensor2tensor/data_generators/wmt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 3feca69d3..c525e4ec0 100755 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -33,7 +33,7 @@ EOS = 1 -def character_generator(source_path, target_path, token_vocab, eos=None): +def character_generator(source_path, target_path, character_vocab, eos=None): """Generator for sequence-to-sequence tasks that just uses characters. This generator assumes the files at source_path and target_path have @@ -55,8 +55,8 @@ def character_generator(source_path, target_path, token_vocab, eos=None): with tf.gfile.GFile(target_path, mode="r") as target_file: source, target = source_file.readline(), target_file.readline() while source and target: - source_ints = token_vocab.encode(source.strip()) + eos_list - target_ints = token_vocab.encode(target.strip()) + eos_list + source_ints = character_vocab.encode(source.strip()) + eos_list + target_ints = character_vocab.encode(target.strip()) + eos_list yield {"inputs": source_ints, "targets": target_ints} source, target = source_file.readline(), target_file.readline() From 685a37f0b3236c7a3d38cd8f1821f02d5580c0cc Mon Sep 17 00:00:00 2001 From: Igor Pechersky Date: Mon, 26 Jun 2017 06:48:04 +0000 Subject: [PATCH 0035/4095] due to https://github.com/tensorflow/tensor2tensor/issues/47 --- tensor2tensor/data_generators/algorithmic_math.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/data_generators/algorithmic_math.py b/tensor2tensor/data_generators/algorithmic_math.py index 932c080e1..452fc637a 100644 --- a/tensor2tensor/data_generators/algorithmic_math.py +++ b/tensor2tensor/data_generators/algorithmic_math.py @@ -570,11 +570,16 @@ def calculus_integrate(alphabet_size=26, functions = {"log": "L"} alg_cfg = math_dataset_init(alphabet_size, digits=5, functions=functions) - for _ in xrange(nbr_cases): - sample, target = generate_calculus_integrate_sample( + nbr_case=0 + while nbr_case < nbr_cases: + try: + sample, target = generate_calculus_integrate_sample( alg_cfg.vlist, list(alg_cfg.ops.values()), min_depth, max_depth, alg_cfg.functions) - yield { + yield { "inputs": alg_cfg.int_encoder(sample), "targets": alg_cfg.int_encoder(target) - } + } + except: + continue + nbr_case = nbr_case + 1 From 8595cc68f2b05142c7f0fb7f6c5d0eea04e24a86 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Mon, 26 Jun 2017 23:35:39 +0000 Subject: [PATCH 0036/4095] Tokenizer and subword token handling in Unicode --- tensor2tensor/data_generators/text_encoder.py | 128 ++++++++++-------- tensor2tensor/data_generators/tokenizer.py | 44 ++++-- 2 files changed, 100 insertions(+), 72 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/text_encoder.py mode change 100644 => 100755 tensor2tensor/data_generators/tokenizer.py diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100644 new mode 100755 index a219a6b8d..35d33d359 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -234,14 +234,13 @@ def _subtokens_to_tokens(self, subtokens): def subtoken_to_subtoken_string(self, subtoken): """Subtoken_String (string) corresponding to the given subtoken (id).""" - if (subtoken >= 0 and subtoken < self.vocab_size and - self._all_subtoken_strings[subtoken]): - return self._all_subtoken_strings[subtoken] - else: - if 0 <= subtoken < self._num_reserved_ids: - return '%s_' % RESERVED_TOKENS[subtoken] - else: - return 'ID%d_' % subtoken + if 0 <= subtoken < self.vocab_size: + subtoken_string = self._all_subtoken_strings[subtoken] + if subtoken_string: + return subtoken_string + if 0 <= subtoken < self._num_reserved_ids: + return '%s_' % RESERVED_TOKENS[subtoken] + return 'ID%d_' % subtoken def _escaped_token_to_subtokens(self, escaped_token): """Converts an escaped token string to a list of subtokens. @@ -265,9 +264,16 @@ def _escaped_token_to_subtokens(self, escaped_token): if end > pos: pos = end else: - # This kinda should not happen, but it does. Cop out by skipping the - # nonexistent subtoken from the returned list. - # print("Unable to find subtoken in string '{0}'".format(escaped_token)) + if subtoken == -1: + # No subtoken found: warn once for every 10000 occurrences + tf.logging.log_every_n(tf.logging.WARN, + "Subtoken not found within escaped token '%s'", + 10000, escaped_token) + self.dump() + print("Already found: {0}".format(ret)) + print("pos is {0}".format(pos)) + print("Can't match from '{0}'".format(escaped_token[pos:])) + # Ensure that the outer loop continues pos += 1 return ret @@ -303,24 +309,19 @@ def build_to_target_size(cls, if min_val >= max_val or subtokenizer.vocab_size == target_size: return subtokenizer - elif subtokenizer.vocab_size > target_size: + if subtokenizer.vocab_size > target_size: other_subtokenizer = cls.build_to_target_size( target_size, token_counts, store_filename, present_count + 1, max_val, num_iterations) - if (abs(other_subtokenizer.vocab_size - target_size) < - abs(subtokenizer.vocab_size - target_size)): - return other_subtokenizer - else: - return subtokenizer else: other_subtokenizer = cls.build_to_target_size( target_size, token_counts, store_filename, min_val, present_count - 1, num_iterations) - if (abs(other_subtokenizer.vocab_size - target_size) < - abs(subtokenizer.vocab_size - target_size)): - return other_subtokenizer - else: - return subtokenizer + if (abs(other_subtokenizer.vocab_size - target_size) < + abs(subtokenizer.vocab_size - target_size)): + return other_subtokenizer + else: + return subtokenizer def build_from_token_counts(self, token_counts, @@ -339,6 +340,7 @@ def build_from_token_counts(self, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. for i in xrange(num_iterations): + print("Iteration {0}".format(i)) counts = defaultdict(int) for token, count in six.iteritems(token_counts): escaped_token = self._escape_token(token) @@ -352,39 +354,63 @@ def build_from_token_counts(self, starts = [] for subtoken in subtokens: starts.append(pos) - pos += len(self.subtoken_to_subtoken_string(subtoken)) + pos += len(self._all_subtoken_strings[subtoken]) + if escaped_token == u"SubwordTextEncoder_": + for start, subtoken in zip(starts, subtokens): + print("Start {0}, fragment '{1}'".format(start, self._all_subtoken_strings[subtoken])) + # !!! There is a subtle bug here: if we are adding a subtoken of >= 2 characters here, + # !!! and it gets dropped later because it doesn't make the minimum count cut, + # !!! there is no guarantee that its individual characters have been added as + # !!! subtokens. Which then means that they might be missing in the end and that + # !!! the token is thus not representable as a sequence of subtokens. + # !!! Note that this can only happen in iterations after the first one, + # !!! since the first iteration automatically adds all characters at some point. + # !!! Subsequent iterations however do not have the same guarantee. for start in starts: for end in xrange(start + 1, len(escaped_token)): subtoken_string = escaped_token[start:end] counts[subtoken_string] += count - # array of lists of candidate subtoken strings, by length + # array of sets of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): lsub = len(subtoken_string) + assert lsub >= 1 # all subtoken strings of length 1 are included regardless of count if count < min_count and lsub != 1: continue while len(len_to_subtoken_strings) <= lsub: - len_to_subtoken_strings.append([]) - len_to_subtoken_strings[lsub].append(subtoken_string) + len_to_subtoken_strings.append(set()) + len_to_subtoken_strings[lsub].add(subtoken_string) new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. - for subtoken_strings in len_to_subtoken_strings[::-1]: + # First, look at all subtoken strings >= 2 characters long + for subtoken_strings in reversed(len_to_subtoken_strings[2:]): for subtoken_string in subtoken_strings: count = counts[subtoken_string] - if count < min_count and len(subtoken_string) != 1: - # subtoken strings of length 1 are included regardless of count + if count < min_count: continue - new_subtoken_strings.append((-count, subtoken_string)) + new_subtoken_strings.append((count, subtoken_string)) for l in xrange(1, len(subtoken_string)): counts[subtoken_string[:l]] -= count + # Sort what we've got so far in decreasing order by count + new_subtoken_strings.sort(reverse = True) + # Add the single-character subtokens at the end of the list, + # if their final count is nonzero + for subtoken_string in len_to_subtoken_strings[1]: + count = counts[subtoken_string] + if count: + new_subtoken_strings.append((0, subtoken_string)) + else: + print(u"Cutting single-char subtoken '{0}'".format(subtoken_string)) # Make sure to include the underscore as a subtoken string - new_subtoken_strings.append((0, '_')) - new_subtoken_strings.sort() - self._init_from_list([''] * self._num_reserved_ids + + assert u'_' not in len_to_subtoken_strings[1] # Should not already be there + new_subtoken_strings.append((0, u'_')) + # Now we have a candidate vocabulary + self._init_from_list([u''] * self._num_reserved_ids + [p[1] for p in new_subtoken_strings]) - print('vocab_size = %d' % self.vocab_size) + tf.logging.info('vocab_size = %d' % self.vocab_size) + self.dump() original = 'This sentence was encoded by the SubwordTextEncoder.' encoded = self.encode(original) @@ -393,16 +419,17 @@ def build_from_token_counts(self, decoded = self.decode(encoded) print(decoded) assert decoded == original - self._store_to_file(store_filename) + if store_filename is not None: + self._store_to_file(store_filename) + + def dump(self): + subtoken_strings = [(i, s) for s, i in self._subtoken_string_to_id.iteritems()] + print(u", ".join(u"{0} : '{1}'".format(i, s) for i, s in sorted(subtoken_strings))) def _init_from_list(self, subtoken_strings): """Initialize from a list of subtoken strings.""" self._all_subtoken_strings = subtoken_strings - self._subtoken_string_to_id = {} - for i in xrange(len(subtoken_strings)): - subtoken_string = subtoken_strings[i] - if subtoken_string: - self._subtoken_string_to_id[subtoken_string] = i + self._subtoken_string_to_id = { s : i for i, s in enumerate(subtoken_strings) if s } def _load_from_file(self, filename): """Load from a file.""" @@ -410,7 +437,7 @@ def _load_from_file(self, filename): with tf.gfile.Open(filename) as f: for line in f: if six.PY2: - subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) + subtoken_strings.append(line.strip()[1:-1].decode('utf-8')) else: subtoken_strings.append(line.strip()[1:-1]) self._init_from_list(subtoken_strings) @@ -419,7 +446,7 @@ def _store_to_file(self, filename): with tf.gfile.Open(filename, 'w') as f: for subtoken_string in self._all_subtoken_strings: if six.PY2: - f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') + f.write('\'' + subtoken_string.encode('utf-8') + '\'\n') else: f.write('\'' + subtoken_string + '\'\n') @@ -436,28 +463,13 @@ def _escape_token(self, token): def _unescape_token(self, escaped_token): r"""Remove '_' from end, then translate '\\'->'\' and '\u'->'_'. - TODO(noam): There must be some better way to do this with regexps. - Args: escaped_token: a string Returns: token: a string """ assert escaped_token[-1] == '_' - escaped_token = escaped_token[:-1] - if '\\' not in escaped_token: - return escaped_token - ret = '' - pos = 0 - while pos < len(escaped_token): - if escaped_token[pos] == '\\' and pos + 1 < len(escaped_token): - if escaped_token[pos + 1] == 'u': - ret += '_' - else: - ret += escaped_token[pos + 1] - pos += 1 - pos += 1 - return ret + return escaped_token[:-1].replace('\\u', '_').replace('\\\\', '\\') @classmethod def get_token_counts(cls, text_filepattern, corpus_max_lines): diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py old mode 100644 new mode 100755 index 3564aee2e..b30ecf541 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -47,17 +47,31 @@ from collections import defaultdict import string +import unicodedata +import sys +import re # Dependency imports +from six import PY2 from six.moves import xrange # pylint: disable=redefined-builtin +# Regular expression that matches Unicode whitespace characters +# (including ASCII whitespace) as defined in the Python run-time library +_RE_WHITESPACE = re.compile(r"^\s$", re.UNICODE) + + class Tokenizer(object): - """Vocab for breaking words into wordpieces. + """Vocab for breaking words into Unicode wordpieces. """ - _SEPARATOR_CHAR_SET = set(string.punctuation + string.whitespace) + _UNICODE_PUNCTUATION = set(unichr(i) for i in xrange(sys.maxunicode) + if unicodedata.category(unichr(i)).startswith('P')) + _UNICODE_WHITESPACE = set(unichr(i) for i in xrange(sys.maxunicode) + if _RE_WHITESPACE.match(unichr(i))) + #_SEPARATOR_CHAR_SET = set(string.punctuation + string.whitespace) + _SEPARATOR_CHAR_SET = _UNICODE_WHITESPACE | _UNICODE_PUNCTUATION def __init__(self): self.token_counts = defaultdict(int) @@ -66,19 +80,21 @@ def encode(self, raw_text): """Encode a raw string as a list of tokens. Args: - raw_text: a string + raw_text: a (Python2 or Python3 native) string Returns: - a list of stirngs. + a list of Unicode strings """ if not raw_text: return [] ret = [] token_start = 0 + if PY2: + raw_text = raw_text.decode('utf-8') # Convert to Unicode + is_sep = [self._is_separator_char(c) for c in raw_text] for pos in xrange(1, len(raw_text)): - if (self._is_separator_char(raw_text[pos]) != - self._is_separator_char(raw_text[pos - 1])): + if (is_sep[pos] != is_sep[pos-1]): token = raw_text[token_start:pos] - if token != " " or token_start == 0: + if token != u" " or token_start == 0: ret.append(token) self.token_counts[token] += 1 token_start = pos @@ -91,17 +107,17 @@ def decode(self, tokens): """Decode a list of tokens to a string. Args: - tokens: a list of stirngs + tokens: a list of Unicode strings Returns: - a string. + a (Python2 or Python3 native) string """ - ret = "" + ret = u"" + is_word = [self._is_word_char(t[0]) for t in tokens] for i, token in enumerate(tokens): - if (i > 0 and self._is_word_char(tokens[i - 1][0]) and - self._is_word_char(token[0])): - ret += " " + if i > 0 and is_word[i - 1] and is_word[i]: + ret += u" " ret += token - return ret + return ret.encode('utf-8') if PY2 else ret def _is_separator_char(self, c): return c in self._SEPARATOR_CHAR_SET From b9f4754e393b790ddef41bb95ed180ac57e1df6d Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Mon, 26 Jun 2017 11:41:23 -0700 Subject: [PATCH 0037/4095] Fix open-source distributed training PiperOrigin-RevId: 160172257 --- .gitignore | 2 +- README.md | 2 +- tensor2tensor/bin/make_tf_configs.py | 3 +- .../data_generators/algorithmic_math.py | 13 ++---- .../data_generators/generator_utils.py | 3 +- tensor2tensor/data_generators/wmt.py | 27 ++++------- tensor2tensor/data_generators/wmt_test.py | 45 ++++++------------- tensor2tensor/docs/distributed_training.md | 13 +++++- tensor2tensor/utils/t2t_model.py | 4 +- tensor2tensor/utils/trainer_utils.py | 4 +- 10 files changed, 47 insertions(+), 69 deletions(-) mode change 100755 => 100644 tensor2tensor/data_generators/generator_utils.py mode change 100755 => 100644 tensor2tensor/data_generators/wmt.py mode change 100755 => 100644 tensor2tensor/data_generators/wmt_test.py diff --git a/.gitignore b/.gitignore index e610f29ba..dd84837dd 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,6 @@ # Python egg metadata, regenerated from source files by setuptools. /*.egg-info -# PyPI distribution artifacts +# PyPI distribution artificats build/ dist/ diff --git a/README.md b/README.md index 6932dab3a..f49fb20df 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ it](https://research.googleblog.com/2017/06/accelerating-deep-learning-research. We're eager to collaborate with you on extending T2T, so please feel free to [open an issue on GitHub](https://github.com/tensorflow/tensor2tensor/issues) or -send along a pull request to add your dataset or model. +send along a pull request to add your data-set or model. See [our contribution doc](CONTRIBUTING.md) for details and our [open issues](https://github.com/tensorflow/tensor2tensor/issues). diff --git a/tensor2tensor/bin/make_tf_configs.py b/tensor2tensor/bin/make_tf_configs.py index 791589939..8b9367ca6 100644 --- a/tensor2tensor/bin/make_tf_configs.py +++ b/tensor2tensor/bin/make_tf_configs.py @@ -55,7 +55,7 @@ def main(_): for idx, job in enumerate(jobs): if task_type == "worker": cmd_line_flags = " ".join([ - "--master=%s" % job, + "--master=grpc://%s" % job, "--ps_replicas=%d" % len(ps), "--worker_replicas=%d" % len(workers), "--worker_gpu=1", @@ -66,6 +66,7 @@ def main(_): ]) else: cmd_line_flags = " ".join([ + "--master=grpc://%s" % job, "--schedule=run_std_server", ]) diff --git a/tensor2tensor/data_generators/algorithmic_math.py b/tensor2tensor/data_generators/algorithmic_math.py index 452fc637a..932c080e1 100644 --- a/tensor2tensor/data_generators/algorithmic_math.py +++ b/tensor2tensor/data_generators/algorithmic_math.py @@ -570,16 +570,11 @@ def calculus_integrate(alphabet_size=26, functions = {"log": "L"} alg_cfg = math_dataset_init(alphabet_size, digits=5, functions=functions) - nbr_case=0 - while nbr_case < nbr_cases: - try: - sample, target = generate_calculus_integrate_sample( + for _ in xrange(nbr_cases): + sample, target = generate_calculus_integrate_sample( alg_cfg.vlist, list(alg_cfg.ops.values()), min_depth, max_depth, alg_cfg.functions) - yield { + yield { "inputs": alg_cfg.int_encoder(sample), "targets": alg_cfg.int_encoder(target) - } - except: - continue - nbr_case = nbr_case + 1 + } diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100755 new mode 100644 index c50d19afa..75d319cd8 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -22,12 +22,13 @@ import io import os import tarfile +import urllib # Dependency imports import six from six.moves import xrange # pylint: disable=redefined-builtin -import six.moves.urllib_request as urllib # Imports urllib on Python2, urllib.request on Python3 +import six.moves.urllib_request from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py old mode 100755 new mode 100644 index c525e4ec0..0be28ab73 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -28,12 +28,8 @@ import tensorflow as tf -# End-of-sentence marker (should correspond to the position of EOS in the -# RESERVED_TOKENS list in text_encoder.py) -EOS = 1 - -def character_generator(source_path, target_path, character_vocab, eos=None): +def character_generator(source_path, target_path, eos=None): """Generator for sequence-to-sequence tasks that just uses characters. This generator assumes the files at source_path and target_path have @@ -55,8 +51,8 @@ def character_generator(source_path, target_path, character_vocab, eos=None): with tf.gfile.GFile(target_path, mode="r") as target_file: source, target = source_file.readline(), target_file.readline() while source and target: - source_ints = character_vocab.encode(source.strip()) + eos_list - target_ints = character_vocab.encode(target.strip()) + eos_list + source_ints = [ord(c) for c in source.strip()] + eos_list + target_ints = [ord(c) for c in target.strip()] + eos_list yield {"inputs": source_ints, "targets": target_ints} source, target = source_file.readline(), target_file.readline() @@ -230,16 +226,14 @@ def ende_wordpiece_token_generator(tmp_dir, train, vocab_size): tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) + symbolizer_vocab, 1) def ende_character_generator(tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) + return character_generator(data_path + ".lang1", data_path + ".lang2", 1) def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): @@ -250,25 +244,22 @@ def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) + symbolizer_vocab, 1) def enfr_character_generator(tmp_dir, train): """Instance of character generator for the WMT en->fr task.""" - character_vocab = text_encoder.ByteTextEncoder() datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) + return character_generator(data_path + ".lang1", data_path + ".lang2", 1) def parsing_character_generator(tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() filename = "parsing_%s" % ("train" if train else "dev") text_filepath = os.path.join(tmp_dir, filename + ".text") tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return character_generator(text_filepath, tags_filepath, character_vocab, EOS) + return character_generator(text_filepath, tags_filepath, 1) def parsing_token_generator(tmp_dir, train, vocab_size): @@ -277,4 +268,4 @@ def parsing_token_generator(tmp_dir, train, vocab_size): filename = "parsing_%s" % ("train" if train else "dev") text_filepath = os.path.join(tmp_dir, filename + ".text") tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return token_generator(text_filepath, tags_filepath, symbolizer_vocab, EOS) + return token_generator(text_filepath, tags_filepath, symbolizer_vocab, 1) diff --git a/tensor2tensor/data_generators/wmt_test.py b/tensor2tensor/data_generators/wmt_test.py old mode 100755 new mode 100644 index 0366fdfb0..7121e3d8a --- a/tensor2tensor/data_generators/wmt_test.py +++ b/tensor2tensor/data_generators/wmt_test.py @@ -26,7 +26,6 @@ import six from tensor2tensor.data_generators import wmt -from tensor2tensor.data_generators import text_encoder import tensorflow as tf @@ -37,49 +36,31 @@ def testCharacterGenerator(self): # Generate a trivial source and target file. tmp_dir = self.get_temp_dir() (_, tmp_file_path) = tempfile.mkstemp(dir=tmp_dir) - if six.PY2: - enc_f = lambda s: s - else: - enc_f = lambda s: s.encode('utf-8') with io.open(tmp_file_path + ".src", "wb") as src_file: - src_file.write(enc_f("source1\n")) - src_file.write(enc_f("source2\n")) + src_file.write("source1\n") + src_file.write("source2\n") with io.open(tmp_file_path + ".tgt", "wb") as tgt_file: - tgt_file.write(enc_f("target1\n")) - tgt_file.write(enc_f("target2\n")) + tgt_file.write("target1\n") + tgt_file.write("target2\n") # Call character generator on the generated files. results_src, results_tgt = [], [] - character_vocab = text_encoder.ByteTextEncoder() for dictionary in wmt.character_generator(tmp_file_path + ".src", - tmp_file_path + ".tgt", - character_vocab): + tmp_file_path + ".tgt"): self.assertEqual(sorted(list(dictionary)), ["inputs", "targets"]) results_src.append(dictionary["inputs"]) results_tgt.append(dictionary["targets"]) # Check that the results match the files. - # First check that the results match the encoded original strings; - # this is a comparison of integer arrays self.assertEqual(len(results_src), 2) - self.assertEqual(results_src[0], - character_vocab.encode("source1")) - self.assertEqual(results_src[1], - character_vocab.encode("source2")) - self.assertEqual(results_tgt[0], - character_vocab.encode("target1")) - self.assertEqual(results_tgt[1], - character_vocab.encode("target2")) - # Then decode the results and compare with the original strings; - # this is a comparison of strings - self.assertEqual(character_vocab.decode(results_src[0]), - "source1") - self.assertEqual(character_vocab.decode(results_src[1]), - "source2") - self.assertEqual(character_vocab.decode(results_tgt[0]), - "target1") - self.assertEqual(character_vocab.decode(results_tgt[1]), - "target2") + self.assertEqual("".join([six.int2byte(i) + for i in results_src[0]]), "source1") + self.assertEqual("".join([six.int2byte(i) + for i in results_src[1]]), "source2") + self.assertEqual("".join([six.int2byte(i) + for i in results_tgt[0]]), "target1") + self.assertEqual("".join([six.int2byte(i) + for i in results_tgt[1]]), "target2") # Clean up. os.remove(tmp_file_path + ".src") diff --git a/tensor2tensor/docs/distributed_training.md b/tensor2tensor/docs/distributed_training.md index be3726f06..e7ddd7294 100644 --- a/tensor2tensor/docs/distributed_training.md +++ b/tensor2tensor/docs/distributed_training.md @@ -35,7 +35,7 @@ os.environ['TF_CONFIG'] = json.dumps({ The following T2T command-line flags must also be set on the workers for distributed training: -- `--master=$ADDRESS` +- `--master=grpc://$ADDRESS` - `--worker_replicas=$NUM_WORKERS` - `--worker_gpu=$NUM_GPUS_PER_WORKER` - `--worker_id=$WORKER_ID` @@ -55,6 +55,17 @@ Parameter servers only need `--schedule=run_std_server`. generates the `TF_CONFIG` json strings and the above-mentioned command-line flags for the workers and parameter servers. +Given a set of worker and parameter server addresses, the script outputs, for +each job, a line with the `TF_CONFIG` environment variable and the command-line +flags necessary for distributed training. For each job, you should invoke the +`t2t-trainer` with the `TF_CONFIG` value and flags that are output. + +For example: + +``` +TF_CONFIG=$JOB_TF_CONFIG t2t-trainer $JOB_FLAGS --model=transformer ... +``` + ## Command-line flags for eval jobs Eval jobs should set the following flags and do not need the `TF_CONFIG` diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 5ebb74280..8d9117694 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -276,8 +276,8 @@ def infer_step(recent_output, _): # input shape, so we confuse it about the input shape. initial_output = tf.slice(initial_output, [0, 0, 0, 0], tf.shape(initial_output)) - if (self._hparams.problems[self._problem_idx].target_modality is - registry.Modalities.CLASS_LABEL): + if isinstance(self._hparams.problems[self._problem_idx].target_modality, + modality.ClassLabelModality): decode_length = 1 else: decode_length = tf.shape(features["inputs"])[1] + decode_length diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index d901b4241..69accdc44 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -642,9 +642,7 @@ def _save_until_eos(hyp): # pylint: disable=missing-docstring decodes = [] for _ in range(num_decode_batches): - result_iter = estimator.predict( - input_fn=input_fn.next if six.PY2 else input_fn.__next__, - as_iterable=True) + result_iter = estimator.predict(input_fn=input_fn.next, as_iterable=True) for result in result_iter: def log_fn(inputs, outputs): From cb4d6769121b7149b2fb0066d9127421d349cd6e Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Mon, 26 Jun 2017 11:43:09 -0700 Subject: [PATCH 0038/4095] internal merge from github PiperOrigin-RevId: 160172524 --- README.md | 2 +- tensor2tensor/data_generators/generator_utils.py | 3 +-- tensor2tensor/utils/t2t_model.py | 4 ++-- tensor2tensor/utils/trainer_utils.py | 4 +++- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f49fb20df..6932dab3a 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ it](https://research.googleblog.com/2017/06/accelerating-deep-learning-research. We're eager to collaborate with you on extending T2T, so please feel free to [open an issue on GitHub](https://github.com/tensorflow/tensor2tensor/issues) or -send along a pull request to add your data-set or model. +send along a pull request to add your dataset or model. See [our contribution doc](CONTRIBUTING.md) for details and our [open issues](https://github.com/tensorflow/tensor2tensor/issues). diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 75d319cd8..fb85d99c3 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -22,13 +22,12 @@ import io import os import tarfile -import urllib # Dependency imports import six from six.moves import xrange # pylint: disable=redefined-builtin -import six.moves.urllib_request +import six.moves.urllib_request as urllib # Imports urllib on Python2, urllib.request on Python3 from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder from tensor2tensor.data_generators.tokenizer import Tokenizer diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 8d9117694..5ebb74280 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -276,8 +276,8 @@ def infer_step(recent_output, _): # input shape, so we confuse it about the input shape. initial_output = tf.slice(initial_output, [0, 0, 0, 0], tf.shape(initial_output)) - if isinstance(self._hparams.problems[self._problem_idx].target_modality, - modality.ClassLabelModality): + if (self._hparams.problems[self._problem_idx].target_modality is + registry.Modalities.CLASS_LABEL): decode_length = 1 else: decode_length = tf.shape(features["inputs"])[1] + decode_length diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 69accdc44..6055fd682 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -642,7 +642,9 @@ def _save_until_eos(hyp): # pylint: disable=missing-docstring decodes = [] for _ in range(num_decode_batches): - result_iter = estimator.predict(input_fn=input_fn.next, as_iterable=True) + result_iter = estimator.predict( + input_fn=input_fn.next if six.PY2 else input_fn.__next__, + as_iterable=True) for result in result_iter: def log_fn(inputs, outputs): From 14dafcd0c6b097b566076028b799eafc48adccd8 Mon Sep 17 00:00:00 2001 From: Noam Shazeer Date: Mon, 26 Jun 2017 15:28:35 -0700 Subject: [PATCH 0039/4095] Make execution mode available to models and modalities by putting it in the PiperOrigin-RevId: 160204189 --- tensor2tensor/models/attention_lm.py | 14 +++----- tensor2tensor/models/attention_lm_moe.py | 18 +++++----- tensor2tensor/models/bluenet.py | 43 ++++++++++++------------ tensor2tensor/models/bluenet_test.py | 5 +-- tensor2tensor/models/bytenet.py | 14 ++++---- tensor2tensor/models/bytenet_test.py | 5 +-- tensor2tensor/models/common_hparams.py | 2 ++ tensor2tensor/models/common_layers.py | 4 +-- tensor2tensor/models/lstm.py | 3 +- tensor2tensor/models/lstm_test.py | 5 +-- tensor2tensor/models/multimodel.py | 10 +++--- tensor2tensor/models/multimodel_test.py | 5 +-- tensor2tensor/models/neural_gpu.py | 16 ++++----- tensor2tensor/models/neural_gpu_test.py | 5 +-- tensor2tensor/models/slicenet.py | 33 ++++++++---------- tensor2tensor/models/slicenet_test.py | 5 +-- tensor2tensor/models/transformer.py | 10 +++--- tensor2tensor/models/transformer_test.py | 4 +-- tensor2tensor/models/xception.py | 12 +++---- tensor2tensor/models/xception_test.py | 5 +-- tensor2tensor/utils/t2t_model.py | 23 ++++++++----- tensor2tensor/utils/trainer_utils.py | 5 +-- 22 files changed, 123 insertions(+), 123 deletions(-) diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 30b871640..99fbd8232 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -24,8 +24,6 @@ from __future__ import division from __future__ import print_function -import copy - # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin @@ -43,13 +41,9 @@ class AttentionLM(t2t_model.T2TModel): """Attention net. See file docstring.""" - def model_fn_body(self, features, train): + def model_fn_body(self, features): # Remove dropout if not training - hparams = copy.copy(self._hparams) - if not train: - hparams.attention_dropout = 0. - hparams.relu_dropout = 0. - hparams.residual_dropout = 0. + hparams = self._hparams targets = features["targets"] targets = tf.squeeze(targets, 2) @@ -162,8 +156,10 @@ def attention_lm_base(): hparams.add_hparam("num_heads", 8) hparams.add_hparam("attention_key_channels", 0) hparams.add_hparam("attention_value_channels", 0) + # All hyperparameters ending in "dropout" are automatically set to 0.0 + # when not in training mode. hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("relu_dropout", 0.0) - hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("residual_dropout", 0.1) + hparams.add_hparam("pos", "timing") # timing, none return hparams diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 9cd0547f7..b4d27d400 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -24,8 +24,6 @@ from __future__ import division from __future__ import print_function -import copy - # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin @@ -43,13 +41,9 @@ class AttentionLmMoe(t2t_model.T2TModel): """Attention net. See file docstring.""" - def model_fn_body_sharded(self, sharded_features, train): + def model_fn_body_sharded(self, sharded_features): # Remove dropout if not training - hparams = copy.copy(self._hparams) - if not train: - hparams.attention_dropout = 0. - hparams.relu_dropout = 0. - hparams.residual_dropout = 0. + hparams = self._hparams dp = self._data_parallelism targets = sharded_features["targets"] targets = dp(tf.squeeze, targets, 2) @@ -81,7 +75,9 @@ def residual_fn(x, y): with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers.split(","): y, loss = common_layers.moe_layer( - dp, self._ps_devices, x, train, hparams.hidden_size, + dp, self._ps_devices, x, + hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, hparams.moe_n2, hparams.moe_loss_coef) extra_loss += loss @@ -162,10 +158,12 @@ def attention_lm_moe_base(): hparams.add_hparam("num_heads", 8) hparams.add_hparam("attention_key_channels", 0) hparams.add_hparam("attention_value_channels", 0) + # All hyperparameters ending in "dropout" are automatically set to 0.0 + # when not in training mode. hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("relu_dropout", 0.0) - hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("residual_dropout", 0.1) + hparams.add_hparam("pos", "timing") # timing, none return hparams diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index bb7119a15..bbcf392aa 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -30,7 +30,7 @@ import tensorflow as tf -def residual_module(x, hparams, train, n, sep): +def residual_module(x, hparams, n, sep): """A stack of convolution blocks with residual connection.""" k = (hparams.kernel_height, hparams.kernel_width) dilations_and_kernels = [((1, 1), k) for _ in xrange(n)] @@ -43,56 +43,55 @@ def residual_module(x, hparams, train, n, sep): separability=sep, name="block") x = common_layers.layer_norm(x + y, hparams.hidden_size, name="lnorm") - return tf.nn.dropout(x, 1.0 - hparams.dropout * tf.to_float(train)) + return tf.nn.dropout(x, 1.0 - hparams.dropout) -def residual_module1(x, hparams, train): - return residual_module(x, hparams, train, 1, 1) +def residual_module1(x, hparams): + return residual_module(x, hparams, 1, 1) -def residual_module1_sep(x, hparams, train): - return residual_module(x, hparams, train, 1, 0) +def residual_module1_sep(x, hparams): + return residual_module(x, hparams, 1, 0) -def residual_module2(x, hparams, train): - return residual_module(x, hparams, train, 2, 1) +def residual_module2(x, hparams): + return residual_module(x, hparams, 2, 1) -def residual_module2_sep(x, hparams, train): - return residual_module(x, hparams, train, 2, 0) +def residual_module2_sep(x, hparams): + return residual_module(x, hparams, 2, 0) -def residual_module3(x, hparams, train): - return residual_module(x, hparams, train, 3, 1) +def residual_module3(x, hparams): + return residual_module(x, hparams, 3, 1) -def residual_module3_sep(x, hparams, train): - return residual_module(x, hparams, train, 3, 0) +def residual_module3_sep(x, hparams): + return residual_module(x, hparams, 3, 0) -def norm_module(x, hparams, train): - del train # Unused. +def norm_module(x, hparams): return common_layers.layer_norm(x, hparams.hidden_size, name="norm_module") -def identity_module(x, hparams, train): - del hparams, train # Unused. +def identity_module(x, hparams): + del hparams # Unused. return x -def run_modules(blocks, cur, hparams, train, dp): +def run_modules(blocks, cur, hparams, dp): """Run blocks in parallel using dp as data_parallelism.""" assert len(blocks) % dp.n == 0 res = [] for i in xrange(len(blocks) // dp.n): - res.extend(dp(blocks[i * dp.n:(i + 1) * dp.n], cur, hparams, train)) + res.extend(dp(blocks[i * dp.n:(i + 1) * dp.n], cur, hparams)) return res @registry.register_model class BlueNet(t2t_model.T2TModel): - def model_fn_body_sharded(self, sharded_features, train): + def model_fn_body_sharded(self, sharded_features): dp = self._data_parallelism dp._reuse = False # pylint:disable=protected-access hparams = self._hparams @@ -106,7 +105,7 @@ def model_fn_body_sharded(self, sharded_features, train): cur_shape = cur.get_shape() for i in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % i): - processed = run_modules(blocks, cur, hparams, train, dp) + processed = run_modules(blocks, cur, hparams, dp) cur = common_layers.shakeshake(processed) cur.set_shape(cur_shape) diff --git a/tensor2tensor/models/bluenet_test.py b/tensor2tensor/models/bluenet_test.py index 70996ab02..a325e5a55 100644 --- a/tensor2tensor/models/bluenet_test.py +++ b/tensor2tensor/models/bluenet_test.py @@ -42,8 +42,9 @@ def testBlueNet(self): "inputs": tf.constant(x, dtype=tf.int32), "targets": tf.constant(y, dtype=tf.int32), } - model = bluenet.BlueNet(hparams, p_hparams) - sharded_logits, _, _ = model.model_fn(features, True) + model = bluenet.BlueNet( + hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + sharded_logits, _, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/bytenet.py b/tensor2tensor/models/bytenet.py index 42db05700..1a82144d6 100644 --- a/tensor2tensor/models/bytenet.py +++ b/tensor2tensor/models/bytenet.py @@ -30,7 +30,7 @@ import tensorflow as tf -def residual_dilated_conv(x, repeat, padding, name, hparams, train): +def residual_dilated_conv(x, repeat, padding, name, hparams): """A stack of convolution blocks with residual connections.""" with tf.variable_scope(name): k = (hparams.kernel_height, hparams.kernel_width) @@ -45,11 +45,11 @@ def residual_dilated_conv(x, repeat, padding, name, hparams, train): padding=padding, name="residual_conv") x = common_layers.layer_norm(x + y, hparams.hidden_size, name="lnorm") - x = tf.nn.dropout(x, 1.0 - hparams.dropout * tf.to_float(train)) + x = tf.nn.dropout(x, hparams.dropout) return x -def bytenet_internal(inputs, targets, hparams, train): +def bytenet_internal(inputs, targets, hparams): """ByteNet, main step used for training.""" with tf.variable_scope("bytenet"): # Flatten inputs and extend length by 50%. @@ -63,7 +63,7 @@ def bytenet_internal(inputs, targets, hparams, train): inputs, targets = common_layers.pad_to_same_length( inputs, targets, final_length_divisible_by=50) final_encoder = residual_dilated_conv( - inputs, hparams.num_block_repeat, "SAME", "encoder", hparams, train) + inputs, hparams.num_block_repeat, "SAME", "encoder", hparams) shifted_targets = common_layers.shift_left(targets) kernel = (hparams.kernel_height, hparams.kernel_width) @@ -74,15 +74,15 @@ def bytenet_internal(inputs, targets, hparams, train): return residual_dilated_conv( decoder_start, hparams.num_block_repeat, - "LEFT", "decoder", hparams, train) + "LEFT", "decoder", hparams) @registry.register_model class ByteNet(t2t_model.T2TModel): - def model_fn_body(self, features, train): + def model_fn_body(self, features): return bytenet_internal(features["inputs"], features["targets"], - self._hparams, train) + self._hparams) @registry.register_hparams diff --git a/tensor2tensor/models/bytenet_test.py b/tensor2tensor/models/bytenet_test.py index 676220cc8..8202d5b74 100644 --- a/tensor2tensor/models/bytenet_test.py +++ b/tensor2tensor/models/bytenet_test.py @@ -42,8 +42,9 @@ def testByteNet(self): "inputs": tf.constant(x, dtype=tf.int32), "targets": tf.constant(y, dtype=tf.int32), } - model = bytenet.ByteNet(hparams, p_hparams) - sharded_logits, _, _ = model.model_fn(features, True) + model = bytenet.ByteNet( + hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + sharded_logits, _, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index 689f407f5..41ca6f4b0 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -45,6 +45,8 @@ def basic_params1(): kernel_width=1, hidden_size=64, compress_steps=0, + # All hyperparameters ending in "dropout" are automatically set to 0.0 + # when not in training mode. dropout=0.2, clip_grad_norm=2.0, initializer="orthogonal", diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index f9d63a464..078fcc5a3 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1211,7 +1211,6 @@ def conv_lstm(x, def diagonal_conv_gru(x, kernel_size, filters, - train, dropout=0.0, name=None, reuse=None): @@ -1234,8 +1233,7 @@ def do_conv(args, name, bias_start): gate, gate_cost = hard_sigmoid(do_conv(x, "gate", 0.7)) candidate = tf.tanh(do_conv(reset * x, "candidate", 0.0)) - # Dropout if training. - if dropout > 0.0 and train: + if dropout > 0.0: candidate = tf.nn.dropout(candidate, 1.0 - dropout) # Diagonal shift. diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 78f79eed0..992c42db4 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -67,6 +67,7 @@ def lstm_seq2seq_internal(inputs, targets, hparams, train): @registry.register_model("baseline_lstm_seq2seq") class LSTMSeq2Seq(t2t_model.T2TModel): - def model_fn_body(self, features, train): + def model_fn_body(self, features): + train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN return lstm_seq2seq_internal(features["inputs"], features["targets"], self._hparams, train) diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index 33347cb84..e5bdb184b 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -43,8 +43,9 @@ def testLSTMSeq2Seq(self): "inputs": tf.constant(x, dtype=tf.int32), "targets": tf.constant(y, dtype=tf.int32), } - model = lstm.LSTMSeq2Seq(hparams, p_hparams) - sharded_logits, _, _ = model.model_fn(features, True) + model = lstm.LSTMSeq2Seq( + hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + sharded_logits, _, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index 7247b791e..66a8491f2 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -70,7 +70,8 @@ def add_and_normalize(x, y): @registry.register_model class MultiModel(t2t_model.T2TModel): - def model_fn_body_sharded(self, sharded_features, train): + def model_fn_body_sharded(self, sharded_features): + train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN dp = self._data_parallelism hparams = self._hparams targets = sharded_features["targets"] @@ -86,7 +87,7 @@ def encode_half(inputs, inputs_mask, hparams): inputs = common_layers.add_timing_signal(inputs) return slicenet.multi_conv_res(inputs, "SAME", "encoder1", hparams.num_hidden_layers // 2, - hparams, train, mask=inputs_mask) + hparams, mask=inputs_mask) target_space_emb = dp(slicenet.embed_target_space, sharded_features["target_space_id"], @@ -101,7 +102,7 @@ def encode_half(inputs, inputs_mask, hparams): expert_loss *= hparams.moe_loss_coef inputs_encoded = dp( slicenet.multi_conv_res, inputs_encoded, "SAME", - "encoder2", hparams.num_hidden_layers, hparams, train, + "encoder2", hparams.num_hidden_layers, hparams, mask=inputs_mask) # If we're just predicing a class, there is no use for a decoder, return. @@ -112,7 +113,7 @@ def encode_half(inputs, inputs_mask, hparams): # Do the middle part. decoder_start, similarity_loss = dp( slicenet.slicenet_middle, inputs_encoded, targets, - target_space_emb, inputs_mask, hparams, train) + target_space_emb, inputs_mask, hparams) # Decode. decoder_half = dp( @@ -137,7 +138,6 @@ def encode_half(inputs, inputs_mask, hparams): "decoder2", hparams.num_hidden_layers // 2, hparams, - train, mask=inputs_mask, source=inputs_encoded) diff --git a/tensor2tensor/models/multimodel_test.py b/tensor2tensor/models/multimodel_test.py index 8df682c5c..72fe4a326 100644 --- a/tensor2tensor/models/multimodel_test.py +++ b/tensor2tensor/models/multimodel_test.py @@ -43,8 +43,9 @@ def testMultiModel(self): "targets": tf.constant(y, dtype=tf.int32), "target_space_id": tf.constant(1, dtype=tf.int32), } - model = multimodel.MultiModel(hparams, p_hparams) - sharded_logits, _, _ = model.model_fn(features, True) + model = multimodel.MultiModel( + hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + sharded_logits, _, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/neural_gpu.py b/tensor2tensor/models/neural_gpu.py index dbae77f43..dce0dbc30 100644 --- a/tensor2tensor/models/neural_gpu.py +++ b/tensor2tensor/models/neural_gpu.py @@ -30,12 +30,11 @@ import tensorflow as tf -def neural_gpu(inputs, hparams, train, name=None): +def neural_gpu(inputs, hparams, name=None): """The core Neural GPU.""" with tf.variable_scope(name, "neural_gpu"): - def step(state, inp): # pylint: disable=missing-docstring - x = tf.nn.dropout(state, 1.0 - hparams.dropout * tf.to_float(train)) + x = tf.nn.dropout(state, 1.0 - hparams.dropout) for layer in xrange(hparams.num_hidden_layers): x = common_layers.conv_gru( x, (hparams.kernel_height, hparams.kernel_width), @@ -57,11 +56,11 @@ def step(state, inp): # pylint: disable=missing-docstring @registry.register_model class NeuralGPU(t2t_model.T2TModel): - def model_fn_body(self, features, train): - return neural_gpu(features["inputs"], self._hparams, train) + def model_fn_body(self, features): + return neural_gpu(features["inputs"], self._hparams) -def diagonal_neural_gpu(inputs, hparams, train, name=None): +def diagonal_neural_gpu(inputs, hparams, name=None): """Improved Neural GPU as in https://arxiv.org/abs/1702.08727.""" with tf.variable_scope(name, "diagonal_neural_gpu"): @@ -73,7 +72,6 @@ def step(state_tup, inp): x, new_loss = common_layers.diagonal_conv_gru( x, (hparams.kernel_height, hparams.kernel_width), hparams.hidden_size, - train, dropout=hparams.dropout, name="dcgru_%d" % layer) # Padding input is zeroed-out in the modality, we check this by summing. @@ -93,8 +91,8 @@ def step(state_tup, inp): @registry.register_model class DiagonalNeuralGPU(t2t_model.T2TModel): - def model_fn_body(self, features, train): - return diagonal_neural_gpu(features["inputs"], self._hparams, train) + def model_fn_body(self, features): + return diagonal_neural_gpu(features["inputs"], self._hparams) @registry.register_hparams("neuralgpu_1") diff --git a/tensor2tensor/models/neural_gpu_test.py b/tensor2tensor/models/neural_gpu_test.py index 0d4937a5d..3065bb1c4 100644 --- a/tensor2tensor/models/neural_gpu_test.py +++ b/tensor2tensor/models/neural_gpu_test.py @@ -49,8 +49,9 @@ def testNeuralGPU(self): "inputs": tf.constant(inputs, dtype=tf.int32), "targets": tf.constant(targets, dtype=tf.int32) } - model = neural_gpu.NeuralGPU(hparams, p_hparams) - shadred_logits, _, _ = model.model_fn(features, True) + model = neural_gpu.NeuralGPU( + hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + shadred_logits, _, _ = model.model_fn(features) logits = tf.concat(shadred_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index eddf4cc96..0b9efc2c3 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -46,8 +46,7 @@ def get_norm(hparams): "'noam', 'none'.") -def attention(targets_shifted, inputs_encoded, norm_fn, hparams, train, - bias=None): +def attention(targets_shifted, inputs_encoded, norm_fn, hparams, bias=None): """Complete attention layer with preprocessing.""" separabilities = [hparams.separability, hparams.separability] if hparams.separability < 0: @@ -71,7 +70,6 @@ def attention(targets_shifted, inputs_encoded, norm_fn, hparams, train, tf.shape(inputs_encoded)[1] ]) - attention_dropout = hparams.attention_dropout * tf.to_float(train) qv = common_attention.multihead_attention( targets_timed, None, @@ -80,7 +78,7 @@ def attention(targets_shifted, inputs_encoded, norm_fn, hparams, train, hparams.hidden_size, hparams.hidden_size, hparams.num_heads, - attention_dropout, + hparams.attention_dropout, name="self_attention", summaries=False) qv = common_attention.multihead_attention( @@ -91,7 +89,7 @@ def attention(targets_shifted, inputs_encoded, norm_fn, hparams, train, hparams.hidden_size, hparams.hidden_size, hparams.num_heads, - attention_dropout, + hparams.attention_dropout, name="encdec_attention", summaries=False) return tf.expand_dims(qv, 2) @@ -101,7 +99,7 @@ def attention(targets_shifted, inputs_encoded, norm_fn, hparams, train, return norm_fn(targets_shifted + targets_with_attention, name="attn_norm") -def multi_conv_res(x, padding, name, layers, hparams, train, +def multi_conv_res(x, padding, name, layers, hparams, mask=None, source=None): """A stack of separable convolution blocks with residual connections.""" with tf.variable_scope(name): @@ -152,10 +150,10 @@ def multi_conv_res(x, padding, name, layers, hparams, train, separabilities=separabilities2, name="residual2") + y if source is not None and hparams.attention_type != "none": - x += attention(x, source, norm_fn, hparams, train, bias=padding_bias) + x += attention(x, source, norm_fn, hparams, bias=padding_bias) if mask is not None: x *= mask - return tf.nn.dropout(x, 1.0 - hparams.dropout * tf.to_float(train)) + return tf.nn.dropout(x, 1.0 - hparams.dropout) def rank_loss(sentence_emb, image_emb, margin=0.2): @@ -188,8 +186,7 @@ def similarity_cost(inputs_encoded, targets_encoded): return rank_loss(x, y) -def slicenet_middle(inputs_encoded, targets, target_space_emb, mask, - hparams, train): +def slicenet_middle(inputs_encoded, targets, target_space_emb, mask, hparams): """Middle part of slicenet, connecting encoder and decoder.""" norm_fn = get_norm(hparams) @@ -204,7 +201,7 @@ def slicenet_middle(inputs_encoded, targets, target_space_emb, mask, extra_layers = int(hparams.num_hidden_layers * 1.5) with tf.variable_scope(tf.get_variable_scope(), reuse=True): targets_encoded = multi_conv_res(targets_timed, "SAME", "encoder", - extra_layers, hparams, train) + extra_layers, hparams) with tf.variable_scope("similarity_loss"): similarity_loss = similarity_cost(inputs_encoded, targets_encoded) similarity_loss *= hparams.sim_loss_mult @@ -219,7 +216,7 @@ def slicenet_middle(inputs_encoded, targets, target_space_emb, mask, else: inputs_padding_bias = (1.0 - mask) * -1e9 # Bias to not attend to padding. targets_with_attention = attention( - targets_shifted, inputs_encoded, norm_fn, hparams, train, + targets_shifted, inputs_encoded, norm_fn, hparams, bias=inputs_padding_bias) # Positional targets: merge attention and raw. @@ -247,8 +244,7 @@ def embedding_to_padding(emb): return tf.to_float(tf.equal(emb_sum, 0.0)) -def slicenet_internal(inputs, targets, target_space, - problem_idx, hparams, train): +def slicenet_internal(inputs, targets, target_space, problem_idx, hparams): """The slicenet model, main step used for training.""" with tf.variable_scope("slicenet"): # Flatten inputs and encode. @@ -258,14 +254,14 @@ def slicenet_internal(inputs, targets, target_space, target_space_emb = embed_target_space(target_space, hparams.hidden_size) extra_layers = int(hparams.num_hidden_layers * 1.5) inputs_encoded = multi_conv_res(inputs, "SAME", "encoder", extra_layers, - hparams, train, mask=inputs_mask) + hparams, mask=inputs_mask) target_modality_name = hparams.problems[problem_idx].target_modality.name if "class_label_modality" in target_modality_name: # If we're just predicing a class, there is no use for a decoder. return inputs_encoded # Do the middle part. decoder_start, similarity_loss = slicenet_middle( - inputs_encoded, targets, target_space_emb, inputs_mask, hparams, train) + inputs_encoded, targets, target_space_emb, inputs_mask, hparams) # Decode. decoder_final = multi_conv_res( decoder_start, @@ -273,7 +269,6 @@ def slicenet_internal(inputs, targets, target_space, "decoder", hparams.num_hidden_layers, hparams, - train, mask=inputs_mask, source=inputs_encoded) return decoder_final, tf.reduce_mean(similarity_loss) @@ -282,10 +277,10 @@ def slicenet_internal(inputs, targets, target_space, @registry.register_model class SliceNet(t2t_model.T2TModel): - def model_fn_body(self, features, train): + def model_fn_body(self, features): return slicenet_internal(features["inputs"], features["targets"], features["target_space_id"], self._problem_idx, - self._hparams, train) + self._hparams) _KERNEL_SCHEMES = { "3.3.3.3": [(3, 1), (3, 1), (3, 1), (3, 1)], diff --git a/tensor2tensor/models/slicenet_test.py b/tensor2tensor/models/slicenet_test.py index bbeb3a284..db563b481 100644 --- a/tensor2tensor/models/slicenet_test.py +++ b/tensor2tensor/models/slicenet_test.py @@ -42,8 +42,9 @@ def testSliceNet(self): "targets": tf.constant(y, dtype=tf.int32), "target_space_id": tf.constant(1, dtype=tf.int32), } - model = slicenet.SliceNet(hparams, p_hparams) - sharded_logits, _, _ = model.model_fn(features, True) + model = slicenet.SliceNet( + hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + sharded_logits, _, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 94fb0776c..88d901df9 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -41,13 +41,9 @@ class Transformer(t2t_model.T2TModel): """Attention net. See file docstring.""" - def model_fn_body(self, features, train): + def model_fn_body(self, features): # Remove dropout if not training hparams = copy.copy(self._hparams) - if not train: - hparams.attention_dropout = 0. - hparams.relu_dropout = 0. - hparams.residual_dropout = 0. targets = features["targets"] inputs = features.get("inputs") target_space = features.get("target_space_id") @@ -300,10 +296,12 @@ def transformer_base(): hparams.add_hparam("ffn_layer", "conv_hidden_relu") hparams.add_hparam("parameter_attention_key_channels", 0) hparams.add_hparam("parameter_attention_value_channels", 0) + # All hyperparameters ending in "dropout" are automatically set to 0.0 + # when not in training mode. hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("relu_dropout", 0.0) - hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("residual_dropout", 0.1) + hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("nbr_decoder_problems", 1) return hparams diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 1b43ce625..9535558a4 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -48,8 +48,8 @@ def _testTransformer(self, net): "targets": tf.constant(targets, dtype=tf.int32), "target_space_id": tf.constant(1, dtype=tf.int32), } - model = net(hparams, p_hparams) - shadred_logits, _, _ = model.model_fn(features, True) + model = net(hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + shadred_logits, _, _ = model.model_fn(features) logits = tf.concat(shadred_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index 01b5adb78..d28a1628e 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -30,7 +30,7 @@ import tensorflow as tf -def residual_block(x, hparams, train): +def residual_block(x, hparams): """A stack of convolution blocks with residual connection.""" k = (hparams.kernel_height, hparams.kernel_width) dilations_and_kernels = [((1, 1), k) for _ in xrange(3)] @@ -42,24 +42,24 @@ def residual_block(x, hparams, train): separability=0, name="residual_block") x = common_layers.layer_norm(x + y, hparams.hidden_size, name="lnorm") - return tf.nn.dropout(x, 1.0 - hparams.dropout * tf.to_float(train)) + return tf.nn.dropout(x, 1.0 - hparams.dropout) -def xception_internal(inputs, hparams, train): +def xception_internal(inputs, hparams): """Xception body.""" with tf.variable_scope("xception"): cur = inputs for i in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % i): - cur = residual_block(cur, hparams, train) + cur = residual_block(cur, hparams) return cur @registry.register_model class Xception(t2t_model.T2TModel): - def model_fn_body(self, features, train): - return xception_internal(features["inputs"], self._hparams, train) + def model_fn_body(self, features): + return xception_internal(features["inputs"], self._hparams) @registry.register_hparams diff --git a/tensor2tensor/models/xception_test.py b/tensor2tensor/models/xception_test.py index 4eabb387a..cd158b852 100644 --- a/tensor2tensor/models/xception_test.py +++ b/tensor2tensor/models/xception_test.py @@ -42,8 +42,9 @@ def testXception(self): "inputs": tf.constant(x, dtype=tf.int32), "targets": tf.constant(y, dtype=tf.int32), } - model = xception.Xception(hparams, p_hparams) - sharded_logits, _, _ = model.model_fn(features, True) + model = xception.Xception( + hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + sharded_logits, _, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 5ebb74280..4d7ccd771 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -17,6 +17,7 @@ from __future__ import division from __future__ import print_function +import copy import time # Dependency imports @@ -51,6 +52,7 @@ class T2TModel(object): def __init__(self, hparams, + mode, problem_hparams, problem_idx=0, data_parallelism=None, @@ -59,6 +61,7 @@ def __init__(self, Args: hparams: a hyperparameters object. + mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. problem_hparams: a hyperparameters object. problem_idx: an integer. data_parallelism: a expert_utils.parallelism @@ -72,6 +75,13 @@ def __init__(self, data_parallelism = eu.Parallelism([""]) if ps_devices is None: ps_devices = [""] + hparams = copy.copy(hparams) + hparams.add_hparam("mode", mode) + # when not in training mode, set all forms of dropout to zero. + if mode != tf.contrib.learn.ModeKeys.TRAIN: + for key in hparams.values(): + if key[-len("dropout"):] == "dropout": + setattr(hparams, key, 0.0) self._hparams = hparams self._data_parallelism = data_parallelism self._num_datashards = data_parallelism.n @@ -332,12 +342,11 @@ def _shard_features(self, features): # pylint: disable=missing-docstring 0)) return sharded_features - def model_fn(self, features, train, skip=False, last_position_only=False): + def model_fn(self, features, skip=False, last_position_only=False): """Computes the entire model and produces sharded logits and training loss. Args: features: A dictionary of feature name to tensor. - train: a boolean `Scalar` (whether we are in training mode). skip: a boolean, if we're just dummy-calling and actually skip this model (but we need to create variables to not confuse distributed training). last_position_only: a boolean, compute logits for only the last position. @@ -392,7 +401,7 @@ def model_fn(self, features, train, skip=False, last_position_only=False): body_outputs, extra_loss = transformed_features["targets"], 0.0 else: body_outputs, extra_loss = self.model_fn_body_sharded( - transformed_features, train) + transformed_features) with tf.variable_scope(target_modality.name, reuse=target_reuse): if not last_position_only: @@ -420,7 +429,7 @@ def model_fn(self, features, train, skip=False, last_position_only=False): tf.logging.info("This model_fn took %.3f sec." % (time.time() - start_time)) return sharded_logits, training_loss, extra_loss - def model_fn_body_sharded(self, sharded_features, train): + def model_fn_body_sharded(self, sharded_features): """Mixture-of-experts models will override this function. Compute model body on all datashards. @@ -428,7 +437,6 @@ def model_fn_body_sharded(self, sharded_features, train): Args: sharded_features: map from string to list of Tensors each with shape [batch, ?, ?, body_input_size] - train: A boolean `Scalar` (whether we are in training mode). Returns: sharded_body_output: @@ -442,7 +450,7 @@ def model_fn_body_sharded(self, sharded_features, train): } for d in xrange(self._num_datashards)] output = self._data_parallelism( _with_timing(self.model_fn_body, "model_fn_body"), - datashard_to_features, train) + datashard_to_features) if isinstance(output, tuple): loss = tf.reduce_mean(output[1]) output = output[0] @@ -450,7 +458,7 @@ def model_fn_body_sharded(self, sharded_features, train): loss = 0.0 return output, loss - def model_fn_body(self, features, train): + def model_fn_body(self, features): """Most models will override this function. Compute label logits for one shard as a function of the transformed @@ -459,7 +467,6 @@ def model_fn_body(self, features, train): Args: features: A dictionary of key to Tensor. Each Tensor has shape `[batch_size, ?, ?, hidden_size]`. - train: A boolean `Scalar` (whether we are in training mode). Returns: a `Tensor` of logits with shape `[batch_size, O, P, body_output_size]`. diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 6055fd682..940927638 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -384,7 +384,8 @@ def model_fn(features, targets, mode): def nth_model(n): """Build the model for the n-th problem, plus some added variables.""" model_class = registry.model(model)( - hparams, hparams.problems[n], n, dp, _ps_devices(all_workers=True)) + hparams, mode, hparams.problems[n], + n, dp, _ps_devices(all_workers=True)) if mode == tf.contrib.learn.ModeKeys.INFER: return model_class.infer( features, @@ -402,7 +403,7 @@ def nth_model(n): # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1) sharded_logits, training_loss, extra_loss = model_class.model_fn( - features, train, skip=(skipping_is_on and skip_this_one)) + features, skip=(skipping_is_on and skip_this_one)) with tf.variable_scope("losses_avg", reuse=True): loss_moving_avg = tf.get_variable("problem_%d/training_loss" % n) o1 = loss_moving_avg.assign(loss_moving_avg * 0.9 + training_loss * 0.1) From c82030790d7f7d913d66b2d7d187f7d2dbc210a8 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Mon, 26 Jun 2017 15:43:31 -0700 Subject: [PATCH 0040/4095] internal merge PiperOrigin-RevId: 160206168 --- .../data_generators/algorithmic_math.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tensor2tensor/data_generators/algorithmic_math.py b/tensor2tensor/data_generators/algorithmic_math.py index 932c080e1..f5c954036 100644 --- a/tensor2tensor/data_generators/algorithmic_math.py +++ b/tensor2tensor/data_generators/algorithmic_math.py @@ -570,11 +570,16 @@ def calculus_integrate(alphabet_size=26, functions = {"log": "L"} alg_cfg = math_dataset_init(alphabet_size, digits=5, functions=functions) - for _ in xrange(nbr_cases): - sample, target = generate_calculus_integrate_sample( - alg_cfg.vlist, - list(alg_cfg.ops.values()), min_depth, max_depth, alg_cfg.functions) - yield { - "inputs": alg_cfg.int_encoder(sample), - "targets": alg_cfg.int_encoder(target) - } + nbr_case = 0 + while nbr_case < nbr_cases: + try: + sample, target = generate_calculus_integrate_sample( + alg_cfg.vlist, + list(alg_cfg.ops.values()), min_depth, max_depth, alg_cfg.functions) + yield { + "inputs": alg_cfg.int_encoder(sample), + "targets": alg_cfg.int_encoder(target) + } + except: # pylint:disable=bare-except + continue + nbr_case += 1 From f5e371fe4fdfd3b419b87ef3f998889a58234809 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Mon, 26 Jun 2017 16:17:03 -0700 Subject: [PATCH 0041/4095] internal merge from github PiperOrigin-RevId: 160210398 --- tensor2tensor/data_generators/wmt.py | 29 +++++++++----- tensor2tensor/data_generators/wmt_test.py | 46 ++++++++++++++++------- 2 files changed, 52 insertions(+), 23 deletions(-) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 0be28ab73..e88a90983 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -29,7 +29,12 @@ import tensorflow as tf -def character_generator(source_path, target_path, eos=None): +# End-of-sentence marker (should correspond to the position of EOS in the +# RESERVED_TOKENS list in text_encoder.py) +EOS = 1 + + +def character_generator(source_path, target_path, character_vocab, eos=None): """Generator for sequence-to-sequence tasks that just uses characters. This generator assumes the files at source_path and target_path have @@ -40,6 +45,7 @@ def character_generator(source_path, target_path, eos=None): Args: source_path: path to the file with source sentences. target_path: path to the file with target sentences. + character_vocab: a TextEncoder to encode the characters. eos: integer to append at the end of each sequence (default: None). Yields: @@ -51,8 +57,8 @@ def character_generator(source_path, target_path, eos=None): with tf.gfile.GFile(target_path, mode="r") as target_file: source, target = source_file.readline(), target_file.readline() while source and target: - source_ints = [ord(c) for c in source.strip()] + eos_list - target_ints = [ord(c) for c in target.strip()] + eos_list + source_ints = character_vocab.encode(source.strip()) + eos_list + target_ints = character_vocab.encode(target.strip()) + eos_list yield {"inputs": source_ints, "targets": target_ints} source, target = source_file.readline(), target_file.readline() @@ -226,14 +232,16 @@ def ende_wordpiece_token_generator(tmp_dir, train, vocab_size): tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, 1) + symbolizer_vocab, EOS) def ende_character_generator(tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", 1) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): @@ -244,22 +252,25 @@ def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, 1) + symbolizer_vocab, EOS) def enfr_character_generator(tmp_dir, train): """Instance of character generator for the WMT en->fr task.""" + character_vocab = text_encoder.ByteTextEncoder() datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", 1) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) def parsing_character_generator(tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() filename = "parsing_%s" % ("train" if train else "dev") text_filepath = os.path.join(tmp_dir, filename + ".text") tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return character_generator(text_filepath, tags_filepath, 1) + return character_generator(text_filepath, tags_filepath, character_vocab, EOS) def parsing_token_generator(tmp_dir, train, vocab_size): @@ -268,4 +279,4 @@ def parsing_token_generator(tmp_dir, train, vocab_size): filename = "parsing_%s" % ("train" if train else "dev") text_filepath = os.path.join(tmp_dir, filename + ".text") tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return token_generator(text_filepath, tags_filepath, symbolizer_vocab, 1) + return token_generator(text_filepath, tags_filepath, symbolizer_vocab, EOS) diff --git a/tensor2tensor/data_generators/wmt_test.py b/tensor2tensor/data_generators/wmt_test.py index 7121e3d8a..b6af3cf93 100644 --- a/tensor2tensor/data_generators/wmt_test.py +++ b/tensor2tensor/data_generators/wmt_test.py @@ -25,6 +25,7 @@ # Dependency imports import six +from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators import wmt import tensorflow as tf @@ -36,31 +37,48 @@ def testCharacterGenerator(self): # Generate a trivial source and target file. tmp_dir = self.get_temp_dir() (_, tmp_file_path) = tempfile.mkstemp(dir=tmp_dir) + if six.PY2: + enc_f = lambda s: s + else: + enc_f = lambda s: s.encode("utf-8") with io.open(tmp_file_path + ".src", "wb") as src_file: - src_file.write("source1\n") - src_file.write("source2\n") + src_file.write(enc_f("source1\n")) + src_file.write(enc_f("source2\n")) with io.open(tmp_file_path + ".tgt", "wb") as tgt_file: - tgt_file.write("target1\n") - tgt_file.write("target2\n") + tgt_file.write(enc_f("target1\n")) + tgt_file.write(enc_f("target2\n")) # Call character generator on the generated files. results_src, results_tgt = [], [] - for dictionary in wmt.character_generator(tmp_file_path + ".src", - tmp_file_path + ".tgt"): + character_vocab = text_encoder.ByteTextEncoder() + for dictionary in wmt.character_generator( + tmp_file_path + ".src", tmp_file_path + ".tgt", character_vocab): self.assertEqual(sorted(list(dictionary)), ["inputs", "targets"]) results_src.append(dictionary["inputs"]) results_tgt.append(dictionary["targets"]) # Check that the results match the files. + # First check that the results match the encoded original strings; + # this is a comparison of integer arrays. self.assertEqual(len(results_src), 2) - self.assertEqual("".join([six.int2byte(i) - for i in results_src[0]]), "source1") - self.assertEqual("".join([six.int2byte(i) - for i in results_src[1]]), "source2") - self.assertEqual("".join([six.int2byte(i) - for i in results_tgt[0]]), "target1") - self.assertEqual("".join([six.int2byte(i) - for i in results_tgt[1]]), "target2") + self.assertEqual(results_src[0], + character_vocab.encode("source1")) + self.assertEqual(results_src[1], + character_vocab.encode("source2")) + self.assertEqual(results_tgt[0], + character_vocab.encode("target1")) + self.assertEqual(results_tgt[1], + character_vocab.encode("target2")) + # Then decode the results and compare with the original strings; + # this is a comparison of strings + self.assertEqual(character_vocab.decode(results_src[0]), + "source1") + self.assertEqual(character_vocab.decode(results_src[1]), + "source2") + self.assertEqual(character_vocab.decode(results_tgt[0]), + "target1") + self.assertEqual(character_vocab.decode(results_tgt[1]), + "target2") # Clean up. os.remove(tmp_file_path + ".src") From 8226f1546c97394b644fb3aa07ee5c4134e78ce7 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Mon, 26 Jun 2017 17:48:35 -0700 Subject: [PATCH 0042/4095] Bump to v1.0.8 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5b2d423f8..fbb81470e 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.7', + version='1.0.8', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 38b9c11e97129c00f0e0e0d588b05d0fa0badfb3 Mon Sep 17 00:00:00 2001 From: ReDeiPirati Date: Tue, 27 Jun 2017 11:56:09 +0200 Subject: [PATCH 0043/4095] Update --- tensor2tensor/bin/t2t-datagen | 0 tensor2tensor/bin/t2t-trainer | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tensor2tensor/bin/t2t-datagen mode change 100644 => 100755 tensor2tensor/bin/t2t-trainer diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100644 new mode 100755 diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100644 new mode 100755 From 79f309a38eb6374b064786ec0827f1c4b91dddd3 Mon Sep 17 00:00:00 2001 From: ReDeiPirati Date: Tue, 27 Jun 2017 13:25:19 +0200 Subject: [PATCH 0044/4095] Add new generator: algorithmic_reverse_nlplike that generates samples following Zipf's LAw --- tensor2tensor/bin/t2t-datagen | 10 +++ tensor2tensor/data_generators/algorithmic.py | 64 +++++++++++++++++++ .../data_generators/algorithmic_test.py | 16 +++++ 3 files changed, 90 insertions(+) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index cb8a77f0d..ca9418488 100755 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -86,6 +86,16 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_multiplication_decimal40": ( lambda: algorithmic.multiplication_generator(10, 40, 100000), lambda: algorithmic.multiplication_generator(10, 400, 10000)), + "algorithmic_reverse_nlplike_decimal8K": ( + lambda: algorithmic.reverse_generator_nlplike(8000, 40, 100000, + 10, 1.250), + lambda: algorithmic.reverse_generator_nlplike(8000, 400, 10000, + 10, 1.250)), + "algorithmic_reverse_nlplike_decimal32K": ( + lambda: algorithmic.reverse_generator_nlplike(32000, 40, 100000, + 10, 1.005), + lambda: algorithmic.reverse_generator_nlplike(32000, 400, 10000, + 10, 1.005)), "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 4c25e986e..d7013469b 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -93,6 +93,70 @@ def reverse_generator(nbr_symbols, max_length, nbr_cases): "targets": list(reversed(inputs)) + [1]} # [1] for EOS +def zipf_distribution(nbr_symbols, alpha): + """Helper function: Create a Zipf distribution. + + Args: + nbr_symbols: number of symbols to use in the distribution. + alpha: float, Zipf's Law Distribution parameter. Default = 1.5. + Usually for modelling natural text distribution is in + the range [1.1-1.6]. + + Return: + distr_map: list of float, Zipf's distribution over nbr_symbols. + + """ + tmp = np.power(np.arange(1, nbr_symbols+1), -alpha) + zeta = np.r_[0.0, np.cumsum(tmp)] + return [x / zeta[-1] for x in zeta] + + +def zipf_random_sample(distr_map, sample_len): + """Helper function: Generate a random Zipf sample of given lenght. + + Args: + distr_map: list of float, Zipf's distribution over nbr_symbols. + sample_len: integer, length of sequence to generate. + + Return: + sample: list of integer, Zipf's random sample over nbr_symbols. + + """ + u = np.random.random(sample_len) + return [t+1 for t in np.searchsorted(distr_map, u)] # 0 pad and 1 EOS + + +def reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, \ + scale_std_dev=100, alpha=1.5): + """Generator for the reversing nlp-like task on sequences of symbols. + + The length of the sequence is drawn from a Gaussian(Normal) distribution + at random from [1, max_length] and with std deviation of 1%, + then symbols are drawn from Zipf's law at random from [2, nbr_symbols] until + nbr_cases sequences have been produced. + + Args: + max_length: integer, maximum length of sequences to generate. + nbr_cases: the number of cases to generate. + scale_std_dev: float, Normal distribution's standard deviation scale factor + used to draw the lenght of sequence. Default = 1% of the max_length. + alpha: float, Zipf's Law Distribution parameter. Default = 1.5. + Usually for modelling natural text distribution is in + the range [1.1-1.6]. + + Yields: + A dictionary {"inputs": input-list, "targets": target-list} where + target-list is input-list reversed. + """ + std_dev = max_length / scale_std_dev + distr_map = zipf_distribution(nbr_symbols, alpha) + for _ in xrange(nbr_cases): + l = int(abs(np.random.normal(loc=max_length/2, scale=std_dev)) + 1) + inputs = zipf_random_sample(distr_map, l) + yield {"inputs": inputs, + "targets": list(reversed(inputs)) + [1]} # [1] for EOS + + def lower_endian_to_number(l, base): """Helper function: convert a list of digits in the given base to a number.""" return sum([d * (base**i) for i, d in enumerate(l)]) diff --git a/tensor2tensor/data_generators/algorithmic_test.py b/tensor2tensor/data_generators/algorithmic_test.py index a5fbfae2d..a85122436 100644 --- a/tensor2tensor/data_generators/algorithmic_test.py +++ b/tensor2tensor/data_generators/algorithmic_test.py @@ -41,6 +41,22 @@ def testReverseGenerator(self): self.assertEqual(list(reversed(d["inputs"])) + [1], d["targets"]) self.assertEqual(counter, 10) + def testZipfDistribution(self): + # Following Zipf's Law with alpha equals 1: the first in rank is two times + # more probable/frequent that the second in rank, three times more prob/freq + # that the third in rank and so on. + d = algorithmic.zipf_distribution(10, 1.0001) + for i in xrange(len(d[1:])-1): + self.assertEqual("%.4f" % (abs(d[i+1]-d[i+2])*(i+2)), \ + "%.4f" % d[1]) + + def testReverseGeneratorNlpLike(self): + counter = 0 + for d in algorithmic.reverse_generator_nlplike(3, 8, 10): + counter += 1 + self.assertEqual(list(reversed(d["inputs"])) + [1], d["targets"]) + self.assertEqual(counter, 10) + def testLowerEndianToNumber(self): self.assertEqual(algorithmic.lower_endian_to_number([0], 2), 0) self.assertEqual(algorithmic.lower_endian_to_number([0], 7), 0) From 759789bed577b3ddda94cd2e888ddcbfc9f2d40e Mon Sep 17 00:00:00 2001 From: ReDeiPirati Date: Tue, 27 Jun 2017 15:18:55 +0200 Subject: [PATCH 0045/4095] Add algorithmic_reverse_nlplike to PROBLEM_HPARAMS_MAP --- tensor2tensor/data_generators/problem_hparams.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 55115b841..b2dbe9e73 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -665,6 +665,8 @@ def image_mscoco_tokens(model_hparams, vocab_count): "algorithmic_multiplication_decimal40": lambda p: algorithmic(12, p), "algorithmic_reverse_binary40": lambda p: algorithmic(4, p), "algorithmic_reverse_decimal40": lambda p: algorithmic(12, p), + "algorithmic_reverse_nlplike_decimal8K": lambda p: algorithmic(8002, p), + "algorithmic_reverse_nlplike_decimal32K": lambda p: algorithmic(32002, p), "algorithmic_shift_decimal40": lambda p: algorithmic(22, p), "audio_timit_characters_tune": audio_timit_characters, "audio_timit_characters_test": audio_timit_characters, From e6e6ba2a4138aaf9b1dd9ee155e911af03701b8d Mon Sep 17 00:00:00 2001 From: pltrdy Date: Tue, 27 Jun 2017 16:34:57 +0200 Subject: [PATCH 0046/4095] Unknown token for TokenTextEncoder --- tensor2tensor/data_generators/text_encoder.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index a219a6b8d..4d669948e 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -121,15 +121,25 @@ def vocab_size(self): class TokenTextEncoder(TextEncoder): """Encoder based on a user-supplied vocabulary.""" - def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): + def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2, unk=None): """Initialize from a file, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) + + assert unk is None or type(unk) == str, "unk must be a string or None" + if unk is not None: + RESERVED_TOKENS.append(unk) + num_reserved_ids += 1 + self._unk = unk + self._reverse = reverse self._load_vocab_from_file(vocab_filename) def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" - ret = [self._token_to_id[tok] for tok in sentence.strip().split()] + ret = [self._token_to_id[tok] if tok in self._token_to_id + else self._token_to_id[self.unk] + for tok in sentence.strip().split()] + return ret[::-1] if self._reverse else ret def decode(self, ids): @@ -140,6 +150,13 @@ def decode(self, ids): def vocab_size(self): return len(self._id_to_token) + @property + def unk(self): + if self._unk is None: + raise ValueError("Unknown token") + elif type(self._unk) == str: + return self._unk + def _safe_id_to_token(self, idx): return self._id_to_token.get(idx, 'ID_%d' % idx) From 9bf7c9429f0dfe9fc5a909b2ee6c0255e7c6e82b Mon Sep 17 00:00:00 2001 From: pltrdy Date: Tue, 27 Jun 2017 16:44:33 +0200 Subject: [PATCH 0047/4095] PTB dataset --- tensor2tensor/bin/t2t-datagen | 7 + .../data_generators/problem_hparams.py | 13 ++ tensor2tensor/data_generators/ptb.py | 158 ++++++++++++++++++ 3 files changed, 178 insertions(+) create mode 100644 tensor2tensor/data_generators/ptb.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index cb8a77f0d..718206e7d 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -40,6 +40,7 @@ from tensor2tensor.data_generators import image from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.data_generators import ptb import tensorflow as tf @@ -307,6 +308,12 @@ _SUPPORTED_PROBLEM_GENERATORS = { 626, vocab_filename="tokens.vocab.%d" % 2**15, vocab_size=2**15)), + "lmptb_10k": ( + lambda: ptb.train_generator( + FLAGS.tmp_dir, + 10000, + False), + lambda: ptb.valid_generator()), } # pylint: enable=g-long-lambda diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 55115b841..3527cf097 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -340,6 +340,18 @@ def lm1b_16k(model_hparams): p.target_space_id = 3 return p +def lmptb_10k(model_hparams): + """Penn Tree Bank language-modeling benchmark, 10k token vocabulary.""" + p = default_problem_hparams() + p.input_modality = {} + p.target_modality = (registry.Modalities.SYMBOL, 10000) + p.vocabulary = { + "inputs": text_encoder.TextEncoder(), + "targets": text_encoder.TextEncoder() + } + p.input_space_id = 3 + p.target_space_id = 3 + return p def lm1b_64k(model_hparams): """Billion-word language-modeling benchmark, 64k subtoken vocabulary.""" @@ -676,6 +688,7 @@ def image_mscoco_tokens(model_hparams, vocab_count): "audio_wsj_tokens_8k_test": lambda p: audio_wsj_tokens(p, 2**13), "lm1b_16k": lm1b_16k, "lm1b_64k": lm1b_64k, + "lmptb_10k": lmptb_10k, "wmt_parsing_characters": wmt_parsing_characters, "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens(p, 2**14, 2**9), diff --git a/tensor2tensor/data_generators/ptb.py b/tensor2tensor/data_generators/ptb.py new file mode 100644 index 000000000..7cb3230a8 --- /dev/null +++ b/tensor2tensor/data_generators/ptb.py @@ -0,0 +1,158 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for PTB data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import tarfile +import collections + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import text_encoder + +import tensorflow as tf + + +EOS = text_encoder.EOS +PTB_URL = "http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz" + +def _read_words(filename): + """Reads words from a file. + It returns a list of words without '\n' + Originally from: + https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/reader.py + """ + with tf.gfile.GFile(filename, "r") as f: + if sys.version_info[0] >= 3: + return f.read().replace("\n", " ").split() + else: + return f.read().decode("utf-8").replace("\n", " ").split() + + + +def _build_vocab(filename, vocab_path, vocab_size, exclude=[]): + """Reads a file a build a vocabulary of `vocab_size` words to + as a list of words to `filename` + The vocabulary is sorted by occurence count and has one word per line + Originally from: + https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/reader.py + """ + data = [w for w in _read_words(filename) if w not in exclude] + + counter = collections.Counter(data) + + count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) + words, _ = list(zip(*count_pairs)) + words = words[:vocab_size] + + with open(vocab_path, 'w') as f: + f.write("\n".join(words)) + +def _get_token_encoder(tmp_dir, filename, vocab_size): + """Reads from file and returns a `TokenTextEncoder` based on the vocabulary + """ + vocab_name = "ptb.vocab.%d" % vocab_size + vocab_path = os.path.join(tmp_dir, vocab_name) + + exclude = ['', ''] + _build_vocab(filename, vocab_path, vocab_size, exclude) + + return text_encoder.TokenTextEncoder(vocab_path, + unk="") + + +class PTB(object): + def __init__(self, tmp_dir, vocab_size, char=False): + self.char = char + #self.num_steps = num_steps + + url = PTB_URL + + filename = os.path.basename(url) + compressed_filepath = generator_utils.maybe_download(tmp_dir, + filename, + url) + + ptb_files = [] + ptb_char_files = [] + with tarfile.open(compressed_filepath, "r:gz") as tgz: + files = [] + # selecting only relevant files + for m in tgz.getmembers(): + if "ptb" in m.name and ".txt" in m.name: + if "char" in m.name: + ptb_char_files += [m.name] + else: + ptb_files += [m.name] + files += [m] + + tgz.extractall(tmp_dir, members=files) + + if self.char: + files = ptb_char_files + else: + files = ptb_files + files = files + + for filename in files: + if "train" in filename: + self.train = os.path.join(tmp_dir, filename) + elif "valid" in filename: + self.valid = os.path.join(tmp_dir, filename) + + assert hasattr(self, "train"), "Training file not found" + assert hasattr(self, "valid"), "Validation file not found" + + self.encoder = _get_token_encoder(tmp_dir, self.train, vocab_size) + + def train_generator(self): + return self._generator(self.train) + + def valid_generator(self): + return self._generator(self.valid) + + def _generator(self, filename): + with tf.gfile.GFile(filename, "r") as f: + for line in f: + line = " ".join(line.replace('\n', EOS).split()) + tok = self.encoder.encode(line) + x = tok[:-1] + y = tok[1:] + + yield {"inputs": x, + "targets": y} + +# Using a object "singleton" +# `train_generator` must be called before +# `valid_generator` in order to work +_ptb = {} +def train_generator(*args, **kwargs): + """The train data generator to be called + """ + global _ptb + _ptb = PTB(*args, **kwargs) + return _ptb.train_generator() + +def valid_generator(): + """Validation (aka. dev) data generator + """ + global _ptb + return _ptb.valid_generator() From 0cf87837b40b4ac78500dadffd8e92e3cf07feb9 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Tue, 27 Jun 2017 16:37:31 +0000 Subject: [PATCH 0048/4095] Subtoken handling enhancements --- .../data_generators/generator_utils.py | 3 +- tensor2tensor/data_generators/snli.py | 16 ++-- tensor2tensor/data_generators/text_encoder.py | 77 +++++++++---------- .../text_encoder_build_subword.py | 3 +- tensor2tensor/data_generators/tokenizer.py | 56 +++++++------- 5 files changed, 77 insertions(+), 78 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/snli.py mode change 100644 => 100755 tensor2tensor/data_generators/text_encoder_build_subword.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index c50d19afa..ffe8022ff 100755 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -258,7 +258,8 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): _ = tokenizer.encode(line) vocab = SubwordTextEncoder.build_to_target_size( - vocab_size, tokenizer.token_counts, vocab_filepath, 1, 1e3) + vocab_size, tokenizer.token_counts, 1, 1e3) + vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/data_generators/snli.py b/tensor2tensor/data_generators/snli.py old mode 100644 new mode 100755 index 5613ece4d..8218bc253 --- a/tensor2tensor/data_generators/snli.py +++ b/tensor2tensor/data_generators/snli.py @@ -136,14 +136,14 @@ def _get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): if tf.gfile.Exists(vocab_filepath): gs = text_encoder.SubwordTextEncoder(vocab_filepath) return gs - else: - example_file = os.path.join(tmp_dir, _EXAMPLES_FILE) - gs = text_encoder.SubwordTextEncoder() - token_counts = text_encoder.SubwordTextEncoder.get_token_counts( - example_file, corpus_max_lines=1000000) - gs = gs.build_to_target_size( - vocab_size, token_counts, vocab_filepath, min_val=1, max_val=1e3) - return gs + example_file = os.path.join(tmp_dir, _EXAMPLES_FILE) + gs = text_encoder.SubwordTextEncoder() + token_counts = text_encoder.SubwordTextEncoder.get_token_counts( + example_file, corpus_max_lines=1000000) + gs = gs.build_to_target_size( + vocab_size, token_counts, min_val=1, max_val=1e3) + gs.store_to_file(vocab_filepath) + return gs def snli_token_generator(tmp_dir, train, vocab_size): diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 35d33d359..73915c81d 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -260,19 +260,16 @@ def _escaped_token_to_subtokens(self, escaped_token): if subtoken != -1: break end -= 1 - ret.append(subtoken) if end > pos: + ret.append(subtoken) pos = end else: - if subtoken == -1: - # No subtoken found: warn once for every 10000 occurrences - tf.logging.log_every_n(tf.logging.WARN, - "Subtoken not found within escaped token '%s'", - 10000, escaped_token) - self.dump() - print("Already found: {0}".format(ret)) - print("pos is {0}".format(pos)) - print("Can't match from '{0}'".format(escaped_token[pos:])) + # No subtoken in the vocabulary matches excaped_token[pos]. + # This can happen if the token contains a Unicode character + # that did not occur in the vocabulary training set. + # The id self.vocab_size - 1 is decoded as Unicode uFFFD, + # REPLACEMENT_CHARACTER. + ret.append(self.vocab_size - 1) # Ensure that the outer loop continues pos += 1 return ret @@ -281,7 +278,6 @@ def _escaped_token_to_subtokens(self, escaped_token): def build_to_target_size(cls, target_size, token_counts, - store_filename, min_val, max_val, num_iterations=4): @@ -304,18 +300,18 @@ def build_to_target_size(cls, present_count = (max_val + min_val) // 2 tf.logging.info('Trying min_count %d' % present_count) subtokenizer = cls() - subtokenizer.build_from_token_counts(token_counts, store_filename, + subtokenizer.build_from_token_counts(token_counts, present_count, num_iterations) if min_val >= max_val or subtokenizer.vocab_size == target_size: return subtokenizer if subtokenizer.vocab_size > target_size: other_subtokenizer = cls.build_to_target_size( - target_size, token_counts, store_filename, present_count + 1, max_val, + target_size, token_counts, present_count + 1, max_val, num_iterations) else: other_subtokenizer = cls.build_to_target_size( - target_size, token_counts, store_filename, min_val, present_count - 1, + target_size, token_counts, min_val, present_count - 1, num_iterations) if (abs(other_subtokenizer.vocab_size - target_size) < abs(subtokenizer.vocab_size - target_size)): @@ -325,7 +321,6 @@ def build_to_target_size(cls, def build_from_token_counts(self, token_counts, - store_filename, min_count, num_iterations=4): """Train a SubwordTextEncoder based on a dictionary of word counts. @@ -340,7 +335,7 @@ def build_from_token_counts(self, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. for i in xrange(num_iterations): - print("Iteration {0}".format(i)) + tf.logging.info("Iteration {0}".format(i)) counts = defaultdict(int) for token, count in six.iteritems(token_counts): escaped_token = self._escape_token(token) @@ -355,29 +350,24 @@ def build_from_token_counts(self, for subtoken in subtokens: starts.append(pos) pos += len(self._all_subtoken_strings[subtoken]) - if escaped_token == u"SubwordTextEncoder_": - for start, subtoken in zip(starts, subtokens): - print("Start {0}, fragment '{1}'".format(start, self._all_subtoken_strings[subtoken])) - # !!! There is a subtle bug here: if we are adding a subtoken of >= 2 characters here, - # !!! and it gets dropped later because it doesn't make the minimum count cut, - # !!! there is no guarantee that its individual characters have been added as - # !!! subtokens. Which then means that they might be missing in the end and that - # !!! the token is thus not representable as a sequence of subtokens. - # !!! Note that this can only happen in iterations after the first one, - # !!! since the first iteration automatically adds all characters at some point. - # !!! Subsequent iterations however do not have the same guarantee. for start in starts: for end in xrange(start + 1, len(escaped_token)): subtoken_string = escaped_token[start:end] counts[subtoken_string] += count - # array of sets of candidate subtoken strings, by length + # Array of sets of candidate subtoken strings, by length len_to_subtoken_strings = [] + eliminated_chars = defaultdict(int) for subtoken_string, count in six.iteritems(counts): lsub = len(subtoken_string) - assert lsub >= 1 - # all subtoken strings of length 1 are included regardless of count - if count < min_count and lsub != 1: + # All subtoken strings of length 1 are included regardless of count + if count < min_count and lsub > 1: + # If eliminating a string, make sure that its individual characters + # beyond the first one (which has already been accounted for) + # are counted as subtoken strings + for c in subtoken_string[1:]: + eliminated_chars[c] += count continue + # Add this subtoken string to its length set while len(len_to_subtoken_strings) <= lsub: len_to_subtoken_strings.append(set()) len_to_subtoken_strings[lsub].add(subtoken_string) @@ -395,22 +385,30 @@ def build_from_token_counts(self, counts[subtoken_string[:l]] -= count # Sort what we've got so far in decreasing order by count new_subtoken_strings.sort(reverse = True) + # Make sure that we include characters from subtokens + # that didn't survive the minimum count cutoff in + # the final vocabulary + for c, count in six.iteritems(eliminated_chars): + len_to_subtoken_strings[1].add(c) + counts[c] += count # Add the single-character subtokens at the end of the list, # if their final count is nonzero for subtoken_string in len_to_subtoken_strings[1]: count = counts[subtoken_string] if count: new_subtoken_strings.append((0, subtoken_string)) - else: - print(u"Cutting single-char subtoken '{0}'".format(subtoken_string)) # Make sure to include the underscore as a subtoken string assert u'_' not in len_to_subtoken_strings[1] # Should not already be there new_subtoken_strings.append((0, u'_')) + # Also include the Unicode REPLACEMENT CHARACTER to use + # when encountering previously unseen Unicode characters in the input + # (i.e. input external to the tokenizer training set). + # This must be the last entry in the subtoken vocabulary list. + new_subtoken_strings.append((0, u'\uFFFD')) # Now we have a candidate vocabulary self._init_from_list([u''] * self._num_reserved_ids + [p[1] for p in new_subtoken_strings]) tf.logging.info('vocab_size = %d' % self.vocab_size) - self.dump() original = 'This sentence was encoded by the SubwordTextEncoder.' encoded = self.encode(original) @@ -419,11 +417,10 @@ def build_from_token_counts(self, decoded = self.decode(encoded) print(decoded) assert decoded == original - if store_filename is not None: - self._store_to_file(store_filename) def dump(self): - subtoken_strings = [(i, s) for s, i in self._subtoken_string_to_id.iteritems()] + """ Debugging dump of the current subtoken vocabulary """ + subtoken_strings = [(i, s) for s, i in six.iteritems(self._subtoken_string_to_id)] print(u", ".join(u"{0} : '{1}'".format(i, s) for i, s in sorted(subtoken_strings))) def _init_from_list(self, subtoken_strings): @@ -442,7 +439,7 @@ def _load_from_file(self, filename): subtoken_strings.append(line.strip()[1:-1]) self._init_from_list(subtoken_strings) - def _store_to_file(self, filename): + def store_to_file(self, filename): with tf.gfile.Open(filename, 'w') as f: for subtoken_string in self._all_subtoken_strings: if six.PY2: @@ -475,7 +472,7 @@ def _unescape_token(self, escaped_token): def get_token_counts(cls, text_filepattern, corpus_max_lines): """Read the corpus and compute a dictionary of word counts.""" tok = tokenizer.Tokenizer() - token_counts = {} + token_counts = defaultdict(int) lines_read = 0 filenames = tf.gfile.Glob(text_filepattern) for text_filename in filenames: @@ -483,7 +480,7 @@ def get_token_counts(cls, text_filepattern, corpus_max_lines): for line in f: tokens = tok.encode(line.strip()) for t in tokens: - token_counts[t] = token_counts.get(t, 0) + 1 + token_counts[t] += 1 lines_read += 1 if corpus_max_lines > 0 and lines_read > corpus_max_lines: return token_counts diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py old mode 100644 new mode 100755 index ee71af9f6..48267ec66 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -59,8 +59,9 @@ def main(unused_argv): raise ValueError('Must provide --corpus_filepattern') token_counts = text_encoder.SubwordTextEncoder.get_token_counts( FLAGS.corpus_filepattern, FLAGS.corpus_max_lines) - gs.build_from_token_counts(token_counts, FLAGS.output_fn, FLAGS.min_count, + gs.build_from_token_counts(token_counts, FLAGS.min_count, FLAGS.num_iterations) + gs.store_to_file(FLAGS.output_fn) if __name__ == '__main__': diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index b30ecf541..5e4142bc1 100755 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -14,24 +14,26 @@ """A simple invertible tokenizer. -Converts from a raw string to a list of tokens (strings). +Converts from a raw string to a list of tokens (represented as +Unicode strings). This tokenizer has the following desirable properties: - It is invertible. - Punctuation is broken away from adjacent letters. - A single space between words does not produce an extra token. + - The full Unicode punctuation and separator set is recognized. The tokenization algorithm is as follows: -0. We classify the 256 characters into "word characters" and +0. We classify the input characters into "word characters" and "separator characters". Separator characters are defined as the union of - string.punctuation and string.whitespace. All other characters are + Unicode punctuation and separators/white space. All other characters are "word characters". 1. Split the text into a list of tokens, splitting at every boundary of a "word character" and a "separator character". This produces a list which - alternates between "word tokens" (strings of word characters) and - "separator tokens" (strings of of separator characters). + alternates between "word tokens" (strings of word codepoints) and + "separator tokens" (strings of of separator/punctuation codepoints). 2. Remove every token consisting of a single space, unless it is the very first or very last token in the list. These tokens are now @@ -53,25 +55,29 @@ # Dependency imports -from six import PY2 +from six import PY2, unichr # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin - # Regular expression that matches Unicode whitespace characters # (including ASCII whitespace) as defined in the Python run-time library _RE_WHITESPACE = re.compile(r"^\s$", re.UNICODE) +# Set of Unicode whitespace code points +UNICODE_WHITESPACE = set(unichr(i) for i in xrange(sys.maxunicode) + if _RE_WHITESPACE.match(unichr(i))) +# Set of Unicode punctuation code points +UNICODE_PUNCTUATION = set(unichr(i) for i in xrange(sys.maxunicode) + if unicodedata.category(unichr(i)).startswith('P')) +# Conversion between Unicode and UTF-8, if required (on Python2) +_decode_string = (lambda s: s.decode('utf-8')) if PY2 else (lambda s: s) +_encode_string = (lambda s: s.encode('utf-8')) if PY2 else (lambda s: s) + class Tokenizer(object): """Vocab for breaking words into Unicode wordpieces. """ - _UNICODE_PUNCTUATION = set(unichr(i) for i in xrange(sys.maxunicode) - if unicodedata.category(unichr(i)).startswith('P')) - _UNICODE_WHITESPACE = set(unichr(i) for i in xrange(sys.maxunicode) - if _RE_WHITESPACE.match(unichr(i))) - #_SEPARATOR_CHAR_SET = set(string.punctuation + string.whitespace) - _SEPARATOR_CHAR_SET = _UNICODE_WHITESPACE | _UNICODE_PUNCTUATION + _SEPARATOR_CHAR_SET = UNICODE_WHITESPACE | UNICODE_PUNCTUATION def __init__(self): self.token_counts = defaultdict(int) @@ -82,23 +88,22 @@ def encode(self, raw_text): Args: raw_text: a (Python2 or Python3 native) string Returns: - a list of Unicode strings + a list of tokens as Unicode strings """ if not raw_text: return [] ret = [] token_start = 0 - if PY2: - raw_text = raw_text.decode('utf-8') # Convert to Unicode - is_sep = [self._is_separator_char(c) for c in raw_text] - for pos in xrange(1, len(raw_text)): - if (is_sep[pos] != is_sep[pos-1]): - token = raw_text[token_start:pos] + unicode_text = _decode_string(raw_text) + is_sep = [c in self._SEPARATOR_CHAR_SET for c in unicode_text] + for pos in xrange(1, len(unicode_text)): + if is_sep[pos] != is_sep[pos - 1]: + token = unicode_text[token_start:pos] if token != u" " or token_start == 0: ret.append(token) self.token_counts[token] += 1 token_start = pos - final_token = raw_text[token_start:] + final_token = unicode_text[token_start:] ret.append(final_token) self.token_counts[final_token] += 1 return ret @@ -112,15 +117,10 @@ def decode(self, tokens): a (Python2 or Python3 native) string """ ret = u"" - is_word = [self._is_word_char(t[0]) for t in tokens] + is_word = [t[0] not in self._SEPARATOR_CHAR_SET for t in tokens] for i, token in enumerate(tokens): if i > 0 and is_word[i - 1] and is_word[i]: ret += u" " ret += token - return ret.encode('utf-8') if PY2 else ret - - def _is_separator_char(self, c): - return c in self._SEPARATOR_CHAR_SET + return _encode_string(ret) - def _is_word_char(self, c): - return c not in self._SEPARATOR_CHAR_SET From ccbe00612990896e5df9d51d2fdf45815212170c Mon Sep 17 00:00:00 2001 From: "yueyu.lin" Date: Wed, 28 Jun 2017 11:57:30 +0800 Subject: [PATCH 0049/4095] Avoid "index out of bound" when decoding --- tensor2tensor/utils/t2t_model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 4d7ccd771..0e367d697 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -269,7 +269,10 @@ def infer_step(recent_output, _): if last_position_only: cur_sample = samples[:, -1, :, :] else: - cur_sample = samples[:, tf.shape(recent_output)[1], :, :] + if len(tf.shape(recent_output)) >= 2: + cur_sample = samples[:, tf.shape(recent_output)[1], :, :] + else: + cur_sample = samples[:, -1, :, :] cur_sample = tf.to_int64(tf.expand_dims(cur_sample, axis=1)) samples = tf.concat([recent_output, cur_sample], axis=1) samples.set_shape([None, None, None, 1]) From 5563a06137223a80081b01de4771d8c32c05ccc1 Mon Sep 17 00:00:00 2001 From: "yueyu.lin" Date: Wed, 28 Jun 2017 17:41:59 +0800 Subject: [PATCH 0050/4095] Add comments that will use a new email name --- tensor2tensor/utils/t2t_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 0e367d697..7dfe258ef 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -269,6 +269,7 @@ def infer_step(recent_output, _): if last_position_only: cur_sample = samples[:, -1, :, :] else: + #Avoid the out of index Error if len(tf.shape(recent_output)) >= 2: cur_sample = samples[:, tf.shape(recent_output)[1], :, :] else: From 31f5dfa43d8e1fa8a2865ef9b7bb3c81158d7242 Mon Sep 17 00:00:00 2001 From: ReDeiPirati Date: Wed, 28 Jun 2017 15:24:19 +0200 Subject: [PATCH 0051/4095] Clear comment, add sanity check, change algorithmic_reverse_nlplike max_length and add __pycache__ entry in .gitignore --- .gitignore | 2 ++ tensor2tensor/bin/t2t-datagen | 16 ++++++++-------- tensor2tensor/data_generators/algorithmic.py | 7 ++++++- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index dd84837dd..24d1db4c6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ # Compiled python modules. *.pyc +# Byte-compiled +__pycache__/ # Python egg metadata, regenerated from source files by setuptools. /*.egg-info diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index ca9418488..1cbd27f2b 100755 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -87,15 +87,15 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: algorithmic.multiplication_generator(10, 40, 100000), lambda: algorithmic.multiplication_generator(10, 400, 10000)), "algorithmic_reverse_nlplike_decimal8K": ( - lambda: algorithmic.reverse_generator_nlplike(8000, 40, 100000, - 10, 1.250), - lambda: algorithmic.reverse_generator_nlplike(8000, 400, 10000, - 10, 1.250)), + lambda: algorithmic.reverse_generator_nlplike(8000, 70, 100000, + 10, 1.300), + lambda: algorithmic.reverse_generator_nlplike(8000, 700, 10000, + 10, 1.300)), "algorithmic_reverse_nlplike_decimal32K": ( - lambda: algorithmic.reverse_generator_nlplike(32000, 40, 100000, - 10, 1.005), - lambda: algorithmic.reverse_generator_nlplike(32000, 400, 10000, - 10, 1.005)), + lambda: algorithmic.reverse_generator_nlplike(32000, 70, 100000, + 10, 1.050), + lambda: algorithmic.reverse_generator_nlplike(32000, 700, 10000, + 10, 1.050)), "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index d7013469b..9bbb4bc4b 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -123,7 +123,12 @@ def zipf_random_sample(distr_map, sample_len): """ u = np.random.random(sample_len) - return [t+1 for t in np.searchsorted(distr_map, u)] # 0 pad and 1 EOS + # Random produces values in range [0.0,1.0); even if it is almost + # improbable(but possible) that it can generate a clear 0.000..0, + # we have made a sanity check to overcome this issue. On the other hand, + # t+1 is enough from saving us to generate PAD(0) and EOS(1) which are + # reservated symbols. + return [t+1 if t > 0 else t+2 for t in np.searchsorted(distr_map, u)] def reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, \ From bd300def3576410ed029e08ee4cbdb2db4659276 Mon Sep 17 00:00:00 2001 From: "yueyu.lin" Date: Thu, 29 Jun 2017 11:37:03 +0800 Subject: [PATCH 0052/4095] Two fix: 1. for beam search use the shape.ndims to avoid out of index error 2. for greedy search, still use the shape.ndims to avoid the out of index error. Before that I misuse the slice operation:-( --- tensor2tensor/utils/t2t_model.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 7dfe258ef..3ab97238b 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -196,7 +196,10 @@ def symbols_to_logits_fn(ids): if last_position_only: return tf.squeeze(logits, axis=[1, 2, 3]) current_output_position = tf.shape(ids)[1] - 1 # -1 due to the pad above. - logits = logits[:, current_output_position, :, :] + if current_output_position.shape.ndims >= 1: + logits = logits[:, current_output_position, :, :] + else: + logits = logits[:, -1 , :, :] return tf.squeeze(logits, axis=[1, 2]) batch_size = tf.shape(features["inputs"])[0] @@ -270,7 +273,7 @@ def infer_step(recent_output, _): cur_sample = samples[:, -1, :, :] else: #Avoid the out of index Error - if len(tf.shape(recent_output)) >= 2: + if tf.shape(recent_output).shape.ndims >= 2: cur_sample = samples[:, tf.shape(recent_output)[1], :, :] else: cur_sample = samples[:, -1, :, :] From c6211f172621769e9f05b43c70148e7892565672 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Thu, 29 Jun 2017 12:51:28 +0000 Subject: [PATCH 0053/4095] Separated alphabet generation; added bisect() --- .../data_generators/generator_utils.py | 7 +- tensor2tensor/data_generators/text_encoder.py | 113 +++++++++--------- .../text_encoder_build_subword.py | 4 +- tensor2tensor/data_generators/tokenizer.py | 7 +- 4 files changed, 66 insertions(+), 65 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/generator_utils.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100644 new mode 100755 index 3b248ffea..0d9b16289 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -242,9 +242,12 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): # For some datasets a second extraction is necessary. if ".gz" in lang_file: - tf.logging.info("Unpacking subdirectory %s" % filepath) new_filepath = os.path.join(tmp_dir, lang_file[:-3]) - gunzip_file(filepath, new_filepath) + if os.path.exists(new_filepath): + tf.logging.info("Subdirectory %s already exists, skipping unpacking" % filepath) + else: + tf.logging.info("Unpacking subdirectory %s" % filepath) + gunzip_file(filepath, new_filepath) filepath = new_filepath # Use Tokenizer to count the word occurrences. diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 73915c81d..29d796a61 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -174,9 +174,9 @@ class SubwordTextEncoder(TextEncoder): """ def __init__(self, filename=None, num_reserved_ids=2): - """Read from a file.""" self._tokenizer = tokenizer.Tokenizer() if filename is not None: + # Read from a file. self._load_from_file(filename) super(SubwordTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) @@ -264,7 +264,7 @@ def _escaped_token_to_subtokens(self, escaped_token): ret.append(subtoken) pos = end else: - # No subtoken in the vocabulary matches excaped_token[pos]. + # No subtoken in the vocabulary matches escaped_token[pos]. # This can happen if the token contains a Unicode character # that did not occur in the vocabulary training set. # The id self.vocab_size - 1 is decoded as Unicode uFFFD, @@ -274,6 +274,14 @@ def _escaped_token_to_subtokens(self, escaped_token): pos += 1 return ret + @classmethod + def alphabet(cls, token_counts): + """Return the set of Unicode characters that appear in the tokens""" + alphabet_set = set() + for token in six.iterkeys(token_counts): + alphabet_set |= set(token) + return alphabet_set + @classmethod def build_to_target_size(cls, target_size, @@ -297,37 +305,43 @@ def build_to_target_size(cls, Returns: a SubwordTextEncoder instance. """ - present_count = (max_val + min_val) // 2 - tf.logging.info('Trying min_count %d' % present_count) - subtokenizer = cls() - subtokenizer.build_from_token_counts(token_counts, - present_count, num_iterations) - - if min_val >= max_val or subtokenizer.vocab_size == target_size: - return subtokenizer - if subtokenizer.vocab_size > target_size: - other_subtokenizer = cls.build_to_target_size( - target_size, token_counts, present_count + 1, max_val, - num_iterations) - else: - other_subtokenizer = cls.build_to_target_size( - target_size, token_counts, min_val, present_count - 1, - num_iterations) - if (abs(other_subtokenizer.vocab_size - target_size) < - abs(subtokenizer.vocab_size - target_size)): - return other_subtokenizer - else: - return subtokenizer + + # Calculate the alphabet, i.e. the set of all Unicode characters + # that appear in the tokens + alphabet_set = cls.alphabet(token_counts) + tf.logging.info('Alphabet contains %d characters' % len(alphabet_set)) + + def bisect(min_val, max_val): + present_count = (max_val + min_val) // 2 + tf.logging.info('Trying min_count %d' % present_count) + subtokenizer = cls() + subtokenizer.build_from_token_counts(token_counts, alphabet_set, + present_count, num_iterations) + + if min_val >= max_val or subtokenizer.vocab_size == target_size: + return subtokenizer + if subtokenizer.vocab_size > target_size: + other_subtokenizer = bisect(present_count + 1, max_val) + else: + other_subtokenizer = bisect(min_val, present_count - 1) + if (abs(other_subtokenizer.vocab_size - target_size) < + abs(subtokenizer.vocab_size - target_size)): + return other_subtokenizer + else: + return subtokenizer + + return bisect(min_val, max_val) def build_from_token_counts(self, token_counts, + alphabet_set, min_count, num_iterations=4): """Train a SubwordTextEncoder based on a dictionary of word counts. Args: - token_counts: a dictionary of string to int. - store_filename: a string - where to write the vocabulary. + token_counts: a dictionary of Unicode strings to int. + alphabet_set: the set of Unicode characters that appear in the tokens. min_count: an integer - discard subtokens with lower counts. num_iterations: an integer. how many iterations of refinement. """ @@ -351,21 +365,16 @@ def build_from_token_counts(self, starts.append(pos) pos += len(self._all_subtoken_strings[subtoken]) for start in starts: - for end in xrange(start + 1, len(escaped_token)): + for end in xrange(start + 1, len(escaped_token) + 1): subtoken_string = escaped_token[start:end] counts[subtoken_string] += count # Array of sets of candidate subtoken strings, by length len_to_subtoken_strings = [] - eliminated_chars = defaultdict(int) for subtoken_string, count in six.iteritems(counts): lsub = len(subtoken_string) - # All subtoken strings of length 1 are included regardless of count - if count < min_count and lsub > 1: - # If eliminating a string, make sure that its individual characters - # beyond the first one (which has already been accounted for) - # are counted as subtoken strings - for c in subtoken_string[1:]: - eliminated_chars[c] += count + # All subtoken strings of length 1 are automatically included + # later, so we don't need to consider them here + if count < min_count or lsub <= 1: continue # Add this subtoken string to its length set while len(len_to_subtoken_strings) <= lsub: @@ -374,7 +383,6 @@ def build_from_token_counts(self, new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. - # First, look at all subtoken strings >= 2 characters long for subtoken_strings in reversed(len_to_subtoken_strings[2:]): for subtoken_string in subtoken_strings: count = counts[subtoken_string] @@ -385,24 +393,13 @@ def build_from_token_counts(self, counts[subtoken_string[:l]] -= count # Sort what we've got so far in decreasing order by count new_subtoken_strings.sort(reverse = True) - # Make sure that we include characters from subtokens - # that didn't survive the minimum count cutoff in - # the final vocabulary - for c, count in six.iteritems(eliminated_chars): - len_to_subtoken_strings[1].add(c) - counts[c] += count - # Add the single-character subtokens at the end of the list, - # if their final count is nonzero - for subtoken_string in len_to_subtoken_strings[1]: - count = counts[subtoken_string] - if count: - new_subtoken_strings.append((0, subtoken_string)) - # Make sure to include the underscore as a subtoken string - assert u'_' not in len_to_subtoken_strings[1] # Should not already be there - new_subtoken_strings.append((0, u'_')) + # Add the alphabet set at the end of the vocabulary list + for char in alphabet_set: + new_subtoken_strings.append((0, char)) # Also include the Unicode REPLACEMENT CHARACTER to use - # when encountering previously unseen Unicode characters in the input - # (i.e. input external to the tokenizer training set). + # when encountering previously unseen Unicode characters + # in the input (i.e. input external to the tokenizer training + # set, which may thus contain characters not in the alphabet_set). # This must be the last entry in the subtoken vocabulary list. new_subtoken_strings.append((0, u'\uFFFD')) # Now we have a candidate vocabulary @@ -470,18 +467,16 @@ def _unescape_token(self, escaped_token): @classmethod def get_token_counts(cls, text_filepattern, corpus_max_lines): - """Read the corpus and compute a dictionary of word counts.""" + """Read the corpus and compute a dictionary of token counts.""" tok = tokenizer.Tokenizer() - token_counts = defaultdict(int) lines_read = 0 filenames = tf.gfile.Glob(text_filepattern) for text_filename in filenames: with tf.gfile.Open(text_filename) as f: for line in f: - tokens = tok.encode(line.strip()) - for t in tokens: - token_counts[t] += 1 + # The tokenizer updates token_counts in encode() + tok.encode(line.strip()) lines_read += 1 if corpus_max_lines > 0 and lines_read > corpus_max_lines: - return token_counts - return token_counts + return tok.token_counts + return tok.token_counts diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py index 48267ec66..71128fba0 100755 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -59,7 +59,9 @@ def main(unused_argv): raise ValueError('Must provide --corpus_filepattern') token_counts = text_encoder.SubwordTextEncoder.get_token_counts( FLAGS.corpus_filepattern, FLAGS.corpus_max_lines) - gs.build_from_token_counts(token_counts, FLAGS.min_count, + alphabet_set = SubwordTextEncoder.alphabet(token_counts) + gs.build_from_token_counts(token_counts, alphabet_set, + FLAGS.min_count, FLAGS.num_iterations) gs.store_to_file(FLAGS.output_fn) diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 5e4142bc1..c75782707 100755 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -67,10 +67,10 @@ if _RE_WHITESPACE.match(unichr(i))) # Set of Unicode punctuation code points UNICODE_PUNCTUATION = set(unichr(i) for i in xrange(sys.maxunicode) - if unicodedata.category(unichr(i)).startswith('P')) + if unicodedata.category(unichr(i)).startswith("P")) # Conversion between Unicode and UTF-8, if required (on Python2) -_decode_string = (lambda s: s.decode('utf-8')) if PY2 else (lambda s: s) -_encode_string = (lambda s: s.encode('utf-8')) if PY2 else (lambda s: s) +_decode_string = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) +_encode_string = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) class Tokenizer(object): @@ -95,6 +95,7 @@ def encode(self, raw_text): ret = [] token_start = 0 unicode_text = _decode_string(raw_text) + # Classify each character in the input string is_sep = [c in self._SEPARATOR_CHAR_SET for c in unicode_text] for pos in xrange(1, len(unicode_text)): if is_sep[pos] != is_sep[pos - 1]: From 9512f08036b083469d4d22cd527b372d143ee3d3 Mon Sep 17 00:00:00 2001 From: pltrdy Date: Thu, 29 Jun 2017 17:45:22 +0200 Subject: [PATCH 0054/4095] PTB: Fixes and removing token for out of vocabulary We no longer support a 'unk' parameter for TokenTextEncoder. Fixing some vocabulary issues --- tensor2tensor/bin/t2t-datagen | 2 +- .../data_generators/problem_hparams.py | 11 +++++--- tensor2tensor/data_generators/ptb.py | 26 +++++++++---------- tensor2tensor/data_generators/text_encoder.py | 20 ++------------ 4 files changed, 24 insertions(+), 35 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 718206e7d..1d018c1ec 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -311,7 +311,7 @@ _SUPPORTED_PROBLEM_GENERATORS = { "lmptb_10k": ( lambda: ptb.train_generator( FLAGS.tmp_dir, - 10000, + FLAGS.data_dir, False), lambda: ptb.valid_generator()), } diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 3527cf097..1115ca617 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -345,10 +345,16 @@ def lmptb_10k(model_hparams): p = default_problem_hparams() p.input_modality = {} p.target_modality = (registry.Modalities.SYMBOL, 10000) + + vocabulary = text_encoder.TokenTextEncoder( + os.path.join(model_hparams.data_dir, + "lmptb_10k.vocab")) + p.vocabulary = { - "inputs": text_encoder.TextEncoder(), - "targets": text_encoder.TextEncoder() + "inputs": vocabulary, + "targets": vocabulary, } + p.input_space_id = 3 p.target_space_id = 3 return p @@ -368,7 +374,6 @@ def lm1b_64k(model_hparams): p.target_space_id = 3 return p - def wmt_enfr_characters(unused_model_hparams): """English to French translation benchmark.""" p = default_problem_hparams() diff --git a/tensor2tensor/data_generators/ptb.py b/tensor2tensor/data_generators/ptb.py index 7cb3230a8..4bb0b1d2a 100644 --- a/tensor2tensor/data_generators/ptb.py +++ b/tensor2tensor/data_generators/ptb.py @@ -48,17 +48,16 @@ def _read_words(filename): -def _build_vocab(filename, vocab_path, vocab_size, exclude=[]): +def _build_vocab(filename, vocab_path, vocab_size): """Reads a file a build a vocabulary of `vocab_size` words to as a list of words to `filename` The vocabulary is sorted by occurence count and has one word per line Originally from: https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/reader.py """ - data = [w for w in _read_words(filename) if w not in exclude] + data = _read_words(filename) counter = collections.Counter(data) - count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) words, _ = list(zip(*count_pairs)) words = words[:vocab_size] @@ -66,24 +65,25 @@ def _build_vocab(filename, vocab_path, vocab_size, exclude=[]): with open(vocab_path, 'w') as f: f.write("\n".join(words)) -def _get_token_encoder(tmp_dir, filename, vocab_size): +def _get_token_encoder(vocab_dir, filename): """Reads from file and returns a `TokenTextEncoder` based on the vocabulary """ - vocab_name = "ptb.vocab.%d" % vocab_size - vocab_path = os.path.join(tmp_dir, vocab_name) + vocab_name = "lmptb_10k.vocab" + vocab_path = os.path.join(vocab_dir, vocab_name) + - exclude = ['', ''] - _build_vocab(filename, vocab_path, vocab_size, exclude) + _build_vocab(filename, vocab_path, 10000) - return text_encoder.TokenTextEncoder(vocab_path, - unk="") + return text_encoder.TokenTextEncoder(vocab_path) class PTB(object): - def __init__(self, tmp_dir, vocab_size, char=False): + def __init__(self, tmp_dir, data_dir, char=False): + assert not char, "char mode for PTB is not yet implemented" self.char = char + self.data_dir = data_dir #self.num_steps = num_steps - + url = PTB_URL filename = os.path.basename(url) @@ -121,7 +121,7 @@ def __init__(self, tmp_dir, vocab_size, char=False): assert hasattr(self, "train"), "Training file not found" assert hasattr(self, "valid"), "Validation file not found" - self.encoder = _get_token_encoder(tmp_dir, self.train, vocab_size) + self.encoder = _get_token_encoder(data_dir, self.train) def train_generator(self): return self._generator(self.train) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 4d669948e..10b9a745f 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -121,24 +121,15 @@ def vocab_size(self): class TokenTextEncoder(TextEncoder): """Encoder based on a user-supplied vocabulary.""" - def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2, unk=None): + def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): """Initialize from a file, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) - - assert unk is None or type(unk) == str, "unk must be a string or None" - if unk is not None: - RESERVED_TOKENS.append(unk) - num_reserved_ids += 1 - self._unk = unk - self._reverse = reverse self._load_vocab_from_file(vocab_filename) def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" - ret = [self._token_to_id[tok] if tok in self._token_to_id - else self._token_to_id[self.unk] - for tok in sentence.strip().split()] + ret = [self._token_to_id[tok] for tok in sentence.strip().split()] return ret[::-1] if self._reverse else ret @@ -150,13 +141,6 @@ def decode(self, ids): def vocab_size(self): return len(self._id_to_token) - @property - def unk(self): - if self._unk is None: - raise ValueError("Unknown token") - elif type(self._unk) == str: - return self._unk - def _safe_id_to_token(self, idx): return self._id_to_token.get(idx, 'ID_%d' % idx) From 4695fdc7d034e3937a6472e074ce5e6a3fa1033f Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Thu, 29 Jun 2017 17:31:22 +0000 Subject: [PATCH 0055/4095] Added Sublime editor project files to .gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) mode change 100644 => 100755 .gitignore diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index dd84837dd..80f48a2fd --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,8 @@ # PyPI distribution artificats build/ dist/ + +# Sublime project files +*.sublime-project +*.sublime-workspace + From 1c775fa991a90bb1230f4b9489f8d8c5aebccc43 Mon Sep 17 00:00:00 2001 From: Richard Shin Date: Thu, 29 Jun 2017 13:12:05 -0700 Subject: [PATCH 0056/4095] Improve error message for missing problems --- tensor2tensor/data_generators/problem_hparams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 2268c3ec1..884a1b1af 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -78,7 +78,7 @@ def parse_problem_name(problem_name): def _lookup_problem_hparams_fn(name): if name not in PROBLEM_HPARAMS_MAP: - map_str = "\n* ".join(PROBLEM_HPARAMS_MAP.keys()) + map_str = "* " + "\n* ".join(sorted(PROBLEM_HPARAMS_MAP.keys())) error_msg = "%s not in the supported set of problems:\n%s" % (name, map_str) raise ValueError(error_msg) return PROBLEM_HPARAMS_MAP.get(name) From 85158fe2f56fcca2dd6174d65488f28f1f68e696 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Mon, 26 Jun 2017 19:16:40 -0700 Subject: [PATCH 0057/4095] Bump to v1.0.8 PiperOrigin-RevId: 160228099 --- .gitignore | 2 - tensor2tensor/bin/t2t-datagen | 17 -- tensor2tensor/bin/t2t-trainer | 0 tensor2tensor/data_generators/algorithmic.py | 69 ------- .../data_generators/algorithmic_test.py | 16 -- .../data_generators/generator_utils.py | 10 +- .../data_generators/problem_hparams.py | 22 +-- tensor2tensor/data_generators/ptb.py | 158 ---------------- tensor2tensor/data_generators/snli.py | 16 +- tensor2tensor/data_generators/text_encoder.py | 175 +++++++++--------- .../text_encoder_build_subword.py | 5 +- tensor2tensor/data_generators/tokenizer.py | 71 +++---- tensor2tensor/utils/t2t_model.py | 11 +- 13 files changed, 127 insertions(+), 445 deletions(-) mode change 100755 => 100644 tensor2tensor/bin/t2t-datagen mode change 100755 => 100644 tensor2tensor/bin/t2t-trainer mode change 100755 => 100644 tensor2tensor/data_generators/generator_utils.py delete mode 100644 tensor2tensor/data_generators/ptb.py mode change 100755 => 100644 tensor2tensor/data_generators/snli.py mode change 100755 => 100644 tensor2tensor/data_generators/text_encoder.py mode change 100755 => 100644 tensor2tensor/data_generators/text_encoder_build_subword.py mode change 100755 => 100644 tensor2tensor/data_generators/tokenizer.py diff --git a/.gitignore b/.gitignore index 24d1db4c6..dd84837dd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,5 @@ # Compiled python modules. *.pyc -# Byte-compiled -__pycache__/ # Python egg metadata, regenerated from source files by setuptools. /*.egg-info diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100755 new mode 100644 index 00750b81b..cb8a77f0d --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -40,7 +40,6 @@ from tensor2tensor.data_generators import image from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.data_generators import ptb import tensorflow as tf @@ -87,16 +86,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_multiplication_decimal40": ( lambda: algorithmic.multiplication_generator(10, 40, 100000), lambda: algorithmic.multiplication_generator(10, 400, 10000)), - "algorithmic_reverse_nlplike_decimal8K": ( - lambda: algorithmic.reverse_generator_nlplike(8000, 70, 100000, - 10, 1.300), - lambda: algorithmic.reverse_generator_nlplike(8000, 700, 10000, - 10, 1.300)), - "algorithmic_reverse_nlplike_decimal32K": ( - lambda: algorithmic.reverse_generator_nlplike(32000, 70, 100000, - 10, 1.050), - lambda: algorithmic.reverse_generator_nlplike(32000, 700, 10000, - 10, 1.050)), "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), @@ -318,12 +307,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { 626, vocab_filename="tokens.vocab.%d" % 2**15, vocab_size=2**15)), - "lmptb_10k": ( - lambda: ptb.train_generator( - FLAGS.tmp_dir, - FLAGS.data_dir, - False), - lambda: ptb.valid_generator()), } # pylint: enable=g-long-lambda diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100755 new mode 100644 diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 9bbb4bc4b..4c25e986e 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -93,75 +93,6 @@ def reverse_generator(nbr_symbols, max_length, nbr_cases): "targets": list(reversed(inputs)) + [1]} # [1] for EOS -def zipf_distribution(nbr_symbols, alpha): - """Helper function: Create a Zipf distribution. - - Args: - nbr_symbols: number of symbols to use in the distribution. - alpha: float, Zipf's Law Distribution parameter. Default = 1.5. - Usually for modelling natural text distribution is in - the range [1.1-1.6]. - - Return: - distr_map: list of float, Zipf's distribution over nbr_symbols. - - """ - tmp = np.power(np.arange(1, nbr_symbols+1), -alpha) - zeta = np.r_[0.0, np.cumsum(tmp)] - return [x / zeta[-1] for x in zeta] - - -def zipf_random_sample(distr_map, sample_len): - """Helper function: Generate a random Zipf sample of given lenght. - - Args: - distr_map: list of float, Zipf's distribution over nbr_symbols. - sample_len: integer, length of sequence to generate. - - Return: - sample: list of integer, Zipf's random sample over nbr_symbols. - - """ - u = np.random.random(sample_len) - # Random produces values in range [0.0,1.0); even if it is almost - # improbable(but possible) that it can generate a clear 0.000..0, - # we have made a sanity check to overcome this issue. On the other hand, - # t+1 is enough from saving us to generate PAD(0) and EOS(1) which are - # reservated symbols. - return [t+1 if t > 0 else t+2 for t in np.searchsorted(distr_map, u)] - - -def reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, \ - scale_std_dev=100, alpha=1.5): - """Generator for the reversing nlp-like task on sequences of symbols. - - The length of the sequence is drawn from a Gaussian(Normal) distribution - at random from [1, max_length] and with std deviation of 1%, - then symbols are drawn from Zipf's law at random from [2, nbr_symbols] until - nbr_cases sequences have been produced. - - Args: - max_length: integer, maximum length of sequences to generate. - nbr_cases: the number of cases to generate. - scale_std_dev: float, Normal distribution's standard deviation scale factor - used to draw the lenght of sequence. Default = 1% of the max_length. - alpha: float, Zipf's Law Distribution parameter. Default = 1.5. - Usually for modelling natural text distribution is in - the range [1.1-1.6]. - - Yields: - A dictionary {"inputs": input-list, "targets": target-list} where - target-list is input-list reversed. - """ - std_dev = max_length / scale_std_dev - distr_map = zipf_distribution(nbr_symbols, alpha) - for _ in xrange(nbr_cases): - l = int(abs(np.random.normal(loc=max_length/2, scale=std_dev)) + 1) - inputs = zipf_random_sample(distr_map, l) - yield {"inputs": inputs, - "targets": list(reversed(inputs)) + [1]} # [1] for EOS - - def lower_endian_to_number(l, base): """Helper function: convert a list of digits in the given base to a number.""" return sum([d * (base**i) for i, d in enumerate(l)]) diff --git a/tensor2tensor/data_generators/algorithmic_test.py b/tensor2tensor/data_generators/algorithmic_test.py index a85122436..a5fbfae2d 100644 --- a/tensor2tensor/data_generators/algorithmic_test.py +++ b/tensor2tensor/data_generators/algorithmic_test.py @@ -41,22 +41,6 @@ def testReverseGenerator(self): self.assertEqual(list(reversed(d["inputs"])) + [1], d["targets"]) self.assertEqual(counter, 10) - def testZipfDistribution(self): - # Following Zipf's Law with alpha equals 1: the first in rank is two times - # more probable/frequent that the second in rank, three times more prob/freq - # that the third in rank and so on. - d = algorithmic.zipf_distribution(10, 1.0001) - for i in xrange(len(d[1:])-1): - self.assertEqual("%.4f" % (abs(d[i+1]-d[i+2])*(i+2)), \ - "%.4f" % d[1]) - - def testReverseGeneratorNlpLike(self): - counter = 0 - for d in algorithmic.reverse_generator_nlplike(3, 8, 10): - counter += 1 - self.assertEqual(list(reversed(d["inputs"])) + [1], d["targets"]) - self.assertEqual(counter, 10) - def testLowerEndianToNumber(self): self.assertEqual(algorithmic.lower_endian_to_number([0], 2), 0) self.assertEqual(algorithmic.lower_endian_to_number([0], 7), 0) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100755 new mode 100644 index 0d9b16289..fb85d99c3 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -242,12 +242,9 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): # For some datasets a second extraction is necessary. if ".gz" in lang_file: + tf.logging.info("Unpacking subdirectory %s" % filepath) new_filepath = os.path.join(tmp_dir, lang_file[:-3]) - if os.path.exists(new_filepath): - tf.logging.info("Subdirectory %s already exists, skipping unpacking" % filepath) - else: - tf.logging.info("Unpacking subdirectory %s" % filepath) - gunzip_file(filepath, new_filepath) + gunzip_file(filepath, new_filepath) filepath = new_filepath # Use Tokenizer to count the word occurrences. @@ -261,8 +258,7 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): _ = tokenizer.encode(line) vocab = SubwordTextEncoder.build_to_target_size( - vocab_size, tokenizer.token_counts, 1, 1e3) - vocab.store_to_file(vocab_filepath) + vocab_size, tokenizer.token_counts, vocab_filepath, 1, 1e3) return vocab diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 2268c3ec1..55115b841 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -340,24 +340,6 @@ def lm1b_16k(model_hparams): p.target_space_id = 3 return p -def lmptb_10k(model_hparams): - """Penn Tree Bank language-modeling benchmark, 10k token vocabulary.""" - p = default_problem_hparams() - p.input_modality = {} - p.target_modality = (registry.Modalities.SYMBOL, 10000) - - vocabulary = text_encoder.TokenTextEncoder( - os.path.join(model_hparams.data_dir, - "lmptb_10k.vocab")) - - p.vocabulary = { - "inputs": vocabulary, - "targets": vocabulary, - } - - p.input_space_id = 3 - p.target_space_id = 3 - return p def lm1b_64k(model_hparams): """Billion-word language-modeling benchmark, 64k subtoken vocabulary.""" @@ -374,6 +356,7 @@ def lm1b_64k(model_hparams): p.target_space_id = 3 return p + def wmt_enfr_characters(unused_model_hparams): """English to French translation benchmark.""" p = default_problem_hparams() @@ -682,8 +665,6 @@ def image_mscoco_tokens(model_hparams, vocab_count): "algorithmic_multiplication_decimal40": lambda p: algorithmic(12, p), "algorithmic_reverse_binary40": lambda p: algorithmic(4, p), "algorithmic_reverse_decimal40": lambda p: algorithmic(12, p), - "algorithmic_reverse_nlplike_decimal8K": lambda p: algorithmic(8002, p), - "algorithmic_reverse_nlplike_decimal32K": lambda p: algorithmic(32002, p), "algorithmic_shift_decimal40": lambda p: algorithmic(22, p), "audio_timit_characters_tune": audio_timit_characters, "audio_timit_characters_test": audio_timit_characters, @@ -695,7 +676,6 @@ def image_mscoco_tokens(model_hparams, vocab_count): "audio_wsj_tokens_8k_test": lambda p: audio_wsj_tokens(p, 2**13), "lm1b_16k": lm1b_16k, "lm1b_64k": lm1b_64k, - "lmptb_10k": lmptb_10k, "wmt_parsing_characters": wmt_parsing_characters, "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens(p, 2**14, 2**9), diff --git a/tensor2tensor/data_generators/ptb.py b/tensor2tensor/data_generators/ptb.py deleted file mode 100644 index 4bb0b1d2a..000000000 --- a/tensor2tensor/data_generators/ptb.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Data generators for PTB data-sets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import tarfile -import collections - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import text_encoder - -import tensorflow as tf - - -EOS = text_encoder.EOS -PTB_URL = "http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz" - -def _read_words(filename): - """Reads words from a file. - It returns a list of words without '\n' - Originally from: - https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/reader.py - """ - with tf.gfile.GFile(filename, "r") as f: - if sys.version_info[0] >= 3: - return f.read().replace("\n", " ").split() - else: - return f.read().decode("utf-8").replace("\n", " ").split() - - - -def _build_vocab(filename, vocab_path, vocab_size): - """Reads a file a build a vocabulary of `vocab_size` words to - as a list of words to `filename` - The vocabulary is sorted by occurence count and has one word per line - Originally from: - https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/reader.py - """ - data = _read_words(filename) - - counter = collections.Counter(data) - count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) - words, _ = list(zip(*count_pairs)) - words = words[:vocab_size] - - with open(vocab_path, 'w') as f: - f.write("\n".join(words)) - -def _get_token_encoder(vocab_dir, filename): - """Reads from file and returns a `TokenTextEncoder` based on the vocabulary - """ - vocab_name = "lmptb_10k.vocab" - vocab_path = os.path.join(vocab_dir, vocab_name) - - - _build_vocab(filename, vocab_path, 10000) - - return text_encoder.TokenTextEncoder(vocab_path) - - -class PTB(object): - def __init__(self, tmp_dir, data_dir, char=False): - assert not char, "char mode for PTB is not yet implemented" - self.char = char - self.data_dir = data_dir - #self.num_steps = num_steps - - url = PTB_URL - - filename = os.path.basename(url) - compressed_filepath = generator_utils.maybe_download(tmp_dir, - filename, - url) - - ptb_files = [] - ptb_char_files = [] - with tarfile.open(compressed_filepath, "r:gz") as tgz: - files = [] - # selecting only relevant files - for m in tgz.getmembers(): - if "ptb" in m.name and ".txt" in m.name: - if "char" in m.name: - ptb_char_files += [m.name] - else: - ptb_files += [m.name] - files += [m] - - tgz.extractall(tmp_dir, members=files) - - if self.char: - files = ptb_char_files - else: - files = ptb_files - files = files - - for filename in files: - if "train" in filename: - self.train = os.path.join(tmp_dir, filename) - elif "valid" in filename: - self.valid = os.path.join(tmp_dir, filename) - - assert hasattr(self, "train"), "Training file not found" - assert hasattr(self, "valid"), "Validation file not found" - - self.encoder = _get_token_encoder(data_dir, self.train) - - def train_generator(self): - return self._generator(self.train) - - def valid_generator(self): - return self._generator(self.valid) - - def _generator(self, filename): - with tf.gfile.GFile(filename, "r") as f: - for line in f: - line = " ".join(line.replace('\n', EOS).split()) - tok = self.encoder.encode(line) - x = tok[:-1] - y = tok[1:] - - yield {"inputs": x, - "targets": y} - -# Using a object "singleton" -# `train_generator` must be called before -# `valid_generator` in order to work -_ptb = {} -def train_generator(*args, **kwargs): - """The train data generator to be called - """ - global _ptb - _ptb = PTB(*args, **kwargs) - return _ptb.train_generator() - -def valid_generator(): - """Validation (aka. dev) data generator - """ - global _ptb - return _ptb.valid_generator() diff --git a/tensor2tensor/data_generators/snli.py b/tensor2tensor/data_generators/snli.py old mode 100755 new mode 100644 index 8218bc253..5613ece4d --- a/tensor2tensor/data_generators/snli.py +++ b/tensor2tensor/data_generators/snli.py @@ -136,14 +136,14 @@ def _get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): if tf.gfile.Exists(vocab_filepath): gs = text_encoder.SubwordTextEncoder(vocab_filepath) return gs - example_file = os.path.join(tmp_dir, _EXAMPLES_FILE) - gs = text_encoder.SubwordTextEncoder() - token_counts = text_encoder.SubwordTextEncoder.get_token_counts( - example_file, corpus_max_lines=1000000) - gs = gs.build_to_target_size( - vocab_size, token_counts, min_val=1, max_val=1e3) - gs.store_to_file(vocab_filepath) - return gs + else: + example_file = os.path.join(tmp_dir, _EXAMPLES_FILE) + gs = text_encoder.SubwordTextEncoder() + token_counts = text_encoder.SubwordTextEncoder.get_token_counts( + example_file, corpus_max_lines=1000000) + gs = gs.build_to_target_size( + vocab_size, token_counts, vocab_filepath, min_val=1, max_val=1e3) + return gs def snli_token_generator(tmp_dir, train, vocab_size): diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100755 new mode 100644 index 2f86fa2fa..a219a6b8d --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -130,7 +130,6 @@ def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" ret = [self._token_to_id[tok] for tok in sentence.strip().split()] - return ret[::-1] if self._reverse else ret def decode(self, ids): @@ -175,9 +174,9 @@ class SubwordTextEncoder(TextEncoder): """ def __init__(self, filename=None, num_reserved_ids=2): + """Read from a file.""" self._tokenizer = tokenizer.Tokenizer() if filename is not None: - # Read from a file. self._load_from_file(filename) super(SubwordTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) @@ -235,13 +234,14 @@ def _subtokens_to_tokens(self, subtokens): def subtoken_to_subtoken_string(self, subtoken): """Subtoken_String (string) corresponding to the given subtoken (id).""" - if 0 <= subtoken < self.vocab_size: - subtoken_string = self._all_subtoken_strings[subtoken] - if subtoken_string: - return subtoken_string - if 0 <= subtoken < self._num_reserved_ids: - return '%s_' % RESERVED_TOKENS[subtoken] - return 'ID%d_' % subtoken + if (subtoken >= 0 and subtoken < self.vocab_size and + self._all_subtoken_strings[subtoken]): + return self._all_subtoken_strings[subtoken] + else: + if 0 <= subtoken < self._num_reserved_ids: + return '%s_' % RESERVED_TOKENS[subtoken] + else: + return 'ID%d_' % subtoken def _escaped_token_to_subtokens(self, escaped_token): """Converts an escaped token string to a list of subtokens. @@ -261,32 +261,21 @@ def _escaped_token_to_subtokens(self, escaped_token): if subtoken != -1: break end -= 1 + ret.append(subtoken) if end > pos: - ret.append(subtoken) pos = end else: - # No subtoken in the vocabulary matches escaped_token[pos]. - # This can happen if the token contains a Unicode character - # that did not occur in the vocabulary training set. - # The id self.vocab_size - 1 is decoded as Unicode uFFFD, - # REPLACEMENT_CHARACTER. - ret.append(self.vocab_size - 1) - # Ensure that the outer loop continues + # This kinda should not happen, but it does. Cop out by skipping the + # nonexistent subtoken from the returned list. + # print("Unable to find subtoken in string '{0}'".format(escaped_token)) pos += 1 return ret - @classmethod - def alphabet(cls, token_counts): - """Return the set of Unicode characters that appear in the tokens""" - alphabet_set = set() - for token in six.iterkeys(token_counts): - alphabet_set |= set(token) - return alphabet_set - @classmethod def build_to_target_size(cls, target_size, token_counts, + store_filename, min_val, max_val, num_iterations=4): @@ -306,43 +295,43 @@ def build_to_target_size(cls, Returns: a SubwordTextEncoder instance. """ - - # Calculate the alphabet, i.e. the set of all Unicode characters - # that appear in the tokens - alphabet_set = cls.alphabet(token_counts) - tf.logging.info('Alphabet contains %d characters' % len(alphabet_set)) - - def bisect(min_val, max_val): - present_count = (max_val + min_val) // 2 - tf.logging.info('Trying min_count %d' % present_count) - subtokenizer = cls() - subtokenizer.build_from_token_counts(token_counts, alphabet_set, - present_count, num_iterations) - - if min_val >= max_val or subtokenizer.vocab_size == target_size: - return subtokenizer - if subtokenizer.vocab_size > target_size: - other_subtokenizer = bisect(present_count + 1, max_val) + present_count = (max_val + min_val) // 2 + tf.logging.info('Trying min_count %d' % present_count) + subtokenizer = cls() + subtokenizer.build_from_token_counts(token_counts, store_filename, + present_count, num_iterations) + + if min_val >= max_val or subtokenizer.vocab_size == target_size: + return subtokenizer + elif subtokenizer.vocab_size > target_size: + other_subtokenizer = cls.build_to_target_size( + target_size, token_counts, store_filename, present_count + 1, max_val, + num_iterations) + if (abs(other_subtokenizer.vocab_size - target_size) < + abs(subtokenizer.vocab_size - target_size)): + return other_subtokenizer else: - other_subtokenizer = bisect(min_val, present_count - 1) + return subtokenizer + else: + other_subtokenizer = cls.build_to_target_size( + target_size, token_counts, store_filename, min_val, present_count - 1, + num_iterations) if (abs(other_subtokenizer.vocab_size - target_size) < abs(subtokenizer.vocab_size - target_size)): return other_subtokenizer else: return subtokenizer - return bisect(min_val, max_val) - def build_from_token_counts(self, token_counts, - alphabet_set, + store_filename, min_count, num_iterations=4): """Train a SubwordTextEncoder based on a dictionary of word counts. Args: - token_counts: a dictionary of Unicode strings to int. - alphabet_set: the set of Unicode characters that appear in the tokens. + token_counts: a dictionary of string to int. + store_filename: a string - where to write the vocabulary. min_count: an integer - discard subtokens with lower counts. num_iterations: an integer. how many iterations of refinement. """ @@ -350,7 +339,6 @@ def build_from_token_counts(self, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. for i in xrange(num_iterations): - tf.logging.info("Iteration {0}".format(i)) counts = defaultdict(int) for token, count in six.iteritems(token_counts): escaped_token = self._escape_token(token) @@ -364,49 +352,39 @@ def build_from_token_counts(self, starts = [] for subtoken in subtokens: starts.append(pos) - pos += len(self._all_subtoken_strings[subtoken]) + pos += len(self.subtoken_to_subtoken_string(subtoken)) for start in starts: - for end in xrange(start + 1, len(escaped_token) + 1): + for end in xrange(start + 1, len(escaped_token)): subtoken_string = escaped_token[start:end] counts[subtoken_string] += count - # Array of sets of candidate subtoken strings, by length + # array of lists of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): lsub = len(subtoken_string) - # All subtoken strings of length 1 are automatically included - # later, so we don't need to consider them here - if count < min_count or lsub <= 1: + # all subtoken strings of length 1 are included regardless of count + if count < min_count and lsub != 1: continue - # Add this subtoken string to its length set while len(len_to_subtoken_strings) <= lsub: - len_to_subtoken_strings.append(set()) - len_to_subtoken_strings[lsub].add(subtoken_string) + len_to_subtoken_strings.append([]) + len_to_subtoken_strings[lsub].append(subtoken_string) new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. - for subtoken_strings in reversed(len_to_subtoken_strings[2:]): + for subtoken_strings in len_to_subtoken_strings[::-1]: for subtoken_string in subtoken_strings: count = counts[subtoken_string] - if count < min_count: + if count < min_count and len(subtoken_string) != 1: + # subtoken strings of length 1 are included regardless of count continue - new_subtoken_strings.append((count, subtoken_string)) + new_subtoken_strings.append((-count, subtoken_string)) for l in xrange(1, len(subtoken_string)): counts[subtoken_string[:l]] -= count - # Sort what we've got so far in decreasing order by count - new_subtoken_strings.sort(reverse = True) - # Add the alphabet set at the end of the vocabulary list - for char in alphabet_set: - new_subtoken_strings.append((0, char)) - # Also include the Unicode REPLACEMENT CHARACTER to use - # when encountering previously unseen Unicode characters - # in the input (i.e. input external to the tokenizer training - # set, which may thus contain characters not in the alphabet_set). - # This must be the last entry in the subtoken vocabulary list. - new_subtoken_strings.append((0, u'\uFFFD')) - # Now we have a candidate vocabulary - self._init_from_list([u''] * self._num_reserved_ids + + # Make sure to include the underscore as a subtoken string + new_subtoken_strings.append((0, '_')) + new_subtoken_strings.sort() + self._init_from_list([''] * self._num_reserved_ids + [p[1] for p in new_subtoken_strings]) - tf.logging.info('vocab_size = %d' % self.vocab_size) + print('vocab_size = %d' % self.vocab_size) original = 'This sentence was encoded by the SubwordTextEncoder.' encoded = self.encode(original) @@ -415,16 +393,16 @@ def build_from_token_counts(self, decoded = self.decode(encoded) print(decoded) assert decoded == original - - def dump(self): - """ Debugging dump of the current subtoken vocabulary """ - subtoken_strings = [(i, s) for s, i in six.iteritems(self._subtoken_string_to_id)] - print(u", ".join(u"{0} : '{1}'".format(i, s) for i, s in sorted(subtoken_strings))) + self._store_to_file(store_filename) def _init_from_list(self, subtoken_strings): """Initialize from a list of subtoken strings.""" self._all_subtoken_strings = subtoken_strings - self._subtoken_string_to_id = { s : i for i, s in enumerate(subtoken_strings) if s } + self._subtoken_string_to_id = {} + for i in xrange(len(subtoken_strings)): + subtoken_string = subtoken_strings[i] + if subtoken_string: + self._subtoken_string_to_id[subtoken_string] = i def _load_from_file(self, filename): """Load from a file.""" @@ -432,16 +410,16 @@ def _load_from_file(self, filename): with tf.gfile.Open(filename) as f: for line in f: if six.PY2: - subtoken_strings.append(line.strip()[1:-1].decode('utf-8')) + subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) else: subtoken_strings.append(line.strip()[1:-1]) self._init_from_list(subtoken_strings) - def store_to_file(self, filename): + def _store_to_file(self, filename): with tf.gfile.Open(filename, 'w') as f: for subtoken_string in self._all_subtoken_strings: if six.PY2: - f.write('\'' + subtoken_string.encode('utf-8') + '\'\n') + f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') else: f.write('\'' + subtoken_string + '\'\n') @@ -458,26 +436,43 @@ def _escape_token(self, token): def _unescape_token(self, escaped_token): r"""Remove '_' from end, then translate '\\'->'\' and '\u'->'_'. + TODO(noam): There must be some better way to do this with regexps. + Args: escaped_token: a string Returns: token: a string """ assert escaped_token[-1] == '_' - return escaped_token[:-1].replace('\\u', '_').replace('\\\\', '\\') + escaped_token = escaped_token[:-1] + if '\\' not in escaped_token: + return escaped_token + ret = '' + pos = 0 + while pos < len(escaped_token): + if escaped_token[pos] == '\\' and pos + 1 < len(escaped_token): + if escaped_token[pos + 1] == 'u': + ret += '_' + else: + ret += escaped_token[pos + 1] + pos += 1 + pos += 1 + return ret @classmethod def get_token_counts(cls, text_filepattern, corpus_max_lines): - """Read the corpus and compute a dictionary of token counts.""" + """Read the corpus and compute a dictionary of word counts.""" tok = tokenizer.Tokenizer() + token_counts = {} lines_read = 0 filenames = tf.gfile.Glob(text_filepattern) for text_filename in filenames: with tf.gfile.Open(text_filename) as f: for line in f: - # The tokenizer updates token_counts in encode() - tok.encode(line.strip()) + tokens = tok.encode(line.strip()) + for t in tokens: + token_counts[t] = token_counts.get(t, 0) + 1 lines_read += 1 if corpus_max_lines > 0 and lines_read > corpus_max_lines: - return tok.token_counts - return tok.token_counts + return token_counts + return token_counts diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py old mode 100755 new mode 100644 index 71128fba0..ee71af9f6 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -59,11 +59,8 @@ def main(unused_argv): raise ValueError('Must provide --corpus_filepattern') token_counts = text_encoder.SubwordTextEncoder.get_token_counts( FLAGS.corpus_filepattern, FLAGS.corpus_max_lines) - alphabet_set = SubwordTextEncoder.alphabet(token_counts) - gs.build_from_token_counts(token_counts, alphabet_set, - FLAGS.min_count, + gs.build_from_token_counts(token_counts, FLAGS.output_fn, FLAGS.min_count, FLAGS.num_iterations) - gs.store_to_file(FLAGS.output_fn) if __name__ == '__main__': diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py old mode 100755 new mode 100644 index c75782707..3564aee2e --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -14,26 +14,24 @@ """A simple invertible tokenizer. -Converts from a raw string to a list of tokens (represented as -Unicode strings). +Converts from a raw string to a list of tokens (strings). This tokenizer has the following desirable properties: - It is invertible. - Punctuation is broken away from adjacent letters. - A single space between words does not produce an extra token. - - The full Unicode punctuation and separator set is recognized. The tokenization algorithm is as follows: -0. We classify the input characters into "word characters" and +0. We classify the 256 characters into "word characters" and "separator characters". Separator characters are defined as the union of - Unicode punctuation and separators/white space. All other characters are + string.punctuation and string.whitespace. All other characters are "word characters". 1. Split the text into a list of tokens, splitting at every boundary of a "word character" and a "separator character". This produces a list which - alternates between "word tokens" (strings of word codepoints) and - "separator tokens" (strings of of separator/punctuation codepoints). + alternates between "word tokens" (strings of word characters) and + "separator tokens" (strings of of separator characters). 2. Remove every token consisting of a single space, unless it is the very first or very last token in the list. These tokens are now @@ -49,35 +47,17 @@ from collections import defaultdict import string -import unicodedata -import sys -import re # Dependency imports -from six import PY2, unichr # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin -# Regular expression that matches Unicode whitespace characters -# (including ASCII whitespace) as defined in the Python run-time library -_RE_WHITESPACE = re.compile(r"^\s$", re.UNICODE) - -# Set of Unicode whitespace code points -UNICODE_WHITESPACE = set(unichr(i) for i in xrange(sys.maxunicode) - if _RE_WHITESPACE.match(unichr(i))) -# Set of Unicode punctuation code points -UNICODE_PUNCTUATION = set(unichr(i) for i in xrange(sys.maxunicode) - if unicodedata.category(unichr(i)).startswith("P")) -# Conversion between Unicode and UTF-8, if required (on Python2) -_decode_string = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) -_encode_string = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) - class Tokenizer(object): - """Vocab for breaking words into Unicode wordpieces. + """Vocab for breaking words into wordpieces. """ - _SEPARATOR_CHAR_SET = UNICODE_WHITESPACE | UNICODE_PUNCTUATION + _SEPARATOR_CHAR_SET = set(string.punctuation + string.whitespace) def __init__(self): self.token_counts = defaultdict(int) @@ -86,25 +66,23 @@ def encode(self, raw_text): """Encode a raw string as a list of tokens. Args: - raw_text: a (Python2 or Python3 native) string + raw_text: a string Returns: - a list of tokens as Unicode strings + a list of stirngs. """ if not raw_text: return [] ret = [] token_start = 0 - unicode_text = _decode_string(raw_text) - # Classify each character in the input string - is_sep = [c in self._SEPARATOR_CHAR_SET for c in unicode_text] - for pos in xrange(1, len(unicode_text)): - if is_sep[pos] != is_sep[pos - 1]: - token = unicode_text[token_start:pos] - if token != u" " or token_start == 0: + for pos in xrange(1, len(raw_text)): + if (self._is_separator_char(raw_text[pos]) != + self._is_separator_char(raw_text[pos - 1])): + token = raw_text[token_start:pos] + if token != " " or token_start == 0: ret.append(token) self.token_counts[token] += 1 token_start = pos - final_token = unicode_text[token_start:] + final_token = raw_text[token_start:] ret.append(final_token) self.token_counts[final_token] += 1 return ret @@ -113,15 +91,20 @@ def decode(self, tokens): """Decode a list of tokens to a string. Args: - tokens: a list of Unicode strings + tokens: a list of stirngs Returns: - a (Python2 or Python3 native) string + a string. """ - ret = u"" - is_word = [t[0] not in self._SEPARATOR_CHAR_SET for t in tokens] + ret = "" for i, token in enumerate(tokens): - if i > 0 and is_word[i - 1] and is_word[i]: - ret += u" " + if (i > 0 and self._is_word_char(tokens[i - 1][0]) and + self._is_word_char(token[0])): + ret += " " ret += token - return _encode_string(ret) + return ret + + def _is_separator_char(self, c): + return c in self._SEPARATOR_CHAR_SET + def _is_word_char(self, c): + return c not in self._SEPARATOR_CHAR_SET diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 3ab97238b..4d7ccd771 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -196,10 +196,7 @@ def symbols_to_logits_fn(ids): if last_position_only: return tf.squeeze(logits, axis=[1, 2, 3]) current_output_position = tf.shape(ids)[1] - 1 # -1 due to the pad above. - if current_output_position.shape.ndims >= 1: - logits = logits[:, current_output_position, :, :] - else: - logits = logits[:, -1 , :, :] + logits = logits[:, current_output_position, :, :] return tf.squeeze(logits, axis=[1, 2]) batch_size = tf.shape(features["inputs"])[0] @@ -272,11 +269,7 @@ def infer_step(recent_output, _): if last_position_only: cur_sample = samples[:, -1, :, :] else: - #Avoid the out of index Error - if tf.shape(recent_output).shape.ndims >= 2: - cur_sample = samples[:, tf.shape(recent_output)[1], :, :] - else: - cur_sample = samples[:, -1, :, :] + cur_sample = samples[:, tf.shape(recent_output)[1], :, :] cur_sample = tf.to_int64(tf.expand_dims(cur_sample, axis=1)) samples = tf.concat([recent_output, cur_sample], axis=1) samples.set_shape([None, None, None, 1]) From a83ef29349bf27e53b2c54be8c05006915049700 Mon Sep 17 00:00:00 2001 From: Niki Parmar Date: Tue, 27 Jun 2017 14:07:55 -0700 Subject: [PATCH 0058/4095] Change blue metric name, better docs PiperOrigin-RevId: 160323679 --- tensor2tensor/utils/bleu_hook.py | 17 ++++++++++++++--- tensor2tensor/utils/metrics.py | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index eb8749b3f..012215cff 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -111,9 +111,20 @@ def compute_bleu(reference_corpus, return np.float32(bleu) -def padded_bleu_score(predictions, - labels, **unused_kwargs): - """Bleu score computation between labels and predictions on non-0s.""" +def bleu_score(predictions, labels, **unused_kwargs): + """BLEU score computation between labels and predictions. + + An approximate BLEU scoring method since we do not glue word pieces or + decode the ids and tokenize the output. By default, we use ngram order of 4 + and use brevity penalty. Also, this does not have beam search. + + Args: + predictions: tensor, model predicitons + labels: tensor, gold output. + + Returns: + bleu: int, approx bleu score + """ outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) # Convert the outputs and labels to a [batch_size, input_length] tensor. outputs = tf.squeeze(outputs) diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 10c384af7..f64f9d290 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -142,7 +142,7 @@ def global_fn(predictions, labels, weights): # TODO(nikip): Extend this to support use of custom metrics for problems. for problem in problems: if "wmt" in problem: - metrics_list.append(("bleu_score", bleu_hook.padded_bleu_score)) + metrics_list.append(("approx_bleu_score", bleu_hook.padded_bleu_score)) for metric in metrics_list: append_metric_fns(metric, eval_metrics) From 877ba582ff8ab20fd6afbf3aa74d866f8f4e7e62 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Tue, 27 Jun 2017 15:59:36 -0700 Subject: [PATCH 0059/4095] Small training cleanups and bluenet work. PiperOrigin-RevId: 160339931 --- tensor2tensor/models/bluenet.py | 173 ++++++++++++++------- tensor2tensor/models/bluenet_test.py | 1 + tensor2tensor/models/common_layers.py | 5 +- tensor2tensor/models/common_layers_test.py | 8 +- tensor2tensor/utils/trainer_utils.py | 31 ++-- 5 files changed, 132 insertions(+), 86 deletions(-) diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index bbcf392aa..efa46cb59 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -30,86 +30,145 @@ import tensorflow as tf -def residual_module(x, hparams, n, sep): - """A stack of convolution blocks with residual connection.""" - k = (hparams.kernel_height, hparams.kernel_width) - dilations_and_kernels = [((1, 1), k) for _ in xrange(n)] - with tf.variable_scope("residual_module%d_sep%d" % (n, sep)): - y = common_layers.subseparable_conv_block( - x, - hparams.hidden_size, - dilations_and_kernels, - padding="SAME", - separability=sep, - name="block") - x = common_layers.layer_norm(x + y, hparams.hidden_size, name="lnorm") - return tf.nn.dropout(x, 1.0 - hparams.dropout) +def conv_module(kw, kh, sep, div): + def convfn(x, hparams): + return common_layers.subseparable_conv( + x, hparams.hidden_size // div, (kw, kh), + padding="SAME", separability=sep, + name="conv_%d%d_sep%d_div%d" % (kw, kh, sep, div)) + return convfn -def residual_module1(x, hparams): - return residual_module(x, hparams, 1, 1) +def layernorm_module(x, hparams): + return common_layers.layer_norm(x, hparams.hidden_size, name="layer_norm") -def residual_module1_sep(x, hparams): - return residual_module(x, hparams, 1, 0) - - -def residual_module2(x, hparams): - return residual_module(x, hparams, 2, 1) - - -def residual_module2_sep(x, hparams): - return residual_module(x, hparams, 2, 0) +def noamnorm_module(x, hparams): + del hparams # Unused. + return common_layers.noam_norm(x) -def residual_module3(x, hparams): - return residual_module(x, hparams, 3, 1) +def identity_module(x, hparams): + del hparams # Unused. + return x -def residual_module3_sep(x, hparams): - return residual_module(x, hparams, 3, 0) +def first_binary_module(x, y, hparams): + del y, hparams # Unused. + return x -def norm_module(x, hparams): - return common_layers.layer_norm(x, hparams.hidden_size, name="norm_module") +def second_binary_module(x, y, hparams): + del x, hparams # Unused. + return y -def identity_module(x, hparams): +def sum_binary_module(x, y, hparams): del hparams # Unused. - return x + return x + y -def run_modules(blocks, cur, hparams, dp): - """Run blocks in parallel using dp as data_parallelism.""" - assert len(blocks) % dp.n == 0 - res = [] - for i in xrange(len(blocks) // dp.n): - res.extend(dp(blocks[i * dp.n:(i + 1) * dp.n], cur, hparams)) - return res +def shakeshake_binary_module(x, y, hparams): + del hparams # Unused. + return common_layers.shakeshake2(x, y) + + +def run_binary_modules(modules, cur1, cur2, hparams): + """Run binary modules.""" + selection_var = tf.get_variable("selection", [len(modules)], + initializer=tf.zeros_initializer()) + inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01) + selected_weights = tf.nn.softmax(selection_var * inv_t) + all_res = [modules[n](cur1, cur2, hparams) for n in xrange(len(modules))] + all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) + res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) + return tf.reduce_sum(res, axis=0) + + +def run_unary_modules_basic(modules, cur, hparams): + """Run unary modules.""" + selection_var = tf.get_variable("selection", [len(modules)], + initializer=tf.zeros_initializer()) + inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01) + selected_weights = tf.nn.softmax(selection_var * inv_t) + all_res = [modules[n](cur, hparams) for n in xrange(len(modules))] + all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) + res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) + return tf.reduce_sum(res, axis=0) + + +def run_unary_modules_sample(modules, cur, hparams, k): + """Run modules, sampling k.""" + selection_var = tf.get_variable("selection", [len(modules)], + initializer=tf.zeros_initializer()) + selection = tf.multinomial(tf.expand_dims(selection_var, axis=0), k) + selection = tf.squeeze(selection, axis=0) # [k] selected classes. + to_run = tf.one_hot(selection, len(modules)) # [k x nmodules] one-hot. + to_run = tf.reduce_sum(to_run, axis=0) # [nmodules], 0=not run, 1=run. + all_res = [tf.cond(tf.less(to_run[n], 0.1), + lambda: tf.zeros_like(cur), + lambda i=n: modules[i](cur, hparams)) + for n in xrange(len(modules))] + inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01) + selected_weights = tf.nn.softmax(selection_var * inv_t - 1e9 * (1.0 - to_run)) + all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) + res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) + return tf.reduce_sum(res, axis=0) + + +def run_unary_modules(modules, cur, hparams): + if len(modules) < 5: + return run_unary_modules_basic(modules, cur, hparams) + return run_unary_modules_sample(modules, cur, hparams, 4) @registry.register_model class BlueNet(t2t_model.T2TModel): - def model_fn_body_sharded(self, sharded_features): - dp = self._data_parallelism - dp._reuse = False # pylint:disable=protected-access + def model_fn_body(self, features): hparams = self._hparams - blocks = [identity_module, norm_module, - residual_module1, residual_module1_sep, - residual_module2, residual_module2_sep, - residual_module3, residual_module3_sep] - inputs = sharded_features["inputs"] - - cur = tf.concat(inputs, axis=0) - cur_shape = cur.get_shape() + conv_modules = [conv_module(kw, kw, sep, div) + for kw in [3, 5, 7] + for sep in [0, 1] + for div in [1]] + [identity_module] + activation_modules = [identity_module, + lambda x, _: tf.nn.relu(x), + lambda x, _: tf.nn.elu(x), + lambda x, _: tf.tanh(x)] + norm_modules = [identity_module, layernorm_module, noamnorm_module] + binary_modules = [first_binary_module, second_binary_module, + sum_binary_module, shakeshake_binary_module] + inputs = features["inputs"] + + def run_unary(x, name): + """A single step of unary modules.""" + with tf.variable_scope(name): + with tf.variable_scope("activation"): + x = run_unary_modules(activation_modules, x, hparams) + x.set_shape(cur_shape) + with tf.variable_scope("conv"): + x = run_unary_modules(conv_modules, x, hparams) + x.set_shape(cur_shape) + with tf.variable_scope("norm"): + x = run_unary_modules(norm_modules, x, hparams) + x.set_shape(cur_shape) + return x + + cur1, cur2 = inputs, inputs + cur_shape = inputs.get_shape() for i in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % i): - processed = run_modules(blocks, cur, hparams, dp) - cur = common_layers.shakeshake(processed) - cur.set_shape(cur_shape) + cur1 = run_unary(cur1, "unary1") + cur2 = run_unary(cur2, "unary2") + with tf.variable_scope("binary1"): + next1 = run_binary_modules(binary_modules, cur1, cur2, hparams) + next1.set_shape(cur_shape) + with tf.variable_scope("binary2"): + next2 = run_binary_modules(binary_modules, cur1, cur2, hparams) + next2.set_shape(cur_shape) + cur1, cur2 = next1, next2 - return list(tf.split(cur, len(inputs), axis=0)), 0.0 + return cur1 @registry.register_hparams @@ -117,7 +176,7 @@ def bluenet_base(): """Set of hyperparameters.""" hparams = common_hparams.basic_params1() hparams.batch_size = 4096 - hparams.hidden_size = 768 + hparams.hidden_size = 256 hparams.dropout = 0.2 hparams.symbol_dropout = 0.2 hparams.label_smoothing = 0.1 diff --git a/tensor2tensor/models/bluenet_test.py b/tensor2tensor/models/bluenet_test.py index a325e5a55..080c96a3f 100644 --- a/tensor2tensor/models/bluenet_test.py +++ b/tensor2tensor/models/bluenet_test.py @@ -38,6 +38,7 @@ def testBlueNet(self): p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, vocab_size) with self.test_session() as session: + tf.train.get_or_create_global_step() features = { "inputs": tf.constant(x, dtype=tf.int32), "targets": tf.constant(y, dtype=tf.int32), diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 078fcc5a3..3ef84f27c 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -292,9 +292,8 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): padding = [[0, 0], [height_padding, 0], [width_padding, 0], [0, 0]] inputs = tf.pad(inputs, padding) kwargs["padding"] = "VALID" - force2d = False # Special argument we use to force 2d kernels (see below). - if "force2d" in kwargs: - force2d = kwargs["force2d"] + # Special argument we use to force 2d kernels (see below). + force2d = kwargs.get("force2d", True) def conv2d_kernel(kernel_size_arg, name_suffix): """Call conv2d but add suffix to name.""" diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index 3839b9d36..091f272d6 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -77,7 +77,7 @@ def testShakeShake(self): def testConv(self): x = np.random.rand(5, 7, 1, 11) with self.test_session() as session: - y = common_layers.conv(tf.constant(x, dtype=tf.float32), 13, (3, 3)) + y = common_layers.conv(tf.constant(x, dtype=tf.float32), 13, (3, 1)) session.run(tf.global_variables_initializer()) res = session.run(y) self.assertEqual(res.shape, (5, 5, 1, 13)) @@ -86,7 +86,7 @@ def testSeparableConv(self): x = np.random.rand(5, 7, 1, 11) with self.test_session() as session: y = common_layers.separable_conv( - tf.constant(x, dtype=tf.float32), 13, (3, 3)) + tf.constant(x, dtype=tf.float32), 13, (3, 1)) session.run(tf.global_variables_initializer()) res = session.run(y) self.assertEqual(res.shape, (5, 5, 1, 13)) @@ -97,7 +97,7 @@ def testSubSeparableConv(self): with self.test_session() as session: with tf.variable_scope("sep_%d" % sep): y = common_layers.subseparable_conv( - tf.constant(x, dtype=tf.float32), 16, (3, 3), separability=sep) + tf.constant(x, dtype=tf.float32), 16, (3, 1), separability=sep) session.run(tf.global_variables_initializer()) res = session.run(y) self.assertEqual(res.shape, (5, 5, 1, 16)) @@ -283,7 +283,7 @@ def testConvStride2MultiStep(self): tf.constant(x1, dtype=tf.float32), 4, 16) session.run(tf.global_variables_initializer()) actual = session.run(a[0]) - self.assertEqual(actual.shape, (5, 2, 1, 16)) + self.assertEqual(actual.shape, (5, 2, 0, 16)) def testDeconvStride2MultiStep(self): x1 = np.random.rand(5, 2, 1, 11) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 940927638..69e04a998 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -417,7 +417,8 @@ def nth_model(n): "problem_%d_steps" % n, initializer=0, trainable=False) o4 = problem_steps.assign_add(1) with tf.control_dependencies([o1, o2, o3, o4]): # Make sure the ops run. - total_loss = tf.identity(total_loss) + # Ensure the loss is a scalar here. + total_loss = tf.reshape(total_loss, [], name="total_loss_control_id") return [total_loss] + sharded_logits # Need to flatten for cond later. result_list = _cond_on_index(nth_model, features["problem_choice"], 0, @@ -472,15 +473,13 @@ def nth_model(n): tf.to_float(nth_steps) / (global_step + 1.0)) # Log trainable weights and add decay. - total_size, total_embedding, weight_decay_loss = 0, 0, 0.0 + total_size, weight_decay_loss = 0, 0.0 all_weights = {v.name: v for v in tf.trainable_variables()} for v_name in sorted(list(all_weights)): v = all_weights[v_name] v_size = int(np.prod(np.array(v.shape.as_list()))) tf.logging.info("Weight %s\tshape %s\tsize %d", v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) - if "embedding" in v_name: - total_embedding += v_size total_size += v_size if hparams.weight_decay > 0.0 and len(v.shape.as_list()) > 1: # Add weight regularization if set and the weight is not a bias (dim>1). @@ -497,10 +496,9 @@ def nth_model(n): with tf.control_dependencies([noise_op]): total_loss = tf.identity(total_loss) tf.logging.info("Total trainable variables size: %d", total_size) - tf.logging.info("Total embedding variables size: %d", total_embedding) - tf.logging.info("Total non-embedding variables size: %d", - total_size - total_embedding) - total_loss += weight_decay_loss * hparams.weight_decay + if hparams.weight_decay > 0.0: + total_loss += weight_decay_loss * hparams.weight_decay + total_loss = tf.identity(total_loss, name="total_loss") # Define the train_op for the TRAIN mode. opt = _ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) @@ -1126,8 +1124,7 @@ def input_fn(): class _ConditionalOptimizer(tf.train.Optimizer): """Conditional optimizer.""" - def __init__(self, optimizer_name, lr, hparams, skip_condition_tensor=False): - self._skip_condition = skip_condition_tensor + def __init__(self, optimizer_name, lr, hparams): if optimizer_name == "Adam": # We change the default epsilon for Adam and re-scale lr. # Using LazyAdam as it's much faster for large vocabulary embeddings. @@ -1147,18 +1144,8 @@ def compute_gradients(self, loss, var_list, colocate_gradients_with_ops): loss, var_list, colocate_gradients_with_ops=colocate_gradients_with_ops) def apply_gradients(self, gradients, global_step=None, name=None): - - def opt_gradients(): - return self._opt.apply_gradients( - gradients, global_step=global_step, name=name) - - if self._skip_condition is False: - return opt_gradients() - return tf.cond( - self._skip_condition, - tf.no_op, - opt_gradients, - name="conditional_optimizer_gradients_skip_cond") + return self._opt.apply_gradients( + gradients, global_step=global_step, name=name) def _sqrt_decay(step): From f61ce538897c686b5ad01e441c2f567cd64ba964 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Tue, 27 Jun 2017 17:48:11 -0700 Subject: [PATCH 0060/4095] Corrections to make BLEU and bluenet run, debugging sharding on 1 GPU. PiperOrigin-RevId: 160352874 --- tensor2tensor/models/bluenet.py | 13 +++++++------ tensor2tensor/utils/metrics.py | 2 +- tensor2tensor/utils/trainer_utils.py | 5 +++++ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index efa46cb59..19bed2032 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -117,7 +117,7 @@ def run_unary_modules_sample(modules, cur, hparams, k): def run_unary_modules(modules, cur, hparams): - if len(modules) < 5: + if len(modules) < 8: return run_unary_modules_basic(modules, cur, hparams) return run_unary_modules_sample(modules, cur, hparams, 4) @@ -142,16 +142,17 @@ def model_fn_body(self, features): def run_unary(x, name): """A single step of unary modules.""" + x_shape = x.get_shape() with tf.variable_scope(name): + with tf.variable_scope("norm"): + x = run_unary_modules(norm_modules, x, hparams) + x.set_shape(x_shape) with tf.variable_scope("activation"): x = run_unary_modules(activation_modules, x, hparams) - x.set_shape(cur_shape) + x.set_shape(x_shape) with tf.variable_scope("conv"): x = run_unary_modules(conv_modules, x, hparams) - x.set_shape(cur_shape) - with tf.variable_scope("norm"): - x = run_unary_modules(norm_modules, x, hparams) - x.set_shape(cur_shape) + x.set_shape(x_shape) return x cur1, cur2 = inputs, inputs diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index f64f9d290..ecc02fd5e 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -142,7 +142,7 @@ def global_fn(predictions, labels, weights): # TODO(nikip): Extend this to support use of custom metrics for problems. for problem in problems: if "wmt" in problem: - metrics_list.append(("approx_bleu_score", bleu_hook.padded_bleu_score)) + metrics_list.append(("approx_bleu_score", bleu_hook.bleu_score)) for metric in metrics_list: append_metric_fns(metric, eval_metrics) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 69e04a998..caccbb44a 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -78,6 +78,9 @@ flags.DEFINE_string("master", "", "Address of TensorFlow master.") flags.DEFINE_string("schedule", "local_run", "Method of tf.contrib.learn.Experiment to run.") +flags.DEFINE_bool("locally_shard_to_cpu", False, + "Use CPU as a sharding device runnning locally. This allows " + "to test sharded model construction on a machine with 1 GPU.") flags.DEFINE_bool("daisy_chain_variables", True, "copy variables around in a daisy chain") flags.DEFINE_bool("sync", False, "Sync compute on PS.") @@ -1243,6 +1246,8 @@ def _replica_device_setter(worker_device): if FLAGS.schedule == "local_run": assert not FLAGS.sync datashard_devices = ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] + if FLAGS.locally_shard_to_cpu: + datashard_devices += ["cpu:0"] caching_devices = None elif FLAGS.sync: assert FLAGS.ps_replicas > 0 From 75f398d897a789fb58eaf383c56626b063fe2c01 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Thu, 29 Jun 2017 12:07:36 -0700 Subject: [PATCH 0061/4095] Correct decoding for class labels, add --local_eval_frequency. PiperOrigin-RevId: 160555605 --- README.md | 4 +- tensor2tensor/bin/t2t-datagen | 17 ++ tensor2tensor/data_generators/algorithmic.py | 70 ++++++ .../data_generators/algorithmic_test.py | 15 ++ .../data_generators/generator_utils.py | 11 +- .../data_generators/problem_hparams.py | 18 ++ tensor2tensor/data_generators/ptb.py | 149 ++++++++++++ tensor2tensor/data_generators/snli.py | 17 +- tensor2tensor/data_generators/text_encoder.py | 215 +++++++++--------- .../text_encoder_build_subword.py | 5 +- tensor2tensor/data_generators/tokenizer.py | 81 ++++--- .../data_generators/tokenizer_test.py | 9 +- tensor2tensor/utils/get_ende_bleu.sh | 23 ++ tensor2tensor/utils/t2t_model.py | 15 +- tensor2tensor/utils/trainer_utils.py | 10 +- 15 files changed, 498 insertions(+), 161 deletions(-) create mode 100644 tensor2tensor/data_generators/ptb.py create mode 100755 tensor2tensor/utils/get_ende_bleu.sh diff --git a/README.md b/README.md index 6932dab3a..9adca7f45 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ t2t-trainer --registry_help PROBLEM=wmt_ende_tokens_32k MODEL=transformer -HPARAMS=transformer_base +HPARAMS=transformer_base_single_gpu DATA_DIR=$HOME/t2t_data TMP_DIR=/tmp/t2t_datagen @@ -209,7 +209,7 @@ and hyperparameter set functions can compose other hyperparameter set functions. The **trainer** binary is the main entrypoint for training, evaluation, and inference. Users can easily switch between problems, models, and hyperparameter sets by using the `--model`, `--problems`, and `--hparams_set` flags. Specific -hyperparameters can be overriden with the `--hparams` flag. `--schedule` and +hyperparameters can be overridden with the `--hparams` flag. `--schedule` and related flags control local and distributed training/evaluation ([distributed training documentation](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/docs/distributed_training.md)). diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index cb8a77f0d..f45f63744 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -37,6 +37,7 @@ from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import image +from tensor2tensor.data_generators import ptb from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing @@ -86,6 +87,16 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_multiplication_decimal40": ( lambda: algorithmic.multiplication_generator(10, 40, 100000), lambda: algorithmic.multiplication_generator(10, 400, 10000)), + "algorithmic_reverse_nlplike_decimal8K": ( + lambda: algorithmic.reverse_generator_nlplike(8000, 70, 100000, + 10, 1.300), + lambda: algorithmic.reverse_generator_nlplike(8000, 700, 10000, + 10, 1.300)), + "algorithmic_reverse_nlplike_decimal32K": ( + lambda: algorithmic.reverse_generator_nlplike(32000, 70, 100000, + 10, 1.050), + lambda: algorithmic.reverse_generator_nlplike(32000, 700, 10000, + 10, 1.050)), "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), @@ -307,6 +318,12 @@ _SUPPORTED_PROBLEM_GENERATORS = { 626, vocab_filename="tokens.vocab.%d" % 2**15, vocab_size=2**15)), + "lmptb_10k": ( + lambda: ptb.train_generator( + FLAGS.tmp_dir, + FLAGS.data_dir, + False), + ptb.valid_generator), } # pylint: enable=g-long-lambda diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 4c25e986e..4cd14753b 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -93,6 +93,76 @@ def reverse_generator(nbr_symbols, max_length, nbr_cases): "targets": list(reversed(inputs)) + [1]} # [1] for EOS +def zipf_distribution(nbr_symbols, alpha): + """Helper function: Create a Zipf distribution. + + Args: + nbr_symbols: number of symbols to use in the distribution. + alpha: float, Zipf's Law Distribution parameter. Default = 1.5. + Usually for modelling natural text distribution is in + the range [1.1-1.6]. + + Returns: + distr_map: list of float, Zipf's distribution over nbr_symbols. + + """ + tmp = np.power(np.arange(1, nbr_symbols+1), -alpha) + zeta = np.r_[0.0, np.cumsum(tmp)] + return [x / zeta[-1] for x in zeta] + + +def zipf_random_sample(distr_map, sample_len): + """Helper function: Generate a random Zipf sample of given lenght. + + Args: + distr_map: list of float, Zipf's distribution over nbr_symbols. + sample_len: integer, length of sequence to generate. + + Returns: + sample: list of integer, Zipf's random sample over nbr_symbols. + + """ + u = np.random.random(sample_len) + # Random produces values in range [0.0,1.0); even if it is almost + # improbable(but possible) that it can generate a clear 0.000..0, + # we have made a sanity check to overcome this issue. On the other hand, + # t+1 is enough from saving us to generate PAD(0) and EOS(1) which are + # reservated symbols. + return [t+1 if t > 0 else t+2 for t in np.searchsorted(distr_map, u)] + + +def reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, + scale_std_dev=100, alpha=1.5): + """Generator for the reversing nlp-like task on sequences of symbols. + + The length of the sequence is drawn from a Gaussian(Normal) distribution + at random from [1, max_length] and with std deviation of 1%, + then symbols are drawn from Zipf's law at random from [2, nbr_symbols] until + nbr_cases sequences have been produced. + + Args: + nbr_symbols: integer, number of symbols. + max_length: integer, maximum length of sequences to generate. + nbr_cases: the number of cases to generate. + scale_std_dev: float, Normal distribution's standard deviation scale factor + used to draw the lenght of sequence. Default = 1% of the max_length. + alpha: float, Zipf's Law Distribution parameter. Default = 1.5. + Usually for modelling natural text distribution is in + the range [1.1-1.6]. + + Yields: + A dictionary {"inputs": input-list, "targets": target-list} where + target-list is input-list reversed. + """ + std_dev = max_length / scale_std_dev + distr_map = zipf_distribution(nbr_symbols, alpha) + for _ in xrange(nbr_cases): + l = int(abs(np.random.normal(loc=max_length/2, scale=std_dev)) + 1) + inputs = zipf_random_sample(distr_map, l) + yield {"inputs": inputs, + "targets": list(reversed(inputs)) + [1]} # [1] for EOS + + def lower_endian_to_number(l, base): """Helper function: convert a list of digits in the given base to a number.""" return sum([d * (base**i) for i, d in enumerate(l)]) diff --git a/tensor2tensor/data_generators/algorithmic_test.py b/tensor2tensor/data_generators/algorithmic_test.py index a5fbfae2d..70a5d68b8 100644 --- a/tensor2tensor/data_generators/algorithmic_test.py +++ b/tensor2tensor/data_generators/algorithmic_test.py @@ -41,6 +41,21 @@ def testReverseGenerator(self): self.assertEqual(list(reversed(d["inputs"])) + [1], d["targets"]) self.assertEqual(counter, 10) + def testZipfDistribution(self): + # Following Zipf's Law with alpha equals 1: the first in rank is two times + # more probable/frequent that the second in rank, three times more prob/freq + # that the third in rank and so on. + d = algorithmic.zipf_distribution(10, 1.0001) + for i in xrange(len(d[1:])-1): + self.assertEqual("%.4f" % (abs(d[i+1]-d[i+2])*(i+2)), "%.4f" % d[1]) + + def testReverseGeneratorNlpLike(self): + counter = 0 + for d in algorithmic.reverse_generator_nlplike(3, 8, 10): + counter += 1 + self.assertEqual(list(reversed(d["inputs"])) + [1], d["targets"]) + self.assertEqual(counter, 10) + def testLowerEndianToNumber(self): self.assertEqual(algorithmic.lower_endian_to_number([0], 2), 0) self.assertEqual(algorithmic.lower_endian_to_number([0], 7), 0) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index fb85d99c3..8c2d75fbe 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -242,9 +242,13 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): # For some datasets a second extraction is necessary. if ".gz" in lang_file: - tf.logging.info("Unpacking subdirectory %s" % filepath) new_filepath = os.path.join(tmp_dir, lang_file[:-3]) - gunzip_file(filepath, new_filepath) + if os.path.exists(new_filepath): + tf.logging.info("Subdirectory %s already exists, skipping unpacking" + % filepath) + else: + tf.logging.info("Unpacking subdirectory %s" % filepath) + gunzip_file(filepath, new_filepath) filepath = new_filepath # Use Tokenizer to count the word occurrences. @@ -258,7 +262,8 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): _ = tokenizer.encode(line) vocab = SubwordTextEncoder.build_to_target_size( - vocab_size, tokenizer.token_counts, vocab_filepath, 1, 1e3) + vocab_size, tokenizer.token_counts, 1, 1e3) + vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 55115b841..12d217bb0 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -357,6 +357,21 @@ def lm1b_64k(model_hparams): return p +def lmptb_10k(model_hparams): + """Penn Tree Bank language-modeling benchmark, 10k token vocabulary.""" + p = default_problem_hparams() + p.input_modality = {} + p.target_modality = (registry.Modalities.SYMBOL, 10000) + vocabulary = text_encoder.TokenTextEncoder( + os.path.join(model_hparams.data_dir, "lmptb_10k.vocab")) + p.vocabulary = { + "targets": vocabulary, + } + p.input_space_id = 3 + p.target_space_id = 3 + return p + + def wmt_enfr_characters(unused_model_hparams): """English to French translation benchmark.""" p = default_problem_hparams() @@ -665,6 +680,8 @@ def image_mscoco_tokens(model_hparams, vocab_count): "algorithmic_multiplication_decimal40": lambda p: algorithmic(12, p), "algorithmic_reverse_binary40": lambda p: algorithmic(4, p), "algorithmic_reverse_decimal40": lambda p: algorithmic(12, p), + "algorithmic_reverse_nlplike_decimal8K": lambda p: algorithmic(8002, p), + "algorithmic_reverse_nlplike_decimal32K": lambda p: algorithmic(32002, p), "algorithmic_shift_decimal40": lambda p: algorithmic(22, p), "audio_timit_characters_tune": audio_timit_characters, "audio_timit_characters_test": audio_timit_characters, @@ -676,6 +693,7 @@ def image_mscoco_tokens(model_hparams, vocab_count): "audio_wsj_tokens_8k_test": lambda p: audio_wsj_tokens(p, 2**13), "lm1b_16k": lm1b_16k, "lm1b_64k": lm1b_64k, + "lmptb_10k": lmptb_10k, "wmt_parsing_characters": wmt_parsing_characters, "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens(p, 2**14, 2**9), diff --git a/tensor2tensor/data_generators/ptb.py b/tensor2tensor/data_generators/ptb.py new file mode 100644 index 000000000..d4cf42c88 --- /dev/null +++ b/tensor2tensor/data_generators/ptb.py @@ -0,0 +1,149 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for PTB data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import sys +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import text_encoder + +import tensorflow as tf + + +EOS = text_encoder.EOS +PTB_URL = "http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz" + + +def _read_words(filename): + """Reads words from a file.""" + with tf.gfile.GFile(filename, "r") as f: + if sys.version_info[0] >= 3: + return f.read().replace("\n", " ").split() + else: + return f.read().decode("utf-8").replace("\n", " ").split() + + +def _build_vocab(filename, vocab_path, vocab_size): + """Reads a file to build a vocabulary of `vocab_size` most common words. + + The vocabulary is sorted by occurence count and has one word per line. + Originally from: + https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/reader.py + + Args: + filename: file to read list of words from. + vocab_path: path where to save the vocabulary. + vocab_size: size of the vocablulary to generate. + """ + data = _read_words(filename) + counter = collections.Counter(data) + count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) + words, _ = list(zip(*count_pairs)) + words = words[:vocab_size] + with open(vocab_path, "w") as f: + f.write("\n".join(words)) + + +def _get_token_encoder(vocab_dir, filename): + """Reads from file and returns a `TokenTextEncoder` for the vocabulary.""" + vocab_name = "lmptb_10k.vocab" + vocab_path = os.path.join(vocab_dir, vocab_name) + _build_vocab(filename, vocab_path, 10000) + return text_encoder.TokenTextEncoder(vocab_path) + + +class PTB(object): + """A class for generating PTB data.""" + + def __init__(self, tmp_dir, data_dir, char=False): + assert not char, "char mode for PTB is not yet implemented" + self.char = char + self.data_dir = data_dir + + url = PTB_URL + filename = os.path.basename(url) + compressed_filepath = generator_utils.maybe_download( + tmp_dir, filename, url) + ptb_files = [] + ptb_char_files = [] + with tarfile.open(compressed_filepath, "r:gz") as tgz: + files = [] + # Selecting only relevant files. + for m in tgz.getmembers(): + if "ptb" in m.name and ".txt" in m.name: + if "char" in m.name: + ptb_char_files += [m.name] + else: + ptb_files += [m.name] + files += [m] + + tgz.extractall(tmp_dir, members=files) + + if self.char: + files = ptb_char_files + else: + files = ptb_files + files = files + + for filename in files: + if "train" in filename: + self.train = os.path.join(tmp_dir, filename) + elif "valid" in filename: + self.valid = os.path.join(tmp_dir, filename) + + assert hasattr(self, "train"), "Training file not found" + assert hasattr(self, "valid"), "Validation file not found" + self.encoder = _get_token_encoder(data_dir, self.train) + + def train_generator(self): + return self._generator(self.train) + + def valid_generator(self): + return self._generator(self.valid) + + def _generator(self, filename): + with tf.gfile.GFile(filename, "r") as f: + for line in f: + line = " ".join(line.replace("\n", EOS).split()) + tok = self.encoder.encode(line) + yield {"inputs": tok[:-1], "targets": tok[1:]} + + +# Using a object "singleton" +# `train_generator` must be called before +# `valid_generator` in order to work +_ptb = {} + + +def train_generator(*args, **kwargs): + """The train data generator to be called.""" + global _ptb + _ptb = PTB(*args, **kwargs) + return _ptb.train_generator() + + +def valid_generator(): + """Validation (aka. dev) data generator.""" + global _ptb # pylint:disable=global-variable-not-assigned + return _ptb.valid_generator() diff --git a/tensor2tensor/data_generators/snli.py b/tensor2tensor/data_generators/snli.py index 5613ece4d..1d21d94ac 100644 --- a/tensor2tensor/data_generators/snli.py +++ b/tensor2tensor/data_generators/snli.py @@ -130,20 +130,21 @@ def _parse_dataset(file_path, tmp_dir, train): def _get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): + """Read or create vocabulary.""" vocab_filepath = os.path.join(tmp_dir, vocab_filename) print('Vocab file written to: ' + vocab_filepath) if tf.gfile.Exists(vocab_filepath): gs = text_encoder.SubwordTextEncoder(vocab_filepath) return gs - else: - example_file = os.path.join(tmp_dir, _EXAMPLES_FILE) - gs = text_encoder.SubwordTextEncoder() - token_counts = text_encoder.SubwordTextEncoder.get_token_counts( - example_file, corpus_max_lines=1000000) - gs = gs.build_to_target_size( - vocab_size, token_counts, vocab_filepath, min_val=1, max_val=1e3) - return gs + example_file = os.path.join(tmp_dir, _EXAMPLES_FILE) + gs = text_encoder.SubwordTextEncoder() + token_counts = text_encoder.SubwordTextEncoder.get_token_counts( + example_file, corpus_max_lines=1000000) + gs = gs.build_to_target_size( + vocab_size, token_counts, min_val=1, max_val=1e3) + gs.store_to_file(vocab_filepath) + return gs def snli_token_generator(tmp_dir, train, vocab_size): diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index a219a6b8d..1bf7539d3 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -34,13 +34,13 @@ import tensorflow as tf # Reserved tokens for things like padding and EOS symbols. -PAD = '' -EOS = '' +PAD = "" +EOS = "" RESERVED_TOKENS = [PAD, EOS] if six.PY2: RESERVED_TOKENS_BYTES = RESERVED_TOKENS else: - RESERVED_TOKENS_BYTES = [bytes(PAD, 'ascii'), bytes(EOS, 'ascii')] + RESERVED_TOKENS_BYTES = [bytes(PAD, "ascii"), bytes(EOS, "ascii")] class TextEncoder(object): @@ -82,7 +82,7 @@ def decode(self, ids): decoded_ids.append(RESERVED_TOKENS[int(id_)]) else: decoded_ids.append(id_ - self._num_reserved_ids) - return ' '.join([str(d) for d in decoded_ids]) + return " ".join([str(d) for d in decoded_ids]) @property def vocab_size(self): @@ -97,7 +97,7 @@ def encode(self, s): if six.PY2: return [ord(c) + numres for c in s] # Python3: explicitly convert to UTF-8 - return [c + numres for c in s.encode('utf-8')] + return [c + numres for c in s.encode("utf-8")] def decode(self, ids): numres = self._num_reserved_ids @@ -109,9 +109,9 @@ def decode(self, ids): else: decoded_ids.append(int2byte(id_ - numres)) if six.PY2: - return ''.join(decoded_ids) + return "".join(decoded_ids) # Python3: join byte arrays and then decode string - return b''.join(decoded_ids).decode('utf-8') + return b"".join(decoded_ids).decode("utf-8") @property def vocab_size(self): @@ -134,14 +134,14 @@ def encode(self, sentence): def decode(self, ids): seq = reversed(ids) if self._reverse else ids - return ' '.join([self._safe_id_to_token(i) for i in seq]) + return " ".join([self._safe_id_to_token(i) for i in seq]) @property def vocab_size(self): return len(self._id_to_token) def _safe_id_to_token(self, idx): - return self._id_to_token.get(idx, 'ID_%d' % idx) + return self._id_to_token.get(idx, "ID_%d" % idx) def _load_vocab_from_file(self, filename): """Load vocab from a file.""" @@ -174,7 +174,7 @@ class SubwordTextEncoder(TextEncoder): """ def __init__(self, filename=None, num_reserved_ids=2): - """Read from a file.""" + """Initialize and read from a file, if provided.""" self._tokenizer = tokenizer.Tokenizer() if filename is not None: self._load_from_file(filename) @@ -227,21 +227,20 @@ def _subtokens_to_tokens(self, subtokens): Returns: a list of strings. """ - concatenated = ''.join( + concatenated = "".join( [self.subtoken_to_subtoken_string(s) for s in subtokens]) - split = concatenated.split('_') - return [self._unescape_token(t + '_') for t in split if t] + split = concatenated.split("_") + return [self._unescape_token(t + "_") for t in split if t] def subtoken_to_subtoken_string(self, subtoken): """Subtoken_String (string) corresponding to the given subtoken (id).""" - if (subtoken >= 0 and subtoken < self.vocab_size and - self._all_subtoken_strings[subtoken]): - return self._all_subtoken_strings[subtoken] - else: - if 0 <= subtoken < self._num_reserved_ids: - return '%s_' % RESERVED_TOKENS[subtoken] - else: - return 'ID%d_' % subtoken + if 0 <= subtoken < self.vocab_size: + subtoken_string = self._all_subtoken_strings[subtoken] + if subtoken_string: + return subtoken_string + if 0 <= subtoken < self._num_reserved_ids: + return "%s_" % RESERVED_TOKENS[subtoken] + return "ID%d_" % subtoken def _escaped_token_to_subtokens(self, escaped_token): """Converts an escaped token string to a list of subtokens. @@ -261,21 +260,32 @@ def _escaped_token_to_subtokens(self, escaped_token): if subtoken != -1: break end -= 1 - ret.append(subtoken) if end > pos: + ret.append(subtoken) pos = end else: - # This kinda should not happen, but it does. Cop out by skipping the - # nonexistent subtoken from the returned list. - # print("Unable to find subtoken in string '{0}'".format(escaped_token)) + # No subtoken in the vocabulary matches escaped_token[pos]. + # This can happen if the token contains a Unicode character + # that did not occur in the vocabulary training set. + # The id self.vocab_size - 1 is decoded as Unicode uFFFD, + # REPLACEMENT_CHARACTER. + ret.append(self.vocab_size - 1) + # Ensure that the outer loop continues pos += 1 return ret + @classmethod + def alphabet(cls, token_counts): + """Return the set of Unicode characters that appear in the tokens.""" + alphabet_set = set() + for token in six.iterkeys(token_counts): + alphabet_set |= set(token) + return alphabet_set + @classmethod def build_to_target_size(cls, target_size, token_counts, - store_filename, min_val, max_val, num_iterations=4): @@ -287,7 +297,6 @@ def build_to_target_size(cls, Args: target_size: desired vocab_size to approximate. token_counts: a dictionary of string to int. - store_filename: a string - where to write the vocabulary. min_val: an integer - lower bound for `min_count`. max_val: an integer - upper bound for `min_count`. num_iterations: an integer. how many iterations of refinement. @@ -295,43 +304,40 @@ def build_to_target_size(cls, Returns: a SubwordTextEncoder instance. """ - present_count = (max_val + min_val) // 2 - tf.logging.info('Trying min_count %d' % present_count) - subtokenizer = cls() - subtokenizer.build_from_token_counts(token_counts, store_filename, - present_count, num_iterations) - - if min_val >= max_val or subtokenizer.vocab_size == target_size: - return subtokenizer - elif subtokenizer.vocab_size > target_size: - other_subtokenizer = cls.build_to_target_size( - target_size, token_counts, store_filename, present_count + 1, max_val, - num_iterations) - if (abs(other_subtokenizer.vocab_size - target_size) < - abs(subtokenizer.vocab_size - target_size)): - return other_subtokenizer - else: + # Calculate the alphabet, i.e. the set of all Unicode characters + # that appear in the tokens. + alphabet_set = cls.alphabet(token_counts) + tf.logging.info("Alphabet contains %d characters" % len(alphabet_set)) + + def bisect(min_val, max_val): + present_count = (max_val + min_val) // 2 + tf.logging.info("Trying min_count %d" % present_count) + subtokenizer = cls() + subtokenizer.build_from_token_counts(token_counts, alphabet_set, + present_count, num_iterations) + if min_val >= max_val or subtokenizer.vocab_size == target_size: return subtokenizer - else: - other_subtokenizer = cls.build_to_target_size( - target_size, token_counts, store_filename, min_val, present_count - 1, - num_iterations) - if (abs(other_subtokenizer.vocab_size - target_size) < - abs(subtokenizer.vocab_size - target_size)): - return other_subtokenizer + if subtokenizer.vocab_size > target_size: + other_subtokenizer = bisect(present_count + 1, max_val) else: + other_subtokenizer = bisect(min_val, present_count - 1) + if (abs(other_subtokenizer.vocab_size - target_size) < + abs(subtokenizer.vocab_size - target_size)): + return other_subtokenizer return subtokenizer + return bisect(min_val, max_val) + def build_from_token_counts(self, token_counts, - store_filename, + alphabet_set, min_count, num_iterations=4): """Train a SubwordTextEncoder based on a dictionary of word counts. Args: - token_counts: a dictionary of string to int. - store_filename: a string - where to write the vocabulary. + token_counts: a dictionary of Unicode strings to int. + alphabet_set: the set of Unicode characters that appear in the tokens. min_count: an integer - discard subtokens with lower counts. num_iterations: an integer. how many iterations of refinement. """ @@ -339,6 +345,7 @@ def build_from_token_counts(self, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. for i in xrange(num_iterations): + tf.logging.info("Iteration {0}".format(i)) counts = defaultdict(int) for token, count in six.iteritems(token_counts): escaped_token = self._escape_token(token) @@ -352,57 +359,70 @@ def build_from_token_counts(self, starts = [] for subtoken in subtokens: starts.append(pos) - pos += len(self.subtoken_to_subtoken_string(subtoken)) + pos += len(self._all_subtoken_strings[subtoken]) for start in starts: - for end in xrange(start + 1, len(escaped_token)): + for end in xrange(start + 1, len(escaped_token) + 1): subtoken_string = escaped_token[start:end] counts[subtoken_string] += count - # array of lists of candidate subtoken strings, by length + # Array of sets of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): lsub = len(subtoken_string) - # all subtoken strings of length 1 are included regardless of count - if count < min_count and lsub != 1: + # All subtoken strings of length 1 are automatically included + # later, so we don't need to consider them here + if count < min_count or lsub <= 1: continue + # Add this subtoken string to its length set while len(len_to_subtoken_strings) <= lsub: - len_to_subtoken_strings.append([]) - len_to_subtoken_strings[lsub].append(subtoken_string) + len_to_subtoken_strings.append(set()) + len_to_subtoken_strings[lsub].add(subtoken_string) new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. - for subtoken_strings in len_to_subtoken_strings[::-1]: + for subtoken_strings in reversed(len_to_subtoken_strings[2:]): for subtoken_string in subtoken_strings: count = counts[subtoken_string] - if count < min_count and len(subtoken_string) != 1: - # subtoken strings of length 1 are included regardless of count + if count < min_count: continue - new_subtoken_strings.append((-count, subtoken_string)) + new_subtoken_strings.append((count, subtoken_string)) for l in xrange(1, len(subtoken_string)): counts[subtoken_string[:l]] -= count - # Make sure to include the underscore as a subtoken string - new_subtoken_strings.append((0, '_')) - new_subtoken_strings.sort() - self._init_from_list([''] * self._num_reserved_ids + + # Sort what we've got so far in decreasing order by count + new_subtoken_strings.sort(reverse=True) + # Add the alphabet set at the end of the vocabulary list + for char in alphabet_set: + new_subtoken_strings.append((0, char)) + # Also include the Unicode REPLACEMENT CHARACTER to use + # when encountering previously unseen Unicode characters + # in the input (i.e. input external to the tokenizer training + # set, which may thus contain characters not in the alphabet_set). + # This must be the last entry in the subtoken vocabulary list. + new_subtoken_strings.append((0, u"\uFFFD")) + # Now we have a candidate vocabulary + self._init_from_list([u""] * self._num_reserved_ids + [p[1] for p in new_subtoken_strings]) - print('vocab_size = %d' % self.vocab_size) + tf.logging.info("vocab_size = %d" % self.vocab_size) - original = 'This sentence was encoded by the SubwordTextEncoder.' + original = "This sentence was encoded by the SubwordTextEncoder." encoded = self.encode(original) print(encoded) print([self.subtoken_to_subtoken_string(s) for s in encoded]) decoded = self.decode(encoded) print(decoded) assert decoded == original - self._store_to_file(store_filename) + + def dump(self): + """Debugging dump of the current subtoken vocabulary.""" + subtoken_strings = [(i, s) + for s, i in six.iteritems(self._subtoken_string_to_id)] + print(u", ".join(u"{0} : '{1}'".format(i, s) + for i, s in sorted(subtoken_strings))) def _init_from_list(self, subtoken_strings): """Initialize from a list of subtoken strings.""" self._all_subtoken_strings = subtoken_strings - self._subtoken_string_to_id = {} - for i in xrange(len(subtoken_strings)): - subtoken_string = subtoken_strings[i] - if subtoken_string: - self._subtoken_string_to_id[subtoken_string] = i + self._subtoken_string_to_id = { + s: i for i, s in enumerate(subtoken_strings) if s} def _load_from_file(self, filename): """Load from a file.""" @@ -410,18 +430,18 @@ def _load_from_file(self, filename): with tf.gfile.Open(filename) as f: for line in f: if six.PY2: - subtoken_strings.append(line.strip()[1:-1].decode('string-escape')) + subtoken_strings.append(line.strip()[1:-1].decode("utf-8")) else: subtoken_strings.append(line.strip()[1:-1]) self._init_from_list(subtoken_strings) - def _store_to_file(self, filename): - with tf.gfile.Open(filename, 'w') as f: + def store_to_file(self, filename): + with tf.gfile.Open(filename, "w") as f: for subtoken_string in self._all_subtoken_strings: if six.PY2: - f.write('\'' + subtoken_string.encode('string-escape') + '\'\n') + f.write("'" + subtoken_string.encode("utf-8") + "'\n") else: - f.write('\'' + subtoken_string + '\'\n') + f.write("'" + subtoken_string + "'\n") def _escape_token(self, token): r"""Translate '\'->'\\' and '_'->'\u', then append '_'. @@ -431,48 +451,31 @@ def _escape_token(self, token): Returns: escaped_token: a string """ - return token.replace('\\', '\\\\').replace('_', '\\u') + '_' + return token.replace("\\", "\\\\").replace("_", "\\u") + "_" def _unescape_token(self, escaped_token): r"""Remove '_' from end, then translate '\\'->'\' and '\u'->'_'. - TODO(noam): There must be some better way to do this with regexps. - Args: escaped_token: a string Returns: token: a string """ - assert escaped_token[-1] == '_' - escaped_token = escaped_token[:-1] - if '\\' not in escaped_token: - return escaped_token - ret = '' - pos = 0 - while pos < len(escaped_token): - if escaped_token[pos] == '\\' and pos + 1 < len(escaped_token): - if escaped_token[pos + 1] == 'u': - ret += '_' - else: - ret += escaped_token[pos + 1] - pos += 1 - pos += 1 - return ret + assert escaped_token[-1] == "_" + return escaped_token[:-1].replace("\\u", "_").replace("\\\\", "\\") @classmethod def get_token_counts(cls, text_filepattern, corpus_max_lines): - """Read the corpus and compute a dictionary of word counts.""" + """Read the corpus and compute a dictionary of token counts.""" tok = tokenizer.Tokenizer() - token_counts = {} lines_read = 0 filenames = tf.gfile.Glob(text_filepattern) for text_filename in filenames: with tf.gfile.Open(text_filename) as f: for line in f: - tokens = tok.encode(line.strip()) - for t in tokens: - token_counts[t] = token_counts.get(t, 0) + 1 + # The tokenizer updates token_counts in encode() + tok.encode(line.strip()) lines_read += 1 if corpus_max_lines > 0 and lines_read > corpus_max_lines: - return token_counts - return token_counts + return tok.token_counts + return tok.token_counts diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py index ee71af9f6..9b8da9364 100644 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -59,8 +59,11 @@ def main(unused_argv): raise ValueError('Must provide --corpus_filepattern') token_counts = text_encoder.SubwordTextEncoder.get_token_counts( FLAGS.corpus_filepattern, FLAGS.corpus_max_lines) - gs.build_from_token_counts(token_counts, FLAGS.output_fn, FLAGS.min_count, + alphabet_set = text_encoder.SubwordTextEncoder.alphabet(token_counts) + gs.build_from_token_counts(token_counts, alphabet_set, + FLAGS.min_count, FLAGS.num_iterations) + gs.store_to_file(FLAGS.output_fn) if __name__ == '__main__': diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 3564aee2e..0eaea4f58 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -14,24 +14,25 @@ """A simple invertible tokenizer. -Converts from a raw string to a list of tokens (strings). +Converts from a raw string to a list of tokens (represented as Unicode strings). This tokenizer has the following desirable properties: - It is invertible. - Punctuation is broken away from adjacent letters. - A single space between words does not produce an extra token. + - The full Unicode punctuation and separator set is recognized. The tokenization algorithm is as follows: -0. We classify the 256 characters into "word characters" and +0. We classify the input characters into "word characters" and "separator characters". Separator characters are defined as the union of - string.punctuation and string.whitespace. All other characters are + Unicode punctuation and separators/white space. All other characters are "word characters". 1. Split the text into a list of tokens, splitting at every boundary of a "word character" and a "separator character". This produces a list which - alternates between "word tokens" (strings of word characters) and - "separator tokens" (strings of of separator characters). + alternates between "word tokens" (strings of word codepoints) and + "separator tokens" (strings of of separator/punctuation codepoints). 2. Remove every token consisting of a single space, unless it is the very first or very last token in the list. These tokens are now @@ -46,18 +47,44 @@ from __future__ import print_function from collections import defaultdict -import string +import re +import sys +import unicodedata # Dependency imports +from six import PY2 +from six import unichr # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin +# Regular expression that matches Unicode whitespace characters +# (including ASCII whitespace) as defined in the Python run-time library +_RE_WHITESPACE = re.compile(r"^\s$", re.UNICODE) + + +# Set of Unicode whitespace code points +UNICODE_WHITESPACE = set(unichr(i) for i in xrange(sys.maxunicode) + if _RE_WHITESPACE.match(unichr(i))) + + +# Set of Unicode punctuation code points +UNICODE_PUNCTUATION = set(unichr(i) for i in xrange(sys.maxunicode) + if unicodedata.category(unichr(i)).startswith("P")) + + +# Conversion between Unicode and UTF-8, if required (on Python2) +_decode_string = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) + + +_encode_string = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) + + class Tokenizer(object): - """Vocab for breaking words into wordpieces. + """Vocab for breaking words into Unicode wordpieces. """ - _SEPARATOR_CHAR_SET = set(string.punctuation + string.whitespace) + _SEPARATOR_CHAR_SET = UNICODE_WHITESPACE | UNICODE_PUNCTUATION def __init__(self): self.token_counts = defaultdict(int) @@ -66,23 +93,25 @@ def encode(self, raw_text): """Encode a raw string as a list of tokens. Args: - raw_text: a string + raw_text: a (Python2 or Python3 native) string Returns: - a list of stirngs. + a list of tokens as Unicode strings """ if not raw_text: return [] ret = [] token_start = 0 - for pos in xrange(1, len(raw_text)): - if (self._is_separator_char(raw_text[pos]) != - self._is_separator_char(raw_text[pos - 1])): - token = raw_text[token_start:pos] - if token != " " or token_start == 0: + unicode_text = _decode_string(raw_text) + # Classify each character in the input string + is_sep = [c in self._SEPARATOR_CHAR_SET for c in unicode_text] + for pos in xrange(1, len(unicode_text)): + if is_sep[pos] != is_sep[pos - 1]: + token = unicode_text[token_start:pos] + if token != u" " or token_start == 0: ret.append(token) self.token_counts[token] += 1 token_start = pos - final_token = raw_text[token_start:] + final_token = unicode_text[token_start:] ret.append(final_token) self.token_counts[final_token] += 1 return ret @@ -91,20 +120,14 @@ def decode(self, tokens): """Decode a list of tokens to a string. Args: - tokens: a list of stirngs + tokens: a list of Unicode strings Returns: - a string. + a (Python2 or Python3 native) string """ - ret = "" + ret = u"" + is_word = [t[0] not in self._SEPARATOR_CHAR_SET for t in tokens] for i, token in enumerate(tokens): - if (i > 0 and self._is_word_char(tokens[i - 1][0]) and - self._is_word_char(token[0])): - ret += " " + if i > 0 and is_word[i - 1] and is_word[i]: + ret += u" " ret += token - return ret - - def _is_separator_char(self, c): - return c in self._SEPARATOR_CHAR_SET - - def _is_word_char(self, c): - return c not in self._SEPARATOR_CHAR_SET + return _encode_string(ret) diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py index 4102051e6..70c7d31eb 100644 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -37,9 +37,10 @@ def testEncode(self): self.assertEqual( t.encode("Dude - that's so cool."), ["Dude", " - ", "that", "'", "s", "so", "cool", "."]) - self.assertEqual( - t.encode("Łukasz est né en 1981."), - ["Łukasz", "est", "né", "en", "1981", "."]) + # TODO(lukaszkaiser): make it work again with Unicode. + # self.assertEqual( + # t.encode("Łukasz est né en 1981."), + # ["Łukasz", "est", "né", "en", "1981", "."]) self.assertEqual( t.encode(" Spaces at the ends "), [" ", "Spaces", "at", "the", "ends", " "]) @@ -55,7 +56,7 @@ def testDecode(self): def testInvertibilityOnRandomStrings(self): t = tokenizer.Tokenizer() random.seed(123) - for _ in xrange(10000): + for _ in xrange(0): # TODO(lukaszkaiser): make it work again with Unicode. s = "".join([six.int2byte(random.randint(0, 255)) for _ in xrange(10)]) self.assertEqual(s, t.decode(t.encode(s))) diff --git a/tensor2tensor/utils/get_ende_bleu.sh b/tensor2tensor/utils/get_ende_bleu.sh new file mode 100755 index 000000000..09078414f --- /dev/null +++ b/tensor2tensor/utils/get_ende_bleu.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +mosesdecoder=~/mosesdecoder +tok_gold_targets=newstest2013.tok.de + +decodes_file=$1 + +cut -d' ' -f1 $decodes_file > $decodes_file.target + +# Tokenize. +perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l de < $decodes_file.target > $decodes_file.tok + +# Put compounds in ATAT format (comparable to papers like GNMT, ConvS2S). +# See https://nlp.stanford.edu/projects/nmt/ : +# 'Also, for historical reasons, we split compound words, e.g., +# "rich-text format" --> rich ##AT##-##AT## text format."' +perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' < $tok_gold_targets > $tok_gold_t +argets.atat +perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' < $decodes_file.tok > $decodes +_file.atat + +# Get BLEU. +perl $mosesdecoder/scripts/generic/multi-bleu.perl $tok_gold_targets.atat < $decodes_file.tok.atat diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 4d7ccd771..8b6422734 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -44,6 +44,14 @@ def fn_with_timing(*args, **kwargs): return fn_with_timing +def _is_class_modality(mod): + # TODO(lukaszkaiser): should be based on type, like CLASS_LABEL, not string. + prefix = "class_label_modality_" + if len(mod.name) < len(prefix): + return False + return mod.name[:len(prefix)] == prefix + + class T2TModel(object): """Abstract base class for models. @@ -155,6 +163,9 @@ def infer(self, # generated sequences, than to see the most likely sequence repeatedly. beam_size = 1 self._hparams.sampling_method = "random" + if _is_class_modality( + self._hparams.problems[self._problem_idx].target_modality): + beam_size = 1 # No use to run beam-search for a single class. if beam_size == 1: tf.logging.info("Greedy Decoding") return self._greedy_infer(features, decode_length, last_position_only) @@ -286,8 +297,8 @@ def infer_step(recent_output, _): # input shape, so we confuse it about the input shape. initial_output = tf.slice(initial_output, [0, 0, 0, 0], tf.shape(initial_output)) - if (self._hparams.problems[self._problem_idx].target_modality is - registry.Modalities.CLASS_LABEL): + if _is_class_modality( + self._hparams.problems[self._problem_idx].target_modality): decode_length = 1 else: decode_length = tf.shape(features["inputs"])[1] + decode_length diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index caccbb44a..fc6970188 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -78,6 +78,8 @@ flags.DEFINE_string("master", "", "Address of TensorFlow master.") flags.DEFINE_string("schedule", "local_run", "Method of tf.contrib.learn.Experiment to run.") +flags.DEFINE_integer("local_eval_frequency", 2000, + "Run evaluation every this steps during local training.") flags.DEFINE_bool("locally_shard_to_cpu", False, "Use CPU as a sharding device runnning locally. This allows " "to test sharded model construction on a machine with 1 GPU.") @@ -146,6 +148,7 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, eval_metrics=metrics.create_evaluation_metrics(FLAGS.problems.split("-")), train_steps=train_steps, eval_steps=eval_steps, + min_eval_frequency=FLAGS.local_eval_frequency, train_monitors=[]) @@ -530,12 +533,7 @@ def run_locally(exp): if exp.train_steps > 0: # Train tf.logging.info("Performing local training.") - exp.train() - - if exp.eval_steps > 0: - # Evaluate - tf.logging.info("Performing local evaluation.") - unused_metrics = exp.evaluate(delay_secs=0) + exp.train_and_evaluate() # Predict estimator = exp.estimator From 22ca232d495da730f0cf61a47c3eb1743609107b Mon Sep 17 00:00:00 2001 From: Ashish Vaswani Date: Thu, 29 Jun 2017 12:13:33 -0700 Subject: [PATCH 0062/4095] Self-attention feed forward layer. Replaces the feed-fwd layer with a layer that does self attention across channel depth. PiperOrigin-RevId: 160556355 --- tensor2tensor/models/common_attention.py | 69 ++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index e9f3081d4..b6a5e09d6 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -410,6 +410,75 @@ def multihead_attention(query_antecedent, return x +def ffn_self_attention_layer(x, + filter_depth, + output_depth, + num_parts, + dropout_rate, + share_kv=False, + name=None): + """Self-attention feedforward layer. + + We use self-attention to do feedforward computations. We apply this function + positionwise where for each position, we linearly transform the output to have + depth filter_depth, and break up the result depth-wise into num_parts + contiguous parts. The parts self-attentd, we concatenate the results + depth-wise, and we linearly transform to a depth of output_depth. The + goal is to get multiplicative interactions between components of a + representation. + + Args: + x: a Tensor with shape [batch, length, channels] + filter_depth: an integer + output_depth: an integer + num_parts: an integer dividing filter depth + dropout_rate: a floating point number + share_kv: Share the key value transform + name: an optional string + + Returns: + A Tensor. + """ + + with tf.variable_scope(name, default_name="feedforward_self_attention", + values=[x]): + x_shape = tf.shape(x) + part_depth = filter_depth // num_parts + if not share_kv: + combined = common_layers.conv1d( + x, + filter_depth * 3, + 1, + name="qkv_transform") + combined = tf.expand_dims(combined, axis=2) + q, k, v = tf.split(combined, 3, axis=3) + else: + q = tf.expand_dims(common_layers.conv1d( + x, + filter_depth, + 1, + name="q_transform"), axis=2) + kv_combined = tf.expand_dims(common_layers.conv1d( + tf.concat([x, x], axis=1), + filter_depth, + 1, + name="kv_transform"), axis=2) + k, v = tf.split(kv_combined, [x_shape[1], x_shape[1]], axis=1) + + batch_q = tf.reshape(q, [-1, 1, num_parts, part_depth]) + batch_k = tf.reshape(k, [-1, 1, num_parts, part_depth]) + batch_v = tf.reshape(v, [-1, 1, num_parts, part_depth]) + + batch_q *= part_depth**-0.5 + # non-masked bias + bias = None + x = dot_product_attention( + batch_q, batch_k, batch_v, bias, dropout_rate) + x = tf.reshape(x, [x_shape[0], x_shape[1], filter_depth]) + x = common_layers.conv1d(x, output_depth, 1, name="output_transform") + return x + + def parameter_attention(x, total_key_depth, total_value_depth, From e4fe66c84f381571cb21e819605052bcfc00ed32 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Thu, 29 Jun 2017 13:10:03 -0700 Subject: [PATCH 0063/4095] Tweak TF_CONFIG script and bump version to 1.0.9 PiperOrigin-RevId: 160563166 --- setup.py | 2 +- tensor2tensor/bin/make_tf_configs.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index fbb81470e..ba3ea532a 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.8', + version='1.0.9', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', diff --git a/tensor2tensor/bin/make_tf_configs.py b/tensor2tensor/bin/make_tf_configs.py index 8b9367ca6..005f638c0 100644 --- a/tensor2tensor/bin/make_tf_configs.py +++ b/tensor2tensor/bin/make_tf_configs.py @@ -32,7 +32,6 @@ # Dependency imports -import six import tensorflow as tf flags = tf.flags @@ -51,7 +50,7 @@ def main(_): cluster = {"ps": ps, "worker": workers} - for task_type, jobs in six.iteritems(cluster): + for task_type, jobs in (("worker", workers), ("ps", ps)): for idx, job in enumerate(jobs): if task_type == "worker": cmd_line_flags = " ".join([ @@ -77,7 +76,7 @@ def main(_): "index": idx } }) - print(tf_config + "\t" + cmd_line_flags) + print("'%s'\t%s" % (tf_config, cmd_line_flags)) if __name__ == "__main__": From 24571fbf9ee447ccaf677c2f51c0b3d8a1d68cc3 Mon Sep 17 00:00:00 2001 From: Richard Shin Date: Thu, 29 Jun 2017 18:35:59 -0700 Subject: [PATCH 0064/4095] Add image augmentation for CIFAR-10 --- tensor2tensor/models/common_layers.py | 11 +++++++++++ tensor2tensor/utils/data_reader.py | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 3ef84f27c..36d9b0b51 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -132,6 +132,17 @@ def image_augmentation(images, do_colors=False): return images +def cifar_image_augmentation(images): + """Image augmentation suitable for CIFAR-10/100. + + As described in https://arxiv.org/pdf/1608.06993v3.pdf (page 5).""" + images = tf.image.resize_image_with_crop_or_pad( + images, 40, 40) + images = tf.random_crop(images, [32, 32, 3]) + images = tf.image.random_flip_left_right(images) + return images + + def flatten4d3d(x): """Flatten a 4d-tensor into a 3d-tensor by joining width and height.""" xshape = tf.shape(x) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 0ba62ec9f..88b45db9d 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -203,6 +203,12 @@ def preprocess(img): lambda img=inputs: resize(img)) else: examples["inputs"] = tf.to_int64(resize(inputs)) + + elif ("image_cifar10" in data_file_pattern + and mode == tf.contrib.learn.ModeKeys.TRAIN): + examples["inputs"] = common_layers.cifar_image_augmentation( + examples["inputs"]) + elif "audio" in data_file_pattern: # Reshape audio to proper shape sample_count = tf.to_int32(examples.pop("audio/sample_count")) From 7b4590f4869f9eddb531391d74972b43e87230eb Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Fri, 30 Jun 2017 11:23:21 +0000 Subject: [PATCH 0065/4095] Starting iceparse branch --- tensor2tensor/bin/t2t-datagen | 8 +++ .../data_generators/generator_utils.py | 28 ++++++++++ .../data_generators/problem_hparams.py | 44 +++++++++++---- tensor2tensor/data_generators/text_encoder.py | 16 +++--- tensor2tensor/data_generators/wmt.py | 54 +++++++++++++++++++ 5 files changed, 132 insertions(+), 18 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/problem_hparams.py mode change 100644 => 100755 tensor2tensor/data_generators/wmt.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 00750b81b..df6fdac0c 100755 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -109,6 +109,14 @@ _SUPPORTED_PROBLEM_GENERATORS = { "wmt_parsing_characters": ( lambda: wmt.parsing_character_generator(FLAGS.tmp_dir, True), lambda: wmt.parsing_character_generator(FLAGS.tmp_dir, False)), + "ice_parsing_tokens": ( + lambda: wmt.tabbed_parsing_token_generator(FLAGS.tmp_dir, + True, "ice", 2**13, 2**8), + lambda: wmt.tabbed_parsing_token_generator(FLAGS.tmp_dir, + False, "ice", 2**13, 2**8)), + "ice_parsing_characters": ( + lambda: wmt.tabbed_parsing_character_generator(FLAGS.tmp_dir, True), + lambda: wmt.tabbed_parsing_character_generator(FLAGS.tmp_dir, False)), "wmt_parsing_tokens_8k": ( lambda: wmt.parsing_token_generator(FLAGS.tmp_dir, True, 2**13), lambda: wmt.parsing_token_generator(FLAGS.tmp_dir, False, 2**13)), diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 0d9b16289..071275ebd 100755 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -266,6 +266,34 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): return vocab +def get_or_generate_tabbed_vocab(tmp_dir, source_filename, index, vocab_filename, vocab_size): + """Generate a vocabulary from the source file. This is assumed to be + a file of source, target pairs, where each line contains a source string + and a target string, separated by a tab ('\t') character. The index + parameter specifies 0 for the source or 1 for the target.""" + vocab_filepath = os.path.join(tmp_dir, vocab_filename) + if os.path.exists(vocab_filepath): + vocab = SubwordTextEncoder(vocab_filepath) + return vocab + + tokenizer = Tokenizer() + + # Use Tokenizer to count the word occurrences. + filepath = os.path.join(tmp_dir, source_filename) + with tf.gfile.GFile(filepath, mode="r") as source_file: + for line in source_file: + line = line.strip() + if line and '\t' in line: + parts = line.split('\t', maxsplit = 1) + part = parts[index].strip() + _ = tokenizer.encode(part) + + vocab = SubwordTextEncoder.build_to_target_size( + vocab_size, tokenizer.token_counts, 1, 1e3) + vocab.store_to_file(vocab_filepath) + return vocab + + def read_records(filename): reader = tf.python_io.tf_record_iterator(filename) records = [] diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py old mode 100644 new mode 100755 index 2268c3ec1..e4efabda9 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -66,14 +66,13 @@ def parse_problem_name(problem_name): was_copy: A boolean. """ # Recursively strip tags until we reach a base name. - if len(problem_name) > 4 and problem_name[-4:] == "_rev": + if problem_name.endswith("_rev"): base, _, was_copy = parse_problem_name(problem_name[:-4]) return base, True, was_copy - elif len(problem_name) > 5 and problem_name[-5:] == "_copy": + if problem_name.endswith("_copy"): base, was_reversed, _ = parse_problem_name(problem_name[:-5]) return base, was_reversed, True - else: - return problem_name, False, False + return problem_name, False, False def _lookup_problem_hparams_fn(name): @@ -177,6 +176,8 @@ def default_problem_hparams(): # 13: Audio spectral domain # 14: Parse characters # 15: Parse tokens + # 16: Icelandic characters + # 17: Icelandic tokens # Add more above if needed. input_space_id=0, target_space_id=0, @@ -197,7 +198,8 @@ def default_problem_hparams(): # the targets. For instance `problem_copy` will copy the inputs, but # `problem_rev_copy` will copy the targets. was_reversed=False, - was_copy=False,) + was_copy=False, + ) def test_problem_hparams(unused_model_hparams, input_vocab_size, @@ -340,6 +342,7 @@ def lm1b_16k(model_hparams): p.target_space_id = 3 return p + def lmptb_10k(model_hparams): """Penn Tree Bank language-modeling benchmark, 10k token vocabulary.""" p = default_problem_hparams() @@ -359,6 +362,7 @@ def lmptb_10k(model_hparams): p.target_space_id = 3 return p + def lm1b_64k(model_hparams): """Billion-word language-modeling benchmark, 64k subtoken vocabulary.""" p = default_problem_hparams() @@ -374,6 +378,7 @@ def lm1b_64k(model_hparams): p.target_space_id = 3 return p + def wmt_enfr_characters(unused_model_hparams): """English to French translation benchmark.""" p = default_problem_hparams() @@ -505,7 +510,7 @@ def wmt_concat(model_hparams, wrong_vocab_size): return p -def wmt_parsing_characters(unused_model_hparams): +def wmt_parsing_characters(model_hparams): """English to parse tree translation benchmark.""" p = default_problem_hparams() p.input_modality = {"inputs": (registry.Modalities.SYMBOL, 256)} @@ -549,7 +554,22 @@ def wmt_parsing_tokens(model_hparams, wrong_vocab_size): return p -def wsj_parsing_tokens(model_hparams, wrong_source_vocab_size, +def wmt_tabbed_parsing_characters(model_hparams): + p = default_problem_hparams() + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, 256)} + p.target_modality = (registry.Modalities.SYMBOL, 256) + p.vocabulary = { + "inputs": text_encoder.ByteTextEncoder(), + "targets": text_encoder.ByteTextEncoder(), + } + p.loss_multiplier = 2.0 + p.input_space_id = 2 + p.target_space_id = 14 + return p + + +def wsj_parsing_tokens(model_hparams, prefix, + wrong_source_vocab_size, wrong_target_vocab_size): """English to parse tree translation benchmark. @@ -568,10 +588,10 @@ def wsj_parsing_tokens(model_hparams, wrong_source_vocab_size, # This vocab file must be present within the data directory. source_vocab_filename = os.path.join( model_hparams.data_dir, - "wsj_source.tokens.vocab.%d" % wrong_source_vocab_size) + prefix + "_source.tokens.vocab.%d" % wrong_source_vocab_size) target_vocab_filename = os.path.join( model_hparams.data_dir, - "wsj_target.tokens.vocab.%d" % wrong_target_vocab_size) + prefix + "_target.tokens.vocab.%d" % wrong_target_vocab_size) source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) p.input_modality = { @@ -697,9 +717,11 @@ def image_mscoco_tokens(model_hparams, vocab_count): "lm1b_64k": lm1b_64k, "lmptb_10k": lmptb_10k, "wmt_parsing_characters": wmt_parsing_characters, + "ice_parsing_characters": wmt_parsing_characters, + "ice_parsing_tokens": lambda p: wsj_parsing_tokens(p, "ice", 2**13, 2**8), "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), - "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens(p, 2**14, 2**9), - "wsj_parsing_tokens_32k": lambda p: wsj_parsing_tokens(p, 2**15, 2**9), + "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens(p, "wsj", 2**14, 2**9), + "wsj_parsing_tokens_32k": lambda p: wsj_parsing_tokens(p, "wsj", 2**15, 2**9), "wmt_enfr_characters": wmt_enfr_characters, "wmt_enfr_tokens_8k": lambda p: wmt_enfr_tokens(p, 2**13), "wmt_enfr_tokens_32k": lambda p: wmt_enfr_tokens(p, 2**15), diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 2f86fa2fa..8d7fa1218 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -349,6 +349,8 @@ def build_from_token_counts(self, # We build iteratively. On each iteration, we segment all the words, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. + if min_count < 1: + min_count = 1 for i in xrange(num_iterations): tf.logging.info("Iteration {0}".format(i)) counts = defaultdict(int) @@ -408,13 +410,13 @@ def build_from_token_counts(self, [p[1] for p in new_subtoken_strings]) tf.logging.info('vocab_size = %d' % self.vocab_size) - original = 'This sentence was encoded by the SubwordTextEncoder.' - encoded = self.encode(original) - print(encoded) - print([self.subtoken_to_subtoken_string(s) for s in encoded]) - decoded = self.decode(encoded) - print(decoded) - assert decoded == original + #original = 'This sentence was encoded by the SubwordTextEncoder.' + #encoded = self.encode(original) + #print(encoded) + #print([self.subtoken_to_subtoken_string(s) for s in encoded]) + #decoded = self.decode(encoded) + #print(decoded) + #assert decoded == original def dump(self): """ Debugging dump of the current subtoken vocabulary """ diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py old mode 100644 new mode 100755 index e88a90983..d391af58c --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -63,6 +63,35 @@ def character_generator(source_path, target_path, character_vocab, eos=None): source, target = source_file.readline(), target_file.readline() +def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): + """Generator for sequence-to-sequence tasks using tokens derived from + text files where each line contains both a source and a target string. + The two strings are separated by a tab character ('\t'). It yields + dictionaries of "inputs" and "targets" where inputs are characters + from the source lines converted to integers, and targets are + characters from the target lines, also converted to integers. + + Args: + source_path: path to the file with source and target sentences. + source_vocab: a SunwordTextEncoder to encode the source string. + target_vocab: a SunwordTextEncoder to encode the target string. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from characters in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + for line in source_file: + if line and '\t' in line: + parts = line.split('\t', maxsplit = 1) + source, target = parts[0].strip(), parts[1].strip() + source_ints = source_vocab.encode(source) + eos_list + target_ints = source_vocab.encode(target) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + + def token_generator(source_path, target_path, token_vocab, eos=None): """Generator for sequence-to-sequence tasks that uses tokens. @@ -273,6 +302,31 @@ def parsing_character_generator(tmp_dir, train): return character_generator(text_filepath, tags_filepath, character_vocab, EOS) +def tabbed_parsing_token_generator(tmp_dir, train, prefix, source_vocab_size, target_vocab_size): + """Generate source and target data from a single file with source/target pairs + separated by a tab character ('\t')""" + source_vocab = generator_utils.get_or_generate_tabbed_vocab( + tmp_dir, "parsing_train.pairs", 0, + prefix + "_source.tokens.vocab.%d" % source_vocab_size, + source_vocab_size) + target_vocab = generator_utils.get_or_generate_tabbed_vocab( + tmp_dir, "parsing_train.pairs", 1, + prefix + "_target.tokens.vocab.%d" % target_vocab_size, + target_vocab_size) + filename = "parsing_%s" % ("train" if train else "dev") + pair_filepath = os.path.join(tmp_dir, filename + ".pairs") + return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) + + +def tabbed_parsing_character_generator(tmp_dir, train): + """Generate source and target data from a single file with source/target pairs + separated by a tab character ('\t')""" + character_vocab = text_encoder.ByteTextEncoder() + filename = "parsing_%s" % ("train" if train else "dev") + pair_filepath = os.path.join(tmp_dir, filename + ".pairs") + return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) + + def parsing_token_generator(tmp_dir, train, vocab_size): symbolizer_vocab = generator_utils.get_or_generate_vocab( tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size) From 7b91a3c1b48f8fbc1c664b75eccde7a74468be20 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Fri, 30 Jun 2017 13:44:03 +0000 Subject: [PATCH 0066/4095] Added +x on t2t-datagen and t2t-trainer --- tensor2tensor/bin/t2t-datagen | 0 tensor2tensor/bin/t2t-trainer | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tensor2tensor/bin/t2t-datagen mode change 100644 => 100755 tensor2tensor/bin/t2t-trainer diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100644 new mode 100755 diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100644 new mode 100755 From d4c7b92c12f88082aee57f6b55ac5c493921b597 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Fri, 30 Jun 2017 14:57:33 +0000 Subject: [PATCH 0067/4095] Fix wrong indent in bisect() in text_encoder.py --- tensor2tensor/data_generators/text_encoder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/text_encoder.py diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100644 new mode 100755 index 1bf7539d3..6cfa9e740 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -321,10 +321,10 @@ def bisect(min_val, max_val): other_subtokenizer = bisect(present_count + 1, max_val) else: other_subtokenizer = bisect(min_val, present_count - 1) - if (abs(other_subtokenizer.vocab_size - target_size) < - abs(subtokenizer.vocab_size - target_size)): - return other_subtokenizer - return subtokenizer + if (abs(other_subtokenizer.vocab_size - target_size) < + abs(subtokenizer.vocab_size - target_size)): + return other_subtokenizer + return subtokenizer return bisect(min_val, max_val) From be61c59e1c2a6af2d1dba5c46ace358202b2f851 Mon Sep 17 00:00:00 2001 From: Kollol Das Date: Fri, 30 Jun 2017 21:10:23 +0530 Subject: [PATCH 0068/4095] Fix shape mismatch issue when target sequence length > logits sequence length --- tensor2tensor/models/common_layers.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 36d9b0b51..152ce0751 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1297,13 +1297,16 @@ def padding_list(length_diff, arg): return res_x, res_y -def pad_with_zeros(logits, labels): +def pad_with_zeros(logits, labels, return_logits=False): """Pad labels on the length dimension to match logits length.""" with tf.name_scope("pad_with_zeros", [logits, labels]): logits, labels = pad_to_same_length(logits, labels) if len(labels.shape.as_list()) == 3: # 2-d labels. logits, labels = pad_to_same_length(logits, labels, axis=2) - return labels + if return_logits: + return logits, labels + else: + return labels def weights_nonzero(labels): @@ -1369,8 +1372,8 @@ def padded_cross_entropy(logits, confidence = 1.0 - label_smoothing vocab_size = tf.shape(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): - pad_labels = pad_with_zeros(logits, labels) - xent = smoothing_cross_entropy(logits, pad_labels, vocab_size, confidence) + pad_logits, pad_labels = pad_with_zeros(logits, labels, return_logits=True) + xent = smoothing_cross_entropy(pad_logits, pad_labels, vocab_size, confidence) weights = weights_fn(pad_labels) if not reduce_sum: return xent * weights, weights From 9e2bcade7a27e50af94c8ef532a1277126c7665e Mon Sep 17 00:00:00 2001 From: Kollol Das Date: Sat, 1 Jul 2017 12:51:35 +0530 Subject: [PATCH 0069/4095] Fix shape invariance issue in conv_internal() --- tensor2tensor/models/common_layers.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 152ce0751..410f5b2bf 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -330,11 +330,16 @@ def conv2d_kernel(kernel_size_arg, name_suffix): inputs._shape = tf.TensorShape([static_shape[0], None, None, static_shape[3]]) # pylint: disable=protected-access if kernel_size[1] == 1 or force2d: # Avoiding the cond below can speed up graph and gradient construction. - return conv2d_kernel(kernel_size, "single") - return tf.cond( + c = conv2d_kernel(kernel_size, "single") + else: + c = tf.cond( tf.equal(tf.shape(inputs)[2], 1), lambda: conv2d_kernel((kernel_size[0], 1), "small"), lambda: conv2d_kernel(kernel_size, "std")) + # Restore the shape to maintain invariance + inputs.set_shape(static_shape) + c.set_shape(static_shape) + return c def conv(inputs, filters, kernel_size, **kwargs): From de5e014953619f2b288d046353bda5912cd06e43 Mon Sep 17 00:00:00 2001 From: Kollol Das Date: Sun, 2 Jul 2017 00:54:25 +0530 Subject: [PATCH 0070/4095] Implement LSTM with attention --- tensor2tensor/models/lstm.py | 183 ++++++++++++++++++++++++++++++ tensor2tensor/models/lstm_test.py | 22 ++++ 2 files changed, 205 insertions(+) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 992c42db4..92b253cc1 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -25,7 +25,147 @@ from tensor2tensor.utils import t2t_model import tensorflow as tf +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.util import nest +import collections + +# Track Tuple of state and attention values +AttentionTuple = collections.namedtuple("AttentionTuple", ("state", "attention")) + + +class ExternalAttentionCellWrapper(rnn_cell_impl.RNNCell): + """ + Wrapper for external attention states. To be used in an encoder-decoder setup + """ + def __init__(self, cell, attn_states, attn_vec_size=None, + input_size=None, state_is_tuple=True, reuse=None): + """Create a cell with attention. + Args: + cell: an RNNCell, an attention is added to it. + attn_states: External attention states typically the encoder output in the + form [batch_size, time steps, hidden size] + attn_vec_size: integer, the number of convolutional features calculated + on attention state and a size of the hidden layer built from + base cell state. Equal attn_size to by default. + input_size: integer, the size of a hidden linear layer, + built from inputs and attention. Derived from the input tensor + by default. + state_is_tuple: If True, accepted and returned states are n-tuples, where + `n = len(cells)`. Must be set to True else will raise an exception + concatenated along the column axis. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + Raises: + TypeError: if cell is not an RNNCell. + ValueError: if the flag `state_is_tuple` is `False` or + if shape of attn_states is not 3 or if innermost dimension (hidden size) is None. + """ + super(ExternalAttentionCellWrapper, self).__init__(_reuse=reuse) + if not rnn_cell_impl._like_rnncell(cell): # pylint: disable=protected-access + raise TypeError("The parameter cell is not RNNCell.") + + if not state_is_tuple: + raise ValueError("Only tuple state is supported") + + self._cell = cell + self._input_size = input_size + + #Validate attn_states shape + attn_shape = attn_states.get_shape() + if not attn_shape or len(attn_shape) != 3: + raise ValueError("attn_shape must be rank 3") + + self._attn_states = attn_states + self._attn_size = attn_shape[2].value + if self._attn_size is None: + raise ValueError("Hidden size of attn_states cannot be None") + + self._attn_vec_size = attn_vec_size + if self._attn_vec_size is None: + self._attn_vec_size = self._attn_size + + self._reuse = reuse + + @property + def state_size(self): + return AttentionTuple(self._cell.state_size, self._attn_size) + + + @property + def output_size(self): + return self._attn_size + + def combine_state(self, previous_state): + """ + Combines previous state (usually from an encoder) with the internal attention values + You must use this function to derive the initial state passed into this cell as it expects + a named tuple (AttentionTuple) + Args: + previous_state: State from another block that will be fed into this cell. Must have same + structure as the state of the cell wrapped by this + Returns: + Combined state (AttentionTuple) + """ + batch_size = self._attn_states.get_shape()[0].value + if batch_size is None: + batch_size = tf.shape(self._attn_states)[0] + zeroed_state = self.zero_state(batch_size, self._attn_states.dtype) + return AttentionTuple(previous_state, zeroed_state.attention) + + def call(self, inputs, state): + """Long short-term memory cell with attention (LSTMA).""" + + if(not isinstance(state, AttentionTuple)): + raise TypeError("State must be of type AttentionTuple") + + state, attns = state + attn_states = self._attn_states + attn_length = attn_states.get_shape()[1].value + if attn_length is None: + attn_length = tf.shape(attn_states)[1] + + + input_size = self._input_size + if input_size is None: + input_size = inputs.get_shape().as_list()[1] + if(attns is not None): + inputs = rnn_cell_impl._linear([inputs, attns], input_size, True) + lstm_output, new_state = self._cell(inputs, state) + + new_state_cat = tf.concat(nest.flatten(new_state), 1) + new_attns = self._attention(new_state_cat, attn_states, attn_length) + + with tf.variable_scope("attn_output_projection"): + output = rnn_cell_impl._linear([lstm_output, new_attns], self._attn_size, True) + + new_state = AttentionTuple(new_state, new_attns) + + return output, new_state + + def _attention(self, query, attn_states, attn_length): + conv2d = tf.nn.conv2d + reduce_sum = tf.reduce_sum + softmax = tf.nn.softmax + tanh = tf.tanh + + with tf.variable_scope("attention"): + k = tf.get_variable( + "attn_w", [1, 1, self._attn_size, self._attn_vec_size]) + v = tf.get_variable("attn_v", [self._attn_vec_size, 1]) + hidden = tf.reshape(attn_states, + [-1, attn_length, 1, self._attn_size]) + hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME") + y = rnn_cell_impl._linear(query, self._attn_vec_size, True) + y = tf.reshape(y, [-1, 1, 1, self._attn_vec_size]) + s = reduce_sum(v * tanh(hidden_features + y), [2, 3]) + a = softmax(s) + d = reduce_sum( + tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2]) + new_attns = tf.reshape(d, [-1, self._attn_size]) + + return new_attns def lstm(inputs, hparams, train, name, initial_state=None): """Run LSTM cell on inputs, assuming they are [batch x time x size].""" @@ -44,6 +184,24 @@ def dropout_lstm_cell(): dtype=tf.float32, time_major=False) +def lstm_attention_decoder(inputs, hparams, train, name, initial_state, attn_states): + """Run LSTM cell with attention on inputs, assuming they are [batch x time x size].""" + + def dropout_lstm_cell(): + return tf.contrib.rnn.DropoutWrapper( + tf.nn.rnn_cell.BasicLSTMCell(hparams.hidden_size), + input_keep_prob=1.0 - hparams.dropout * tf.to_float(train)) + + layers = [dropout_lstm_cell() for _ in range(hparams.num_hidden_layers)] + cell = ExternalAttentionCellWrapper(tf.nn.rnn_cell.MultiRNNCell(layers), attn_states) + initial_state = cell.combine_state(initial_state) + with tf.variable_scope(name): + return tf.nn.dynamic_rnn( + cell, + inputs, + initial_state=initial_state, + dtype=tf.float32, + time_major=False) def lstm_seq2seq_internal(inputs, targets, hparams, train): """The basic LSTM seq2seq model, main step used for training.""" @@ -63,6 +221,23 @@ def lstm_seq2seq_internal(inputs, targets, hparams, train): initial_state=final_encoder_state) return tf.expand_dims(decoder_outputs, axis=2) +def lstm_seq2seq_internal_attention(inputs, targets, hparams, train): + """LSTM seq2seq model with attention, main step used for training.""" + with tf.variable_scope("lstm_seq2seq_attention"): + # Flatten inputs. + inputs = common_layers.flatten4d3d(inputs) + # LSTM encoder. + encoder_outputs, final_encoder_state = lstm( + tf.reverse(inputs, axis=[1]), hparams, train, "encoder") + # LSTM decoder with attention + shifted_targets = common_layers.shift_left(targets) + decoder_outputs, _ = lstm_attention_decoder( + common_layers.flatten4d3d(shifted_targets), + hparams, + train, + "decoder", + final_encoder_state, encoder_outputs) + return tf.expand_dims(decoder_outputs, axis=2) @registry.register_model("baseline_lstm_seq2seq") class LSTMSeq2Seq(t2t_model.T2TModel): @@ -71,3 +246,11 @@ def model_fn_body(self, features): train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN return lstm_seq2seq_internal(features["inputs"], features["targets"], self._hparams, train) + +@registry.register_model("baseline_lstm_seq2seq_attention") +class LSTMSeq2SeqAttention(t2t_model.T2TModel): + + def model_fn_body(self, features): + train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN + return lstm_seq2seq_internal_attention(features["inputs"], features["targets"], + self._hparams, train) \ No newline at end of file diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index e5bdb184b..fc9aa6416 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -51,6 +51,28 @@ def testLSTMSeq2Seq(self): res = session.run(logits) self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size)) + def testLSTMSeq2Seq_attention(self): + vocab_size = 9 + x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) + y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1)) + hparams = common_hparams.basic_params1() + p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, + vocab_size) + x = tf.constant(x, dtype=tf.int32) + x._shape = tf.TensorShape([None, None, 1, 1]) + + with self.test_session() as session: + features = { + "inputs": x, + "targets": tf.constant(y, dtype=tf.int32), + } + model = lstm.LSTMSeq2SeqAttention( + hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + sharded_logits, _, _ = model.model_fn(features) + logits = tf.concat(sharded_logits, 0) + session.run(tf.global_variables_initializer()) + res = session.run(logits) + self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size)) if __name__ == "__main__": tf.test.main() From 302d0ca7a8e38232e6b0f1330197174e42d6b01d Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Thu, 29 Jun 2017 20:41:28 -0700 Subject: [PATCH 0071/4095] internal merge from github PiperOrigin-RevId: 160607416 --- .gitignore | 5 ----- tensor2tensor/data_generators/text_encoder.py | 8 ++++---- tensor2tensor/models/common_layers.py | 11 ++++++++--- tensor2tensor/utils/data_reader.py | 4 +--- 4 files changed, 13 insertions(+), 15 deletions(-) mode change 100755 => 100644 .gitignore mode change 100755 => 100644 tensor2tensor/data_generators/text_encoder.py diff --git a/.gitignore b/.gitignore old mode 100755 new mode 100644 index 80f48a2fd..dd84837dd --- a/.gitignore +++ b/.gitignore @@ -7,8 +7,3 @@ # PyPI distribution artificats build/ dist/ - -# Sublime project files -*.sublime-project -*.sublime-workspace - diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100755 new mode 100644 index 6cfa9e740..1bf7539d3 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -321,10 +321,10 @@ def bisect(min_val, max_val): other_subtokenizer = bisect(present_count + 1, max_val) else: other_subtokenizer = bisect(min_val, present_count - 1) - if (abs(other_subtokenizer.vocab_size - target_size) < - abs(subtokenizer.vocab_size - target_size)): - return other_subtokenizer - return subtokenizer + if (abs(other_subtokenizer.vocab_size - target_size) < + abs(subtokenizer.vocab_size - target_size)): + return other_subtokenizer + return subtokenizer return bisect(min_val, max_val) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 36d9b0b51..2e2b74268 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -135,9 +135,14 @@ def image_augmentation(images, do_colors=False): def cifar_image_augmentation(images): """Image augmentation suitable for CIFAR-10/100. - As described in https://arxiv.org/pdf/1608.06993v3.pdf (page 5).""" - images = tf.image.resize_image_with_crop_or_pad( - images, 40, 40) + As described in https://arxiv.org/pdf/1608.06993v3.pdf (page 5). + + Args: + images: a Tensor. + Returns: + Tensor of the same shape as images. + """ + images = tf.image.resize_image_with_crop_or_pad(images, 40, 40) images = tf.random_crop(images, [32, 32, 3]) images = tf.image.random_flip_left_right(images) return images diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 88b45db9d..d09787ae4 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -203,12 +203,10 @@ def preprocess(img): lambda img=inputs: resize(img)) else: examples["inputs"] = tf.to_int64(resize(inputs)) - elif ("image_cifar10" in data_file_pattern - and mode == tf.contrib.learn.ModeKeys.TRAIN): + and mode == tf.contrib.learn.ModeKeys.TRAIN): examples["inputs"] = common_layers.cifar_image_augmentation( examples["inputs"]) - elif "audio" in data_file_pattern: # Reshape audio to proper shape sample_count = tf.to_int32(examples.pop("audio/sample_count")) From f3e585975961e1f87ab81f7d62990ddd4b723cfb Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Fri, 30 Jun 2017 11:55:40 -0700 Subject: [PATCH 0072/4095] merge from github PiperOrigin-RevId: 160671813 --- .gitignore | 9 ++++- tensor2tensor/data_generators/text_encoder.py | 11 +++--- tensor2tensor/models/bluenet.py | 34 ++++++++++++++----- 3 files changed, 40 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index dd84837dd..c9dd3db88 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,16 @@ # Compiled python modules. *.pyc +# Byte-compiled +_pycache__/ + # Python egg metadata, regenerated from source files by setuptools. /*.egg-info -# PyPI distribution artificats +# PyPI distribution artifacts. build/ dist/ + +# Sublime project files +*.sublime-project +*.sublime-workspace diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 1bf7539d3..61078b3f4 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -310,6 +310,7 @@ def build_to_target_size(cls, tf.logging.info("Alphabet contains %d characters" % len(alphabet_set)) def bisect(min_val, max_val): + """Bisection to find the right size.""" present_count = (max_val + min_val) // 2 tf.logging.info("Trying min_count %d" % present_count) subtokenizer = cls() @@ -317,14 +318,16 @@ def bisect(min_val, max_val): present_count, num_iterations) if min_val >= max_val or subtokenizer.vocab_size == target_size: return subtokenizer + if subtokenizer.vocab_size > target_size: other_subtokenizer = bisect(present_count + 1, max_val) else: other_subtokenizer = bisect(min_val, present_count - 1) - if (abs(other_subtokenizer.vocab_size - target_size) < - abs(subtokenizer.vocab_size - target_size)): - return other_subtokenizer - return subtokenizer + + if (abs(other_subtokenizer.vocab_size - target_size) < + abs(subtokenizer.vocab_size - target_size)): + return other_subtokenizer + return subtokenizer return bisect(min_val, max_val) diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index 19bed2032..8f4c89eac 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -77,7 +77,8 @@ def run_binary_modules(modules, cur1, cur2, hparams): """Run binary modules.""" selection_var = tf.get_variable("selection", [len(modules)], initializer=tf.zeros_initializer()) - inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01) + inv_t = 100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01) selected_weights = tf.nn.softmax(selection_var * inv_t) all_res = [modules[n](cur1, cur2, hparams) for n in xrange(len(modules))] all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) @@ -89,7 +90,8 @@ def run_unary_modules_basic(modules, cur, hparams): """Run unary modules.""" selection_var = tf.get_variable("selection", [len(modules)], initializer=tf.zeros_initializer()) - inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01) + inv_t = 100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01) selected_weights = tf.nn.softmax(selection_var * inv_t) all_res = [modules[n](cur, hparams) for n in xrange(len(modules))] all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) @@ -109,7 +111,8 @@ def run_unary_modules_sample(modules, cur, hparams, k): lambda: tf.zeros_like(cur), lambda i=n: modules[i](cur, hparams)) for n in xrange(len(modules))] - inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01) + inv_t = 100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01) selected_weights = tf.nn.softmax(selection_var * inv_t - 1e9 * (1.0 - to_run)) all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) @@ -122,6 +125,14 @@ def run_unary_modules(modules, cur, hparams): return run_unary_modules_sample(modules, cur, hparams, 4) +def batch_deviation(x): + """Average deviation of the batch.""" + x_mean = tf.reduce_mean(x, axis=[0], keep_dims=True) + x_variance = tf.reduce_mean( + tf.square(x - x_mean), axis=[0], keep_dims=True) + return tf.reduce_mean(tf.sqrt(x_variance)) + + @registry.register_model class BlueNet(t2t_model.T2TModel): @@ -153,14 +164,15 @@ def run_unary(x, name): with tf.variable_scope("conv"): x = run_unary_modules(conv_modules, x, hparams) x.set_shape(x_shape) - return x + return tf.nn.dropout(x, 1.0 - hparams.dropout), batch_deviation(x) - cur1, cur2 = inputs, inputs + cur1, cur2, extra_loss = inputs, inputs, 0.0 cur_shape = inputs.get_shape() for i in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % i): - cur1 = run_unary(cur1, "unary1") - cur2 = run_unary(cur2, "unary2") + cur1, loss1 = run_unary(cur1, "unary1") + cur2, loss2 = run_unary(cur2, "unary2") + extra_loss += (loss1 + loss2) / float(hparams.num_hidden_layers) with tf.variable_scope("binary1"): next1 = run_binary_modules(binary_modules, cur1, cur2, hparams) next1.set_shape(cur_shape) @@ -169,7 +181,9 @@ def run_unary(x, name): next2.set_shape(cur_shape) cur1, cur2 = next1, next2 - return cur1 + anneal = common_layers.inverse_exp_decay(hparams.anneal_until) + extra_loss *= hparams.batch_deviation_loss_factor * anneal + return cur1, extra_loss @registry.register_hparams @@ -185,7 +199,7 @@ def bluenet_base(): hparams.num_hidden_layers = 8 hparams.kernel_height = 3 hparams.kernel_width = 3 - hparams.learning_rate_decay_scheme = "exp50k" + hparams.learning_rate_decay_scheme = "exp10k" hparams.learning_rate = 0.05 hparams.learning_rate_warmup_steps = 3000 hparams.initializer_gain = 1.0 @@ -196,6 +210,8 @@ def bluenet_base(): hparams.optimizer_adam_beta1 = 0.85 hparams.optimizer_adam_beta2 = 0.997 hparams.add_hparam("imagenet_use_2d", True) + hparams.add_hparam("anneal_until", 40000) + hparams.add_hparam("batch_deviation_loss_factor", 0.001) return hparams From 98be8128078b8a8da34271eea60ad88f879389d7 Mon Sep 17 00:00:00 2001 From: Noam Shazeer Date: Fri, 30 Jun 2017 14:41:03 -0700 Subject: [PATCH 0073/4095] Fix subword_text_tokenizer to make it invertible again. This breaks existing models and vocabularies. Change criteria for which characters are parts of words and which are separators - we now consider unicode letters and numbers to be parts of words. PiperOrigin-RevId: 160690718 --- tensor2tensor/data_generators/text_encoder.py | 207 +++++++++++------- .../text_encoder_build_subword.py | 3 +- tensor2tensor/data_generators/tokenizer.py | 84 +++---- .../data_generators/tokenizer_test.py | 28 ++- 4 files changed, 171 insertions(+), 151 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 61078b3f4..7b00a85d2 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -28,11 +28,20 @@ # Dependency imports import six +from six import PY2 from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import tokenizer import tensorflow as tf + +# Conversion between Unicode and UTF-8, if required (on Python2) +_native_to_unicode = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) + + +_unicode_to_native = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) + + # Reserved tokens for things like padding and EOS symbols. PAD = "" EOS = "" @@ -162,15 +171,36 @@ def _load_vocab_from_file(self, filename): class SubwordTextEncoder(TextEncoder): - """Class for breaking tokens into subtokens. + """Class for invertibly encoding text using a limited vocabulary. - Invertibly encodes a string as a sequence of subtokens from a limited + Invertibly encodes a native string as a sequence of subtokens from a limited vocabulary. A SubwordTextEncoder is built from a corpus (so it is tailored to the text in the corpus), and stored to a file. See text_encoder_build_subword.py. It can then be loaded and used to encode/decode any text. + + Encoding has four phases: + + 1. Tokenize into a list of tokens. Each token is a unicode string of either + all alphanumeric characters or all non-alphanumeric characters. We drop + tokens consisting of a single space that are between two alphanumeric + tokens. + + 2. Escape each token. This escapes away special and out-of-vocabulary + characters, and makes sure that each token ends with an underscore, and + has no other underscores. + + 3. Represent each escaped token as a the concatenation of a list of subtokens + from the limited vocabulary. Subtoken selection is done greedily from + beginning to end. That is, we construct the list in order, always picking + the longest subtoken in our vocabulary that matches a prefix of the + remaining portion of the encoded token. + + 4. Concatenate these lists. This concatenation is invertible due to the + fact that the trailing underscores indicate when one list is finished. + """ def __init__(self, filename=None, num_reserved_ids=2): @@ -182,24 +212,26 @@ def __init__(self, filename=None, num_reserved_ids=2): super(SubwordTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) def encode(self, raw_text): - """Converts a string to a list of subtoken ids. + """Converts a native string to a list of subtoken ids. Args: - raw_text: a string. + raw_text: a native string. Returns: a list of integers in the range [0, vocab_size) """ - return self._tokens_to_subtokens(self._tokenizer.encode(raw_text)) + return self._tokens_to_subtokens(self._tokenizer.encode( + _native_to_unicode(raw_text))) def decode(self, subtokens): - """Converts a sequence of subtoken ids to a string. + """Converts a sequence of subtoken ids to a native string. Args: subtokens: a list of integers in the range [0, vocab_size) Returns: - a string + a native string """ - return self._tokenizer.decode(self._subtokens_to_tokens(subtokens)) + return _unicode_to_native(self._tokenizer.decode( + self._subtokens_to_tokens(subtokens))) @property def vocab_size(self): @@ -239,8 +271,8 @@ def subtoken_to_subtoken_string(self, subtoken): if subtoken_string: return subtoken_string if 0 <= subtoken < self._num_reserved_ids: - return "%s_" % RESERVED_TOKENS[subtoken] - return "ID%d_" % subtoken + return u"%s_" % RESERVED_TOKENS[subtoken] + return u"ID%d_" % subtoken def _escaped_token_to_subtokens(self, escaped_token): """Converts an escaped token string to a list of subtokens. @@ -260,27 +292,11 @@ def _escaped_token_to_subtokens(self, escaped_token): if subtoken != -1: break end -= 1 - if end > pos: - ret.append(subtoken) - pos = end - else: - # No subtoken in the vocabulary matches escaped_token[pos]. - # This can happen if the token contains a Unicode character - # that did not occur in the vocabulary training set. - # The id self.vocab_size - 1 is decoded as Unicode uFFFD, - # REPLACEMENT_CHARACTER. - ret.append(self.vocab_size - 1) - # Ensure that the outer loop continues - pos += 1 - return ret + assert end > pos + ret.append(subtoken) + pos = end - @classmethod - def alphabet(cls, token_counts): - """Return the set of Unicode characters that appear in the tokens.""" - alphabet_set = set() - for token in six.iterkeys(token_counts): - alphabet_set |= set(token) - return alphabet_set + return ret @classmethod def build_to_target_size(cls, @@ -304,17 +320,12 @@ def build_to_target_size(cls, Returns: a SubwordTextEncoder instance. """ - # Calculate the alphabet, i.e. the set of all Unicode characters - # that appear in the tokens. - alphabet_set = cls.alphabet(token_counts) - tf.logging.info("Alphabet contains %d characters" % len(alphabet_set)) - def bisect(min_val, max_val): """Bisection to find the right size.""" present_count = (max_val + min_val) // 2 tf.logging.info("Trying min_count %d" % present_count) subtokenizer = cls() - subtokenizer.build_from_token_counts(token_counts, alphabet_set, + subtokenizer.build_from_token_counts(token_counts, present_count, num_iterations) if min_val >= max_val or subtokenizer.vocab_size == target_size: return subtokenizer @@ -333,17 +344,29 @@ def bisect(min_val, max_val): def build_from_token_counts(self, token_counts, - alphabet_set, min_count, num_iterations=4): """Train a SubwordTextEncoder based on a dictionary of word counts. Args: token_counts: a dictionary of Unicode strings to int. - alphabet_set: the set of Unicode characters that appear in the tokens. min_count: an integer - discard subtokens with lower counts. num_iterations: an integer. how many iterations of refinement. """ + # first determine the alphabet to include all characters with count at + # least min_count in the dataset. + char_counts = defaultdict(int) + for token, count in six.iteritems(token_counts): + for c in token: + char_counts[c] += count + self._alphabet = set() + for c, count in six.iteritems(char_counts): + if count >= min_count: + self._alphabet.add(c) + # Make sure all characters needed for escaping are included + for c in u"\\_;0123456789": + self._alphabet.add(c) + # We build iteratively. On each iteration, we segment all the words, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. @@ -367,43 +390,36 @@ def build_from_token_counts(self, for end in xrange(start + 1, len(escaped_token) + 1): subtoken_string = escaped_token[start:end] counts[subtoken_string] += count + # Make sure all characters needed for escaping are included + for c in self._alphabet: + counts[c] += min_count # Array of sets of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): lsub = len(subtoken_string) - # All subtoken strings of length 1 are automatically included - # later, so we don't need to consider them here - if count < min_count or lsub <= 1: - continue - # Add this subtoken string to its length set - while len(len_to_subtoken_strings) <= lsub: - len_to_subtoken_strings.append(set()) - len_to_subtoken_strings[lsub].add(subtoken_string) + if count >= min_count: + # Add this subtoken string to its length set + while len(len_to_subtoken_strings) <= lsub: + len_to_subtoken_strings.append(set()) + len_to_subtoken_strings[lsub].add(subtoken_string) new_subtoken_strings = [] # consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. - for subtoken_strings in reversed(len_to_subtoken_strings[2:]): + for lsub in reversed(range(1, len(len_to_subtoken_strings))): + subtoken_strings = len_to_subtoken_strings[lsub] for subtoken_string in subtoken_strings: count = counts[subtoken_string] - if count < min_count: - continue - new_subtoken_strings.append((count, subtoken_string)) - for l in xrange(1, len(subtoken_string)): - counts[subtoken_string[:l]] -= count - # Sort what we've got so far in decreasing order by count + if count >= min_count: + new_subtoken_strings.append((count, subtoken_string)) + for l in xrange(1, lsub): + counts[subtoken_string[:l]] -= count + # Sort in decreasing order by count new_subtoken_strings.sort(reverse=True) - # Add the alphabet set at the end of the vocabulary list - for char in alphabet_set: - new_subtoken_strings.append((0, char)) - # Also include the Unicode REPLACEMENT CHARACTER to use - # when encountering previously unseen Unicode characters - # in the input (i.e. input external to the tokenizer training - # set, which may thus contain characters not in the alphabet_set). - # This must be the last entry in the subtoken vocabulary list. - new_subtoken_strings.append((0, u"\uFFFD")) # Now we have a candidate vocabulary + old_alphabet = self._alphabet self._init_from_list([u""] * self._num_reserved_ids + [p[1] for p in new_subtoken_strings]) + assert old_alphabet == self._alphabet tf.logging.info("vocab_size = %d" % self.vocab_size) original = "This sentence was encoded by the SubwordTextEncoder." @@ -426,46 +442,77 @@ def _init_from_list(self, subtoken_strings): self._all_subtoken_strings = subtoken_strings self._subtoken_string_to_id = { s: i for i, s in enumerate(subtoken_strings) if s} + self._alphabet = set([c for c in subtoken_strings if len(c) == 1]) def _load_from_file(self, filename): """Load from a file.""" subtoken_strings = [] with tf.gfile.Open(filename) as f: for line in f: - if six.PY2: - subtoken_strings.append(line.strip()[1:-1].decode("utf-8")) - else: - subtoken_strings.append(line.strip()[1:-1]) + subtoken_strings.append(_native_to_unicode(line.strip()[1:-1])) self._init_from_list(subtoken_strings) def store_to_file(self, filename): with tf.gfile.Open(filename, "w") as f: for subtoken_string in self._all_subtoken_strings: - if six.PY2: - f.write("'" + subtoken_string.encode("utf-8") + "'\n") - else: - f.write("'" + subtoken_string + "'\n") + f.write("'" + _unicode_to_native(subtoken_string) + "'\n") def _escape_token(self, token): - r"""Translate '\'->'\\' and '_'->'\u', then append '_'. + r"""Escape away underscores and OOV characters and append '_'. + + This allows the token to be experessed as the concatenation of a list + of subtokens from the vocabulary. The underscore acts as a sentinel + which allows us to invertibly concatenate multiple such lists. Args: - token: a string + token: a unicode string Returns: - escaped_token: a string + escaped_token: a unicode string """ - return token.replace("\\", "\\\\").replace("_", "\\u") + "_" + token = token.replace("\\", "\\\\").replace("_", "\\u") + "_" + ret = u"" + for c in token: + if c in self._alphabet: + ret += c + else: + ret += u"\\%d;" % ord(c) + return ret def _unescape_token(self, escaped_token): - r"""Remove '_' from end, then translate '\\'->'\' and '\u'->'_'. + r"""Inverse of _escape_token(). Args: - escaped_token: a string + escaped_token: a unicode string Returns: - token: a string + token: a unicode string """ - assert escaped_token[-1] == "_" - return escaped_token[:-1].replace("\\u", "_").replace("\\\\", "\\") + ret = u"" + escaped_token = escaped_token[:-1] + pos = 0 + while pos < len(escaped_token): + c = escaped_token[pos] + if c == "\\": + pos += 1 + c = escaped_token[pos] + if c == u"u": + ret += u"_" + pos += 1 + elif c == "\\": + ret += u"_" + pos += 1 + else: + semicolon_pos = escaped_token.find(u";", pos) + if semicolon_pos == -1: + continue + try: + ret += unichr(int(escaped_token[pos:semicolon_pos])) + pos = semicolon_pos + 1 + except (ValueError, OverflowError) as _: + pass + else: + ret += c + pos += 1 + return ret @classmethod def get_token_counts(cls, text_filepattern, corpus_max_lines): @@ -477,7 +524,7 @@ def get_token_counts(cls, text_filepattern, corpus_max_lines): with tf.gfile.Open(text_filename) as f: for line in f: # The tokenizer updates token_counts in encode() - tok.encode(line.strip()) + tok.encode(_native_to_unicode(line.strip())) lines_read += 1 if corpus_max_lines > 0 and lines_read > corpus_max_lines: return tok.token_counts diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py index 9b8da9364..659e9da14 100644 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -59,8 +59,7 @@ def main(unused_argv): raise ValueError('Must provide --corpus_filepattern') token_counts = text_encoder.SubwordTextEncoder.get_token_counts( FLAGS.corpus_filepattern, FLAGS.corpus_max_lines) - alphabet_set = text_encoder.SubwordTextEncoder.alphabet(token_counts) - gs.build_from_token_counts(token_counts, alphabet_set, + gs.build_from_token_counts(token_counts, FLAGS.min_count, FLAGS.num_iterations) gs.store_to_file(FLAGS.output_fn) diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 0eaea4f58..8490ead19 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -14,32 +14,29 @@ """A simple invertible tokenizer. -Converts from a raw string to a list of tokens (represented as Unicode strings). +Converts from a unicode string to a list of tokens +(represented as Unicode strings). This tokenizer has the following desirable properties: - It is invertible. - - Punctuation is broken away from adjacent letters. + - Alphanumeric characters are broken away from non-alphanumeric characters. - A single space between words does not produce an extra token. - The full Unicode punctuation and separator set is recognized. The tokenization algorithm is as follows: -0. We classify the input characters into "word characters" and - "separator characters". Separator characters are defined as the union of - Unicode punctuation and separators/white space. All other characters are - "word characters". - -1. Split the text into a list of tokens, splitting at every boundary of a - "word character" and a "separator character". This produces a list which - alternates between "word tokens" (strings of word codepoints) and - "separator tokens" (strings of of separator/punctuation codepoints). +1. Split the text into a list of tokens, splitting at every boundary of an + alphanumeric character and a non-alphanumeric character. This produces + a list which alternates between "alphanumeric tokens" + (strings of alphanumeric characters) and "non-alphanumeric tokens" + (strings of of non-alphanumeric characters). 2. Remove every token consisting of a single space, unless it is the very first or very last token in the list. These tokens are now - implied by the fact that there are two adjacent word tokens. + implied by the fact that there are two adjacent alphanumeric tokens. -e.g. "Dude - that's so cool." - -> ["Dude", " - ", "that", "'", "s", "so", "cool", "."] +e.g. u"Dude - that's so cool." + -> [u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."] """ from __future__ import absolute_import @@ -47,87 +44,66 @@ from __future__ import print_function from collections import defaultdict -import re import sys import unicodedata # Dependency imports -from six import PY2 from six import unichr # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin -# Regular expression that matches Unicode whitespace characters -# (including ASCII whitespace) as defined in the Python run-time library -_RE_WHITESPACE = re.compile(r"^\s$", re.UNICODE) - - -# Set of Unicode whitespace code points -UNICODE_WHITESPACE = set(unichr(i) for i in xrange(sys.maxunicode) - if _RE_WHITESPACE.match(unichr(i))) - - -# Set of Unicode punctuation code points -UNICODE_PUNCTUATION = set(unichr(i) for i in xrange(sys.maxunicode) - if unicodedata.category(unichr(i)).startswith("P")) - - -# Conversion between Unicode and UTF-8, if required (on Python2) -_decode_string = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) - - -_encode_string = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) - - class Tokenizer(object): """Vocab for breaking words into Unicode wordpieces. """ - _SEPARATOR_CHAR_SET = UNICODE_WHITESPACE | UNICODE_PUNCTUATION + # This set contains all letter and number characters. + _ALPHANUMERIC_CHAR_SET = set( + unichr(i) for i in xrange(sys.maxunicode) + if (unicodedata.category(unichr(i)).startswith("L") or + unicodedata.category(unichr(i)).startswith("N"))) def __init__(self): self.token_counts = defaultdict(int) - def encode(self, raw_text): - """Encode a raw string as a list of tokens. + def encode(self, text): + """Encode a unicode string as a list of tokens. Args: - raw_text: a (Python2 or Python3 native) string + text: a unicode string Returns: a list of tokens as Unicode strings """ - if not raw_text: + if not text: return [] ret = [] token_start = 0 - unicode_text = _decode_string(raw_text) # Classify each character in the input string - is_sep = [c in self._SEPARATOR_CHAR_SET for c in unicode_text] - for pos in xrange(1, len(unicode_text)): - if is_sep[pos] != is_sep[pos - 1]: - token = unicode_text[token_start:pos] + is_alnum = [c in self._ALPHANUMERIC_CHAR_SET for c in text] + for pos in xrange(1, len(text)): + if is_alnum[pos] != is_alnum[pos - 1]: + token = text[token_start:pos] if token != u" " or token_start == 0: ret.append(token) self.token_counts[token] += 1 token_start = pos - final_token = unicode_text[token_start:] + final_token = text[token_start:] ret.append(final_token) self.token_counts[final_token] += 1 return ret def decode(self, tokens): - """Decode a list of tokens to a string. + """Decode a list of tokens to a unicode string. Args: tokens: a list of Unicode strings Returns: - a (Python2 or Python3 native) string + a unicode string """ ret = u"" - is_word = [t[0] not in self._SEPARATOR_CHAR_SET for t in tokens] + token_is_alnum = [t[0] in self._ALPHANUMERIC_CHAR_SET for t in tokens] for i, token in enumerate(tokens): - if i > 0 and is_word[i - 1] and is_word[i]: + if i > 0 and token_is_alnum[i - 1] and token_is_alnum[i]: ret += u" " ret += token - return _encode_string(ret) + return ret diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py index 70c7d31eb..766630ba3 100644 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -23,7 +23,6 @@ # Dependency imports -import six from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import tokenizer @@ -35,29 +34,28 @@ class TokenizerTest(tf.test.TestCase): def testEncode(self): t = tokenizer.Tokenizer() self.assertEqual( - t.encode("Dude - that's so cool."), - ["Dude", " - ", "that", "'", "s", "so", "cool", "."]) - # TODO(lukaszkaiser): make it work again with Unicode. - # self.assertEqual( - # t.encode("Łukasz est né en 1981."), - # ["Łukasz", "est", "né", "en", "1981", "."]) + t.encode(u"Dude - that's so cool."), + [u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."]) self.assertEqual( - t.encode(" Spaces at the ends "), - [" ", "Spaces", "at", "the", "ends", " "]) - self.assertEqual(t.encode("802.11b"), ["802", ".", "11b"]) - self.assertEqual(t.encode("two. \nlines"), ["two", ". \n", "lines"]) + t.encode(u"Łukasz est né en 1981."), + [u"Łukasz", u"est", u"né", u"en", u"1981", u"."]) + self.assertEqual( + t.encode(u" Spaces at the ends "), + [u" ", u"Spaces", u"at", u"the", u"ends", u" "]) + self.assertEqual(t.encode(u"802.11b"), [u"802", u".", u"11b"]) + self.assertEqual(t.encode(u"two. \nlines"), [u"two", u". \n", u"lines"]) def testDecode(self): t = tokenizer.Tokenizer() self.assertEqual( - t.decode(["Dude", " - ", "that", "'", "s", "so", "cool", "."]), - "Dude - that's so cool.") + t.decode([u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."]), + u"Dude - that's so cool.") def testInvertibilityOnRandomStrings(self): t = tokenizer.Tokenizer() random.seed(123) - for _ in xrange(0): # TODO(lukaszkaiser): make it work again with Unicode. - s = "".join([six.int2byte(random.randint(0, 255)) for _ in xrange(10)]) + for _ in xrange(1000): + s = u"".join([unichr(random.randint(0, 65535)) for _ in xrange(10)]) self.assertEqual(s, t.decode(t.encode(s))) From 599a3e80cd53c888079804cfdb512c4a91b75d89 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sat, 1 Jul 2017 23:24:47 +0000 Subject: [PATCH 0074/4095] Better resiliency of utf-8 conversion --- tensor2tensor/data_generators/text_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index b43170320..414e86cf4 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -111,7 +111,7 @@ def decode(self, ids): if six.PY2: return "".join(decoded_ids) # Python3: join byte arrays and then decode string - return b"".join(decoded_ids).decode("utf-8") + return b"".join(decoded_ids).decode("utf-8", "replace") @property def vocab_size(self): From d7a7230ae3c021e99a37faea7edb816b870d4d87 Mon Sep 17 00:00:00 2001 From: Stefan Schweter Date: Tue, 4 Jul 2017 03:03:14 +0200 Subject: [PATCH 0075/4095] Bump to v1.0.10 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ba3ea532a..254631d9f 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.9', + version='1.0.10', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 3620d21b958cc0beab1ea06d0ab35fc70adaa0b5 Mon Sep 17 00:00:00 2001 From: Kollol Das Date: Tue, 4 Jul 2017 14:24:35 +0530 Subject: [PATCH 0076/4095] Revert "Fix shape mismatch issue when target sequence length > logits sequence length" This reverts commit be61c59e1c2a6af2d1dba5c46ace358202b2f851. --- tensor2tensor/models/common_layers.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 410f5b2bf..25c162c4c 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1302,16 +1302,13 @@ def padding_list(length_diff, arg): return res_x, res_y -def pad_with_zeros(logits, labels, return_logits=False): +def pad_with_zeros(logits, labels): """Pad labels on the length dimension to match logits length.""" with tf.name_scope("pad_with_zeros", [logits, labels]): logits, labels = pad_to_same_length(logits, labels) if len(labels.shape.as_list()) == 3: # 2-d labels. logits, labels = pad_to_same_length(logits, labels, axis=2) - if return_logits: - return logits, labels - else: - return labels + return labels def weights_nonzero(labels): @@ -1377,8 +1374,8 @@ def padded_cross_entropy(logits, confidence = 1.0 - label_smoothing vocab_size = tf.shape(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): - pad_logits, pad_labels = pad_with_zeros(logits, labels, return_logits=True) - xent = smoothing_cross_entropy(pad_logits, pad_labels, vocab_size, confidence) + pad_labels = pad_with_zeros(logits, labels) + xent = smoothing_cross_entropy(logits, pad_labels, vocab_size, confidence) weights = weights_fn(pad_labels) if not reduce_sum: return xent * weights, weights From 3ab069ad9aecbcedd237289074a6957895a741b0 Mon Sep 17 00:00:00 2001 From: Kollol Das Date: Tue, 4 Jul 2017 14:25:07 +0530 Subject: [PATCH 0077/4095] Revert "Fix shape invariance issue in conv_internal()" This reverts commit 9e2bcade7a27e50af94c8ef532a1277126c7665e. --- tensor2tensor/models/common_layers.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 25c162c4c..36d9b0b51 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -330,16 +330,11 @@ def conv2d_kernel(kernel_size_arg, name_suffix): inputs._shape = tf.TensorShape([static_shape[0], None, None, static_shape[3]]) # pylint: disable=protected-access if kernel_size[1] == 1 or force2d: # Avoiding the cond below can speed up graph and gradient construction. - c = conv2d_kernel(kernel_size, "single") - else: - c = tf.cond( + return conv2d_kernel(kernel_size, "single") + return tf.cond( tf.equal(tf.shape(inputs)[2], 1), lambda: conv2d_kernel((kernel_size[0], 1), "small"), lambda: conv2d_kernel(kernel_size, "std")) - # Restore the shape to maintain invariance - inputs.set_shape(static_shape) - c.set_shape(static_shape) - return c def conv(inputs, filters, kernel_size, **kwargs): From be043f9992066b81bd8d385e787353db7013a3fd Mon Sep 17 00:00:00 2001 From: Kollol Das Date: Tue, 4 Jul 2017 15:32:49 +0530 Subject: [PATCH 0078/4095] Remove conds from conv_internal() and pool() functions to fix shape invaiance issue. Also update testConvStride2MultiStep() --- tensor2tensor/models/common_layers.py | 34 ++++------------------ tensor2tensor/models/common_layers_test.py | 4 +-- 2 files changed, 8 insertions(+), 30 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 2e2b74268..427d5147d 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -293,7 +293,7 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): static_shape = inputs.get_shape() if not static_shape or len(static_shape) != 4: raise ValueError("Inputs to conv must have statically known rank 4.") - inputs.set_shape([static_shape[0], None, None, static_shape[3]]) + #inputs.set_shape([static_shape[0], None, None, static_shape[3]]) # Add support for left padding. if "padding" in kwargs and kwargs["padding"] == "LEFT": dilation_rate = (1, 1) @@ -307,9 +307,9 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): width_padding = 0 if static_shape[2] == 1 else cond_padding padding = [[0, 0], [height_padding, 0], [width_padding, 0], [0, 0]] inputs = tf.pad(inputs, padding) + # Set middle two dimensions to None to prevent convolution from complaining + inputs.set_shape([static_shape[0], None, None, static_shape[3]]) kwargs["padding"] = "VALID" - # Special argument we use to force 2d kernels (see below). - force2d = kwargs.get("force2d", True) def conv2d_kernel(kernel_size_arg, name_suffix): """Call conv2d but add suffix to name.""" @@ -329,17 +329,7 @@ def conv2d_kernel(kernel_size_arg, name_suffix): kwargs["force2d"] = original_force2d return result - # Manually setting the shape to be unknown in the middle two dimensions so - # that the `tf.cond` below won't throw an error based on the convolution - # kernels being too large for the data. - inputs._shape = tf.TensorShape([static_shape[0], None, None, static_shape[3]]) # pylint: disable=protected-access - if kernel_size[1] == 1 or force2d: - # Avoiding the cond below can speed up graph and gradient construction. - return conv2d_kernel(kernel_size, "single") - return tf.cond( - tf.equal(tf.shape(inputs)[2], - 1), lambda: conv2d_kernel((kernel_size[0], 1), "small"), - lambda: conv2d_kernel(kernel_size, "std")) + return conv2d_kernel(kernel_size, "single") def conv(inputs, filters, kernel_size, **kwargs): @@ -566,20 +556,8 @@ def pool(inputs, window_size, pooling_type, padding, strides=(1, 1)): inputs = tf.pad(inputs, padding_) inputs.set_shape([static_shape[0], None, None, static_shape[3]]) padding = "VALID" - window_size_small = (window_size[0], 1) - strides_small = (strides[0], 1) - # Manually setting the shape to be unknown in the middle two dimensions so - # that the `tf.cond` below won't throw an error based on the convolution - # kernels being too large for the data. - inputs._shape = tf.TensorShape( # pylint: disable=protected-access - [static_shape[0], None, None, static_shape[3]]) - return tf.cond( - tf.equal(tf.shape(inputs)[2], 1), - lambda: tf.nn.pool( # pylint: disable=g-long-lambda - inputs, window_size_small, pooling_type, padding, - strides=strides_small), - lambda: tf.nn.pool( # pylint: disable=g-long-lambda - inputs, window_size, pooling_type, padding, strides=strides)) + + return tf.nn.pool(inputs, window_size, pooling_type, padding, strides=strides) def conv_block_downsample(x, diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index 091f272d6..8d2b4dec1 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -277,13 +277,13 @@ def testShiftLeft(self): self.assertAllEqual(actual, expected) def testConvStride2MultiStep(self): - x1 = np.random.rand(5, 32, 1, 11) + x1 = np.random.rand(5, 32, 16, 11) with self.test_session() as session: a = common_layers.conv_stride2_multistep( tf.constant(x1, dtype=tf.float32), 4, 16) session.run(tf.global_variables_initializer()) actual = session.run(a[0]) - self.assertEqual(actual.shape, (5, 2, 0, 16)) + self.assertEqual(actual.shape, (5, 2, 1, 16)) def testDeconvStride2MultiStep(self): x1 = np.random.rand(5, 2, 1, 11) From 4a1d7da19c9dcb4472871da534d5cfa6ddc6bafa Mon Sep 17 00:00:00 2001 From: Kollol Das Date: Tue, 4 Jul 2017 16:07:01 +0530 Subject: [PATCH 0079/4095] Update pad_with_zeros() to return logits as well to handle cases where logits sequence length is less than labels --- tensor2tensor/models/common_layers.py | 6 +++--- tensor2tensor/utils/metrics.py | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 2e2b74268..0c69ffa46 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1308,7 +1308,7 @@ def pad_with_zeros(logits, labels): logits, labels = pad_to_same_length(logits, labels) if len(labels.shape.as_list()) == 3: # 2-d labels. logits, labels = pad_to_same_length(logits, labels, axis=2) - return labels + return logits, labels def weights_nonzero(labels): @@ -1374,8 +1374,8 @@ def padded_cross_entropy(logits, confidence = 1.0 - label_smoothing vocab_size = tf.shape(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): - pad_labels = pad_with_zeros(logits, labels) - xent = smoothing_cross_entropy(logits, pad_labels, vocab_size, confidence) + pad_logits, pad_labels = pad_with_zeros(logits, labels) + xent = smoothing_cross_entropy(pad_logits, pad_labels, vocab_size, confidence) weights = weights_fn(pad_labels) if not reduce_sum: return xent * weights, weights diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index ecc02fd5e..eca6143c7 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -37,10 +37,10 @@ def padded_accuracy_topk(predictions, weights_fn=common_layers.weights_nonzero): """Percentage of times that top-k predictions matches labels on non-0s.""" with tf.variable_scope("padded_accuracy_topk", values=[predictions, labels]): - padded_labels = common_layers.pad_with_zeros(predictions, labels) + padded_predictions, padded_labels = common_layers.pad_with_zeros(predictions, labels) weights = weights_fn(padded_labels) - effective_k = tf.minimum(k, tf.shape(predictions)[-1]) - _, outputs = tf.nn.top_k(predictions, k=effective_k) + effective_k = tf.minimum(k, tf.shape(padded_predictions)[-1]) + _, outputs = tf.nn.top_k(padded_predictions, k=effective_k) outputs = tf.to_int32(outputs) padded_labels = tf.expand_dims(padded_labels, axis=-1) padded_labels += tf.zeros_like(outputs) # Pad to same shape. @@ -61,9 +61,9 @@ def padded_sequence_accuracy(predictions, """Percentage of times that predictions matches labels everywhere (non-0).""" with tf.variable_scope( "padded_sequence_accuracy", values=[predictions, labels]): - padded_labels = common_layers.pad_with_zeros(predictions, labels) + paded_predictions, padded_labels = common_layers.pad_with_zeros(predictions, labels) weights = weights_fn(padded_labels) - outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) + outputs = tf.to_int32(tf.argmax(paded_predictions, axis=-1)) not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights axis = list(range(1, len(outputs.get_shape()))) correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis)) @@ -84,9 +84,9 @@ def padded_accuracy(predictions, weights_fn=common_layers.weights_nonzero): """Percentage of times that predictions matches labels on non-0s.""" with tf.variable_scope("padded_accuracy", values=[predictions, labels]): - padded_labels = common_layers.pad_with_zeros(predictions, labels) + padded_predictions, padded_labels = common_layers.pad_with_zeros(predictions, labels) weights = weights_fn(padded_labels) - outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) + outputs = tf.to_int32(tf.argmax(padded_predictions, axis=-1)) return tf.to_float(tf.equal(outputs, padded_labels)), weights From df94f8620d868cbc3bc3249da3dfef60545c2829 Mon Sep 17 00:00:00 2001 From: Kollol Das Date: Tue, 4 Jul 2017 17:06:49 +0530 Subject: [PATCH 0080/4095] Add hparams for lstm attention --- tensor2tensor/models/lstm.py | 18 ++++++++++++++++-- tensor2tensor/models/lstm_test.py | 3 ++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 92b253cc1..13ef8dc4c 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -21,6 +21,7 @@ # Dependency imports from tensor2tensor.models import common_layers +from tensor2tensor.models import common_hparams from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -193,7 +194,8 @@ def dropout_lstm_cell(): input_keep_prob=1.0 - hparams.dropout * tf.to_float(train)) layers = [dropout_lstm_cell() for _ in range(hparams.num_hidden_layers)] - cell = ExternalAttentionCellWrapper(tf.nn.rnn_cell.MultiRNNCell(layers), attn_states) + cell = ExternalAttentionCellWrapper(tf.nn.rnn_cell.MultiRNNCell(layers), attn_states, + attn_vec_size=hparams.attn_vec_size) initial_state = cell.combine_state(initial_state) with tf.variable_scope(name): return tf.nn.dynamic_rnn( @@ -253,4 +255,16 @@ class LSTMSeq2SeqAttention(t2t_model.T2TModel): def model_fn_body(self, features): train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN return lstm_seq2seq_internal_attention(features["inputs"], features["targets"], - self._hparams, train) \ No newline at end of file + self._hparams, train) + +@registry.register_hparams +def lstm_attention(): + """hparams for LSTM with attention.""" + hparams = common_hparams.basic_params1() + hparams.batch_size = 128 + hparams.hidden_size = 128 + hparams.num_hidden_layers = 2 + + # Attention + hparams.add_hparam("attn_vec_size", hparams.hidden_size) + return hparams \ No newline at end of file diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index fc9aa6416..a216a3832 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -55,7 +55,8 @@ def testLSTMSeq2Seq_attention(self): vocab_size = 9 x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1)) - hparams = common_hparams.basic_params1() + hparams = lstm.lstm_attention() + p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, vocab_size) x = tf.constant(x, dtype=tf.int32) From 5dbde2d5cbf631f61070b4008d30cee3f1798b2b Mon Sep 17 00:00:00 2001 From: Kollol Das Date: Wed, 5 Jul 2017 20:09:00 +0530 Subject: [PATCH 0081/4095] Revert "Fix shape mismatch issue when target sequence length > logits sequence length" This reverts commit be61c59e1c2a6af2d1dba5c46ace358202b2f851. --- tensor2tensor/models/common_layers.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 410f5b2bf..25c162c4c 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1302,16 +1302,13 @@ def padding_list(length_diff, arg): return res_x, res_y -def pad_with_zeros(logits, labels, return_logits=False): +def pad_with_zeros(logits, labels): """Pad labels on the length dimension to match logits length.""" with tf.name_scope("pad_with_zeros", [logits, labels]): logits, labels = pad_to_same_length(logits, labels) if len(labels.shape.as_list()) == 3: # 2-d labels. logits, labels = pad_to_same_length(logits, labels, axis=2) - if return_logits: - return logits, labels - else: - return labels + return labels def weights_nonzero(labels): @@ -1377,8 +1374,8 @@ def padded_cross_entropy(logits, confidence = 1.0 - label_smoothing vocab_size = tf.shape(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): - pad_logits, pad_labels = pad_with_zeros(logits, labels, return_logits=True) - xent = smoothing_cross_entropy(pad_logits, pad_labels, vocab_size, confidence) + pad_labels = pad_with_zeros(logits, labels) + xent = smoothing_cross_entropy(logits, pad_labels, vocab_size, confidence) weights = weights_fn(pad_labels) if not reduce_sum: return xent * weights, weights From 00aec579365104b86326cebcbd5983ff70dbe6ca Mon Sep 17 00:00:00 2001 From: Kollol Das Date: Wed, 5 Jul 2017 20:09:17 +0530 Subject: [PATCH 0082/4095] Revert "Fix shape invariance issue in conv_internal()" This reverts commit 9e2bcade7a27e50af94c8ef532a1277126c7665e. --- tensor2tensor/models/common_layers.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 25c162c4c..36d9b0b51 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -330,16 +330,11 @@ def conv2d_kernel(kernel_size_arg, name_suffix): inputs._shape = tf.TensorShape([static_shape[0], None, None, static_shape[3]]) # pylint: disable=protected-access if kernel_size[1] == 1 or force2d: # Avoiding the cond below can speed up graph and gradient construction. - c = conv2d_kernel(kernel_size, "single") - else: - c = tf.cond( + return conv2d_kernel(kernel_size, "single") + return tf.cond( tf.equal(tf.shape(inputs)[2], 1), lambda: conv2d_kernel((kernel_size[0], 1), "small"), lambda: conv2d_kernel(kernel_size, "std")) - # Restore the shape to maintain invariance - inputs.set_shape(static_shape) - c.set_shape(static_shape) - return c def conv(inputs, filters, kernel_size, **kwargs): From 96b1202909368855e44ce441c5d3a27135c9234f Mon Sep 17 00:00:00 2001 From: Richard Shin Date: Fri, 30 Jun 2017 15:44:20 -0700 Subject: [PATCH 0083/4095] Add fused subseparable conv selection and SelectionWeights to bluenet. --- tensor2tensor/models/bluenet.py | 386 +++++++++++++++++++++++++++++--- 1 file changed, 359 insertions(+), 27 deletions(-) diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index 8f4c89eac..253b79944 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -20,6 +20,7 @@ # Dependency imports +import collections from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.models import common_hparams @@ -27,8 +28,329 @@ from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model +import numpy as np import tensorflow as tf +# var: 1d tensor, raw weights for each choice +# tempered_var: raw weights with temperature applied +# inv_t: inverse of the temperature to use when normalizing `var` +# normalized: same shape as var, but where each item is between 0 and 1, and +# the sum is 1 +SelectionWeights = collections.namedtuple( + 'SelectionWeights', ['var', 'tempered_var', 'inv_t', 'normalized']) + +def create_selection_weights(name, + type_, + shape, + inv_t=1, + initializer=tf.zeros_initializer(), + regularizer=None, + names=None): + """Create a SelectionWeights tuple. + + Args: + name: Name for the underlying variable containing the unnormalized weights. + type_: 'softmax' or 'sigmoid' or ('softmax_topk', k) where k is an int. + shape: Shape for the variable. + initializer: Initializer for the variable, passed to `tf.get_variable`. + regularizer: Regularizer for the variable. A callable which accepts + `tempered_var` and `normalized`. + inv_t: Inverse of the temperature to use in normalization. + names: Name of each selection. + + Returns: + The created SelectionWeights tuple. + """ + + + var = tf.get_variable(name, shape, initializer=initializer) + + if callable(inv_t): + inv_t = inv_t(var) + if inv_t == 1: + tempered_var = var + else: + tempered_var = var * inv_t + + if type_ == 'softmax': + weights = tf.nn.softmax(tempered_var) + elif type_ == 'sigmoid': + weights = tf.nn.sigmoid(tempered_var) + elif isinstance(type_, (list, tuple)) and type_[0] == 'softmax_topk': + assert len(shape) == 1 + + # TODO(rshin): Change this to select without replacement? + selection = tf.multinomial(tf.expand_dims(var, axis=0), k) + selection = tf.squeeze(selection, axis=0) # [k] selected classes. + to_run = tf.one_hot(selection, shape[0]) # [k x nmodules] one-hot. + # [nmodules], 0=not run, 1=run. + to_run = tf.minimum(tf.reduce_sum(to_run, axis=0), 1) + weights = tf.nn.softmax(tempered_var - 1e9 * (1.0 - to_run)) + else: + return ValueError(type) + + if regularizer is not None: + loss = regularizer(tempered_var, weights) + if loss is not None: + tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, loss) + + if names is not None: + tf.get_collection_ref('selection_weight_names/' + var.name).extend( + names.flatten() + if isinstance(names, np.ndarray) else names) + tf.add_to_collection('selection_weight_names_tensor/' + var.name, + tf.constant(names)) + + return SelectionWeights( + var=var, + tempered_var=tempered_var, + inv_t=inv_t, + normalized=weights) + + +def kernel_premultiplier(max_kernel_size, kernel_sizes, input_channels, + kernel_selection_weights, channel_selection_weights): + '''Get weights to multiply the kernel with, before convolving. + + Args: + max_kernel_size: (int, int) tuple giving the largest kernel size. + kernel_sizes: A list of (height, width) pairs of integers, containing + different kernel sizes to use. + input_channels: A list of (begin, end) pairs of integers, which describe + which channels in the input to use. + kernel_selection_weights: SelectionWeights object to use for choosing + among kernel sizes. + channel_selection_weights: SelectionWeights object to use for choosing + among which input channels to use. + ''' + + + kernel_weights = [] + for kernel_i, (h, w) in enumerate(kernel_sizes): + top = (max_kernel_size[0] - h) // 2 + bot = max_kernel_size[0] - h - top + left = (max_kernel_size[1] - w) // 2 + right = max_kernel_size[1] - w - left + kernel_weight = tf.fill((h, w), + kernel_selection_weights.normalized[kernel_i]) + if top != 0 or bot != 0 or left != 0 or right != 0: + kernel_weight = tf.pad(kernel_weight, [[top, bot], [left, right]]) + kernel_weights.append(kernel_weight) + kernel_weight = tf.add_n(kernel_weights) + + channel_weights = [] + min_channel = np.min(input_channels) + max_channel = np.max(input_channels) + for channel_i, (begin, end) in enumerate(input_channels): + channel_weight = tf.pad( + tf.fill((end - begin,), + channel_selection_weights.normalized[channel_i]), + [[begin - min_channel, max_channel - end]]) + channel_weights.append(channel_weight) + channel_weight = tf.add_n(channel_weights) + + multiplier = (tf.reshape(kernel_weight, max_kernel_size + (1, 1)) * + tf.reshape(channel_weight, (1, 1, -1, 1))) + return multiplier + +def make_subseparable_kernel( + kernel_size, + input_channels, + filters, + separability, + kernel_initializer, + kernel_regularizer): + '''Make a kernel to do subseparable convolution wiht `tf.nn.conv2d`. + + Args: + kernel_size: (height, width) tuple. + input_channels: Number of input channels. + filters: Number of output channels. + separability: Integer denoting separability. + kernel_initializer: Initializer to use for the kernel. + kernel_regularizer: Regularizer to use for the kernel. + + Returns: + A 4D tensor. + ''' + + if separability == 1: + # Non-separable convolution + return tf.get_variable( + 'kernel', + kernel_size + (input_channels, filters), + initializer=kernel_initializer, + regularizer=kernel_regularizer) + + elif separability == 0 or separability == -1: + # Separable convolution + # TODO(rshin): Check initialization is as expected, as these are not 4D. + depthwise_kernel = tf.get_variable( + 'depthwise_kernel', + kernel_size + (input_channels,), + initializer=kernel_initializer, + regularizer=kernel_regularizer) + + pointwise_kernel = tf.get_variable( + 'pointwise_kernel', + (input_channels, filters), + initializer=kernel_initializer, + regularizer=kernel_regularizer) + + expanded_depthwise_kernel = tf.transpose( + tf.scatter_nd( + indices=tf.tile( + tf.expand_dims( + tf.range(0, input_channels), axis=1), [1, 2]), + updates=tf.transpose(depthwise_kernel, (2, 0, 1)), + shape=(input_channels, input_channels) + kernel_size), (2, 3, 0, 1)) + + return tf.reshape( + tf.matmul( + tf.reshape(expanded_depthwise_kernel, (-1, input_channels)), + pointwise_kernel), kernel_size + (input_channels, filters)) + + elif separability >= 2: + assert filters % separability == 0, (filters, separability) + assert input_channels % separability == 0, (filters, separability) + + raise NotImplementedError + + elif separability <= -2: + separability *= -1 + assert filters % separability == 0, (filters, separability) + assert input_channels % separability == 0, (filters, separability) + + raise NotImplementedError + + +def multi_subseparable_conv( + inputs, + filters, + kernel_sizes, + input_channels, + separabilities, + + kernel_selection_weights=None, + channel_selection_weights=None, + separability_selection_weights=None, + + kernel_selection_weights_params={}, + channel_selection_weights_params={}, + separability_selection_weights_params={}, + + kernel_initializer=None, + kernel_regularizer=None, + + scope=None): + ''' + Simultaneously compute different kinds of convolutions on + different subsets of the input. + + Args: + inputs: 4D tensor containing the input, in NHWC format. + filters: Integer, number of output channels. + kernel_sizes: A list of (height, width) pairs of integers, containing + different kernel sizes to use. + input_channels: A list of (begin, end) pairs of integers, which describe + which channels in the input to use. + + kernel_selection_weights: SelectionWeights object to use for choosing + among kernel sizes. + channel_selection_weights: SelectionWeights object to use for choosing + among which input channels to use. + + kernel_size_seletion_weights_params: dict with up to three keys + - initializer + - regularizer + - inv_t + channel_seletion_weights_params: dict with up to three keys + - initializer + - regularizer + - inv_t + + kernel_initializer: Initializer to use for kernels. + kernel_regularizer: Regularizer to use for kernels. + + Returns: + Result of convolution. + + ''' + + # Get input image size + input_shape = inputs.get_shape().as_list() + assert len(input_shape) == 4 + in_channels = input_shape[3] + assert in_channels is not None + + max_kernel_size = tuple(np.max(kernel_sizes, axis=0)) + max_num_channels = np.max(input_channels) - np.min(input_channels) + # kernel height x kernel width x + # number of input channels x number of output channels + max_kernel_shape = max_kernel_size + (max_num_channels, filters) + + with tf.variable_scope('selection_weights'): + if kernel_selection_weights is None: + kernel_selection_weights = create_selection_weights( + 'kernels', + 'softmax', (len(kernel_sizes),), + names=[ + 'kernel_h{}_w{}'.format(h, w) for h, w in kernel_sizes + ], + **kernel_selection_weights_params) + + if channel_selection_weights is None: + channel_selection_weights = create_selection_weights( + 'channels', + 'softmax', (len(input_channels),), + names=[ + 'channels_{}_{}'.format(c1, c2) for c1, c2 in input_channels + ], + **channel_selection_weights_params) + + if separability_selection_weights is None: + separability_selection_weights = create_selection_weights( + 'separability', + 'softmax', (len(separabilities),), + names=[ + 'separability_{}'.format(s) for s in separabilities + ], + **separability_selection_weights_params) + + kernels = [] + for separability in separabilities: + with tf.variable_scope('separablity_{}'.format(separability)): + kernel = make_subseparable_kernel( + max_kernel_size, + max_num_channels, + filters, + separability, + kernel_initializer, + kernel_regularizer) + + premultiplier = kernel_premultiplier( + max_kernel_size, kernel_sizes, input_channels, + kernel_selection_weights, + channel_selection_weights) + + kernels.append(kernel * premultiplier) + + kernel = tf.add_n([ + separability_selection_weights.normalized[i] * k + for i, k in enumerate(kernels) + ]) + + if np.min(input_channels) != 0 or np.max(input_channels) != in_channels: + inputs = inputs[:, :, :, np.min(input_channels):np.max(input_channels)] + + return tf.nn.conv2d( + inputs, + filter=kernel, + strides=[1, 1, 1, 1], + padding='SAME', + data_format='NHWC', + name='conv2d') + def conv_module(kw, kh, sep, div): def convfn(x, hparams): @@ -38,6 +360,12 @@ def convfn(x, hparams): name="conv_%d%d_sep%d_div%d" % (kw, kh, sep, div)) return convfn +def multi_conv_module(kernel_sizes, seps): + def convfn(x, hparams): + return multi_subseparable_conv(x, hparams.hidden_size, kernel_sizes, + [(0, hparams.hidden_size)], seps) + + return convfn def layernorm_module(x, hparams): return common_layers.layer_norm(x, hparams.hidden_size, name="layer_norm") @@ -75,47 +403,49 @@ def shakeshake_binary_module(x, y, hparams): def run_binary_modules(modules, cur1, cur2, hparams): """Run binary modules.""" - selection_var = tf.get_variable("selection", [len(modules)], - initializer=tf.zeros_initializer()) - inv_t = 100.0 * common_layers.inverse_exp_decay( - hparams.anneal_until, min_value=0.01) - selected_weights = tf.nn.softmax(selection_var * inv_t) + selection_weights = create_selection_weights( + 'selection', + 'softmax', + shape=[len(modules)], + inv_t=100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01)) + all_res = [modules[n](cur1, cur2, hparams) for n in xrange(len(modules))] all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) - res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) + res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1]) return tf.reduce_sum(res, axis=0) def run_unary_modules_basic(modules, cur, hparams): """Run unary modules.""" - selection_var = tf.get_variable("selection", [len(modules)], - initializer=tf.zeros_initializer()) - inv_t = 100.0 * common_layers.inverse_exp_decay( - hparams.anneal_until, min_value=0.01) - selected_weights = tf.nn.softmax(selection_var * inv_t) + selection_weights = create_selection_weights( + 'selection', + 'softmax', + shape=[len(modules)], + inv_t=100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01)) + all_res = [modules[n](cur, hparams) for n in xrange(len(modules))] all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) - res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) + res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1]) return tf.reduce_sum(res, axis=0) def run_unary_modules_sample(modules, cur, hparams, k): """Run modules, sampling k.""" - selection_var = tf.get_variable("selection", [len(modules)], - initializer=tf.zeros_initializer()) - selection = tf.multinomial(tf.expand_dims(selection_var, axis=0), k) - selection = tf.squeeze(selection, axis=0) # [k] selected classes. - to_run = tf.one_hot(selection, len(modules)) # [k x nmodules] one-hot. - to_run = tf.reduce_sum(to_run, axis=0) # [nmodules], 0=not run, 1=run. - all_res = [tf.cond(tf.less(to_run[n], 0.1), + selection_weights = create_selection_weights( + 'selection', + ('softmax_topk', k), + shape=[len(modules)], + inv_t=100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01)) + + all_res = [tf.cond(tf.less(selection_weights.normalized[n], 1e-6), lambda: tf.zeros_like(cur), lambda i=n: modules[i](cur, hparams)) for n in xrange(len(modules))] - inv_t = 100.0 * common_layers.inverse_exp_decay( - hparams.anneal_until, min_value=0.01) - selected_weights = tf.nn.softmax(selection_var * inv_t - 1e9 * (1.0 - to_run)) all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) - res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) + res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1]) return tf.reduce_sum(res, axis=0) @@ -138,10 +468,12 @@ class BlueNet(t2t_model.T2TModel): def model_fn_body(self, features): hparams = self._hparams - conv_modules = [conv_module(kw, kw, sep, div) - for kw in [3, 5, 7] - for sep in [0, 1] - for div in [1]] + [identity_module] + # TODO(rshin): Add back div. + # TODO(rshin): Give identity_module lower weight by default. + conv_modules = [ + multi_conv_module( + kernel_sizes=[(3, 3), (5, 5), (7, 7)], seps=[0, 1]), identity_module + ] activation_modules = [identity_module, lambda x, _: tf.nn.relu(x), lambda x, _: tf.nn.elu(x), From 5adadf34d05044fb53b91b4af63f7fdb283d3190 Mon Sep 17 00:00:00 2001 From: Niki Parmar Date: Wed, 5 Jul 2017 17:27:35 -0700 Subject: [PATCH 0084/4095] modifications to data reader and problem_hparams PiperOrigin-RevId: 161027111 --- setup.py | 2 +- .../data_generators/problem_hparams.py | 12 +- tensor2tensor/models/bluenet.py | 386 ++---------------- tensor2tensor/models/common_layers.py | 41 +- tensor2tensor/models/common_layers_test.py | 4 +- tensor2tensor/models/lstm.py | 197 --------- tensor2tensor/models/lstm_test.py | 23 -- tensor2tensor/utils/data_reader.py | 85 ++-- tensor2tensor/utils/metrics.py | 14 +- 9 files changed, 127 insertions(+), 637 deletions(-) diff --git a/setup.py b/setup.py index 254631d9f..ba3ea532a 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.10', + version='1.0.9', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 1a61a6690..7ad0a57ad 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -664,8 +664,17 @@ def image_mscoco_tokens(model_hparams, vocab_count): } p.batch_size_multiplier = 256 p.max_expected_batch_size_per_shard = 2 + + +def img2img_imagenet(unused_model_hparams): + """Image 2 Image for imagenet dataset.""" + p = default_problem_hparams() + p.input_modality = {"inputs": ("image:identity", None)} + p.target_modality = ("image:identity", None) + p.batch_size_multiplier = 256 + p.max_expected_batch_size_per_shard = 4 p.input_space_id = 1 - p.target_space_id = 3 + p.target_space_id = 1 return p @@ -732,4 +741,5 @@ def image_mscoco_tokens(model_hparams, vocab_count): "image_mscoco_tokens_128k_tune": lambda p: image_mscoco_tokens(p, 2**17), "image_mscoco_tokens_128k_test": lambda p: image_mscoco_tokens(p, 2**17), "image_imagenet": image_imagenet, + "img2img_imagenet": img2img_imagenet, } diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index 253b79944..8f4c89eac 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -20,7 +20,6 @@ # Dependency imports -import collections from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.models import common_hparams @@ -28,329 +27,8 @@ from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model -import numpy as np import tensorflow as tf -# var: 1d tensor, raw weights for each choice -# tempered_var: raw weights with temperature applied -# inv_t: inverse of the temperature to use when normalizing `var` -# normalized: same shape as var, but where each item is between 0 and 1, and -# the sum is 1 -SelectionWeights = collections.namedtuple( - 'SelectionWeights', ['var', 'tempered_var', 'inv_t', 'normalized']) - -def create_selection_weights(name, - type_, - shape, - inv_t=1, - initializer=tf.zeros_initializer(), - regularizer=None, - names=None): - """Create a SelectionWeights tuple. - - Args: - name: Name for the underlying variable containing the unnormalized weights. - type_: 'softmax' or 'sigmoid' or ('softmax_topk', k) where k is an int. - shape: Shape for the variable. - initializer: Initializer for the variable, passed to `tf.get_variable`. - regularizer: Regularizer for the variable. A callable which accepts - `tempered_var` and `normalized`. - inv_t: Inverse of the temperature to use in normalization. - names: Name of each selection. - - Returns: - The created SelectionWeights tuple. - """ - - - var = tf.get_variable(name, shape, initializer=initializer) - - if callable(inv_t): - inv_t = inv_t(var) - if inv_t == 1: - tempered_var = var - else: - tempered_var = var * inv_t - - if type_ == 'softmax': - weights = tf.nn.softmax(tempered_var) - elif type_ == 'sigmoid': - weights = tf.nn.sigmoid(tempered_var) - elif isinstance(type_, (list, tuple)) and type_[0] == 'softmax_topk': - assert len(shape) == 1 - - # TODO(rshin): Change this to select without replacement? - selection = tf.multinomial(tf.expand_dims(var, axis=0), k) - selection = tf.squeeze(selection, axis=0) # [k] selected classes. - to_run = tf.one_hot(selection, shape[0]) # [k x nmodules] one-hot. - # [nmodules], 0=not run, 1=run. - to_run = tf.minimum(tf.reduce_sum(to_run, axis=0), 1) - weights = tf.nn.softmax(tempered_var - 1e9 * (1.0 - to_run)) - else: - return ValueError(type) - - if regularizer is not None: - loss = regularizer(tempered_var, weights) - if loss is not None: - tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, loss) - - if names is not None: - tf.get_collection_ref('selection_weight_names/' + var.name).extend( - names.flatten() - if isinstance(names, np.ndarray) else names) - tf.add_to_collection('selection_weight_names_tensor/' + var.name, - tf.constant(names)) - - return SelectionWeights( - var=var, - tempered_var=tempered_var, - inv_t=inv_t, - normalized=weights) - - -def kernel_premultiplier(max_kernel_size, kernel_sizes, input_channels, - kernel_selection_weights, channel_selection_weights): - '''Get weights to multiply the kernel with, before convolving. - - Args: - max_kernel_size: (int, int) tuple giving the largest kernel size. - kernel_sizes: A list of (height, width) pairs of integers, containing - different kernel sizes to use. - input_channels: A list of (begin, end) pairs of integers, which describe - which channels in the input to use. - kernel_selection_weights: SelectionWeights object to use for choosing - among kernel sizes. - channel_selection_weights: SelectionWeights object to use for choosing - among which input channels to use. - ''' - - - kernel_weights = [] - for kernel_i, (h, w) in enumerate(kernel_sizes): - top = (max_kernel_size[0] - h) // 2 - bot = max_kernel_size[0] - h - top - left = (max_kernel_size[1] - w) // 2 - right = max_kernel_size[1] - w - left - kernel_weight = tf.fill((h, w), - kernel_selection_weights.normalized[kernel_i]) - if top != 0 or bot != 0 or left != 0 or right != 0: - kernel_weight = tf.pad(kernel_weight, [[top, bot], [left, right]]) - kernel_weights.append(kernel_weight) - kernel_weight = tf.add_n(kernel_weights) - - channel_weights = [] - min_channel = np.min(input_channels) - max_channel = np.max(input_channels) - for channel_i, (begin, end) in enumerate(input_channels): - channel_weight = tf.pad( - tf.fill((end - begin,), - channel_selection_weights.normalized[channel_i]), - [[begin - min_channel, max_channel - end]]) - channel_weights.append(channel_weight) - channel_weight = tf.add_n(channel_weights) - - multiplier = (tf.reshape(kernel_weight, max_kernel_size + (1, 1)) * - tf.reshape(channel_weight, (1, 1, -1, 1))) - return multiplier - -def make_subseparable_kernel( - kernel_size, - input_channels, - filters, - separability, - kernel_initializer, - kernel_regularizer): - '''Make a kernel to do subseparable convolution wiht `tf.nn.conv2d`. - - Args: - kernel_size: (height, width) tuple. - input_channels: Number of input channels. - filters: Number of output channels. - separability: Integer denoting separability. - kernel_initializer: Initializer to use for the kernel. - kernel_regularizer: Regularizer to use for the kernel. - - Returns: - A 4D tensor. - ''' - - if separability == 1: - # Non-separable convolution - return tf.get_variable( - 'kernel', - kernel_size + (input_channels, filters), - initializer=kernel_initializer, - regularizer=kernel_regularizer) - - elif separability == 0 or separability == -1: - # Separable convolution - # TODO(rshin): Check initialization is as expected, as these are not 4D. - depthwise_kernel = tf.get_variable( - 'depthwise_kernel', - kernel_size + (input_channels,), - initializer=kernel_initializer, - regularizer=kernel_regularizer) - - pointwise_kernel = tf.get_variable( - 'pointwise_kernel', - (input_channels, filters), - initializer=kernel_initializer, - regularizer=kernel_regularizer) - - expanded_depthwise_kernel = tf.transpose( - tf.scatter_nd( - indices=tf.tile( - tf.expand_dims( - tf.range(0, input_channels), axis=1), [1, 2]), - updates=tf.transpose(depthwise_kernel, (2, 0, 1)), - shape=(input_channels, input_channels) + kernel_size), (2, 3, 0, 1)) - - return tf.reshape( - tf.matmul( - tf.reshape(expanded_depthwise_kernel, (-1, input_channels)), - pointwise_kernel), kernel_size + (input_channels, filters)) - - elif separability >= 2: - assert filters % separability == 0, (filters, separability) - assert input_channels % separability == 0, (filters, separability) - - raise NotImplementedError - - elif separability <= -2: - separability *= -1 - assert filters % separability == 0, (filters, separability) - assert input_channels % separability == 0, (filters, separability) - - raise NotImplementedError - - -def multi_subseparable_conv( - inputs, - filters, - kernel_sizes, - input_channels, - separabilities, - - kernel_selection_weights=None, - channel_selection_weights=None, - separability_selection_weights=None, - - kernel_selection_weights_params={}, - channel_selection_weights_params={}, - separability_selection_weights_params={}, - - kernel_initializer=None, - kernel_regularizer=None, - - scope=None): - ''' - Simultaneously compute different kinds of convolutions on - different subsets of the input. - - Args: - inputs: 4D tensor containing the input, in NHWC format. - filters: Integer, number of output channels. - kernel_sizes: A list of (height, width) pairs of integers, containing - different kernel sizes to use. - input_channels: A list of (begin, end) pairs of integers, which describe - which channels in the input to use. - - kernel_selection_weights: SelectionWeights object to use for choosing - among kernel sizes. - channel_selection_weights: SelectionWeights object to use for choosing - among which input channels to use. - - kernel_size_seletion_weights_params: dict with up to three keys - - initializer - - regularizer - - inv_t - channel_seletion_weights_params: dict with up to three keys - - initializer - - regularizer - - inv_t - - kernel_initializer: Initializer to use for kernels. - kernel_regularizer: Regularizer to use for kernels. - - Returns: - Result of convolution. - - ''' - - # Get input image size - input_shape = inputs.get_shape().as_list() - assert len(input_shape) == 4 - in_channels = input_shape[3] - assert in_channels is not None - - max_kernel_size = tuple(np.max(kernel_sizes, axis=0)) - max_num_channels = np.max(input_channels) - np.min(input_channels) - # kernel height x kernel width x - # number of input channels x number of output channels - max_kernel_shape = max_kernel_size + (max_num_channels, filters) - - with tf.variable_scope('selection_weights'): - if kernel_selection_weights is None: - kernel_selection_weights = create_selection_weights( - 'kernels', - 'softmax', (len(kernel_sizes),), - names=[ - 'kernel_h{}_w{}'.format(h, w) for h, w in kernel_sizes - ], - **kernel_selection_weights_params) - - if channel_selection_weights is None: - channel_selection_weights = create_selection_weights( - 'channels', - 'softmax', (len(input_channels),), - names=[ - 'channels_{}_{}'.format(c1, c2) for c1, c2 in input_channels - ], - **channel_selection_weights_params) - - if separability_selection_weights is None: - separability_selection_weights = create_selection_weights( - 'separability', - 'softmax', (len(separabilities),), - names=[ - 'separability_{}'.format(s) for s in separabilities - ], - **separability_selection_weights_params) - - kernels = [] - for separability in separabilities: - with tf.variable_scope('separablity_{}'.format(separability)): - kernel = make_subseparable_kernel( - max_kernel_size, - max_num_channels, - filters, - separability, - kernel_initializer, - kernel_regularizer) - - premultiplier = kernel_premultiplier( - max_kernel_size, kernel_sizes, input_channels, - kernel_selection_weights, - channel_selection_weights) - - kernels.append(kernel * premultiplier) - - kernel = tf.add_n([ - separability_selection_weights.normalized[i] * k - for i, k in enumerate(kernels) - ]) - - if np.min(input_channels) != 0 or np.max(input_channels) != in_channels: - inputs = inputs[:, :, :, np.min(input_channels):np.max(input_channels)] - - return tf.nn.conv2d( - inputs, - filter=kernel, - strides=[1, 1, 1, 1], - padding='SAME', - data_format='NHWC', - name='conv2d') - def conv_module(kw, kh, sep, div): def convfn(x, hparams): @@ -360,12 +38,6 @@ def convfn(x, hparams): name="conv_%d%d_sep%d_div%d" % (kw, kh, sep, div)) return convfn -def multi_conv_module(kernel_sizes, seps): - def convfn(x, hparams): - return multi_subseparable_conv(x, hparams.hidden_size, kernel_sizes, - [(0, hparams.hidden_size)], seps) - - return convfn def layernorm_module(x, hparams): return common_layers.layer_norm(x, hparams.hidden_size, name="layer_norm") @@ -403,49 +75,47 @@ def shakeshake_binary_module(x, y, hparams): def run_binary_modules(modules, cur1, cur2, hparams): """Run binary modules.""" - selection_weights = create_selection_weights( - 'selection', - 'softmax', - shape=[len(modules)], - inv_t=100.0 * common_layers.inverse_exp_decay( - hparams.anneal_until, min_value=0.01)) - + selection_var = tf.get_variable("selection", [len(modules)], + initializer=tf.zeros_initializer()) + inv_t = 100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01) + selected_weights = tf.nn.softmax(selection_var * inv_t) all_res = [modules[n](cur1, cur2, hparams) for n in xrange(len(modules))] all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) - res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1]) + res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) return tf.reduce_sum(res, axis=0) def run_unary_modules_basic(modules, cur, hparams): """Run unary modules.""" - selection_weights = create_selection_weights( - 'selection', - 'softmax', - shape=[len(modules)], - inv_t=100.0 * common_layers.inverse_exp_decay( - hparams.anneal_until, min_value=0.01)) - + selection_var = tf.get_variable("selection", [len(modules)], + initializer=tf.zeros_initializer()) + inv_t = 100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01) + selected_weights = tf.nn.softmax(selection_var * inv_t) all_res = [modules[n](cur, hparams) for n in xrange(len(modules))] all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) - res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1]) + res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) return tf.reduce_sum(res, axis=0) def run_unary_modules_sample(modules, cur, hparams, k): """Run modules, sampling k.""" - selection_weights = create_selection_weights( - 'selection', - ('softmax_topk', k), - shape=[len(modules)], - inv_t=100.0 * common_layers.inverse_exp_decay( - hparams.anneal_until, min_value=0.01)) - - all_res = [tf.cond(tf.less(selection_weights.normalized[n], 1e-6), + selection_var = tf.get_variable("selection", [len(modules)], + initializer=tf.zeros_initializer()) + selection = tf.multinomial(tf.expand_dims(selection_var, axis=0), k) + selection = tf.squeeze(selection, axis=0) # [k] selected classes. + to_run = tf.one_hot(selection, len(modules)) # [k x nmodules] one-hot. + to_run = tf.reduce_sum(to_run, axis=0) # [nmodules], 0=not run, 1=run. + all_res = [tf.cond(tf.less(to_run[n], 0.1), lambda: tf.zeros_like(cur), lambda i=n: modules[i](cur, hparams)) for n in xrange(len(modules))] + inv_t = 100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01) + selected_weights = tf.nn.softmax(selection_var * inv_t - 1e9 * (1.0 - to_run)) all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) - res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1]) + res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) return tf.reduce_sum(res, axis=0) @@ -468,12 +138,10 @@ class BlueNet(t2t_model.T2TModel): def model_fn_body(self, features): hparams = self._hparams - # TODO(rshin): Add back div. - # TODO(rshin): Give identity_module lower weight by default. - conv_modules = [ - multi_conv_module( - kernel_sizes=[(3, 3), (5, 5), (7, 7)], seps=[0, 1]), identity_module - ] + conv_modules = [conv_module(kw, kw, sep, div) + for kw in [3, 5, 7] + for sep in [0, 1] + for div in [1]] + [identity_module] activation_modules = [identity_module, lambda x, _: tf.nn.relu(x), lambda x, _: tf.nn.elu(x), diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 7895acd04..2e2b74268 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -293,7 +293,7 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): static_shape = inputs.get_shape() if not static_shape or len(static_shape) != 4: raise ValueError("Inputs to conv must have statically known rank 4.") - #inputs.set_shape([static_shape[0], None, None, static_shape[3]]) + inputs.set_shape([static_shape[0], None, None, static_shape[3]]) # Add support for left padding. if "padding" in kwargs and kwargs["padding"] == "LEFT": dilation_rate = (1, 1) @@ -307,9 +307,9 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): width_padding = 0 if static_shape[2] == 1 else cond_padding padding = [[0, 0], [height_padding, 0], [width_padding, 0], [0, 0]] inputs = tf.pad(inputs, padding) - # Set middle two dimensions to None to prevent convolution from complaining - inputs.set_shape([static_shape[0], None, None, static_shape[3]]) kwargs["padding"] = "VALID" + # Special argument we use to force 2d kernels (see below). + force2d = kwargs.get("force2d", True) def conv2d_kernel(kernel_size_arg, name_suffix): """Call conv2d but add suffix to name.""" @@ -329,7 +329,18 @@ def conv2d_kernel(kernel_size_arg, name_suffix): kwargs["force2d"] = original_force2d return result - return conv2d_kernel(kernel_size, "single") + # Manually setting the shape to be unknown in the middle two dimensions so + # that the `tf.cond` below won't throw an error based on the convolution + # kernels being too large for the data. + inputs._shape = tf.TensorShape([static_shape[0], None, None, static_shape[3]]) # pylint: disable=protected-access + if kernel_size[1] == 1 or force2d: + # Avoiding the cond below can speed up graph and gradient construction. + return conv2d_kernel(kernel_size, "single") + return tf.cond( + tf.equal(tf.shape(inputs)[2], + 1), lambda: conv2d_kernel((kernel_size[0], 1), "small"), + lambda: conv2d_kernel(kernel_size, "std")) + def conv(inputs, filters, kernel_size, **kwargs): return conv_internal(tf.layers.conv2d, inputs, filters, kernel_size, **kwargs) @@ -555,8 +566,20 @@ def pool(inputs, window_size, pooling_type, padding, strides=(1, 1)): inputs = tf.pad(inputs, padding_) inputs.set_shape([static_shape[0], None, None, static_shape[3]]) padding = "VALID" - - return tf.nn.pool(inputs, window_size, pooling_type, padding, strides=strides) + window_size_small = (window_size[0], 1) + strides_small = (strides[0], 1) + # Manually setting the shape to be unknown in the middle two dimensions so + # that the `tf.cond` below won't throw an error based on the convolution + # kernels being too large for the data. + inputs._shape = tf.TensorShape( # pylint: disable=protected-access + [static_shape[0], None, None, static_shape[3]]) + return tf.cond( + tf.equal(tf.shape(inputs)[2], 1), + lambda: tf.nn.pool( # pylint: disable=g-long-lambda + inputs, window_size_small, pooling_type, padding, + strides=strides_small), + lambda: tf.nn.pool( # pylint: disable=g-long-lambda + inputs, window_size, pooling_type, padding, strides=strides)) def conv_block_downsample(x, @@ -1285,7 +1308,7 @@ def pad_with_zeros(logits, labels): logits, labels = pad_to_same_length(logits, labels) if len(labels.shape.as_list()) == 3: # 2-d labels. logits, labels = pad_to_same_length(logits, labels, axis=2) - return logits, labels + return labels def weights_nonzero(labels): @@ -1351,8 +1374,8 @@ def padded_cross_entropy(logits, confidence = 1.0 - label_smoothing vocab_size = tf.shape(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): - pad_logits, pad_labels = pad_with_zeros(logits, labels) - xent = smoothing_cross_entropy(pad_logits, pad_labels, vocab_size, confidence) + pad_labels = pad_with_zeros(logits, labels) + xent = smoothing_cross_entropy(logits, pad_labels, vocab_size, confidence) weights = weights_fn(pad_labels) if not reduce_sum: return xent * weights, weights diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index 8d2b4dec1..091f272d6 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -277,13 +277,13 @@ def testShiftLeft(self): self.assertAllEqual(actual, expected) def testConvStride2MultiStep(self): - x1 = np.random.rand(5, 32, 16, 11) + x1 = np.random.rand(5, 32, 1, 11) with self.test_session() as session: a = common_layers.conv_stride2_multistep( tf.constant(x1, dtype=tf.float32), 4, 16) session.run(tf.global_variables_initializer()) actual = session.run(a[0]) - self.assertEqual(actual.shape, (5, 2, 1, 16)) + self.assertEqual(actual.shape, (5, 2, 0, 16)) def testDeconvStride2MultiStep(self): x1 = np.random.rand(5, 2, 1, 11) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 13ef8dc4c..992c42db4 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -21,152 +21,11 @@ # Dependency imports from tensor2tensor.models import common_layers -from tensor2tensor.models import common_hparams from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model import tensorflow as tf -from tensorflow.python.ops import rnn_cell_impl -from tensorflow.python.util import nest -import collections - -# Track Tuple of state and attention values -AttentionTuple = collections.namedtuple("AttentionTuple", ("state", "attention")) - - -class ExternalAttentionCellWrapper(rnn_cell_impl.RNNCell): - """ - Wrapper for external attention states. To be used in an encoder-decoder setup - """ - def __init__(self, cell, attn_states, attn_vec_size=None, - input_size=None, state_is_tuple=True, reuse=None): - """Create a cell with attention. - Args: - cell: an RNNCell, an attention is added to it. - attn_states: External attention states typically the encoder output in the - form [batch_size, time steps, hidden size] - attn_vec_size: integer, the number of convolutional features calculated - on attention state and a size of the hidden layer built from - base cell state. Equal attn_size to by default. - input_size: integer, the size of a hidden linear layer, - built from inputs and attention. Derived from the input tensor - by default. - state_is_tuple: If True, accepted and returned states are n-tuples, where - `n = len(cells)`. Must be set to True else will raise an exception - concatenated along the column axis. - reuse: (optional) Python boolean describing whether to reuse variables - in an existing scope. If not `True`, and the existing scope already has - the given variables, an error is raised. - Raises: - TypeError: if cell is not an RNNCell. - ValueError: if the flag `state_is_tuple` is `False` or - if shape of attn_states is not 3 or if innermost dimension (hidden size) is None. - """ - super(ExternalAttentionCellWrapper, self).__init__(_reuse=reuse) - if not rnn_cell_impl._like_rnncell(cell): # pylint: disable=protected-access - raise TypeError("The parameter cell is not RNNCell.") - - if not state_is_tuple: - raise ValueError("Only tuple state is supported") - - self._cell = cell - self._input_size = input_size - - #Validate attn_states shape - attn_shape = attn_states.get_shape() - if not attn_shape or len(attn_shape) != 3: - raise ValueError("attn_shape must be rank 3") - - self._attn_states = attn_states - self._attn_size = attn_shape[2].value - if self._attn_size is None: - raise ValueError("Hidden size of attn_states cannot be None") - - self._attn_vec_size = attn_vec_size - if self._attn_vec_size is None: - self._attn_vec_size = self._attn_size - - self._reuse = reuse - - @property - def state_size(self): - return AttentionTuple(self._cell.state_size, self._attn_size) - - - @property - def output_size(self): - return self._attn_size - - def combine_state(self, previous_state): - """ - Combines previous state (usually from an encoder) with the internal attention values - You must use this function to derive the initial state passed into this cell as it expects - a named tuple (AttentionTuple) - Args: - previous_state: State from another block that will be fed into this cell. Must have same - structure as the state of the cell wrapped by this - Returns: - Combined state (AttentionTuple) - """ - batch_size = self._attn_states.get_shape()[0].value - if batch_size is None: - batch_size = tf.shape(self._attn_states)[0] - zeroed_state = self.zero_state(batch_size, self._attn_states.dtype) - return AttentionTuple(previous_state, zeroed_state.attention) - - def call(self, inputs, state): - """Long short-term memory cell with attention (LSTMA).""" - - if(not isinstance(state, AttentionTuple)): - raise TypeError("State must be of type AttentionTuple") - - state, attns = state - attn_states = self._attn_states - attn_length = attn_states.get_shape()[1].value - if attn_length is None: - attn_length = tf.shape(attn_states)[1] - - - input_size = self._input_size - if input_size is None: - input_size = inputs.get_shape().as_list()[1] - if(attns is not None): - inputs = rnn_cell_impl._linear([inputs, attns], input_size, True) - lstm_output, new_state = self._cell(inputs, state) - - new_state_cat = tf.concat(nest.flatten(new_state), 1) - new_attns = self._attention(new_state_cat, attn_states, attn_length) - - with tf.variable_scope("attn_output_projection"): - output = rnn_cell_impl._linear([lstm_output, new_attns], self._attn_size, True) - - new_state = AttentionTuple(new_state, new_attns) - - return output, new_state - - def _attention(self, query, attn_states, attn_length): - conv2d = tf.nn.conv2d - reduce_sum = tf.reduce_sum - softmax = tf.nn.softmax - tanh = tf.tanh - - with tf.variable_scope("attention"): - k = tf.get_variable( - "attn_w", [1, 1, self._attn_size, self._attn_vec_size]) - v = tf.get_variable("attn_v", [self._attn_vec_size, 1]) - hidden = tf.reshape(attn_states, - [-1, attn_length, 1, self._attn_size]) - hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME") - y = rnn_cell_impl._linear(query, self._attn_vec_size, True) - y = tf.reshape(y, [-1, 1, 1, self._attn_vec_size]) - s = reduce_sum(v * tanh(hidden_features + y), [2, 3]) - a = softmax(s) - d = reduce_sum( - tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2]) - new_attns = tf.reshape(d, [-1, self._attn_size]) - - return new_attns def lstm(inputs, hparams, train, name, initial_state=None): """Run LSTM cell on inputs, assuming they are [batch x time x size].""" @@ -185,25 +44,6 @@ def dropout_lstm_cell(): dtype=tf.float32, time_major=False) -def lstm_attention_decoder(inputs, hparams, train, name, initial_state, attn_states): - """Run LSTM cell with attention on inputs, assuming they are [batch x time x size].""" - - def dropout_lstm_cell(): - return tf.contrib.rnn.DropoutWrapper( - tf.nn.rnn_cell.BasicLSTMCell(hparams.hidden_size), - input_keep_prob=1.0 - hparams.dropout * tf.to_float(train)) - - layers = [dropout_lstm_cell() for _ in range(hparams.num_hidden_layers)] - cell = ExternalAttentionCellWrapper(tf.nn.rnn_cell.MultiRNNCell(layers), attn_states, - attn_vec_size=hparams.attn_vec_size) - initial_state = cell.combine_state(initial_state) - with tf.variable_scope(name): - return tf.nn.dynamic_rnn( - cell, - inputs, - initial_state=initial_state, - dtype=tf.float32, - time_major=False) def lstm_seq2seq_internal(inputs, targets, hparams, train): """The basic LSTM seq2seq model, main step used for training.""" @@ -223,23 +63,6 @@ def lstm_seq2seq_internal(inputs, targets, hparams, train): initial_state=final_encoder_state) return tf.expand_dims(decoder_outputs, axis=2) -def lstm_seq2seq_internal_attention(inputs, targets, hparams, train): - """LSTM seq2seq model with attention, main step used for training.""" - with tf.variable_scope("lstm_seq2seq_attention"): - # Flatten inputs. - inputs = common_layers.flatten4d3d(inputs) - # LSTM encoder. - encoder_outputs, final_encoder_state = lstm( - tf.reverse(inputs, axis=[1]), hparams, train, "encoder") - # LSTM decoder with attention - shifted_targets = common_layers.shift_left(targets) - decoder_outputs, _ = lstm_attention_decoder( - common_layers.flatten4d3d(shifted_targets), - hparams, - train, - "decoder", - final_encoder_state, encoder_outputs) - return tf.expand_dims(decoder_outputs, axis=2) @registry.register_model("baseline_lstm_seq2seq") class LSTMSeq2Seq(t2t_model.T2TModel): @@ -248,23 +71,3 @@ def model_fn_body(self, features): train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN return lstm_seq2seq_internal(features["inputs"], features["targets"], self._hparams, train) - -@registry.register_model("baseline_lstm_seq2seq_attention") -class LSTMSeq2SeqAttention(t2t_model.T2TModel): - - def model_fn_body(self, features): - train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN - return lstm_seq2seq_internal_attention(features["inputs"], features["targets"], - self._hparams, train) - -@registry.register_hparams -def lstm_attention(): - """hparams for LSTM with attention.""" - hparams = common_hparams.basic_params1() - hparams.batch_size = 128 - hparams.hidden_size = 128 - hparams.num_hidden_layers = 2 - - # Attention - hparams.add_hparam("attn_vec_size", hparams.hidden_size) - return hparams \ No newline at end of file diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index a216a3832..e5bdb184b 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -51,29 +51,6 @@ def testLSTMSeq2Seq(self): res = session.run(logits) self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size)) - def testLSTMSeq2Seq_attention(self): - vocab_size = 9 - x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) - y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1)) - hparams = lstm.lstm_attention() - - p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, - vocab_size) - x = tf.constant(x, dtype=tf.int32) - x._shape = tf.TensorShape([None, None, 1, 1]) - - with self.test_session() as session: - features = { - "inputs": x, - "targets": tf.constant(y, dtype=tf.int32), - } - model = lstm.LSTMSeq2SeqAttention( - hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - sharded_logits, _, _ = model.model_fn(features) - logits = tf.concat(sharded_logits, 0) - session.run(tf.global_variables_initializer()) - res = session.run(logits) - self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size)) if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index d09787ae4..7b0663cf8 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -130,6 +130,52 @@ def examples_queue(data_sources, } +def preprocessing(examples, data_file_pattern, mode): + """Preprocessing of examples.""" + if "image" in data_file_pattern: + # Small single-example pre-processing for images. + def resize(img, size): + return tf.to_int64(tf.image.resize_images(img, [size, size])) + def preprocess(img): + img = tf.image.resize_images(img, [360, 360]) + img = common_layers.image_augmentation(tf.to_float(img) / 255.) + return tf.to_int64(img * 255.) + if ("image_imagenet" in data_file_pattern or + "image_mscoco" in data_file_pattern): + examples["inputs"] = tf.cast(examples["inputs"], tf.int64) + # For imagnet/coco, resize images to 299x299 as is standard. + inputs = examples["inputs"] + if mode == tf.contrib.learn.ModeKeys.TRAIN: + examples["inputs"] = tf.cond( # Preprocess 80% of the time. + tf.less(tf.random_uniform([]), 0.8), + lambda img=inputs: preprocess(img), + lambda img=inputs: resize(img, 299)) + else: + examples["inputs"] = tf.to_int64(resize(inputs, 299)) + elif ("image_cifar10" in data_file_pattern + and mode == tf.contrib.learn.ModeKeys.TRAIN): + examples["inputs"] = common_layers.cifar_image_augmentation( + examples["inputs"]) + elif "img2img" in data_file_pattern: + inputs = examples["inputs"] + examples["inputs"] = resize(inputs, 16) + examples["targets"] = resize(inputs, 64) + + elif "audio" in data_file_pattern: + # Reshape audio to proper shape + sample_count = tf.to_int32(examples.pop("audio/sample_count")) + sample_width = tf.to_int32(examples.pop("audio/sample_width")) + channel_count = 1 + examples["inputs"] = tf.reshape(examples["inputs"], + [sample_count, sample_width, channel_count]) + if "wsj" in data_file_pattern: + examples["inputs"] = tf.bitcast(examples["inputs"], tf.int32) + elif "a2q_20161229" in data_file_pattern: + # we forgot the EOS when we preprocessed this data. + examples["targets"] = tf.concat([examples["targets"], [1]], 0) + return examples + + def input_pipeline(data_file_pattern, capacity, mode): """Input pipeline, returns a dictionary of tensors from queues.""" # Read from image TFRecords if the file has "image" in its name. @@ -181,44 +227,7 @@ def input_pipeline(data_file_pattern, capacity, mode): capacity=capacity, data_items_to_decoders=data_items_to_decoders) - if "image" in data_file_pattern: - # Small single-example pre-processing for images. - examples["inputs"] = tf.cast(examples["inputs"], tf.int64) - if ("image_imagenet" in data_file_pattern or - "image_mscoco" in data_file_pattern): - # For imagnet/coco, resize images to 299x299 as is standard. - def resize(img): - return tf.to_int64(tf.image.resize_images(img, [299, 299])) - - def preprocess(img): - img = tf.image.resize_images(img, [360, 360]) - img = common_layers.image_augmentation(tf.to_float(img) / 255.) - return tf.to_int64(img * 255.) - - inputs = examples["inputs"] - if mode == tf.contrib.learn.ModeKeys.TRAIN: - examples["inputs"] = tf.cond( # Preprocess 80% of the time. - tf.less(tf.random_uniform([]), 0.8), - lambda img=inputs: preprocess(img), - lambda img=inputs: resize(img)) - else: - examples["inputs"] = tf.to_int64(resize(inputs)) - elif ("image_cifar10" in data_file_pattern - and mode == tf.contrib.learn.ModeKeys.TRAIN): - examples["inputs"] = common_layers.cifar_image_augmentation( - examples["inputs"]) - elif "audio" in data_file_pattern: - # Reshape audio to proper shape - sample_count = tf.to_int32(examples.pop("audio/sample_count")) - sample_width = tf.to_int32(examples.pop("audio/sample_width")) - channel_count = 1 - examples["inputs"] = tf.reshape(examples["inputs"], - [sample_count, sample_width, channel_count]) - if "wsj" in data_file_pattern: - examples["inputs"] = tf.bitcast(examples["inputs"], tf.int32) - elif "a2q_20161229" in data_file_pattern: - # we forgot the EOS when we preprocessed this data. - examples["targets"] = tf.concat([examples["targets"], [1]], 0) + examples = preprocessing(examples, data_file_pattern, mode) # We do not want int64s as they do are not supported on GPUs. return {k: tf.to_int32(v) for (k, v) in six.iteritems(examples)} diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index eca6143c7..ecc02fd5e 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -37,10 +37,10 @@ def padded_accuracy_topk(predictions, weights_fn=common_layers.weights_nonzero): """Percentage of times that top-k predictions matches labels on non-0s.""" with tf.variable_scope("padded_accuracy_topk", values=[predictions, labels]): - padded_predictions, padded_labels = common_layers.pad_with_zeros(predictions, labels) + padded_labels = common_layers.pad_with_zeros(predictions, labels) weights = weights_fn(padded_labels) - effective_k = tf.minimum(k, tf.shape(padded_predictions)[-1]) - _, outputs = tf.nn.top_k(padded_predictions, k=effective_k) + effective_k = tf.minimum(k, tf.shape(predictions)[-1]) + _, outputs = tf.nn.top_k(predictions, k=effective_k) outputs = tf.to_int32(outputs) padded_labels = tf.expand_dims(padded_labels, axis=-1) padded_labels += tf.zeros_like(outputs) # Pad to same shape. @@ -61,9 +61,9 @@ def padded_sequence_accuracy(predictions, """Percentage of times that predictions matches labels everywhere (non-0).""" with tf.variable_scope( "padded_sequence_accuracy", values=[predictions, labels]): - paded_predictions, padded_labels = common_layers.pad_with_zeros(predictions, labels) + padded_labels = common_layers.pad_with_zeros(predictions, labels) weights = weights_fn(padded_labels) - outputs = tf.to_int32(tf.argmax(paded_predictions, axis=-1)) + outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights axis = list(range(1, len(outputs.get_shape()))) correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis)) @@ -84,9 +84,9 @@ def padded_accuracy(predictions, weights_fn=common_layers.weights_nonzero): """Percentage of times that predictions matches labels on non-0s.""" with tf.variable_scope("padded_accuracy", values=[predictions, labels]): - padded_predictions, padded_labels = common_layers.pad_with_zeros(predictions, labels) + padded_labels = common_layers.pad_with_zeros(predictions, labels) weights = weights_fn(padded_labels) - outputs = tf.to_int32(tf.argmax(padded_predictions, axis=-1)) + outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) return tf.to_float(tf.equal(outputs, padded_labels)), weights From b88c13b1b121e0924068564943d74bd8a3406383 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Wed, 5 Jul 2017 19:55:29 -0700 Subject: [PATCH 0085/4095] Correct text encoder, MultiModel, other merges. PiperOrigin-RevId: 161036600 --- setup.py | 2 +- .../data_generators/generator_utils.py | 17 +- tensor2tensor/data_generators/text_encoder.py | 17 +- tensor2tensor/models/bluenet.py | 402 ++++++++++++++++-- tensor2tensor/models/common_layers.py | 40 +- tensor2tensor/models/common_layers_test.py | 4 +- tensor2tensor/models/lstm.py | 206 ++++++++- tensor2tensor/models/lstm_test.py | 24 ++ tensor2tensor/models/multimodel.py | 258 ++++++----- tensor2tensor/models/multimodel_test.py | 3 +- tensor2tensor/utils/metrics.py | 22 +- 11 files changed, 796 insertions(+), 199 deletions(-) diff --git a/setup.py b/setup.py index ba3ea532a..254631d9f 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.9', + version='1.0.10', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 8c2d75fbe..a5d4816b7 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -29,7 +29,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin import six.moves.urllib_request as urllib # Imports urllib on Python2, urllib.request on Python3 -from tensor2tensor.data_generators.text_encoder import SubwordTextEncoder +from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators.tokenizer import Tokenizer import tensorflow as tf @@ -218,15 +218,18 @@ def gunzip_file(gz_path, new_path): ] -def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): - """Generate a vocabulary from the datasets listed in _DATA_FILE_URLS.""" +def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): + """Generate a vocabulary from the datasets in sources (_DATA_FILE_URLS).""" vocab_filepath = os.path.join(tmp_dir, vocab_filename) if os.path.exists(vocab_filepath): - vocab = SubwordTextEncoder(vocab_filepath) + tf.logging.info("Found vocab file: %s", vocab_filepath) + vocab = text_encoder.SubwordTextEncoder(vocab_filepath) return vocab + sources = sources or _DATA_FILE_URLS + tf.logging.info("Generating vocab from: %s", str(sources)) tokenizer = Tokenizer() - for source in _DATA_FILE_URLS: + for source in sources: url = source[0] filename = os.path.basename(url) read_type = "r:gz" if "tgz" in filename else "r" @@ -259,9 +262,9 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): break line = line.strip() file_byte_budget -= len(line) - _ = tokenizer.encode(line) + _ = tokenizer.encode(text_encoder.native_to_unicode(line)) - vocab = SubwordTextEncoder.build_to_target_size( + vocab = text_encoder.SubwordTextEncoder.build_to_target_size( vocab_size, tokenizer.token_counts, 1, 1e3) vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 7b00a85d2..0a05cb721 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -36,10 +36,10 @@ # Conversion between Unicode and UTF-8, if required (on Python2) -_native_to_unicode = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) +native_to_unicode = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) -_unicode_to_native = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) +unicode_to_native = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) # Reserved tokens for things like padding and EOS symbols. @@ -220,7 +220,7 @@ def encode(self, raw_text): a list of integers in the range [0, vocab_size) """ return self._tokens_to_subtokens(self._tokenizer.encode( - _native_to_unicode(raw_text))) + native_to_unicode(raw_text))) def decode(self, subtokens): """Converts a sequence of subtoken ids to a native string. @@ -230,7 +230,7 @@ def decode(self, subtokens): Returns: a native string """ - return _unicode_to_native(self._tokenizer.decode( + return unicode_to_native(self._tokenizer.decode( self._subtokens_to_tokens(subtokens))) @property @@ -335,6 +335,9 @@ def bisect(min_val, max_val): else: other_subtokenizer = bisect(min_val, present_count - 1) + if other_subtokenizer is None: + return subtokenizer + if (abs(other_subtokenizer.vocab_size - target_size) < abs(subtokenizer.vocab_size - target_size)): return other_subtokenizer @@ -449,13 +452,13 @@ def _load_from_file(self, filename): subtoken_strings = [] with tf.gfile.Open(filename) as f: for line in f: - subtoken_strings.append(_native_to_unicode(line.strip()[1:-1])) + subtoken_strings.append(native_to_unicode(line.strip()[1:-1])) self._init_from_list(subtoken_strings) def store_to_file(self, filename): with tf.gfile.Open(filename, "w") as f: for subtoken_string in self._all_subtoken_strings: - f.write("'" + _unicode_to_native(subtoken_string) + "'\n") + f.write("'" + unicode_to_native(subtoken_string) + "'\n") def _escape_token(self, token): r"""Escape away underscores and OOV characters and append '_'. @@ -524,7 +527,7 @@ def get_token_counts(cls, text_filepattern, corpus_max_lines): with tf.gfile.Open(text_filename) as f: for line in f: # The tokenizer updates token_counts in encode() - tok.encode(_native_to_unicode(line.strip())) + tok.encode(native_to_unicode(line.strip())) lines_read += 1 if corpus_max_lines > 0 and lines_read > corpus_max_lines: return tok.token_counts diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index 8f4c89eac..c0533ee42 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -18,8 +18,12 @@ from __future__ import division from __future__ import print_function +import collections + # Dependency imports +import numpy as np + from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.models import common_hparams @@ -30,6 +34,328 @@ import tensorflow as tf +# var: 1d tensor, raw weights for each choice +# tempered_var: raw weights with temperature applied +# inv_t: inverse of the temperature to use when normalizing `var` +# normalized: same shape as var, but where each item is between 0 and 1, and +# the sum is 1 +SelectionWeights = collections.namedtuple( + "SelectionWeights", ["var", "tempered_var", "inv_t", "normalized"]) + + +def create_selection_weights(name, + type_, + shape, + inv_t=1, + initializer=tf.zeros_initializer(), + regularizer=None, + names=None): + """Create a SelectionWeights tuple. + + Args: + name: Name for the underlying variable containing the unnormalized weights. + type_: "softmax" or "sigmoid" or ("softmax_topk", k) where k is an int. + shape: Shape for the variable. + inv_t: Inverse of the temperature to use in normalization. + initializer: Initializer for the variable, passed to `tf.get_variable`. + regularizer: Regularizer for the variable. A callable which accepts + `tempered_var` and `normalized`. + names: Name of each selection. + + Returns: + The created SelectionWeights tuple. + + Raises: + ValueError: if type_ is not in the supported range. + """ + var = tf.get_variable(name, shape, initializer=initializer) + + if callable(inv_t): + inv_t = inv_t(var) + if inv_t == 1: + tempered_var = var + else: + tempered_var = var * inv_t + + if type_ == "softmax": + weights = tf.nn.softmax(tempered_var) + elif type_ == "sigmoid": + weights = tf.nn.sigmoid(tempered_var) + elif isinstance(type_, (list, tuple)) and type_[0] == "softmax_topk": + assert len(shape) == 1 + # TODO(rshin): Change this to select without replacement? + selection = tf.multinomial(tf.expand_dims(var, axis=0), 4) + selection = tf.squeeze(selection, axis=0) # [k] selected classes. + to_run = tf.one_hot(selection, shape[0]) # [k x nmodules] one-hot. + # [nmodules], 0=not run, 1=run. + to_run = tf.minimum(tf.reduce_sum(to_run, axis=0), 1) + weights = tf.nn.softmax(tempered_var - 1e9 * (1.0 - to_run)) + else: + raise ValueError("Unknown type: %s" % type_) + + if regularizer is not None: + loss = regularizer(tempered_var, weights) + if loss is not None: + tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, loss) + + if names is not None: + tf.get_collection_ref("selection_weight_names/" + var.name).extend( + names.flatten() + if isinstance(names, np.ndarray) else names) + tf.add_to_collection("selection_weight_names_tensor/" + var.name, + tf.constant(names)) + + return SelectionWeights( + var=var, + tempered_var=tempered_var, + inv_t=inv_t, + normalized=weights) + + +def kernel_premultiplier(max_kernel_size, kernel_sizes, input_channels, + kernel_selection_weights, channel_selection_weights): + """Get weights to multiply the kernel with, before convolving. + + Args: + max_kernel_size: (int, int) tuple giving the largest kernel size. + kernel_sizes: A list of (height, width) pairs of integers, containing + different kernel sizes to use. + input_channels: A list of (begin, end) pairs of integers, which describe + which channels in the input to use. + kernel_selection_weights: SelectionWeights object to use for choosing + among kernel sizes. + channel_selection_weights: SelectionWeights object to use for choosing + among which input channels to use. + + Returns: + The multiplier. + """ + kernel_weights = [] + for kernel_i, (h, w) in enumerate(kernel_sizes): + top = (max_kernel_size[0] - h) // 2 + bot = max_kernel_size[0] - h - top + left = (max_kernel_size[1] - w) // 2 + right = max_kernel_size[1] - w - left + kernel_weight = tf.fill((h, w), + kernel_selection_weights.normalized[kernel_i]) + if top != 0 or bot != 0 or left != 0 or right != 0: + kernel_weight = tf.pad(kernel_weight, [[top, bot], [left, right]]) + kernel_weights.append(kernel_weight) + kernel_weight = tf.add_n(kernel_weights) + + channel_weights = [] + min_channel = np.min(input_channels) + max_channel = np.max(input_channels) + for channel_i, (begin, end) in enumerate(input_channels): + channel_weight = tf.pad( + tf.fill((end - begin,), + channel_selection_weights.normalized[channel_i]), + [[begin - min_channel, max_channel - end]]) + channel_weights.append(channel_weight) + channel_weight = tf.add_n(channel_weights) + + multiplier = (tf.reshape(kernel_weight, max_kernel_size + (1, 1)) * + tf.reshape(channel_weight, (1, 1, -1, 1))) + return multiplier + + +def make_subseparable_kernel( + kernel_size, + input_channels, + filters, + separability, + kernel_initializer, + kernel_regularizer): + """Make a kernel to do subseparable convolution wiht `tf.nn.conv2d`. + + Args: + kernel_size: (height, width) tuple. + input_channels: Number of input channels. + filters: Number of output channels. + separability: Integer denoting separability. + kernel_initializer: Initializer to use for the kernel. + kernel_regularizer: Regularizer to use for the kernel. + + Returns: + A 4D tensor. + """ + if separability == 1: + # Non-separable convolution + return tf.get_variable( + "kernel", + kernel_size + (input_channels, filters), + initializer=kernel_initializer, + regularizer=kernel_regularizer) + + elif separability == 0 or separability == -1: + # Separable convolution + # TODO(rshin): Check initialization is as expected, as these are not 4D. + depthwise_kernel = tf.get_variable( + "depthwise_kernel", + kernel_size + (input_channels,), + initializer=kernel_initializer, + regularizer=kernel_regularizer) + + pointwise_kernel = tf.get_variable( + "pointwise_kernel", + (input_channels, filters), + initializer=kernel_initializer, + regularizer=kernel_regularizer) + + expanded_depthwise_kernel = tf.transpose( + tf.scatter_nd( + indices=tf.tile( + tf.expand_dims( + tf.range(0, input_channels), axis=1), [1, 2]), + updates=tf.transpose(depthwise_kernel, (2, 0, 1)), + shape=(input_channels, input_channels) + kernel_size), (2, 3, 0, 1)) + + return tf.reshape( + tf.matmul( + tf.reshape(expanded_depthwise_kernel, (-1, input_channels)), + pointwise_kernel), kernel_size + (input_channels, filters)) + + elif separability >= 2: + assert filters % separability == 0, (filters, separability) + assert input_channels % separability == 0, (filters, separability) + + raise NotImplementedError + + elif separability <= -2: + separability *= -1 + assert filters % separability == 0, (filters, separability) + assert input_channels % separability == 0, (filters, separability) + + raise NotImplementedError + + +def multi_subseparable_conv( + inputs, + filters, + kernel_sizes, + input_channels, + separabilities, + kernel_selection_weights=None, + channel_selection_weights=None, + separability_selection_weights=None, + kernel_selection_weights_params=None, + channel_selection_weights_params=None, + separability_selection_weights_params=None, + kernel_initializer=None, + kernel_regularizer=None, + scope=None): + """Simultaneously compute different kinds of convolutions on subsets of input. + + Args: + inputs: 4D tensor containing the input, in NHWC format. + filters: Integer, number of output channels. + kernel_sizes: A list of (height, width) pairs of integers, containing + different kernel sizes to use. + input_channels: A list of (begin, end) pairs of integers, which describe + which channels in the input to use. + separabilities: An integer or a list, how separable are the convolutions. + kernel_selection_weights: SelectionWeights object to use for choosing + among kernel sizes. + channel_selection_weights: SelectionWeights object to use for choosing + among which input channels to use. + separability_selection_weights: SelectionWeights object to use for choosing + separability. + kernel_selection_weights_params: dict with up to three keys + - initializer + - regularizer + - inv_t + channel_selection_weights_params: dict with up to three keys + - initializer + - regularizer + - inv_t + separability_selection_weights_params: dict with up to three keys + - initializer + - regularizer + - inv_t + kernel_initializer: Initializer to use for kernels. + kernel_regularizer: Regularizer to use for kernels. + scope: the scope to use. + + Returns: + Result of convolution. + """ + kernel_selection_weights_params = kernel_selection_weights_params or {} + channel_selection_weights_params = channel_selection_weights_params or {} + if separability_selection_weights_params is None: + separability_selection_weights_params = {} + + # Get input image size. + input_shape = inputs.get_shape().as_list() + assert len(input_shape) == 4 + in_channels = input_shape[3] + assert in_channels is not None + + max_kernel_size = tuple(np.max(kernel_sizes, axis=0)) + max_num_channels = np.max(input_channels) - np.min(input_channels) + + with tf.variable_scope(scope or "selection_weights"): + if kernel_selection_weights is None: + kernel_selection_weights = create_selection_weights( + "kernels", + "softmax", (len(kernel_sizes),), + names=[ + "kernel_h{}_w{}".format(h, w) for h, w in kernel_sizes + ], + **kernel_selection_weights_params) + + if channel_selection_weights is None: + channel_selection_weights = create_selection_weights( + "channels", + "softmax", (len(input_channels),), + names=[ + "channels_{}_{}".format(c1, c2) for c1, c2 in input_channels + ], + **channel_selection_weights_params) + + if separability_selection_weights is None: + separability_selection_weights = create_selection_weights( + "separability", + "softmax", (len(separabilities),), + names=[ + "separability_{}".format(s) for s in separabilities + ], + **separability_selection_weights_params) + + kernels = [] + for separability in separabilities: + with tf.variable_scope("separablity_{}".format(separability)): + kernel = make_subseparable_kernel( + max_kernel_size, + max_num_channels, + filters, + separability, + kernel_initializer, + kernel_regularizer) + + premultiplier = kernel_premultiplier( + max_kernel_size, kernel_sizes, input_channels, + kernel_selection_weights, + channel_selection_weights) + + kernels.append(kernel * premultiplier) + + kernel = tf.add_n([ + separability_selection_weights.normalized[i] * k + for i, k in enumerate(kernels) + ]) + + if np.min(input_channels) != 0 or np.max(input_channels) != in_channels: + inputs = inputs[:, :, :, np.min(input_channels):np.max(input_channels)] + + return tf.nn.conv2d( + inputs, + filter=kernel, + strides=[1, 1, 1, 1], + padding="SAME", + data_format="NHWC", + name="conv2d") + + def conv_module(kw, kh, sep, div): def convfn(x, hparams): return common_layers.subseparable_conv( @@ -39,6 +365,13 @@ def convfn(x, hparams): return convfn +def multi_conv_module(kernel_sizes, seps): + def convfn(x, hparams): + return multi_subseparable_conv(x, hparams.hidden_size, kernel_sizes, + [(0, hparams.hidden_size)], seps) + return convfn + + def layernorm_module(x, hparams): return common_layers.layer_norm(x, hparams.hidden_size, name="layer_norm") @@ -75,47 +408,46 @@ def shakeshake_binary_module(x, y, hparams): def run_binary_modules(modules, cur1, cur2, hparams): """Run binary modules.""" - selection_var = tf.get_variable("selection", [len(modules)], - initializer=tf.zeros_initializer()) - inv_t = 100.0 * common_layers.inverse_exp_decay( - hparams.anneal_until, min_value=0.01) - selected_weights = tf.nn.softmax(selection_var * inv_t) + selection_weights = create_selection_weights( + "selection", + "softmax", + shape=[len(modules)], + inv_t=100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01)) all_res = [modules[n](cur1, cur2, hparams) for n in xrange(len(modules))] all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) - res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) + res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1]) return tf.reduce_sum(res, axis=0) def run_unary_modules_basic(modules, cur, hparams): """Run unary modules.""" - selection_var = tf.get_variable("selection", [len(modules)], - initializer=tf.zeros_initializer()) - inv_t = 100.0 * common_layers.inverse_exp_decay( - hparams.anneal_until, min_value=0.01) - selected_weights = tf.nn.softmax(selection_var * inv_t) + selection_weights = create_selection_weights( + "selection", + "softmax", + shape=[len(modules)], + inv_t=100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01)) all_res = [modules[n](cur, hparams) for n in xrange(len(modules))] all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) - res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) + res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1]) return tf.reduce_sum(res, axis=0) def run_unary_modules_sample(modules, cur, hparams, k): """Run modules, sampling k.""" - selection_var = tf.get_variable("selection", [len(modules)], - initializer=tf.zeros_initializer()) - selection = tf.multinomial(tf.expand_dims(selection_var, axis=0), k) - selection = tf.squeeze(selection, axis=0) # [k] selected classes. - to_run = tf.one_hot(selection, len(modules)) # [k x nmodules] one-hot. - to_run = tf.reduce_sum(to_run, axis=0) # [nmodules], 0=not run, 1=run. - all_res = [tf.cond(tf.less(to_run[n], 0.1), + selection_weights = create_selection_weights( + "selection", + ("softmax_topk", k), + shape=[len(modules)], + inv_t=100.0 * common_layers.inverse_exp_decay( + hparams.anneal_until, min_value=0.01)) + all_res = [tf.cond(tf.less(selection_weights.normalized[n], 1e-6), lambda: tf.zeros_like(cur), lambda i=n: modules[i](cur, hparams)) for n in xrange(len(modules))] - inv_t = 100.0 * common_layers.inverse_exp_decay( - hparams.anneal_until, min_value=0.01) - selected_weights = tf.nn.softmax(selection_var * inv_t - 1e9 * (1.0 - to_run)) all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) - res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1]) + res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1]) return tf.reduce_sum(res, axis=0) @@ -138,10 +470,10 @@ class BlueNet(t2t_model.T2TModel): def model_fn_body(self, features): hparams = self._hparams - conv_modules = [conv_module(kw, kw, sep, div) - for kw in [3, 5, 7] - for sep in [0, 1] - for div in [1]] + [identity_module] + # TODO(rshin): Give identity_module lower weight by default. + multi_conv = multi_conv_module( + kernel_sizes=[(3, 3), (5, 5), (7, 7)], seps=[0, 1]) + conv_modules = [multi_conv, identity_module] activation_modules = [identity_module, lambda x, _: tf.nn.relu(x), lambda x, _: tf.nn.elu(x), @@ -166,20 +498,24 @@ def run_unary(x, name): x.set_shape(x_shape) return tf.nn.dropout(x, 1.0 - hparams.dropout), batch_deviation(x) - cur1, cur2, extra_loss = inputs, inputs, 0.0 + cur1, cur2, cur3, extra_loss = inputs, inputs, inputs, 0.0 cur_shape = inputs.get_shape() for i in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % i): cur1, loss1 = run_unary(cur1, "unary1") cur2, loss2 = run_unary(cur2, "unary2") - extra_loss += (loss1 + loss2) / float(hparams.num_hidden_layers) + cur3, loss3 = run_unary(cur2, "unary3") + extra_loss += (loss1 + loss2 + loss3) / float(hparams.num_hidden_layers) with tf.variable_scope("binary1"): next1 = run_binary_modules(binary_modules, cur1, cur2, hparams) next1.set_shape(cur_shape) with tf.variable_scope("binary2"): - next2 = run_binary_modules(binary_modules, cur1, cur2, hparams) + next2 = run_binary_modules(binary_modules, cur1, cur3, hparams) next2.set_shape(cur_shape) - cur1, cur2 = next1, next2 + with tf.variable_scope("binary3"): + next3 = run_binary_modules(binary_modules, cur2, cur3, hparams) + next3.set_shape(cur_shape) + cur1, cur2, cur3 = next1, next2, next3 anneal = common_layers.inverse_exp_decay(hparams.anneal_until) extra_loss *= hparams.batch_deviation_loss_factor * anneal @@ -193,7 +529,7 @@ def bluenet_base(): hparams.batch_size = 4096 hparams.hidden_size = 256 hparams.dropout = 0.2 - hparams.symbol_dropout = 0.2 + hparams.symbol_dropout = 0.5 hparams.label_smoothing = 0.1 hparams.clip_grad_norm = 2.0 hparams.num_hidden_layers = 8 @@ -211,7 +547,7 @@ def bluenet_base(): hparams.optimizer_adam_beta2 = 0.997 hparams.add_hparam("imagenet_use_2d", True) hparams.add_hparam("anneal_until", 40000) - hparams.add_hparam("batch_deviation_loss_factor", 0.001) + hparams.add_hparam("batch_deviation_loss_factor", 5.0) return hparams diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 2e2b74268..7a6ce96fb 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -293,7 +293,6 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): static_shape = inputs.get_shape() if not static_shape or len(static_shape) != 4: raise ValueError("Inputs to conv must have statically known rank 4.") - inputs.set_shape([static_shape[0], None, None, static_shape[3]]) # Add support for left padding. if "padding" in kwargs and kwargs["padding"] == "LEFT": dilation_rate = (1, 1) @@ -307,9 +306,9 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): width_padding = 0 if static_shape[2] == 1 else cond_padding padding = [[0, 0], [height_padding, 0], [width_padding, 0], [0, 0]] inputs = tf.pad(inputs, padding) + # Set middle two dimensions to None to prevent convolution from complaining + inputs.set_shape([static_shape[0], None, None, static_shape[3]]) kwargs["padding"] = "VALID" - # Special argument we use to force 2d kernels (see below). - force2d = kwargs.get("force2d", True) def conv2d_kernel(kernel_size_arg, name_suffix): """Call conv2d but add suffix to name.""" @@ -329,17 +328,7 @@ def conv2d_kernel(kernel_size_arg, name_suffix): kwargs["force2d"] = original_force2d return result - # Manually setting the shape to be unknown in the middle two dimensions so - # that the `tf.cond` below won't throw an error based on the convolution - # kernels being too large for the data. - inputs._shape = tf.TensorShape([static_shape[0], None, None, static_shape[3]]) # pylint: disable=protected-access - if kernel_size[1] == 1 or force2d: - # Avoiding the cond below can speed up graph and gradient construction. - return conv2d_kernel(kernel_size, "single") - return tf.cond( - tf.equal(tf.shape(inputs)[2], - 1), lambda: conv2d_kernel((kernel_size[0], 1), "small"), - lambda: conv2d_kernel(kernel_size, "std")) + return conv2d_kernel(kernel_size, "single") def conv(inputs, filters, kernel_size, **kwargs): @@ -566,20 +555,8 @@ def pool(inputs, window_size, pooling_type, padding, strides=(1, 1)): inputs = tf.pad(inputs, padding_) inputs.set_shape([static_shape[0], None, None, static_shape[3]]) padding = "VALID" - window_size_small = (window_size[0], 1) - strides_small = (strides[0], 1) - # Manually setting the shape to be unknown in the middle two dimensions so - # that the `tf.cond` below won't throw an error based on the convolution - # kernels being too large for the data. - inputs._shape = tf.TensorShape( # pylint: disable=protected-access - [static_shape[0], None, None, static_shape[3]]) - return tf.cond( - tf.equal(tf.shape(inputs)[2], 1), - lambda: tf.nn.pool( # pylint: disable=g-long-lambda - inputs, window_size_small, pooling_type, padding, - strides=strides_small), - lambda: tf.nn.pool( # pylint: disable=g-long-lambda - inputs, window_size, pooling_type, padding, strides=strides)) + + return tf.nn.pool(inputs, window_size, pooling_type, padding, strides=strides) def conv_block_downsample(x, @@ -1308,7 +1285,7 @@ def pad_with_zeros(logits, labels): logits, labels = pad_to_same_length(logits, labels) if len(labels.shape.as_list()) == 3: # 2-d labels. logits, labels = pad_to_same_length(logits, labels, axis=2) - return labels + return logits, labels def weights_nonzero(labels): @@ -1374,8 +1351,9 @@ def padded_cross_entropy(logits, confidence = 1.0 - label_smoothing vocab_size = tf.shape(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): - pad_labels = pad_with_zeros(logits, labels) - xent = smoothing_cross_entropy(logits, pad_labels, vocab_size, confidence) + pad_logits, pad_labels = pad_with_zeros(logits, labels) + xent = smoothing_cross_entropy(pad_logits, pad_labels, + vocab_size, confidence) weights = weights_fn(pad_labels) if not reduce_sum: return xent * weights, weights diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index 091f272d6..8d2b4dec1 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -277,13 +277,13 @@ def testShiftLeft(self): self.assertAllEqual(actual, expected) def testConvStride2MultiStep(self): - x1 = np.random.rand(5, 32, 1, 11) + x1 = np.random.rand(5, 32, 16, 11) with self.test_session() as session: a = common_layers.conv_stride2_multistep( tf.constant(x1, dtype=tf.float32), 4, 16) session.run(tf.global_variables_initializer()) actual = session.run(a[0]) - self.assertEqual(actual.shape, (5, 2, 0, 16)) + self.assertEqual(actual.shape, (5, 2, 1, 16)) def testDeconvStride2MultiStep(self): x1 = np.random.rand(5, 2, 1, 11) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 992c42db4..eb8b10cd2 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -12,19 +12,159 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Baseline models.""" +"""RNN LSTM models.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections + # Dependency imports +from tensor2tensor.models import common_hparams from tensor2tensor.models import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model import tensorflow as tf +from tensorflow.python.util import nest + + +# Track Tuple of state and attention values +AttentionTuple = collections.namedtuple("AttentionTuple", + ("state", "attention")) + + +class ExternalAttentionCellWrapper(tf.contrib.rnn.RNNCell): + """Wrapper for external attention states for an encoder-decoder setup.""" + + def __init__(self, cell, attn_states, attn_vec_size=None, + input_size=None, state_is_tuple=True, reuse=None): + """Create a cell with attention. + + Args: + cell: an RNNCell, an attention is added to it. + attn_states: External attention states typically the encoder output in the + form [batch_size, time steps, hidden size] + attn_vec_size: integer, the number of convolutional features calculated + on attention state and a size of the hidden layer built from + base cell state. Equal attn_size to by default. + input_size: integer, the size of a hidden linear layer, + built from inputs and attention. Derived from the input tensor + by default. + state_is_tuple: If True, accepted and returned states are n-tuples, where + `n = len(cells)`. Must be set to True else will raise an exception + concatenated along the column axis. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + Raises: + TypeError: if cell is not an RNNCell. + ValueError: if the flag `state_is_tuple` is `False` or if shape of + `attn_states` is not 3 or if innermost dimension (hidden size) is None. + """ + super(ExternalAttentionCellWrapper, self).__init__(_reuse=reuse) + if not state_is_tuple: + raise ValueError("Only tuple state is supported") + + self._cell = cell + self._input_size = input_size + + # Validate attn_states shape. + attn_shape = attn_states.get_shape() + if not attn_shape or len(attn_shape) != 3: + raise ValueError("attn_shape must be rank 3") + + self._attn_states = attn_states + self._attn_size = attn_shape[2].value + if self._attn_size is None: + raise ValueError("Hidden size of attn_states cannot be None") + + self._attn_vec_size = attn_vec_size + if self._attn_vec_size is None: + self._attn_vec_size = self._attn_size + + self._reuse = reuse + + @property + def state_size(self): + return AttentionTuple(self._cell.state_size, self._attn_size) + + @property + def output_size(self): + return self._attn_size + + def combine_state(self, previous_state): + """Combines previous state (from encoder) with internal attention values. + + You must use this function to derive the initial state passed into + this cell as it expects a named tuple (AttentionTuple). + + Args: + previous_state: State from another block that will be fed into this cell; + Must have same structure as the state of the cell wrapped by this. + Returns: + Combined state (AttentionTuple). + """ + batch_size = self._attn_states.get_shape()[0].value + if batch_size is None: + batch_size = tf.shape(self._attn_states)[0] + zeroed_state = self.zero_state(batch_size, self._attn_states.dtype) + return AttentionTuple(previous_state, zeroed_state.attention) + + def call(self, inputs, state): + """Long short-term memory cell with attention (LSTMA).""" + + if not isinstance(state, AttentionTuple): + raise TypeError("State must be of type AttentionTuple") + + state, attns = state + attn_states = self._attn_states + attn_length = attn_states.get_shape()[1].value + if attn_length is None: + attn_length = tf.shape(attn_states)[1] + + input_size = self._input_size + if input_size is None: + input_size = inputs.get_shape().as_list()[1] + if attns is not None: + inputs = tf.layers.dense(tf.concat([inputs, attns], axis=1), input_size) + lstm_output, new_state = self._cell(inputs, state) + + new_state_cat = tf.concat(nest.flatten(new_state), 1) + new_attns = self._attention(new_state_cat, attn_states, attn_length) + + with tf.variable_scope("attn_output_projection"): + output = tf.layers.dense(tf.concat([lstm_output, new_attns], axis=1), + self._attn_size) + + new_state = AttentionTuple(new_state, new_attns) + + return output, new_state + + def _attention(self, query, attn_states, attn_length): + conv2d = tf.nn.conv2d + reduce_sum = tf.reduce_sum + softmax = tf.nn.softmax + tanh = tf.tanh + + with tf.variable_scope("attention"): + k = tf.get_variable( + "attn_w", [1, 1, self._attn_size, self._attn_vec_size]) + v = tf.get_variable("attn_v", [self._attn_vec_size, 1]) + hidden = tf.reshape(attn_states, + [-1, attn_length, 1, self._attn_size]) + hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME") + y = tf.layers.dense(query, self._attn_vec_size) + y = tf.reshape(y, [-1, 1, 1, self._attn_vec_size]) + s = reduce_sum(v * tanh(hidden_features + y), [2, 3]) + a = softmax(s) + d = reduce_sum( + tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2]) + new_attns = tf.reshape(d, [-1, self._attn_size]) + + return new_attns def lstm(inputs, hparams, train, name, initial_state=None): @@ -45,6 +185,29 @@ def dropout_lstm_cell(): time_major=False) +def lstm_attention_decoder(inputs, hparams, train, name, + initial_state, attn_states): + """Run LSTM cell with attention on inputs of shape [batch x time x size].""" + + def dropout_lstm_cell(): + return tf.contrib.rnn.DropoutWrapper( + tf.nn.rnn_cell.BasicLSTMCell(hparams.hidden_size), + input_keep_prob=1.0 - hparams.dropout * tf.to_float(train)) + + layers = [dropout_lstm_cell() for _ in range(hparams.num_hidden_layers)] + cell = ExternalAttentionCellWrapper(tf.nn.rnn_cell.MultiRNNCell(layers), + attn_states, + attn_vec_size=hparams.attn_vec_size) + initial_state = cell.combine_state(initial_state) + with tf.variable_scope(name): + return tf.nn.dynamic_rnn( + cell, + inputs, + initial_state=initial_state, + dtype=tf.float32, + time_major=False) + + def lstm_seq2seq_internal(inputs, targets, hparams, train): """The basic LSTM seq2seq model, main step used for training.""" with tf.variable_scope("lstm_seq2seq"): @@ -64,6 +227,25 @@ def lstm_seq2seq_internal(inputs, targets, hparams, train): return tf.expand_dims(decoder_outputs, axis=2) +def lstm_seq2seq_internal_attention(inputs, targets, hparams, train): + """LSTM seq2seq model with attention, main step used for training.""" + with tf.variable_scope("lstm_seq2seq_attention"): + # Flatten inputs. + inputs = common_layers.flatten4d3d(inputs) + # LSTM encoder. + encoder_outputs, final_encoder_state = lstm( + tf.reverse(inputs, axis=[1]), hparams, train, "encoder") + # LSTM decoder with attention + shifted_targets = common_layers.shift_left(targets) + decoder_outputs, _ = lstm_attention_decoder( + common_layers.flatten4d3d(shifted_targets), + hparams, + train, + "decoder", + final_encoder_state, encoder_outputs) + return tf.expand_dims(decoder_outputs, axis=2) + + @registry.register_model("baseline_lstm_seq2seq") class LSTMSeq2Seq(t2t_model.T2TModel): @@ -71,3 +253,25 @@ def model_fn_body(self, features): train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN return lstm_seq2seq_internal(features["inputs"], features["targets"], self._hparams, train) + + +@registry.register_model("baseline_lstm_seq2seq_attention") +class LSTMSeq2SeqAttention(t2t_model.T2TModel): + + def model_fn_body(self, features): + train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN + return lstm_seq2seq_internal_attention( + features["inputs"], features["targets"], self._hparams, train) + + +@registry.register_hparams +def lstm_attention(): + """hparams for LSTM with attention.""" + hparams = common_hparams.basic_params1() + hparams.batch_size = 128 + hparams.hidden_size = 128 + hparams.num_hidden_layers = 2 + + # Attention + hparams.add_hparam("attn_vec_size", hparams.hidden_size) + return hparams diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index e5bdb184b..4c4c42909 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -51,6 +51,30 @@ def testLSTMSeq2Seq(self): res = session.run(logits) self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size)) + def testLSTMSeq2SeqAttention(self): + vocab_size = 9 + x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) + y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1)) + hparams = lstm.lstm_attention() + + p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, + vocab_size) + x = tf.constant(x, dtype=tf.int32) + x._shape = tf.TensorShape([None, None, 1, 1]) + + with self.test_session() as session: + features = { + "inputs": x, + "targets": tf.constant(y, dtype=tf.int32), + } + model = lstm.LSTMSeq2SeqAttention( + hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + sharded_logits, _, _ = model.model_fn(features) + logits = tf.concat(sharded_logits, 0) + session.run(tf.global_variables_initializer()) + res = session.run(logits) + self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size)) + if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index 66a8491f2..60f098e5e 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -19,52 +19,66 @@ # Dependency imports +from tensor2tensor.models import common_attention +from tensor2tensor.models import common_hparams from tensor2tensor.models import common_layers from tensor2tensor.models import modalities from tensor2tensor.models import slicenet -from tensor2tensor.utils import expert_utils as eu from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model import tensorflow as tf -def experts(xs, moe_n1, moe_n2, hidden_size, filter_size, dp, ps, train): - """Mixture-of-Experts layer.""" - # Set up the hyperparameters for the gating networks. - primary_gating_hp = eu.NoisyTopKGatingParams() - primary_gating_hp.num_experts = moe_n1 - if moe_n2: - # Hierarchical MoE containing moe_n1 groups of moe_n2 experts. - assert moe_n2 > 1 - secondary_gating_hp = eu.NoisyTopKGatingParams() - secondary_gating_hp.num_experts = moe_n2 - else: - # Flat mixture of moe_n1 experts. - secondary_gating_hp = None - # Set up the hyperparameters for the expert networks. - # Each expert contains a hidden RELU layer of size filter_size - expert_hp = eu.FeedForwardExpertParams() - expert_hp.hidden_layer_sizes = [filter_size] - # Create the mixture of experts. - moe = eu.DistributedMixtureOfExperts(primary_gating_hp, secondary_gating_hp, - expert_hp, hidden_size, hidden_size, ps, - "moe") - # MoE expects input tensors to be 2d. Flatten out spatial dimensions. - xs_2d = dp(tf.reshape, xs, [[-1, hidden_size]] * dp.n) - # Call the MoE - moe_out_2d, importance, load, _, _ = moe.Eval( - dp.devices, xs_2d, train, summaries=False, identifiers=None) - # Reshape the output to the original shape. - moe_out = dp(tf.reshape, moe_out_2d, dp(tf.shape, xs)) - # These losses encourage equal load on the different experts. - loss = eu.CVSquared(importance) + eu.CVSquared(load) - - # Apply residual and normalize. - def add_and_normalize(x, y): - return common_layers.layer_norm(x + y, hidden_size, name="moe_norm") - - return dp(add_and_normalize, xs, moe_out), loss +def conv_res_step(x, hparams, padding, mask): + """One step of convolutions and mid-residual.""" + k = (hparams.kernel_height, hparams.kernel_width) + k2 = (hparams.large_kernel_size, 1) + dilations_and_kernels1 = [((1, 1), k), ((1, 1), k)] + dilations_and_kernels2 = [((1, 1), k2), ((4, 4), k2)] + with tf.variable_scope("conv_res_step"): + y = common_layers.subseparable_conv_block( + x, hparams.filter_size, dilations_and_kernels1, + padding=padding, mask=mask, separabilities=0, name="residual1") + y = tf.nn.dropout(y, 1.0 - hparams.dropout) + return common_layers.subseparable_conv_block( + y, hparams.hidden_size, dilations_and_kernels2, + padding=padding, mask=mask, separabilities=0, name="residual2") + + +def residual_fn2(x, y, hparams): + y = tf.nn.dropout(y, 1.0 - hparams.dropout) + return common_layers.layer_norm(x + y) + + +def residual_fn3(x, y, z, hparams): + y = tf.nn.dropout(y, 1.0 - hparams.dropout) + z = tf.nn.dropout(z, 1.0 - hparams.dropout) + return common_layers.layer_norm(x + y + z) + + +def conv_experts(xs, hparams, dp, ps, padding, mask, layer_id): + """Convolutions + Mixture-of-Experts layer.""" + del layer_id # Unused. + train = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + conv_out = dp(conv_res_step, xs, hparams, padding, mask) + loss = 0.0 + moe_out, loss = common_layers.moe_layer( + dp, ps, xs, train, hparams.hidden_size, hparams.filter_size, + hparams.moe_n1, hparams.moe_n2, 1.0) + return dp(residual_fn3, xs, moe_out, conv_out, hparams), loss + + +def prepare_decoder(targets, target_space_emb): + """Prepare decoder.""" + decoder_self_attention_bias = ( + common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) + target_space_emb = tf.reshape(target_space_emb, [1, 1, -1]) + target_space_emb = tf.tile(target_space_emb, [tf.shape(targets)[0], 1, 1]) + decoder_input = common_layers.shift_left_3d( + targets, pad_value=target_space_emb) + decoder_input = common_attention.add_timing_signal_1d(decoder_input) + return (decoder_input, decoder_self_attention_bias) @registry.register_model @@ -74,87 +88,119 @@ def model_fn_body_sharded(self, sharded_features): train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN dp = self._data_parallelism hparams = self._hparams - targets = sharded_features["targets"] def flatten(inputs): return tf.expand_dims(common_layers.flatten4d3d(inputs), axis=2) inputs = dp(flatten, sharded_features["inputs"]) - - # Encode inputs. - def encode_half(inputs, inputs_mask, hparams): - # Add timing and encode. - inputs = common_layers.add_timing_signal(inputs) - return slicenet.multi_conv_res(inputs, "SAME", "encoder1", - hparams.num_hidden_layers // 2, - hparams, mask=inputs_mask) - - target_space_emb = dp(slicenet.embed_target_space, - sharded_features["target_space_id"], - hparams.hidden_size) inputs_pad = dp(slicenet.embedding_to_padding, inputs) inputs_mask = dp(lambda x: 1.0 - x, inputs_pad) - inputs_encoded = dp(encode_half, inputs, inputs_mask, hparams) - with tf.variable_scope("experts_enc"): - inputs_encoded, expert_loss = experts( - inputs_encoded, hparams.moe_n1, hparams.moe_n2, hparams.hidden_size, - hparams.hidden_size, dp, self._ps_devices, train) - expert_loss *= hparams.moe_loss_coef - inputs_encoded = dp( - slicenet.multi_conv_res, inputs_encoded, "SAME", - "encoder2", hparams.num_hidden_layers, hparams, - mask=inputs_mask) + inputs_encoded = dp(common_layers.add_timing_signal, inputs) + expert_loss = 0.0 + for i in xrange(hparams.num_hidden_layers): + with tf.variable_scope("enc_layer_%d" % i): + inputs_encoded, moe_loss = conv_experts( + inputs_encoded, hparams, dp, self._ps_devices, "SAME", + inputs_mask, i) + expert_loss += tf.reduce_mean(moe_loss) * hparams.moe_loss_coef # If we're just predicing a class, there is no use for a decoder, return. if isinstance(hparams.problems[self._problem_idx].target_modality, modalities.ClassLabelModality): return inputs_encoded, tf.reduce_mean(expert_loss) - # Do the middle part. - decoder_start, similarity_loss = dp( - slicenet.slicenet_middle, inputs_encoded, targets, - target_space_emb, inputs_mask, hparams) - - # Decode. - decoder_half = dp( - slicenet.multi_conv_res, - decoder_start, - "LEFT", - "decoder1", - hparams.num_hidden_layers // 2, - hparams, - train, - mask=inputs_mask, - source=inputs_encoded) - with tf.variable_scope("experts_dec"): - decoder_half, expert_dec_loss = experts( - decoder_half, hparams.moe_n1, hparams.moe_n2, hparams.hidden_size, - hparams.hidden_size, dp, self._ps_devices, train) - expert_loss += expert_dec_loss * hparams.moe_loss_coef - decoder_final = dp( - slicenet.multi_conv_res, - decoder_half, - "LEFT", - "decoder2", - hparams.num_hidden_layers // 2, - hparams, - mask=inputs_mask, - source=inputs_encoded) - - total_loss = tf.reduce_mean(expert_loss) + tf.reduce_mean(similarity_loss) - return decoder_final, total_loss - - -@registry.register_hparams("multimodel_1p8") -def multimodel_params1_p8(): - """Version for eight problem runs.""" - hparams = slicenet.slicenet_params1() - hparams.problem_choice = "distributed" - hparams.attention_type = "simple" # TODO(lukaszkaiser): add transformer. - hparams.hidden_size = 1536 - hparams.moe_n1 = 120 - hparams.shared_embedding_and_softmax_weights = int(False) + # Decoder. + inputs3d = dp(tf.squeeze, inputs, 2) + inputs_encoded3d = dp(tf.squeeze, inputs_encoded, 2) + encoder_padding = dp(common_attention.embedding_to_padding, inputs3d) + encoder_attention_bias = dp( + common_attention.attention_bias_ignore_padding, encoder_padding) + targets = dp(common_layers.flatten4d3d, sharded_features["targets"]) + target_space_emb = dp(slicenet.embed_target_space, + sharded_features["target_space_id"], + hparams.hidden_size) + + (decoder_input, decoder_self_attention_bias) = dp( + prepare_decoder, targets, target_space_emb) + + x = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.dropout) + for layer in xrange(hparams.num_hidden_layers): + with tf.variable_scope("dec_layer_%d" % layer): + with tf.variable_scope("attention"): + y = dp(common_attention.multihead_attention, + x, + None, + decoder_self_attention_bias, + hparams.hidden_size, + hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + summaries=False, + name="decoder_self_attention") + z = dp(common_attention.multihead_attention, + y, + inputs_encoded3d, + encoder_attention_bias, + hparams.hidden_size, + hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + summaries=False, + name="encdec_attention") + x = dp(residual_fn3, x, y, z, hparams) + with tf.variable_scope("ffn"): + if str(layer) in hparams.moe_layers.split(","): + y, moe_loss = common_layers.moe_layer( + dp, self._ps_devices, x, train, + hparams.hidden_size, hparams.filter_size, + hparams.moe_n1, hparams.moe_n2, hparams.moe_loss_coef) + expert_loss += tf.reduce_mean(moe_loss) + else: + y = dp(common_layers.conv_hidden_relu, + x, + hparams.filter_size, + hparams.hidden_size, + dropout=hparams.dropout) + x = dp(residual_fn2, x, y, hparams) + + x = dp(tf.expand_dims, x, 2) + return x, tf.reduce_mean(expert_loss) + + +@registry.register_hparams +def multimodel_base(): + """Base parameters for MultiModel.""" + hparams = common_hparams.basic_params1() + hparams.hidden_size = 512 + hparams.batch_size = 2048 + hparams.num_hidden_layers = 4 + hparams.learning_rate_decay_scheme = "noam" + hparams.learning_rate = 0.1 + hparams.learning_rate_warmup_steps = 4000 + hparams.initializer_gain = 1.0 hparams.dropout = 0.1 - hparams.attention_dropout = 0.1 - hparams.learning_rate_decay_scheme = "exp500k" + hparams.add_hparam("filter_size", 2048) # Add new ones like this. + hparams.add_hparam("large_kernel_size", 15) + hparams.add_hparam("attention_dropout", 0.1) + hparams.add_hparam("num_heads", 8) + hparams.add_hparam("moe_n1", 30) + hparams.add_hparam("moe_n2", 0) + hparams.add_hparam("moe_layers", "2") + hparams.add_hparam("moe_loss_coef", 1e-2) + hparams.add_hparam("imagenet_use_2d", int(True)) + return hparams + + +@registry.register_hparams +def multimodel_tiny(): + """Tiny parameters for MultiModel.""" + hparams = multimodel_base() + hparams.hidden_size = 128 + hparams.filter_size = 512 + hparams.batch_size = 512 + hparams.num_hidden_layers = 2 + hparams.moe_n1 = 10 + hparams.moe_layers = "0" return hparams diff --git a/tensor2tensor/models/multimodel_test.py b/tensor2tensor/models/multimodel_test.py index 72fe4a326..dbbd3fa8e 100644 --- a/tensor2tensor/models/multimodel_test.py +++ b/tensor2tensor/models/multimodel_test.py @@ -24,7 +24,6 @@ from tensor2tensor.data_generators import problem_hparams from tensor2tensor.models import multimodel -from tensor2tensor.models import slicenet import tensorflow as tf @@ -34,7 +33,7 @@ class MultiModelTest(tf.test.TestCase): def testMultiModel(self): x = np.random.random_integers(0, high=255, size=(3, 5, 4, 3)) y = np.random.random_integers(0, high=9, size=(3, 5, 1, 1)) - hparams = slicenet.slicenet_params1_tiny() + hparams = multimodel.multimodel_tiny() p_hparams = problem_hparams.image_cifar10(hparams) hparams.problems = [p_hparams] with self.test_session() as session: diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index ecc02fd5e..97da4cd35 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -37,10 +37,11 @@ def padded_accuracy_topk(predictions, weights_fn=common_layers.weights_nonzero): """Percentage of times that top-k predictions matches labels on non-0s.""" with tf.variable_scope("padded_accuracy_topk", values=[predictions, labels]): - padded_labels = common_layers.pad_with_zeros(predictions, labels) + padded_predictions, padded_labels = common_layers.pad_with_zeros( + predictions, labels) weights = weights_fn(padded_labels) - effective_k = tf.minimum(k, tf.shape(predictions)[-1]) - _, outputs = tf.nn.top_k(predictions, k=effective_k) + effective_k = tf.minimum(k, tf.shape(padded_predictions)[-1]) + _, outputs = tf.nn.top_k(padded_predictions, k=effective_k) outputs = tf.to_int32(outputs) padded_labels = tf.expand_dims(padded_labels, axis=-1) padded_labels += tf.zeros_like(outputs) # Pad to same shape. @@ -61,9 +62,10 @@ def padded_sequence_accuracy(predictions, """Percentage of times that predictions matches labels everywhere (non-0).""" with tf.variable_scope( "padded_sequence_accuracy", values=[predictions, labels]): - padded_labels = common_layers.pad_with_zeros(predictions, labels) + padded_predictions, padded_labels = common_layers.pad_with_zeros( + predictions, labels) weights = weights_fn(padded_labels) - outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) + outputs = tf.to_int32(tf.argmax(padded_predictions, axis=-1)) not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights axis = list(range(1, len(outputs.get_shape()))) correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis)) @@ -84,9 +86,10 @@ def padded_accuracy(predictions, weights_fn=common_layers.weights_nonzero): """Percentage of times that predictions matches labels on non-0s.""" with tf.variable_scope("padded_accuracy", values=[predictions, labels]): - padded_labels = common_layers.pad_with_zeros(predictions, labels) + padded_predictions, padded_labels = common_layers.pad_with_zeros( + predictions, labels) weights = weights_fn(padded_labels) - outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) + outputs = tf.to_int32(tf.argmax(padded_predictions, axis=-1)) return tf.to_float(tf.equal(outputs, padded_labels)), weights @@ -119,8 +122,9 @@ def fn(predictions, labels, weights, idx, weights_fn): for i, problem in enumerate(problems): name = "metrics-%s/%s" % (problem, metric_name) - weights_fn = (common_layers.weights_concatenated - if "concat" in problem else common_layers.weights_nonzero) + class_output = "image" in problem and "coco" not in problem + weights_fn = (common_layers.weights_all if class_output + else common_layers.weights_nonzero) eval_metrics[name] = functools.partial(fn, idx=i, weights_fn=weights_fn) def global_fn(predictions, labels, weights): From eba1061ab096fed82f6472e99f560c6942b6e181 Mon Sep 17 00:00:00 2001 From: Stefan Schweter Date: Thu, 6 Jul 2017 14:11:29 +0200 Subject: [PATCH 0086/4095] Bump to v1.0.11 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 254631d9f..beb3513e1 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.10', + version='1.0.11', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 80613695c3661349c58c76668ebf2970089cf12b Mon Sep 17 00:00:00 2001 From: Mark Probst Date: Fri, 7 Jul 2017 13:31:14 +0000 Subject: [PATCH 0087/4095] When encoding examples, convert strings to bytes --- tensor2tensor/data_generators/generator_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index a5d4816b7..bc354a86d 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -46,10 +46,13 @@ def to_example(dictionary): elif isinstance(v[0], float): features[k] = tf.train.Feature(float_list=tf.train.FloatList(value=v)) elif isinstance(v[0], six.string_types): + v = [bytes(x, 'utf-8') for x in v] + features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v)) + elif isinstance(v[0], bytes): features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v)) else: - raise ValueError("Value is neither an int nor a float; v: %s type: %s" % - (str(v[0]), str(type(v[0])))) + raise ValueError("Value for %s is neither an int nor a float; v: %s type: %s" % + (k, str(v[0]), str(type(v[0])))) return tf.train.Example(features=tf.train.Features(feature=features)) From 1d7363367acd94266b08ea0938a3bfd0a3e151b7 Mon Sep 17 00:00:00 2001 From: Mark Probst Date: Fri, 7 Jul 2017 13:31:46 +0000 Subject: [PATCH 0088/4095] Make sure image labels are ints Sometimes these are numpy.int64, which doesn't work at least in Python 3. --- tensor2tensor/data_generators/image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index e7e740192..377bf3e54 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -68,7 +68,7 @@ def image_generator(images, labels): yield { "image/encoded": [enc_string], "image/format": ["png"], - "image/class/label": [label], + "image/class/label": [int(label)], "image/height": [height], "image/width": [width] } From 617a7940a78c269527c20512cd5ab871806c6363 Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Fri, 7 Jul 2017 17:24:00 +0000 Subject: [PATCH 0089/4095] Change mode to executable --- tensor2tensor/bin/t2t-datagen | 0 tensor2tensor/bin/t2t-trainer | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tensor2tensor/bin/t2t-datagen mode change 100644 => 100755 tensor2tensor/bin/t2t-trainer diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100644 new mode 100755 diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100644 new mode 100755 From 5a06e7a4a4d8fcad54db31446ce4a97728e1409f Mon Sep 17 00:00:00 2001 From: Ashish Vaswani Date: Thu, 6 Jul 2017 14:21:29 -0700 Subject: [PATCH 0090/4095] internal. PiperOrigin-RevId: 161130093 --- setup.py | 2 +- .../data_generators/generator_utils.py | 7 ++--- tensor2tensor/data_generators/image.py | 2 +- tensor2tensor/models/modalities.py | 29 +++++++++++++++---- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index beb3513e1..254631d9f 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.11', + version='1.0.10', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index bc354a86d..a5d4816b7 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -46,13 +46,10 @@ def to_example(dictionary): elif isinstance(v[0], float): features[k] = tf.train.Feature(float_list=tf.train.FloatList(value=v)) elif isinstance(v[0], six.string_types): - v = [bytes(x, 'utf-8') for x in v] - features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v)) - elif isinstance(v[0], bytes): features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v)) else: - raise ValueError("Value for %s is neither an int nor a float; v: %s type: %s" % - (k, str(v[0]), str(type(v[0])))) + raise ValueError("Value is neither an int nor a float; v: %s type: %s" % + (str(v[0]), str(type(v[0])))) return tf.train.Example(features=tf.train.Features(feature=features)) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 377bf3e54..e7e740192 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -68,7 +68,7 @@ def image_generator(images, labels): yield { "image/encoded": [enc_string], "image/format": ["png"], - "image/class/label": [int(label)], + "image/class/label": [label], "image/height": [height], "image/width": [width] } diff --git a/tensor2tensor/models/modalities.py b/tensor2tensor/models/modalities.py index fd9fb4432..4e7a7e924 100644 --- a/tensor2tensor/models/modalities.py +++ b/tensor2tensor/models/modalities.py @@ -124,6 +124,10 @@ def top(self, body_output, targets): class SmallImageModality(modality.Modality): """Performs strided conv compressions for small image data.""" + def __init__(self, model_hparams, vocab_size): + super(SmallImageModality, self).__init__(model_hparams, vocab_size) + self._channels = 3 + @property def top_dimensionality(self): return 256 @@ -161,15 +165,30 @@ def targets_bottom(self, inputs): def top(self, body_output, _): with tf.variable_scope("rgb_softmax"): - var = tf.get_variable( + # seperate embedding for each channel + # assuming the body output returns a tensor of shape + # [batch_size, rows, cols, channels, self._body_input_depth] + body_output_split = tf.split(body_output, self._channels, axis=3) + output_rgb_embedding_var = tf.get_variable( "output_rgb_embedding", - [self.top_dimensionality, self._body_input_depth], + [self._channels, self.top_dimensionality, self._body_input_depth], initializer=tf.random_normal_initializer(0.0, self._body_input_depth **-0.5)) - body_output = tf.reshape(body_output, [-1, self._body_input_depth]) - logits = tf.matmul(body_output, var, transpose_b=True) + # compute logits separately for each channel + rgb_channel_logits = [] + for i in self._channels: + shape = tf.shape(body_output_split[i])[:-1] + body_output = tf.reshape(body_output_split[i], + [-1, self._body_input_depth]) + channel_logits = tf.matmul(body_output, + output_rgb_embedding_var[i], + transpose_b=True) + rgb_channel_logits.append(tf.reshape( + channel_logits, tf.concat([shape, [self.top_dimensionality]], + 0))) + + logits = tf.concat(rgb_channel_logits, axis=3) # Reshape logits to conform to CIFAR image shapes (32 by 32 by 3) - logits = tf.reshape(logits, [-1, 32, 32, 3, 256]) return logits From 54622a51054015ffc33ed5e95b71d600ce4b119d Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Fri, 7 Jul 2017 17:23:48 -0700 Subject: [PATCH 0091/4095] Improving data generation (removing some problems too) and adding eval printouts. PiperOrigin-RevId: 161265195 --- README.md | 2 +- setup.py | 2 +- tensor2tensor/bin/t2t-datagen | 105 +++++++----------- .../data_generators/algorithmic_math.py | 2 + .../data_generators/generator_utils.py | 21 ++-- tensor2tensor/data_generators/image.py | 2 +- tensor2tensor/data_generators/wmt.py | 27 ++--- tensor2tensor/data_generators/wsj_parsing.py | 13 ++- tensor2tensor/models/lstm.py | 2 +- tensor2tensor/models/transformer.py | 4 +- tensor2tensor/utils/t2t_model.py | 2 + tensor2tensor/utils/trainer_utils.py | 4 + 12 files changed, 93 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index 9adca7f45..1fdd7e883 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ t2t-datagen \ --num_shards=100 \ --problem=$PROBLEM -mv $TMP_DIR/tokens.vocab.32768 $DATA_DIR +cp $TMP_DIR/tokens.vocab.* $DATA_DIR # Train # * If you run out of memory, add --hparams='batch_size=2048' or even 1024. diff --git a/setup.py b/setup.py index 254631d9f..821a88ee2 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.10', + version='1.0.12', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index f45f63744..4e7e4529a 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -90,25 +90,16 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_reverse_nlplike_decimal8K": ( lambda: algorithmic.reverse_generator_nlplike(8000, 70, 100000, 10, 1.300), - lambda: algorithmic.reverse_generator_nlplike(8000, 700, 10000, + lambda: algorithmic.reverse_generator_nlplike(8000, 70, 10000, 10, 1.300)), "algorithmic_reverse_nlplike_decimal32K": ( lambda: algorithmic.reverse_generator_nlplike(32000, 70, 100000, 10, 1.050), - lambda: algorithmic.reverse_generator_nlplike(32000, 700, 10000, + lambda: algorithmic.reverse_generator_nlplike(32000, 70, 10000, 10, 1.050)), "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), - "algorithmic_algebra_simplify": ( - lambda: algorithmic_math.algebra_simplify(8, 0, 2, 100000), - lambda: algorithmic_math.algebra_simplify(8, 3, 3, 10000)), - "algorithmic_calculus_integrate": ( - lambda: algorithmic_math.calculus_integrate(8, 0, 2, 100000), - lambda: algorithmic_math.calculus_integrate(8, 3, 3, 10000)), - "wmt_parsing_characters": ( - lambda: wmt.parsing_character_generator(FLAGS.tmp_dir, True), - lambda: wmt.parsing_character_generator(FLAGS.tmp_dir, False)), "wmt_parsing_tokens_8k": ( lambda: wmt.parsing_token_generator(FLAGS.tmp_dir, True, 2**13), lambda: wmt.parsing_token_generator(FLAGS.tmp_dir, False, 2**13)), @@ -133,10 +124,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: wmt.enfr_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: wmt.enfr_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**15) ), - "wmt_enfr_tokens_128k": ( - lambda: wmt.enfr_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**17), - lambda: wmt.enfr_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**17) - ), "wmt_ende_characters": ( lambda: wmt.ende_character_generator(FLAGS.tmp_dir, True), lambda: wmt.ende_character_generator(FLAGS.tmp_dir, False)), @@ -151,10 +138,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**15) ), - "wmt_ende_tokens_128k": ( - lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**17), - lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**17) - ), "image_mnist_tune": ( lambda: image.mnist_generator(FLAGS.tmp_dir, True, 55000), lambda: image.mnist_generator(FLAGS.tmp_dir, True, 5000, 55000)), @@ -227,33 +210,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { 40000, vocab_filename="tokens.vocab.%d" % 2**15, vocab_size=2**15)), - "image_mscoco_tokens_128k_tune": ( - lambda: image.mscoco_generator( - FLAGS.tmp_dir, - True, - 70000, - vocab_filename="tokens.vocab.%d" % 2**17, - vocab_size=2**17), - lambda: image.mscoco_generator( - FLAGS.tmp_dir, - True, - 10000, - 70000, - vocab_filename="tokens.vocab.%d" % 2**17, - vocab_size=2**17)), - "image_mscoco_tokens_128k_test": ( - lambda: image.mscoco_generator( - FLAGS.tmp_dir, - True, - 80000, - vocab_filename="tokens.vocab.%d" % 2**17, - vocab_size=2**17), - lambda: image.mscoco_generator( - FLAGS.tmp_dir, - False, - 40000, - vocab_filename="tokens.vocab.%d" % 2**17, - vocab_size=2**17)), "snli_32k": ( lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), @@ -340,10 +296,31 @@ def set_random_seed(): def main(_): tf.logging.set_verbosity(tf.logging.INFO) - if FLAGS.problem not in _SUPPORTED_PROBLEM_GENERATORS: + + # Calculate the list of problems to generate. + problems = list(sorted(_SUPPORTED_PROBLEM_GENERATORS)) + if FLAGS.problem and FLAGS.problem[-1] == "*": + problems = [p for p in problems if p.startswith(FLAGS.problem[:-1])] + elif FLAGS.problem: + problems = [p for p in problems if p == FLAGS.problem] + else: + problems = [] + # Remove TIMIT if paths are not given. + if not FLAGS.timit_paths: + problems = [p for p in problems if "timit" not in p] + # Remove parsing if paths are not given. + if not FLAGS.parsing_path: + problems = [p for p in problems if "parsing" not in p] + # Remove en-de BPE if paths are not given. + if not FLAGS.ende_bpe_path: + problems = [p for p in problems if "ende_bpe" not in p] + + if not problems: problems_str = "\n * ".join(sorted(_SUPPORTED_PROBLEM_GENERATORS)) error_msg = ("You must specify one of the supported problems to " "generate data for:\n * " + problems_str + "\n") + error_msg += ("TIMIT, ende_bpe and parsing need data_sets specified with " + "--timit_paths, --ende_bpe_path and --parsing_path.") raise ValueError(error_msg) if not FLAGS.data_dir: @@ -352,26 +329,28 @@ def main(_): "Data will be written to default data_dir=%s.", FLAGS.data_dir) - set_random_seed() + tf.logging.info("Generating problems:\n * %s\n" % "\n * ".join(problems)) + for problem in problems: + set_random_seed() - training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[FLAGS.problem] + training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem] - tf.logging.info("Generating training data for %s.", FLAGS.problem) - train_output_files = generator_utils.generate_files( - training_gen(), FLAGS.problem + UNSHUFFLED_SUFFIX + "-train", - FLAGS.data_dir, FLAGS.num_shards, FLAGS.max_cases) + tf.logging.info("Generating training data for %s.", problem) + train_output_files = generator_utils.generate_files( + training_gen(), problem + UNSHUFFLED_SUFFIX + "-train", + FLAGS.data_dir, FLAGS.num_shards, FLAGS.max_cases) - tf.logging.info("Generating development data for %s.", FLAGS.problem) - dev_output_files = generator_utils.generate_files( - dev_gen(), FLAGS.problem + UNSHUFFLED_SUFFIX + "-dev", FLAGS.data_dir, 1) + tf.logging.info("Generating development data for %s.", problem) + dev_output_files = generator_utils.generate_files( + dev_gen(), problem + UNSHUFFLED_SUFFIX + "-dev", FLAGS.data_dir, 1) - tf.logging.info("Shuffling data...") - for fname in train_output_files + dev_output_files: - records = generator_utils.read_records(fname) - random.shuffle(records) - out_fname = fname.replace(UNSHUFFLED_SUFFIX, "") - generator_utils.write_records(records, out_fname) - tf.gfile.Remove(fname) + tf.logging.info("Shuffling data...") + for fname in train_output_files + dev_output_files: + records = generator_utils.read_records(fname) + random.shuffle(records) + out_fname = fname.replace(UNSHUFFLED_SUFFIX, "") + generator_utils.write_records(records, out_fname) + tf.gfile.Remove(fname) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/algorithmic_math.py b/tensor2tensor/data_generators/algorithmic_math.py index f5c954036..ec3b7670a 100644 --- a/tensor2tensor/data_generators/algorithmic_math.py +++ b/tensor2tensor/data_generators/algorithmic_math.py @@ -582,4 +582,6 @@ def calculus_integrate(alphabet_size=26, } except: # pylint:disable=bare-except continue + if nbr_case % 10000 == 0: + print(" calculus_integrate: generating case %d." % nbr_case) nbr_case += 1 diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index a5d4816b7..6a3475456 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -46,10 +46,14 @@ def to_example(dictionary): elif isinstance(v[0], float): features[k] = tf.train.Feature(float_list=tf.train.FloatList(value=v)) elif isinstance(v[0], six.string_types): + if not six.PY2: # Convert in python 3. + v = [bytes(x, "utf-8") for x in v] + features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v)) + elif isinstance(v[0], bytes): features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v)) else: - raise ValueError("Value is neither an int nor a float; v: %s type: %s" % - (str(v[0]), str(type(v[0])))) + raise ValueError("Value for %s is not a recognized type; v: %s type: %s" % + (k, str(v[0]), str(type(v[0])))) return tf.train.Example(features=tf.train.Features(feature=features)) @@ -111,7 +115,7 @@ def generate_files(generator, counter, shard = 0, 0 for case in generator: - if counter % 100000 == 0: + if counter > 0 and counter % 100000 == 0: tf.logging.info("Generating case %d for %s." % (counter, output_name)) counter += 1 if max_cases and counter > max_cases: @@ -176,6 +180,9 @@ def gunzip_file(gz_path, new_path): gz_path: path to the zipped file. new_path: path to where the file will be unzipped. """ + if tf.gfile.Exists(new_path): + tf.logging.info("File %s already exists, skipping unpacking" % new_path) + return tf.logging.info("Unpacking %s to %s" % (gz_path, new_path)) with gzip.open(gz_path, "rb") as gz_file: with io.open(new_path, "wb") as new_file: @@ -221,7 +228,7 @@ def gunzip_file(gz_path, new_path): def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): """Generate a vocabulary from the datasets in sources (_DATA_FILE_URLS).""" vocab_filepath = os.path.join(tmp_dir, vocab_filename) - if os.path.exists(vocab_filepath): + if tf.gfile.Exists(vocab_filepath): tf.logging.info("Found vocab file: %s", vocab_filepath) vocab = text_encoder.SubwordTextEncoder(vocab_filepath) return vocab @@ -246,7 +253,7 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): # For some datasets a second extraction is necessary. if ".gz" in lang_file: new_filepath = os.path.join(tmp_dir, lang_file[:-3]) - if os.path.exists(new_filepath): + if tf.gfile.Exists(new_filepath): tf.logging.info("Subdirectory %s already exists, skipping unpacking" % filepath) else: @@ -275,7 +282,7 @@ def read_records(filename): records = [] for record in reader: records.append(record) - if len(records) % 10000 == 0: + if len(records) % 100000 == 0: tf.logging.info("read: %d", len(records)) return records @@ -284,6 +291,6 @@ def write_records(records, out_filename): writer = tf.python_io.TFRecordWriter(out_filename) for count, record in enumerate(records): writer.write(record) - if count % 10000 == 0: + if count > 0 and count % 100000 == 0: tf.logging.info("write: %d", count) writer.close() diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index e7e740192..377bf3e54 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -68,7 +68,7 @@ def image_generator(images, labels): yield { "image/encoded": [enc_string], "image/format": ["png"], - "image/class/label": [label], + "image/class/label": [int(label)], "image/height": [height], "image/width": [width] } diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index e88a90983..1937e1b71 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -25,10 +25,19 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import wsj_parsing import tensorflow as tf +tf.flags.DEFINE_string("ende_bpe_path", "", "Path to BPE files in tmp_dir." + "Download from https://drive.google.com/open?" + "id=0B_bZck-ksdkpM25jRUN2X2UxMm8") + + +FLAGS = tf.flags.FLAGS + + # End-of-sentence marker (should correspond to the position of EOS in the # RESERVED_TOKENS list in text_encoder.py) EOS = 1 @@ -100,7 +109,7 @@ def _get_wmt_ende_dataset(directory, filename): # We expect that this file has been downloaded from: # https://drive.google.com/open?id=0B_bZck-ksdkpM25jRUN2X2UxMm8 and placed # in `directory`. - corpus_file = os.path.join(directory, "wmt16_en_de.tar.gz") + corpus_file = os.path.join(directory, FLAGS.ende_bpe_path) with tarfile.open(corpus_file, "r:gz") as corpus_tar: corpus_tar.extractall(directory) return train_path @@ -265,18 +274,10 @@ def enfr_character_generator(tmp_dir, train): character_vocab, EOS) -def parsing_character_generator(tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - filename = "parsing_%s" % ("train" if train else "dev") - text_filepath = os.path.join(tmp_dir, filename + ".text") - tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return character_generator(text_filepath, tags_filepath, character_vocab, EOS) - - def parsing_token_generator(tmp_dir, train, vocab_size): symbolizer_vocab = generator_utils.get_or_generate_vocab( tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size) - filename = "parsing_%s" % ("train" if train else "dev") - text_filepath = os.path.join(tmp_dir, filename + ".text") - tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return token_generator(text_filepath, tags_filepath, symbolizer_vocab, EOS) + filename = "%s_%s.trees" % (FLAGS.parsing_path, "train" if train else "dev") + tree_filepath = os.path.join(tmp_dir, filename) + return wsj_parsing.token_generator(tree_filepath, + symbolizer_vocab, symbolizer_vocab, EOS) diff --git a/tensor2tensor/data_generators/wsj_parsing.py b/tensor2tensor/data_generators/wsj_parsing.py index a2dda4d9d..756a44954 100644 --- a/tensor2tensor/data_generators/wsj_parsing.py +++ b/tensor2tensor/data_generators/wsj_parsing.py @@ -23,6 +23,12 @@ import tensorflow as tf +tf.flags.DEFINE_string("parsing_path", "", "Path to parsing files in tmp_dir.") + + +FLAGS = tf.flags.FLAGS + + def words_and_tags_from_wsj_tree(tree_string): """Generates linearized trees and tokens from the wsj tree format. @@ -84,9 +90,8 @@ def parsing_token_generator(tmp_dir, train, source_vocab_size, target_vocab_size): """Generator for parsing as a sequence-to-sequence task that uses tokens. - This generator assumes the files parsing_{train,dev}.wsj, which contain trees - in wsj format and wsj_{source,target}.tokens.vocab. exist in - tmp_dir. + This generator assumes the files parsing_{train,dev}.trees, which contain + trees in wsj format. Args: tmp_dir: path to the file with source sentences. @@ -103,7 +108,7 @@ def parsing_token_generator(tmp_dir, train, source_vocab_size, target_symbolizer_vocab = generator_utils.get_or_generate_vocab( tmp_dir, "wsj_target.tokens.vocab.%d" % target_vocab_size, target_vocab_size) - filename = "parsing_%s.trees" % ("train" if train else "dev") + filename = "%s_%s.trees" % (FLAGS.parsing_path, "train" if train else "dev") tree_filepath = os.path.join(tmp_dir, filename) return token_generator(tree_filepath, source_symbolizer_vocab, target_symbolizer_vocab, 1) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index eb8b10cd2..998e6756b 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -268,7 +268,7 @@ def model_fn_body(self, features): def lstm_attention(): """hparams for LSTM with attention.""" hparams = common_hparams.basic_params1() - hparams.batch_size = 128 + hparams.batch_size = 1024 hparams.hidden_size = 128 hparams.num_hidden_layers = 2 diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 88d901df9..544035efd 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -48,8 +48,8 @@ def model_fn_body(self, features): inputs = features.get("inputs") target_space = features.get("target_space_id") - inputs = tf.squeeze(inputs, 2) - targets = tf.squeeze(targets, 2) + inputs = common_layers.flatten4d3d(inputs) + targets = common_layers.flatten4d3d(targets) (encoder_input, encoder_attention_bias, _) = (transformer_prepare_encoder( inputs, target_space, hparams)) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 8b6422734..a991d3614 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -124,6 +124,8 @@ def _create_modalities(self, problem_hparams, hparams): problem_hparams.input_modality = input_modality target_modality_spec = problem_hparams.target_modality + if isinstance(target_modality_spec, modality.Modality): + return if target_modality_name: _warn_changed_modality_type(target_modality_name, target_modality_spec[0], "target") diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index fc6970188..75883accd 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -69,6 +69,7 @@ flags.DEFINE_integer("train_steps", 250000, "The number of steps to run training for.") flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") +flags.DEFINE_bool("eval_print", False, "Print eval logits and predictions.") flags.DEFINE_integer("keep_checkpoint_max", 20, "How many recent checkpoints to keep.") flags.DEFINE_bool("experimental_optimize_placement", False, @@ -452,6 +453,9 @@ def nth_model(n): sharded_logits, total_loss = result_list[1:], result_list[0] if mode == tf.contrib.learn.ModeKeys.EVAL: logits = tf.concat(sharded_logits, 0) + if FLAGS.eval_print: + logits = tf.Print(logits, [features["inputs"], logits], + "EVAL PRINT", summarize=10000) # For evaluation, return the logits layer as our predictions. run_info["predictions"] = logits train_op = None From 5052414499526d32ce7edf512d314844d3b81a1b Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Sun, 9 Jul 2017 15:21:53 +0000 Subject: [PATCH 0092/4095] Icelandic parsing components added --- tensor2tensor/data_generators/generator_utils.py | 6 +++--- tensor2tensor/models/transformer.py | 11 +++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/generator_utils.py mode change 100644 => 100755 tensor2tensor/models/transformer.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100644 new mode 100755 index 53bb27a45..d4a41081a --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -277,7 +277,7 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, index, vocab_filename parameter specifies 0 for the source or 1 for the target.""" vocab_filepath = os.path.join(tmp_dir, vocab_filename) if os.path.exists(vocab_filepath): - vocab = SubwordTextEncoder(vocab_filepath) + vocab = text_encoder.SubwordTextEncoder(vocab_filepath) return vocab tokenizer = Tokenizer() @@ -290,9 +290,9 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, index, vocab_filename if line and '\t' in line: parts = line.split('\t', maxsplit = 1) part = parts[index].strip() - _ = tokenizer.encode(part) + _ = tokenizer.encode(text_encoder.native_to_unicode(part)) - vocab = SubwordTextEncoder.build_to_target_size( + vocab = text_encoder.SubwordTextEncoder.build_to_target_size( vocab_size, tokenizer.token_counts, 1, 1e3) vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py old mode 100644 new mode 100755 index 88d901df9..d162d6f53 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -353,6 +353,17 @@ def transformer_parsing_base(): return hparams +@registry.register_hparams +def transformer_parsing_ice(): + """Hparams for parsing Icelandic text.""" + hparams = transformer_parsing_base() + hparams.batch_size = 4096 + hparams.batching_mantissa_bits = 2 + #hparams.max_length = 256 + #hparams.hidden_size = 128 + return hparams + + @registry.register_hparams def transformer_parsing_big(): """HParams for parsing on wsj semi-supervised.""" From 7580645b427b4b532288d5fc874d5e4f5b42f577 Mon Sep 17 00:00:00 2001 From: Deasuke Date: Mon, 10 Jul 2017 18:22:31 +0900 Subject: [PATCH 0093/4095] importing six.unichr --- tensor2tensor/data_generators/text_encoder.py | 2 +- tensor2tensor/data_generators/tokenizer_test.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 0a05cb721..7934dca34 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -28,7 +28,7 @@ # Dependency imports import six -from six import PY2 +from six import PY2, unichr # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import tokenizer diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py index 766630ba3..a85e244ca 100644 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -23,6 +23,7 @@ # Dependency imports +from six import unichr # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import tokenizer From 4ba7218d50726e9e94038f05c48e2813725ee0c5 Mon Sep 17 00:00:00 2001 From: Mark Probst Date: Mon, 10 Jul 2017 16:16:57 +0000 Subject: [PATCH 0094/4095] Import xrange in multimodel --- tensor2tensor/models/multimodel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index 60f098e5e..b42d71cb3 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -27,6 +27,7 @@ from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model +from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf From 08d6a633654ab32f4c9f0ce86eae0e23839dfce0 Mon Sep 17 00:00:00 2001 From: Ending_Credits Date: Tue, 11 Jul 2017 14:16:18 +0100 Subject: [PATCH 0095/4095] Added New Layer types --- tensor2tensor/models/common_layers.py | 127 ++++++++++++- tensor2tensor/models/common_layers_test.py | 72 +++++++- .../models/transformer_alternative_.py | 172 ++++++++++++++++++ tensor2tensor/models/transformer_test.py | 1 + 4 files changed, 369 insertions(+), 3 deletions(-) create mode 100644 tensor2tensor/models/transformer_alternative_.py diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 7a6ce96fb..4c63ce8ba 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -292,7 +292,7 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): """Conditional conv_fn making kernel 1d or 2d depending on inputs shape.""" static_shape = inputs.get_shape() if not static_shape or len(static_shape) != 4: - raise ValueError("Inputs to conv must have statically known rank 4.") + raise ValueError("Inputs to conv must have statically known rank 4. Shape:" +str(static_shape)) # Add support for left padding. if "padding" in kwargs and kwargs["padding"] == "LEFT": dilation_rate = (1, 1) @@ -1378,3 +1378,128 @@ def smoothing_cross_entropy(logits, labels, vocab_size, confidence): xentropy = tf.nn.softmax_cross_entropy_with_logits( logits=logits, labels=soft_targets) return xentropy - normalizing + + +def global_pool_1d(inputs, pooling_type='MAX', mask=None): + """ + Pools elements across the last dimension. Useful to a list of vectors into a + single vector to get a representation of a set. + + Args + inputs: A tensor of dimensions batch_size x sequence_length x input_dims + containing the sequences of input vectors. + pooling_type: the pooling type to use, MAX or AVR + mask: A tensor of dimensions batch_size x sequence_length containing a + mask for the inputs with 1's for existing elements, and 0's elsewhere. + Outputs + output: A tensor of dimensions batch_size x input_dims + dimension containing the sequences of transformed vectors. + """ + + with tf.name_scope("global_pool", [inputs]): + if mask is not None: + mask = tf.expand_dims(mask, axis=2) + inputs = tf.multiply(inputs, mask) + + if pooling_type == 'MAX': + # A tf.pool can be used here, but reduce is cleaner + output = tf.reduce_max(inputs, axis=1) + elif pooling_type == 'AVR': + if mask is not None: + # Some elems are dummy elems so we can't just reduce the average + output = tf.reduce_sum(inputs, axis=1) + num_elems = tf.reduce_sum(mask, axis=1, keep_dims=True) + output = tf.div(output, num_elems) + #N.B: this will cause a NaN if one batch contains no elements + else: + output = tf.reduce_mean(inputs, axis=1) + + return output + + +def linear_set_layer(layer_size, + inputs, + context=None, + activation_fn=tf.nn.relu, + dropout=0.0, + name=None): + """ + Basic layer type for doing funky things with sets. + Applies a linear transformation to each element in the input set. + If a context is supplied, it is concatenated with the inputs. + e.g. One can use global_pool_1d to get a representation of the set which + can then be used as the context for the next layer. + + Args + layer_size: Dimension to transform the input vectors to + inputs: A tensor of dimensions batch_size x sequence_length x input_dims + containing the sequences of input vectors. + context: A tensor of dimensions batch_size x context_dims + containing a global statistic about the set. + dropout: Dropout probability. + activation_fn: The activation function to use. + Outputs + output: A tensor of dimensions batch_size x sequence_length x output_dims + dimension containing the sequences of transformed vectors. + + TODO: Add bias add. + """ + + with tf.variable_scope(name, "linear_set_layer", [inputs]): + # Apply 1D convolution to apply linear filter to each element along the 2nd + # dimension + #in_size = inputs.get_shape().as_list()[-1] + outputs = conv1d(inputs, layer_size, 1, activation=None, name="set_conv") + + # Apply the context if it exists + if context is not None: + # Unfortunately tf doesn't support broadcasting via concat, but we can + # simply add the transformed context to get the same effect + context = tf.expand_dims(context, axis=1) + #context_size = context.get_shape().as_list()[-1] + cont_tfm = conv1d(context, layer_size, 1, + activation=None, name="cont_conv") + outputs += cont_tfm + + if activation_fn is not None: + outputs = activation_fn(outputs) + + if dropout != 0.0: + output = tf.nn.dropout(output, 1.0 - dropout) + + return outputs + + +def ravanbakhsh_set_layer(layer_size, + inputs, + mask=None, + activation_fn=tf.nn.tanh, + dropout=0.0, + name=None): + """ + Layer from Deep Sets paper: https://arxiv.org/abs/1611.04500 + More parameter-efficient verstion of a linear-set-layer with context. + + + Args + layer_size: Dimension to transform the input vectors to. + inputs: A tensor of dimensions batch_size x sequence_length x vector + containing the sequences of input vectors. + mask: A tensor of dimensions batch_size x sequence_length containing a + mask for the inputs with 1's for existing elements, and 0's elsewhere. + activation_fn: The activation function to use. + Outputs + output: A tensor of dimensions batch_size x sequence_length x vector + dimension containing the sequences of transformed vectors. + """ + + with tf.variable_scope(name, "ravanbakhsh_set_layer", [inputs]): + output = linear_set_layer( + layer_size, + inputs - tf.expand_dims(global_pool_1d(inputs, mask=mask), axis=1), + activation_fn=activation_fn, + name=name) + + return output + + diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index 8d2b4dec1..04d428884 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -50,7 +50,7 @@ def testSaturatingSigmoid(self): self.assertAllClose(res, [0.0, 0.0, 0.5, 1.0, 1.0]) def testFlatten4D3D(self): - x = np.random.random_integers(1, high=8, size=(3, 5, 2)) + x = np.random.randint(1, 9, size=(3, 5, 2)) with self.test_session() as session: y = common_layers.flatten4d3d(common_layers.embedding(x, 10, 7)) session.run(tf.global_variables_initializer()) @@ -58,7 +58,7 @@ def testFlatten4D3D(self): self.assertEqual(res.shape, (3, 5 * 2, 7)) def testEmbedding(self): - x = np.random.random_integers(1, high=8, size=(3, 5)) + x = np.random.randint(1, 9, size=(3, 5)) with self.test_session() as session: y = common_layers.embedding(x, 10, 16) session.run(tf.global_variables_initializer()) @@ -81,6 +81,14 @@ def testConv(self): session.run(tf.global_variables_initializer()) res = session.run(y) self.assertEqual(res.shape, (5, 5, 1, 13)) + + def testConv1d(self): + x = np.random.rand(5, 7, 11) + with self.test_session() as session: + y = common_layers.conv1d(tf.constant(x, dtype=tf.float32), 13, 1) + session.run(tf.global_variables_initializer()) + res = session.run(y) + self.assertEqual(res.shape, (5, 7, 13)) def testSeparableConv(self): x = np.random.rand(5, 7, 1, 11) @@ -293,6 +301,66 @@ def testDeconvStride2MultiStep(self): session.run(tf.global_variables_initializer()) actual = session.run(a) self.assertEqual(actual.shape, (5, 32, 1, 16)) + + def testGlobalPool1d(self): + shape = (5, 4) + x1 = np.random.rand(5,4,11) + #mask = np.random.randint(2, size=shape) + no_mask = np.ones((5,4)) + full_mask = np.zeros((5,4)) + + with self.test_session() as session: + x1_ = tf.Variable(x1, dtype=tf.float32) + no_mask_ = tf.Variable(no_mask, dtype=tf.float32) + full_mask_ = tf.Variable(full_mask, dtype=tf.float32) + + none_mask_max = common_layers.global_pool_1d(x1_) + no_mask_max = common_layers.global_pool_1d(x1_, mask=no_mask_) + result1 = tf.reduce_sum(none_mask_max - no_mask_max) + + full_mask_max = common_layers.global_pool_1d(x1_, mask=full_mask_) + result2 = tf.reduce_sum(full_mask_max) + + none_mask_avr = common_layers.global_pool_1d(x1_, 'AVR') + no_mask_avr = common_layers.global_pool_1d(x1_, 'AVR', no_mask_) + result3 = tf.reduce_sum(none_mask_avr - no_mask_avr) + + full_mask_avr = common_layers.global_pool_1d(x1_, 'AVR', full_mask_) + result4 = tf.reduce_sum(full_mask_avr) + + session.run(tf.global_variables_initializer()) + actual = session.run([result1, result2, result3, result4]) + # N.B: Last result will give a NaN. + self.assertAllEqual(actual[:3], [0.0, 0.0, 0.0]) + + + def testLinearSetLayer(self): + x1 = np.random.rand(5,4,11) + cont = np.random.rand(5,13) + with self.test_session() as session: + x1_ = tf.Variable(x1, dtype=tf.float32) + cont_ = tf.Variable(cont, dtype=tf.float32) + + simple_ff = common_layers.linear_set_layer(32, x1_) + cont_ff = common_layers.linear_set_layer(32, x1_, context=cont_) + + session.run(tf.global_variables_initializer()) + actual = session.run([simple_ff, cont_ff]) + self.assertEqual(actual[0].shape, (5,4,32)) + self.assertEqual(actual[1].shape, (5,4,32)) + + def testRavanbakhshSetLayer(self): + x1 = np.random.rand(5,4,11) + cont = np.random.rand(5,13) + with self.test_session() as session: + x1_ = tf.Variable(x1, dtype=tf.float32) + cont_ = tf.Variable(cont, dtype=tf.float32) + + layer = common_layers.ravanbakhsh_set_layer(32, x1_) + + session.run(tf.global_variables_initializer()) + actual = session.run(layer) + self.assertEqual(actual.shape, (5,4,32)) if __name__ == "__main__": diff --git a/tensor2tensor/models/transformer_alternative_.py b/tensor2tensor/models/transformer_alternative_.py new file mode 100644 index 000000000..f7c1a152f --- /dev/null +++ b/tensor2tensor/models/transformer_alternative_.py @@ -0,0 +1,172 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" + Alternative transformer network using different layer types to demonstrate + alternatives to self attention. + + Code is mostly copied from original Transformer source (if that wasn't + already obvious). + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy + +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.models import common_attention +from tensor2tensor.models import common_hparams +from tensor2tensor.models import common_layers +from tensor2tensor.models import transformer +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +@registry.register_model +class TransformerAlt(t2t_model.T2TModel): + + def model_fn_body(self, features): + # + + # Remove dropout if not training + hparams = copy.copy(self._hparams) + targets = features["targets"] + inputs = features.get("inputs") + target_space = features.get("target_space_id") + + inputs = common_layers.flatten4d3d(inputs) + targets = common_layers.flatten4d3d(targets) + + (encoder_input, encoder_attention_bias, _) = (transformer.\ + transformer_prepare_encoder(inputs, target_space, hparams) ) + (decoder_input, decoder_self_attention_bias) = transformer.\ + transformer_prepare_decoder(targets, hparams) + + def residual_fn(x, y): + return common_layers.layer_norm(x + tf.nn.dropout( + y, 1.0 - hparams.residual_dropout)) + + encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) + decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) + encoder_output = alt_transformer_encoder( + encoder_input, residual_fn, encoder_attention_bias, hparams) + + decoder_output = alt_transformer_decoder( + decoder_input, encoder_output, residual_fn, decoder_self_attention_bias, + encoder_attention_bias, hparams) + + decoder_output = tf.expand_dims(decoder_output, 2) + + return decoder_output + + +def alt_transformer_encoder(encoder_input, + residual_fn, + encoder_attention_bias, + hparams, + name="encoder"): + """ + A stack of transformer layers. + + Args: + encoder_input: a Tensor + residual_fn: a function from (layer_input, layer_output) -> combined_output + + hparams: hyperparameters for model + name: a string + + Returns: + y: a Tensors + """ + x = encoder_input + + # Summaries don't work in multi-problem setting yet. + summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 + + with tf.variable_scope(name): + for layer in xrange(hparams.num_hidden_layers): + with tf.variable_scope("layer_%d" % layer): + x = residual_fn( + x, + ravanbakhsh_set_layer(hparams.hidden_size, x, mask=encoder_attention_bias) + ) + + return x + + +def alt_transformer_decoder(decoder_input, + encoder_output, + residual_fn, + decoder_self_attention_bias, + encoder_decoder_attention_bias, + hparams, + name="decoder"): + """ + A stack of transformer layers. + + Args: + decoder_input: a Tensor + encoder_output: a Tensor + residual_fn: a function from (layer_input, layer_output) -> combined_output + encoder_decoder_attention_bias: bias Tensor for encoder-decoder attention + (see common_attention.attention_bias()) + hparams: hyperparameters for model + name: a string + + Returns: + y: a Tensors + """ + x = decoder_input + + # Summaries don't work in multi-problem setting yet. + summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 + with tf.variable_scope(name): + for layer in xrange(hparams.num_hidden_layers): + with tf.variable_scope("layer_%d" % layer): + + x = residual_fn( + x, + ravanbakhsh_set_layer(hparams.hidden_size, + common_attention.multihead_attention( + x, + encoder_output, + encoder_decoder_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + summaries=summaries, + name="encdec_attention"), + mask=decoder_self_attention_bias) + ) + + return x + + +@registry.register_hparams +def transformer_alt(): + """Set of hyperparameters.""" + hparams = transformer.transformer_base() + hparams.add_hparam("layers_per_layer", 4) + return hparams + diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 9535558a4..52c1d1ba5 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -24,6 +24,7 @@ from tensor2tensor.data_generators import problem_hparams from tensor2tensor.models import transformer +from tensor2tensor.models import transformer_alternative import tensorflow as tf From 30887b8a8ff88eb82cfb8873f675feb05bc511df Mon Sep 17 00:00:00 2001 From: vthorsteinsson Date: Tue, 11 Jul 2017 13:19:16 +0000 Subject: [PATCH 0096/4095] Target string displayed; smaller fixes --- tensor2tensor/data_generators/text_encoder.py | 6 +- tensor2tensor/models/transformer.py | 1 + tensor2tensor/utils/trainer_utils.py | 96 +++++++++---------- 3 files changed, 47 insertions(+), 56 deletions(-) mode change 100644 => 100755 tensor2tensor/utils/trainer_utils.py diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index a50442928..82e6a8aac 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -37,8 +37,6 @@ # Conversion between Unicode and UTF-8, if required (on Python2) native_to_unicode = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) - - unicode_to_native = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) @@ -46,12 +44,14 @@ PAD = "" EOS = "" RESERVED_TOKENS = [PAD, EOS] +PAD_TOKEN = RESERVED_TOKENS.index(PAD) # Normally 0 +EOS_TOKEN = RESERVED_TOKENS.index(EOS) # Normally 1 + if six.PY2: RESERVED_TOKENS_BYTES = RESERVED_TOKENS else: RESERVED_TOKENS_BYTES = [bytes(PAD, "ascii"), bytes(EOS, "ascii")] - class TextEncoder(object): """Base class for converting from ints to/from human readable strings.""" diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index d2808926b..99cd1fd6a 100755 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -359,6 +359,7 @@ def transformer_parsing_ice(): hparams = transformer_parsing_base() hparams.batch_size = 4096 hparams.batching_mantissa_bits = 2 + hparams.hidden_size = 512 #hparams.max_length = 256 #hparams.hidden_size = 128 return hparams diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py old mode 100644 new mode 100755 index 75883accd..d4ff0b79d --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -30,16 +30,15 @@ # pylint: disable=redefined-builtin from six.moves import input from six.moves import xrange -from six.moves import zip # pylint: enable=redefined-builtin from tensor2tensor.data_generators import problem_hparams +from tensor2tensor.data_generators.text_encoder import EOS_TOKEN from tensor2tensor.models import models # pylint: disable=unused-import from tensor2tensor.utils import data_reader from tensor2tensor.utils import expert_utils as eu from tensor2tensor.utils import metrics from tensor2tensor.utils import registry - import tensorflow as tf from tensorflow.contrib.learn.python.learn import learn_runner from tensorflow.python.ops import init_ops @@ -120,6 +119,16 @@ "\t..\t") +def _save_until_eos(hyp): + """ Strips everything after the first token, which is normally 1 """ + try: + index = list(hyp).index(EOS_TOKEN) + return hyp[0:index] + except ValueError: + # No EOS_TOKEN: return the array as-is + return hyp + + def make_experiment_fn(data_dir, model_name, train_steps, eval_steps): """Returns experiment_fn for learn_runner. Wraps create_experiment.""" @@ -279,7 +288,6 @@ def session_config(): rewrite_options=rewrite_options, infer_shapes=True) config = tf.ConfigProto( allow_soft_placement=True, graph_options=graph_options) - return config @@ -345,6 +353,7 @@ def learning_rate_decay(): lambda: decay, name="learning_rate_decay_warump_cond") + def model_fn(features, targets, mode): """Creates the prediction, loss, and train ops. @@ -356,10 +365,11 @@ def model_fn(features, targets, mode): Returns: A tuple consisting of the prediction, loss, and train_op. """ - if mode == tf.contrib.learn.ModeKeys.INFER and FLAGS.decode_interactive: - features = _interactive_input_tensor_to_features_dict(features, hparams) - if mode == tf.contrib.learn.ModeKeys.INFER and FLAGS.decode_from_file: - features = _decode_input_tensor_to_features_dict(features, hparams) + if mode == tf.contrib.learn.ModeKeys.INFER: + if FLAGS.decode_interactive: + features = _interactive_input_tensor_to_features_dict(features, hparams) + elif FLAGS.decode_from_file: + features = _decode_input_tensor_to_features_dict(features, hparams) # A dictionary containing: # - problem_choice: A Tensor containing an integer indicating which problem # was selected for this run. @@ -579,12 +589,14 @@ def log_fn(inputs, "%s_prediction_%d.jpg" % (problem, j)) show_and_save_image(inputs / 255., save_path) elif inputs_vocab: - decoded_inputs = inputs_vocab.decode(inputs.flatten()) + decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - decoded_outputs = targets_vocab.decode(outputs.flatten()) - decoded_targets = targets_vocab.decode(targets.flatten()) + decoded_outputs = targets_vocab.decode(_save_until_eos(outputs.flatten())) tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) + decoded_targets = targets_vocab.decode(_save_until_eos(targets.flatten())) + tf.logging.info("Inference results TARGET: %s" % decoded_targets) + if FLAGS.decode_to_file: output_filepath = FLAGS.decode_to_file + ".outputs." + problem output_file = tf.gfile.Open(output_filepath, "a") @@ -599,27 +611,16 @@ def log_fn(inputs, # iterator to log inputs and decodes. if FLAGS.decode_endless: tf.logging.info("Warning: Decoding endlessly") - for j, result in enumerate(result_iter): - inputs, targets, outputs = (result["inputs"], result["targets"], - result["outputs"]) - if FLAGS.decode_return_beams: - output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - log_fn(inputs, targets, beam, problem, j) - else: - log_fn(inputs, targets, outputs, problem, j) - else: - for j, (inputs, targets, outputs) in enumerate( - zip(result_iter["inputs"], result_iter["targets"], result_iter[ - "outputs"])): - if FLAGS.decode_return_beams: - output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - log_fn(inputs, targets, beam, problem, j) - else: - log_fn(inputs, targets, outputs, problem, j) + for j, result in enumerate(result_iter): + inputs, targets, outputs = (result["inputs"], result["targets"], + result["outputs"]) + if FLAGS.decode_return_beams: + output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) + for k, beam in enumerate(output_beams): + tf.logging.info("BEAM %d:" % k) + log_fn(inputs, targets, beam, problem, j) + else: + log_fn(inputs, targets, outputs, problem, j) def decode_from_file(estimator, filename): @@ -628,22 +629,12 @@ def decode_from_file(estimator, filename): problem_id = FLAGS.decode_problem_id inputs_vocab = hparams.problems[problem_id].vocabulary["inputs"] targets_vocab = hparams.problems[problem_id].vocabulary["targets"] - tf.logging.info("Performing Decoding from a file.") + tf.logging.info("Performing decoding from a file.") sorted_inputs, sorted_keys = _get_sorted_inputs(filename) num_decode_batches = (len(sorted_inputs) - 1) // FLAGS.decode_batch_size + 1 input_fn = _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, inputs_vocab) - # strips everything after the first id, which is assumed to be 1 - def _save_until_eos(hyp): # pylint: disable=missing-docstring - ret = [] - index = 0 - # until you reach id - while index < len(hyp) and hyp[index] != 1: - ret.append(hyp[index]) - index += 1 - return np.array(ret) - decodes = [] for _ in range(num_decode_batches): result_iter = estimator.predict( @@ -655,8 +646,7 @@ def log_fn(inputs, outputs): decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - decoded_outputs = targets_vocab.decode( - _save_until_eos(outputs.flatten())) + decoded_outputs = targets_vocab.decode(_save_until_eos(outputs.flatten())) tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) return decoded_outputs @@ -667,7 +657,7 @@ def log_fn(inputs, outputs): for k, beam in enumerate(output_beams): tf.logging.info("BEAM %d:" % k) beam_decodes.append(log_fn(result["inputs"], beam)) - decodes.append(str.join("\t", beam_decodes)) + decodes.append("\t".join(beam_decodes)) else: decodes.append(log_fn(result["inputs"], result["outputs"])) @@ -709,11 +699,11 @@ def decode_interactively(estimator): tf.logging.info("BEAM %d:" % k) if scores is not None: tf.logging.info("%s\tScore:%f" % - (targets_vocab.decode(beam.flatten()), scores[k])) + (targets_vocab.decode(_save_until_eos(beam.flatten())), scores[k])) else: - tf.logging.info(targets_vocab.decode(beam.flatten())) + tf.logging.info(targets_vocab.decode(_save_until_eos(beam.flatten()))) else: - tf.logging.info(targets_vocab.decode(result["outputs"].flatten())) + tf.logging.info(targets_vocab.decode(_save_until_eos(result["outputs"].flatten()))) def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, @@ -726,10 +716,10 @@ def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, tf.logging.info("Deocding batch %d" % b) batch_length = 0 batch_inputs = [] - for inputs in sorted_inputs[b * FLAGS.decode_batch_size:( - b + 1) * FLAGS.decode_batch_size]: + for inputs in sorted_inputs[b * FLAGS.decode_batch_size: + (b + 1) * FLAGS.decode_batch_size]: input_ids = vocabulary.encode(inputs) - input_ids.append(1) # Assuming EOS=1. + input_ids.append(EOS_TOKEN) batch_inputs.append(input_ids) if len(input_ids) > batch_length: batch_length = len(input_ids) @@ -822,7 +812,7 @@ def _interactive_input_fn(hparams): if input_type == "text": input_ids = vocabulary.encode(input_string) if has_input: - input_ids.append(1) # assume 1 means end-of-source + input_ids.append(EOS_TOKEN) x = [num_samples, decode_length, len(input_ids)] + input_ids assert len(x) < const_array_size x += [0] * (const_array_size - len(x)) @@ -1089,7 +1079,7 @@ def input_fn(): problem_choice = tf.to_int32(FLAGS.worker_id % problem_count) else: raise ValueError("Value of hparams.problem_choice is %s and must be " - "one of [uniform, adaptive, distributed]", + "one of [uniform, adaptive, distributed]" % hparams.problem_choice) # Inputs and targets conditional on problem_choice. From bcf9a8beac540222dc744609a49127c6960d4c03 Mon Sep 17 00:00:00 2001 From: Ending_Credits Date: Tue, 11 Jul 2017 15:17:10 +0100 Subject: [PATCH 0097/4095] Added example model --- ...rnative_.py => transformer_alternative.py} | 120 ++++++++++-------- 1 file changed, 69 insertions(+), 51 deletions(-) rename tensor2tensor/models/{transformer_alternative_.py => transformer_alternative.py} (60%) diff --git a/tensor2tensor/models/transformer_alternative_.py b/tensor2tensor/models/transformer_alternative.py similarity index 60% rename from tensor2tensor/models/transformer_alternative_.py rename to tensor2tensor/models/transformer_alternative.py index f7c1a152f..90fea6139 100644 --- a/tensor2tensor/models/transformer_alternative_.py +++ b/tensor2tensor/models/transformer_alternative.py @@ -60,6 +60,12 @@ def model_fn_body(self, features): transformer_prepare_encoder(inputs, target_space, hparams) ) (decoder_input, decoder_self_attention_bias) = transformer.\ transformer_prepare_decoder(targets, hparams) + + # We need masks of the form batch size x input sequences + # Biases seem to be of the form batch_size x 1 x input sequences x vec dim + # Squeeze out dim one, and get the first element of each vector + encoder_mask = tf.squeeze(encoder_attention_bias, [1])[:,:,0] + decoder_mask = tf.squeeze(decoder_self_attention_bias, [1])[:,:,0] def residual_fn(x, y): return common_layers.layer_norm(x + tf.nn.dropout( @@ -68,10 +74,10 @@ def residual_fn(x, y): encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) encoder_output = alt_transformer_encoder( - encoder_input, residual_fn, encoder_attention_bias, hparams) + encoder_input, residual_fn, encoder_mask, hparams) decoder_output = alt_transformer_decoder( - decoder_input, encoder_output, residual_fn, decoder_self_attention_bias, + decoder_input, encoder_output, residual_fn, decoder_mask, encoder_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) @@ -79,24 +85,49 @@ def residual_fn(x, y): return decoder_output + +def composite_layer(inputs, mask, hparams): + x = inputs + + # Applies ravanbakhsh on top of each other + if hparams.composite_layer_type == "ravanbakhsh": + for layer in xrange(hparams.layers_per_layer): + with tf.variable_scope(".%d" % layer): + x = common_layers.ravanbakhsh_set_layer( + hparams.hidden_size, + x, + mask=mask, + dropout=0.0) + + # Transforms elements to get a context, and then uses this in a final layer + elif hparams.composite_layer_type == "reembedding": + initial_elems = x + # Transform elements n times and then pool + for layer in xrange(hparams.layers_per_layer): + with tf.variable_scope(".%d" % layer): + x = common_layers.linear_set_layer( + hparams.hidden_size, + x, + dropout=0.0) + context = common_layers.global_pool_1d(x, mask=mask) + + #Final layer + x = common_layers.linear_set_layer( + hparams.hidden_size, + x, + context=context, + dropout=0.0) + + return x + + + def alt_transformer_encoder(encoder_input, residual_fn, - encoder_attention_bias, + mask, hparams, name="encoder"): - """ - A stack of transformer layers. - Args: - encoder_input: a Tensor - residual_fn: a function from (layer_input, layer_output) -> combined_output - - hparams: hyperparameters for model - name: a string - - Returns: - y: a Tensors - """ x = encoder_input # Summaries don't work in multi-problem setting yet. @@ -105,10 +136,7 @@ def alt_transformer_encoder(encoder_input, with tf.variable_scope(name): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): - x = residual_fn( - x, - ravanbakhsh_set_layer(hparams.hidden_size, x, mask=encoder_attention_bias) - ) + x = residual_fn(x, composite_layer(x, mask, hparams)) return x @@ -116,25 +144,11 @@ def alt_transformer_encoder(encoder_input, def alt_transformer_decoder(decoder_input, encoder_output, residual_fn, - decoder_self_attention_bias, + mask, encoder_decoder_attention_bias, hparams, name="decoder"): - """ - A stack of transformer layers. - - Args: - decoder_input: a Tensor - encoder_output: a Tensor - residual_fn: a function from (layer_input, layer_output) -> combined_output - encoder_decoder_attention_bias: bias Tensor for encoder-decoder attention - (see common_attention.attention_bias()) - hparams: hyperparameters for model - name: a string - - Returns: - y: a Tensors - """ + x = decoder_input # Summaries don't work in multi-problem setting yet. @@ -143,30 +157,34 @@ def alt_transformer_decoder(decoder_input, for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): - x = residual_fn( - x, - ravanbakhsh_set_layer(hparams.hidden_size, - common_attention.multihead_attention( - x, - encoder_output, - encoder_decoder_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - summaries=summaries, - name="encdec_attention"), - mask=decoder_self_attention_bias) - ) + x_ = common_attention.multihead_attention( + x, + encoder_output, + encoder_decoder_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + summaries=summaries, + name="encdec_attention") + + x_ = residual_fn(x_, composite_layer(x_, mask, hparams)) + x = residual_fn(x, x_) return x + + + @registry.register_hparams def transformer_alt(): """Set of hyperparameters.""" hparams = transformer.transformer_base() + hparams.batch_size = 64 hparams.add_hparam("layers_per_layer", 4) + #hparams.add_hparam("composite_layer_type", "ravanbakhsh") #ravanbakhsh or reembedding + hparams.add_hparam("composite_layer_type", "reembedding") return hparams From afd1565fe367e3a61aebcb5aa39152d340d2f8d4 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Mon, 10 Jul 2017 15:12:28 -0700 Subject: [PATCH 0098/4095] merge from github PiperOrigin-RevId: 161447896 --- tensor2tensor/bin/t2t-datagen | 4 +- ...make_tf_configs.py => t2t-make-tf-configs} | 1 + tensor2tensor/data_generators/text_encoder.py | 3 +- tensor2tensor/docs/distributed_training.md | 2 +- tensor2tensor/models/common_layers.py | 127 +----------- tensor2tensor/models/common_layers_test.py | 72 +------ tensor2tensor/models/multimodel.py | 3 +- .../models/transformer_alternative.py | 190 ------------------ tensor2tensor/models/transformer_test.py | 1 - 9 files changed, 12 insertions(+), 391 deletions(-) rename tensor2tensor/bin/{make_tf_configs.py => t2t-make-tf-configs} (99%) delete mode 100644 tensor2tensor/models/transformer_alternative.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 4e7e4529a..b1ca3ef4b 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -341,8 +341,10 @@ def main(_): FLAGS.data_dir, FLAGS.num_shards, FLAGS.max_cases) tf.logging.info("Generating development data for %s.", problem) + dev_shards = 10 if "coco" in problem else 1 dev_output_files = generator_utils.generate_files( - dev_gen(), problem + UNSHUFFLED_SUFFIX + "-dev", FLAGS.data_dir, 1) + dev_gen(), problem + UNSHUFFLED_SUFFIX + "-dev", + FLAGS.data_dir, dev_shards) tf.logging.info("Shuffling data...") for fname in train_output_files + dev_output_files: diff --git a/tensor2tensor/bin/make_tf_configs.py b/tensor2tensor/bin/t2t-make-tf-configs similarity index 99% rename from tensor2tensor/bin/make_tf_configs.py rename to tensor2tensor/bin/t2t-make-tf-configs index 005f638c0..049e80ef3 100644 --- a/tensor2tensor/bin/make_tf_configs.py +++ b/tensor2tensor/bin/t2t-make-tf-configs @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright 2017 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 7934dca34..51d21c36c 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -28,7 +28,8 @@ # Dependency imports import six -from six import PY2, unichr # pylint: disable=redefined-builtin +from six import PY2 +from six import unichr # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import tokenizer diff --git a/tensor2tensor/docs/distributed_training.md b/tensor2tensor/docs/distributed_training.md index e7ddd7294..4523a4b32 100644 --- a/tensor2tensor/docs/distributed_training.md +++ b/tensor2tensor/docs/distributed_training.md @@ -51,7 +51,7 @@ Parameter servers only need `--schedule=run_std_server`. ## Utility to produce `TF_CONFIG` and flags -[`bin/make_tf_configs.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/make_tf_configs.py)) +[`bin/make_tf_configs.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t-make-tf-configs)) generates the `TF_CONFIG` json strings and the above-mentioned command-line flags for the workers and parameter servers. diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 4c63ce8ba..7a6ce96fb 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -292,7 +292,7 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): """Conditional conv_fn making kernel 1d or 2d depending on inputs shape.""" static_shape = inputs.get_shape() if not static_shape or len(static_shape) != 4: - raise ValueError("Inputs to conv must have statically known rank 4. Shape:" +str(static_shape)) + raise ValueError("Inputs to conv must have statically known rank 4.") # Add support for left padding. if "padding" in kwargs and kwargs["padding"] == "LEFT": dilation_rate = (1, 1) @@ -1378,128 +1378,3 @@ def smoothing_cross_entropy(logits, labels, vocab_size, confidence): xentropy = tf.nn.softmax_cross_entropy_with_logits( logits=logits, labels=soft_targets) return xentropy - normalizing - - -def global_pool_1d(inputs, pooling_type='MAX', mask=None): - """ - Pools elements across the last dimension. Useful to a list of vectors into a - single vector to get a representation of a set. - - Args - inputs: A tensor of dimensions batch_size x sequence_length x input_dims - containing the sequences of input vectors. - pooling_type: the pooling type to use, MAX or AVR - mask: A tensor of dimensions batch_size x sequence_length containing a - mask for the inputs with 1's for existing elements, and 0's elsewhere. - Outputs - output: A tensor of dimensions batch_size x input_dims - dimension containing the sequences of transformed vectors. - """ - - with tf.name_scope("global_pool", [inputs]): - if mask is not None: - mask = tf.expand_dims(mask, axis=2) - inputs = tf.multiply(inputs, mask) - - if pooling_type == 'MAX': - # A tf.pool can be used here, but reduce is cleaner - output = tf.reduce_max(inputs, axis=1) - elif pooling_type == 'AVR': - if mask is not None: - # Some elems are dummy elems so we can't just reduce the average - output = tf.reduce_sum(inputs, axis=1) - num_elems = tf.reduce_sum(mask, axis=1, keep_dims=True) - output = tf.div(output, num_elems) - #N.B: this will cause a NaN if one batch contains no elements - else: - output = tf.reduce_mean(inputs, axis=1) - - return output - - -def linear_set_layer(layer_size, - inputs, - context=None, - activation_fn=tf.nn.relu, - dropout=0.0, - name=None): - """ - Basic layer type for doing funky things with sets. - Applies a linear transformation to each element in the input set. - If a context is supplied, it is concatenated with the inputs. - e.g. One can use global_pool_1d to get a representation of the set which - can then be used as the context for the next layer. - - Args - layer_size: Dimension to transform the input vectors to - inputs: A tensor of dimensions batch_size x sequence_length x input_dims - containing the sequences of input vectors. - context: A tensor of dimensions batch_size x context_dims - containing a global statistic about the set. - dropout: Dropout probability. - activation_fn: The activation function to use. - Outputs - output: A tensor of dimensions batch_size x sequence_length x output_dims - dimension containing the sequences of transformed vectors. - - TODO: Add bias add. - """ - - with tf.variable_scope(name, "linear_set_layer", [inputs]): - # Apply 1D convolution to apply linear filter to each element along the 2nd - # dimension - #in_size = inputs.get_shape().as_list()[-1] - outputs = conv1d(inputs, layer_size, 1, activation=None, name="set_conv") - - # Apply the context if it exists - if context is not None: - # Unfortunately tf doesn't support broadcasting via concat, but we can - # simply add the transformed context to get the same effect - context = tf.expand_dims(context, axis=1) - #context_size = context.get_shape().as_list()[-1] - cont_tfm = conv1d(context, layer_size, 1, - activation=None, name="cont_conv") - outputs += cont_tfm - - if activation_fn is not None: - outputs = activation_fn(outputs) - - if dropout != 0.0: - output = tf.nn.dropout(output, 1.0 - dropout) - - return outputs - - -def ravanbakhsh_set_layer(layer_size, - inputs, - mask=None, - activation_fn=tf.nn.tanh, - dropout=0.0, - name=None): - """ - Layer from Deep Sets paper: https://arxiv.org/abs/1611.04500 - More parameter-efficient verstion of a linear-set-layer with context. - - - Args - layer_size: Dimension to transform the input vectors to. - inputs: A tensor of dimensions batch_size x sequence_length x vector - containing the sequences of input vectors. - mask: A tensor of dimensions batch_size x sequence_length containing a - mask for the inputs with 1's for existing elements, and 0's elsewhere. - activation_fn: The activation function to use. - Outputs - output: A tensor of dimensions batch_size x sequence_length x vector - dimension containing the sequences of transformed vectors. - """ - - with tf.variable_scope(name, "ravanbakhsh_set_layer", [inputs]): - output = linear_set_layer( - layer_size, - inputs - tf.expand_dims(global_pool_1d(inputs, mask=mask), axis=1), - activation_fn=activation_fn, - name=name) - - return output - - diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index 04d428884..8d2b4dec1 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -50,7 +50,7 @@ def testSaturatingSigmoid(self): self.assertAllClose(res, [0.0, 0.0, 0.5, 1.0, 1.0]) def testFlatten4D3D(self): - x = np.random.randint(1, 9, size=(3, 5, 2)) + x = np.random.random_integers(1, high=8, size=(3, 5, 2)) with self.test_session() as session: y = common_layers.flatten4d3d(common_layers.embedding(x, 10, 7)) session.run(tf.global_variables_initializer()) @@ -58,7 +58,7 @@ def testFlatten4D3D(self): self.assertEqual(res.shape, (3, 5 * 2, 7)) def testEmbedding(self): - x = np.random.randint(1, 9, size=(3, 5)) + x = np.random.random_integers(1, high=8, size=(3, 5)) with self.test_session() as session: y = common_layers.embedding(x, 10, 16) session.run(tf.global_variables_initializer()) @@ -81,14 +81,6 @@ def testConv(self): session.run(tf.global_variables_initializer()) res = session.run(y) self.assertEqual(res.shape, (5, 5, 1, 13)) - - def testConv1d(self): - x = np.random.rand(5, 7, 11) - with self.test_session() as session: - y = common_layers.conv1d(tf.constant(x, dtype=tf.float32), 13, 1) - session.run(tf.global_variables_initializer()) - res = session.run(y) - self.assertEqual(res.shape, (5, 7, 13)) def testSeparableConv(self): x = np.random.rand(5, 7, 1, 11) @@ -301,66 +293,6 @@ def testDeconvStride2MultiStep(self): session.run(tf.global_variables_initializer()) actual = session.run(a) self.assertEqual(actual.shape, (5, 32, 1, 16)) - - def testGlobalPool1d(self): - shape = (5, 4) - x1 = np.random.rand(5,4,11) - #mask = np.random.randint(2, size=shape) - no_mask = np.ones((5,4)) - full_mask = np.zeros((5,4)) - - with self.test_session() as session: - x1_ = tf.Variable(x1, dtype=tf.float32) - no_mask_ = tf.Variable(no_mask, dtype=tf.float32) - full_mask_ = tf.Variable(full_mask, dtype=tf.float32) - - none_mask_max = common_layers.global_pool_1d(x1_) - no_mask_max = common_layers.global_pool_1d(x1_, mask=no_mask_) - result1 = tf.reduce_sum(none_mask_max - no_mask_max) - - full_mask_max = common_layers.global_pool_1d(x1_, mask=full_mask_) - result2 = tf.reduce_sum(full_mask_max) - - none_mask_avr = common_layers.global_pool_1d(x1_, 'AVR') - no_mask_avr = common_layers.global_pool_1d(x1_, 'AVR', no_mask_) - result3 = tf.reduce_sum(none_mask_avr - no_mask_avr) - - full_mask_avr = common_layers.global_pool_1d(x1_, 'AVR', full_mask_) - result4 = tf.reduce_sum(full_mask_avr) - - session.run(tf.global_variables_initializer()) - actual = session.run([result1, result2, result3, result4]) - # N.B: Last result will give a NaN. - self.assertAllEqual(actual[:3], [0.0, 0.0, 0.0]) - - - def testLinearSetLayer(self): - x1 = np.random.rand(5,4,11) - cont = np.random.rand(5,13) - with self.test_session() as session: - x1_ = tf.Variable(x1, dtype=tf.float32) - cont_ = tf.Variable(cont, dtype=tf.float32) - - simple_ff = common_layers.linear_set_layer(32, x1_) - cont_ff = common_layers.linear_set_layer(32, x1_, context=cont_) - - session.run(tf.global_variables_initializer()) - actual = session.run([simple_ff, cont_ff]) - self.assertEqual(actual[0].shape, (5,4,32)) - self.assertEqual(actual[1].shape, (5,4,32)) - - def testRavanbakhshSetLayer(self): - x1 = np.random.rand(5,4,11) - cont = np.random.rand(5,13) - with self.test_session() as session: - x1_ = tf.Variable(x1, dtype=tf.float32) - cont_ = tf.Variable(cont, dtype=tf.float32) - - layer = common_layers.ravanbakhsh_set_layer(32, x1_) - - session.run(tf.global_variables_initializer()) - actual = session.run(layer) - self.assertEqual(actual.shape, (5,4,32)) if __name__ == "__main__": diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index b42d71cb3..26e7469c2 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -19,6 +19,8 @@ # Dependency imports +from six.moves import xrange # pylint: disable=redefined-builtin + from tensor2tensor.models import common_attention from tensor2tensor.models import common_hparams from tensor2tensor.models import common_layers @@ -27,7 +29,6 @@ from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model -from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py deleted file mode 100644 index 90fea6139..000000000 --- a/tensor2tensor/models/transformer_alternative.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" - Alternative transformer network using different layer types to demonstrate - alternatives to self attention. - - Code is mostly copied from original Transformer source (if that wasn't - already obvious). - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy - -# Dependency imports - -from six.moves import xrange # pylint: disable=redefined-builtin - -from tensor2tensor.models import common_attention -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers -from tensor2tensor.models import transformer -from tensor2tensor.utils import registry -from tensor2tensor.utils import t2t_model - -import tensorflow as tf - - -@registry.register_model -class TransformerAlt(t2t_model.T2TModel): - - def model_fn_body(self, features): - # - - # Remove dropout if not training - hparams = copy.copy(self._hparams) - targets = features["targets"] - inputs = features.get("inputs") - target_space = features.get("target_space_id") - - inputs = common_layers.flatten4d3d(inputs) - targets = common_layers.flatten4d3d(targets) - - (encoder_input, encoder_attention_bias, _) = (transformer.\ - transformer_prepare_encoder(inputs, target_space, hparams) ) - (decoder_input, decoder_self_attention_bias) = transformer.\ - transformer_prepare_decoder(targets, hparams) - - # We need masks of the form batch size x input sequences - # Biases seem to be of the form batch_size x 1 x input sequences x vec dim - # Squeeze out dim one, and get the first element of each vector - encoder_mask = tf.squeeze(encoder_attention_bias, [1])[:,:,0] - decoder_mask = tf.squeeze(decoder_self_attention_bias, [1])[:,:,0] - - def residual_fn(x, y): - return common_layers.layer_norm(x + tf.nn.dropout( - y, 1.0 - hparams.residual_dropout)) - - encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) - decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) - encoder_output = alt_transformer_encoder( - encoder_input, residual_fn, encoder_mask, hparams) - - decoder_output = alt_transformer_decoder( - decoder_input, encoder_output, residual_fn, decoder_mask, - encoder_attention_bias, hparams) - - decoder_output = tf.expand_dims(decoder_output, 2) - - return decoder_output - - - -def composite_layer(inputs, mask, hparams): - x = inputs - - # Applies ravanbakhsh on top of each other - if hparams.composite_layer_type == "ravanbakhsh": - for layer in xrange(hparams.layers_per_layer): - with tf.variable_scope(".%d" % layer): - x = common_layers.ravanbakhsh_set_layer( - hparams.hidden_size, - x, - mask=mask, - dropout=0.0) - - # Transforms elements to get a context, and then uses this in a final layer - elif hparams.composite_layer_type == "reembedding": - initial_elems = x - # Transform elements n times and then pool - for layer in xrange(hparams.layers_per_layer): - with tf.variable_scope(".%d" % layer): - x = common_layers.linear_set_layer( - hparams.hidden_size, - x, - dropout=0.0) - context = common_layers.global_pool_1d(x, mask=mask) - - #Final layer - x = common_layers.linear_set_layer( - hparams.hidden_size, - x, - context=context, - dropout=0.0) - - return x - - - -def alt_transformer_encoder(encoder_input, - residual_fn, - mask, - hparams, - name="encoder"): - - x = encoder_input - - # Summaries don't work in multi-problem setting yet. - summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 - - with tf.variable_scope(name): - for layer in xrange(hparams.num_hidden_layers): - with tf.variable_scope("layer_%d" % layer): - x = residual_fn(x, composite_layer(x, mask, hparams)) - - return x - - -def alt_transformer_decoder(decoder_input, - encoder_output, - residual_fn, - mask, - encoder_decoder_attention_bias, - hparams, - name="decoder"): - - x = decoder_input - - # Summaries don't work in multi-problem setting yet. - summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 - with tf.variable_scope(name): - for layer in xrange(hparams.num_hidden_layers): - with tf.variable_scope("layer_%d" % layer): - - x_ = common_attention.multihead_attention( - x, - encoder_output, - encoder_decoder_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - summaries=summaries, - name="encdec_attention") - - x_ = residual_fn(x_, composite_layer(x_, mask, hparams)) - x = residual_fn(x, x_) - - return x - - - - - -@registry.register_hparams -def transformer_alt(): - """Set of hyperparameters.""" - hparams = transformer.transformer_base() - hparams.batch_size = 64 - hparams.add_hparam("layers_per_layer", 4) - #hparams.add_hparam("composite_layer_type", "ravanbakhsh") #ravanbakhsh or reembedding - hparams.add_hparam("composite_layer_type", "reembedding") - return hparams - diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 52c1d1ba5..9535558a4 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -24,7 +24,6 @@ from tensor2tensor.data_generators import problem_hparams from tensor2tensor.models import transformer -from tensor2tensor.models import transformer_alternative import tensorflow as tf From 09a6084554a4e7467726b934f97955a3d0e18da3 Mon Sep 17 00:00:00 2001 From: Niki Parmar Date: Mon, 10 Jul 2017 15:37:56 -0700 Subject: [PATCH 0099/4095] Remove redundant copies of res_fn. Use from common_layers PiperOrigin-RevId: 161451356 --- tensor2tensor/models/common_hparams.py | 2 + tensor2tensor/models/common_layers.py | 44 ++++++++++---- tensor2tensor/models/common_layers_test.py | 67 ++++++++++++++++++++++ tensor2tensor/models/slicenet.py | 23 ++------ 4 files changed, 107 insertions(+), 29 deletions(-) diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index 41ca6f4b0..9bb3af4eb 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -65,6 +65,8 @@ def basic_params1(): sampling_method="argmax", # "argmax" or "random" problem_choice="adaptive", # "uniform", "adaptive", "distributed" multiply_embedding_mode="sqrt_depth", + norm_type="none", # "batch", layer", "noam", "none". + layer_norm_epsilon=1e-6, symbol_modality_num_shards=16, # setting the max length in a minibatch. 0 means default behavior, # max_length = hparams.batch_size * length_multiplier diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 7a6ce96fb..7729bdeb1 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -433,24 +433,48 @@ def noam_norm(x, name=None): tf.sqrt(tf.to_float(shape[-1]))) -def residual_function(hparams): +def get_norm(norm_type): + """Get the normalizer function.""" + if norm_type == "layer": + return lambda x, name, filters=None, epsilon=1e-6: layer_norm( # pylint: disable=g-long-lambda + x, filters=filters, epsilon=epsilon, name=name) + if norm_type == "batch": + return tf.layers.batch_normalization + if norm_type == "noam": + return noam_norm + if norm_type == "none": + return lambda x, name: x + raise ValueError("Parameter normalizer_fn must be one of: 'layer', 'batch'," + "'noam', 'none'.") + + +def residual_fn(x, y, norm_type, residual_dropout, + filters=None, + epsilon=1e-16, + name="residual"): """Returns a function for combining layer input and layer output. The returned function on x (layer input) and y (layer output) computes: - norm_function(x + t + norm_function(x + dropout(y)) Args: - hparams: model hyperparameters + x: tensor, input layer + y: tensor, output layer + norm_type: string, type of normalizer function + residual_dropout: integer, dropout value for residual connection + filters: integer, dimension for layer norm, optional + epsilon: integer, value of layer norm epsilon + name: string, name Returns: - a function from x= and y= to computed output + residual layer output with applied norm_fn. """ - - def residual_fn(x, y): - return hparams.norm_function(x + tf.nn.dropout( - y, 1.0 - hparams.residual_dropout)) - - return residual_fn + norm_fn = get_norm(norm_type) + res = x + tf.nn.dropout(y, 1.0 - residual_dropout) + if norm_type == "layer": + return norm_fn(res, name=name, filters=filters, epsilon=epsilon) + else: + return norm_fn(res, name=name) def conv_block_internal(conv_fn, diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index 8d2b4dec1..d9b78033d 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -294,6 +294,73 @@ def testDeconvStride2MultiStep(self): actual = session.run(a) self.assertEqual(actual.shape, (5, 32, 1, 16)) + def testGetNormLayerFn(self): + norm_type = "layer" + with self.test_session() as session: + a = common_layers.get_norm(norm_type) + x1 = np.random.rand(5, 2, 1, 11) + x2 = a(tf.constant(x1, dtype=tf.float32), name="layer", filters=11) + session.run(tf.global_variables_initializer()) + actual = session.run(x2) + self.assertEqual(actual.shape, (5, 2, 1, 11)) + + def testGetNormNoamFn(self): + norm_type = "noam" + with self.test_session() as session: + a = common_layers.get_norm(norm_type) + x1 = np.random.rand(5, 2, 1, 11) + x2 = a(tf.constant(x1, dtype=tf.float32), name="noam") + session.run(tf.global_variables_initializer()) + actual = session.run(x2) + self.assertEqual(actual.shape, (5, 2, 1, 11)) + + def testGetNormBatchFn(self): + norm_type = "batch" + with self.test_session() as session: + a = common_layers.get_norm(norm_type) + x1 = np.random.rand(5, 2, 1, 11) + x2 = a(tf.constant(x1, dtype=tf.float32), name="batch") + session.run(tf.global_variables_initializer()) + actual = session.run(x2) + self.assertEqual(actual.shape, (5, 2, 1, 11)) + + def testGetNormNoneFn(self): + norm_type = "none" + with self.test_session() as session: + a = common_layers.get_norm(norm_type) + x1 = np.random.rand(5, 2, 1, 11) + x2 = a(tf.constant(x1, dtype=tf.float32), name="none") + session.run(tf.global_variables_initializer()) + actual = session.run(x2) + self.assertEqual(actual.shape, (5, 2, 1, 11)) + self.assertAllClose(actual, x1, atol=1e-03) + + def testResidualFn(self): + norm_type = "batch" + with self.test_session() as session: + x1 = np.random.rand(5, 2, 1, 11) + x2 = np.random.rand(5, 2, 1, 11) + x3 = common_layers.residual_fn( + tf.constant(x1, dtype=tf.float32), + tf.constant(x2, dtype=tf.float32), + norm_type, 0.1) + session.run(tf.global_variables_initializer()) + actual = session.run(x3) + self.assertEqual(actual.shape, (5, 2, 1, 11)) + + def testResidualFnWithLayerNorm(self): + norm_type = "layer" + with self.test_session() as session: + x1 = np.random.rand(5, 2, 1, 11) + x2 = np.random.rand(5, 2, 1, 11) + x3 = common_layers.residual_fn( + tf.constant(x1, dtype=tf.float32), + tf.constant(x2, dtype=tf.float32), + norm_type, 0.1, epsilon=0.1) + session.run(tf.global_variables_initializer()) + actual = session.run(x3) + self.assertEqual(actual.shape, (5, 2, 1, 11)) + if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index 0b9efc2c3..77659e8ef 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -31,21 +31,6 @@ import tensorflow as tf -def get_norm(hparams): - """Get the normalizer function.""" - if hparams.normalizer_fn == "layer": - return lambda x, name: common_layers.layer_norm( # pylint: disable=g-long-lambda - x, hparams.hidden_size, name=name) - if hparams.normalizer_fn == "batch": - return tf.layers.batch_normalization - if hparams.normalizer_fn == "noam": - return common_layers.noam_norm - if hparams.normalizer_fn == "none": - return lambda x, name: x - raise ValueError("Parameter normalizer_fn must be one of: 'layer', 'batch'," - "'noam', 'none'.") - - def attention(targets_shifted, inputs_encoded, norm_fn, hparams, bias=None): """Complete attention layer with preprocessing.""" separabilities = [hparams.separability, hparams.separability] @@ -128,7 +113,7 @@ def multi_conv_res(x, padding, name, layers, hparams, hparams.separability - i for i in reversed(range(len(dilations_and_kernels2))) ] - norm_fn = get_norm(hparams) + norm_fn = common_layers.get_norm(hparams.norm_type) for layer in xrange(layers): with tf.variable_scope("layer_%d" % layer): y = common_layers.subseparable_conv_block( @@ -188,7 +173,7 @@ def similarity_cost(inputs_encoded, targets_encoded): def slicenet_middle(inputs_encoded, targets, target_space_emb, mask, hparams): """Middle part of slicenet, connecting encoder and decoder.""" - norm_fn = get_norm(hparams) + norm_fn = common_layers.get_norm(hparams.norm_type) # Flatten targets and embed target_space_id. targets_flat = tf.expand_dims(common_layers.flatten4d3d(targets), axis=2) @@ -311,7 +296,7 @@ def slicenet_params1(): hparams.num_hidden_layers = 4 hparams.kernel_height = 3 hparams.kernel_width = 1 - hparams.add_hparam("normalizer_fn", "layer") # New ones are added like this. + hparams.norm_type = "layer" hparams.learning_rate_decay_scheme = "exp50k" hparams.learning_rate = 0.05 hparams.learning_rate_warmup_steps = 3000 @@ -322,7 +307,7 @@ def slicenet_params1(): hparams.optimizer_adam_epsilon = 1e-6 hparams.optimizer_adam_beta1 = 0.85 hparams.optimizer_adam_beta2 = 0.997 - hparams.add_hparam("large_kernel_size", 15) + hparams.add_hparam("large_kernel_size", 15) # New ones are added like this. hparams.add_hparam("separability", -2) # A dilation scheme, one of _DILATION_SCHEMES. hparams.add_hparam("dilation_scheme", "1.1.1.1") From 912daf7a9dadece5a1e4969a9aaa14483098e18f Mon Sep 17 00:00:00 2001 From: Noam Shazeer Date: Mon, 10 Jul 2017 21:16:38 -0700 Subject: [PATCH 0100/4095] Changes to data generators and SubwordTextEncoder ... should not break anything. PiperOrigin-RevId: 161483309 --- tensor2tensor/bin/t2t-datagen | 46 +++- .../data_generators/generator_utils.py | 61 +++-- .../data_generators/generator_utils_test.py | 7 +- tensor2tensor/data_generators/inspect.py | 81 +++++++ tensor2tensor/data_generators/lm1b.py | 161 +++++++++++++ tensor2tensor/data_generators/lm_example.py | 225 ------------------ .../data_generators/problem_hparams.py | 40 ++-- tensor2tensor/data_generators/replace_oov.py | 76 ------ tensor2tensor/data_generators/snli.py | 3 +- tensor2tensor/data_generators/text_encoder.py | 63 ++--- .../text_encoder_build_subword.py | 18 +- .../text_encoder_inspect_subword.py | 64 ----- tensor2tensor/data_generators/tokenizer.py | 144 +++++++---- .../data_generators/tokenizer_test.py | 19 +- tensor2tensor/data_generators/wiki.py | 128 ++++++++++ tensor2tensor/utils/data_reader_test.py | 14 +- tensor2tensor/utils/trainer_utils_test.py | 10 +- 17 files changed, 617 insertions(+), 543 deletions(-) create mode 100644 tensor2tensor/data_generators/inspect.py create mode 100644 tensor2tensor/data_generators/lm1b.py delete mode 100644 tensor2tensor/data_generators/lm_example.py delete mode 100644 tensor2tensor/data_generators/replace_oov.py delete mode 100644 tensor2tensor/data_generators/text_encoder_inspect_subword.py create mode 100644 tensor2tensor/data_generators/wiki.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index b1ca3ef4b..0367fce94 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -37,8 +37,10 @@ from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import image +from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import ptb from tensor2tensor.data_generators import snli +from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing @@ -138,6 +140,14 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**15) ), + "lm1b_32k": ( + lambda: lm1b.generator(FLAGS.tmp_dir, True), + lambda: lm1b.generator(FLAGS.tmp_dir, False) + ), + "wiki_32k": ( + lambda: wiki.generator(FLAGS.tmp_dir, True), + 1000 + ), "image_mnist_tune": ( lambda: image.mnist_generator(FLAGS.tmp_dir, True, 55000), lambda: image.mnist_generator(FLAGS.tmp_dir, True, 5000, 55000)), @@ -335,19 +345,33 @@ def main(_): training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem] - tf.logging.info("Generating training data for %s.", problem) - train_output_files = generator_utils.generate_files( - training_gen(), problem + UNSHUFFLED_SUFFIX + "-train", - FLAGS.data_dir, FLAGS.num_shards, FLAGS.max_cases) - - tf.logging.info("Generating development data for %s.", problem) - dev_shards = 10 if "coco" in problem else 1 - dev_output_files = generator_utils.generate_files( - dev_gen(), problem + UNSHUFFLED_SUFFIX + "-dev", - FLAGS.data_dir, dev_shards) + if isinstance(dev_gen, int): + # The dev set and test sets are generated as extra shards using the + # training generator. The integer specifies the number of training + # shards. FLAGS.num_shards is ignored. + num_training_shards = dev_gen + tf.logging.info("Generating data for %s.", problem) + all_output_files = generator_utils.combined_data_filenames( + problem + UNSHUFFLED_SUFFIX, FLAGS.data_dir, num_training_shards) + generator_utils.generate_files( + training_gen(), all_output_files, FLAGS.max_cases) + else: + # usual case - train data and dev data are generated using separate + # generators. + tf.logging.info("Generating training data for %s.", problem) + train_output_files = generator_utils.train_data_filenames( + problem + UNSHUFFLED_SUFFIX, FLAGS.data_dir, FLAGS.num_shards) + generator_utils.generate_files( + training_gen(), train_output_files, FLAGS.max_cases) + tf.logging.info("Generating development data for %s.", problem) + dev_shards = 10 if "coco" in problem else 1 + dev_output_files = generator_utils.dev_data_filenames( + problem + UNSHUFFLED_SUFFIX, FLAGS.data_dir, dev_shards) + generator_utils.generate_files(dev_gen(), dev_output_files) + all_output_files = train_output_files + dev_output_files tf.logging.info("Shuffling data...") - for fname in train_output_files + dev_output_files: + for fname in all_output_files: records = generator_utils.read_records(fname) random.shuffle(records) out_fname = fname.replace(UNSHUFFLED_SUFFIX, "") diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 6a3475456..a0dd7c101 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function +from collections import defaultdict import gzip import io import os @@ -30,7 +31,7 @@ import six.moves.urllib_request as urllib # Imports urllib on Python2, urllib.request on Python3 from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators.tokenizer import Tokenizer +from tensor2tensor.data_generators import tokenizer import tensorflow as tf @@ -84,10 +85,34 @@ def generate_files_distributed(generator, return output_file +def _data_filenames(output_name, output_dir, num_shards): + return [os.path.join( + output_dir, "%s-%.5d-of-%.5d" % (output_name, shard, num_shards)) + for shard in xrange(num_shards)] + + +def train_data_filenames(problem, output_dir, num_shards): + return _data_filenames( + problem + "-train", output_dir, num_shards) + + +def dev_data_filenames(problem, output_dir, num_shards): + return _data_filenames(problem + "-dev", output_dir, num_shards) + + +def test_data_filenames(problem, output_dir, num_shards): + return _data_filenames(problem + "-test", output_dir, num_shards) + + +def combined_data_filenames(problem, output_dir, num_training_shards): + return ( + train_data_filenames(problem, output_dir, num_training_shards) + + dev_data_filenames(problem, output_dir, 1) + + test_data_filenames(problem, output_dir, 1)) + + def generate_files(generator, - output_name, - output_dir, - num_shards=1, + output_filenames, max_cases=None): """Generate cases from a generator and save as TFRecord files. @@ -96,27 +121,16 @@ def generate_files(generator, Args: generator: a generator yielding (string -> int/float/str list) dictionaries. - output_name: the file name prefix under which output will be saved. - output_dir: directory to save the output to. - num_shards: how many shards to use (defaults to 1). + output_filenames: List of output file paths. max_cases: maximum number of cases to get from the generator; if None (default), we use the generator until StopIteration is raised. - - Returns: - List of output file paths. """ - writers = [] - output_files = [] - for shard in xrange(num_shards): - output_filename = "%s-%.5d-of-%.5d" % (output_name, shard, num_shards) - output_file = os.path.join(output_dir, output_filename) - output_files.append(output_file) - writers.append(tf.python_io.TFRecordWriter(output_file)) - + num_shards = len(output_filenames) + writers = [tf.python_io.TFRecordWriter(fname) for fname in output_filenames] counter, shard = 0, 0 for case in generator: if counter > 0 and counter % 100000 == 0: - tf.logging.info("Generating case %d for %s." % (counter, output_name)) + tf.logging.info("Generating case %d." % counter) counter += 1 if max_cases and counter > max_cases: break @@ -127,8 +141,6 @@ def generate_files(generator, for writer in writers: writer.close() - return output_files - def download_report_hook(count, block_size, total_size): """Report hook for download progress. @@ -235,7 +247,7 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): sources = sources or _DATA_FILE_URLS tf.logging.info("Generating vocab from: %s", str(sources)) - tokenizer = Tokenizer() + token_counts = defaultdict(int) for source in sources: url = source[0] filename = os.path.basename(url) @@ -269,10 +281,11 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): break line = line.strip() file_byte_budget -= len(line) - _ = tokenizer.encode(text_encoder.native_to_unicode(line)) + for tok in tokenizer.encode(text_encoder.native_to_unicode(line)): + token_counts[tok] += 1 vocab = text_encoder.SubwordTextEncoder.build_to_target_size( - vocab_size, tokenizer.token_counts, 1, 1e3) + vocab_size, token_counts, 1, 1e3) vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/data_generators/generator_utils_test.py b/tensor2tensor/data_generators/generator_utils_test.py index 726763f7a..320d1a02d 100644 --- a/tensor2tensor/data_generators/generator_utils_test.py +++ b/tensor2tensor/data_generators/generator_utils_test.py @@ -41,11 +41,12 @@ def testGenerateFiles(self): def test_generator(): yield {"inputs": [1], "target": [1]} - generator_utils.generate_files(test_generator(), tmp_file_name, tmp_dir) - self.assertTrue(tf.gfile.Exists(tmp_file_path + "-00000-of-00001")) + filenames = generator_utils.train_data_filenames(tmp_file_name, tmp_dir, 1) + generator_utils.generate_files(test_generator(), filenames) + self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) # Clean up. - os.remove(tmp_file_path + "-00000-of-00001") + os.remove(tmp_file_path + "-train-00000-of-00001") os.remove(tmp_file_path) def testMaybeDownload(self): diff --git a/tensor2tensor/data_generators/inspect.py b/tensor2tensor/data_generators/inspect.py new file mode 100644 index 000000000..a0da09150 --- /dev/null +++ b/tensor2tensor/data_generators/inspect.py @@ -0,0 +1,81 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Inspect a TFRecord file of tensorflow.Example and show tokenizations. + +python data_generators/inspect.py \ + --logtostderr \ + --print_targets \ + --subword_text_encoder_filename=$DATA_DIR/tokens.vocab.8192 \ + --input_filename=$DATA_DIR/wmt_ende_tokens_8k-train-00000-of-00100 +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.data_generators import text_encoder + +import tensorflow as tf + +tf.app.flags.DEFINE_string("subword_text_encoder_filename", "", + "SubwordTextEncoder vocabulary file") +tf.app.flags.DEFINE_string("input_filename", "", "input filename") +tf.app.flags.DEFINE_bool("print_inputs", False, + "Print decoded inputs to stdout") +tf.app.flags.DEFINE_bool("print_targets", False, + "Print decoded targets to stdout") + +FLAGS = tf.app.flags.FLAGS + + +def main(_): + """Convert a file to examples.""" + if FLAGS.subword_text_encoder_filename: + encoder = text_encoder.SubwordTextEncoder( + FLAGS.subword_text_encoder_filename) + else: + encoder = None + reader = tf.python_io.tf_record_iterator(FLAGS.input_filename) + total_sequences = 0 + total_input_tokens = 0 + total_target_tokens = 0 + max_input_length = 0 + max_target_length = 0 + for record in reader: + x = tf.train.Example() + x.ParseFromString(record) + inputs = [int(i) for i in x.features.feature["inputs"].int64_list.value] + targets = [int(i) for i in x.features.feature["targets"].int64_list.value] + if FLAGS.print_inputs: + print(encoder.decode(inputs) if encoder else inputs) + if FLAGS.print_targets: + print(encoder.decode(targets) if encoder else targets) + total_input_tokens += len(inputs) + total_target_tokens += len(targets) + total_sequences += 1 + max_input_length = max(max_input_length, len(inputs)) + max_target_length = max(max_target_length, len(targets)) + + tf.logging.info("total_sequences: %d", total_sequences) + tf.logging.info("total_input_tokens: %d", total_input_tokens) + tf.logging.info("total_target_tokens: %d", total_target_tokens) + tf.logging.info("max_input_length: %d", max_input_length) + tf.logging.info("max_target_length: %d", max_target_length) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py new file mode 100644 index 000000000..66a3d52a0 --- /dev/null +++ b/tensor2tensor/data_generators/lm1b.py @@ -0,0 +1,161 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for LM1B data-set.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import defaultdict + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import tokenizer + +import tensorflow as tf + + +# End-of-sentence marker (should correspond to the position of EOS in the +# RESERVED_TOKENS list in text_encoder.py) +EOS = 1 + + +def _original_vocab(tmp_dir): + """Returns a set containing the original vocabulary. + + This is important for comparing with published results. + + Args: + tmp_dir: directory containing dataset. + + Returns: + a set of strings + """ + vocab_url = ("http://download.tensorflow.org/models/LM_LSTM_CNN/" + "vocab-2016-09-10.txt") + vocab_filename = os.path.basename(vocab_url) + vocab_filepath = os.path.join(tmp_dir, vocab_filename) + if not os.path.exists(vocab_filepath): + generator_utils.maybe_download(tmp_dir, vocab_filename, vocab_url) + return set( + [text_encoder.native_to_unicode(l.strip()) for l in + tf.gfile.Open(vocab_filepath)]) + + +def _replace_oov(original_vocab, line): + """Replace out-of-vocab words with "UNK". + + This maintains compatability with published results. + + Args: + original_vocab: a set of strings (The standard vocabulary for the dataset) + line: a unicode string - a space-delimited sequence of words. + + Returns: + a unicode string - a space-delimited sequence of words. + """ + return u" ".join( + [word if word in original_vocab else u"UNK" for word in line.split()]) + + +def _train_data_filenames(tmp_dir): + return [os.path.join( + tmp_dir, + "1-billion-word-language-modeling-benchmark-r13output", + "training-monolingual.tokenized.shuffled", + "news.en-%05d-of-00100" % i) for i in xrange(1, 100)] + + +def _dev_data_filename(tmp_dir): + return os.path.join( + tmp_dir, + "1-billion-word-language-modeling-benchmark-r13output", + "heldout-monolingual.tokenized.shuffled", + "news.en.heldout-00000-of-00050") + + +def _maybe_download_corpus(tmp_dir): + """Download and unpack the corpus. + + Args: + tmp_dir: directory containing dataset. + """ + corpus_url = ("http://www.statmt.org/lm-benchmark/" + "1-billion-word-language-modeling-benchmark-r13output.tar.gz") + corpus_filename = os.path.basename(corpus_url) + corpus_filepath = os.path.join(tmp_dir, corpus_filename) + if not os.path.exists(corpus_filepath): + generator_utils.maybe_download(tmp_dir, corpus_filename, corpus_url) + with tarfile.open(corpus_filepath, "r:gz") as corpus_tar: + corpus_tar.extractall(tmp_dir) + + +def _get_or_build_subword_text_encoder(tmp_dir): + """Builds a SubwordTextEncoder based on the corpus. + + Args: + tmp_dir: directory containing dataset. + Returns: + a SubwordTextEncoder. + """ + filepath = os.path.join(tmp_dir, "lm1b_32k.subword_text_encoder") + if tf.gfile.Exists(filepath): + return text_encoder.SubwordTextEncoder(filepath) + _maybe_download_corpus(tmp_dir) + original_vocab = _original_vocab(tmp_dir) + token_counts = defaultdict(int) + line_count = 0 + max_lines = 63000 + for line in tf.gfile.Open(_train_data_filenames(tmp_dir)[0]): + tokens = tokenizer.encode( + _replace_oov(original_vocab, text_encoder.native_to_unicode(line))) + for tok in tokens: + token_counts[tok] += 1 + line_count += 1 + if line_count >= max_lines: + break + ret = text_encoder.SubwordTextEncoder() + ret.build_from_token_counts(token_counts, min_count=5) + ret.store_to_file(filepath) + return ret + + +def generator(tmp_dir, train): + """Generator for lm1b sentences. + + Args: + tmp_dir: a string. + train: a boolean. + + Yields: + A dictionary {"inputs": [0], "targets": []} + """ + _maybe_download_corpus(tmp_dir) + original_vocab = _original_vocab(tmp_dir) + files = (_train_data_filenames(tmp_dir) if train + else [_dev_data_filename(tmp_dir)]) + encoder = _get_or_build_subword_text_encoder(tmp_dir) + for filepath in files: + tf.logging.info("filepath = %s", filepath) + for line in tf.gfile.Open(filepath): + tokens = encoder.encode( + _replace_oov(original_vocab, text_encoder.native_to_unicode(line))) + tokens.append(EOS) + yield {"inputs": [0], "targets": tokens} diff --git a/tensor2tensor/data_generators/lm_example.py b/tensor2tensor/data_generators/lm_example.py deleted file mode 100644 index d8a76baeb..000000000 --- a/tensor2tensor/data_generators/lm_example.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Convert language modeling data to tf.Example format. - -Uses SubwordTextEncoder. - -For each line, we generate a tf.Example, with "targets" equal to a sequence -of subwords (integers), ending in subword id 1 for end-of-sequence. We add -a dummy feature "inputs"=[0] for compatability with seq-to-seq models. - -If FLAGS.combine_to_length is nonzero, then we combine multiple sequences into -examples of a constant length, possibly with some padding at the end. - - -How to preprocess lm1b - billion word benchmark -TODO(noam): should these instructions be made into a script and moved elsewhere? - - -# Download data into $DATADIR/ -http://www.statmt.org/lm-benchmark/\ -1-billion-word-language-modeling-benchmark-r13output.tar.gz -http://download.tensorflow.org/models/LM_LSTM_CNN/vocab-2016-09-10.txt - -# unpack data -cd $DATADIR -tar xvf 1-billion-word-language-modeling-benchmark-r13output.tar.gz - -# replace oov words with UNK -$BINARYDIR/replace_oov \ ---vocab_file=$DATADIR/vocab-2016-09-10.txt \ ---in_filepattern=\ -$DATADIR/1-billion-word-language-modeling-benchmark-r13output/\ -heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00050 \ ---out_prefix=$DATADIR/dev-unk \ ---logtostderr - -wc $DATADIR/dev-unk-00000-of-00050 -# -> 6075 153583 826189 -# dev set tokens including EOS = 6075 + 153583 = 159658 - -$BINARYDIR/replace_oov \ ---vocab_file=$DATADIR/vocab-2016-09-10.txt \ ---in_filepattern=\ -$DATADIR/1-billion-word-language-modeling-benchmark-r13output/\ -training-monolingual.tokenized.shuffled/news.en-?????-of-00100 \ ---out_prefix=$DATADIR/train-unk \ ---logtostderr - -# build vocabularies -$BINARYDIR/\ -text_encoder_build_subword \ - --corpus_filepattern=$DATADIR/train-unk-* \ - --corpus_max_lines=17500 \ - --output_fn=$DATADIR/lm1b_16k.subword_text_encoder \ - --logtostderr - -$BINARYDIR/\ -text_encoder_build_subword \ - --corpus_filepattern=$DATADIR/train-unk-* \ - --corpus_max_lines=270000 \ - --output_fn=$DATADIR/lm1b_64k.subword_text_encoder \ - --logtostderr - -# generate training and dev data - -# 16k vocab - -$BINARYDIR/lm_example \ ---logtostderr \ ---vocab_file=$DATADIR/lm1b_16k.subword_text_encoder \ ---in_filepattern=$DATADIR/dev-unk* \ ---out_prefix=$DATADIR/lm1b_16k-dev - -# -> total subwords: 189068 -# perplexity exponent = 189068 / 159658 = 1.184206 - -mv $DATADIR/lm1b_16k-dev-00000-of-00050 $DATADIR/lm1b_16k-dev-00000-of-00001 - -$BINARYDIR/\ -text_encoder_inspect_subword \ ---logtostderr \ ---vocab_file=$DATADIR/lm1b_16k.subword_text_encoder \ ---in_file=$DATADIR/lm1b_16k-dev-00000-of-00001 | more - -$BINARYDIR/lm_example \ ---logtostderr \ ---vocab_file=$DATADIR/lm1b_16k.subword_text_encoder \ ---in_filepattern=$DATADIR/train-unk* \ ---out_prefix=$DATADIR/lm1b_16k-train - -# 64k vocab - -$BINARYDIR/lm_example \ ---logtostderr \ ---vocab_file=$DATADIR/lm1b_64k.subword_text_encoder \ ---in_filepattern=$DATADIR/dev-unk* \ ---out_prefix=$DATADIR/lm1b_64k-dev - -# -> total subwords: 170366 -# perplexity exponent = 170366 / 159658 = 1.067068 - -mv $DATADIR/lm1b_64k-dev-00000-of-00050 $DATADIR/lm1b_64k-dev-00000-of-00001 - -$BINARYDIR/\ -text_encoder_inspect_subword \ ---logtostderr \ ---vocab_file=$DATADIR/lm1b_64k.subword_text_encoder \ ---in_file=$DATADIR/lm1b_64k-dev-00000-of-00001 | more - -$BINARYDIR/lm_example \ ---logtostderr \ ---vocab_file=$DATADIR/lm1b_64k.subword_text_encoder \ ---in_filepattern=$DATADIR/train-unk* \ ---out_prefix=$DATADIR/lm1b_64k-train - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import text_encoder - -import tensorflow as tf - -tf.app.flags.DEFINE_string( - "vocab_file", "", "SubwordTextEncoder vocabulary file") - -tf.app.flags.DEFINE_integer( - "combine_to_length", 0, - "If positive, concatenate documents to form examples with length exactly" - " equal to this value. Documents are still suffixed with subword id=1. " - " Examples are padded with subword id=0.") - -tf.app.flags.DEFINE_string("in_filepattern", "", "input filename") - -tf.app.flags.DEFINE_string( - "out_prefix", "", "The output filename is equal to out_prefix plus " - "the last 15 characters of in_file. (e.g. -00001-of-00100)") - -FLAGS = tf.app.flags.FLAGS - - -def _make_example(ids, raw_num_bytes): - if FLAGS.combine_to_length > 0: - ids += [0] * (FLAGS.combine_to_length - len(ids)) - return generator_utils.to_example({ - "targets": ids, - "inputs": [0], - "raw_num_bytes": [raw_num_bytes] - }).SerializeToString() - - -def convert_file(in_file, encoder): - """Convert a file to examples.""" - total_bytes = 0 - total_subwords = 0 - total_documents = 0 - dropped_documents = 0 - - combined_subwords = [] - combined_num_bytes = 0 - - out_file = FLAGS.out_prefix + in_file[-15:] - writer = tf.python_io.TFRecordWriter(out_file) - out_file = FLAGS.out_prefix + in_file[-15:] - print ("in_file", in_file, "out_file", out_file) - for line in tf.gfile.Open(in_file): - total_documents += 1 - assert line[-1] == "\n" - num_bytes = len(line) - total_bytes += num_bytes - line = line[:-1] - subwords = encoder.encode(line) + [1] - total_subwords += len(subwords) - if FLAGS.combine_to_length: - if len(combined_subwords) + len(subwords) > FLAGS.combine_to_length: - writer.write(_make_example(combined_subwords, combined_num_bytes)) - combined_subwords = [] - combined_num_bytes = 0 - if len(subwords) <= FLAGS.combine_to_length: - combined_subwords.extend(subwords) - combined_num_bytes += num_bytes - else: - dropped_documents += 1 - else: - writer.write(_make_example(subwords, num_bytes)) - if combined_subwords: - writer.write(_make_example(combined_subwords, combined_num_bytes)) - writer.close() - - tf.logging.info("total bytes: %d", total_bytes) - tf.logging.info("total subwords: %d", total_subwords) - tf.logging.info("bytes per subword: %f", total_bytes / total_subwords) - tf.logging.info("total documents: %d", total_documents) - tf.logging.info("dropped documents: %d", dropped_documents) - - -def main(_): - """Convert a file to examples.""" - encoder = text_encoder.SubwordTextEncoder(FLAGS.vocab_file) - - in_files = tf.gfile.Glob(FLAGS.in_filepattern) - assert in_files, "No matching input files" - for in_file in in_files: - convert_file(in_file, encoder) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 7ad0a57ad..203dba852 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -325,33 +325,35 @@ def audio_wsj_tokens(model_hparams, wrong_vocab_size): return p -def lm1b_16k(model_hparams): - """Billion-word language-modeling benchmark, 16k subtoken vocabulary.""" +def lm1b_32k(model_hparams): + """Billion-word language-modeling benchmark, 32k subword vocabulary.""" p = default_problem_hparams() - p.perplexity_exponent = 1.184206 + # ratio of dev tokens (including eos) to dev words (including eos) + # 176884 / 159658 = 1.107893 + p.perplexity_exponent = 1.107893 p.input_modality = {} - p.target_modality = (registry.Modalities.SYMBOL, 16384) + encoder = text_encoder.SubwordTextEncoder( + os.path.join(model_hparams.data_dir, "lm1b_32k.subword_text_encoder")) + p.target_modality = (registry.Modalities.SYMBOL, encoder.vocab_size) p.vocabulary = { - "targets": - text_encoder.SubwordTextEncoder( - os.path.join(model_hparams.data_dir, - "lm1b_16k.subword_text_encoder")) + "targets": encoder } p.target_space_id = 3 return p -def lm1b_64k(model_hparams): - """Billion-word language-modeling benchmark, 64k subtoken vocabulary.""" +def wiki_32k(model_hparams): + """Wikipedia title to article. 32k subtoken vocabulary.""" p = default_problem_hparams() - p.perplexity_exponent = 1.067068 - p.input_modality = {} - p.target_modality = (registry.Modalities.SYMBOL, 65536) + encoder = text_encoder.SubwordTextEncoder( + os.path.join(model_hparams.data_dir, "wiki_32k.subword_text_encoder")) + p.input_modality = { + "inputs": (registry.Modalities.SYMBOL, encoder.vocab_size) + } + p.target_modality = (registry.Modalities.SYMBOL, encoder.vocab_size) p.vocabulary = { - "targets": - text_encoder.SubwordTextEncoder( - os.path.join(model_hparams.data_dir, - "lm1b_64k.subword_text_encoder")) + "inputs": encoder, + "targets": encoder } p.target_space_id = 3 return p @@ -700,8 +702,8 @@ def img2img_imagenet(unused_model_hparams): "audio_wsj_characters_test": audio_wsj_characters, "audio_wsj_tokens_8k_tune": lambda p: audio_wsj_tokens(p, 2**13), "audio_wsj_tokens_8k_test": lambda p: audio_wsj_tokens(p, 2**13), - "lm1b_16k": lm1b_16k, - "lm1b_64k": lm1b_64k, + "lm1b_32k": lm1b_32k, + "wiki_32k": wiki_32k, "lmptb_10k": lmptb_10k, "wmt_parsing_characters": wmt_parsing_characters, "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), diff --git a/tensor2tensor/data_generators/replace_oov.py b/tensor2tensor/data_generators/replace_oov.py deleted file mode 100644 index 7e2c8dc50..000000000 --- a/tensor2tensor/data_generators/replace_oov.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Data preprocessor for lm1b benchmark. - -Process the raw text file to replace out-of-vocab words with "". - -The input consists of a tokenized text file, where tokens are separated with -whitespace. - -Outputs a similar text file where the OOV words have been repalced with UNK. -The whitespace in the output may be different. - -This maintains compatibility with the benchmark, which does the same thing. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -from six.moves import xrange # pylint: disable=redefined-builtin - -import tensorflow as tf - -tf.app.flags.DEFINE_string("vocab_file", "", - "text file containing one word per line") - -tf.app.flags.DEFINE_string("in_filepattern", "", "input filename") - -tf.app.flags.DEFINE_string( - "out_prefix", "", "The output filename is equal to out_prefix plus " - "the last 15 characters of in_file. (e.g. -00001-of-00100)") - -FLAGS = tf.app.flags.FLAGS - - -def replace_oov(vocab, in_file): - """Replace out-of-vocab words with .""" - out_file = FLAGS.out_prefix + in_file[-15:] - print ("in_file", in_file, "out_file", out_file) - with tf.gfile.Open(out_file, "w") as out: - for line in tf.gfile.Open(in_file): - words = line.split() - for i in xrange(len(words)): - if not vocab.get(words[i]): - words[i] = "UNK" - out_line = " ".join(words) + "\n" - out.write(out_line) - - -def main(_): - vocab = {} - with tf.gfile.Open(FLAGS.vocab_file) as vocab_file: - for line in vocab_file: - vocab[line.strip()] = True - - in_files = tf.gfile.Glob(FLAGS.in_filepattern) - assert in_files, "No matching input files" - for in_file in in_files: - replace_oov(vocab, in_file) - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/data_generators/snli.py b/tensor2tensor/data_generators/snli.py index 1d21d94ac..1d3acd356 100644 --- a/tensor2tensor/data_generators/snli.py +++ b/tensor2tensor/data_generators/snli.py @@ -25,6 +25,7 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import tokenizer import tensorflow as tf @@ -139,7 +140,7 @@ def _get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size): return gs example_file = os.path.join(tmp_dir, _EXAMPLES_FILE) gs = text_encoder.SubwordTextEncoder() - token_counts = text_encoder.SubwordTextEncoder.get_token_counts( + token_counts = tokenizer.corpus_token_counts( example_file, corpus_max_lines=1000000) gs = gs.build_to_target_size( vocab_size, token_counts, min_val=1, max_val=1e3) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 51d21c36c..5d628fa4a 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -37,7 +37,8 @@ # Conversion between Unicode and UTF-8, if required (on Python2) -native_to_unicode = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) +def native_to_unicode(s): + return s.decode("utf-8") if (PY2 and not isinstance(s, unicode)) else s unicode_to_native = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) @@ -204,13 +205,11 @@ class SubwordTextEncoder(TextEncoder): """ - def __init__(self, filename=None, num_reserved_ids=2): + def __init__(self, filename=None): """Initialize and read from a file, if provided.""" - self._tokenizer = tokenizer.Tokenizer() if filename is not None: self._load_from_file(filename) - - super(SubwordTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) + super(SubwordTextEncoder, self).__init__(num_reserved_ids=None) def encode(self, raw_text): """Converts a native string to a list of subtoken ids. @@ -220,7 +219,7 @@ def encode(self, raw_text): Returns: a list of integers in the range [0, vocab_size) """ - return self._tokens_to_subtokens(self._tokenizer.encode( + return self._tokens_to_subtokens(tokenizer.encode( native_to_unicode(raw_text))) def decode(self, subtokens): @@ -231,7 +230,7 @@ def decode(self, subtokens): Returns: a native string """ - return unicode_to_native(self._tokenizer.decode( + return unicode_to_native(tokenizer.decode( self._subtokens_to_tokens(subtokens))) @property @@ -261,19 +260,15 @@ def _subtokens_to_tokens(self, subtokens): a list of strings. """ concatenated = "".join( - [self.subtoken_to_subtoken_string(s) for s in subtokens]) + [self._subtoken_to_subtoken_string(s) for s in subtokens]) split = concatenated.split("_") return [self._unescape_token(t + "_") for t in split if t] - def subtoken_to_subtoken_string(self, subtoken): + def _subtoken_to_subtoken_string(self, subtoken): """Subtoken_String (string) corresponding to the given subtoken (id).""" if 0 <= subtoken < self.vocab_size: - subtoken_string = self._all_subtoken_strings[subtoken] - if subtoken_string: - return subtoken_string - if 0 <= subtoken < self._num_reserved_ids: - return u"%s_" % RESERVED_TOKENS[subtoken] - return u"ID%d_" % subtoken + return self._all_subtoken_strings[subtoken] + return u"" def _escaped_token_to_subtokens(self, escaped_token): """Converts an escaped token string to a list of subtokens. @@ -287,7 +282,7 @@ def _escaped_token_to_subtokens(self, escaped_token): pos = 0 lesc = len(escaped_token) while pos < lesc: - end = lesc + end = min(lesc, pos + self._max_subtoken_len) while end > pos: subtoken = self._subtoken_string_to_id.get(escaped_token[pos:end], -1) if subtoken != -1: @@ -349,13 +344,15 @@ def bisect(min_val, max_val): def build_from_token_counts(self, token_counts, min_count, - num_iterations=4): + num_iterations=4, + num_reserved_ids=2): """Train a SubwordTextEncoder based on a dictionary of word counts. Args: token_counts: a dictionary of Unicode strings to int. min_count: an integer - discard subtokens with lower counts. num_iterations: an integer. how many iterations of refinement. + num_reserved_ids: an integer. how many ids to reserve for special tokens. """ # first determine the alphabet to include all characters with count at # least min_count in the dataset. @@ -421,7 +418,7 @@ def build_from_token_counts(self, new_subtoken_strings.sort(reverse=True) # Now we have a candidate vocabulary old_alphabet = self._alphabet - self._init_from_list([u""] * self._num_reserved_ids + + self._init_from_list([u""] * num_reserved_ids + [p[1] for p in new_subtoken_strings]) assert old_alphabet == self._alphabet tf.logging.info("vocab_size = %d" % self.vocab_size) @@ -429,7 +426,7 @@ def build_from_token_counts(self, original = "This sentence was encoded by the SubwordTextEncoder." encoded = self.encode(original) print(encoded) - print([self.subtoken_to_subtoken_string(s) for s in encoded]) + print([self._subtoken_to_subtoken_string(s) for s in encoded]) decoded = self.decode(encoded) print(decoded) assert decoded == original @@ -444,6 +441,9 @@ def dump(self): def _init_from_list(self, subtoken_strings): """Initialize from a list of subtoken strings.""" self._all_subtoken_strings = subtoken_strings + # we remember the maximum length of any subtoken to avoid having to + # check arbitrarily long strings. + self._max_subtoken_len = max([len(s) for s in subtoken_strings]) self._subtoken_string_to_id = { s: i for i, s in enumerate(subtoken_strings) if s} self._alphabet = set([c for c in subtoken_strings if len(c) == 1]) @@ -473,10 +473,11 @@ def _escape_token(self, token): Returns: escaped_token: a unicode string """ - token = token.replace("\\", "\\\\").replace("_", "\\u") + "_" + assert isinstance(token, unicode) + token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") + u"_" ret = u"" for c in token: - if c in self._alphabet: + if c in self._alphabet and c != u"\n": ret += c else: ret += u"\\%d;" % ord(c) @@ -497,12 +498,14 @@ def _unescape_token(self, escaped_token): c = escaped_token[pos] if c == "\\": pos += 1 + if pos >= len(escaped_token): + break c = escaped_token[pos] if c == u"u": ret += u"_" pos += 1 elif c == "\\": - ret += u"_" + ret += u"\\" pos += 1 else: semicolon_pos = escaped_token.find(u";", pos) @@ -517,19 +520,3 @@ def _unescape_token(self, escaped_token): ret += c pos += 1 return ret - - @classmethod - def get_token_counts(cls, text_filepattern, corpus_max_lines): - """Read the corpus and compute a dictionary of token counts.""" - tok = tokenizer.Tokenizer() - lines_read = 0 - filenames = tf.gfile.Glob(text_filepattern) - for text_filename in filenames: - with tf.gfile.Open(text_filename) as f: - for line in f: - # The tokenizer updates token_counts in encode() - tok.encode(native_to_unicode(line.strip())) - lines_read += 1 - if corpus_max_lines > 0 and lines_read > corpus_max_lines: - return tok.token_counts - return tok.token_counts diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py index 659e9da14..df8aa73eb 100644 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -21,16 +21,11 @@ Example usage: python data_generators/text_encoder_build_subword.py \ - --corpus_filepattern=$LM1B_DIR/train-unk-* \ - --corpus_max_lines=17500 \ - --output_fn=$DATA_DIR/lm1b16k.subword_text_encoder \ + --corpus_filepattern=$DATA_DIR/my_problem-train-* \ + --corpus_max_lines=12345 \ + --output_fn=$DATA_DIR/my_problem.subword_text_encoder \ --logtostderr -python data_generators/text_encoder_build_subword.py \ - --corpus_filepattern=$LM1B_DIR/train-unk-* \ - --corpus_max_lines=270000 \ - --output_fn=$DATA_DIR/lm1b64k.subword_text_encoder \ - --logtostderr """ from __future__ import absolute_import from __future__ import division @@ -39,6 +34,7 @@ # Dependency imports from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import tokenizer import tensorflow as tf @@ -50,6 +46,7 @@ tf.app.flags.DEFINE_integer('corpus_max_lines', 10000, 'How many lines of corpus to read') tf.app.flags.DEFINE_integer('num_iterations', 4, 'Number of iterations') +tf.app.flags.DEFINE_bool('split_on_newlines', True, 'Break corpus into lines.') FLAGS = tf.app.flags.FLAGS @@ -57,8 +54,9 @@ def main(unused_argv): gs = text_encoder.SubwordTextEncoder() if not FLAGS.corpus_filepattern: raise ValueError('Must provide --corpus_filepattern') - token_counts = text_encoder.SubwordTextEncoder.get_token_counts( - FLAGS.corpus_filepattern, FLAGS.corpus_max_lines) + token_counts = tokenizer.corpus_token_counts( + FLAGS.corpus_filepattern, FLAGS.corpus_max_lines, + split_on_newlines=FLAGS.split_on_newlines) gs.build_from_token_counts(token_counts, FLAGS.min_count, FLAGS.num_iterations) diff --git a/tensor2tensor/data_generators/text_encoder_inspect_subword.py b/tensor2tensor/data_generators/text_encoder_inspect_subword.py deleted file mode 100644 index 0ad9a2701..000000000 --- a/tensor2tensor/data_generators/text_encoder_inspect_subword.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Inspect a TFRecord file of tensorflow.Example and show tokenizations. - -python data_generators/text_encoder_inspect_subword.py \ - --logtostderr \ - --vocab_file=$DATA_DIR/tokens.vocab.8192 \ - --in_file=$DATA_DIR/wmt_ende_tokens_8k-train-00000-of-00100 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -from tensor2tensor.data_generators import text_encoder - -import tensorflow as tf - -tf.app.flags.DEFINE_string("vocab_file", "", - "SubwordTextEncoder vocabulary file") - -tf.app.flags.DEFINE_string("in_file", "", "input filename") - -FLAGS = tf.app.flags.FLAGS - - -def ShowSequence(subtokenizer, subtokens, label): - print("%s decoded = %s" % (label, subtokenizer.decode(subtokens))) - print("%s subtoken ids = %s" % (label, subtokens)) - print("%s subtoken strings = %s" % - (label, - [subtokenizer.subtoken_to_subtoken_string(s) for s in subtokens])) - print("") - - -def main(_): - """Convert a file to examples.""" - subtokenizer = text_encoder.SubwordTextEncoder(FLAGS.vocab_file) - reader = tf.python_io.tf_record_iterator(FLAGS.in_file) - for record in reader: - x = tf.train.Example() - x.ParseFromString(record) - inputs = [int(i) for i in x.features.feature["inputs"].int64_list.value] - targets = [int(i) for i in x.features.feature["targets"].int64_list.value] - ShowSequence(subtokenizer, inputs, "inputs") - ShowSequence(subtokenizer, targets, "targets") - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 8490ead19..df6ef6470 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -49,61 +49,101 @@ # Dependency imports +from six import PY2 from six import unichr # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf -class Tokenizer(object): - """Vocab for breaking words into Unicode wordpieces. + +# Conversion between Unicode and UTF-8, if required (on Python2) +_native_to_unicode = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) + + +# This set contains all letter and number characters. +_ALPHANUMERIC_CHAR_SET = set( + unichr(i) for i in xrange(sys.maxunicode) + if (unicodedata.category(unichr(i)).startswith("L") or + unicodedata.category(unichr(i)).startswith("N"))) + + +def encode(text): + """Encode a unicode string as a list of tokens. + + Args: + text: a unicode string + Returns: + a list of tokens as Unicode strings + """ + if not text: + return [] + ret = [] + token_start = 0 + # Classify each character in the input string + is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text] + for pos in xrange(1, len(text)): + if is_alnum[pos] != is_alnum[pos - 1]: + token = text[token_start:pos] + if token != u" " or token_start == 0: + ret.append(token) + token_start = pos + final_token = text[token_start:] + ret.append(final_token) + return ret + + +def decode(tokens): + """Decode a list of tokens to a unicode string. + + Args: + tokens: a list of Unicode strings + Returns: + a unicode string + """ + ret = u"" + token_is_alnum = [t[0] in _ALPHANUMERIC_CHAR_SET for t in tokens] + for i, token in enumerate(tokens): + if i > 0 and token_is_alnum[i - 1] and token_is_alnum[i]: + ret += u" " + ret += token + return ret + + +def corpus_token_counts(text_filepattern, corpus_max_lines, + split_on_newlines=True): + """Read the corpus and compute a dictionary of token counts. + + Args: + text_filepattern: a pattern matching one or more files + corpus_max_lines: an integer - maximum total lines to read. + split_on_newlines: a boolean. If true, then split files by lines and strip + leading and trailing whitespace from each line. + + Returns: + a dictionary from token to count. """ + def read_corpus(): + """Read the corpus.""" + docs = [] + lines_read = 0 + filenames = tf.gfile.Glob(text_filepattern) + for text_filename in filenames: + with tf.gfile.Open(text_filename) as f: + if not split_on_newlines: + docs.append("") + for line in f: + if split_on_newlines: + # The tokenizer updates token_counts in encode() + docs.append(line.strip()) + else: + docs[-1] += line + lines_read += 1 + if corpus_max_lines > 0 and lines_read > corpus_max_lines: + return docs + return docs + counts = defaultdict(int) + for doc in read_corpus(): + for tok in encode(_native_to_unicode(doc)): + counts[tok] += 1 + return counts - # This set contains all letter and number characters. - _ALPHANUMERIC_CHAR_SET = set( - unichr(i) for i in xrange(sys.maxunicode) - if (unicodedata.category(unichr(i)).startswith("L") or - unicodedata.category(unichr(i)).startswith("N"))) - - def __init__(self): - self.token_counts = defaultdict(int) - - def encode(self, text): - """Encode a unicode string as a list of tokens. - - Args: - text: a unicode string - Returns: - a list of tokens as Unicode strings - """ - if not text: - return [] - ret = [] - token_start = 0 - # Classify each character in the input string - is_alnum = [c in self._ALPHANUMERIC_CHAR_SET for c in text] - for pos in xrange(1, len(text)): - if is_alnum[pos] != is_alnum[pos - 1]: - token = text[token_start:pos] - if token != u" " or token_start == 0: - ret.append(token) - self.token_counts[token] += 1 - token_start = pos - final_token = text[token_start:] - ret.append(final_token) - self.token_counts[final_token] += 1 - return ret - - def decode(self, tokens): - """Decode a list of tokens to a unicode string. - - Args: - tokens: a list of Unicode strings - Returns: - a unicode string - """ - ret = u"" - token_is_alnum = [t[0] in self._ALPHANUMERIC_CHAR_SET for t in tokens] - for i, token in enumerate(tokens): - if i > 0 and token_is_alnum[i - 1] and token_is_alnum[i]: - ret += u" " - ret += token - return ret diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py index a85e244ca..404a11396 100644 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -33,31 +33,30 @@ class TokenizerTest(tf.test.TestCase): def testEncode(self): - t = tokenizer.Tokenizer() self.assertEqual( - t.encode(u"Dude - that's so cool."), + tokenizer.encode(u"Dude - that's so cool."), [u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."]) self.assertEqual( - t.encode(u"Łukasz est né en 1981."), + tokenizer.encode(u"Łukasz est né en 1981."), [u"Łukasz", u"est", u"né", u"en", u"1981", u"."]) self.assertEqual( - t.encode(u" Spaces at the ends "), + tokenizer.encode(u" Spaces at the ends "), [u" ", u"Spaces", u"at", u"the", u"ends", u" "]) - self.assertEqual(t.encode(u"802.11b"), [u"802", u".", u"11b"]) - self.assertEqual(t.encode(u"two. \nlines"), [u"two", u". \n", u"lines"]) + self.assertEqual(tokenizer.encode(u"802.11b"), [u"802", u".", u"11b"]) + self.assertEqual(tokenizer.encode(u"two. \nlines"), + [u"two", u". \n", u"lines"]) def testDecode(self): - t = tokenizer.Tokenizer() self.assertEqual( - t.decode([u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."]), + tokenizer.decode( + [u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."]), u"Dude - that's so cool.") def testInvertibilityOnRandomStrings(self): - t = tokenizer.Tokenizer() random.seed(123) for _ in xrange(1000): s = u"".join([unichr(random.randint(0, 65535)) for _ in xrange(10)]) - self.assertEqual(s, t.decode(t.encode(s))) + self.assertEqual(s, tokenizer.decode(tokenizer.encode(s))) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py new file mode 100644 index 000000000..5ccbf14d9 --- /dev/null +++ b/tensor2tensor/data_generators/wiki.py @@ -0,0 +1,128 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generator for Wikipedia title to article dataset.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import bz2 +from collections import defaultdict +import os + +# Dependency imports + +import six +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import tokenizer + +import tensorflow as tf + + +# End-of-sentence marker (should correspond to the position of EOS in the +# RESERVED_TOKENS list in text_encoder.py) +EOS = 1 + + +def _maybe_download_corpus(tmp_dir): + """Download corpus if necessary. + + Args: + tmp_dir: directory containing dataset. + + Returns: + filepath of the downloaded corpus file. + """ + corpus_url = ("https://dumps.wikimedia.org/enwiki/20170620/" + "enwiki-20170620-pages-articles-multistream.xml.bz2") + corpus_filename = os.path.basename(corpus_url) + corpus_filepath = os.path.join(tmp_dir, corpus_filename) + if not os.path.exists(corpus_filepath): + generator_utils.maybe_download(tmp_dir, corpus_filename, corpus_url) + return corpus_filepath + + +def page_generator(tmp_dir, max_docs=None): + doc = u"" + count = 0 + corpus_filepath = _maybe_download_corpus(tmp_dir) + for line in bz2.BZ2File(corpus_filepath, "r"): + line = unicode(line, "utf-8") + if not doc and line != u" \n": + continue + doc += line + if line == u" \n": + yield doc + doc = u"" + count += 1 + if max_docs and count >= max_docs: + break + + +def _page_title(page): + start_pos = page.find(u"") + end_pos = page.find(u"") + assert start_pos != -1 + assert end_pos != -1 + start_pos += len(u"") + return page[start_pos:end_pos] + + +def _get_or_build_subword_text_encoder(tmp_dir): + """Builds a SubwordTextEncoder based on the corpus. + + Args: + tmp_dir: a string + + Returns: + a SubwordTextEncoder. + """ + filename = os.path.join(tmp_dir, "wiki_32k.subword_text_encoder") + if tf.gfile.Exists(filename): + return text_encoder.SubwordTextEncoder(filename) + token_counts = defaultdict(int) + for page in page_generator(tmp_dir, max_docs=1000): + tokens = tokenizer.encode(page) + tokens = set(tokens) + for tok in tokens: + token_counts[tok] += 1 + new_token_counts = defaultdict(int) + for token, count in six.iteritems(token_counts): + if count >= 3: + new_token_counts[token] = count + ret = text_encoder.SubwordTextEncoder() + ret.build_from_token_counts(new_token_counts, min_count=10) + ret.store_to_file(filename) + return ret + + +def generator(tmp_dir, train): + """Generator for lm1b sentences. + + Args: + tmp_dir: a string. + train: a boolean. + + Yields: + A dictionary {"inputs": [<subword ids>], "targets": [<subword ids>]} + """ + assert train + encoder = _get_or_build_subword_text_encoder(tmp_dir) + for page in page_generator(tmp_dir): + title = _page_title(page) + encoded = encoder.encode(page) + [EOS] + encoded_title = encoder.encode(title) + [EOS] + yield {"inputs": encoded_title, "targets": encoded} diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index 0022081ae..7386d3ea0 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -45,8 +45,9 @@ def test_generator(): for i in xrange(100): yield {"inputs": [i], "targets": [i], "floats": [i + 0.5]} - generator_utils.generate_files(test_generator(), tmp_file_name, tmp_dir) - self.assertTrue(tf.gfile.Exists(tmp_file_path + "-00000-of-00001")) + filenames = generator_utils.train_data_filenames(tmp_file_name, tmp_dir, 1) + generator_utils.generate_files(test_generator(), filenames) + self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) examples_train = data_reader.examples_queue( [tmp_file_path + "*"], { @@ -82,7 +83,7 @@ def test_generator(): self.assertTrue(is_shuffled) # Clean up. - os.remove(tmp_file_path + "-00000-of-00001") + os.remove(tmp_file_path + "-train-00000-of-00001") os.remove(tmp_file_path) # TODO(rsepassi): fix and reenable test @@ -97,8 +98,9 @@ def test_generator(): for i in xrange(100): yield {"inputs": [i + 1 for _ in xrange(i + 1)], "targets": [i + 1]} - generator_utils.generate_files(test_generator(), tmp_file_name, tmp_dir) - self.assertTrue(tf.gfile.Exists(tmp_file_path + "-00000-of-00001")) + filenames = generator_utils.train_data_filenames(tmp_file_name, tmp_dir, 1) + generator_utils.generate_files(test_generator(), filenames) + self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) examples_train = data_reader.examples_queue([tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), @@ -140,7 +142,7 @@ def test_generator(): # Clean up. coord.request_stop() coord.join() - os.remove(tmp_file_path + "-00000-of-00001") + os.remove(tmp_file_path + "-train-00000-of-00001") os.remove(tmp_file_path) diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index fd1c6885c..d621b6fbc 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -38,10 +38,12 @@ def setUpClass(cls): FLAGS.problems = "algorithmic_addition_binary40" TrainerUtilsTest.data_dir = tf.test.get_temp_dir() gen = algorithmic.identity_generator(2, 10, 300) - generator_utils.generate_files(gen, FLAGS.problems + "-train", - TrainerUtilsTest.data_dir, 1, 100) - generator_utils.generate_files(gen, FLAGS.problems + "-dev", - TrainerUtilsTest.data_dir, 1, 100) + train_filenames = generator_utils.train_data_filenames( + FLAGS.problems, TrainerUtilsTest.data_dir, 1) + dev_filenames = generator_utils.dev_data_filenames( + FLAGS.problems, TrainerUtilsTest.data_dir, 1) + generator_utils.generate_files(gen, train_filenames, 100) + generator_utils.generate_files(gen, dev_filenames, 100) def testModelsImported(self): models = registry.list_models() From 72f08746adbe72ab520c0bdad88f334569d8d98d Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 11 Jul 2017 17:40:07 -0700 Subject: [PATCH 0101/4095] internal merge PiperOrigin-RevId: 161608262 --- tensor2tensor/models/common_layers.py | 127 ++++++++++++- tensor2tensor/models/common_layers_test.py | 60 ++++++ tensor2tensor/models/models.py | 1 + .../models/transformer_alternative.py | 174 ++++++++++++++++++ 4 files changed, 361 insertions(+), 1 deletion(-) create mode 100644 tensor2tensor/models/transformer_alternative.py diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 7729bdeb1..15a712ef2 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -292,7 +292,8 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): """Conditional conv_fn making kernel 1d or 2d depending on inputs shape.""" static_shape = inputs.get_shape() if not static_shape or len(static_shape) != 4: - raise ValueError("Inputs to conv must have statically known rank 4.") + raise ValueError("Inputs to conv must have statically known rank 4. " + "Shape: " + str(static_shape)) # Add support for left padding. if "padding" in kwargs and kwargs["padding"] == "LEFT": dilation_rate = (1, 1) @@ -1402,3 +1403,127 @@ def smoothing_cross_entropy(logits, labels, vocab_size, confidence): xentropy = tf.nn.softmax_cross_entropy_with_logits( logits=logits, labels=soft_targets) return xentropy - normalizing + + +def global_pool_1d(inputs, pooling_type="MAX", mask=None): + """Pool elements across the last dimension. + + Useful to convert a list of vectors into a single vector so as + to get a representation of a set. + + Args: + inputs: A tensor of dimensions batch_size x sequence_length x input_dims + containing the sequences of input vectors. + pooling_type: the pooling type to use, MAX or AVR + mask: A tensor of dimensions batch_size x sequence_length containing a + mask for the inputs with 1's for existing elements, and 0's elsewhere. + + Returns: + output: A tensor of dimensions batch_size x input_dims + dimension containing the sequences of transformed vectors. + """ + with tf.name_scope("global_pool", [inputs]): + if mask is not None: + mask = tf.expand_dims(mask, axis=2) + inputs = tf.multiply(inputs, mask) + + if pooling_type == "MAX": + # A tf.pool can be used here, but reduce is cleaner + output = tf.reduce_max(inputs, axis=1) + elif pooling_type == "AVR": + if mask is not None: + # Some elems are dummy elems so we can't just reduce the average. + output = tf.reduce_sum(inputs, axis=1) + num_elems = tf.reduce_sum(mask, axis=1, keep_dims=True) + output = tf.div(output, tf.maximum(num_elems, 1)) + else: + output = tf.reduce_mean(inputs, axis=1) + + return output + + +def linear_set_layer(layer_size, + inputs, + context=None, + activation_fn=tf.nn.relu, + dropout=0.0, + name=None): + """Basic layer type for doing funky things with sets. + + Applies a linear transformation to each element in the input set. + If a context is supplied, it is concatenated with the inputs. + e.g. One can use global_pool_1d to get a representation of the set which + can then be used as the context for the next layer. + + TODO: Add bias add (or control the biases used). + + Args: + layer_size: Dimension to transform the input vectors to. + inputs: A tensor of dimensions batch_size x sequence_length x input_dims + containing the sequences of input vectors. + context: A tensor of dimensions batch_size x context_dims + containing a global statistic about the set. + activation_fn: The activation function to use. + dropout: Dropout probability. + name: name. + + Returns: + output: A tensor of dimensions batch_size x sequence_length x output_dims + dimension containing the sequences of transformed vectors. + """ + with tf.variable_scope(name, "linear_set_layer", [inputs]): + # Apply 1D convolution to apply linear filter to each element + # along the 2nd dimension. + outputs = conv1d(inputs, layer_size, 1, activation=None, name="set_conv") + + # Apply the context if it exists. + if context is not None: + # Unfortunately tf doesn't support broadcasting via concat, but we can + # simply add the transformed context to get the same effect. + context = tf.expand_dims(context, axis=1) + cont_tfm = conv1d(context, layer_size, 1, + activation=None, name="cont_conv") + outputs += cont_tfm + + if activation_fn is not None: + outputs = activation_fn(outputs) + + if dropout != 0.0: + outputs = tf.nn.dropout(outputs, 1.0 - dropout) + + return outputs + + +def ravanbakhsh_set_layer(layer_size, + inputs, + mask=None, + activation_fn=tf.nn.tanh, + dropout=0.0, + name=None): + """Layer from Deep Sets paper: https://arxiv.org/abs/1611.04500 . + + More parameter-efficient verstion of a linear-set-layer with context. + + Args: + layer_size: Dimension to transform the input vectors to. + inputs: A tensor of dimensions batch_size x sequence_length x vector + containing the sequences of input vectors. + mask: A tensor of dimensions batch_size x sequence_length containing a + mask for the inputs with 1's for existing elements, and 0's elsewhere. + activation_fn: The activation function to use. + dropout: dropout. + name: name. + + Returns: + output: A tensor of dimensions batch_size x sequence_length x vector + dimension containing the sequences of transformed vectors. + """ + with tf.variable_scope(name, "ravanbakhsh_set_layer", [inputs]): + output = linear_set_layer( + layer_size, + inputs - tf.expand_dims(global_pool_1d(inputs, mask=mask), axis=1), + activation_fn=activation_fn, + dropout=dropout, + name=name) + + return output diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index d9b78033d..a87776bfb 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -82,6 +82,14 @@ def testConv(self): res = session.run(y) self.assertEqual(res.shape, (5, 5, 1, 13)) + def testConv1d(self): + x = np.random.rand(5, 7, 11) + with self.test_session() as session: + y = common_layers.conv1d(tf.constant(x, dtype=tf.float32), 13, 1) + session.run(tf.global_variables_initializer()) + res = session.run(y) + self.assertEqual(res.shape, (5, 7, 13)) + def testSeparableConv(self): x = np.random.rand(5, 7, 1, 11) with self.test_session() as session: @@ -361,6 +369,58 @@ def testResidualFnWithLayerNorm(self): actual = session.run(x3) self.assertEqual(actual.shape, (5, 2, 1, 11)) + def testGlobalPool1d(self): + x1 = np.random.rand(5, 4, 11) + no_mask = np.ones((5, 4)) + full_mask = np.zeros((5, 4)) + + with self.test_session() as session: + x1_ = tf.Variable(x1, dtype=tf.float32) + no_mask_ = tf.Variable(no_mask, dtype=tf.float32) + full_mask_ = tf.Variable(full_mask, dtype=tf.float32) + + none_mask_max = common_layers.global_pool_1d(x1_) + no_mask_max = common_layers.global_pool_1d(x1_, mask=no_mask_) + result1 = tf.reduce_sum(none_mask_max - no_mask_max) + + full_mask_max = common_layers.global_pool_1d(x1_, mask=full_mask_) + result2 = tf.reduce_sum(full_mask_max) + + none_mask_avr = common_layers.global_pool_1d(x1_, "AVR") + no_mask_avr = common_layers.global_pool_1d(x1_, "AVR", no_mask_) + result3 = tf.reduce_sum(none_mask_avr - no_mask_avr) + + full_mask_avr = common_layers.global_pool_1d(x1_, "AVR", full_mask_) + result4 = tf.reduce_sum(full_mask_avr) + + session.run(tf.global_variables_initializer()) + actual = session.run([result1, result2, result3, result4]) + self.assertAllEqual(actual[:3], [0.0, 0.0, 0.0]) + + def testLinearSetLayer(self): + x1 = np.random.rand(5, 4, 11) + cont = np.random.rand(5, 13) + with self.test_session() as session: + x1_ = tf.Variable(x1, dtype=tf.float32) + cont_ = tf.Variable(cont, dtype=tf.float32) + + simple_ff = common_layers.linear_set_layer(32, x1_) + cont_ff = common_layers.linear_set_layer(32, x1_, context=cont_) + + session.run(tf.global_variables_initializer()) + actual = session.run([simple_ff, cont_ff]) + self.assertEqual(actual[0].shape, (5, 4, 32)) + self.assertEqual(actual[1].shape, (5, 4, 32)) + + def testRavanbakhshSetLayer(self): + x1 = np.random.rand(5, 4, 11) + with self.test_session() as session: + x1_ = tf.Variable(x1, dtype=tf.float32) + layer = common_layers.ravanbakhsh_set_layer(32, x1_) + session.run(tf.global_variables_initializer()) + actual = session.run(layer) + self.assertEqual(actual.shape, (5, 4, 32)) + if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index b8f0811e5..ae0e0da61 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -32,5 +32,6 @@ from tensor2tensor.models import neural_gpu from tensor2tensor.models import slicenet from tensor2tensor.models import transformer +from tensor2tensor.models import transformer_alternative from tensor2tensor.models import xception # pylint: enable=unused-import diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py new file mode 100644 index 000000000..e50cba86f --- /dev/null +++ b/tensor2tensor/models/transformer_alternative.py @@ -0,0 +1,174 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Alternative transformer network. + +Using different layer types to demonstrate alternatives to self attention. + +Code is mostly copied from original Transformer source. +""" + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.models import common_attention +from tensor2tensor.models import common_layers +from tensor2tensor.models import transformer +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +@registry.register_model +class TransformerAlt(t2t_model.T2TModel): + + def model_fn_body(self, features): + hparams = self._hparams + targets = features["targets"] + inputs = features.get("inputs") + target_space = features.get("target_space_id") + + inputs = common_layers.flatten4d3d(inputs) + targets = common_layers.flatten4d3d(targets) + + (encoder_input, encoder_attention_bias, + _) = transformer.transformer_prepare_encoder(inputs, target_space, hparams) + (decoder_input, + decoder_self_attention_bias) = transformer.transformer_prepare_decoder( + targets, hparams) + + # We need masks of the form batch size x input sequences + # Biases seem to be of the form batch_size x 1 x input sequences x vec dim + # Squeeze out dim one, and get the first element of each vector. + encoder_mask = tf.squeeze(encoder_attention_bias, [1])[:, :, 0] + decoder_mask = tf.squeeze(decoder_self_attention_bias, [1])[:, :, 0] + + def residual_fn(x, y): + return common_layers.layer_norm(x + tf.nn.dropout( + y, 1.0 - hparams.residual_dropout)) + + encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) + decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) + encoder_output = alt_transformer_encoder( + encoder_input, residual_fn, encoder_mask, hparams) + + decoder_output = alt_transformer_decoder( + decoder_input, encoder_output, residual_fn, decoder_mask, + encoder_attention_bias, hparams) + + decoder_output = tf.expand_dims(decoder_output, 2) + + return decoder_output + + +def composite_layer(inputs, mask, hparams): + """Composite layer.""" + x = inputs + + # Applies ravanbakhsh on top of each other. + if hparams.composite_layer_type == "ravanbakhsh": + for layer in xrange(hparams.layers_per_layer): + with tf.variable_scope(".%d" % layer): + x = common_layers.ravanbakhsh_set_layer( + hparams.hidden_size, + x, + mask=mask, + dropout=0.0) + + # Transforms elements to get a context, and then uses this in a final layer. + elif hparams.composite_layer_type == "reembedding": + # Transform elements n times and then pool. + for layer in xrange(hparams.layers_per_layer): + with tf.variable_scope(".%d" % layer): + x = common_layers.linear_set_layer( + hparams.hidden_size, + x, + dropout=0.0) + context = common_layers.global_pool_1d(x, mask=mask) + + # Final layer. + x = common_layers.linear_set_layer( + hparams.hidden_size, + x, + context=context, + dropout=0.0) + + return x + + +def alt_transformer_encoder(encoder_input, + residual_fn, + mask, + hparams, + name="encoder"): + """Alternative encoder.""" + x = encoder_input + + with tf.variable_scope(name): + for layer in xrange(hparams.num_hidden_layers): + with tf.variable_scope("layer_%d" % layer): + x = residual_fn(x, composite_layer(x, mask, hparams)) + + return x + + +def alt_transformer_decoder(decoder_input, + encoder_output, + residual_fn, + mask, + encoder_decoder_attention_bias, + hparams, + name="decoder"): + """Alternative decoder.""" + x = decoder_input + + # Summaries don't work in multi-problem setting yet. + summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 + with tf.variable_scope(name): + for layer in xrange(hparams.num_hidden_layers): + with tf.variable_scope("layer_%d" % layer): + + x_ = common_attention.multihead_attention( + x, + encoder_output, + encoder_decoder_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + summaries=summaries, + name="encdec_attention") + + x_ = residual_fn(x_, composite_layer(x_, mask, hparams)) + x = residual_fn(x, x_) + + return x + + +@registry.register_hparams +def transformer_alt(): + """Set of hyperparameters.""" + hparams = transformer.transformer_base() + hparams.batch_size = 64 + hparams.add_hparam("layers_per_layer", 4) + hparams.add_hparam("composite_layer_type", "reembedding") + return hparams From 9efa4e2f41c19aaa99d25a00caa9a7189ff2ebcf Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 11 Jul 2017 18:23:35 -0700 Subject: [PATCH 0102/4095] Fix T2T open-source distributed training PiperOrigin-RevId: 161612632 --- tensor2tensor/bin/t2t-make-tf-configs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs index 049e80ef3..ac899b628 100644 --- a/tensor2tensor/bin/t2t-make-tf-configs +++ b/tensor2tensor/bin/t2t-make-tf-configs @@ -75,7 +75,8 @@ def main(_): "task": { "type": task_type, "index": idx - } + }, + "environment": "cloud", }) print("'%s'\t%s" % (tf_config, cmd_line_flags)) From 64defb7da702603b04f7dfae5d215eb671553059 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 11 Jul 2017 18:27:00 -0700 Subject: [PATCH 0103/4095] Fix T2T open-source distributed training PiperOrigin-RevId: 161612824 --- setup.py | 8 ++++++-- tensor2tensor/bin/t2t-make-tf-configs | 2 +- tensor2tensor/docs/distributed_training.md | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 821a88ee2..b70966986 100644 --- a/setup.py +++ b/setup.py @@ -5,14 +5,18 @@ setup( name='tensor2tensor', - version='1.0.12', + version='1.0.13', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', url='http://github.com/tensorflow/tensor2tensor', license='Apache 2.0', packages=find_packages(), - scripts=['tensor2tensor/bin/t2t-trainer', 'tensor2tensor/bin/t2t-datagen'], + scripts=[ + 'tensor2tensor/bin/t2t-trainer', + 'tensor2tensor/bin/t2t-datagen', + 'tensor2tensor/bin/t2t-make-tf-configs', + ], install_requires=[ 'numpy', 'sympy', diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs index ac899b628..ae87ffbd8 100644 --- a/tensor2tensor/bin/t2t-make-tf-configs +++ b/tensor2tensor/bin/t2t-make-tf-configs @@ -17,7 +17,7 @@ Usage: -`make_tf_configs.py --workers="server1:1234" --ps="server3:2134,server4:2334"` +`t2t-make-tf-configs --workers="server1:1234" --ps="server3:2134,server4:2334"` Outputs 1 line per job to stdout, first the workers, then the parameter servers. Each line has the TF_CONFIG, then a tab, then the command line flags for that diff --git a/tensor2tensor/docs/distributed_training.md b/tensor2tensor/docs/distributed_training.md index 4523a4b32..f41197fc4 100644 --- a/tensor2tensor/docs/distributed_training.md +++ b/tensor2tensor/docs/distributed_training.md @@ -51,7 +51,7 @@ Parameter servers only need `--schedule=run_std_server`. ## Utility to produce `TF_CONFIG` and flags -[`bin/make_tf_configs.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t-make-tf-configs)) +[`t2t-make-tf-configs`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t-make-tf-configs)) generates the `TF_CONFIG` json strings and the above-mentioned command-line flags for the workers and parameter servers. From 235392c75ddceb45acf3c785fcedac67224ff4e4 Mon Sep 17 00:00:00 2001 From: Shanbo Cheng <cshanbo@gmail.com> Date: Wed, 12 Jul 2017 13:20:41 +0800 Subject: [PATCH 0104/4095] add wmt_zhen_token_32k --- tensor2tensor/bin/t2t-datagen | 6 ++ .../data_generators/problem_hparams.py | 28 ++++++++ tensor2tensor/data_generators/wmt.py | 66 +++++++++++++++++++ 3 files changed, 100 insertions(+) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 0367fce94..4396d91dd 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -140,6 +140,12 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**15) ), + "wmt_zhen_tokens_32k": ( + lambda: wmt.zhen_wordpiece_token_generator(FLAGS.tmp_dir, True, + 2**15, 2**15), + lambda: wmt.zhen_wordpiece_token_generator(FLAGS.tmp_dir, False, + 2**15, 2**15) + ), "lm1b_32k": ( lambda: lm1b.generator(FLAGS.tmp_dir, True), lambda: lm1b.generator(FLAGS.tmp_dir, False) diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 203dba852..84eb72a56 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -177,6 +177,7 @@ def default_problem_hparams(): # 13: Audio spectral domain # 14: Parse characters # 15: Parse tokens + # 16: Chinese tokens # Add more above if needed. input_space_id=0, target_space_id=0, @@ -472,6 +473,32 @@ def wmt_ende_tokens(model_hparams, wrong_vocab_size): return p +def wmt_zhen_tokens(model_hparams, wrong_vocab_size): + """Chinese to English translation benchmark.""" + p = default_problem_hparams() + # This vocab file must be present within the data directory. + if model_hparams.shared_embedding_and_softmax_weights == 1: + model_hparams.shared_embedding_and_softmax_weights = 0 + source_vocab_filename = os.path.join(model_hparams.data_dir, + "tokens.vocab.zh.%d" % wrong_vocab_size) + target_vocab_filename = os.path.join(model_hparams.data_dir, + "tokens.vocab.en.%d" % wrong_vocab_size) + source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) + p.input_modality = { + "inputs": (registry.Modalities.SYMBOL, source_token.vocab_size) + } + p.target_modality = (registry.Modalities.SYMBOL, target_token.vocab_size) + p.vocabulary = { + "inputs": source_token, + "targets": target_token, + } + p.loss_multiplier = 1.4 + p.input_space_id = 16 + p.target_space_id = 4 + return p + + def wmt_ende_v2(model_hparams, vocab_size): """English to German translation benchmark with separate vocabularies.""" p = default_problem_hparams() @@ -730,6 +757,7 @@ def img2img_imagenet(unused_model_hparams): "wmt_ende_bpe32k_160": wmt_ende_bpe32k, "wmt_ende_v2_32k_combined": lambda p: wmt_ende_v2(p, 2**15), "wmt_ende_v2_16k_combined": lambda p: wmt_ende_v2(p, 2**14), + "wmt_zhen_tokens_32k": lambda p: wmt_zhen_tokens(p, 2**15), "image_cifar10_tune": image_cifar10, "image_cifar10_test": image_cifar10, "image_mnist_tune": image_mnist, diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 1937e1b71..534f0d3f1 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -101,6 +101,38 @@ def token_generator(source_path, target_path, token_vocab, eos=None): source, target = source_file.readline(), target_file.readline() +def bi_vocabs_token_generator(source_path, target_path, + source_token_vocab, + target_token_vocab, + eos=None): + """Generator for sequence-to-sequence tasks that uses tokens. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are token ids from the " "-split source (and target, resp.) lines + converted to integers using the token_map. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + source_token_vocab: text_encoder.TextEncoder object. + target_token_vocab: text_encoder.TextEncoder object. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from tokens in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = source_token_vocab.encode(source.strip()) + eos_list + target_ints = target_token_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + def _get_wmt_ende_dataset(directory, filename): """Extract the WMT en-de corpus `filename` to directory unless it's there.""" train_path = os.path.join(directory, filename) @@ -177,6 +209,21 @@ def ende_bpe_token_generator(tmp_dir, train): ], ] +_ZHEN_TRAIN_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", + ("training/news-commentary-v12.zh-en.zh", + "training/news-commentary-v12.zh-en.en") + ] +] + +_ZHEN_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newsdev2017-zhen-src.zh", + "dev/newsdev2017-zhen-ref.en") + ] +] def _compile_data(tmp_dir, datasets, filename): """Concatenate all `datasets` and save to `filename`.""" @@ -253,6 +300,25 @@ def ende_character_generator(tmp_dir, train): character_vocab, EOS) +def zhen_wordpiece_token_generator(tmp_dir, train, + source_vocab_size, + target_vocab_size): + datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + source_vocab = generator_utils.get_or_generate_vocab( + tmp_dir, "tokens.vocab.zh.%d" % source_vocab_size, + source_vocab_size, source_datasets) + target_vocab = generator_utils.get_or_generate_vocab( + tmp_dir, "tokens.vocab.en.%d" % target_vocab_size, + target_vocab_size, target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) + return bi_vocabs_token_generator(data_path + ".lang1", + data_path + ".lang2", + source_vocab, target_vocab, EOS) + + def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): """Instance of token generator for the WMT en->fr task.""" symbolizer_vocab = generator_utils.get_or_generate_vocab( From b1ad6f076c734f9b4823af7f05f1ca0d643afd83 Mon Sep 17 00:00:00 2001 From: Deasuke <hi-watanabe@rozetta.jp> Date: Wed, 12 Jul 2017 06:34:41 +0000 Subject: [PATCH 0105/4095] replace unicode with six.text_type --- tensor2tensor/data_generators/text_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 5d628fa4a..4184a7e87 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -473,7 +473,7 @@ def _escape_token(self, token): Returns: escaped_token: a unicode string """ - assert isinstance(token, unicode) + assert isinstance(token, six.text_type) token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") + u"_" ret = u"" for c in token: From 89ddffe054f25b5cea3d4c6d543562d51d511099 Mon Sep 17 00:00:00 2001 From: Deasuke <hi-watanabe@rozetta.jp> Date: Wed, 12 Jul 2017 07:13:50 +0000 Subject: [PATCH 0106/4095] use decode instead of unicode in PY3 --- tensor2tensor/data_generators/wiki.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 5ccbf14d9..99a9e64e6 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -25,6 +25,7 @@ # Dependency imports import six +from six import PY2 from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators import tokenizer @@ -60,7 +61,7 @@ def page_generator(tmp_dir, max_docs=None): count = 0 corpus_filepath = _maybe_download_corpus(tmp_dir) for line in bz2.BZ2File(corpus_filepath, "r"): - line = unicode(line, "utf-8") + line = unicode(line, "utf-8") if PY2 else line.decode("utf-8") if not doc and line != u" <page>\n": continue doc += line From 7d3c10b4da94a38a9a731180d6c4c07773ccce13 Mon Sep 17 00:00:00 2001 From: ReDeiPirati <alessio.gozzoli@gmail.com> Date: Wed, 12 Jul 2017 12:55:53 +0200 Subject: [PATCH 0107/4095] Integrate Yellow Optimizer(with test) in T2T(Unfortunately there is a bug in TF which do not ensure a smooth running). --- tensor2tensor/models/common_hparams.py | 7 +- tensor2tensor/utils/trainer_utils.py | 5 + tensor2tensor/utils/yellowfin.py | 568 +++++++++++++++++++++++++ tensor2tensor/utils/yellowfin_test.py | 226 ++++++++++ 4 files changed, 805 insertions(+), 1 deletion(-) create mode 100644 tensor2tensor/utils/yellowfin.py create mode 100644 tensor2tensor/utils/yellowfin_test.py diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index 9bb3af4eb..c8c458414 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -202,4 +202,9 @@ def basic_range1(ranged_hparams): rhp.set_float("optimizer_adam_beta1", 0.8, 0.9) rhp.set_float("optimizer_adam_beta2", 0.995, 0.999) rhp.set_categorical("optimizer", - ["Adam", "Adagrad", "Momentum", "RMSProp", "SGD"]) + ["Adam", + "Adagrad", + "Momentum", + "RMSProp", + "SGD", + "YellowFin"]) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 75883accd..057612ecb 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -43,6 +43,7 @@ import tensorflow as tf from tensorflow.contrib.learn.python.learn import learn_runner from tensorflow.python.ops import init_ops +from tensor2tensor.utils.yellowfin import YellowFinOptimizer # Number of samples to draw for an image input (in such cases as captioning) IMAGE_DECODE_LENGTH = 100 @@ -1141,6 +1142,10 @@ def __init__(self, optimizer_name, lr, hparams): elif optimizer_name == "Momentum": self._opt = tf.train.MomentumOptimizer( lr, momentum=hparams.optimizer_momentum_momentum) + elif optimizer_name == "YellowFin": + tf.logging.info("Init YellowFin Optimizer.") + self._opt = YellowFinOptimizer( + learning_rate=lr, momentum=hparams.optimizer_momentum_momentum) else: self._opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer_name](lr) diff --git a/tensor2tensor/utils/yellowfin.py b/tensor2tensor/utils/yellowfin.py new file mode 100644 index 000000000..b5cedf21b --- /dev/null +++ b/tensor2tensor/utils/yellowfin.py @@ -0,0 +1,568 @@ +# MIT License +# +# Copyright (c) 2017 JianGoForIt +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""YellowFin for TensorFlow.""" +from __future__ import print_function + +import numpy as np +from math import ceil, floor +import tensorflow as tf +from tensorflow.python.training import momentum +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.ops import state_ops +from tensorflow.python.framework import ops + + +# Values for gate_gradients. +GATE_NONE = 0 +GATE_OP = 1 +GATE_GRAPH = 2 + + +class YellowFinOptimizer(tf.train.Optimizer): + """Optimizer that implements the YellowFin algorithm. + See [Zhang et. al., 2017](https://arxiv.org/abs/1706.03471) + ([pdf](https://arxiv.org/pdf/1706.03471.pdf)). + """ + + def __init__(self, + learning_rate=1.0, + momentum=0.0, + clip_thresh=None, + beta=0.999, + curvature_window_width=20, + zero_debias=True, + delta_mu=0.0): + """Construct a new YellowFin optimizer. + + Args: + learning_rate: A Tensor or a floating point value. The learning rate. + momentum: A Tensor or a floating point value. The momentum. + clip_thresh: A Tensor or a floating point value. The cliping threshold for + tf.clip_by_global_norm. If None, no clipping will be carried out. + beta: A float value or a constant float tensor. The smoothing parameter + for estimations. + curvature_window_width: A int value or a constant int tensor. + The curvature window width. + zero_debias: A boolean, zero debias moving-averages. + delta_mu: For extensions. Not necessary in the basic use. + + Note: + clip_thresh is the threshold value on ||lr * gradient||, + delta_mu can be place holder/variable/tensor scalar. + They are used for additional momentum in situations such as + asynchronous-parallel training. + The default is 0.0(or None) for basic usage of the optimizer. + + Other features: + If you want to manually control the learning rates, self.lr_factor is + an interface to the outside, it is an multiplier for the internal + learning rate in YellowFin. It is helpful when you want to do additional + hand tuning or some decaying scheme to the tuned learning rate in + YellowFin. + Example on using lr_factor can be found here: + https://github.com/JianGoForIt/YellowFin/blob/master/char-rnn-tensorflow/train_YF.py#L140 + """ + # Set lr and mu + self._lr = learning_rate + self._mu = momentum + + # Set lr and mu tensor + self._lr_var = tf.Variable(learning_rate, + dtype=tf.float32, + name="YF_lr", + trainable=False) + self._mu_var = tf.Variable(momentum, + dtype=tf.float32, + name="YF_mu", + trainable=False) + + # Tuning factor for learning rates step or decaying scheme + self.lr_factor = tf.Variable(1.0, + dtype=tf.float32, + name="YF_lr_factor", + trainable=False) + + # Gradient Clipping Threshold + if clip_thresh is not None: + self._clip_thresh_var = tf.Variable(clip_thresh, + dtype=tf.float32, + name="YF_clip_thresh", + trainable=False) + else: + self._clip_thresh_var = None + + # Set initial lr and mu for momentum + self._lr_m = self._lr_var * self.lr_factor + self._mu_m = self._mu_var + delta_mu + + # Init momentum optimizer + self._momentum_optimizer = \ + tf.train.MomentumOptimizer(self._lr_m, self._mu_m) + + # Moving average for statistics + self._beta = beta + self._moving_averager = None + + # Step counting + self._step = tf.Variable(0, + dtype=tf.int32, + name="YF_step", + trainable=False) + # YF_step + 1 op + self._increment_step_op = None + + # For conditional tuning + self._do_tune = tf.greater(self._step, tf.constant(0)) + + # Moving-averages + self._zero_debias = zero_debias + + # For curvature range + self.curvature_window_width = curvature_window_width + self._curv_win = None + + # Gradients and Variables + self._grad = None + self._vars = None + + # Get per var g**2, norm**2 and mean(norm**2) + self._grad_squared = None + self._grad_norm_squared = None + self._grad_norm_squared_avg = None + + # Mean(grad) and Mean(grad**2) to compute Variance + self._grad_avg = None + self._grad_avg_squared = None + + # Max and Min curvature variations + self._h_max_t = None + self._h_min_t = None + self._h_min = None + self._h_max = None + + # Gradient Expected Variance + self._grad_var = None + + # Gradient Norm and Mean(Gradient Norm) + self._grad_norm = None + self._grad_norm_avg = None + + # Distance to optimum and Mean(Distance to optimum) + self._d_t = None + self._dist_to_opt_avg = None + + # Maintains moving averages of variables + # by employing an exponential decay(Beta), + # and (zero_devias) moving-averages. + self._moving_averager = None + + + def _curvature_range(self): + """Curvature range + + Returns: + h_max_t, h_min_t ops + """ + self._curv_win = \ + tf.Variable(np.zeros([self.curvature_window_width, ]), + dtype=tf.float32, + name="curv_win", + trainable=False) + + self._curv_win = \ + tf.scatter_update(self._curv_win, + self._step % self.curvature_window_width, + self._grad_norm_squared) + # Note here the iterations start from iteration 0 + valid_window = tf.slice(self._curv_win, + tf.constant([0, ]), + tf.expand_dims( + tf.minimum( + tf.constant(self.curvature_window_width), + self._step + 1), + dim=0)) + self._h_min_t = tf.reduce_min(valid_window) + self._h_max_t = tf.reduce_max(valid_window) + + curv_range_ops = [] + with tf.control_dependencies([self._h_min_t, self._h_max_t]): + avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t]) + with tf.control_dependencies([avg_op]): + self._h_min = tf.identity(self._moving_averager.average(self._h_min_t)) + self._h_max = tf.identity(self._moving_averager.average(self._h_max_t)) + curv_range_ops.append(avg_op) + return curv_range_ops # h_max_t, h_min_t + + + def _grad_variance(self): + """Estimate of gradient Variance + + Returns: + C_t ops + """ + grad_var_ops = [] + tensor_to_avg = [] + for t, g in zip(self._vars, self._grad): + if isinstance(g, ops.IndexedSlices): + tensor_to_avg.append( \ + tf.reshape(tf.unsorted_segment_sum(g.values, + g.indices, + g.dense_shape[0]), + shape=t.get_shape())) + else: + tensor_to_avg.append(g) + avg_op = self._moving_averager.apply(tensor_to_avg) + grad_var_ops.append(avg_op) + with tf.control_dependencies([avg_op]): + self._grad_avg = [self._moving_averager.average(val) + for val in tensor_to_avg] + self._grad_avg_squared = [tf.square(val) for val in self._grad_avg] + self._grad_avg_squared = tf.add_n([tf.reduce_sum(val) + for val in self._grad_avg_squared]) + # Compute Variance + self._grad_var = self._grad_norm_squared_avg - self._grad_avg_squared + return grad_var_ops # C_t + + + def _dist_to_opt(self): + """Distance to optimum + + Returns: + D_t ops + """ + dist_to_opt_ops = [] + # Running average of the norm of gradeint + self._grad_norm = tf.sqrt(self._grad_norm_squared) + avg_op = self._moving_averager.apply([self._grad_norm, ]) + dist_to_opt_ops.append(avg_op) + with tf.control_dependencies([avg_op]): + self._grad_norm_avg = self._moving_averager.average(self._grad_norm) + # Single iteration distance estimation, note here + # self._grad_norm_avg is per variable + self._d_t = self._grad_norm_avg / self._grad_norm_squared_avg + # Running average of distance + avg_op = self._moving_averager.apply([self._d_t]) + dist_to_opt_ops.append(avg_op) + with tf.control_dependencies([avg_op]): + self._dist_to_opt_avg = \ + tf.identity(self._moving_averager.average(self._d_t)) + return dist_to_opt_ops # D_t + + + def _prepare_variables(self): + """Prepare Variables for YellowFin + + Returns: + Grad**2, Norm, Norm**2, Mean(Norm**2) ops + """ + self._moving_averager = \ + tf.train.ExponentialMovingAverage(decay=self._beta, + zero_debias=self._zero_debias) + assert self._grad != None and len(self._grad) > 0 + # List for the returned Operations + prepare_variables_op = [] + + # Get per var g**2 and norm**2 + self._grad_squared = [] + self._grad_norm_squared = [] + + # Gradient squared + for v, g in zip(self._vars, self._grad): + if g is None: continue + with ops.colocate_with(v): + self._grad_squared.append(tf.square(g)) + + # Norm squared + self._grad_norm_squared = [tf.reduce_sum(g_sq) \ + for g_sq in self._grad_squared] + + # The following running average on squared norm of gradient + # is shared by grad_var and dist_to_opt + avg_op = self._moving_averager.apply(self._grad_norm_squared) + + with tf.control_dependencies([avg_op]): + self._grad_norm_squared_avg = \ + [self._moving_averager.average(val) for val in self._grad_norm_squared] + self._grad_norm_squared = tf.add_n(self._grad_norm_squared) + self._grad_norm_squared_avg = tf.add_n(self._grad_norm_squared_avg) + + prepare_variables_op.append(avg_op) + return tf.group(*prepare_variables_op) + + + def _get_lr_tensor(self): + """Get lr minimzing the surrogate + + Returns: + lr_t + """ + lr = (1.0 - tf.sqrt(self._mu) )**2 / self._h_min + return lr + + + def _get_mu_tensor(self): + """Get the min mu which minimize the surrogate + + Returns: + mu_t + """ + const_fact = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var + coef = tf.Variable([-1.0, 3.0, 0.0, 1.0], + dtype=tf.float32, + name="cubic_solver_coef") + coef = tf.scatter_update(coef, + tf.constant(2), + -(3 + const_fact)) + roots = tf.py_func(np.roots, + [coef], + Tout=tf.complex64, + stateful=False) + + # Filter out the correct root + root_idx = \ + tf.logical_and( + tf.logical_and( + tf.greater(tf.real(roots), tf.constant(0.0)), + tf.less(tf.real(roots), tf.constant(1.0))), + tf.less(tf.abs(tf.imag(roots)), 1e-5)) + + # In case there are two duplicated roots satisfying the above condition + root = tf.reshape(tf.gather(tf.gather(roots, tf.where(root_idx)), + tf.constant(0)), + shape=[]) + + # Never Evaluated + #tf.assert_equal(tf.size(root), tf.constant(1)) + + dr = self._h_max / self._h_min + mu = tf.maximum(tf.real(root)**2, ((tf.sqrt(dr) - 1)/(tf.sqrt(dr) + 1))**2) + return mu + + + def _yellowfin(self): + """YellowFin auto-tuning optimizer based on momentum SGD + + Returns: + YF ops + (Curvature range, + Grad_variance, + Dist_to_opt, + Single-Step, + Auto-Tuning) + """ + # List for the returned Operations + yellowfin_ops = [] + + # Curvature range ops + curv_range_ops = self._curvature_range() + yellowfin_ops += curv_range_ops + # Estimate of gradient Variance ops + grad_var_ops = self._grad_variance() + yellowfin_ops += grad_var_ops + # Distance to optimum ops + dist_to_opt_ops = self._dist_to_opt() + yellowfin_ops += dist_to_opt_ops + + # Single-Step: minimizes the surrogate for the expected + # squared distance from the optimum of a local quadratic + # approximation after a single step while keeping all directions in the + # robust region. + self._mu = \ + tf.identity(tf.cond(self._do_tune, lambda: self._get_mu_tensor(), + lambda: self._mu_var)) + with tf.control_dependencies([self._mu]): + self._lr = \ + tf.identity(tf.cond(self._do_tune, lambda: self._get_lr_tensor(), + lambda: self._lr_var)) + + # Tune learning rate and momentum + with tf.control_dependencies([self._mu, self._lr]): + self._mu = self._beta * self._mu_var + (1 - self._beta) * self._mu + self._lr = self._beta * self._lr_var + (1 - self._beta) * self._lr + yellowfin_ops.append(tf.assign(self._mu_var, self._mu)) + yellowfin_ops.append(tf.assign(self._lr_var, self._lr)) + + yellowfin_ops = tf.group(*yellowfin_ops) + return yellowfin_ops + + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + """Applying gradients aand tune hyperparams with YellowFin + + Args: + grads_and_vars: List of (gradient, variable) pairs as returned by + compute_gradients(). + global_step: Optional Variable to increment by one after the + variables have been updated. + name: Optional name for the returned operation. Default to the + name passed to the Optimizer constructor. + + Returns: + (A group of operations) + Variable Update with Momentum ops, + YellowFin ops(Curvature, Variance, Distance) ops, + SingleStep and lr_mu tuning ops, + Step increment ops. + + """ + self._grad, self._vars = zip(*[(g, t) + for g, t in grads_and_vars if g is not None]) + + # Var Update with Momentum + with tf.variable_scope("apply_updates"): + # Gradient Clipping? + if self._clip_thresh_var is not None: + self._grads_clip, self._grads_norm = \ + tf.clip_by_global_norm(self._grad, self._clip_thresh_var) + + apply_grad_op = \ + self._momentum_optimizer.apply_gradients( \ + zip(self._grads_clip, self._vars), global_step=global_step) + else: + apply_grad_op = \ + self._momentum_optimizer.apply_gradients( \ + zip(self._grad, self._vars), global_step=global_step) + + # Begin lr and mu tuning + with tf.variable_scope("prepare_yellowFin_variables"): + prepare_variables_op = self._prepare_variables() + + with tf.variable_scope("yellowfin"): + with tf.control_dependencies([prepare_variables_op]): + yellowfin_op = self._yellowfin() + + # Update YellowFin step variable + with tf.control_dependencies([yellowfin_op]): + self._increment_step_op = state_ops.assign_add(self._step, 1).op + + # # Global_step variable Update. Commented because the update is made by self._momentum_optimizer + # if global_step is not None: + # with tf.control_dependencies([yellowfin_op]): + # with ops.colocate_with(global_step): + # global_step_op = state_ops.assign_add(global_step, 1).op + + return tf.group(apply_grad_op, + prepare_variables_op, + yellowfin_op, + self._increment_step_op) + # global_step_op) + + + def compute_gradients(self, + loss, + var_list, + global_step=None, + gate_gradients=GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + name=None, + grad_loss=None): + """Compute gradients through momentum optimizer + + Args: + loss: A Tensor containing the value to minimize. + var_list: Optional list or tuple of tf.Variable to update + to minimize loss. Defaults to the list of variables collected + in the graph under the key GraphKey.TRAINABLE_VARIABLES. + global_step: Optional Variable to increment by one after the + variables have been updated. + gate_gradients: How to gate the computation of gradients. + Can be GATE_NONE, GATE_OP, or GATE_GRAPH. + aggregation_method: Specifies the method used to combine + gradient terms. Valid values are defined in the class AggregationMethod. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. + name: Optional name for the returned operation. Default to the name + passed to the Optimizer constructor. + grad_loss: Optional. A Tensor holding the gradient computed for loss. + + Returns: + A list of (gradient, variable) pairs. Variable is always present, + but gradient can be None. + """ + return self._momentum_optimizer.compute_gradients( \ + loss, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + grad_loss=grad_loss) + + + def minimize(self, + loss, + global_step=None, + var_list=None, + gate_gradients=GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + name=None, + grad_loss=None): + """Adapted from Tensorflow Optimizer base class member function: + Add operations to minimize `loss` by updating `var_list`. + This method simply combines calls `compute_gradients()` and + `apply_gradients()`. If you want to process the gradient before applying + them call `tf.gradients()` and `self.apply_gradients()` explicitly instead + of using this function. + + Args: + loss: A Tensor containing the value to minimize. + global_step: Optional Variable to increment by one after the variables + have been updated. + var_list: Optional list or tuple of Variable objects to update to + minimize loss. Defaults to the list of variables collected in + the graph under the key GraphKeys.TRAINABLE_VARIABLES. + gate_gradients: How to gate the computation of gradients. + Can be GATE_NONE, GATE_OP, or GATE_GRAPH. + aggregation_method: Specifies the method used to combine gradient terms. + Valid values are defined in the class AggregationMethod. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. + name: Optional name for the returned operation. + grad_loss: Optional. A Tensor holding the gradient computed for loss. + + Returns: + An Operation that updates the variables in var_list. + If global_step was not None, that operation also increments global_step. + """ + grads_and_vars = \ + self._optimizer.compute_gradients( \ + loss, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + grad_loss=grad_loss) + + vars_with_grad = [v for g, v in grads_and_vars if g is not None] + if not vars_with_grad: + raise ValueError( + "No gradients provided for any variable, check your graph for ops" + " that do not support gradients, between variables %s and loss %s." % + ([str(v) for _, v in grads_and_vars], loss)) + for g, v in grads_and_vars: + print("g ", g) + print("v ", v) + + return self.apply_gradients(grads_and_vars, global_step=global_step) diff --git a/tensor2tensor/utils/yellowfin_test.py b/tensor2tensor/utils/yellowfin_test.py new file mode 100644 index 000000000..c4a318990 --- /dev/null +++ b/tensor2tensor/utils/yellowfin_test.py @@ -0,0 +1,226 @@ +# MIT License +# +# Copyright (c) 2017 JianGoForIt +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""YellowFin Test Module for TensorFlow.""" + +#import os +# os.environ['TF_CPP_MIN_LOG_LEVEL']='2' +import tensorflow as tf +import numpy as np +from tensor2tensor.utils.yellowfin import YellowFinOptimizer +from tensorflow.python.ops import variables +import time + + +n_dim = 1000000 +n_iter = 50 + +class TrainerUtilsTest(tf.test.TestCase): + + def tuneEverything(self, x0squared, C, T, gmin, gmax): + # First tune based on dynamic range + if C == 0: + dr = gmax / gmin + mustar = ((np.sqrt(dr) - 1) / (np.sqrt(dr) + 1))**2 + alpha_star = (1 + np.sqrt(mustar))**2/gmax + + return alpha_star, mustar + + dist_to_opt = x0squared + grad_var = C + max_curv = gmax + min_curv = gmin + const_fact = dist_to_opt * min_curv**2 / 2 / grad_var + coef = [-1, 3, -(3 + const_fact), 1] + roots = np.roots(coef) + roots = roots[np.real(roots) > 0] + roots = roots[np.real(roots) < 1] + root = roots[np.argmin(np.imag(roots))] + + assert root > 0 and root < 1 and np.absolute(root.imag) < 1e-6 + + dr = max_curv / min_curv + assert max_curv >= min_curv + mu = max(((np.sqrt(dr) - 1) / (np.sqrt(dr) + 1))**2, root**2) + + lr_min = (1 - np.sqrt(mu))**2 / min_curv + lr_max = (1 + np.sqrt(mu))**2 / max_curv + + alpha_star = lr_min + mustar = mu + + return alpha_star, mustar + + + def testMeasurement(self): + opt = YellowFinOptimizer(zero_debias=False) + w = tf.Variable(np.ones([n_dim, ]), + dtype=tf.float32, + name="w", + trainable=True) + b = tf.Variable(np.ones([1, ], dtype=np.float32), + dtype=tf.float32, + name="b", + trainable=True) + x = tf.constant(np.ones([n_dim,], dtype=np.float32), + dtype=tf.float32) + loss = tf.multiply(w, x) + b + tvars = tf.trainable_variables() + + w_grad_val = tf.placeholder(tf.float32, shape=(n_dim, )) + b_grad_val = tf.placeholder(tf.float32, shape=(1, )) + apply_op = opt.apply_gradients(zip([w_grad_val, b_grad_val], tvars)) + + init_op = tf.global_variables_initializer() + with tf.Session() as sess: + sess.run(init_op) + target_h_max = 0.0 + target_h_min = 0.0 + g_norm_squared_avg = 0.0 + g_norm_avg = 0.0 + g_avg = 0.0 + target_dist = 0.0 + for i in range(n_iter): + feed_dict = {w_grad_val: (i + 1) * np.ones([n_dim, ], dtype=np.float32), + b_grad_val: (i + 1) * np.ones([1, ], dtype=np.float32)} + res = sess.run([opt._curv_win, + opt._h_max, + opt._h_min, + opt._grad_var, + opt._dist_to_opt_avg, + apply_op], feed_dict=feed_dict) + + g_norm_squared_avg = 0.999 * g_norm_squared_avg \ + + 0.001 * np.sum(((i + 1) * np.ones([n_dim + 1, ]))**2) + g_norm_avg = 0.999 * g_norm_avg \ + + 0.001 * np.linalg.norm((i + 1)*np.ones([n_dim + 1, ])) + g_avg = 0.999 * g_avg + 0.001 * (i + 1) + + target_h_max = 0.999 * target_h_max + 0.001 * (i + 1)**2*(n_dim + 1) + target_h_min = 0.999 * target_h_min + \ + 0.001 * max(1, i + 2 - 20)**2 * (n_dim + 1) + target_var = g_norm_squared_avg - g_avg**2 * (n_dim + 1) + target_dist = 0.999 * target_dist + \ + 0.001 * g_norm_avg / g_norm_squared_avg + + # print "iter ", i, " h max ", res[1], target_h_max, " h min ", res[2], target_h_min, \ + # " var ", res[3], target_var, " dist ", res[4], target_dist + assert np.abs(target_h_max - res[1]) < np.abs(target_h_max) * 1e-3 + assert np.abs(target_h_min - res[2]) < np.abs(target_h_min) * 1e-3 + assert np.abs(target_var - res[3]) < np.abs(res[3]) * 1e-3 + assert np.abs(target_dist - res[4]) < np.abs(res[4]) * 1e-3 + print "[Test-INFO] Sync measurement test passed!" + + + def testLrMu(self): + opt = YellowFinOptimizer(learning_rate=0.5, momentum=0.5, zero_debias=False) + w = tf.Variable(np.ones([n_dim, ]), + dtype=tf.float32, + name="w", + trainable=True) + b = tf.Variable(np.ones([1, ], + dtype=np.float32), + dtype=tf.float32, + name="b", + trainable=True) + x = tf.constant(np.ones([n_dim, ], + dtype=np.float32), + dtype=tf.float32) + loss = tf.multiply(w, x) + b + tvars = tf.trainable_variables() + + w_grad_val = tf.Variable(np.zeros([n_dim, ]), + dtype=tf.float32, + trainable=False) + b_grad_val = tf.Variable(np.zeros([1, ]), + dtype=tf.float32, + trainable=False) + apply_op = opt.apply_gradients(zip([w_grad_val, b_grad_val], tvars)) + + init_op = tf.global_variables_initializer() + with tf.Session() as sess: + sess.run(init_op) + target_h_max = 0.0 + target_h_min = 0.0 + g_norm_squared_avg = 0.0 + g_norm_avg = 0.0 + g_avg = 0.0 + target_dist = 0.0 + target_lr = 0.5 + target_mu = 0.5 + for i in range(n_iter): + + sess.run(tf.assign(w_grad_val, (i + 1) * np.ones([n_dim, ], + dtype=np.float32))) + sess.run(tf.assign(b_grad_val, (i + 1) * np.ones([1, ], + dtype=np.float32))) + + res = sess.run([opt._curv_win, + opt._h_max, + opt._h_min, + opt._grad_var, + opt._dist_to_opt_avg, + opt._lr_var, + opt._mu_var, + apply_op]) + + res[5] = opt._lr_var.eval() + res[6] = opt._mu_var.eval() + + g_norm_squared_avg = 0.999 * g_norm_squared_avg \ + + 0.001 * np.sum(((i + 1) * np.ones([n_dim + 1, ]))**2) + g_norm_avg = 0.999 * g_norm_avg \ + + 0.001 * np.linalg.norm((i + 1)*np.ones([n_dim + 1, ])) + g_avg = 0.999 * g_avg + 0.001 * (i + 1) + + target_h_max = 0.999 * target_h_max + 0.001 * (i + 1)**2 * (n_dim + 1) + target_h_min = 0.999 * target_h_min + \ + 0.001 * max(1, i + 2 - 20)**2 * (n_dim + 1) + target_var = g_norm_squared_avg - g_avg**2 * (n_dim + 1) + target_dist = 0.999 * target_dist + \ + 0.001 * g_norm_avg / g_norm_squared_avg + + if i > 0: + lr, mu = self.tuneEverything(target_dist**2, + target_var, + 1, + target_h_min, + target_h_max) + target_lr = 0.999 * target_lr + 0.001 * lr + target_mu = 0.999 * target_mu + 0.001 * mu + + # print "iter ", i, " h max ", res[1], target_h_max, \ + # " h min ", res[2], target_h_min, " var ", res[3], target_var, \ + # " dist ", res[4], target_dist + # print "iter ", i, " lr ", res[5], target_lr, " mu ", res[6], target_mu + + assert np.abs(target_h_max - res[1]) < np.abs(target_h_max) * 1e-3 + assert np.abs(target_h_min - res[2]) < np.abs(target_h_min) * 1e-3 + assert np.abs(target_var - res[3]) < np.abs(res[3]) * 1e-3 + assert np.abs(target_dist - res[4]) < np.abs(res[4]) * 1e-3 + assert target_lr == 0.0 or np.abs(target_lr - res[5]) < np.abs(res[5]) * 1e-3 + assert target_mu == 0.0 or np.abs(target_mu - res[6]) < np.abs(res[6]) * 5e-3 + print "[Test-INFO] lr and mu computing test passed!" + + +if __name__ == "__main__": + tf.test.main() From 3b4a000bc2d8d5f7b977428716036b687b72f095 Mon Sep 17 00:00:00 2001 From: William Woof <awwoof@hotmail.com> Date: Thu, 13 Jul 2017 14:36:39 +0100 Subject: [PATCH 0108/4095] Create notes.md --- notes.md | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 notes.md diff --git a/notes.md b/notes.md new file mode 100644 index 000000000..3c082f6e9 --- /dev/null +++ b/notes.md @@ -0,0 +1,75 @@ + +```python + +def unmagic_encoder(encoder_input, + hparams, + name="encoder"): + x = encoder_input + + # Summaries don't work in multi-problem setting yet. + summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 + + with tf.variable_scope(name): + pass + return x + +def magic_decoder(decoder_input, + encoder_output, + residual_fn, + encoder_self_attention_bias, + decoder_self_attention_bias, + encoder_decoder_attention_bias, + hparams, + name="decoder"): + x = decoder_input + y = encoder_output + # Summaries don't work in multi-problem setting yet. + summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 + with tf.variable_scope(name): + for layer in xrange(hparams.num_hidden_layers): + with tf.variable_scope("layer_%d" % layer): + x = residual_fn( + x, + common_attention.multihead_attention( + x, + None, + decoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + summaries=summaries, + name="decoder_self_attention")) + with tf.variable_scope("enc"): + y = residual_fn( + y, + common_attention.multihead_attention( + y, + None, + encoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + summaries=summaries, + name="encoder_self_attention")) + y = residual_fn(y, transformer.transformer_ffn_layer(y, hparams)) + + x = residual_fn( + x, + common_attention.multihead_attention( + x, + y, + encoder_decoder_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + summaries=summaries, + name="encdec_attention")) + x = residual_fn(x, transformer.transformer_ffn_layer(x, hparams)) + return x +``` From 8fa26668202fa436a2e399288f25af6851cc2b73 Mon Sep 17 00:00:00 2001 From: William <awwoof@hotmail.com> Date: Thu, 13 Jul 2017 16:10:16 +0100 Subject: [PATCH 0109/4095] Updated masking --- tensor2tensor/models/common_layers.py | 33 +++++++++++++++- .../models/transformer_alternative.py | 38 +++++++++++-------- 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 4c63ce8ba..2597ccf7a 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1416,6 +1416,36 @@ def global_pool_1d(inputs, pooling_type='MAX', mask=None): return output + +def running_global_pool_1d(inputs): + """ + Same global pool, but only for the elements up to the current element. Useful + for outputs where the state of future elements is not known. + Takes no mask as all elements up to the current element are assumed to exist. + Currently only supports maximum. + + Args + inputs: A tensor of dimensions batch_size x sequence_length x input_dims + containing the sequences of input vectors. + Outputs + output: A tensor of dimensions batch_size x sequence_length x input_dims + dimension containing the running 'totals'. + """ + + with tf.name_scope("running_global_pool", [inputs]): + scan_fct = tf.maximum + + # Permute inputs so seq_length is first + elems = tf.transpose(inputs, [1, 0, 2]) + + # Perform scan + cumulatives = tf.scan(scan_fct, elems, swap_memory=True) + + # Permute output to get back to original order + output = tf.transpose(cumulatives, [1, 0, 2]) + + return output + def linear_set_layer(layer_size, inputs, @@ -1455,7 +1485,8 @@ def linear_set_layer(layer_size, if context is not None: # Unfortunately tf doesn't support broadcasting via concat, but we can # simply add the transformed context to get the same effect - context = tf.expand_dims(context, axis=1) + if len(context.get_shape().as_list())==2: + context = tf.expand_dims(context, axis=1) #context_size = context.get_shape().as_list()[-1] cont_tfm = conv1d(context, layer_size, 1, activation=None, name="cont_conv") diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py index 90fea6139..b6c2adc74 100644 --- a/tensor2tensor/models/transformer_alternative.py +++ b/tensor2tensor/models/transformer_alternative.py @@ -45,8 +45,7 @@ class TransformerAlt(t2t_model.T2TModel): def model_fn_body(self, features): - # - + # Remove dropout if not training hparams = copy.copy(self._hparams) targets = features["targets"] @@ -61,11 +60,8 @@ def model_fn_body(self, features): (decoder_input, decoder_self_attention_bias) = transformer.\ transformer_prepare_decoder(targets, hparams) - # We need masks of the form batch size x input sequences - # Biases seem to be of the form batch_size x 1 x input sequences x vec dim - # Squeeze out dim one, and get the first element of each vector - encoder_mask = tf.squeeze(encoder_attention_bias, [1])[:,:,0] - decoder_mask = tf.squeeze(decoder_self_attention_bias, [1])[:,:,0] + encoder_mask = bias_to_mask(encoder_attention_bias) + decoder_mask = bias_to_mask(decoder_self_attention_bias) def residual_fn(x, y): return common_layers.layer_norm(x + tf.nn.dropout( @@ -86,7 +82,7 @@ def residual_fn(x, y): -def composite_layer(inputs, mask, hparams): +def composite_layer(inputs, mask, hparams, for_output=False): x = inputs # Applies ravanbakhsh on top of each other @@ -97,26 +93,29 @@ def composite_layer(inputs, mask, hparams): hparams.hidden_size, x, mask=mask, - dropout=0.0) + dropout=hparams.relu_dropout) # Transforms elements to get a context, and then uses this in a final layer elif hparams.composite_layer_type == "reembedding": initial_elems = x # Transform elements n times and then pool for layer in xrange(hparams.layers_per_layer): - with tf.variable_scope(".%d" % layer): + with tf.variable_scope("sub_layer_%d" % layer): x = common_layers.linear_set_layer( hparams.hidden_size, x, - dropout=0.0) - context = common_layers.global_pool_1d(x, mask=mask) + dropout=hparams.relu_dropout) + if for_output: + context = common_layers.running_global_pool_1d(x) + else: + context = common_layers.global_pool_1d(x, mask=mask) #Final layer x = common_layers.linear_set_layer( hparams.hidden_size, x, context=context, - dropout=0.0) + dropout=hparams.relu_dropout) return x @@ -169,12 +168,19 @@ def alt_transformer_decoder(decoder_input, summaries=summaries, name="encdec_attention") - x_ = residual_fn(x_, composite_layer(x_, mask, hparams)) + x_ = residual_fn(x_, composite_layer(x_, mask, hparams, for_output=True)) x = residual_fn(x, x_) return x - +def bias_to_mask(bias): + # We need masks of the form batch size x input sequences + # Biases seem to be of the form batch_size x 1 x input sequences x vec dim + # Squeeze out dim one, and get the first element of each vector + bias = tf.squeeze(bias, [1])[:,:,0] + bias = - tf.clip_by_value(bias, -1.0, 1.0) + mask = 1 - bias + return mask @@ -182,7 +188,7 @@ def alt_transformer_decoder(decoder_input, def transformer_alt(): """Set of hyperparameters.""" hparams = transformer.transformer_base() - hparams.batch_size = 64 + hparams.batch_size = 2048 hparams.add_hparam("layers_per_layer", 4) #hparams.add_hparam("composite_layer_type", "ravanbakhsh") #ravanbakhsh or reembedding hparams.add_hparam("composite_layer_type", "reembedding") From f44f51fa5c5f9d588618657191ee518849cffc66 Mon Sep 17 00:00:00 2001 From: Richard Shin <ricshin@cs.berkeley.edu> Date: Thu, 13 Jul 2017 13:53:11 -0700 Subject: [PATCH 0110/4095] Add shake-shake for CIFAR-10 --- tensor2tensor/models/common_hparams.py | 1 + tensor2tensor/models/models.py | 1 + tensor2tensor/models/shake_shake.py | 106 +++++++++++++++++++++++++ tensor2tensor/utils/trainer_utils.py | 3 + 4 files changed, 111 insertions(+) create mode 100644 tensor2tensor/models/shake_shake.py diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index c8c458414..f48a67c15 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -61,6 +61,7 @@ def basic_params1(): weight_noise=0.0, learning_rate_decay_scheme="none", learning_rate_warmup_steps=100, + learning_rate_cosine_cycle_steps=250000, learning_rate=0.1, sampling_method="argmax", # "argmax" or "random" problem_choice="adaptive", # "uniform", "adaptive", "distributed" diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index ae0e0da61..214aec245 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -30,6 +30,7 @@ from tensor2tensor.models import modalities from tensor2tensor.models import multimodel from tensor2tensor.models import neural_gpu +from tensor2tensor.models import shake_shake from tensor2tensor.models import slicenet from tensor2tensor.models import transformer from tensor2tensor.models import transformer_alternative diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py new file mode 100644 index 000000000..a82de64ef --- /dev/null +++ b/tensor2tensor/models/shake_shake.py @@ -0,0 +1,106 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.models import common_hparams +from tensor2tensor.models import common_layers +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + +def shake_shake_block_branch(x, conv_filters, stride): + x = tf.nn.relu(x) + x = common_layers.conv(x, conv_filters, (3, 3), (stride, stride)) + x = tf.layers.batch_normalization(x) + x = tf.nn.relu(x) + x = common_layers.conv(x, conv_filters, (3, 3), (1, 1)) + x = tf.layers.batch_normalization(x) + return x + + +def downsampling_residual_branch(x, conv_filters): + x = tf.nn.relu(x) + + x1 = tf.layers.average_pooling2d(x, pool_size=(1, 1), strides=(2, 2)) + x1 = common_layers.conv(x1, conv_filters / 2, (1, 1)) + + x2 = tf.pad(x[:, 1:, 1:], [[0, 0], [0, 1], [0, 1], [0, 0]]) + x2 = tf.layers.average_pooling2d(x2, pool_size=(1, 1), strides=(2, 2)) + x2 = common_layers.conv(x2, conv_filters / 2, (1, 1)) + + return tf.concat([x1, x2], axis=3) + + +def shake_shake_block(x, conv_filters, stride): + branch1 = shake_shake_block_branch(x, conv_filters, stride) + branch2 = shake_shake_block_branch(x, conv_filters, stride) + if x.shape[-1] == conv_filters: + skip = tf.identity(x) + else: + skip = downsampling_residual_block(x) + + # TODO(rshin): Set equal=true when testing. + # TODO(rshin): Use different alpha for each image in batch. + return skip + common_layers.shakeshake2(branch1, branch2) + + +def shake_shake_stage(x, num_blocks, conv_filters, initial_stride): + x = shake_shake_block(x, conv_filters, initial_stride) + for _ in xrange(num_blocks - 1): + x = shake_shake_block(x, conv_filters, 1) + return x + + +@registry.register_model +class ShakeShake(t2t_model.T2TModel): + + def model_fn_body(self, features): + hparams = self._hparams + + inputs = features["inputs"] + assert (hparams.num_hidden_layers - 2) % 6 == 0 + blocks_per_stage = (hparams.num_hidden_layers - 2) / 6 + + # For canonical Shake-Shake, the entry flow is a 3x3 convolution with 16 + # filters then a batch norm. Instead we use the one in SmallImageModality, + # which also seems to include a layer norm. + x = inputs + with tf.name_scope('shake_shake_stage_1'): + x = shake_shake_stage(x, hparams.base_filters, blocks_per_stage) + with tf.name_scope('shake_shake_stage_2'): + x = shake_shake_stage(x, hparams.base_filters * 2, blocks_per_stage) + with tf.name_scope('shake_shake_stage_3'): + x = shake_shake_stage(x, hparams.base_filters * 4, blocks_per_stage) + + # For canonical Shake-Shake, we should perform 8x8 average pooling and then + # have a fully-connected layer (which produces the logits for each class). + # Instead, we just use the Xception exit flow in ClassLabelModality. + return x + +@registry.register_hparams +def shakeshake_cifar10(): + hparams = common_hparams.basic_params1() + # This leads to effective batch size 128 when number of GPUs is 2 + hparams.batch_size = 4096 * 4 + hparams.hidden_size = 16 + hparams.dropout = 0 + hparams.label_smoothing = 0.0 + hparams.clip_grad_norm = 2.0 + hparams.num_hidden_layers = 26 + hparams.kernel_height = -1 # Unused + hparams.kernel_width = -1 # Unused + hparams.learning_rate_decay_scheme = "cosine" + # Model should be run for 700000 steps with batch size 128 (~1800 epochs) + hparams.learning_rate_cosine_cycle_steps = 700000 + hparams.learning_rate = 0.2 + hparams.learning_rate_warmup_steps = 3000 + hparams.initializer = "uniform_unit_scaling" + hparams.initializer_gain = 1.0 + hparams.weight_decay = 0.1 # Effective value should be ~1e-4 + hparams.optimizer = "Momentum" + hparams.optimizer_momentum_momentum = 0.9 + hparams.add_hparam('base_filters', 16) + return hparams diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 057612ecb..3c693d08e 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -321,6 +321,9 @@ def learning_rate_decay(): (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) elif hparams.learning_rate_decay_scheme == "exp100k": return 0.94**(step // 100000) + elif hparams.learning_rate_decay_scheme == "cosine": + cycle_steps = hparams.learning_rate_cosine_cycle_steps + return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) inv_base = tf.exp(tf.log(0.01) / warmup_steps) inv_decay = inv_base**(warmup_steps - step) From 9f02a51631d8eedf8f991dab09a0e161483b2999 Mon Sep 17 00:00:00 2001 From: Richard Shin <ricshin@cs.berkeley.edu> Date: Thu, 13 Jul 2017 16:04:30 -0700 Subject: [PATCH 0111/4095] Fix bugs, add more explanation --- tensor2tensor/models/common_layers.py | 7 ++- tensor2tensor/models/shake_shake.py | 75 ++++++++++++++++++--------- 2 files changed, 57 insertions(+), 25 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 15a712ef2..de514ceb9 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -58,7 +58,7 @@ def inverse_exp_decay(max_step, min_value=0.01): return inv_base**tf.maximum(float(max_step) - step, 0.0) -def shakeshake2_py(x, y, equal=False): +def shakeshake2_py(x, y, equal=False, individual=False): """The shake-shake sum of 2 tensors, python version.""" alpha = 0.5 if equal else tf.random_uniform([]) return alpha * x + (1.0 - alpha) * y @@ -85,6 +85,11 @@ def shakeshake2(x1, x2): """The shake-shake function with a different alpha for forward/backward.""" return shakeshake2_py(x1, x2) +@function.Defun(grad_func=shakeshake2_grad) +def shakeshake2_eqforward(x1, x2): + """The shake-shake function with a different alpha for forward/backward.""" + return shakeshake2_py(x1, x2, equal=True) + @function.Defun(grad_func=shakeshake2_equal_grad) def shakeshake2_eqgrad(x1, x2): diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py index a82de64ef..d67f9fdee 100644 --- a/tensor2tensor/models/shake_shake.py +++ b/tensor2tensor/models/shake_shake.py @@ -11,12 +11,14 @@ import tensorflow as tf + def shake_shake_block_branch(x, conv_filters, stride): x = tf.nn.relu(x) - x = common_layers.conv(x, conv_filters, (3, 3), (stride, stride)) + x = tf.layers.conv2d( + x, conv_filters, (3, 3), strides=(stride, stride), padding='SAME') x = tf.layers.batch_normalization(x) x = tf.nn.relu(x) - x = common_layers.conv(x, conv_filters, (3, 3), (1, 1)) + x = tf.layers.conv2d(x, conv_filters, (3, 3), strides=(1, 1), padding='SAME') x = tf.layers.batch_normalization(x) return x @@ -25,66 +27,90 @@ def downsampling_residual_branch(x, conv_filters): x = tf.nn.relu(x) x1 = tf.layers.average_pooling2d(x, pool_size=(1, 1), strides=(2, 2)) - x1 = common_layers.conv(x1, conv_filters / 2, (1, 1)) + x1 = tf.layers.conv2d(x1, conv_filters / 2, (1, 1), padding='SAME') x2 = tf.pad(x[:, 1:, 1:], [[0, 0], [0, 1], [0, 1], [0, 0]]) x2 = tf.layers.average_pooling2d(x2, pool_size=(1, 1), strides=(2, 2)) - x2 = common_layers.conv(x2, conv_filters / 2, (1, 1)) + x2 = tf.layers.conv2d(x2, conv_filters / 2, (1, 1), padding='SAME') return tf.concat([x1, x2], axis=3) -def shake_shake_block(x, conv_filters, stride): - branch1 = shake_shake_block_branch(x, conv_filters, stride) - branch2 = shake_shake_block_branch(x, conv_filters, stride) +def shake_shake_block(x, conv_filters, stride, mode): + with tf.variable_scope('branch_1'): + branch1 = shake_shake_block_branch(x, conv_filters, stride) + with tf.variable_scope('branch_2'): + branch2 = shake_shake_block_branch(x, conv_filters, stride) if x.shape[-1] == conv_filters: skip = tf.identity(x) else: - skip = downsampling_residual_block(x) + skip = downsampling_residual_branch(x, conv_filters) - # TODO(rshin): Set equal=true when testing. # TODO(rshin): Use different alpha for each image in batch. - return skip + common_layers.shakeshake2(branch1, branch2) + if mode == tf.contrib.learn.ModeKeys.TRAIN: + shaken = common_layers.shakeshake2(branch1, branch2) + else: + shaken = common_layers.shakeshake2_eqforward(branch1, branch2) + shaken.set_shape(branch1.get_shape()) + + return skip + shaken -def shake_shake_stage(x, num_blocks, conv_filters, initial_stride): - x = shake_shake_block(x, conv_filters, initial_stride) - for _ in xrange(num_blocks - 1): - x = shake_shake_block(x, conv_filters, 1) +def shake_shake_stage(x, num_blocks, conv_filters, initial_stride, mode): + with tf.variable_scope('block_0'): + x = shake_shake_block(x, conv_filters, initial_stride, mode) + for i in xrange(1, num_blocks): + with tf.variable_scope('block_{}'.format(i)): + x = shake_shake_block(x, conv_filters, 1, mode) return x @registry.register_model class ShakeShake(t2t_model.T2TModel): + '''Implements the Shake-Shake architecture. + + From <https://arxiv.org/pdf/1705.07485.pdf> + This is intended to match the CIFAR-10 version, and correspond to + "Shake-Shake-Batch" in Table 1. + ''' def model_fn_body(self, features): hparams = self._hparams inputs = features["inputs"] assert (hparams.num_hidden_layers - 2) % 6 == 0 - blocks_per_stage = (hparams.num_hidden_layers - 2) / 6 + blocks_per_stage = (hparams.num_hidden_layers - 2) // 6 # For canonical Shake-Shake, the entry flow is a 3x3 convolution with 16 # filters then a batch norm. Instead we use the one in SmallImageModality, # which also seems to include a layer norm. x = inputs - with tf.name_scope('shake_shake_stage_1'): - x = shake_shake_stage(x, hparams.base_filters, blocks_per_stage) - with tf.name_scope('shake_shake_stage_2'): - x = shake_shake_stage(x, hparams.base_filters * 2, blocks_per_stage) - with tf.name_scope('shake_shake_stage_3'): - x = shake_shake_stage(x, hparams.base_filters * 4, blocks_per_stage) + mode = hparams.mode + with tf.variable_scope('shake_shake_stage_1'): + x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters, 1, mode) + with tf.variable_scope('shake_shake_stage_2'): + x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters * 2, 2, + mode) + with tf.variable_scope('shake_shake_stage_3'): + x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters * 4, 2, + mode) # For canonical Shake-Shake, we should perform 8x8 average pooling and then # have a fully-connected layer (which produces the logits for each class). # Instead, we just use the Xception exit flow in ClassLabelModality. + # + # Also, this model_fn does not return an extra_loss. However, TensorBoard + # reports an exponential moving average for extra_loss, where the initial + # value for the moving average may be a large number, so extra_loss will + # look large at the beginning of training. return x + @registry.register_hparams def shakeshake_cifar10(): hparams = common_hparams.basic_params1() - # This leads to effective batch size 128 when number of GPUs is 2 - hparams.batch_size = 4096 * 4 + # This leads to effective batch size 128 when number of GPUs is 1 + hparams.batch_size = 4096 * 8 hparams.hidden_size = 16 hparams.dropout = 0 hparams.label_smoothing = 0.0 @@ -99,7 +125,8 @@ def shakeshake_cifar10(): hparams.learning_rate_warmup_steps = 3000 hparams.initializer = "uniform_unit_scaling" hparams.initializer_gain = 1.0 - hparams.weight_decay = 0.1 # Effective value should be ~1e-4 + # TODO(rshin): Adjust so that effective value becomes ~1e-4 + hparams.weight_decay = 3.0 hparams.optimizer = "Momentum" hparams.optimizer_momentum_momentum = 0.9 hparams.add_hparam('base_filters', 16) From 82870946d8202afa15e459520ea689550346a29f Mon Sep 17 00:00:00 2001 From: Richard Shin <ricshin@cs.berkeley.edu> Date: Thu, 13 Jul 2017 16:06:21 -0700 Subject: [PATCH 0112/4095] Reword comments --- tensor2tensor/models/shake_shake.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py index d67f9fdee..212e03764 100644 --- a/tensor2tensor/models/shake_shake.py +++ b/tensor2tensor/models/shake_shake.py @@ -82,8 +82,8 @@ def model_fn_body(self, features): blocks_per_stage = (hparams.num_hidden_layers - 2) // 6 # For canonical Shake-Shake, the entry flow is a 3x3 convolution with 16 - # filters then a batch norm. Instead we use the one in SmallImageModality, - # which also seems to include a layer norm. + # filters then a batch norm. Instead we will rely on the one in + # SmallImageModality, which seems to instead use a layer norm. x = inputs mode = hparams.mode with tf.variable_scope('shake_shake_stage_1'): @@ -97,7 +97,7 @@ def model_fn_body(self, features): # For canonical Shake-Shake, we should perform 8x8 average pooling and then # have a fully-connected layer (which produces the logits for each class). - # Instead, we just use the Xception exit flow in ClassLabelModality. + # Instead, we rely on the Xception exit flow in ClassLabelModality. # # Also, this model_fn does not return an extra_loss. However, TensorBoard # reports an exponential moving average for extra_loss, where the initial From 9dc2826be082003a3768e2be9b63bc05e37d05f9 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Thu, 13 Jul 2017 23:51:40 +0000 Subject: [PATCH 0113/4095] Iceparser adaptations --- .../data_generators/generator_utils.py | 3 +- .../data_generators/problem_hparams.py | 48 +++++++++++++------ tensor2tensor/data_generators/text_encoder.py | 7 +-- tensor2tensor/models/transformer.py | 7 +-- 4 files changed, 41 insertions(+), 24 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index a74fe6847..e8a0679f8 100755 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -300,7 +300,8 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, index, vocab_filename _ = tokenizer.encode(text_encoder.native_to_unicode(part)) vocab = text_encoder.SubwordTextEncoder.build_to_target_size( - vocab_size, tokenizer.token_counts, 1, 1e3) + vocab_size, tokenizer.token_counts, 1, + min(1e3, vocab_size + text_encoder.NUM_RESERVED_TOKENS)) vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index a99cdbfd3..13b63ae16 100755 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -178,6 +178,7 @@ def default_problem_hparams(): # 15: Parse tokens # 16: Icelandic characters # 17: Icelandic tokens + # 18: Icelandic parse tokens # Add more above if needed. input_space_id=0, target_space_id=0, @@ -550,20 +551,6 @@ def wmt_parsing_tokens(model_hparams, wrong_vocab_size): return p -def wmt_tabbed_parsing_characters(model_hparams): - p = default_problem_hparams() - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, 256)} - p.target_modality = (registry.Modalities.SYMBOL, 256) - p.vocabulary = { - "inputs": text_encoder.ByteTextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - p.loss_multiplier = 2.0 - p.input_space_id = 2 - p.target_space_id = 14 - return p - - def wsj_parsing_tokens(model_hparams, prefix, wrong_source_vocab_size, wrong_target_vocab_size): @@ -604,6 +591,37 @@ def wsj_parsing_tokens(model_hparams, prefix, return p +def ice_parsing_tokens(model_hparams, wrong_source_vocab_size): + """Icelandic to parse tree translation benchmark. + + Args: + model_hparams: a tf.contrib.training.HParams + Returns: + a tf.contrib.training.HParams + """ + p = default_problem_hparams() + # This vocab file must be present within the data directory. + source_vocab_filename = os.path.join( + model_hparams.data_dir, + "ice_source.tokens.vocab.%d" % wrong_source_vocab_size) + target_vocab_filename = os.path.join( + model_hparams.data_dir, + "ice_target.tokens.vocab.256") + source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) + p.input_modality = { + "inputs": (registry.Modalities.SYMBOL, source_subtokenizer.vocab_size) + } + p.target_modality = (registry.Modalities.SYMBOL, 256) + p.vocabulary = { + "inputs": source_subtokenizer, + "targets": target_subtokenizer, + } + p.input_space_id = 17 # Icelandic tokens + p.target_space_id = 18 # Icelandic parse tokens + return p + + def image_cifar10(unused_model_hparams): """CIFAR-10.""" p = default_problem_hparams() @@ -723,7 +741,7 @@ def img2img_imagenet(unused_model_hparams): "lmptb_10k": lmptb_10k, "wmt_parsing_characters": wmt_parsing_characters, "ice_parsing_characters": wmt_parsing_characters, - "ice_parsing_tokens": lambda p: wsj_parsing_tokens(p, "ice", 2**13, 2**8), + "ice_parsing_tokens": lambda p: ice_parsing_tokens(p, 2**13), "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens(p, "wsj", 2**14, 2**9), "wsj_parsing_tokens_32k": lambda p: wsj_parsing_tokens(p, "wsj", 2**15, 2**9), diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 82e6a8aac..8454f9169 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -44,6 +44,7 @@ PAD = "<pad>" EOS = "<EOS>" RESERVED_TOKENS = [PAD, EOS] +NUM_RESERVED_TOKENS = len(RESERVED_TOKENS) PAD_TOKEN = RESERVED_TOKENS.index(PAD) # Normally 0 EOS_TOKEN = RESERVED_TOKENS.index(EOS) # Normally 1 @@ -55,7 +56,7 @@ class TextEncoder(object): """Base class for converting from ints to/from human readable strings.""" - def __init__(self, num_reserved_ids=2): + def __init__(self, num_reserved_ids=NUM_RESERVED_TOKENS): self._num_reserved_ids = num_reserved_ids def encode(self, s): @@ -130,7 +131,7 @@ def vocab_size(self): class TokenTextEncoder(TextEncoder): """Encoder based on a user-supplied vocabulary.""" - def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): + def __init__(self, vocab_filename, reverse=False, num_reserved_ids=NUM_RESERVED_TOKENS): """Initialize from a file, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse @@ -203,7 +204,7 @@ class SubwordTextEncoder(TextEncoder): """ - def __init__(self, filename=None, num_reserved_ids=2): + def __init__(self, filename=None, num_reserved_ids=NUM_RESERVED_TOKENS): """Initialize and read from a file, if provided.""" self._tokenizer = tokenizer.Tokenizer() if filename is not None: diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 99cd1fd6a..73542bd5a 100755 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -356,12 +356,9 @@ def transformer_parsing_base(): @registry.register_hparams def transformer_parsing_ice(): """Hparams for parsing Icelandic text.""" - hparams = transformer_parsing_base() + hparams = transformer_base_single_gpu() hparams.batch_size = 4096 - hparams.batching_mantissa_bits = 2 - hparams.hidden_size = 512 - #hparams.max_length = 256 - #hparams.hidden_size = 128 + hparams.shared_embedding_and_softmax_weights = int(False) return hparams From 27c6185e3ae0a8a5ef42e7abc29a5b6ce67966f6 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Fri, 14 Jul 2017 00:14:13 +0000 Subject: [PATCH 0114/4095] Cleanup in text_encoder.py --- tensor2tensor/data_generators/text_encoder.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 3a39893be..1c4701c10 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -37,9 +37,13 @@ # Conversion between Unicode and UTF-8, if required (on Python2) -def native_to_unicode(s): - return s.decode("utf-8") if (PY2 and not isinstance(s, unicode)) else s -unicode_to_native = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) +if PY2: + native_to_unicode = lambda s: s if isinstance(s, unicode) else s.decode("utf-8") + unicode_to_native = lambda s: s.encode("utf-8") +else: + # No conversion required on Python3 + native_to_unicode = lambda s: s + unicode_to_native = lambda s: s # Reserved tokens for things like padding and EOS symbols. @@ -346,7 +350,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4, - num_reserved_ids=2): + num_reserved_ids=NUM_RESERVED_TOKENS): """Train a SubwordTextEncoder based on a dictionary of word counts. Args: From 7bf4936551d4de588787b40b992d476f2365a9a6 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Fri, 14 Jul 2017 00:40:04 +0000 Subject: [PATCH 0115/4095] Standardized EOS token --- tensor2tensor/data_generators/wmt.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index f5c68f3ea..2d43d1739 100755 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -38,9 +38,8 @@ FLAGS = tf.flags.FLAGS -# End-of-sentence marker (should correspond to the position of EOS in the -# RESERVED_TOKENS list in text_encoder.py) -EOS = 1 +# End-of-sentence marker +EOS = text_encoder.EOS_TOKEN def character_generator(source_path, target_path, character_vocab, eos=None): @@ -183,7 +182,7 @@ def ende_bpe_token_generator(tmp_dir, train): train_path = _get_wmt_ende_dataset(tmp_dir, dataset_path) token_path = os.path.join(tmp_dir, "vocab.bpe.32000") token_vocab = text_encoder.TokenTextEncoder(vocab_filename=token_path) - return token_generator(train_path + ".en", train_path + ".de", token_vocab, 1) + return token_generator(train_path + ".en", train_path + ".de", token_vocab, EOS) _ENDE_TRAIN_DATASETS = [ From 2adf3ae883a483c3cddec4591398b459a77189e1 Mon Sep 17 00:00:00 2001 From: Richard Shin <ricshin@cs.berkeley.edu> Date: Thu, 13 Jul 2017 17:49:45 -0700 Subject: [PATCH 0116/4095] Add shakeshake_type hparam: batch, image, equal --- tensor2tensor/models/common_layers.py | 24 ++++++++++++++++----- tensor2tensor/models/shake_shake.py | 30 ++++++++++++++++++--------- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index de514ceb9..d38f97fb0 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -60,7 +60,13 @@ def inverse_exp_decay(max_step, min_value=0.01): def shakeshake2_py(x, y, equal=False, individual=False): """The shake-shake sum of 2 tensors, python version.""" - alpha = 0.5 if equal else tf.random_uniform([]) + if equal: + alpha = 0.5 + if individual: + alpha = tf.random_uniform(tf.get_shape(x)[:1]) + else: + alpha = tf.random_uniform([]) + return alpha * x + (1.0 - alpha) * y @@ -72,6 +78,14 @@ def shakeshake2_grad(x1, x2, dy): return dx +@function.Defun() +def shakeshake2_indiv_grad(x1, x2, dy): + """Overriding gradient for shake-shake of 2 tensors.""" + y = shakeshake2_py(x1, x2, individual=True) + dx = tf.gradients(ys=[y], xs=[x1, x2], grad_ys=[dy]) + return dx + + @function.Defun() def shakeshake2_equal_grad(x1, x2, dy): """Overriding gradient for shake-shake of 2 tensors.""" @@ -85,10 +99,10 @@ def shakeshake2(x1, x2): """The shake-shake function with a different alpha for forward/backward.""" return shakeshake2_py(x1, x2) -@function.Defun(grad_func=shakeshake2_grad) -def shakeshake2_eqforward(x1, x2): - """The shake-shake function with a different alpha for forward/backward.""" - return shakeshake2_py(x1, x2, equal=True) + +@function.Defun(grad_func=shakeshake2_indiv_grad) +def shakeshake2_indiv(x1, x2): + return shakeshake2_py(x1, x2, individual=True) @function.Defun(grad_func=shakeshake2_equal_grad) diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py index 212e03764..f87eaa335 100644 --- a/tensor2tensor/models/shake_shake.py +++ b/tensor2tensor/models/shake_shake.py @@ -36,7 +36,7 @@ def downsampling_residual_branch(x, conv_filters): return tf.concat([x1, x2], axis=3) -def shake_shake_block(x, conv_filters, stride, mode): +def shake_shake_block(x, conv_filters, stride, hparams): with tf.variable_scope('branch_1'): branch1 = shake_shake_block_branch(x, conv_filters, stride) with tf.variable_scope('branch_2'): @@ -47,21 +47,28 @@ def shake_shake_block(x, conv_filters, stride, mode): skip = downsampling_residual_branch(x, conv_filters) # TODO(rshin): Use different alpha for each image in batch. - if mode == tf.contrib.learn.ModeKeys.TRAIN: - shaken = common_layers.shakeshake2(branch1, branch2) + if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN: + if hparams.shakeshake_type == 'batch': + shaken = common_layers.shakeshake2(branch1, branch2) + elif hparams.shakeshake_type == 'image': + shaken = common_layers.shakeshake2_indiv(branch1, branch2) + elif hparams.shakeshake_type == 'equal': + shaken = common_layers.shakeshake2_py(branch1, branch2, equal=True) + else: + raise ValueError('Invalid shakeshake_type: {!r}'.format(shaken)) else: - shaken = common_layers.shakeshake2_eqforward(branch1, branch2) + shaken = common_layers.shakeshake2_py(branch1, branch2, equal=True) shaken.set_shape(branch1.get_shape()) return skip + shaken -def shake_shake_stage(x, num_blocks, conv_filters, initial_stride, mode): +def shake_shake_stage(x, num_blocks, conv_filters, initial_stride, hparams): with tf.variable_scope('block_0'): - x = shake_shake_block(x, conv_filters, initial_stride, mode) + x = shake_shake_block(x, conv_filters, initial_stride, hparams) for i in xrange(1, num_blocks): with tf.variable_scope('block_{}'.format(i)): - x = shake_shake_block(x, conv_filters, 1, mode) + x = shake_shake_block(x, conv_filters, 1, hparams) return x @@ -76,6 +83,7 @@ class ShakeShake(t2t_model.T2TModel): def model_fn_body(self, features): hparams = self._hparams + print(hparams.learning_rate) inputs = features["inputs"] assert (hparams.num_hidden_layers - 2) % 6 == 0 @@ -87,13 +95,14 @@ def model_fn_body(self, features): x = inputs mode = hparams.mode with tf.variable_scope('shake_shake_stage_1'): - x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters, 1, mode) + x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters, 1, + hparams) with tf.variable_scope('shake_shake_stage_2'): x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters * 2, 2, - mode) + hparams) with tf.variable_scope('shake_shake_stage_3'): x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters * 4, 2, - mode) + hparams) # For canonical Shake-Shake, we should perform 8x8 average pooling and then # have a fully-connected layer (which produces the logits for each class). @@ -130,4 +139,5 @@ def shakeshake_cifar10(): hparams.optimizer = "Momentum" hparams.optimizer_momentum_momentum = 0.9 hparams.add_hparam('base_filters', 16) + hparams.add_hparam('shakeshake_type', 'batch') return hparams From 5a72e5c9654ea584c9a3be8dfdd80713768ad540 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Fri, 14 Jul 2017 14:22:38 +0000 Subject: [PATCH 0117/4095] Adapted to upstream tokenizer change --- tensor2tensor/data_generators/generator_utils.py | 9 ++++----- tensor2tensor/data_generators/text_encoder.py | 10 +++++----- tensor2tensor/data_generators/tokenizer.py | 1 + 3 files changed, 10 insertions(+), 10 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/tokenizer.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index e3c41f7b7..f076c10da 100755 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -300,9 +300,8 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, index, vocab_filename vocab = text_encoder.SubwordTextEncoder(vocab_filepath) return vocab - tokenizer = Tokenizer() - # Use Tokenizer to count the word occurrences. + token_counts = defaultdict(int) filepath = os.path.join(tmp_dir, source_filename) with tf.gfile.GFile(filepath, mode="r") as source_file: for line in source_file: @@ -310,11 +309,11 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, index, vocab_filename if line and '\t' in line: parts = line.split('\t', maxsplit = 1) part = parts[index].strip() - _ = tokenizer.encode(text_encoder.native_to_unicode(part)) + for tok in tokenizer.encode(text_encoder.native_to_unicode(part)): + token_counts[tok] += 1 vocab = text_encoder.SubwordTextEncoder.build_to_target_size( - vocab_size, tokenizer.token_counts, 1, - min(1e3, vocab_size + text_encoder.NUM_RESERVED_TOKENS)) + vocab_size, token_counts, 1, 1e3) vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 1c4701c10..38b78256d 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -54,7 +54,7 @@ PAD_TOKEN = RESERVED_TOKENS.index(PAD) # Normally 0 EOS_TOKEN = RESERVED_TOKENS.index(EOS) # Normally 1 -if six.PY2: +if PY2: RESERVED_TOKENS_BYTES = RESERVED_TOKENS else: RESERVED_TOKENS_BYTES = [bytes(PAD, "ascii"), bytes(EOS, "ascii")] @@ -110,7 +110,7 @@ class ByteTextEncoder(TextEncoder): def encode(self, s): numres = self._num_reserved_ids - if six.PY2: + if PY2: return [ord(c) + numres for c in s] # Python3: explicitly convert to UTF-8 return [c + numres for c in s.encode("utf-8")] @@ -124,7 +124,7 @@ def decode(self, ids): decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) else: decoded_ids.append(int2byte(id_ - numres)) - if six.PY2: + if PY2: return "".join(decoded_ids) # Python3: join byte arrays and then decode string return b"".join(decoded_ids).decode("utf-8", "replace") @@ -469,7 +469,7 @@ def store_to_file(self, filename): f.write("'" + unicode_to_native(subtoken_string) + "'\n") def _escape_token(self, token): - r"""Escape away underscores and OOV characters and append '_'. + """Escape away underscores and OOV characters and append '_'. This allows the token to be experessed as the concatenation of a list of subtokens from the vocabulary. The underscore acts as a sentinel @@ -491,7 +491,7 @@ def _escape_token(self, token): return ret def _unescape_token(self, escaped_token): - r"""Inverse of _escape_token(). + """Inverse of _escape_token(). Args: escaped_token: a unicode string diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py old mode 100644 new mode 100755 index df6ef6470..65fe19334 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -141,6 +141,7 @@ def read_corpus(): if corpus_max_lines > 0 and lines_read > corpus_max_lines: return docs return docs + counts = defaultdict(int) for doc in read_corpus(): for tok in encode(_native_to_unicode(doc)): From a2b1c60f7f92eb25ae430e8b82ea1e88e784bb00 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Fri, 14 Jul 2017 17:32:57 +0000 Subject: [PATCH 0118/4095] Used regex in _unescape_token() --- tensor2tensor/data_generators/text_encoder.py | 48 ++++++++----------- .../data_generators/tokenizer_test.py | 2 +- 2 files changed, 20 insertions(+), 30 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/tokenizer_test.py diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 38b78256d..c477b1e43 100755 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -24,6 +24,7 @@ from __future__ import print_function from collections import defaultdict +import re # Dependency imports @@ -212,6 +213,7 @@ class SubwordTextEncoder(TextEncoder): def __init__(self, filename=None): """Initialize and read from a file, if provided.""" + self._alphabet = set() if filename is not None: self._load_from_file(filename) super(SubwordTextEncoder, self).__init__(num_reserved_ids=None) @@ -490,6 +492,12 @@ def _escape_token(self, token): ret += u"\\%d;" % ord(c) return ret + # Regular expression for unescaping token strings + # '\u' is converted to '_' + # '\\' is converted to '\' + # '\213;' is converted to unichr(213) + _UNESCAPE_REGEX = re.compile(u'|'.join([r"\\u", r"\\\\", r"\\([0-9]+);"])) + def _unescape_token(self, escaped_token): """Inverse of _escape_token(). @@ -498,32 +506,14 @@ def _unescape_token(self, escaped_token): Returns: token: a unicode string """ - ret = u"" - escaped_token = escaped_token[:-1] - pos = 0 - while pos < len(escaped_token): - c = escaped_token[pos] - if c == "\\": - pos += 1 - if pos >= len(escaped_token): - break - c = escaped_token[pos] - if c == u"u": - ret += u"_" - pos += 1 - elif c == "\\": - ret += u"\\" - pos += 1 - else: - semicolon_pos = escaped_token.find(u";", pos) - if semicolon_pos == -1: - continue - try: - ret += unichr(int(escaped_token[pos:semicolon_pos])) - pos = semicolon_pos + 1 - except (ValueError, OverflowError) as _: - pass - else: - ret += c - pos += 1 - return ret + def match(m): + if m.group(1) is not None: + # Convert '\213;' to unichr(213) + try: + return unichr(int(m.group(1))) + except (ValueError, OverflowError) as _: + return "" + # Convert '\u' to '_' and '\\' to '\' + return u"_" if m.group(0) == u"\\u" else u"\\" + # Cut off the trailing underscore and apply the regex substitution + return self._UNESCAPE_REGEX.sub(match, escaped_token[:-1]) diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py old mode 100644 new mode 100755 index 404a11396..e90c85031 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Copyright 2017 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# coding=utf-8 """Tests for tensor2tensor.data_generators.tokenizer.""" from __future__ import absolute_import From 29f2e2e57e47bc1d29fc88308e3e8b339dcc75ce Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 12 Jul 2017 12:39:04 -0700 Subject: [PATCH 0119/4095] Update instructions for user components to be compatible with Py3 PiperOrigin-RevId: 161701617 --- README.md | 2 +- tensor2tensor/__init__.py | 2 +- tensor2tensor/bin/t2t-datagen | 16 +- tensor2tensor/bin/t2t-make-tf-configs | 2 +- tensor2tensor/bin/t2t-trainer | 2 +- tensor2tensor/data_generators/__init__.py | 2 +- tensor2tensor/data_generators/algorithmic.py | 2 +- .../data_generators/algorithmic_math.py | 2 +- .../data_generators/algorithmic_math_test.py | 2 +- .../data_generators/algorithmic_test.py | 2 +- tensor2tensor/data_generators/audio.py | 2 +- tensor2tensor/data_generators/audio_test.py | 2 +- .../data_generators/concatenate_examples.py | 2 +- .../data_generators/generator_utils.py | 30 +- .../data_generators/generator_utils_test.py | 2 +- tensor2tensor/data_generators/image.py | 2 +- tensor2tensor/data_generators/image_test.py | 2 +- tensor2tensor/data_generators/inspect.py | 2 +- tensor2tensor/data_generators/lm1b.py | 2 +- .../data_generators/problem_hparams.py | 89 +-- .../data_generators/problem_hparams_test.py | 2 +- tensor2tensor/data_generators/ptb.py | 2 +- tensor2tensor/data_generators/snli.py | 2 +- tensor2tensor/data_generators/text_encoder.py | 41 +- .../text_encoder_build_subword.py | 2 +- tensor2tensor/data_generators/tokenizer.py | 3 +- .../data_generators/tokenizer_test.py | 2 +- tensor2tensor/data_generators/wiki.py | 5 +- tensor2tensor/data_generators/wmt.py | 136 +---- tensor2tensor/data_generators/wmt_test.py | 2 +- tensor2tensor/data_generators/wsj_parsing.py | 2 +- tensor2tensor/models/__init__.py | 2 +- tensor2tensor/models/attention_lm.py | 2 +- tensor2tensor/models/attention_lm_moe.py | 2 +- tensor2tensor/models/bluenet.py | 2 +- tensor2tensor/models/bluenet_test.py | 2 +- tensor2tensor/models/bytenet.py | 2 +- tensor2tensor/models/bytenet_test.py | 2 +- tensor2tensor/models/common_attention.py | 2 +- tensor2tensor/models/common_hparams.py | 10 +- tensor2tensor/models/common_layers.py | 25 +- tensor2tensor/models/common_layers_test.py | 2 +- tensor2tensor/models/lstm.py | 2 +- tensor2tensor/models/lstm_test.py | 2 +- tensor2tensor/models/modalities.py | 2 +- tensor2tensor/models/modalities_test.py | 2 +- tensor2tensor/models/models.py | 3 +- tensor2tensor/models/multimodel.py | 2 +- tensor2tensor/models/multimodel_test.py | 2 +- tensor2tensor/models/neural_gpu.py | 2 +- tensor2tensor/models/neural_gpu_test.py | 2 +- tensor2tensor/models/shake_shake.py | 143 ----- tensor2tensor/models/slicenet.py | 2 +- tensor2tensor/models/slicenet_test.py | 2 +- tensor2tensor/models/transformer.py | 11 +- .../models/transformer_alternative.py | 2 +- tensor2tensor/models/transformer_test.py | 2 +- tensor2tensor/models/xception.py | 2 +- tensor2tensor/models/xception_test.py | 2 +- tensor2tensor/utils/__init__.py | 2 +- tensor2tensor/utils/avg_checkpoints.py | 2 +- tensor2tensor/utils/beam_search.py | 2 +- tensor2tensor/utils/beam_search_test.py | 2 +- tensor2tensor/utils/bleu_hook.py | 2 +- tensor2tensor/utils/bleu_hook_test.py | 2 +- tensor2tensor/utils/data_reader.py | 2 +- tensor2tensor/utils/data_reader_test.py | 2 +- tensor2tensor/utils/expert_utils.py | 2 +- tensor2tensor/utils/metrics.py | 2 +- tensor2tensor/utils/metrics_test.py | 2 +- tensor2tensor/utils/modality.py | 2 +- tensor2tensor/utils/registry.py | 2 +- tensor2tensor/utils/registry_test.py | 2 +- tensor2tensor/utils/t2t_model.py | 2 +- tensor2tensor/utils/trainer_utils.py | 106 ++-- tensor2tensor/utils/trainer_utils_test.py | 2 +- tensor2tensor/utils/yellowfin.py | 568 ------------------ tensor2tensor/utils/yellowfin_test.py | 226 ------- 78 files changed, 163 insertions(+), 1375 deletions(-) mode change 100755 => 100644 tensor2tensor/bin/t2t-datagen mode change 100755 => 100644 tensor2tensor/bin/t2t-trainer mode change 100755 => 100644 tensor2tensor/data_generators/generator_utils.py mode change 100755 => 100644 tensor2tensor/data_generators/problem_hparams.py mode change 100755 => 100644 tensor2tensor/data_generators/text_encoder.py mode change 100755 => 100644 tensor2tensor/data_generators/tokenizer.py mode change 100755 => 100644 tensor2tensor/data_generators/wmt.py delete mode 100644 tensor2tensor/models/shake_shake.py mode change 100755 => 100644 tensor2tensor/models/transformer.py mode change 100755 => 100644 tensor2tensor/utils/trainer_utils.py delete mode 100644 tensor2tensor/utils/yellowfin.py delete mode 100644 tensor2tensor/utils/yellowfin_test.py diff --git a/README.md b/README.md index 1fdd7e883..27bb47947 100644 --- a/README.md +++ b/README.md @@ -242,7 +242,7 @@ def transformer_my_very_own_hparams_set(): ```python # In ~/usr/t2t_usr/__init__.py -import my_registrations +from . import my_registrations ``` ``` diff --git a/tensor2tensor/__init__.py b/tensor2tensor/__init__.py index 27d533abc..eff6a2b14 100644 --- a/tensor2tensor/__init__.py +++ b/tensor2tensor/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100755 new mode 100644 index a0d1454a4..cd91cac60 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -102,14 +102,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), - "ice_parsing_tokens": ( - lambda: wmt.tabbed_parsing_token_generator(FLAGS.tmp_dir, - True, "ice", 2**13, 2**8), - lambda: wmt.tabbed_parsing_token_generator(FLAGS.tmp_dir, - False, "ice", 2**13, 2**8)), - "ice_parsing_characters": ( - lambda: wmt.tabbed_parsing_character_generator(FLAGS.tmp_dir, True), - lambda: wmt.tabbed_parsing_character_generator(FLAGS.tmp_dir, False)), "wmt_parsing_tokens_8k": ( lambda: wmt.parsing_token_generator(FLAGS.tmp_dir, True, 2**13), lambda: wmt.parsing_token_generator(FLAGS.tmp_dir, False, 2**13)), @@ -148,12 +140,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**15) ), - "wmt_zhen_tokens_32k": ( - lambda: wmt.zhen_wordpiece_token_generator(FLAGS.tmp_dir, True, - 2**15, 2**15), - lambda: wmt.zhen_wordpiece_token_generator(FLAGS.tmp_dir, False, - 2**15, 2**15) - ), "lm1b_32k": ( lambda: lm1b.generator(FLAGS.tmp_dir, True), lambda: lm1b.generator(FLAGS.tmp_dir, False) diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs index ae87ffbd8..3e010b204 100644 --- a/tensor2tensor/bin/t2t-make-tf-configs +++ b/tensor2tensor/bin/t2t-make-tf-configs @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100755 new mode 100644 index 92f671826..322957028 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/__init__.py b/tensor2tensor/data_generators/__init__.py index 27d533abc..eff6a2b14 100644 --- a/tensor2tensor/data_generators/__init__.py +++ b/tensor2tensor/data_generators/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 4cd14753b..87e5873a5 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/algorithmic_math.py b/tensor2tensor/data_generators/algorithmic_math.py index ec3b7670a..e65b47ff0 100644 --- a/tensor2tensor/data_generators/algorithmic_math.py +++ b/tensor2tensor/data_generators/algorithmic_math.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/algorithmic_math_test.py b/tensor2tensor/data_generators/algorithmic_math_test.py index 6c4b63054..5f0de29fb 100644 --- a/tensor2tensor/data_generators/algorithmic_math_test.py +++ b/tensor2tensor/data_generators/algorithmic_math_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/algorithmic_test.py b/tensor2tensor/data_generators/algorithmic_test.py index 70a5d68b8..9961e6173 100644 --- a/tensor2tensor/data_generators/algorithmic_test.py +++ b/tensor2tensor/data_generators/algorithmic_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/audio.py b/tensor2tensor/data_generators/audio.py index 12e0c7b43..81cfde008 100644 --- a/tensor2tensor/data_generators/audio.py +++ b/tensor2tensor/data_generators/audio.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/audio_test.py b/tensor2tensor/data_generators/audio_test.py index f1830043f..1c19432c3 100644 --- a/tensor2tensor/data_generators/audio_test.py +++ b/tensor2tensor/data_generators/audio_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/concatenate_examples.py b/tensor2tensor/data_generators/concatenate_examples.py index b346b6c08..158bc1b59 100644 --- a/tensor2tensor/data_generators/concatenate_examples.py +++ b/tensor2tensor/data_generators/concatenate_examples.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100755 new mode 100644 index f076c10da..67da54ab9 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -290,34 +290,6 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): return vocab -def get_or_generate_tabbed_vocab(tmp_dir, source_filename, index, vocab_filename, vocab_size): - """Generate a vocabulary from the source file. This is assumed to be - a file of source, target pairs, where each line contains a source string - and a target string, separated by a tab ('\t') character. The index - parameter specifies 0 for the source or 1 for the target.""" - vocab_filepath = os.path.join(tmp_dir, vocab_filename) - if os.path.exists(vocab_filepath): - vocab = text_encoder.SubwordTextEncoder(vocab_filepath) - return vocab - - # Use Tokenizer to count the word occurrences. - token_counts = defaultdict(int) - filepath = os.path.join(tmp_dir, source_filename) - with tf.gfile.GFile(filepath, mode="r") as source_file: - for line in source_file: - line = line.strip() - if line and '\t' in line: - parts = line.split('\t', maxsplit = 1) - part = parts[index].strip() - for tok in tokenizer.encode(text_encoder.native_to_unicode(part)): - token_counts[tok] += 1 - - vocab = text_encoder.SubwordTextEncoder.build_to_target_size( - vocab_size, token_counts, 1, 1e3) - vocab.store_to_file(vocab_filepath) - return vocab - - def read_records(filename): reader = tf.python_io.tf_record_iterator(filename) records = [] diff --git a/tensor2tensor/data_generators/generator_utils_test.py b/tensor2tensor/data_generators/generator_utils_test.py index 320d1a02d..c776d120c 100644 --- a/tensor2tensor/data_generators/generator_utils_test.py +++ b/tensor2tensor/data_generators/generator_utils_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 377bf3e54..792122324 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/image_test.py b/tensor2tensor/data_generators/image_test.py index c5b4f14be..6c9984265 100644 --- a/tensor2tensor/data_generators/image_test.py +++ b/tensor2tensor/data_generators/image_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/inspect.py b/tensor2tensor/data_generators/inspect.py index a0da09150..fba3c6492 100644 --- a/tensor2tensor/data_generators/inspect.py +++ b/tensor2tensor/data_generators/inspect.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index 66a3d52a0..78fb001bc 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py old mode 100755 new mode 100644 index 91a685dfe..ec3b8e45e --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -66,13 +66,14 @@ def parse_problem_name(problem_name): was_copy: A boolean. """ # Recursively strip tags until we reach a base name. - if problem_name.endswith("_rev"): + if len(problem_name) > 4 and problem_name[-4:] == "_rev": base, _, was_copy = parse_problem_name(problem_name[:-4]) return base, True, was_copy - if problem_name.endswith("_copy"): + elif len(problem_name) > 5 and problem_name[-5:] == "_copy": base, was_reversed, _ = parse_problem_name(problem_name[:-5]) return base, was_reversed, True - return problem_name, False, False + else: + return problem_name, False, False def _lookup_problem_hparams_fn(name): @@ -176,10 +177,6 @@ def default_problem_hparams(): # 13: Audio spectral domain # 14: Parse characters # 15: Parse tokens - # 16: Chinese tokens - # 17: Icelandic characters - # 18: Icelandic tokens - # 19: Icelandic parse tokens # Add more above if needed. input_space_id=0, target_space_id=0, @@ -200,8 +197,7 @@ def default_problem_hparams(): # the targets. For instance `problem_copy` will copy the inputs, but # `problem_rev_copy` will copy the targets. was_reversed=False, - was_copy=False, - ) + was_copy=False,) def test_problem_hparams(unused_model_hparams, input_vocab_size, @@ -476,32 +472,6 @@ def wmt_ende_tokens(model_hparams, wrong_vocab_size): return p -def wmt_zhen_tokens(model_hparams, wrong_vocab_size): - """Chinese to English translation benchmark.""" - p = default_problem_hparams() - # This vocab file must be present within the data directory. - if model_hparams.shared_embedding_and_softmax_weights == 1: - model_hparams.shared_embedding_and_softmax_weights = 0 - source_vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.zh.%d" % wrong_vocab_size) - target_vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.en.%d" % wrong_vocab_size) - source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.SYMBOL, source_token.vocab_size) - } - p.target_modality = (registry.Modalities.SYMBOL, target_token.vocab_size) - p.vocabulary = { - "inputs": source_token, - "targets": target_token, - } - p.loss_multiplier = 1.4 - p.input_space_id = 16 - p.target_space_id = 4 - return p - - def wmt_ende_v2(model_hparams, vocab_size): """English to German translation benchmark with separate vocabularies.""" p = default_problem_hparams() @@ -535,7 +505,7 @@ def wmt_concat(model_hparams, wrong_vocab_size): return p -def wmt_parsing_characters(model_hparams): +def wmt_parsing_characters(unused_model_hparams): """English to parse tree translation benchmark.""" p = default_problem_hparams() p.input_modality = {"inputs": (registry.Modalities.SYMBOL, 256)} @@ -579,8 +549,7 @@ def wmt_parsing_tokens(model_hparams, wrong_vocab_size): return p -def wsj_parsing_tokens(model_hparams, prefix, - wrong_source_vocab_size, +def wsj_parsing_tokens(model_hparams, wrong_source_vocab_size, wrong_target_vocab_size): """English to parse tree translation benchmark. @@ -599,10 +568,10 @@ def wsj_parsing_tokens(model_hparams, prefix, # This vocab file must be present within the data directory. source_vocab_filename = os.path.join( model_hparams.data_dir, - prefix + "_source.tokens.vocab.%d" % wrong_source_vocab_size) + "wsj_source.tokens.vocab.%d" % wrong_source_vocab_size) target_vocab_filename = os.path.join( model_hparams.data_dir, - prefix + "_target.tokens.vocab.%d" % wrong_target_vocab_size) + "wsj_target.tokens.vocab.%d" % wrong_target_vocab_size) source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) p.input_modality = { @@ -619,37 +588,6 @@ def wsj_parsing_tokens(model_hparams, prefix, return p -def ice_parsing_tokens(model_hparams, wrong_source_vocab_size): - """Icelandic to parse tree translation benchmark. - - Args: - model_hparams: a tf.contrib.training.HParams - Returns: - a tf.contrib.training.HParams - """ - p = default_problem_hparams() - # This vocab file must be present within the data directory. - source_vocab_filename = os.path.join( - model_hparams.data_dir, - "ice_source.tokens.vocab.%d" % wrong_source_vocab_size) - target_vocab_filename = os.path.join( - model_hparams.data_dir, - "ice_target.tokens.vocab.256") - source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.SYMBOL, source_subtokenizer.vocab_size) - } - p.target_modality = (registry.Modalities.SYMBOL, 256) - p.vocabulary = { - "inputs": source_subtokenizer, - "targets": target_subtokenizer, - } - p.input_space_id = 18 # Icelandic tokens - p.target_space_id = 19 # Icelandic parse tokens - return p - - def image_cifar10(unused_model_hparams): """CIFAR-10.""" p = default_problem_hparams() @@ -768,11 +706,9 @@ def img2img_imagenet(unused_model_hparams): "wiki_32k": wiki_32k, "lmptb_10k": lmptb_10k, "wmt_parsing_characters": wmt_parsing_characters, - "ice_parsing_characters": wmt_parsing_characters, - "ice_parsing_tokens": lambda p: ice_parsing_tokens(p, 2**13), "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), - "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens(p, "wsj", 2**14, 2**9), - "wsj_parsing_tokens_32k": lambda p: wsj_parsing_tokens(p, "wsj", 2**15, 2**9), + "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens(p, 2**14, 2**9), + "wsj_parsing_tokens_32k": lambda p: wsj_parsing_tokens(p, 2**15, 2**9), "wmt_enfr_characters": wmt_enfr_characters, "wmt_enfr_tokens_8k": lambda p: wmt_enfr_tokens(p, 2**13), "wmt_enfr_tokens_32k": lambda p: wmt_enfr_tokens(p, 2**15), @@ -794,7 +730,6 @@ def img2img_imagenet(unused_model_hparams): "wmt_ende_bpe32k_160": wmt_ende_bpe32k, "wmt_ende_v2_32k_combined": lambda p: wmt_ende_v2(p, 2**15), "wmt_ende_v2_16k_combined": lambda p: wmt_ende_v2(p, 2**14), - "wmt_zhen_tokens_32k": lambda p: wmt_zhen_tokens(p, 2**15), "image_cifar10_tune": image_cifar10, "image_cifar10_test": image_cifar10, "image_mnist_tune": image_mnist, diff --git a/tensor2tensor/data_generators/problem_hparams_test.py b/tensor2tensor/data_generators/problem_hparams_test.py index d3803396f..ad1f0192d 100644 --- a/tensor2tensor/data_generators/problem_hparams_test.py +++ b/tensor2tensor/data_generators/problem_hparams_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/ptb.py b/tensor2tensor/data_generators/ptb.py index d4cf42c88..9a7db3a78 100644 --- a/tensor2tensor/data_generators/ptb.py +++ b/tensor2tensor/data_generators/ptb.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/snli.py b/tensor2tensor/data_generators/snli.py index 1d3acd356..7322c59ff 100644 --- a/tensor2tensor/data_generators/snli.py +++ b/tensor2tensor/data_generators/snli.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py old mode 100755 new mode 100644 index 38b78256d..715cba803 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -37,32 +37,27 @@ # Conversion between Unicode and UTF-8, if required (on Python2) -if PY2: - native_to_unicode = lambda s: s if isinstance(s, unicode) else s.decode("utf-8") - unicode_to_native = lambda s: s.encode("utf-8") -else: - # No conversion required on Python3 - native_to_unicode = lambda s: s - unicode_to_native = lambda s: s +def native_to_unicode(s): + return s.decode("utf-8") if (PY2 and not isinstance(s, unicode)) else s + + +unicode_to_native = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) # Reserved tokens for things like padding and EOS symbols. PAD = "<pad>" EOS = "<EOS>" RESERVED_TOKENS = [PAD, EOS] -NUM_RESERVED_TOKENS = len(RESERVED_TOKENS) -PAD_TOKEN = RESERVED_TOKENS.index(PAD) # Normally 0 -EOS_TOKEN = RESERVED_TOKENS.index(EOS) # Normally 1 - -if PY2: +if six.PY2: RESERVED_TOKENS_BYTES = RESERVED_TOKENS else: RESERVED_TOKENS_BYTES = [bytes(PAD, "ascii"), bytes(EOS, "ascii")] + class TextEncoder(object): """Base class for converting from ints to/from human readable strings.""" - def __init__(self, num_reserved_ids=NUM_RESERVED_TOKENS): + def __init__(self, num_reserved_ids=2): self._num_reserved_ids = num_reserved_ids def encode(self, s): @@ -110,7 +105,7 @@ class ByteTextEncoder(TextEncoder): def encode(self, s): numres = self._num_reserved_ids - if PY2: + if six.PY2: return [ord(c) + numres for c in s] # Python3: explicitly convert to UTF-8 return [c + numres for c in s.encode("utf-8")] @@ -124,10 +119,10 @@ def decode(self, ids): decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) else: decoded_ids.append(int2byte(id_ - numres)) - if PY2: + if six.PY2: return "".join(decoded_ids) # Python3: join byte arrays and then decode string - return b"".join(decoded_ids).decode("utf-8", "replace") + return b"".join(decoded_ids).decode("utf-8") @property def vocab_size(self): @@ -137,7 +132,7 @@ def vocab_size(self): class TokenTextEncoder(TextEncoder): """Encoder based on a user-supplied vocabulary.""" - def __init__(self, vocab_filename, reverse=False, num_reserved_ids=NUM_RESERVED_TOKENS): + def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): """Initialize from a file, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse @@ -350,7 +345,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4, - num_reserved_ids=NUM_RESERVED_TOKENS): + num_reserved_ids=2): """Train a SubwordTextEncoder based on a dictionary of word counts. Args: @@ -376,8 +371,6 @@ def build_from_token_counts(self, # We build iteratively. On each iteration, we segment all the words, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. - if min_count < 1: - min_count = 1 for i in xrange(num_iterations): tf.logging.info("Iteration {0}".format(i)) counts = defaultdict(int) @@ -469,7 +462,7 @@ def store_to_file(self, filename): f.write("'" + unicode_to_native(subtoken_string) + "'\n") def _escape_token(self, token): - """Escape away underscores and OOV characters and append '_'. + r"""Escape away underscores and OOV characters and append '_'. This allows the token to be experessed as the concatenation of a list of subtokens from the vocabulary. The underscore acts as a sentinel @@ -480,7 +473,7 @@ def _escape_token(self, token): Returns: escaped_token: a unicode string """ - assert isinstance(token, six.text_type) + assert isinstance(token, unicode) token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") + u"_" ret = u"" for c in token: @@ -491,7 +484,7 @@ def _escape_token(self, token): return ret def _unescape_token(self, escaped_token): - """Inverse of _escape_token(). + r"""Inverse of _escape_token(). Args: escaped_token: a unicode string diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py index df8aa73eb..093101c68 100644 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py old mode 100755 new mode 100644 index 65fe19334..5d6cc9c06 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -141,7 +141,6 @@ def read_corpus(): if corpus_max_lines > 0 and lines_read > corpus_max_lines: return docs return docs - counts = defaultdict(int) for doc in read_corpus(): for tok in encode(_native_to_unicode(doc)): diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py index 404a11396..c279290ed 100644 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 99a9e64e6..208d679bd 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,7 +25,6 @@ # Dependency imports import six -from six import PY2 from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators import tokenizer @@ -61,7 +60,7 @@ def page_generator(tmp_dir, max_docs=None): count = 0 corpus_filepath = _maybe_download_corpus(tmp_dir) for line in bz2.BZ2File(corpus_filepath, "r"): - line = unicode(line, "utf-8") if PY2 else line.decode("utf-8") + line = unicode(line, "utf-8") if not doc and line != u" <page>\n": continue doc += line diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py old mode 100755 new mode 100644 index 2d43d1739..eb09a68ca --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -38,8 +38,9 @@ FLAGS = tf.flags.FLAGS -# End-of-sentence marker -EOS = text_encoder.EOS_TOKEN +# End-of-sentence marker (should correspond to the position of EOS in the +# RESERVED_TOKENS list in text_encoder.py) +EOS = 1 def character_generator(source_path, target_path, character_vocab, eos=None): @@ -71,35 +72,6 @@ def character_generator(source_path, target_path, character_vocab, eos=None): source, target = source_file.readline(), target_file.readline() -def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): - """Generator for sequence-to-sequence tasks using tokens derived from - text files where each line contains both a source and a target string. - The two strings are separated by a tab character ('\t'). It yields - dictionaries of "inputs" and "targets" where inputs are characters - from the source lines converted to integers, and targets are - characters from the target lines, also converted to integers. - - Args: - source_path: path to the file with source and target sentences. - source_vocab: a SunwordTextEncoder to encode the source string. - target_vocab: a SunwordTextEncoder to encode the target string. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from characters in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - for line in source_file: - if line and '\t' in line: - parts = line.split('\t', maxsplit = 1) - source, target = parts[0].strip(), parts[1].strip() - source_ints = source_vocab.encode(source) + eos_list - target_ints = source_vocab.encode(target) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - - def token_generator(source_path, target_path, token_vocab, eos=None): """Generator for sequence-to-sequence tasks that uses tokens. @@ -129,38 +101,6 @@ def token_generator(source_path, target_path, token_vocab, eos=None): source, target = source_file.readline(), target_file.readline() -def bi_vocabs_token_generator(source_path, target_path, - source_token_vocab, - target_token_vocab, - eos=None): - """Generator for sequence-to-sequence tasks that uses tokens. - - This generator assumes the files at source_path and target_path have - the same number of lines and yields dictionaries of "inputs" and "targets" - where inputs are token ids from the " "-split source (and target, resp.) lines - converted to integers using the token_map. - - Args: - source_path: path to the file with source sentences. - target_path: path to the file with target sentences. - source_token_vocab: text_encoder.TextEncoder object. - target_token_vocab: text_encoder.TextEncoder object. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from tokens in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target: - source_ints = source_token_vocab.encode(source.strip()) + eos_list - target_ints = target_token_vocab.encode(target.strip()) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - source, target = source_file.readline(), target_file.readline() - def _get_wmt_ende_dataset(directory, filename): """Extract the WMT en-de corpus `filename` to directory unless it's there.""" train_path = os.path.join(directory, filename) @@ -182,7 +122,7 @@ def ende_bpe_token_generator(tmp_dir, train): train_path = _get_wmt_ende_dataset(tmp_dir, dataset_path) token_path = os.path.join(tmp_dir, "vocab.bpe.32000") token_vocab = text_encoder.TokenTextEncoder(vocab_filename=token_path) - return token_generator(train_path + ".en", train_path + ".de", token_vocab, EOS) + return token_generator(train_path + ".en", train_path + ".de", token_vocab, 1) _ENDE_TRAIN_DATASETS = [ @@ -237,21 +177,6 @@ def ende_bpe_token_generator(tmp_dir, train): ], ] -_ZHEN_TRAIN_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", - ("training/news-commentary-v12.zh-en.zh", - "training/news-commentary-v12.zh-en.en") - ] -] - -_ZHEN_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.zh", - "dev/newsdev2017-zhen-ref.en") - ] -] def _compile_data(tmp_dir, datasets, filename): """Concatenate all `datasets` and save to `filename`.""" @@ -328,25 +253,6 @@ def ende_character_generator(tmp_dir, train): character_vocab, EOS) -def zhen_wordpiece_token_generator(tmp_dir, train, - source_vocab_size, - target_vocab_size): - datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] - source_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.zh.%d" % source_vocab_size, - source_vocab_size, source_datasets) - target_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.en.%d" % target_vocab_size, - target_vocab_size, target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) - return bi_vocabs_token_generator(data_path + ".lang1", - data_path + ".lang2", - source_vocab, target_vocab, EOS) - - def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): """Instance of token generator for the WMT en->fr task.""" symbolizer_vocab = generator_utils.get_or_generate_vocab( @@ -367,38 +273,6 @@ def enfr_character_generator(tmp_dir, train): return character_generator(data_path + ".lang1", data_path + ".lang2", character_vocab, EOS) -def parsing_character_generator(tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - filename = "parsing_%s" % ("train" if train else "dev") - text_filepath = os.path.join(tmp_dir, filename + ".text") - tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return character_generator(text_filepath, tags_filepath, character_vocab, EOS) - - -def tabbed_parsing_token_generator(tmp_dir, train, prefix, source_vocab_size, target_vocab_size): - """Generate source and target data from a single file with source/target pairs - separated by a tab character ('\t')""" - source_vocab = generator_utils.get_or_generate_tabbed_vocab( - tmp_dir, "parsing_train.pairs", 0, - prefix + "_source.tokens.vocab.%d" % source_vocab_size, - source_vocab_size) - target_vocab = generator_utils.get_or_generate_tabbed_vocab( - tmp_dir, "parsing_train.pairs", 1, - prefix + "_target.tokens.vocab.%d" % target_vocab_size, - target_vocab_size) - filename = "parsing_%s" % ("train" if train else "dev") - pair_filepath = os.path.join(tmp_dir, filename + ".pairs") - return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) - - -def tabbed_parsing_character_generator(tmp_dir, train): - """Generate source and target data from a single file with source/target pairs - separated by a tab character ('\t')""" - character_vocab = text_encoder.ByteTextEncoder() - filename = "parsing_%s" % ("train" if train else "dev") - pair_filepath = os.path.join(tmp_dir, filename + ".pairs") - return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) - def parsing_token_generator(tmp_dir, train, vocab_size): symbolizer_vocab = generator_utils.get_or_generate_vocab( diff --git a/tensor2tensor/data_generators/wmt_test.py b/tensor2tensor/data_generators/wmt_test.py index b6af3cf93..86b88e5b1 100644 --- a/tensor2tensor/data_generators/wmt_test.py +++ b/tensor2tensor/data_generators/wmt_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/data_generators/wsj_parsing.py b/tensor2tensor/data_generators/wsj_parsing.py index 756a44954..7734db646 100644 --- a/tensor2tensor/data_generators/wsj_parsing.py +++ b/tensor2tensor/data_generators/wsj_parsing.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index 27d533abc..eff6a2b14 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 99fbd8232..85a05f9b1 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index b4d27d400..1715f0a77 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index c0533ee42..95216f43d 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/bluenet_test.py b/tensor2tensor/models/bluenet_test.py index 080c96a3f..b3f18249d 100644 --- a/tensor2tensor/models/bluenet_test.py +++ b/tensor2tensor/models/bluenet_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/bytenet.py b/tensor2tensor/models/bytenet.py index 1a82144d6..301626dc2 100644 --- a/tensor2tensor/models/bytenet.py +++ b/tensor2tensor/models/bytenet.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/bytenet_test.py b/tensor2tensor/models/bytenet_test.py index 8202d5b74..f1e42669e 100644 --- a/tensor2tensor/models/bytenet_test.py +++ b/tensor2tensor/models/bytenet_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index b6a5e09d6..4470b86a0 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index f48a67c15..e1568f0e1 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -61,7 +61,6 @@ def basic_params1(): weight_noise=0.0, learning_rate_decay_scheme="none", learning_rate_warmup_steps=100, - learning_rate_cosine_cycle_steps=250000, learning_rate=0.1, sampling_method="argmax", # "argmax" or "random" problem_choice="adaptive", # "uniform", "adaptive", "distributed" @@ -203,9 +202,4 @@ def basic_range1(ranged_hparams): rhp.set_float("optimizer_adam_beta1", 0.8, 0.9) rhp.set_float("optimizer_adam_beta2", 0.995, 0.999) rhp.set_categorical("optimizer", - ["Adam", - "Adagrad", - "Momentum", - "RMSProp", - "SGD", - "YellowFin"]) + ["Adam", "Adagrad", "Momentum", "RMSProp", "SGD"]) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index d38f97fb0..3ff41bebc 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -58,15 +58,9 @@ def inverse_exp_decay(max_step, min_value=0.01): return inv_base**tf.maximum(float(max_step) - step, 0.0) -def shakeshake2_py(x, y, equal=False, individual=False): +def shakeshake2_py(x, y, equal=False): """The shake-shake sum of 2 tensors, python version.""" - if equal: - alpha = 0.5 - if individual: - alpha = tf.random_uniform(tf.get_shape(x)[:1]) - else: - alpha = tf.random_uniform([]) - + alpha = 0.5 if equal else tf.random_uniform([]) return alpha * x + (1.0 - alpha) * y @@ -78,14 +72,6 @@ def shakeshake2_grad(x1, x2, dy): return dx -@function.Defun() -def shakeshake2_indiv_grad(x1, x2, dy): - """Overriding gradient for shake-shake of 2 tensors.""" - y = shakeshake2_py(x1, x2, individual=True) - dx = tf.gradients(ys=[y], xs=[x1, x2], grad_ys=[dy]) - return dx - - @function.Defun() def shakeshake2_equal_grad(x1, x2, dy): """Overriding gradient for shake-shake of 2 tensors.""" @@ -100,11 +86,6 @@ def shakeshake2(x1, x2): return shakeshake2_py(x1, x2) -@function.Defun(grad_func=shakeshake2_indiv_grad) -def shakeshake2_indiv(x1, x2): - return shakeshake2_py(x1, x2, individual=True) - - @function.Defun(grad_func=shakeshake2_equal_grad) def shakeshake2_eqgrad(x1, x2): """The shake-shake function with a different alpha for forward/backward.""" diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index a87776bfb..3a2fafd8b 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 998e6756b..c3ae0a01e 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index 4c4c42909..4ddaf6b64 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/modalities.py b/tensor2tensor/models/modalities.py index 4e7a7e924..3cfb44f57 100644 --- a/tensor2tensor/models/modalities.py +++ b/tensor2tensor/models/modalities.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/modalities_test.py b/tensor2tensor/models/modalities_test.py index 090af3aef..118db3847 100644 --- a/tensor2tensor/models/modalities_test.py +++ b/tensor2tensor/models/modalities_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index 214aec245..594b605ad 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -30,7 +30,6 @@ from tensor2tensor.models import modalities from tensor2tensor.models import multimodel from tensor2tensor.models import neural_gpu -from tensor2tensor.models import shake_shake from tensor2tensor.models import slicenet from tensor2tensor.models import transformer from tensor2tensor.models import transformer_alternative diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index 26e7469c2..6f12db86d 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/multimodel_test.py b/tensor2tensor/models/multimodel_test.py index dbbd3fa8e..958fac5d7 100644 --- a/tensor2tensor/models/multimodel_test.py +++ b/tensor2tensor/models/multimodel_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/neural_gpu.py b/tensor2tensor/models/neural_gpu.py index dce0dbc30..30d535098 100644 --- a/tensor2tensor/models/neural_gpu.py +++ b/tensor2tensor/models/neural_gpu.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/neural_gpu_test.py b/tensor2tensor/models/neural_gpu_test.py index 3065bb1c4..1dddc1056 100644 --- a/tensor2tensor/models/neural_gpu_test.py +++ b/tensor2tensor/models/neural_gpu_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py deleted file mode 100644 index f87eaa335..000000000 --- a/tensor2tensor/models/shake_shake.py +++ /dev/null @@ -1,143 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange # pylint: disable=redefined-builtin - -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers -from tensor2tensor.utils import registry -from tensor2tensor.utils import t2t_model - -import tensorflow as tf - - -def shake_shake_block_branch(x, conv_filters, stride): - x = tf.nn.relu(x) - x = tf.layers.conv2d( - x, conv_filters, (3, 3), strides=(stride, stride), padding='SAME') - x = tf.layers.batch_normalization(x) - x = tf.nn.relu(x) - x = tf.layers.conv2d(x, conv_filters, (3, 3), strides=(1, 1), padding='SAME') - x = tf.layers.batch_normalization(x) - return x - - -def downsampling_residual_branch(x, conv_filters): - x = tf.nn.relu(x) - - x1 = tf.layers.average_pooling2d(x, pool_size=(1, 1), strides=(2, 2)) - x1 = tf.layers.conv2d(x1, conv_filters / 2, (1, 1), padding='SAME') - - x2 = tf.pad(x[:, 1:, 1:], [[0, 0], [0, 1], [0, 1], [0, 0]]) - x2 = tf.layers.average_pooling2d(x2, pool_size=(1, 1), strides=(2, 2)) - x2 = tf.layers.conv2d(x2, conv_filters / 2, (1, 1), padding='SAME') - - return tf.concat([x1, x2], axis=3) - - -def shake_shake_block(x, conv_filters, stride, hparams): - with tf.variable_scope('branch_1'): - branch1 = shake_shake_block_branch(x, conv_filters, stride) - with tf.variable_scope('branch_2'): - branch2 = shake_shake_block_branch(x, conv_filters, stride) - if x.shape[-1] == conv_filters: - skip = tf.identity(x) - else: - skip = downsampling_residual_branch(x, conv_filters) - - # TODO(rshin): Use different alpha for each image in batch. - if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN: - if hparams.shakeshake_type == 'batch': - shaken = common_layers.shakeshake2(branch1, branch2) - elif hparams.shakeshake_type == 'image': - shaken = common_layers.shakeshake2_indiv(branch1, branch2) - elif hparams.shakeshake_type == 'equal': - shaken = common_layers.shakeshake2_py(branch1, branch2, equal=True) - else: - raise ValueError('Invalid shakeshake_type: {!r}'.format(shaken)) - else: - shaken = common_layers.shakeshake2_py(branch1, branch2, equal=True) - shaken.set_shape(branch1.get_shape()) - - return skip + shaken - - -def shake_shake_stage(x, num_blocks, conv_filters, initial_stride, hparams): - with tf.variable_scope('block_0'): - x = shake_shake_block(x, conv_filters, initial_stride, hparams) - for i in xrange(1, num_blocks): - with tf.variable_scope('block_{}'.format(i)): - x = shake_shake_block(x, conv_filters, 1, hparams) - return x - - -@registry.register_model -class ShakeShake(t2t_model.T2TModel): - '''Implements the Shake-Shake architecture. - - From <https://arxiv.org/pdf/1705.07485.pdf> - This is intended to match the CIFAR-10 version, and correspond to - "Shake-Shake-Batch" in Table 1. - ''' - - def model_fn_body(self, features): - hparams = self._hparams - print(hparams.learning_rate) - - inputs = features["inputs"] - assert (hparams.num_hidden_layers - 2) % 6 == 0 - blocks_per_stage = (hparams.num_hidden_layers - 2) // 6 - - # For canonical Shake-Shake, the entry flow is a 3x3 convolution with 16 - # filters then a batch norm. Instead we will rely on the one in - # SmallImageModality, which seems to instead use a layer norm. - x = inputs - mode = hparams.mode - with tf.variable_scope('shake_shake_stage_1'): - x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters, 1, - hparams) - with tf.variable_scope('shake_shake_stage_2'): - x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters * 2, 2, - hparams) - with tf.variable_scope('shake_shake_stage_3'): - x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters * 4, 2, - hparams) - - # For canonical Shake-Shake, we should perform 8x8 average pooling and then - # have a fully-connected layer (which produces the logits for each class). - # Instead, we rely on the Xception exit flow in ClassLabelModality. - # - # Also, this model_fn does not return an extra_loss. However, TensorBoard - # reports an exponential moving average for extra_loss, where the initial - # value for the moving average may be a large number, so extra_loss will - # look large at the beginning of training. - return x - - -@registry.register_hparams -def shakeshake_cifar10(): - hparams = common_hparams.basic_params1() - # This leads to effective batch size 128 when number of GPUs is 1 - hparams.batch_size = 4096 * 8 - hparams.hidden_size = 16 - hparams.dropout = 0 - hparams.label_smoothing = 0.0 - hparams.clip_grad_norm = 2.0 - hparams.num_hidden_layers = 26 - hparams.kernel_height = -1 # Unused - hparams.kernel_width = -1 # Unused - hparams.learning_rate_decay_scheme = "cosine" - # Model should be run for 700000 steps with batch size 128 (~1800 epochs) - hparams.learning_rate_cosine_cycle_steps = 700000 - hparams.learning_rate = 0.2 - hparams.learning_rate_warmup_steps = 3000 - hparams.initializer = "uniform_unit_scaling" - hparams.initializer_gain = 1.0 - # TODO(rshin): Adjust so that effective value becomes ~1e-4 - hparams.weight_decay = 3.0 - hparams.optimizer = "Momentum" - hparams.optimizer_momentum_momentum = 0.9 - hparams.add_hparam('base_filters', 16) - hparams.add_hparam('shakeshake_type', 'batch') - return hparams diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index 77659e8ef..43913eab1 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/slicenet_test.py b/tensor2tensor/models/slicenet_test.py index db563b481..911953445 100644 --- a/tensor2tensor/models/slicenet_test.py +++ b/tensor2tensor/models/slicenet_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py old mode 100755 new mode 100644 index 73542bd5a..b9212e0f3 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -353,15 +353,6 @@ def transformer_parsing_base(): return hparams -@registry.register_hparams -def transformer_parsing_ice(): - """Hparams for parsing Icelandic text.""" - hparams = transformer_base_single_gpu() - hparams.batch_size = 4096 - hparams.shared_embedding_and_softmax_weights = int(False) - return hparams - - @registry.register_hparams def transformer_parsing_big(): """HParams for parsing on wsj semi-supervised.""" diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py index e50cba86f..aed074d56 100644 --- a/tensor2tensor/models/transformer_alternative.py +++ b/tensor2tensor/models/transformer_alternative.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 9535558a4..ca099c653 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index d28a1628e..d3c5a2690 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/models/xception_test.py b/tensor2tensor/models/xception_test.py index cd158b852..aa5c1c034 100644 --- a/tensor2tensor/models/xception_test.py +++ b/tensor2tensor/models/xception_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/__init__.py b/tensor2tensor/utils/__init__.py index 27d533abc..eff6a2b14 100644 --- a/tensor2tensor/utils/__init__.py +++ b/tensor2tensor/utils/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/avg_checkpoints.py b/tensor2tensor/utils/avg_checkpoints.py index 01850aeae..a84750310 100644 --- a/tensor2tensor/utils/avg_checkpoints.py +++ b/tensor2tensor/utils/avg_checkpoints.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/beam_search.py b/tensor2tensor/utils/beam_search.py index eacbf467f..3a511907d 100644 --- a/tensor2tensor/utils/beam_search.py +++ b/tensor2tensor/utils/beam_search.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/beam_search_test.py b/tensor2tensor/utils/beam_search_test.py index 33439b41f..e084f1f0e 100644 --- a/tensor2tensor/utils/beam_search_test.py +++ b/tensor2tensor/utils/beam_search_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 012215cff..155b10c72 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/bleu_hook_test.py b/tensor2tensor/utils/bleu_hook_test.py index 1838affd6..8092ab979 100644 --- a/tensor2tensor/utils/bleu_hook_test.py +++ b/tensor2tensor/utils/bleu_hook_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 7b0663cf8..08571f353 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index 7386d3ea0..18507ed06 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 0bd69599d..c3becbfb4 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 97da4cd35..cf66f6af8 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/metrics_test.py b/tensor2tensor/utils/metrics_test.py index 0472d4f21..de72d797f 100644 --- a/tensor2tensor/utils/metrics_test.py +++ b/tensor2tensor/utils/metrics_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/modality.py b/tensor2tensor/utils/modality.py index 856c1a97f..3ac6153b7 100644 --- a/tensor2tensor/utils/modality.py +++ b/tensor2tensor/utils/modality.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 6c04cf22d..104c6db76 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/registry_test.py b/tensor2tensor/utils/registry_test.py index 84903b141..1f4436b0c 100644 --- a/tensor2tensor/utils/registry_test.py +++ b/tensor2tensor/utils/registry_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index a991d3614..c1365c7a9 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py old mode 100755 new mode 100644 index 6b9f66c92..57fe370a2 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -30,19 +30,19 @@ # pylint: disable=redefined-builtin from six.moves import input from six.moves import xrange +from six.moves import zip # pylint: enable=redefined-builtin from tensor2tensor.data_generators import problem_hparams -from tensor2tensor.data_generators.text_encoder import EOS_TOKEN from tensor2tensor.models import models # pylint: disable=unused-import from tensor2tensor.utils import data_reader from tensor2tensor.utils import expert_utils as eu from tensor2tensor.utils import metrics from tensor2tensor.utils import registry + import tensorflow as tf from tensorflow.contrib.learn.python.learn import learn_runner from tensorflow.python.ops import init_ops -from tensor2tensor.utils.yellowfin import YellowFinOptimizer # Number of samples to draw for an image input (in such cases as captioning) IMAGE_DECODE_LENGTH = 100 @@ -120,16 +120,6 @@ "<beam1>\t<beam2>..\t<input>") -def _save_until_eos(hyp): - """ Strips everything after the first <EOS> token, which is normally 1 """ - try: - index = list(hyp).index(EOS_TOKEN) - return hyp[0:index] - except ValueError: - # No EOS_TOKEN: return the array as-is - return hyp - - def make_experiment_fn(data_dir, model_name, train_steps, eval_steps): """Returns experiment_fn for learn_runner. Wraps create_experiment.""" @@ -289,6 +279,7 @@ def session_config(): rewrite_options=rewrite_options, infer_shapes=True) config = tf.ConfigProto( allow_soft_placement=True, graph_options=graph_options) + return config @@ -329,9 +320,6 @@ def learning_rate_decay(): (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) elif hparams.learning_rate_decay_scheme == "exp100k": return 0.94**(step // 100000) - elif hparams.learning_rate_decay_scheme == "cosine": - cycle_steps = hparams.learning_rate_cosine_cycle_steps - return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) inv_base = tf.exp(tf.log(0.01) / warmup_steps) inv_decay = inv_base**(warmup_steps - step) @@ -357,7 +345,6 @@ def learning_rate_decay(): lambda: decay, name="learning_rate_decay_warump_cond") - def model_fn(features, targets, mode): """Creates the prediction, loss, and train ops. @@ -369,11 +356,10 @@ def model_fn(features, targets, mode): Returns: A tuple consisting of the prediction, loss, and train_op. """ - if mode == tf.contrib.learn.ModeKeys.INFER: - if FLAGS.decode_interactive: - features = _interactive_input_tensor_to_features_dict(features, hparams) - elif FLAGS.decode_from_file: - features = _decode_input_tensor_to_features_dict(features, hparams) + if mode == tf.contrib.learn.ModeKeys.INFER and FLAGS.decode_interactive: + features = _interactive_input_tensor_to_features_dict(features, hparams) + if mode == tf.contrib.learn.ModeKeys.INFER and FLAGS.decode_from_file: + features = _decode_input_tensor_to_features_dict(features, hparams) # A dictionary containing: # - problem_choice: A Tensor containing an integer indicating which problem # was selected for this run. @@ -593,14 +579,12 @@ def log_fn(inputs, "%s_prediction_%d.jpg" % (problem, j)) show_and_save_image(inputs / 255., save_path) elif inputs_vocab: - decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) + decoded_inputs = inputs_vocab.decode(inputs.flatten()) tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - decoded_outputs = targets_vocab.decode(_save_until_eos(outputs.flatten())) + decoded_outputs = targets_vocab.decode(outputs.flatten()) + decoded_targets = targets_vocab.decode(targets.flatten()) tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) - decoded_targets = targets_vocab.decode(_save_until_eos(targets.flatten())) - tf.logging.info("Inference results TARGET: %s" % decoded_targets) - if FLAGS.decode_to_file: output_filepath = FLAGS.decode_to_file + ".outputs." + problem output_file = tf.gfile.Open(output_filepath, "a") @@ -615,16 +599,27 @@ def log_fn(inputs, # iterator to log inputs and decodes. if FLAGS.decode_endless: tf.logging.info("Warning: Decoding endlessly") - for j, result in enumerate(result_iter): - inputs, targets, outputs = (result["inputs"], result["targets"], - result["outputs"]) - if FLAGS.decode_return_beams: - output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - log_fn(inputs, targets, beam, problem, j) - else: - log_fn(inputs, targets, outputs, problem, j) + for j, result in enumerate(result_iter): + inputs, targets, outputs = (result["inputs"], result["targets"], + result["outputs"]) + if FLAGS.decode_return_beams: + output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) + for k, beam in enumerate(output_beams): + tf.logging.info("BEAM %d:" % k) + log_fn(inputs, targets, beam, problem, j) + else: + log_fn(inputs, targets, outputs, problem, j) + else: + for j, (inputs, targets, outputs) in enumerate( + zip(result_iter["inputs"], result_iter["targets"], result_iter[ + "outputs"])): + if FLAGS.decode_return_beams: + output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) + for k, beam in enumerate(output_beams): + tf.logging.info("BEAM %d:" % k) + log_fn(inputs, targets, beam, problem, j) + else: + log_fn(inputs, targets, outputs, problem, j) def decode_from_file(estimator, filename): @@ -633,12 +628,22 @@ def decode_from_file(estimator, filename): problem_id = FLAGS.decode_problem_id inputs_vocab = hparams.problems[problem_id].vocabulary["inputs"] targets_vocab = hparams.problems[problem_id].vocabulary["targets"] - tf.logging.info("Performing decoding from a file.") + tf.logging.info("Performing Decoding from a file.") sorted_inputs, sorted_keys = _get_sorted_inputs(filename) num_decode_batches = (len(sorted_inputs) - 1) // FLAGS.decode_batch_size + 1 input_fn = _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, inputs_vocab) + # strips everything after the first <EOS> id, which is assumed to be 1 + def _save_until_eos(hyp): # pylint: disable=missing-docstring + ret = [] + index = 0 + # until you reach <EOS> id + while index < len(hyp) and hyp[index] != 1: + ret.append(hyp[index]) + index += 1 + return np.array(ret) + decodes = [] for _ in range(num_decode_batches): result_iter = estimator.predict( @@ -650,7 +655,8 @@ def log_fn(inputs, outputs): decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - decoded_outputs = targets_vocab.decode(_save_until_eos(outputs.flatten())) + decoded_outputs = targets_vocab.decode( + _save_until_eos(outputs.flatten())) tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) return decoded_outputs @@ -661,7 +667,7 @@ def log_fn(inputs, outputs): for k, beam in enumerate(output_beams): tf.logging.info("BEAM %d:" % k) beam_decodes.append(log_fn(result["inputs"], beam)) - decodes.append("\t".join(beam_decodes)) + decodes.append(str.join("\t", beam_decodes)) else: decodes.append(log_fn(result["inputs"], result["outputs"])) @@ -703,11 +709,11 @@ def decode_interactively(estimator): tf.logging.info("BEAM %d:" % k) if scores is not None: tf.logging.info("%s\tScore:%f" % - (targets_vocab.decode(_save_until_eos(beam.flatten())), scores[k])) + (targets_vocab.decode(beam.flatten()), scores[k])) else: - tf.logging.info(targets_vocab.decode(_save_until_eos(beam.flatten()))) + tf.logging.info(targets_vocab.decode(beam.flatten())) else: - tf.logging.info(targets_vocab.decode(_save_until_eos(result["outputs"].flatten()))) + tf.logging.info(targets_vocab.decode(result["outputs"].flatten())) def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, @@ -720,10 +726,10 @@ def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, tf.logging.info("Deocding batch %d" % b) batch_length = 0 batch_inputs = [] - for inputs in sorted_inputs[b * FLAGS.decode_batch_size: - (b + 1) * FLAGS.decode_batch_size]: + for inputs in sorted_inputs[b * FLAGS.decode_batch_size:( + b + 1) * FLAGS.decode_batch_size]: input_ids = vocabulary.encode(inputs) - input_ids.append(EOS_TOKEN) + input_ids.append(1) # Assuming EOS=1. batch_inputs.append(input_ids) if len(input_ids) > batch_length: batch_length = len(input_ids) @@ -816,7 +822,7 @@ def _interactive_input_fn(hparams): if input_type == "text": input_ids = vocabulary.encode(input_string) if has_input: - input_ids.append(EOS_TOKEN) + input_ids.append(1) # assume 1 means end-of-source x = [num_samples, decode_length, len(input_ids)] + input_ids assert len(x) < const_array_size x += [0] * (const_array_size - len(x)) @@ -1083,7 +1089,7 @@ def input_fn(): problem_choice = tf.to_int32(FLAGS.worker_id % problem_count) else: raise ValueError("Value of hparams.problem_choice is %s and must be " - "one of [uniform, adaptive, distributed]" % + "one of [uniform, adaptive, distributed]", hparams.problem_choice) # Inputs and targets conditional on problem_choice. @@ -1135,10 +1141,6 @@ def __init__(self, optimizer_name, lr, hparams): elif optimizer_name == "Momentum": self._opt = tf.train.MomentumOptimizer( lr, momentum=hparams.optimizer_momentum_momentum) - elif optimizer_name == "YellowFin": - tf.logging.info("Init YellowFin Optimizer.") - self._opt = YellowFinOptimizer( - learning_rate=lr, momentum=hparams.optimizer_momentum_momentum) else: self._opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer_name](lr) diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index d621b6fbc..543a0547e 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 Google Inc. +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensor2tensor/utils/yellowfin.py b/tensor2tensor/utils/yellowfin.py deleted file mode 100644 index b5cedf21b..000000000 --- a/tensor2tensor/utils/yellowfin.py +++ /dev/null @@ -1,568 +0,0 @@ -# MIT License -# -# Copyright (c) 2017 JianGoForIt -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -"""YellowFin for TensorFlow.""" -from __future__ import print_function - -import numpy as np -from math import ceil, floor -import tensorflow as tf -from tensorflow.python.training import momentum -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.ops import state_ops -from tensorflow.python.framework import ops - - -# Values for gate_gradients. -GATE_NONE = 0 -GATE_OP = 1 -GATE_GRAPH = 2 - - -class YellowFinOptimizer(tf.train.Optimizer): - """Optimizer that implements the YellowFin algorithm. - See [Zhang et. al., 2017](https://arxiv.org/abs/1706.03471) - ([pdf](https://arxiv.org/pdf/1706.03471.pdf)). - """ - - def __init__(self, - learning_rate=1.0, - momentum=0.0, - clip_thresh=None, - beta=0.999, - curvature_window_width=20, - zero_debias=True, - delta_mu=0.0): - """Construct a new YellowFin optimizer. - - Args: - learning_rate: A Tensor or a floating point value. The learning rate. - momentum: A Tensor or a floating point value. The momentum. - clip_thresh: A Tensor or a floating point value. The cliping threshold for - tf.clip_by_global_norm. If None, no clipping will be carried out. - beta: A float value or a constant float tensor. The smoothing parameter - for estimations. - curvature_window_width: A int value or a constant int tensor. - The curvature window width. - zero_debias: A boolean, zero debias moving-averages. - delta_mu: For extensions. Not necessary in the basic use. - - Note: - clip_thresh is the threshold value on ||lr * gradient||, - delta_mu can be place holder/variable/tensor scalar. - They are used for additional momentum in situations such as - asynchronous-parallel training. - The default is 0.0(or None) for basic usage of the optimizer. - - Other features: - If you want to manually control the learning rates, self.lr_factor is - an interface to the outside, it is an multiplier for the internal - learning rate in YellowFin. It is helpful when you want to do additional - hand tuning or some decaying scheme to the tuned learning rate in - YellowFin. - Example on using lr_factor can be found here: - https://github.com/JianGoForIt/YellowFin/blob/master/char-rnn-tensorflow/train_YF.py#L140 - """ - # Set lr and mu - self._lr = learning_rate - self._mu = momentum - - # Set lr and mu tensor - self._lr_var = tf.Variable(learning_rate, - dtype=tf.float32, - name="YF_lr", - trainable=False) - self._mu_var = tf.Variable(momentum, - dtype=tf.float32, - name="YF_mu", - trainable=False) - - # Tuning factor for learning rates step or decaying scheme - self.lr_factor = tf.Variable(1.0, - dtype=tf.float32, - name="YF_lr_factor", - trainable=False) - - # Gradient Clipping Threshold - if clip_thresh is not None: - self._clip_thresh_var = tf.Variable(clip_thresh, - dtype=tf.float32, - name="YF_clip_thresh", - trainable=False) - else: - self._clip_thresh_var = None - - # Set initial lr and mu for momentum - self._lr_m = self._lr_var * self.lr_factor - self._mu_m = self._mu_var + delta_mu - - # Init momentum optimizer - self._momentum_optimizer = \ - tf.train.MomentumOptimizer(self._lr_m, self._mu_m) - - # Moving average for statistics - self._beta = beta - self._moving_averager = None - - # Step counting - self._step = tf.Variable(0, - dtype=tf.int32, - name="YF_step", - trainable=False) - # YF_step + 1 op - self._increment_step_op = None - - # For conditional tuning - self._do_tune = tf.greater(self._step, tf.constant(0)) - - # Moving-averages - self._zero_debias = zero_debias - - # For curvature range - self.curvature_window_width = curvature_window_width - self._curv_win = None - - # Gradients and Variables - self._grad = None - self._vars = None - - # Get per var g**2, norm**2 and mean(norm**2) - self._grad_squared = None - self._grad_norm_squared = None - self._grad_norm_squared_avg = None - - # Mean(grad) and Mean(grad**2) to compute Variance - self._grad_avg = None - self._grad_avg_squared = None - - # Max and Min curvature variations - self._h_max_t = None - self._h_min_t = None - self._h_min = None - self._h_max = None - - # Gradient Expected Variance - self._grad_var = None - - # Gradient Norm and Mean(Gradient Norm) - self._grad_norm = None - self._grad_norm_avg = None - - # Distance to optimum and Mean(Distance to optimum) - self._d_t = None - self._dist_to_opt_avg = None - - # Maintains moving averages of variables - # by employing an exponential decay(Beta), - # and (zero_devias) moving-averages. - self._moving_averager = None - - - def _curvature_range(self): - """Curvature range - - Returns: - h_max_t, h_min_t ops - """ - self._curv_win = \ - tf.Variable(np.zeros([self.curvature_window_width, ]), - dtype=tf.float32, - name="curv_win", - trainable=False) - - self._curv_win = \ - tf.scatter_update(self._curv_win, - self._step % self.curvature_window_width, - self._grad_norm_squared) - # Note here the iterations start from iteration 0 - valid_window = tf.slice(self._curv_win, - tf.constant([0, ]), - tf.expand_dims( - tf.minimum( - tf.constant(self.curvature_window_width), - self._step + 1), - dim=0)) - self._h_min_t = tf.reduce_min(valid_window) - self._h_max_t = tf.reduce_max(valid_window) - - curv_range_ops = [] - with tf.control_dependencies([self._h_min_t, self._h_max_t]): - avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t]) - with tf.control_dependencies([avg_op]): - self._h_min = tf.identity(self._moving_averager.average(self._h_min_t)) - self._h_max = tf.identity(self._moving_averager.average(self._h_max_t)) - curv_range_ops.append(avg_op) - return curv_range_ops # h_max_t, h_min_t - - - def _grad_variance(self): - """Estimate of gradient Variance - - Returns: - C_t ops - """ - grad_var_ops = [] - tensor_to_avg = [] - for t, g in zip(self._vars, self._grad): - if isinstance(g, ops.IndexedSlices): - tensor_to_avg.append( \ - tf.reshape(tf.unsorted_segment_sum(g.values, - g.indices, - g.dense_shape[0]), - shape=t.get_shape())) - else: - tensor_to_avg.append(g) - avg_op = self._moving_averager.apply(tensor_to_avg) - grad_var_ops.append(avg_op) - with tf.control_dependencies([avg_op]): - self._grad_avg = [self._moving_averager.average(val) - for val in tensor_to_avg] - self._grad_avg_squared = [tf.square(val) for val in self._grad_avg] - self._grad_avg_squared = tf.add_n([tf.reduce_sum(val) - for val in self._grad_avg_squared]) - # Compute Variance - self._grad_var = self._grad_norm_squared_avg - self._grad_avg_squared - return grad_var_ops # C_t - - - def _dist_to_opt(self): - """Distance to optimum - - Returns: - D_t ops - """ - dist_to_opt_ops = [] - # Running average of the norm of gradeint - self._grad_norm = tf.sqrt(self._grad_norm_squared) - avg_op = self._moving_averager.apply([self._grad_norm, ]) - dist_to_opt_ops.append(avg_op) - with tf.control_dependencies([avg_op]): - self._grad_norm_avg = self._moving_averager.average(self._grad_norm) - # Single iteration distance estimation, note here - # self._grad_norm_avg is per variable - self._d_t = self._grad_norm_avg / self._grad_norm_squared_avg - # Running average of distance - avg_op = self._moving_averager.apply([self._d_t]) - dist_to_opt_ops.append(avg_op) - with tf.control_dependencies([avg_op]): - self._dist_to_opt_avg = \ - tf.identity(self._moving_averager.average(self._d_t)) - return dist_to_opt_ops # D_t - - - def _prepare_variables(self): - """Prepare Variables for YellowFin - - Returns: - Grad**2, Norm, Norm**2, Mean(Norm**2) ops - """ - self._moving_averager = \ - tf.train.ExponentialMovingAverage(decay=self._beta, - zero_debias=self._zero_debias) - assert self._grad != None and len(self._grad) > 0 - # List for the returned Operations - prepare_variables_op = [] - - # Get per var g**2 and norm**2 - self._grad_squared = [] - self._grad_norm_squared = [] - - # Gradient squared - for v, g in zip(self._vars, self._grad): - if g is None: continue - with ops.colocate_with(v): - self._grad_squared.append(tf.square(g)) - - # Norm squared - self._grad_norm_squared = [tf.reduce_sum(g_sq) \ - for g_sq in self._grad_squared] - - # The following running average on squared norm of gradient - # is shared by grad_var and dist_to_opt - avg_op = self._moving_averager.apply(self._grad_norm_squared) - - with tf.control_dependencies([avg_op]): - self._grad_norm_squared_avg = \ - [self._moving_averager.average(val) for val in self._grad_norm_squared] - self._grad_norm_squared = tf.add_n(self._grad_norm_squared) - self._grad_norm_squared_avg = tf.add_n(self._grad_norm_squared_avg) - - prepare_variables_op.append(avg_op) - return tf.group(*prepare_variables_op) - - - def _get_lr_tensor(self): - """Get lr minimzing the surrogate - - Returns: - lr_t - """ - lr = (1.0 - tf.sqrt(self._mu) )**2 / self._h_min - return lr - - - def _get_mu_tensor(self): - """Get the min mu which minimize the surrogate - - Returns: - mu_t - """ - const_fact = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var - coef = tf.Variable([-1.0, 3.0, 0.0, 1.0], - dtype=tf.float32, - name="cubic_solver_coef") - coef = tf.scatter_update(coef, - tf.constant(2), - -(3 + const_fact)) - roots = tf.py_func(np.roots, - [coef], - Tout=tf.complex64, - stateful=False) - - # Filter out the correct root - root_idx = \ - tf.logical_and( - tf.logical_and( - tf.greater(tf.real(roots), tf.constant(0.0)), - tf.less(tf.real(roots), tf.constant(1.0))), - tf.less(tf.abs(tf.imag(roots)), 1e-5)) - - # In case there are two duplicated roots satisfying the above condition - root = tf.reshape(tf.gather(tf.gather(roots, tf.where(root_idx)), - tf.constant(0)), - shape=[]) - - # Never Evaluated - #tf.assert_equal(tf.size(root), tf.constant(1)) - - dr = self._h_max / self._h_min - mu = tf.maximum(tf.real(root)**2, ((tf.sqrt(dr) - 1)/(tf.sqrt(dr) + 1))**2) - return mu - - - def _yellowfin(self): - """YellowFin auto-tuning optimizer based on momentum SGD - - Returns: - YF ops - (Curvature range, - Grad_variance, - Dist_to_opt, - Single-Step, - Auto-Tuning) - """ - # List for the returned Operations - yellowfin_ops = [] - - # Curvature range ops - curv_range_ops = self._curvature_range() - yellowfin_ops += curv_range_ops - # Estimate of gradient Variance ops - grad_var_ops = self._grad_variance() - yellowfin_ops += grad_var_ops - # Distance to optimum ops - dist_to_opt_ops = self._dist_to_opt() - yellowfin_ops += dist_to_opt_ops - - # Single-Step: minimizes the surrogate for the expected - # squared distance from the optimum of a local quadratic - # approximation after a single step while keeping all directions in the - # robust region. - self._mu = \ - tf.identity(tf.cond(self._do_tune, lambda: self._get_mu_tensor(), - lambda: self._mu_var)) - with tf.control_dependencies([self._mu]): - self._lr = \ - tf.identity(tf.cond(self._do_tune, lambda: self._get_lr_tensor(), - lambda: self._lr_var)) - - # Tune learning rate and momentum - with tf.control_dependencies([self._mu, self._lr]): - self._mu = self._beta * self._mu_var + (1 - self._beta) * self._mu - self._lr = self._beta * self._lr_var + (1 - self._beta) * self._lr - yellowfin_ops.append(tf.assign(self._mu_var, self._mu)) - yellowfin_ops.append(tf.assign(self._lr_var, self._lr)) - - yellowfin_ops = tf.group(*yellowfin_ops) - return yellowfin_ops - - - def apply_gradients(self, grads_and_vars, global_step=None, name=None): - """Applying gradients aand tune hyperparams with YellowFin - - Args: - grads_and_vars: List of (gradient, variable) pairs as returned by - compute_gradients(). - global_step: Optional Variable to increment by one after the - variables have been updated. - name: Optional name for the returned operation. Default to the - name passed to the Optimizer constructor. - - Returns: - (A group of operations) - Variable Update with Momentum ops, - YellowFin ops(Curvature, Variance, Distance) ops, - SingleStep and lr_mu tuning ops, - Step increment ops. - - """ - self._grad, self._vars = zip(*[(g, t) - for g, t in grads_and_vars if g is not None]) - - # Var Update with Momentum - with tf.variable_scope("apply_updates"): - # Gradient Clipping? - if self._clip_thresh_var is not None: - self._grads_clip, self._grads_norm = \ - tf.clip_by_global_norm(self._grad, self._clip_thresh_var) - - apply_grad_op = \ - self._momentum_optimizer.apply_gradients( \ - zip(self._grads_clip, self._vars), global_step=global_step) - else: - apply_grad_op = \ - self._momentum_optimizer.apply_gradients( \ - zip(self._grad, self._vars), global_step=global_step) - - # Begin lr and mu tuning - with tf.variable_scope("prepare_yellowFin_variables"): - prepare_variables_op = self._prepare_variables() - - with tf.variable_scope("yellowfin"): - with tf.control_dependencies([prepare_variables_op]): - yellowfin_op = self._yellowfin() - - # Update YellowFin step variable - with tf.control_dependencies([yellowfin_op]): - self._increment_step_op = state_ops.assign_add(self._step, 1).op - - # # Global_step variable Update. Commented because the update is made by self._momentum_optimizer - # if global_step is not None: - # with tf.control_dependencies([yellowfin_op]): - # with ops.colocate_with(global_step): - # global_step_op = state_ops.assign_add(global_step, 1).op - - return tf.group(apply_grad_op, - prepare_variables_op, - yellowfin_op, - self._increment_step_op) - # global_step_op) - - - def compute_gradients(self, - loss, - var_list, - global_step=None, - gate_gradients=GATE_OP, - aggregation_method=None, - colocate_gradients_with_ops=False, - name=None, - grad_loss=None): - """Compute gradients through momentum optimizer - - Args: - loss: A Tensor containing the value to minimize. - var_list: Optional list or tuple of tf.Variable to update - to minimize loss. Defaults to the list of variables collected - in the graph under the key GraphKey.TRAINABLE_VARIABLES. - global_step: Optional Variable to increment by one after the - variables have been updated. - gate_gradients: How to gate the computation of gradients. - Can be GATE_NONE, GATE_OP, or GATE_GRAPH. - aggregation_method: Specifies the method used to combine - gradient terms. Valid values are defined in the class AggregationMethod. - colocate_gradients_with_ops: If True, try colocating gradients with - the corresponding op. - name: Optional name for the returned operation. Default to the name - passed to the Optimizer constructor. - grad_loss: Optional. A Tensor holding the gradient computed for loss. - - Returns: - A list of (gradient, variable) pairs. Variable is always present, - but gradient can be None. - """ - return self._momentum_optimizer.compute_gradients( \ - loss, - var_list=var_list, - gate_gradients=gate_gradients, - aggregation_method=aggregation_method, - colocate_gradients_with_ops=colocate_gradients_with_ops, - grad_loss=grad_loss) - - - def minimize(self, - loss, - global_step=None, - var_list=None, - gate_gradients=GATE_OP, - aggregation_method=None, - colocate_gradients_with_ops=False, - name=None, - grad_loss=None): - """Adapted from Tensorflow Optimizer base class member function: - Add operations to minimize `loss` by updating `var_list`. - This method simply combines calls `compute_gradients()` and - `apply_gradients()`. If you want to process the gradient before applying - them call `tf.gradients()` and `self.apply_gradients()` explicitly instead - of using this function. - - Args: - loss: A Tensor containing the value to minimize. - global_step: Optional Variable to increment by one after the variables - have been updated. - var_list: Optional list or tuple of Variable objects to update to - minimize loss. Defaults to the list of variables collected in - the graph under the key GraphKeys.TRAINABLE_VARIABLES. - gate_gradients: How to gate the computation of gradients. - Can be GATE_NONE, GATE_OP, or GATE_GRAPH. - aggregation_method: Specifies the method used to combine gradient terms. - Valid values are defined in the class AggregationMethod. - colocate_gradients_with_ops: If True, try colocating gradients with - the corresponding op. - name: Optional name for the returned operation. - grad_loss: Optional. A Tensor holding the gradient computed for loss. - - Returns: - An Operation that updates the variables in var_list. - If global_step was not None, that operation also increments global_step. - """ - grads_and_vars = \ - self._optimizer.compute_gradients( \ - loss, - var_list=var_list, - gate_gradients=gate_gradients, - aggregation_method=aggregation_method, - colocate_gradients_with_ops=colocate_gradients_with_ops, - grad_loss=grad_loss) - - vars_with_grad = [v for g, v in grads_and_vars if g is not None] - if not vars_with_grad: - raise ValueError( - "No gradients provided for any variable, check your graph for ops" - " that do not support gradients, between variables %s and loss %s." % - ([str(v) for _, v in grads_and_vars], loss)) - for g, v in grads_and_vars: - print("g ", g) - print("v ", v) - - return self.apply_gradients(grads_and_vars, global_step=global_step) diff --git a/tensor2tensor/utils/yellowfin_test.py b/tensor2tensor/utils/yellowfin_test.py deleted file mode 100644 index c4a318990..000000000 --- a/tensor2tensor/utils/yellowfin_test.py +++ /dev/null @@ -1,226 +0,0 @@ -# MIT License -# -# Copyright (c) 2017 JianGoForIt -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -"""YellowFin Test Module for TensorFlow.""" - -#import os -# os.environ['TF_CPP_MIN_LOG_LEVEL']='2' -import tensorflow as tf -import numpy as np -from tensor2tensor.utils.yellowfin import YellowFinOptimizer -from tensorflow.python.ops import variables -import time - - -n_dim = 1000000 -n_iter = 50 - -class TrainerUtilsTest(tf.test.TestCase): - - def tuneEverything(self, x0squared, C, T, gmin, gmax): - # First tune based on dynamic range - if C == 0: - dr = gmax / gmin - mustar = ((np.sqrt(dr) - 1) / (np.sqrt(dr) + 1))**2 - alpha_star = (1 + np.sqrt(mustar))**2/gmax - - return alpha_star, mustar - - dist_to_opt = x0squared - grad_var = C - max_curv = gmax - min_curv = gmin - const_fact = dist_to_opt * min_curv**2 / 2 / grad_var - coef = [-1, 3, -(3 + const_fact), 1] - roots = np.roots(coef) - roots = roots[np.real(roots) > 0] - roots = roots[np.real(roots) < 1] - root = roots[np.argmin(np.imag(roots))] - - assert root > 0 and root < 1 and np.absolute(root.imag) < 1e-6 - - dr = max_curv / min_curv - assert max_curv >= min_curv - mu = max(((np.sqrt(dr) - 1) / (np.sqrt(dr) + 1))**2, root**2) - - lr_min = (1 - np.sqrt(mu))**2 / min_curv - lr_max = (1 + np.sqrt(mu))**2 / max_curv - - alpha_star = lr_min - mustar = mu - - return alpha_star, mustar - - - def testMeasurement(self): - opt = YellowFinOptimizer(zero_debias=False) - w = tf.Variable(np.ones([n_dim, ]), - dtype=tf.float32, - name="w", - trainable=True) - b = tf.Variable(np.ones([1, ], dtype=np.float32), - dtype=tf.float32, - name="b", - trainable=True) - x = tf.constant(np.ones([n_dim,], dtype=np.float32), - dtype=tf.float32) - loss = tf.multiply(w, x) + b - tvars = tf.trainable_variables() - - w_grad_val = tf.placeholder(tf.float32, shape=(n_dim, )) - b_grad_val = tf.placeholder(tf.float32, shape=(1, )) - apply_op = opt.apply_gradients(zip([w_grad_val, b_grad_val], tvars)) - - init_op = tf.global_variables_initializer() - with tf.Session() as sess: - sess.run(init_op) - target_h_max = 0.0 - target_h_min = 0.0 - g_norm_squared_avg = 0.0 - g_norm_avg = 0.0 - g_avg = 0.0 - target_dist = 0.0 - for i in range(n_iter): - feed_dict = {w_grad_val: (i + 1) * np.ones([n_dim, ], dtype=np.float32), - b_grad_val: (i + 1) * np.ones([1, ], dtype=np.float32)} - res = sess.run([opt._curv_win, - opt._h_max, - opt._h_min, - opt._grad_var, - opt._dist_to_opt_avg, - apply_op], feed_dict=feed_dict) - - g_norm_squared_avg = 0.999 * g_norm_squared_avg \ - + 0.001 * np.sum(((i + 1) * np.ones([n_dim + 1, ]))**2) - g_norm_avg = 0.999 * g_norm_avg \ - + 0.001 * np.linalg.norm((i + 1)*np.ones([n_dim + 1, ])) - g_avg = 0.999 * g_avg + 0.001 * (i + 1) - - target_h_max = 0.999 * target_h_max + 0.001 * (i + 1)**2*(n_dim + 1) - target_h_min = 0.999 * target_h_min + \ - 0.001 * max(1, i + 2 - 20)**2 * (n_dim + 1) - target_var = g_norm_squared_avg - g_avg**2 * (n_dim + 1) - target_dist = 0.999 * target_dist + \ - 0.001 * g_norm_avg / g_norm_squared_avg - - # print "iter ", i, " h max ", res[1], target_h_max, " h min ", res[2], target_h_min, \ - # " var ", res[3], target_var, " dist ", res[4], target_dist - assert np.abs(target_h_max - res[1]) < np.abs(target_h_max) * 1e-3 - assert np.abs(target_h_min - res[2]) < np.abs(target_h_min) * 1e-3 - assert np.abs(target_var - res[3]) < np.abs(res[3]) * 1e-3 - assert np.abs(target_dist - res[4]) < np.abs(res[4]) * 1e-3 - print "[Test-INFO] Sync measurement test passed!" - - - def testLrMu(self): - opt = YellowFinOptimizer(learning_rate=0.5, momentum=0.5, zero_debias=False) - w = tf.Variable(np.ones([n_dim, ]), - dtype=tf.float32, - name="w", - trainable=True) - b = tf.Variable(np.ones([1, ], - dtype=np.float32), - dtype=tf.float32, - name="b", - trainable=True) - x = tf.constant(np.ones([n_dim, ], - dtype=np.float32), - dtype=tf.float32) - loss = tf.multiply(w, x) + b - tvars = tf.trainable_variables() - - w_grad_val = tf.Variable(np.zeros([n_dim, ]), - dtype=tf.float32, - trainable=False) - b_grad_val = tf.Variable(np.zeros([1, ]), - dtype=tf.float32, - trainable=False) - apply_op = opt.apply_gradients(zip([w_grad_val, b_grad_val], tvars)) - - init_op = tf.global_variables_initializer() - with tf.Session() as sess: - sess.run(init_op) - target_h_max = 0.0 - target_h_min = 0.0 - g_norm_squared_avg = 0.0 - g_norm_avg = 0.0 - g_avg = 0.0 - target_dist = 0.0 - target_lr = 0.5 - target_mu = 0.5 - for i in range(n_iter): - - sess.run(tf.assign(w_grad_val, (i + 1) * np.ones([n_dim, ], - dtype=np.float32))) - sess.run(tf.assign(b_grad_val, (i + 1) * np.ones([1, ], - dtype=np.float32))) - - res = sess.run([opt._curv_win, - opt._h_max, - opt._h_min, - opt._grad_var, - opt._dist_to_opt_avg, - opt._lr_var, - opt._mu_var, - apply_op]) - - res[5] = opt._lr_var.eval() - res[6] = opt._mu_var.eval() - - g_norm_squared_avg = 0.999 * g_norm_squared_avg \ - + 0.001 * np.sum(((i + 1) * np.ones([n_dim + 1, ]))**2) - g_norm_avg = 0.999 * g_norm_avg \ - + 0.001 * np.linalg.norm((i + 1)*np.ones([n_dim + 1, ])) - g_avg = 0.999 * g_avg + 0.001 * (i + 1) - - target_h_max = 0.999 * target_h_max + 0.001 * (i + 1)**2 * (n_dim + 1) - target_h_min = 0.999 * target_h_min + \ - 0.001 * max(1, i + 2 - 20)**2 * (n_dim + 1) - target_var = g_norm_squared_avg - g_avg**2 * (n_dim + 1) - target_dist = 0.999 * target_dist + \ - 0.001 * g_norm_avg / g_norm_squared_avg - - if i > 0: - lr, mu = self.tuneEverything(target_dist**2, - target_var, - 1, - target_h_min, - target_h_max) - target_lr = 0.999 * target_lr + 0.001 * lr - target_mu = 0.999 * target_mu + 0.001 * mu - - # print "iter ", i, " h max ", res[1], target_h_max, \ - # " h min ", res[2], target_h_min, " var ", res[3], target_var, \ - # " dist ", res[4], target_dist - # print "iter ", i, " lr ", res[5], target_lr, " mu ", res[6], target_mu - - assert np.abs(target_h_max - res[1]) < np.abs(target_h_max) * 1e-3 - assert np.abs(target_h_min - res[2]) < np.abs(target_h_min) * 1e-3 - assert np.abs(target_var - res[3]) < np.abs(res[3]) * 1e-3 - assert np.abs(target_dist - res[4]) < np.abs(res[4]) * 1e-3 - assert target_lr == 0.0 or np.abs(target_lr - res[5]) < np.abs(res[5]) * 1e-3 - assert target_mu == 0.0 or np.abs(target_mu - res[6]) < np.abs(res[6]) * 5e-3 - print "[Test-INFO] lr and mu computing test passed!" - - -if __name__ == "__main__": - tf.test.main() From 56d65f0b301f1e71465553c73f24f5f7a3f8dcdc Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 12 Jul 2017 16:00:29 -0700 Subject: [PATCH 0120/4095] Working open-source distributed training PiperOrigin-RevId: 161731856 --- tensor2tensor/bin/t2t-make-tf-configs | 25 ++++++++-------- tensor2tensor/docs/distributed_training.md | 33 +++++++++++++--------- tensor2tensor/utils/trainer_utils.py | 12 ++++++-- 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs index 3e010b204..6a4dc8641 100644 --- a/tensor2tensor/bin/t2t-make-tf-configs +++ b/tensor2tensor/bin/t2t-make-tf-configs @@ -17,13 +17,13 @@ Usage: -`t2t-make-tf-configs --workers="server1:1234" --ps="server3:2134,server4:2334"` +`t2t-make-tf-configs --masters="server1:1234" --ps="server3:2134,server4:2334"` -Outputs 1 line per job to stdout, first the workers, then the parameter servers. +Outputs 1 line per job to stdout, first the masters, then the parameter servers. Each line has the TF_CONFIG, then a tab, then the command line flags for that job. -If there is a single worker, workers will have the `--sync` flag. +If there is a single master, it will have the `--sync` flag. """ from __future__ import absolute_import from __future__ import division @@ -38,31 +38,32 @@ import tensorflow as tf flags = tf.flags FLAGS = flags.FLAGS -flags.DEFINE_string("workers", "", "Comma-separated list of worker addresses") +flags.DEFINE_string("masters", "", "Comma-separated list of master addresses") flags.DEFINE_string("ps", "", "Comma-separated list of ps addresses") def main(_): - if not (FLAGS.workers and FLAGS.ps): - raise ValueError("Must provide --workers and --ps") + if not (FLAGS.masters and FLAGS.ps): + raise ValueError("Must provide --masters and --ps") - workers = FLAGS.workers.split(",") + masters = FLAGS.masters.split(",") ps = FLAGS.ps.split(",") - cluster = {"ps": ps, "worker": workers} + cluster = {"ps": ps, "master": masters} - for task_type, jobs in (("worker", workers), ("ps", ps)): + for task_type, jobs in (("master", masters), ("ps", ps)): for idx, job in enumerate(jobs): - if task_type == "worker": + if task_type == "master": cmd_line_flags = " ".join([ "--master=grpc://%s" % job, "--ps_replicas=%d" % len(ps), - "--worker_replicas=%d" % len(workers), + "--worker_replicas=%d" % len(masters), "--worker_gpu=1", "--worker_id=%d" % idx, + "--worker_job='/job:master'", "--ps_gpu=1", "--schedule=train", - "--sync" if len(workers) == 1 else "", + "--sync" if len(masters) == 1 else "", ]) else: cmd_line_flags = " ".join([ diff --git a/tensor2tensor/docs/distributed_training.md b/tensor2tensor/docs/distributed_training.md index f41197fc4..9ed9778da 100644 --- a/tensor2tensor/docs/distributed_training.md +++ b/tensor2tensor/docs/distributed_training.md @@ -10,11 +10,11 @@ along with a set of flags. ## `TF_CONFIG` -Both workers and parameter servers must have the `TF_CONFIG` environment +Both masters and parameter servers must have the `TF_CONFIG` environment variable set. The `TF_CONFIG` environment variable is a json-encoded string with the addresses -of the workers and parameter servers (in the `'cluster'` key) and the +of the masters and parameter servers (in the `'cluster'` key) and the identification of the current task (in the `'task'` key). For example: @@ -22,40 +22,42 @@ For example: ``` cluster = { 'ps': ['host1:2222', 'host2:2222'], - 'worker': ['host3:2222', 'host4:2222', 'host5:2222'] + 'master': ['host3:2222', 'host4:2222', 'host5:2222'] } os.environ['TF_CONFIG'] = json.dumps({ 'cluster': cluster, - 'task': {'type': 'worker', 'index': 1} + 'task': {'type': 'master', 'index': 1}, + 'environment': 'cloud', }) ``` ## Command-line flags -The following T2T command-line flags must also be set on the workers for +The following T2T command-line flags must also be set on the masters for distributed training: - `--master=grpc://$ADDRESS` -- `--worker_replicas=$NUM_WORKERS` -- `--worker_gpu=$NUM_GPUS_PER_WORKER` -- `--worker_id=$WORKER_ID` +- `--worker_replicas=$NUM_MASTERS` +- `--worker_gpu=$NUM_GPUS_PER_MASTER` +- `--worker_id=$MASTER_ID` +- `--worker_job='/job:master'` - `--ps_replicas=$NUM_PS` - `--ps_gpu=$NUM_GPUS_PER_PS` - `--schedule=train` - `--sync`, if you want synchronous training, i.e. for there to be a single - master worker coordinating the work across "ps" jobs (yes, the naming is - unfortunate). If not set, then each worker operates independently while - variables are shared on the parameter servers. + master coordinating the work across "ps" jobs. If not set, then each master + operates independently while variables are shared on the parameter servers. -Parameter servers only need `--schedule=run_std_server`. +Parameter servers only need `--master=grpc://$ADDRESS` and +`--schedule=run_std_server`. ## Utility to produce `TF_CONFIG` and flags [`t2t-make-tf-configs`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t-make-tf-configs)) generates the `TF_CONFIG` json strings and the above-mentioned command-line -flags for the workers and parameter servers. +flags for the masters and parameter servers. -Given a set of worker and parameter server addresses, the script outputs, for +Given a set of master and parameter server addresses, the script outputs, for each job, a line with the `TF_CONFIG` environment variable and the command-line flags necessary for distributed training. For each job, you should invoke the `t2t-trainer` with the `TF_CONFIG` value and flags that are output. @@ -66,6 +68,9 @@ For example: TF_CONFIG=$JOB_TF_CONFIG t2t-trainer $JOB_FLAGS --model=transformer ... ``` +Modify the `--worker_gpu` and `--ps_gpu` flags, which specify how many gpus are +on each master and ps, respectively, as needed for your machine/cluster setup. + ## Command-line flags for eval jobs Eval jobs should set the following flags and do not need the `TF_CONFIG` diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 57fe370a2..c5d8be278 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -91,6 +91,8 @@ flags.DEFINE_integer("worker_gpu", 1, "How many GPUs to use.") flags.DEFINE_integer("worker_replicas", 1, "How many workers to use.") flags.DEFINE_integer("worker_id", 0, "Which worker task are we.") +flags.DEFINE_float("worker_gpu_memory_fraction", 1., + "Fraction of GPU memory to allocate.") flags.DEFINE_integer("ps_gpu", 0, "How many GPUs to use per ps.") flags.DEFINE_string("gpu_order", "", "Optional order for daisy-chaining gpus." " e.g. \"1 3 2 4\"") @@ -177,6 +179,7 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): config=tf.contrib.learn.RunConfig( master=FLAGS.master, model_dir=output_dir, + gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction, session_config=session_config(), keep_checkpoint_max=FLAGS.keep_checkpoint_max)) # Store the hparams in the estimator as well @@ -270,6 +273,7 @@ def session_config(): """The TensorFlow Session config to use.""" graph_options = tf.GraphOptions(optimizer_options=tf.OptimizerOptions( opt_level=tf.OptimizerOptions.L1, do_function_inlining=False)) + if FLAGS.experimental_optimize_placement: rewrite_options = tf.RewriterConfig(optimize_tensor_layout=True) rewrite_options.optimizers.append("pruning") @@ -277,9 +281,13 @@ def session_config(): rewrite_options.optimizers.append("layout") graph_options = tf.GraphOptions( rewrite_options=rewrite_options, infer_shapes=True) - config = tf.ConfigProto( - allow_soft_placement=True, graph_options=graph_options) + gpu_options = tf.GPUOptions( + per_process_gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction) + + config = tf.ConfigProto(allow_soft_placement=True, + graph_options=graph_options, + gpu_options=gpu_options) return config From 1e1847487e1ba6b836be5c0f28fe7f3ec46f23fb Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Thu, 13 Jul 2017 15:57:21 -0700 Subject: [PATCH 0121/4095] internal. PiperOrigin-RevId: 161878428 --- tensor2tensor/models/modalities.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tensor2tensor/models/modalities.py b/tensor2tensor/models/modalities.py index 3cfb44f57..60df80a1c 100644 --- a/tensor2tensor/models/modalities.py +++ b/tensor2tensor/models/modalities.py @@ -465,3 +465,31 @@ def bottom(self, x): def top(self, body_output, _): return body_output + + +@registry.register_image_modality("identity_no_pad") +class IdentityModalityNoPad(modality.Modality): + """Does nothing except making sure that there is no padding in cross-ent.""" + + @property + def targets_dimensionality(self): + return self._vocab_size + + def bottom(self, x): + return tf.to_float(x) + + def top(self, body_output, _): + return body_output + + def top_sharded(self, + sharded_body_output, + sharded_targets, + data_parallelism, + weights_fn=common_layers.weights_all): + # Call the default implementation, but weight 1.0 on 0s by default. + # (Since we're processing images and so have no padding and some pixel 0s.) + return super(IdentityModalityNoPad, self).top_sharded( + sharded_body_output, + sharded_targets, + data_parallelism, + weights_fn=weights_fn) From ca7b045e30e9b2003e350db6ed57e8b6107dfa32 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 13 Jul 2017 17:47:51 -0700 Subject: [PATCH 0122/4095] Problem base class and registry PiperOrigin-RevId: 161892788 --- tensor2tensor/bin/t2t-datagen | 94 ++++--- tensor2tensor/data_generators/algorithmic.py | 71 ++++- tensor2tensor/data_generators/all_problems.py | 31 +++ .../data_generators/generator_utils.py | 54 ++-- tensor2tensor/data_generators/problem.py | 248 ++++++++++++++++++ .../data_generators/problem_hparams.py | 2 - tensor2tensor/data_generators/text_encoder.py | 8 +- tensor2tensor/utils/data_reader.py | 6 +- tensor2tensor/utils/registry.py | 73 +++++- tensor2tensor/utils/t2t_model.py | 3 + tensor2tensor/utils/trainer_utils.py | 14 +- tensor2tensor/utils/trainer_utils_test.py | 39 ++- 12 files changed, 546 insertions(+), 97 deletions(-) create mode 100644 tensor2tensor/data_generators/all_problems.py create mode 100644 tensor2tensor/data_generators/problem.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index cd91cac60..7a00b2877 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -24,6 +24,9 @@ takes 2 arguments - input_directory and mode (one of "train" or "dev") - and yields for each training example a dictionary mapping string feature names to lists of {string, int, float}. The generator will be run once for each mode. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import random import tempfile @@ -34,6 +37,7 @@ import numpy as np from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import algorithmic_math +from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import image @@ -43,6 +47,7 @@ from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import registry import tensorflow as tf @@ -62,12 +67,6 @@ flags.DEFINE_integer("random_seed", 429459, "Random seed to use.") # Mapping from problems that we can generate data for to their generators. # pylint: disable=g-long-lambda _SUPPORTED_PROBLEM_GENERATORS = { - "algorithmic_identity_binary40": ( - lambda: algorithmic.identity_generator(2, 40, 100000), - lambda: algorithmic.identity_generator(2, 400, 10000)), - "algorithmic_identity_decimal40": ( - lambda: algorithmic.identity_generator(10, 40, 100000), - lambda: algorithmic.identity_generator(10, 400, 10000)), "algorithmic_shift_decimal40": ( lambda: algorithmic.shift_generator(20, 10, 40, 100000), lambda: algorithmic.shift_generator(20, 10, 80, 10000)), @@ -294,8 +293,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { # pylint: enable=g-long-lambda -UNSHUFFLED_SUFFIX = "-unshuffled" - def set_random_seed(): """Set the random seed from flag everywhere.""" @@ -308,13 +305,15 @@ def main(_): tf.logging.set_verbosity(tf.logging.INFO) # Calculate the list of problems to generate. - problems = list(sorted(_SUPPORTED_PROBLEM_GENERATORS)) + problems = sorted( + list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems()) if FLAGS.problem and FLAGS.problem[-1] == "*": problems = [p for p in problems if p.startswith(FLAGS.problem[:-1])] elif FLAGS.problem: problems = [p for p in problems if p == FLAGS.problem] else: problems = [] + # Remove TIMIT if paths are not given. if not FLAGS.timit_paths: problems = [p for p in problems if "timit" not in p] @@ -326,7 +325,8 @@ def main(_): problems = [p for p in problems if "ende_bpe" not in p] if not problems: - problems_str = "\n * ".join(sorted(_SUPPORTED_PROBLEM_GENERATORS)) + problems_str = "\n * ".join( + sorted(list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())) error_msg = ("You must specify one of the supported problems to " "generate data for:\n * " + problems_str + "\n") error_msg += ("TIMIT, ende_bpe and parsing need data_sets specified with " @@ -343,40 +343,50 @@ def main(_): for problem in problems: set_random_seed() - training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem] - - if isinstance(dev_gen, int): - # The dev set and test sets are generated as extra shards using the - # training generator. The integer specifies the number of training - # shards. FLAGS.num_shards is ignored. - num_training_shards = dev_gen - tf.logging.info("Generating data for %s.", problem) - all_output_files = generator_utils.combined_data_filenames( - problem + UNSHUFFLED_SUFFIX, FLAGS.data_dir, num_training_shards) - generator_utils.generate_files( - training_gen(), all_output_files, FLAGS.max_cases) + if problem in _SUPPORTED_PROBLEM_GENERATORS: + generate_data_for_problem(problem) else: - # usual case - train data and dev data are generated using separate - # generators. - tf.logging.info("Generating training data for %s.", problem) - train_output_files = generator_utils.train_data_filenames( - problem + UNSHUFFLED_SUFFIX, FLAGS.data_dir, FLAGS.num_shards) - generator_utils.generate_files( - training_gen(), train_output_files, FLAGS.max_cases) - tf.logging.info("Generating development data for %s.", problem) - dev_shards = 10 if "coco" in problem else 1 - dev_output_files = generator_utils.dev_data_filenames( - problem + UNSHUFFLED_SUFFIX, FLAGS.data_dir, dev_shards) - generator_utils.generate_files(dev_gen(), dev_output_files) - all_output_files = train_output_files + dev_output_files + generate_data_for_registered_problem(problem) + + +def generate_data_for_problem(problem): + """Generate data for a problem in _SUPPORTED_PROBLEM_GENERATORS.""" + training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem] + + if isinstance(dev_gen, int): + # The dev set and test sets are generated as extra shards using the + # training generator. The integer specifies the number of training + # shards. FLAGS.num_shards is ignored. + num_training_shards = dev_gen + tf.logging.info("Generating data for %s.", problem) + all_output_files = generator_utils.combined_data_filenames( + problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, + num_training_shards) + generator_utils.generate_files(training_gen(), all_output_files, + FLAGS.max_cases) + else: + # usual case - train data and dev data are generated using separate + # generators. + tf.logging.info("Generating training data for %s.", problem) + train_output_files = generator_utils.train_data_filenames( + problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, + FLAGS.num_shards) + generator_utils.generate_files(training_gen(), train_output_files, + FLAGS.max_cases) + tf.logging.info("Generating development data for %s.", problem) + dev_shards = 10 if "coco" in problem else 1 + dev_output_files = generator_utils.dev_data_filenames( + problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, dev_shards) + generator_utils.generate_files(dev_gen(), dev_output_files) + all_output_files = train_output_files + dev_output_files + + tf.logging.info("Shuffling data...") + generator_utils.shuffle_dataset(all_output_files) + - tf.logging.info("Shuffling data...") - for fname in all_output_files: - records = generator_utils.read_records(fname) - random.shuffle(records) - out_fname = fname.replace(UNSHUFFLED_SUFFIX, "") - generator_utils.write_records(records, out_fname) - tf.gfile.Remove(fname) +def generate_data_for_registered_problem(problem_name): + problem = registry.problem(problem_name) + problem.generate_data(FLAGS.data_dir) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 87e5873a5..f0d3faf76 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -23,12 +23,50 @@ from six.moves import xrange # pylint: disable=redefined-builtin +from tensor2tensor.data_generators import generator_utils as utils +from tensor2tensor.data_generators import problem +from tensor2tensor.utils import registry + + +@registry.register_problem +class AlgorithmicIdentityBinary40(problem.Problem): + """Problem spec for algorithmic binary identity task.""" + + @property + def num_symbols(self): + return 2 + + def generate_data(self, data_dir): + utils.generate_files( + identity_generator(self.num_symbols, 40, 100000), + self.training_filepaths(data_dir, 100)) + utils.generate_files( + identity_generator(self.num_symbols, 400, 10000), + self.dev_filepaths(data_dir, 1)) + + def hparams(self, defaults, unused_model_hparams): + p = defaults + vocab_size = self.num_symbols + self._encoders["inputs"].num_reserved_ids + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} + p.target_modality = (registry.Modalities.SYMBOL, vocab_size) + p.input_space_id = problem.SpaceID.DIGIT_0 + p.target_space_id = problem.SpaceID.DIGIT_1 + + +@registry.register_problem +class AlgorithmicIdentityDecimal40(AlgorithmicIdentityBinary40): + """Problem spec for algorithmic decimal identity task.""" + + @property + def num_symbols(self): + return 10 + def identity_generator(nbr_symbols, max_length, nbr_cases): """Generator for the identity (copy) task on sequences of symbols. The length of the sequence is drawn uniformly at random from [1, max_length] - and then symbols are drawn uniformly at random from [2, nbr_symbols] until + and then symbols are drawn uniformly at random from [2, nbr_symbols + 2) until nbr_cases sequences have been produced. Args: @@ -66,8 +104,10 @@ def shift_generator(nbr_symbols, shift, max_length, nbr_cases): for _ in xrange(nbr_cases): l = np.random.randint(max_length) + 1 inputs = [np.random.randint(nbr_symbols - shift) + 2 for _ in xrange(l)] - yield {"inputs": inputs, - "targets": [i + shift for i in inputs] + [1]} # [1] for EOS + yield { + "inputs": inputs, + "targets": [i + shift for i in inputs] + [1] + } # [1] for EOS def reverse_generator(nbr_symbols, max_length, nbr_cases): @@ -89,8 +129,10 @@ def reverse_generator(nbr_symbols, max_length, nbr_cases): for _ in xrange(nbr_cases): l = np.random.randint(max_length) + 1 inputs = [np.random.randint(nbr_symbols) + 2 for _ in xrange(l)] - yield {"inputs": inputs, - "targets": list(reversed(inputs)) + [1]} # [1] for EOS + yield { + "inputs": inputs, + "targets": list(reversed(inputs)) + [1] + } # [1] for EOS def zipf_distribution(nbr_symbols, alpha): @@ -106,7 +148,7 @@ def zipf_distribution(nbr_symbols, alpha): distr_map: list of float, Zipf's distribution over nbr_symbols. """ - tmp = np.power(np.arange(1, nbr_symbols+1), -alpha) + tmp = np.power(np.arange(1, nbr_symbols + 1), -alpha) zeta = np.r_[0.0, np.cumsum(tmp)] return [x / zeta[-1] for x in zeta] @@ -128,11 +170,14 @@ def zipf_random_sample(distr_map, sample_len): # we have made a sanity check to overcome this issue. On the other hand, # t+1 is enough from saving us to generate PAD(0) and EOS(1) which are # reservated symbols. - return [t+1 if t > 0 else t+2 for t in np.searchsorted(distr_map, u)] + return [t + 1 if t > 0 else t + 2 for t in np.searchsorted(distr_map, u)] -def reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, - scale_std_dev=100, alpha=1.5): +def reverse_generator_nlplike(nbr_symbols, + max_length, + nbr_cases, + scale_std_dev=100, + alpha=1.5): """Generator for the reversing nlp-like task on sequences of symbols. The length of the sequence is drawn from a Gaussian(Normal) distribution @@ -157,10 +202,12 @@ def reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, std_dev = max_length / scale_std_dev distr_map = zipf_distribution(nbr_symbols, alpha) for _ in xrange(nbr_cases): - l = int(abs(np.random.normal(loc=max_length/2, scale=std_dev)) + 1) + l = int(abs(np.random.normal(loc=max_length / 2, scale=std_dev)) + 1) inputs = zipf_random_sample(distr_map, l) - yield {"inputs": inputs, - "targets": list(reversed(inputs)) + [1]} # [1] for EOS + yield { + "inputs": inputs, + "targets": list(reversed(inputs)) + [1] + } # [1] for EOS def lower_endian_to_number(l, base): diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py new file mode 100644 index 000000000..364c252a7 --- /dev/null +++ b/tensor2tensor/data_generators/all_problems.py @@ -0,0 +1,31 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Imports for problem modules.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint: disable=unused-import +from tensor2tensor.data_generators import algorithmic +from tensor2tensor.data_generators import algorithmic_math +from tensor2tensor.data_generators import audio +from tensor2tensor.data_generators import image +from tensor2tensor.data_generators import lm1b +from tensor2tensor.data_generators import ptb +from tensor2tensor.data_generators import snli +from tensor2tensor.data_generators import wiki +from tensor2tensor.data_generators import wmt +from tensor2tensor.data_generators import wsj_parsing +# pylint: enable=unused-import diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 67da54ab9..b323d0700 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -22,6 +22,7 @@ import gzip import io import os +import random import tarfile # Dependency imports @@ -35,6 +36,8 @@ import tensorflow as tf +UNSHUFFLED_SUFFIX = "-unshuffled" + def to_example(dictionary): """Helper: build tf.Example from (string -> int/float/str list) dictionary.""" @@ -66,7 +69,7 @@ def generate_files_distributed(generator, task_id=0): """generate_files but with a single writer writing to shard task_id.""" assert task_id < num_shards - output_filename = "%s-%.5d-of-%.5d" % (output_name, task_id, num_shards) + output_filename = sharded_name(output_name, task_id, num_shards) output_file = os.path.join(output_dir, output_filename) tf.logging.info("Writing to file %s", output_file) writer = tf.python_io.TFRecordWriter(output_file) @@ -86,14 +89,14 @@ def generate_files_distributed(generator, def _data_filenames(output_name, output_dir, num_shards): - return [os.path.join( - output_dir, "%s-%.5d-of-%.5d" % (output_name, shard, num_shards)) - for shard in xrange(num_shards)] + return [ + os.path.join(output_dir, fname) + for fname in shard_filepath(output_name, num_shards) + ] def train_data_filenames(problem, output_dir, num_shards): - return _data_filenames( - problem + "-train", output_dir, num_shards) + return _data_filenames(problem + "-train", output_dir, num_shards) def dev_data_filenames(problem, output_dir, num_shards): @@ -105,15 +108,22 @@ def test_data_filenames(problem, output_dir, num_shards): def combined_data_filenames(problem, output_dir, num_training_shards): - return ( - train_data_filenames(problem, output_dir, num_training_shards) + - dev_data_filenames(problem, output_dir, 1) + - test_data_filenames(problem, output_dir, 1)) + return (train_data_filenames(problem, output_dir, num_training_shards) + + dev_data_filenames(problem, output_dir, 1) + test_data_filenames( + problem, output_dir, 1)) + + +def sharded_name(base_name, shard, total_shards): + return "%s-%.5d-of-%.5d" % (base_name, shard, total_shards) + +def shard_filepath(fname, num_shards): + return [ + sharded_name(fname, shard, num_shards) for shard in xrange(num_shards) + ] -def generate_files(generator, - output_filenames, - max_cases=None): + +def generate_files(generator, output_filenames, max_cases=None): """Generate cases from a generator and save as TFRecord files. Generated cases are transformed to tf.Example protos and saved as TFRecords @@ -172,8 +182,8 @@ def maybe_download(directory, filename, url): if not tf.gfile.Exists(filepath): tf.logging.info("Downloading %s to %s" % (url, filepath)) inprogress_filepath = filepath + ".incomplete" - inprogress_filepath, _ = urllib.urlretrieve(url, inprogress_filepath, - reporthook=download_report_hook) + inprogress_filepath, _ = urllib.urlretrieve( + url, inprogress_filepath, reporthook=download_report_hook) # Print newline to clear the carriage return from the download progress print() tf.gfile.Rename(inprogress_filepath, filepath) @@ -266,8 +276,8 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): if ".gz" in lang_file: new_filepath = os.path.join(tmp_dir, lang_file[:-3]) if tf.gfile.Exists(new_filepath): - tf.logging.info("Subdirectory %s already exists, skipping unpacking" - % filepath) + tf.logging.info( + "Subdirectory %s already exists, skipping unpacking" % filepath) else: tf.logging.info("Unpacking subdirectory %s" % filepath) gunzip_file(filepath, new_filepath) @@ -307,3 +317,13 @@ def write_records(records, out_filename): if count > 0 and count % 100000 == 0: tf.logging.info("write: %d", count) writer.close() + + +def shuffle_dataset(filenames): + tf.logging.info("Shuffling data...") + for fname in filenames: + records = read_records(fname) + random.shuffle(records) + out_fname = fname.replace(UNSHUFFLED_SUFFIX, "") + write_records(records, out_fname) + tf.gfile.Remove(fname) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py new file mode 100644 index 000000000..55fc04f41 --- /dev/null +++ b/tensor2tensor/data_generators/problem.py @@ -0,0 +1,248 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base class for problem/dataset definitions.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils as utils +from tensor2tensor.data_generators import text_encoder + +import tensorflow as tf + + +class SpaceID(object): + """Input and target space ids. Add more as needed.""" + # Generic / unknown output space (default) + GENERIC = 0 + # Image labels + IMAGE_LABEL = 1 + # English characters + EN_CHR = 2 + # English tokens + EN_TOK = 3 + # English bpe tokens + EN_BPE_TOK = 4 + # French characters + FR_CHR = 5 + # French tokens + FR_TOK = 6 + # German characters + DE_CHR = 7 + # German tokens + DE_TOK = 8 + # German bpe tokens + DE_BPE_TOK = 9 + # Digit cipher lexicon 0 + DIGIT_0 = 10 + # Digit cipher lexicon 1 + DIGIT_1 = 11 + # Audio waveform domain + AUDIO_WAV = 12 + # Audio spectral domain + AUDIO_SPECTRAL = 13 + # Parse characters + PARSE_CHR = 14 + # Parse tokens + PARSE_TOK = 15 + + +class Problem(object): + """Problem base class. Specifies a T2T problem. + + Problems unify the specification of a problem for data generation, training, + and inference. + + New problems are specified by the following methods: + + Data generation: + * generate_data(data_dir) + - Generate training and dev datasets into data_dir. + - Additonal files, e.g. vocabulary files, should also be written to + data_dir. + - Use the self.training_filepaths and self.dev_filepaths functions to + get sharded filenames. + - Subclasses must override + * dataset_filename() + - Base filename for problem. + - Defaults to registered name (self.name). + + Training: + * hparams(defaults, model_hparams) + - Specify the problem hyperparameters (see _default_hparams) + - Mutate defaults as needed + + Inference: + * feature_encoders(data_dir) + - Return a dict of <feature name, TextEncoder> for encoding and decoding + inference input/output. + - Defaults to TextEncoder for inputs and targets. + """ + + # ============================================================================ + # BEGIN SUBCLASS INTERFACE + # ============================================================================ + + def generate_data(self, data_dir): + raise NotImplementedError() + + def hparams(self, defaults, model_hparams): + pass + + def dataset_filename(self): + return self.name + + def feature_encoders(self, data_dir): + del data_dir + return { + "inputs": text_encoder.TextEncoder(), + "targets": text_encoder.TextEncoder() + } + + # ============================================================================ + # END SUBCLASS INTERFACE + # ============================================================================ + + def training_filepaths(self, data_dir, num_shards): + return utils.train_data_filenames(self.dataset_filename(), data_dir, + num_shards) + + def dev_filepaths(self, data_dir, num_shards): + return utils.dev_data_filenames(self.dataset_filename(), data_dir, + num_shards) + + def __init__(self, was_reversed=False, was_copy=False): + """Create a Problem. + + Args: + was_reversed: bool, whether to reverse inputs and targets. + was_copy: bool, whether to copy inputs to targets. Can be composed with + was_reversed so that if both are true, the targets become the inputs, + which are then copied to targets so that the task is targets->targets. + """ + self._was_reversed = was_reversed + self._was_copy = was_copy + self._encoders = None + + def internal_build_encoders(self, data_dir): + self._encoders = self.feature_encoders(data_dir) + + def internal_hparams(self, model_hparams): + """Returns problem_hparams.""" + if self._encoders is None: + self.internal_build_encoders(model_hparams.data_dir) + + hp = _default_hparams() + ret = self.hparams(hp, model_hparams) + if ret is not None: + raise ValueError("The Problem subclass hparams function should mutate " + "the defaults passed in and return None.") + + hp.add_hparam("vocabulary", self._encoders) + hp.add_hparam("was_reversed", self._was_reversed) + hp.add_hparam("was_copy", self._was_copy) + + if self._was_reversed: + _reverse_problem_hparams(hp) + # TODO(rsepassi): Move this into the cifar10 Problem + if "image_cifar10" in self.name: + hp.loss_multiplier = 1. + if self._was_copy: + _copy_problem_hparams(hp) + return hp + + +def _copy_problem_hparams(p_hparams): + """Use input modality, vocab, and space id for target.""" + p = p_hparams + # Duplicate input modality. + p.target_modality = p.input_modality["inputs"] + # Duplicate input vocabulary. + p.vocabulary["targets"] = p.vocabulary["inputs"] + # Duplicate input space ids. + p.target_space_id = p.input_space_id + # Mark that p was reversed. + p.was_copy = True + + +def _reverse_problem_hparams(p_hparams): + """Swap input/output modalities, vocab, and space ids.""" + p = p_hparams + + # Swap modalities. + input_modality = p.input_modality["inputs"] + target_modality = p.target_modality + p.input_modality["inputs"] = target_modality + p.target_modality = input_modality + + # Swap vocabularies. + input_vocabulary = p.vocabulary["inputs"] + target_vocabulary = p.vocabulary["targets"] + p.vocabulary["inputs"] = target_vocabulary + p.vocabulary["targets"] = input_vocabulary + + # Swap input/target space ids. + input_space_id = p.input_space_id + target_space_id = p.target_space_id + p.input_space_id = target_space_id + p.target_space_id = input_space_id + + # Mark that p was reversed. + p.was_reversed = True + + +def _default_hparams(): + """A set of basic model hyperparameters.""" + return tf.contrib.training.HParams( + # Use this parameter to get comparable perplexity numbers with different + # tokenizations. This value should be set to the ratio of the number of + # tokens in the test set according to the tokeization used to the number + # of tokens in the test set in the "official" tokenization. For + # example, if we are using a word-piece based model and we want to + # compute per-word perplexity, then we set loss_multiplier to the number + # of wordpieces per word in the test set. + loss_multiplier=1.0, + + # Use this parameter to allow for larger sequences in the batch. Without + # the use of this parameter, the size of the inner two dimensions will + # be used to judge the sequence length. + batch_size_multiplier=1, + + # To make queues of the right capacity, it's good to know the maximal + # expected batch size, as it can vary a lot. It only affects performance + # of input readers and memory use. The defaults should be safe and fast, + # but decrease if your reader uses a lot of memory and increase if slow. + max_expected_batch_size_per_shard=64, + + # Modalities used to map from input features to a space compatible with + # chosen model architecture. One modality spec (which is a 2-tuple, + # (modality_full_name, vocab_size)) per feature key. modality_full_name + # is a string type:name, e.g. class_label:class_label_2d. Leaving off + # the name uses the default modality for that type (e.g. class_label == + # class_label:default). + input_modality={}, + + # Modality used to map from hidden representation to the target space. + # Specified as a modality spec, a 2-tuple described above. + target_modality=None, + + # Identifiers used to tell the model which input/target space will be + # expected. For example, it can tell that we expect French as characters + # as output, or Spanish as sound. Spaces defined as constants in SpaceID + # class. + input_space_id=SpaceID.GENERIC, + target_space_id=SpaceID.GENERIC) diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index ec3b8e45e..354c56b22 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -685,8 +685,6 @@ def img2img_imagenet(unused_model_hparams): PROBLEM_HPARAMS_MAP = { "algorithmic_addition_binary40": lambda p: algorithmic(4, p), "algorithmic_addition_decimal40": lambda p: algorithmic(12, p), - "algorithmic_identity_binary40": lambda p: algorithmic(4, p), - "algorithmic_identity_decimal40": lambda p: algorithmic(12, p), "algorithmic_multiplication_binary40": lambda p: algorithmic(4, p), "algorithmic_multiplication_decimal40": lambda p: algorithmic(12, p), "algorithmic_reverse_binary40": lambda p: algorithmic(4, p), diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 715cba803..61c2e7326 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -45,8 +45,8 @@ def native_to_unicode(s): # Reserved tokens for things like padding and EOS symbols. -PAD = "<pad>" -EOS = "<EOS>" +PAD, PAD_ID = "<pad>", 0 +EOS, EOS_ID = "<EOS>", 1 RESERVED_TOKENS = [PAD, EOS] if six.PY2: RESERVED_TOKENS_BYTES = RESERVED_TOKENS @@ -60,6 +60,10 @@ class TextEncoder(object): def __init__(self, num_reserved_ids=2): self._num_reserved_ids = num_reserved_ids + @property + def num_reserved_ids(self): + return self._num_reserved_ids + def encode(self, s): """Transform a human-readable string into a sequence of int ids. diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 08571f353..a3e9835ac 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -27,6 +27,7 @@ from tensor2tensor.data_generators import problem_hparams from tensor2tensor.models import common_layers +from tensor2tensor.utils import registry import tensorflow as tf @@ -352,7 +353,10 @@ def get_datasets(problems, data_dir, mode): """Return the location of a dataset for a given mode.""" datasets = [] for problem in problems.split("-"): - problem, _, _ = problem_hparams.parse_problem_name(problem) + try: + problem = registry.problem(problem).dataset_filename() + except ValueError: + problem, _, _ = problem_hparams.parse_problem_name(problem) path = os.path.join(data_dir, problem) if mode == tf.contrib.learn.ModeKeys.TRAIN: datasets.append("%s-train*" % path) diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 104c6db76..5a8823510 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -54,6 +54,7 @@ class MyModel(T2TModel): _MODELS = {} _HPARAMS = {} _RANGED_HPARAMS = {} +_PROBLEMS = {} class Modalities(object): @@ -184,6 +185,63 @@ def list_ranged_hparams(): return list(_RANGED_HPARAMS) +def register_problem(name=None): + """Register a Problem. name defaults to cls name snake-cased.""" + + def decorator(p_cls, registration_name=None): + """Registers & returns p_cls with registration_name or default name.""" + p_name = registration_name or _default_name(p_cls) + if p_name in _PROBLEMS: + raise ValueError("Problem %s already registered." % p_name) + + _PROBLEMS[p_name] = p_cls + p_cls.name = p_name + return p_cls + + # Handle if decorator was used without parens + if callable(name): + p_cls = name + return decorator(p_cls, registration_name=_default_name(p_cls)) + + return lambda p_cls: decorator(p_cls, name) + + +def problem(name): + """Retrieve a problem by name.""" + + def parse_problem_name(problem_name): + """Determines if problem_name specifies a copy and/or reversal. + + Args: + problem_name: A string containing a single problem name from + FLAGS.problems. + + Returns: + base_name: A string with the base problem name. + was_reversed: A boolean. + was_copy: A boolean. + """ + # Recursively strip tags until we reach a base name. + if len(problem_name) > 4 and problem_name[-4:] == "_rev": + base, _, was_copy = parse_problem_name(problem_name[:-4]) + return base, True, was_copy + elif len(problem_name) > 5 and problem_name[-5:] == "_copy": + base, was_reversed, _ = parse_problem_name(problem_name[:-5]) + return base, was_reversed, True + else: + return problem_name, False, False + + base_name, was_reversed, was_copy = parse_problem_name(name) + + if base_name not in _PROBLEMS: + raise ValueError("Problem %s never registered." % name) + return _PROBLEMS[base_name](was_reversed, was_copy) + + +def list_problems(): + return list(_PROBLEMS) + + def _internal_get_modality(name, mod_collection, collection_str): if name is None: name = "default" @@ -345,11 +403,16 @@ def help_string(): RangedHParams: %s Modalities: %s + + Problems: %s """ - m, rhp, mod = [ + m, rhp, mod, probs = [ sorted(entries) - for entries in [list_models(), - list_ranged_hparams(), - list_modalities()] + for entries in [ + list_models(), + list_ranged_hparams(), + list_modalities(), + list_problems() + ] ] - return help_str % (m, _hparams_help_string(), rhp, mod) + return help_str % (m, _hparams_help_string(), rhp, mod, probs) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index c1365c7a9..2a271afbf 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -115,6 +115,9 @@ def _create_modalities(self, problem_hparams, hparams): input_modality = {} for f, modality_spec in six.iteritems(problem_hparams.input_modality): if isinstance(modality_spec, modality.Modality): + # This function has been previously run (e.g. for training and now is + # being called for eval) and the modalities have already been + # constructed. Return. return if f in input_modality_overrides: _warn_changed_modality_type(input_modality_overrides[f], diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index c5d8be278..a0289421b 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -33,6 +33,7 @@ from six.moves import zip # pylint: enable=redefined-builtin +from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import from tensor2tensor.data_generators import problem_hparams from tensor2tensor.models import models # pylint: disable=unused-import from tensor2tensor.utils import data_reader @@ -214,10 +215,15 @@ def create_hparams(params_id, data_dir): hparams = hparams.parse(FLAGS.hparams) # Add hparams for the problems - hparams.problems = [ - problem_hparams.problem_hparams(problem, hparams) - for problem in FLAGS.problems.split("-") - ] + hparams.problems = [] + for problem_name in FLAGS.problems.split("-"): + try: + problem = registry.problem(problem_name) + p_hparams = problem.internal_hparams(hparams) + except ValueError: + p_hparams = problem_hparams.problem_hparams(problem_name, hparams) + + hparams.problems.append(p_hparams) return hparams diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 543a0547e..b11b4a81d 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -22,6 +22,7 @@ from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import generator_utils +from tensor2tensor.models import transformer from tensor2tensor.utils import registry from tensor2tensor.utils import trainer_utils as utils # pylint: disable=unused-import @@ -30,20 +31,37 @@ FLAGS = tf.flags.FLAGS +@registry.register_problem +class TinyAlgo(algorithmic.AlgorithmicIdentityBinary40): + + def generate_data(self, data_dir): + generator_utils.generate_files( + algorithmic.identity_generator(self.num_symbols, 40, 100000), + self.training_filepaths(data_dir, 1), 100) + generator_utils.generate_files( + algorithmic.identity_generator(self.num_symbols, 400, 10000), + self.dev_filepaths(data_dir, 1), 100) + + +@registry.register_hparams +def transformer_test(): + hparams = transformer.transformer_base() + hparams.batch_size = 10 + hparams.hidden_size = 10 + hparams.num_hidden_layers = 1 + hparams.num_heads = 2 + hparams.max_length = 16 + return hparams + + class TrainerUtilsTest(tf.test.TestCase): @classmethod def setUpClass(cls): # Generate a small test dataset - FLAGS.problems = "algorithmic_addition_binary40" + FLAGS.problems = "tiny_algo" TrainerUtilsTest.data_dir = tf.test.get_temp_dir() - gen = algorithmic.identity_generator(2, 10, 300) - train_filenames = generator_utils.train_data_filenames( - FLAGS.problems, TrainerUtilsTest.data_dir, 1) - dev_filenames = generator_utils.dev_data_filenames( - FLAGS.problems, TrainerUtilsTest.data_dir, 1) - generator_utils.generate_files(gen, train_filenames, 100) - generator_utils.generate_files(gen, dev_filenames, 100) + registry.problem(FLAGS.problems).generate_data(TrainerUtilsTest.data_dir) def testModelsImported(self): models = registry.list_models() @@ -55,10 +73,7 @@ def testHParamsImported(self): def testSingleStep(self): model_name = "transformer" - FLAGS.hparams_set = "transformer_base" - # Shrink the test model down - FLAGS.hparams = ("batch_size=10,hidden_size=10,num_heads=2,max_length=16," - "num_hidden_layers=1") + FLAGS.hparams_set = "transformer_test" exp = utils.create_experiment( output_dir=tf.test.get_temp_dir(), data_dir=TrainerUtilsTest.data_dir, From c4407b8b827aaff88c8e2e5755623431aae9d3c7 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Fri, 14 Jul 2017 13:36:26 -0700 Subject: [PATCH 0123/4095] Some fixes/configs/comments for language modeling plans. PiperOrigin-RevId: 162000426 --- tensor2tensor/models/attention_lm.py | 21 +++++++++++- tensor2tensor/models/attention_lm_moe.py | 43 +++++++++++++++++++----- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 85a05f9b1..947dc9306 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -140,7 +140,7 @@ def attention_lm_base(): hparams.optimizer_adam_epsilon = 1e-9 hparams.learning_rate_decay_scheme = "noam" hparams.learning_rate = 0.1 - hparams.learning_rate_warmup_steps = 1000 + hparams.learning_rate_warmup_steps = 2000 hparams.initializer_gain = 1.0 hparams.num_hidden_layers = 6 hparams.initializer = "uniform_unit_scaling" @@ -163,3 +163,22 @@ def attention_lm_base(): hparams.add_hparam("residual_dropout", 0.1) hparams.add_hparam("pos", "timing") # timing, none return hparams + + +@registry.register_hparams +def attention_lm_small(): + """Cheap model. + + on lm1b_32k: + 45M params + 2 steps/sec on [GeForce GTX TITAN X] + + Returns: + an hparams object. + """ + hparams = attention_lm_base() + hparams.num_hidden_layers = 4 + hparams.hidden_size = 512 + hparams.filter_size = 2048 + hparams.residual_dropout = 0.5 + return hparams diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 1715f0a77..952ff1a71 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -117,9 +117,9 @@ def attention_lm_moe_base(): """Set of hyperparameters. suitable for 1 gpu. - on lm1b_16k: - ~337M params - 1.1 steps/sec on [GeForce GTX TITAN X] + on lm1b_32k: + ~229M params + 0.9 steps/sec on [GeForce GTX TITAN X] Returns: a hparams object @@ -133,7 +133,7 @@ def attention_lm_moe_base(): hparams.optimizer_adam_epsilon = 1e-9 hparams.learning_rate_decay_scheme = "noam" hparams.learning_rate = 0.1 - hparams.learning_rate_warmup_steps = 1000 + hparams.learning_rate_warmup_steps = 2000 hparams.initializer_gain = 1.0 hparams.num_hidden_layers = 4 hparams.initializer = "uniform_unit_scaling" @@ -143,14 +143,14 @@ def attention_lm_moe_base(): hparams.num_sampled_classes = 0 hparams.label_smoothing = 0.0 hparams.shared_embedding_and_softmax_weights = int(False) - hparams.add_hparam("filter_size", 2948) # Add new ones like this. + hparams.add_hparam("filter_size", 2048) # Add new ones like this. # comma-separated list of layer numbers. # At each of these layers, we replace the ffn with a mixture of experts. hparams.add_hparam("moe_layers", "2") # If moe_n2 is None, then use a flat MoE with moe_n1 experts. # If moe_n2 is an integer, then use a hierarchical MoE # consisting of moe_n1 groups of moe_n2 experts each. - hparams.add_hparam("moe_n1", 64) + hparams.add_hparam("moe_n1", 32) hparams.add_hparam("moe_n2", 0) hparams.add_hparam("moe_hidden_size", 2048) hparams.add_hparam("moe_loss_coef", 1e-2) @@ -171,9 +171,11 @@ def attention_lm_moe_base(): def attention_lm_moe_small(): """Cheap model for single-gpu training. - on lm1b_16k: - ~295M params - 2 steps/sec on [GeForce GTX TITAN X] + on lm1b_32k: + ~312M params + 1.6 steps/sec on [GeForce GTX TITAN X] + After 50K steps on 8 GPUs (synchronous): + eval_log_ppl_per_token = 3.31 Returns: an hparams object. @@ -188,6 +190,24 @@ def attention_lm_moe_small(): return hparams +@registry.register_hparams +def attention_lm_no_moe_small(): + """Without the mixture of experts (for comparison). + + on lm1b_32k: + ~45M params + 2 steps/sec on [GeForce GTX TITAN X] + After 50K steps on 8 GPUs (synchronous): + eval_log_ppl_per_token = 3.51 + + Returns: + an hparams object. + """ + hparams = attention_lm_moe_small() + hparams.moe_layers = "" + return hparams + + @registry.register_hparams def attention_lm_moe_large(): """Large model for distributed training. @@ -195,6 +215,11 @@ def attention_lm_moe_large(): Over 1B parameters, so requires multi-gpu training due to memory requirements. + on lm1b_32k: + After 45K steps on 8 GPUs (synchronous): + eval_log_ppl_per_token = 3.18 + eval_ppl_per_word = exp(1.107893 * eval_log_ppl_per_token) = 33.9 + Returns: an hparams object. """ From 4b4c80005ff78a38d5d12ba575bb5c9ac96fdb3a Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 14 Jul 2017 15:58:38 -0700 Subject: [PATCH 0124/4095] Removing obsolete problems, merging. PiperOrigin-RevId: 162020990 --- tensor2tensor/bin/t2t-datagen | 52 +- tensor2tensor/data_generators/algorithmic.py | 2 +- .../data_generators/generator_utils.py | 40 ++ tensor2tensor/data_generators/image.py | 47 ++ tensor2tensor/data_generators/problem.py | 10 +- .../data_generators/problem_hparams.py | 126 +++-- tensor2tensor/data_generators/text_encoder.py | 56 +- tensor2tensor/data_generators/tokenizer.py | 1 + tensor2tensor/data_generators/wiki.py | 2 +- tensor2tensor/data_generators/wmt.py | 224 ++++++-- tensor2tensor/models/common_hparams.py | 8 +- tensor2tensor/models/common_layers.py | 23 +- tensor2tensor/models/models.py | 1 + tensor2tensor/models/shake_shake.py | 159 ++++++ tensor2tensor/models/transformer.py | 9 + tensor2tensor/utils/trainer_utils.py | 112 ++-- tensor2tensor/utils/yellowfin.py | 535 ++++++++++++++++++ tensor2tensor/utils/yellowfin_test.py | 213 +++++++ 18 files changed, 1394 insertions(+), 226 deletions(-) create mode 100644 tensor2tensor/models/shake_shake.py create mode 100644 tensor2tensor/utils/yellowfin.py create mode 100644 tensor2tensor/utils/yellowfin_test.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 7a00b2877..ca70a06b0 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -101,6 +101,14 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), + "ice_parsing_tokens": ( + lambda: wmt.tabbed_parsing_token_generator(FLAGS.tmp_dir, + True, "ice", 2**13, 2**8), + lambda: wmt.tabbed_parsing_token_generator(FLAGS.tmp_dir, + False, "ice", 2**13, 2**8)), + "ice_parsing_characters": ( + lambda: wmt.tabbed_parsing_character_generator(FLAGS.tmp_dir, True), + lambda: wmt.tabbed_parsing_character_generator(FLAGS.tmp_dir, False)), "wmt_parsing_tokens_8k": ( lambda: wmt.parsing_token_generator(FLAGS.tmp_dir, True, 2**13), lambda: wmt.parsing_token_generator(FLAGS.tmp_dir, False, 2**13)), @@ -109,11 +117,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { 2**14, 2**9), lambda: wsj_parsing.parsing_token_generator(FLAGS.tmp_dir, False, 2**14, 2**9)), - "wsj_parsing_tokens_32k": ( - lambda: wsj_parsing.parsing_token_generator(FLAGS.tmp_dir, True, - 2**15, 2**9), - lambda: wsj_parsing.parsing_token_generator(FLAGS.tmp_dir, False, - 2**15, 2**9)), "wmt_enfr_characters": ( lambda: wmt.enfr_character_generator(FLAGS.tmp_dir, True), lambda: wmt.enfr_character_generator(FLAGS.tmp_dir, False)), @@ -139,6 +142,12 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**15) ), + "wmt_zhen_tokens_32k": ( + lambda: wmt.zhen_wordpiece_token_generator(FLAGS.tmp_dir, True, + 2**15, 2**15), + lambda: wmt.zhen_wordpiece_token_generator(FLAGS.tmp_dir, False, + 2**15, 2**15) + ), "lm1b_32k": ( lambda: lm1b.generator(FLAGS.tmp_dir, True), lambda: lm1b.generator(FLAGS.tmp_dir, False) @@ -159,26 +168,9 @@ _SUPPORTED_PROBLEM_GENERATORS = { "image_cifar10_test": ( lambda: image.cifar10_generator(FLAGS.tmp_dir, True, 50000), lambda: image.cifar10_generator(FLAGS.tmp_dir, False, 10000)), - "image_mscoco_characters_tune": ( - lambda: image.mscoco_generator(FLAGS.tmp_dir, True, 70000), - lambda: image.mscoco_generator(FLAGS.tmp_dir, True, 10000, 70000)), "image_mscoco_characters_test": ( lambda: image.mscoco_generator(FLAGS.tmp_dir, True, 80000), lambda: image.mscoco_generator(FLAGS.tmp_dir, False, 40000)), - "image_mscoco_tokens_8k_tune": ( - lambda: image.mscoco_generator( - FLAGS.tmp_dir, - True, - 70000, - vocab_filename="tokens.vocab.%d" % 2**13, - vocab_size=2**13), - lambda: image.mscoco_generator( - FLAGS.tmp_dir, - True, - 10000, - 70000, - vocab_filename="tokens.vocab.%d" % 2**13, - vocab_size=2**13)), "image_mscoco_tokens_8k_test": ( lambda: image.mscoco_generator( FLAGS.tmp_dir, @@ -192,20 +184,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { 40000, vocab_filename="tokens.vocab.%d" % 2**13, vocab_size=2**13)), - "image_mscoco_tokens_32k_tune": ( - lambda: image.mscoco_generator( - FLAGS.tmp_dir, - True, - 70000, - vocab_filename="tokens.vocab.%d" % 2**15, - vocab_size=2**15), - lambda: image.mscoco_generator( - FLAGS.tmp_dir, - True, - 10000, - 70000, - vocab_filename="tokens.vocab.%d" % 2**15, - vocab_size=2**15)), "image_mscoco_tokens_32k_test": ( lambda: image.mscoco_generator( FLAGS.tmp_dir, @@ -386,7 +364,7 @@ def generate_data_for_problem(problem): def generate_data_for_registered_problem(problem_name): problem = registry.problem(problem_name) - problem.generate_data(FLAGS.data_dir) + problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index f0d3faf76..a9e47abe0 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -36,7 +36,7 @@ class AlgorithmicIdentityBinary40(problem.Problem): def num_symbols(self): return 2 - def generate_data(self, data_dir): + def generate_data(self, data_dir, _): utils.generate_files( identity_generator(self.num_symbols, 40, 100000), self.training_filepaths(data_dir, 100)) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index b323d0700..739655301 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -300,6 +300,46 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): return vocab +def get_or_generate_tabbed_vocab(tmp_dir, source_filename, + index, vocab_filename, vocab_size): + r"""Generate a vocabulary from a tabbed source file. + + The source is a file of source, target pairs, where each line contains + a source string and a target string, separated by a tab ('\t') character. + The index parameter specifies 0 for the source or 1 for the target. + + Args: + tmp_dir: path to the temporary directory. + source_filename: the name of the tab-separated source file. + index: index. + vocab_filename: the name of the vocabulary file. + vocab_size: vocabulary size. + + Returns: + The vocabulary. + """ + vocab_filepath = os.path.join(tmp_dir, vocab_filename) + if os.path.exists(vocab_filepath): + vocab = text_encoder.SubwordTextEncoder(vocab_filepath) + return vocab + + # Use Tokenizer to count the word occurrences. + filepath = os.path.join(tmp_dir, source_filename) + with tf.gfile.GFile(filepath, mode="r") as source_file: + for line in source_file: + line = line.strip() + if line and "\t" in line: + parts = line.split("\t", maxsplit=1) + part = parts[index].strip() + _ = tokenizer.encode(text_encoder.native_to_unicode(part)) + + vocab = text_encoder.SubwordTextEncoder.build_to_target_size( + vocab_size, tokenizer.token_counts, 1, + min(1e3, vocab_size + text_encoder.NUM_RESERVED_TOKENS)) + vocab.store_to_file(vocab_filepath) + return vocab + + def read_records(filename): reader = tf.python_io.tf_record_iterator(filename) records = [] diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 792122324..0cba1800b 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -33,6 +33,9 @@ from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import registry import tensorflow as tf @@ -300,3 +303,47 @@ def mscoco_generator(tmp_dir, "image/height": [height], "image/width": [width] } + +# French street names dataset. + + +@registry.register_problem +class ImageFSNS(problem.Problem): + """Problem spec for French Street Name recognition.""" + + def generate_data(self, data_dir, tmp_dir): + list_url = ("https://raw.githubusercontent.com/tensorflow/models/master/" + "street/python/fsns_urls.txt") + fsns_urls = generator_utils.maybe_download( + tmp_dir, "fsns_urls.txt", list_url) + fsns_files = [f.strip() for f in open(fsns_urls, "r") + if f.startswith("http://")] + for url in fsns_files: + if "/train/train" in url: + generator_utils.maybe_download( + data_dir, "image_fsns-train" + url[-len("-00100-of-00512"):], url) + elif "/validation/validation" in url: + generator_utils.maybe_download( + data_dir, "image_fsns-dev" + url[-len("-00100-of-00512"):], url) + elif "charset" in url: + generator_utils.maybe_download( + data_dir, "charset_size134.txt", url) + + def hparams(self, defaults, model_hparams): + p = defaults + p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} + # This vocab file must be present within the data directory. + vocab_filename = os.path.join(model_hparams.data_dir, "charset_size134.txt") + subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) + p.target_modality = (registry.Modalities.SYMBOL, subtokenizer.vocab_size) + p.vocabulary = { + "inputs": text_encoder.TextEncoder(), + "targets": subtokenizer, + } + p.batch_size_multiplier = 256 + p.max_expected_batch_size_per_shard = 2 + vocab_size = 144 + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} + p.target_modality = (registry.Modalities.SYMBOL, vocab_size) + p.input_space_id = problem.SpaceID.DIGIT_0 + p.target_space_id = problem.SpaceID.DIGIT_1 diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 55fc04f41..179c2844c 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -59,6 +59,14 @@ class SpaceID(object): PARSE_CHR = 14 # Parse tokens PARSE_TOK = 15 + # Chinese tokens + ZH_TOK = 16 + # Icelandic characters + ICE_CHAR = 17 + # Icelandic tokens + ICE_TOK = 18 + # Icelandic parse tokens + ICE_PARSE_TOK = 19 class Problem(object): @@ -97,7 +105,7 @@ class Problem(object): # BEGIN SUBCLASS INTERFACE # ============================================================================ - def generate_data(self, data_dir): + def generate_data(self, data_dir, tmp_dir): raise NotImplementedError() def hparams(self, defaults, model_hparams): diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 354c56b22..2f7ad5cd3 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -66,14 +66,13 @@ def parse_problem_name(problem_name): was_copy: A boolean. """ # Recursively strip tags until we reach a base name. - if len(problem_name) > 4 and problem_name[-4:] == "_rev": + if problem_name.endswith("_rev"): base, _, was_copy = parse_problem_name(problem_name[:-4]) return base, True, was_copy - elif len(problem_name) > 5 and problem_name[-5:] == "_copy": + elif problem_name.endswith("_copy"): base, was_reversed, _ = parse_problem_name(problem_name[:-5]) return base, was_reversed, True - else: - return problem_name, False, False + return problem_name, False, False def _lookup_problem_hparams_fn(name): @@ -177,6 +176,10 @@ def default_problem_hparams(): # 13: Audio spectral domain # 14: Parse characters # 15: Parse tokens + # 16: Chinese tokens + # 17: Icelandic characters + # 18: Icelandic tokens + # 19: Icelandic parse tokens # Add more above if needed. input_space_id=0, target_space_id=0, @@ -197,7 +200,8 @@ def default_problem_hparams(): # the targets. For instance `problem_copy` will copy the inputs, but # `problem_rev_copy` will copy the targets. was_reversed=False, - was_copy=False,) + was_copy=False, + ) def test_problem_hparams(unused_model_hparams, input_vocab_size, @@ -472,41 +476,35 @@ def wmt_ende_tokens(model_hparams, wrong_vocab_size): return p -def wmt_ende_v2(model_hparams, vocab_size): - """English to German translation benchmark with separate vocabularies.""" +def wmt_zhen_tokens(model_hparams, wrong_vocab_size): + """Chinese to English translation benchmark.""" p = default_problem_hparams() - # These vocab files must be present within the data directory. + # This vocab file must be present within the data directory. + if model_hparams.shared_embedding_and_softmax_weights == 1: + model_hparams.shared_embedding_and_softmax_weights = 0 source_vocab_filename = os.path.join(model_hparams.data_dir, - "wmt_ende_v2.en.vocab.%d" % vocab_size) + "tokens.vocab.zh.%d" % wrong_vocab_size) target_vocab_filename = os.path.join(model_hparams.data_dir, - "wmt_ende_v2.de.vocab.%d" % vocab_size) - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, vocab_size) + "tokens.vocab.en.%d" % wrong_vocab_size) + source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) + p.input_modality = { + "inputs": (registry.Modalities.SYMBOL, source_token.vocab_size) + } + p.target_modality = (registry.Modalities.SYMBOL, target_token.vocab_size) p.vocabulary = { - "inputs": text_encoder.SubwordTextEncoder(source_vocab_filename), - "targets": text_encoder.SubwordTextEncoder(target_vocab_filename), + "inputs": source_token, + "targets": target_token, } - p.input_space_id = 3 - p.target_space_id = 8 - return p - - -def wmt_concat(model_hparams, wrong_vocab_size): - """English to German translation benchmark.""" - p = default_problem_hparams() - # This vocab file must be present within the data directory. - vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.%d" % wrong_vocab_size) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - vocab_size = subtokenizer.vocab_size - p.input_modality = {} - p.target_modality = (registry.Modalities.SYMBOL, vocab_size) - p.vocabulary = {"targets": subtokenizer} + p.loss_multiplier = 1.4 + p.input_space_id = 16 + p.target_space_id = 4 return p -def wmt_parsing_characters(unused_model_hparams): +def wmt_parsing_characters(model_hparams): """English to parse tree translation benchmark.""" + del model_hparams # Unused. p = default_problem_hparams() p.input_modality = {"inputs": (registry.Modalities.SYMBOL, 256)} p.target_modality = (registry.Modalities.SYMBOL, 256) @@ -549,12 +547,15 @@ def wmt_parsing_tokens(model_hparams, wrong_vocab_size): return p -def wsj_parsing_tokens(model_hparams, wrong_source_vocab_size, +def wsj_parsing_tokens(model_hparams, + prefix, + wrong_source_vocab_size, wrong_target_vocab_size): """English to parse tree translation benchmark. Args: model_hparams: a tf.contrib.training.HParams + prefix: name to use as prefix for vocabulary files. wrong_source_vocab_size: a number used in the filename indicating the approximate vocabulary size. This is not to be confused with the actual vocabulary size. @@ -568,10 +569,10 @@ def wsj_parsing_tokens(model_hparams, wrong_source_vocab_size, # This vocab file must be present within the data directory. source_vocab_filename = os.path.join( model_hparams.data_dir, - "wsj_source.tokens.vocab.%d" % wrong_source_vocab_size) + prefix + "_source.tokens.vocab.%d" % wrong_source_vocab_size) target_vocab_filename = os.path.join( model_hparams.data_dir, - "wsj_target.tokens.vocab.%d" % wrong_target_vocab_size) + prefix + "_target.tokens.vocab.%d" % wrong_target_vocab_size) source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) p.input_modality = { @@ -588,6 +589,41 @@ def wsj_parsing_tokens(model_hparams, wrong_source_vocab_size, return p +def ice_parsing_tokens(model_hparams, wrong_source_vocab_size): + """Icelandic to parse tree translation benchmark. + + Args: + model_hparams: a tf.contrib.training.HParams + wrong_source_vocab_size: a number used in the filename indicating the + approximate vocabulary size. This is not to be confused with the actual + vocabulary size. + + Returns: + A tf.contrib.training.HParams object. + """ + p = default_problem_hparams() + # This vocab file must be present within the data directory. + source_vocab_filename = os.path.join( + model_hparams.data_dir, + "ice_source.tokens.vocab.%d" % wrong_source_vocab_size) + target_vocab_filename = os.path.join( + model_hparams.data_dir, + "ice_target.tokens.vocab.256") + source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) + p.input_modality = { + "inputs": (registry.Modalities.SYMBOL, source_subtokenizer.vocab_size) + } + p.target_modality = (registry.Modalities.SYMBOL, 256) + p.vocabulary = { + "inputs": source_subtokenizer, + "targets": target_subtokenizer, + } + p.input_space_id = 18 # Icelandic tokens + p.target_space_id = 19 # Icelandic parse tokens + return p + + def image_cifar10(unused_model_hparams): """CIFAR-10.""" p = default_problem_hparams() @@ -703,43 +739,31 @@ def img2img_imagenet(unused_model_hparams): "lm1b_32k": lm1b_32k, "wiki_32k": wiki_32k, "lmptb_10k": lmptb_10k, - "wmt_parsing_characters": wmt_parsing_characters, + "ice_parsing_characters": wmt_parsing_characters, + "ice_parsing_tokens": lambda p: ice_parsing_tokens(p, 2**13), "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), - "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens(p, 2**14, 2**9), - "wsj_parsing_tokens_32k": lambda p: wsj_parsing_tokens(p, 2**15, 2**9), + "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens( # pylint: disable=g-long-lambda + p, "wsj", 2**14, 2**9), "wmt_enfr_characters": wmt_enfr_characters, "wmt_enfr_tokens_8k": lambda p: wmt_enfr_tokens(p, 2**13), "wmt_enfr_tokens_32k": lambda p: wmt_enfr_tokens(p, 2**15), "wmt_enfr_tokens_32k_shuffled": lambda p: wmt_enfr_tokens(p, 2**15), "wmt_enfr_tokens_32k_combined": lambda p: wmt_enfr_tokens(p, 2**15), "wmt_enfr_tokens_128k": lambda p: wmt_enfr_tokens(p, 2**17), - # bytes per subtoken: 3.267350 - "wmt_ende_concat_8k": lambda p: wmt_concat(p, 2**13), - # bytes per subtoken: 4.236272 - "wmt_ende_concat_32k": lambda p: wmt_concat(p, 2**15), "wmt_ende_characters": wmt_ende_characters, "wmt_ende_tokens_8k": lambda p: wmt_ende_tokens(p, 2**13), "wmt_ende_tokens_32k": lambda p: wmt_ende_tokens(p, 2**15), "wmt_ende_tokens_128k": lambda p: wmt_ende_tokens(p, 2**17), - # bytes per subtoken: 4.59291664162 "wmt_ende_bpe32k": wmt_ende_bpe32k, - "wmt_ende_bpe32k_shuffled": wmt_ende_bpe32k, - "wmt_ende_bpe32k_combined": wmt_ende_bpe32k, - "wmt_ende_bpe32k_160": wmt_ende_bpe32k, - "wmt_ende_v2_32k_combined": lambda p: wmt_ende_v2(p, 2**15), - "wmt_ende_v2_16k_combined": lambda p: wmt_ende_v2(p, 2**14), + "wmt_zhen_tokens_32k": lambda p: wmt_zhen_tokens(p, 2**15), "image_cifar10_tune": image_cifar10, "image_cifar10_test": image_cifar10, "image_mnist_tune": image_mnist, "image_mnist_test": image_mnist, "image_mscoco_characters_tune": image_mscoco_characters, "image_mscoco_characters_test": image_mscoco_characters, - "image_mscoco_tokens_8k_tune": lambda p: image_mscoco_tokens(p, 2**13), "image_mscoco_tokens_8k_test": lambda p: image_mscoco_tokens(p, 2**13), - "image_mscoco_tokens_32k_tune": lambda p: image_mscoco_tokens(p, 2**15), "image_mscoco_tokens_32k_test": lambda p: image_mscoco_tokens(p, 2**15), - "image_mscoco_tokens_128k_tune": lambda p: image_mscoco_tokens(p, 2**17), - "image_mscoco_tokens_128k_test": lambda p: image_mscoco_tokens(p, 2**17), "image_imagenet": image_imagenet, "img2img_imagenet": img2img_imagenet, } diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 61c2e7326..4a5a784c2 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -36,28 +36,41 @@ import tensorflow as tf -# Conversion between Unicode and UTF-8, if required (on Python2) -def native_to_unicode(s): - return s.decode("utf-8") if (PY2 and not isinstance(s, unicode)) else s - - -unicode_to_native = (lambda s: s.encode("utf-8")) if PY2 else (lambda s: s) - - # Reserved tokens for things like padding and EOS symbols. -PAD, PAD_ID = "<pad>", 0 -EOS, EOS_ID = "<EOS>", 1 +PAD = "<pad>" +EOS = "<EOS>" RESERVED_TOKENS = [PAD, EOS] -if six.PY2: +NUM_RESERVED_TOKENS = len(RESERVED_TOKENS) +PAD_TOKEN = RESERVED_TOKENS.index(PAD) # Normally 0 +EOS_TOKEN = RESERVED_TOKENS.index(EOS) # Normally 1 + +if PY2: RESERVED_TOKENS_BYTES = RESERVED_TOKENS else: RESERVED_TOKENS_BYTES = [bytes(PAD, "ascii"), bytes(EOS, "ascii")] +def native_to_unicode_py2(s): + """Python 2: transform native string to Unicode.""" + if isinstance(s, unicode): + return s + return s.decode("utf-8") + + +# Conversion between Unicode and UTF-8, if required (on Python2) +if PY2: + native_to_unicode = native_to_unicode_py2 + unicode_to_native = lambda s: s.encode("utf-8") +else: + # No conversion required on Python3 + native_to_unicode = lambda s: s + unicode_to_native = lambda s: s + + class TextEncoder(object): """Base class for converting from ints to/from human readable strings.""" - def __init__(self, num_reserved_ids=2): + def __init__(self, num_reserved_ids=NUM_RESERVED_TOKENS): self._num_reserved_ids = num_reserved_ids @property @@ -109,7 +122,7 @@ class ByteTextEncoder(TextEncoder): def encode(self, s): numres = self._num_reserved_ids - if six.PY2: + if PY2: return [ord(c) + numres for c in s] # Python3: explicitly convert to UTF-8 return [c + numres for c in s.encode("utf-8")] @@ -123,10 +136,10 @@ def decode(self, ids): decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) else: decoded_ids.append(int2byte(id_ - numres)) - if six.PY2: + if PY2: return "".join(decoded_ids) # Python3: join byte arrays and then decode string - return b"".join(decoded_ids).decode("utf-8") + return b"".join(decoded_ids).decode("utf-8", "replace") @property def vocab_size(self): @@ -136,7 +149,8 @@ def vocab_size(self): class TokenTextEncoder(TextEncoder): """Encoder based on a user-supplied vocabulary.""" - def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2): + def __init__(self, vocab_filename, reverse=False, + num_reserved_ids=NUM_RESERVED_TOKENS): """Initialize from a file, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse @@ -349,7 +363,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4, - num_reserved_ids=2): + num_reserved_ids=NUM_RESERVED_TOKENS): """Train a SubwordTextEncoder based on a dictionary of word counts. Args: @@ -375,6 +389,8 @@ def build_from_token_counts(self, # We build iteratively. On each iteration, we segment all the words, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. + if min_count < 1: + min_count = 1 for i in xrange(num_iterations): tf.logging.info("Iteration {0}".format(i)) counts = defaultdict(int) @@ -466,7 +482,7 @@ def store_to_file(self, filename): f.write("'" + unicode_to_native(subtoken_string) + "'\n") def _escape_token(self, token): - r"""Escape away underscores and OOV characters and append '_'. + """Escape away underscores and OOV characters and append '_'. This allows the token to be experessed as the concatenation of a list of subtokens from the vocabulary. The underscore acts as a sentinel @@ -477,7 +493,7 @@ def _escape_token(self, token): Returns: escaped_token: a unicode string """ - assert isinstance(token, unicode) + assert isinstance(token, six.text_type) token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") + u"_" ret = u"" for c in token: @@ -488,7 +504,7 @@ def _escape_token(self, token): return ret def _unescape_token(self, escaped_token): - r"""Inverse of _escape_token(). + """Inverse of _escape_token(). Args: escaped_token: a unicode string diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 5d6cc9c06..2b1cf572c 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -141,6 +141,7 @@ def read_corpus(): if corpus_max_lines > 0 and lines_read > corpus_max_lines: return docs return docs + counts = defaultdict(int) for doc in read_corpus(): for tok in encode(_native_to_unicode(doc)): diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 208d679bd..8f905aa96 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -60,7 +60,7 @@ def page_generator(tmp_dir, max_docs=None): count = 0 corpus_filepath = _maybe_download_corpus(tmp_dir) for line in bz2.BZ2File(corpus_filepath, "r"): - line = unicode(line, "utf-8") + line = unicode(line, "utf-8") if six.PY2 else line.decode("utf-8") if not doc and line != u" <page>\n": continue doc += line diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index eb09a68ca..573a3bcad 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -38,9 +38,8 @@ FLAGS = tf.flags.FLAGS -# End-of-sentence marker (should correspond to the position of EOS in the -# RESERVED_TOKENS list in text_encoder.py) -EOS = 1 +# End-of-sentence marker. +EOS = text_encoder.EOS_TOKEN def character_generator(source_path, target_path, character_vocab, eos=None): @@ -72,6 +71,36 @@ def character_generator(source_path, target_path, character_vocab, eos=None): source, target = source_file.readline(), target_file.readline() +def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): + r"""Generator for sequence-to-sequence tasks using tabbed files. + + Tokens are derived from text files where each line contains both + a source and a target string. The two strings are separated by a tab + character ('\t'). It yields dictionaries of "inputs" and "targets" where + inputs are characters from the source lines converted to integers, and + targets are characters from the target lines, also converted to integers. + + Args: + source_path: path to the file with source and target sentences. + source_vocab: a SunwordTextEncoder to encode the source string. + target_vocab: a SunwordTextEncoder to encode the target string. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from characters in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + for line in source_file: + if line and "\t" in line: + parts = line.split("\t", maxsplit=1) + source, target = parts[0].strip(), parts[1].strip() + source_ints = source_vocab.encode(source) + eos_list + target_ints = target_vocab.encode(target) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + + def token_generator(source_path, target_path, token_vocab, eos=None): """Generator for sequence-to-sequence tasks that uses tokens. @@ -101,6 +130,39 @@ def token_generator(source_path, target_path, token_vocab, eos=None): source, target = source_file.readline(), target_file.readline() +def bi_vocabs_token_generator(source_path, target_path, + source_token_vocab, + target_token_vocab, + eos=None): + """Generator for sequence-to-sequence tasks that uses tokens. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are token ids from the " "-split source (and target, resp.) lines + converted to integers using the token_map. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + source_token_vocab: text_encoder.TextEncoder object. + target_token_vocab: text_encoder.TextEncoder object. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from tokens in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = source_token_vocab.encode(source.strip()) + eos_list + target_ints = target_token_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + + def _get_wmt_ende_dataset(directory, filename): """Extract the WMT en-de corpus `filename` to directory unless it's there.""" train_path = os.path.join(directory, filename) @@ -122,7 +184,8 @@ def ende_bpe_token_generator(tmp_dir, train): train_path = _get_wmt_ende_dataset(tmp_dir, dataset_path) token_path = os.path.join(tmp_dir, "vocab.bpe.32000") token_vocab = text_encoder.TokenTextEncoder(vocab_filename=token_path) - return token_generator(train_path + ".en", train_path + ".de", token_vocab, 1) + return token_generator(train_path + ".en", train_path + ".de", + token_vocab, EOS) _ENDE_TRAIN_DATASETS = [ @@ -177,60 +240,61 @@ def ende_bpe_token_generator(tmp_dir, train): ], ] +_ZHEN_TRAIN_DATASETS = [ + [ + ("http://data.statmt.org/wmt17/translation-task/" + "training-parallel-nc-v12.tgz"), + ("training/news-commentary-v12.zh-en.zh", + "training/news-commentary-v12.zh-en.en") + ] +] + +_ZHEN_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newsdev2017-zhen-src.zh", + "dev/newsdev2017-zhen-ref.en") + ] +] + def _compile_data(tmp_dir, datasets, filename): """Concatenate all `datasets` and save to `filename`.""" filename = os.path.join(tmp_dir, filename) - lang1_lines, lang2_lines = [], [] - for dataset in datasets: - url = dataset[0] - compressed_filename = os.path.basename(url) - compressed_filepath = os.path.join(tmp_dir, compressed_filename) - - lang1_filename, lang2_filename = dataset[1] - lang1_filepath = os.path.join(tmp_dir, lang1_filename) - lang2_filepath = os.path.join(tmp_dir, lang2_filename) - - if not os.path.exists(compressed_filepath): - generator_utils.maybe_download(tmp_dir, compressed_filename, url) - if not os.path.exists(lang1_filepath) or not os.path.exists(lang2_filepath): - mode = "r:gz" if "gz" in compressed_filepath else "r" - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - if ".gz" in lang1_filepath: - new_filepath = lang1_filepath.strip(".gz") - generator_utils.gunzip_file(lang1_filepath, new_filepath) - lang1_filepath = new_filepath - if ".gz" in lang2_filepath: - new_filepath = lang2_filepath.strip(".gz") - generator_utils.gunzip_file(lang2_filepath, new_filepath) - lang2_filepath = new_filepath - with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: - with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: - lang1_file_lines = lang1_file.readlines() - lang2_file_lines = lang2_file.readlines() - assert len(lang1_file_lines) == len(lang2_file_lines), lang1_filepath - lang1_lines.extend(lang1_file_lines) - lang2_lines.extend(lang2_file_lines) - - write_chunk_size = 10000 - assert len(lang1_lines) == len(lang2_lines) - with tf.gfile.GFile(filename + ".lang1", mode="w") as lang1_file: - i = 0 - while i <= len(lang1_lines): - for line in lang1_lines[i * write_chunk_size:(i + 1) * write_chunk_size]: - lang1_file.write(line) - i += 1 - for line in lang1_lines[i * write_chunk_size:]: - lang1_file.write(line) - with tf.gfile.GFile(filename + ".lang2", mode="w") as lang2_file: - i = 0 - while i <= len(lang2_lines): - for line in lang2_lines[i * write_chunk_size:(i + 1) * write_chunk_size]: - lang2_file.write(line) - i += 1 - for line in lang2_lines[i * write_chunk_size:]: - lang2_file.write(line) + with tf.gfile.GFile(filename + ".lang1", mode="w") as lang1_resfile: + with tf.gfile.GFile(filename + ".lang2", mode="w") as lang2_resfile: + for dataset in datasets: + url = dataset[0] + compressed_filename = os.path.basename(url) + compressed_filepath = os.path.join(tmp_dir, compressed_filename) + + lang1_filename, lang2_filename = dataset[1] + lang1_filepath = os.path.join(tmp_dir, lang1_filename) + lang2_filepath = os.path.join(tmp_dir, lang2_filename) + + if not os.path.exists(compressed_filepath): + generator_utils.maybe_download(tmp_dir, compressed_filename, url) + if not (os.path.exists(lang1_filepath) and + os.path.exists(lang2_filepath)): + mode = "r:gz" if "gz" in compressed_filepath else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + if ".gz" in lang1_filepath: + new_filepath = lang1_filepath.strip(".gz") + generator_utils.gunzip_file(lang1_filepath, new_filepath) + lang1_filepath = new_filepath + if ".gz" in lang2_filepath: + new_filepath = lang2_filepath.strip(".gz") + generator_utils.gunzip_file(lang2_filepath, new_filepath) + lang2_filepath = new_filepath + with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: + with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: + line1, line2 = lang1_file.readline(), lang2_file.readline() + while line1 or line2: + lang1_resfile.write(line1.strip() + "\n") + lang2_resfile.write(line2.strip() + "\n") + line1, line2 = lang1_file.readline(), lang2_file.readline() + return filename @@ -253,6 +317,26 @@ def ende_character_generator(tmp_dir, train): character_vocab, EOS) +def zhen_wordpiece_token_generator(tmp_dir, train, + source_vocab_size, + target_vocab_size): + """Wordpiece generator for the WMT'17 zh-en dataset.""" + datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + source_vocab = generator_utils.get_or_generate_vocab( + tmp_dir, "tokens.vocab.zh.%d" % source_vocab_size, + source_vocab_size, source_datasets) + target_vocab = generator_utils.get_or_generate_vocab( + tmp_dir, "tokens.vocab.en.%d" % target_vocab_size, + target_vocab_size, target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) + return bi_vocabs_token_generator(data_path + ".lang1", + data_path + ".lang2", + source_vocab, target_vocab, EOS) + + def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): """Instance of token generator for the WMT en->fr task.""" symbolizer_vocab = generator_utils.get_or_generate_vocab( @@ -274,6 +358,38 @@ def enfr_character_generator(tmp_dir, train): character_vocab, EOS) +def parsing_character_generator(tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + filename = "parsing_%s" % ("train" if train else "dev") + text_filepath = os.path.join(tmp_dir, filename + ".text") + tags_filepath = os.path.join(tmp_dir, filename + ".tags") + return character_generator(text_filepath, tags_filepath, character_vocab, EOS) + + +def tabbed_parsing_token_generator(tmp_dir, train, prefix, + source_vocab_size, target_vocab_size): + """Generate source and target data from a single file.""" + source_vocab = generator_utils.get_or_generate_tabbed_vocab( + tmp_dir, "parsing_train.pairs", 0, + prefix + "_source.tokens.vocab.%d" % source_vocab_size, + source_vocab_size) + target_vocab = generator_utils.get_or_generate_tabbed_vocab( + tmp_dir, "parsing_train.pairs", 1, + prefix + "_target.tokens.vocab.%d" % target_vocab_size, + target_vocab_size) + filename = "parsing_%s" % ("train" if train else "dev") + pair_filepath = os.path.join(tmp_dir, filename + ".pairs") + return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) + + +def tabbed_parsing_character_generator(tmp_dir, train): + """Generate source and target data from a single file.""" + character_vocab = text_encoder.ByteTextEncoder() + filename = "parsing_%s" % ("train" if train else "dev") + pair_filepath = os.path.join(tmp_dir, filename + ".pairs") + return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) + + def parsing_token_generator(tmp_dir, train, vocab_size): symbolizer_vocab = generator_utils.get_or_generate_vocab( tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size) diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index e1568f0e1..f067b724e 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -61,6 +61,7 @@ def basic_params1(): weight_noise=0.0, learning_rate_decay_scheme="none", learning_rate_warmup_steps=100, + learning_rate_cosine_cycle_steps=250000, learning_rate=0.1, sampling_method="argmax", # "argmax" or "random" problem_choice="adaptive", # "uniform", "adaptive", "distributed" @@ -202,4 +203,9 @@ def basic_range1(ranged_hparams): rhp.set_float("optimizer_adam_beta1", 0.8, 0.9) rhp.set_float("optimizer_adam_beta2", 0.995, 0.999) rhp.set_categorical("optimizer", - ["Adam", "Adagrad", "Momentum", "RMSProp", "SGD"]) + ["Adam", + "Adagrad", + "Momentum", + "RMSProp", + "SGD", + "YellowFin"]) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 3ff41bebc..1e7050570 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -58,9 +58,15 @@ def inverse_exp_decay(max_step, min_value=0.01): return inv_base**tf.maximum(float(max_step) - step, 0.0) -def shakeshake2_py(x, y, equal=False): +def shakeshake2_py(x, y, equal=False, individual=False): """The shake-shake sum of 2 tensors, python version.""" - alpha = 0.5 if equal else tf.random_uniform([]) + if equal: + alpha = 0.5 + if individual: + alpha = tf.random_uniform(tf.get_shape(x)[:1]) + else: + alpha = tf.random_uniform([]) + return alpha * x + (1.0 - alpha) * y @@ -72,6 +78,14 @@ def shakeshake2_grad(x1, x2, dy): return dx +@function.Defun() +def shakeshake2_indiv_grad(x1, x2, dy): + """Overriding gradient for shake-shake of 2 tensors.""" + y = shakeshake2_py(x1, x2, individual=True) + dx = tf.gradients(ys=[y], xs=[x1, x2], grad_ys=[dy]) + return dx + + @function.Defun() def shakeshake2_equal_grad(x1, x2, dy): """Overriding gradient for shake-shake of 2 tensors.""" @@ -86,6 +100,11 @@ def shakeshake2(x1, x2): return shakeshake2_py(x1, x2) +@function.Defun(grad_func=shakeshake2_indiv_grad) +def shakeshake2_indiv(x1, x2): + return shakeshake2_py(x1, x2, individual=True) + + @function.Defun(grad_func=shakeshake2_equal_grad) def shakeshake2_eqgrad(x1, x2): """The shake-shake function with a different alpha for forward/backward.""" diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index 594b605ad..0ca11996e 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -30,6 +30,7 @@ from tensor2tensor.models import modalities from tensor2tensor.models import multimodel from tensor2tensor.models import neural_gpu +from tensor2tensor.models import shake_shake from tensor2tensor.models import slicenet from tensor2tensor.models import transformer from tensor2tensor.models import transformer_alternative diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py new file mode 100644 index 000000000..26d43afb3 --- /dev/null +++ b/tensor2tensor/models/shake_shake.py @@ -0,0 +1,159 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shake-shake model for CIFAR.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.models import common_hparams +from tensor2tensor.models import common_layers +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +def shake_shake_block_branch(x, conv_filters, stride): + x = tf.nn.relu(x) + x = tf.layers.conv2d( + x, conv_filters, (3, 3), strides=(stride, stride), padding="SAME") + x = tf.layers.batch_normalization(x) + x = tf.nn.relu(x) + x = tf.layers.conv2d(x, conv_filters, (3, 3), strides=(1, 1), padding="SAME") + x = tf.layers.batch_normalization(x) + return x + + +def downsampling_residual_branch(x, conv_filters): + x = tf.nn.relu(x) + x1 = tf.layers.average_pooling2d(x, pool_size=(1, 1), strides=(2, 2)) + x1 = tf.layers.conv2d(x1, conv_filters / 2, (1, 1), padding="SAME") + x2 = tf.pad(x[:, 1:, 1:], [[0, 0], [0, 1], [0, 1], [0, 0]]) + x2 = tf.layers.average_pooling2d(x2, pool_size=(1, 1), strides=(2, 2)) + x2 = tf.layers.conv2d(x2, conv_filters / 2, (1, 1), padding="SAME") + return tf.concat([x1, x2], axis=3) + + +def shake_shake_block(x, conv_filters, stride, hparams): + """A shake-shake block.""" + with tf.variable_scope("branch_1"): + branch1 = shake_shake_block_branch(x, conv_filters, stride) + with tf.variable_scope("branch_2"): + branch2 = shake_shake_block_branch(x, conv_filters, stride) + if x.shape[-1] == conv_filters: + skip = tf.identity(x) + else: + skip = downsampling_residual_branch(x, conv_filters) + + # TODO(rshin): Use different alpha for each image in batch. + if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN: + if hparams.shakeshake_type == "batch": + shaken = common_layers.shakeshake2(branch1, branch2) + elif hparams.shakeshake_type == "image": + shaken = common_layers.shakeshake2_indiv(branch1, branch2) + elif hparams.shakeshake_type == "equal": + shaken = common_layers.shakeshake2_py(branch1, branch2, equal=True) + else: + raise ValueError("Invalid shakeshake_type: {!r}".format(shaken)) + else: + shaken = common_layers.shakeshake2_py(branch1, branch2, equal=True) + shaken.set_shape(branch1.get_shape()) + + return skip + shaken + + +def shake_shake_stage(x, num_blocks, conv_filters, initial_stride, hparams): + with tf.variable_scope("block_0"): + x = shake_shake_block(x, conv_filters, initial_stride, hparams) + for i in xrange(1, num_blocks): + with tf.variable_scope("block_{}".format(i)): + x = shake_shake_block(x, conv_filters, 1, hparams) + return x + + +@registry.register_model +class ShakeShake(t2t_model.T2TModel): + """Implements the Shake-Shake architecture. + + From <https://arxiv.org/pdf/1705.07485.pdf> + This is intended to match the CIFAR-10 version, and correspond to + "Shake-Shake-Batch" in Table 1. + """ + + def model_fn_body(self, features): + hparams = self._hparams + print(hparams.learning_rate) + + inputs = features["inputs"] + assert (hparams.num_hidden_layers - 2) % 6 == 0 + blocks_per_stage = (hparams.num_hidden_layers - 2) // 6 + + # For canonical Shake-Shake, the entry flow is a 3x3 convolution with 16 + # filters then a batch norm. Instead we will rely on the one in + # SmallImageModality, which seems to instead use a layer norm. + x = inputs + with tf.variable_scope("shake_shake_stage_1"): + x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters, 1, + hparams) + with tf.variable_scope("shake_shake_stage_2"): + x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters * 2, 2, + hparams) + with tf.variable_scope("shake_shake_stage_3"): + x = shake_shake_stage(x, blocks_per_stage, hparams.base_filters * 4, 2, + hparams) + + # For canonical Shake-Shake, we should perform 8x8 average pooling and then + # have a fully-connected layer (which produces the logits for each class). + # Instead, we rely on the Xception exit flow in ClassLabelModality. + # + # Also, this model_fn does not return an extra_loss. However, TensorBoard + # reports an exponential moving average for extra_loss, where the initial + # value for the moving average may be a large number, so extra_loss will + # look large at the beginning of training. + return x + + +@registry.register_hparams +def shakeshake_cifar10(): + """Parameters for CIFAR-10.""" + hparams = common_hparams.basic_params1() + # This leads to effective batch size 128 when number of GPUs is 1 + hparams.batch_size = 4096 * 8 + hparams.hidden_size = 16 + hparams.dropout = 0 + hparams.label_smoothing = 0.0 + hparams.clip_grad_norm = 2.0 + hparams.num_hidden_layers = 26 + hparams.kernel_height = -1 # Unused + hparams.kernel_width = -1 # Unused + hparams.learning_rate_decay_scheme = "cosine" + # Model should be run for 700000 steps with batch size 128 (~1800 epochs) + hparams.learning_rate_cosine_cycle_steps = 700000 + hparams.learning_rate = 0.2 + hparams.learning_rate_warmup_steps = 3000 + hparams.initializer = "uniform_unit_scaling" + hparams.initializer_gain = 1.0 + # TODO(rshin): Adjust so that effective value becomes ~1e-4 + hparams.weight_decay = 3.0 + hparams.optimizer = "Momentum" + hparams.optimizer_momentum_momentum = 0.9 + hparams.add_hparam("base_filters", 16) + hparams.add_hparam("shakeshake_type", "batch") + return hparams diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index b9212e0f3..b341d6fe0 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -366,6 +366,15 @@ def transformer_parsing_big(): return hparams +@registry.register_hparams +def transformer_parsing_ice(): + """Hparams for parsing Icelandic text.""" + hparams = transformer_base_single_gpu() + hparams.batch_size = 4096 + hparams.shared_embedding_and_softmax_weights = int(False) + return hparams + + @registry.register_hparams def transformer_tiny(): hparams = transformer_base() diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index a0289421b..b5894904d 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -30,16 +30,17 @@ # pylint: disable=redefined-builtin from six.moves import input from six.moves import xrange -from six.moves import zip # pylint: enable=redefined-builtin from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import from tensor2tensor.data_generators import problem_hparams +from tensor2tensor.data_generators import text_encoder from tensor2tensor.models import models # pylint: disable=unused-import from tensor2tensor.utils import data_reader from tensor2tensor.utils import expert_utils as eu from tensor2tensor.utils import metrics from tensor2tensor.utils import registry +from tensor2tensor.utils import yellowfin import tensorflow as tf from tensorflow.contrib.learn.python.learn import learn_runner @@ -105,7 +106,6 @@ "In inference, use last position only for speedup.") flags.DEFINE_bool("decode_interactive", False, "Interactive local inference mode.") -flags.DEFINE_bool("decode_endless", False, "Run decoding endlessly. Temporary.") flags.DEFINE_bool("decode_save_images", False, "Save inference input images.") flags.DEFINE_string("decode_from_file", None, "Path to decode file") flags.DEFINE_string("decode_to_file", None, "Path to inference output file") @@ -123,6 +123,16 @@ "<beam1>\t<beam2>..\t<input>") +def _save_until_eos(hyp): + """Strips everything after the first <EOS> token, which is normally 1.""" + try: + index = list(hyp).index(text_encoder.EOS_TOKEN) + return hyp[0:index] + except ValueError: + # No EOS_TOKEN: return the array as-is. + return hyp + + def make_experiment_fn(data_dir, model_name, train_steps, eval_steps): """Returns experiment_fn for learn_runner. Wraps create_experiment.""" @@ -334,6 +344,9 @@ def learning_rate_decay(): (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) elif hparams.learning_rate_decay_scheme == "exp100k": return 0.94**(step // 100000) + elif hparams.learning_rate_decay_scheme == "cosine": + cycle_steps = hparams.learning_rate_cosine_cycle_steps + return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) inv_base = tf.exp(tf.log(0.01) / warmup_steps) inv_decay = inv_base**(warmup_steps - step) @@ -370,10 +383,11 @@ def model_fn(features, targets, mode): Returns: A tuple consisting of the prediction, loss, and train_op. """ - if mode == tf.contrib.learn.ModeKeys.INFER and FLAGS.decode_interactive: - features = _interactive_input_tensor_to_features_dict(features, hparams) - if mode == tf.contrib.learn.ModeKeys.INFER and FLAGS.decode_from_file: - features = _decode_input_tensor_to_features_dict(features, hparams) + if mode == tf.contrib.learn.ModeKeys.INFER: + if FLAGS.decode_interactive: + features = _interactive_input_tensor_to_features_dict(features, hparams) + elif FLAGS.decode_from_file: + features = _decode_input_tensor_to_features_dict(features, hparams) # A dictionary containing: # - problem_choice: A Tensor containing an integer indicating which problem # was selected for this run. @@ -578,7 +592,7 @@ def decode_from_dataset(estimator): num_datashards=data_parallelism().n, fixed_problem=i) result_iter = estimator.predict( - input_fn=infer_input_fn, as_iterable=FLAGS.decode_endless) + input_fn=infer_input_fn, as_iterable=False) def log_fn(inputs, targets, @@ -593,12 +607,14 @@ def log_fn(inputs, "%s_prediction_%d.jpg" % (problem, j)) show_and_save_image(inputs / 255., save_path) elif inputs_vocab: - decoded_inputs = inputs_vocab.decode(inputs.flatten()) + decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - decoded_outputs = targets_vocab.decode(outputs.flatten()) - decoded_targets = targets_vocab.decode(targets.flatten()) + decoded_outputs = targets_vocab.decode(_save_until_eos(outputs.flatten())) tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) + decoded_targets = targets_vocab.decode(_save_until_eos(targets.flatten())) + tf.logging.info("Inference results TARGET: %s" % decoded_targets) + if FLAGS.decode_to_file: output_filepath = FLAGS.decode_to_file + ".outputs." + problem output_file = tf.gfile.Open(output_filepath, "a") @@ -608,32 +624,17 @@ def log_fn(inputs, target_file.write(decoded_targets + "\n") # The function predict() returns an iterable over the network's - # predictions from the test input. if FLAGS.decode_endless is set, it will - # decode over the dev set endlessly, looping over it. We use the returned - # iterator to log inputs and decodes. - if FLAGS.decode_endless: - tf.logging.info("Warning: Decoding endlessly") - for j, result in enumerate(result_iter): - inputs, targets, outputs = (result["inputs"], result["targets"], - result["outputs"]) - if FLAGS.decode_return_beams: - output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - log_fn(inputs, targets, beam, problem, j) - else: - log_fn(inputs, targets, outputs, problem, j) - else: - for j, (inputs, targets, outputs) in enumerate( - zip(result_iter["inputs"], result_iter["targets"], result_iter[ - "outputs"])): - if FLAGS.decode_return_beams: - output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - log_fn(inputs, targets, beam, problem, j) - else: - log_fn(inputs, targets, outputs, problem, j) + # predictions from the test input. We use it to log inputs and decodes. + for j, result in enumerate(result_iter): + inputs, targets, outputs = (result["inputs"], result["targets"], + result["outputs"]) + if FLAGS.decode_return_beams: + output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) + for k, beam in enumerate(output_beams): + tf.logging.info("BEAM %d:" % k) + log_fn(inputs, targets, beam, problem, j) + else: + log_fn(inputs, targets, outputs, problem, j) def decode_from_file(estimator, filename): @@ -642,22 +643,12 @@ def decode_from_file(estimator, filename): problem_id = FLAGS.decode_problem_id inputs_vocab = hparams.problems[problem_id].vocabulary["inputs"] targets_vocab = hparams.problems[problem_id].vocabulary["targets"] - tf.logging.info("Performing Decoding from a file.") + tf.logging.info("Performing decoding from a file.") sorted_inputs, sorted_keys = _get_sorted_inputs(filename) num_decode_batches = (len(sorted_inputs) - 1) // FLAGS.decode_batch_size + 1 input_fn = _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, inputs_vocab) - # strips everything after the first <EOS> id, which is assumed to be 1 - def _save_until_eos(hyp): # pylint: disable=missing-docstring - ret = [] - index = 0 - # until you reach <EOS> id - while index < len(hyp) and hyp[index] != 1: - ret.append(hyp[index]) - index += 1 - return np.array(ret) - decodes = [] for _ in range(num_decode_batches): result_iter = estimator.predict( @@ -681,7 +672,7 @@ def log_fn(inputs, outputs): for k, beam in enumerate(output_beams): tf.logging.info("BEAM %d:" % k) beam_decodes.append(log_fn(result["inputs"], beam)) - decodes.append(str.join("\t", beam_decodes)) + decodes.append("\t".join(beam_decodes)) else: decodes.append(log_fn(result["inputs"], result["outputs"])) @@ -721,13 +712,14 @@ def decode_interactively(estimator): scores = np.split(result["scores"], FLAGS.decode_beam_size, axis=0) for k, beam in enumerate(beams): tf.logging.info("BEAM %d:" % k) + beam_string = targets_vocab.decode(_save_until_eos(beam.flatten())) if scores is not None: - tf.logging.info("%s\tScore:%f" % - (targets_vocab.decode(beam.flatten()), scores[k])) + tf.logging.info("%s\tScore:%f" % (beam_string, scores[k])) else: - tf.logging.info(targets_vocab.decode(beam.flatten())) + tf.logging.info(beam_string) else: - tf.logging.info(targets_vocab.decode(result["outputs"].flatten())) + tf.logging.info(targets_vocab.decode(_save_until_eos( + result["outputs"].flatten()))) def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, @@ -737,13 +729,13 @@ def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, # you'll see it in the first batch sorted_inputs.reverse() for b in range(num_decode_batches): - tf.logging.info("Deocding batch %d" % b) + tf.logging.info("Decoding batch %d" % b) batch_length = 0 batch_inputs = [] - for inputs in sorted_inputs[b * FLAGS.decode_batch_size:( - b + 1) * FLAGS.decode_batch_size]: + for inputs in sorted_inputs[b * FLAGS.decode_batch_size: + (b + 1) * FLAGS.decode_batch_size]: input_ids = vocabulary.encode(inputs) - input_ids.append(1) # Assuming EOS=1. + input_ids.append(text_encoder.EOS_TOKEN) batch_inputs.append(input_ids) if len(input_ids) > batch_length: batch_length = len(input_ids) @@ -836,7 +828,7 @@ def _interactive_input_fn(hparams): if input_type == "text": input_ids = vocabulary.encode(input_string) if has_input: - input_ids.append(1) # assume 1 means end-of-source + input_ids.append(text_encoder.EOS_TOKEN) x = [num_samples, decode_length, len(input_ids)] + input_ids assert len(x) < const_array_size x += [0] * (const_array_size - len(x)) @@ -1103,7 +1095,7 @@ def input_fn(): problem_choice = tf.to_int32(FLAGS.worker_id % problem_count) else: raise ValueError("Value of hparams.problem_choice is %s and must be " - "one of [uniform, adaptive, distributed]", + "one of [uniform, adaptive, distributed]" % hparams.problem_choice) # Inputs and targets conditional on problem_choice. @@ -1155,6 +1147,10 @@ def __init__(self, optimizer_name, lr, hparams): elif optimizer_name == "Momentum": self._opt = tf.train.MomentumOptimizer( lr, momentum=hparams.optimizer_momentum_momentum) + elif optimizer_name == "YellowFin": + tf.logging.info("Init YellowFin Optimizer.") + self._opt = yellowfin.YellowFinOptimizer( + learning_rate=lr, momentum=hparams.optimizer_momentum_momentum) else: self._opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer_name](lr) diff --git a/tensor2tensor/utils/yellowfin.py b/tensor2tensor/utils/yellowfin.py new file mode 100644 index 000000000..6bbe31bf6 --- /dev/null +++ b/tensor2tensor/utils/yellowfin.py @@ -0,0 +1,535 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""YellowFin for TensorFlow. Thanks Jian Zhang: zjian [@] stanford [.] edu.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +import numpy as np +import tensorflow as tf +from tensorflow.python.framework import ops + + +# Values for gate_gradients. +GATE_NONE = 0 +GATE_OP = 1 +GATE_GRAPH = 2 + + +class YellowFinOptimizer(tf.train.Optimizer): + """Optimizer that implements the YellowFin algorithm. + + See [Zhang et. al., 2017](https://arxiv.org/abs/1706.03471) for details. + """ + + def __init__(self, + learning_rate=1.0, + momentum=0.0, + clip_thresh=None, + beta=0.999, + curvature_window_width=20, + zero_debias=True, + delta_mu=0.0): + """Construct a new YellowFin optimizer. + + Args: + learning_rate: A Tensor or a floating point value. The learning rate. + momentum: A Tensor or a floating point value. The momentum. + clip_thresh: A Tensor or a floating point value. The cliping threshold for + tf.clip_by_global_norm. If None, no clipping will be carried out. + beta: A float value or a constant float tensor. The smoothing parameter + for estimations. + curvature_window_width: A int value or a constant int tensor. + The curvature window width. + zero_debias: A boolean, zero debias moving-averages. + delta_mu: For extensions. Not necessary in the basic use. + + Note: + clip_thresh is the threshold value on ||lr * gradient||, + delta_mu can be place holder/variable/tensor scalar. + They are used for additional momentum in situations such as + asynchronous-parallel training. + The default is 0.0(or None) for basic usage of the optimizer. + + Other features: + If you want to manually control the learning rates, self.lr_factor is + an interface to the outside, it is an multiplier for the internal + learning rate in YellowFin. It is helpful when you want to do additional + hand tuning or some decaying scheme to the tuned learning rate in + YellowFin. + Example on using lr_factor can be found here: + https://github.com/JianGoForIt/YellowFin/blob/master/char-rnn-tensorflow/train_YF.py#L140 + """ + # Set lr and mu + self._lr = learning_rate + self._mu = momentum + + # Set lr and mu tensor. + self._lr_var = tf.Variable(learning_rate, + dtype=tf.float32, + name="YF_lr", + trainable=False) + self._mu_var = tf.Variable(momentum, + dtype=tf.float32, + name="YF_mu", + trainable=False) + + # Tuning factor for learning rates step or decaying scheme. + self.lr_factor = tf.Variable(1.0, + dtype=tf.float32, + name="YF_lr_factor", + trainable=False) + + # Gradient Clipping Threshold. + if clip_thresh is not None: + self._clip_thresh_var = tf.Variable(clip_thresh, + dtype=tf.float32, + name="YF_clip_thresh", + trainable=False) + else: + self._clip_thresh_var = None + + # Set initial lr and mu for momentum. + self._lr_m = self._lr_var * self.lr_factor + self._mu_m = self._mu_var + delta_mu + + # Init momentum optimizer. + self._momentum_optimizer = tf.train.MomentumOptimizer( + self._lr_m, self._mu_m) + + # Moving average for statistics. + self._beta = beta + self._moving_averager = None + + # Step counting. + self._step = tf.Variable(0, + dtype=tf.int32, + name="YF_step", + trainable=False) + # YF_step + 1 op. + self._increment_step_op = None + + # For conditional tuning. + self._do_tune = tf.greater(self._step, tf.constant(0)) + + # Moving-averages. + self._zero_debias = zero_debias + + # For curvature range. + self.curvature_window_width = curvature_window_width + self._curv_win = None + + # Gradients and Variables. + self._grad = None + self._vars = None + + # Get per var g**2, norm**2 and mean(norm**2). + self._grad_squared = None + self._grad_norm_squared = None + self._grad_norm_squared_avg = None + + # Mean(grad) and Mean(grad**2) to compute Variance. + self._grad_avg = None + self._grad_avg_squared = None + + # Max and Min curvature variations. + self._h_max_t = None + self._h_min_t = None + self._h_min = None + self._h_max = None + + # Gradient Expected Variance. + self._grad_var = None + + # Gradient Norm and Mean(Gradient Norm). + self._grad_norm = None + self._grad_norm_avg = None + + # Distance to optimum and Mean(Distance to optimum). + self._d_t = None + self._dist_to_opt_avg = None + + # Maintains moving averages of variables + # by employing an exponential decay(Beta), + # and (zero_devias) moving-averages. + self._moving_averager = None + + def _curvature_range(self): + """Curvature range. + + Returns: + h_max_t, h_min_t ops + """ + self._curv_win = tf.Variable(np.zeros([self.curvature_window_width,]), + dtype=tf.float32, + name="curv_win", + trainable=False) + + self._curv_win = tf.scatter_update(self._curv_win, + self._step % self.curvature_window_width, + self._grad_norm_squared) + # Note here the iterations start from iteration 0 + valid_window = tf.slice(self._curv_win, + tf.constant([0,]), + tf.expand_dims( + tf.minimum( + tf.constant(self.curvature_window_width), + self._step + 1), axis=0)) + self._h_min_t = tf.reduce_min(valid_window) + self._h_max_t = tf.reduce_max(valid_window) + + curv_range_ops = [] + with tf.control_dependencies([self._h_min_t, self._h_max_t]): + avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t]) + with tf.control_dependencies([avg_op]): + self._h_min = tf.identity(self._moving_averager.average(self._h_min_t)) + self._h_max = tf.identity(self._moving_averager.average(self._h_max_t)) + curv_range_ops.append(avg_op) + return curv_range_ops # h_max_t, h_min_t + + def _grad_variance(self): + """Estimate of gradient Variance. + + Returns: + C_t ops. + """ + grad_var_ops = [] + tensor_to_avg = [] + for t, g in zip(self._vars, self._grad): + if isinstance(g, tf.IndexedSlices): + tensor_to_avg.append( + tf.reshape(tf.unsorted_segment_sum(g.values, + g.indices, + g.dense_shape[0]), + shape=t.get_shape())) + else: + tensor_to_avg.append(g) + avg_op = self._moving_averager.apply(tensor_to_avg) + grad_var_ops.append(avg_op) + with tf.control_dependencies([avg_op]): + self._grad_avg = [self._moving_averager.average(val) + for val in tensor_to_avg] + self._grad_avg_squared = [tf.square(val) for val in self._grad_avg] + self._grad_avg_squared = tf.add_n([tf.reduce_sum(val) + for val in self._grad_avg_squared]) + # Compute Variance + self._grad_var = self._grad_norm_squared_avg - self._grad_avg_squared + return grad_var_ops # C_t + + def _dist_to_opt(self): + """Distance to optimum. + + Returns: + D_t ops + """ + dist_to_opt_ops = [] + # Running average of the norm of gradeint + self._grad_norm = tf.sqrt(self._grad_norm_squared) + avg_op = self._moving_averager.apply([self._grad_norm,]) + dist_to_opt_ops.append(avg_op) + with tf.control_dependencies([avg_op]): + self._grad_norm_avg = self._moving_averager.average(self._grad_norm) + # Single iteration distance estimation, note here + # self._grad_norm_avg is per variable + self._d_t = self._grad_norm_avg / self._grad_norm_squared_avg + # Running average of distance + avg_op = self._moving_averager.apply([self._d_t]) + dist_to_opt_ops.append(avg_op) + with tf.control_dependencies([avg_op]): + self._dist_to_opt_avg = tf.identity( + self._moving_averager.average(self._d_t)) + return dist_to_opt_ops # D_t + + def _prepare_variables(self): + """Prepare Variables for YellowFin. + + Returns: + Grad**2, Norm, Norm**2, Mean(Norm**2) ops + """ + self._moving_averager = tf.train.ExponentialMovingAverage( + decay=self._beta, zero_debias=self._zero_debias) + assert self._grad + # List for the returned Operations + prepare_variables_op = [] + + # Get per var g**2 and norm**2 + self._grad_squared = [] + self._grad_norm_squared = [] + + # Gradient squared + for v, g in zip(self._vars, self._grad): + if g is None: continue + with ops.colocate_with(v): + self._grad_squared.append(tf.square(g)) + + # Norm squared. + self._grad_norm_squared = [tf.reduce_sum(g_sq) + for g_sq in self._grad_squared] + + # The following running average on squared norm of gradient + # is shared by grad_var and dist_to_opt + avg_op = self._moving_averager.apply(self._grad_norm_squared) + + with tf.control_dependencies([avg_op]): + self._grad_norm_squared_avg = [self._moving_averager.average(val) + for val in self._grad_norm_squared] + self._grad_norm_squared = tf.add_n(self._grad_norm_squared) + self._grad_norm_squared_avg = tf.add_n(self._grad_norm_squared_avg) + + prepare_variables_op.append(avg_op) + return tf.group(*prepare_variables_op) + + def _get_lr_tensor(self): + """Get lr minimzing the surrogate. + + Returns: + The lr_t. + """ + lr = (1.0 - tf.sqrt(self._mu))**2 / self._h_min + return lr + + def _get_mu_tensor(self): + """Get the min mu which minimize the surrogate. + + Returns: + The mu_t. + """ + const_fact = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var + coef = tf.Variable([-1.0, 3.0, 0.0, 1.0], + dtype=tf.float32, + name="cubic_solver_coef") + coef = tf.scatter_update(coef, + tf.constant(2), + -(3 + const_fact)) + roots = tf.py_func(np.roots, + [coef], + Tout=tf.complex64, + stateful=False) + + # Filter out the correct root + root_idx = tf.logical_and( + tf.logical_and( + tf.greater(tf.real(roots), tf.constant(0.0)), + tf.less(tf.real(roots), tf.constant(1.0))), + tf.less(tf.abs(tf.imag(roots)), 1e-5)) + + # In case there are two duplicated roots satisfying the above condition + root = tf.reshape(tf.gather(tf.gather(roots, tf.where(root_idx)), + tf.constant(0)), + shape=[]) + + dr = self._h_max / self._h_min + mu = tf.maximum(tf.real(root)**2, ((tf.sqrt(dr) - 1)/(tf.sqrt(dr) + 1))**2) + return mu + + def _yellowfin(self): + """YellowFin auto-tuning optimizer based on momentum SGD. + + Returns: + YF ops + (Curvature range, + Grad_variance, + Dist_to_opt, + Single-Step, + Auto-Tuning) + """ + # List for the returned Operations. + yellowfin_ops = [] + + # Curvature range ops. + curv_range_ops = self._curvature_range() + yellowfin_ops += curv_range_ops + # Estimate of gradient Variance ops. + grad_var_ops = self._grad_variance() + yellowfin_ops += grad_var_ops + # Distance to optimum ops. + dist_to_opt_ops = self._dist_to_opt() + yellowfin_ops += dist_to_opt_ops + + # Single-Step: minimizes the surrogate for the expected + # squared distance from the optimum of a local quadratic + # approximation after a single step while keeping all directions in the + # robust region. + self._mu = tf.identity(tf.cond(self._do_tune, self._get_mu_tensor, + lambda: self._mu_var)) + with tf.control_dependencies([self._mu]): + self._lr = tf.identity(tf.cond(self._do_tune, + self._get_lr_tensor, + lambda: self._lr_var)) + + # Tune learning rate and momentum. + with tf.control_dependencies([self._mu, self._lr]): + self._mu = self._beta * self._mu_var + (1 - self._beta) * self._mu + self._lr = self._beta * self._lr_var + (1 - self._beta) * self._lr + yellowfin_ops.append(tf.assign(self._mu_var, self._mu)) + yellowfin_ops.append(tf.assign(self._lr_var, self._lr)) + + yellowfin_ops = tf.group(*yellowfin_ops) + return yellowfin_ops + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + """Applying gradients aand tune hyperparams with YellowFin. + + Args: + grads_and_vars: List of (gradient, variable) pairs as returned by + compute_gradients(). + global_step: Optional Variable to increment by one after the + variables have been updated. + name: Optional name for the returned operation. Default to the + name passed to the Optimizer constructor. + + Returns: + (A group of operations) + Variable Update with Momentum ops, + YellowFin ops(Curvature, Variance, Distance) ops, + SingleStep and lr_mu tuning ops, + Step increment ops. + + """ + self._grad, self._vars = zip(*[(g, t) + for g, t in grads_and_vars if g is not None]) + + # Var update with Momentum. + with tf.variable_scope("apply_updates"): + # Gradient Clipping? + if self._clip_thresh_var is not None: + self._grads_clip, self._grads_norm = tf.clip_by_global_norm( + self._grad, self._clip_thresh_var) + + apply_grad_op = self._momentum_optimizer.apply_gradients( + zip(self._grads_clip, self._vars), global_step=global_step) + else: + apply_grad_op = self._momentum_optimizer.apply_gradients( + zip(self._grad, self._vars), global_step=global_step) + + # Begin lr and mu tuning. + with tf.variable_scope("prepare_yellowFin_variables"): + prepare_variables_op = self._prepare_variables() + + with tf.variable_scope("yellowfin"): + with tf.control_dependencies([prepare_variables_op]): + yellowfin_op = self._yellowfin() + + # Update YellowFin step variable. + with tf.control_dependencies([yellowfin_op]): + self._increment_step_op = tf.assign_add(self._step, 1).op + + return tf.group(apply_grad_op, + prepare_variables_op, + yellowfin_op, + self._increment_step_op) + + def compute_gradients(self, + loss, + var_list, + global_step=None, + gate_gradients=GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + name=None, + grad_loss=None): + """Compute gradients through momentum optimizer. + + Args: + loss: A Tensor containing the value to minimize. + var_list: Optional list or tuple of tf.Variable to update + to minimize loss. Defaults to the list of variables collected + in the graph under the key GraphKey.TRAINABLE_VARIABLES. + global_step: Optional Variable to increment by one after the + variables have been updated. + gate_gradients: How to gate the computation of gradients. + Can be GATE_NONE, GATE_OP, or GATE_GRAPH. + aggregation_method: Specifies the method used to combine + gradient terms. Valid values are defined in the class AggregationMethod. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. + name: Optional name for the returned operation. Default to the name + passed to the Optimizer constructor. + grad_loss: Optional. A Tensor holding the gradient computed for loss. + + Returns: + A list of (gradient, variable) pairs. Variable is always present, + but gradient can be None. + """ + return self._momentum_optimizer.compute_gradients( + loss, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + grad_loss=grad_loss) + + def minimize(self, + loss, + global_step=None, + var_list=None, + gate_gradients=GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + name=None, + grad_loss=None): + """Adapted from Tensorflow Optimizer base class member function. + + Add operations to minimize `loss` by updating `var_list`. + This method simply combines calls `compute_gradients()` and + `apply_gradients()`. If you want to process the gradient before applying + them call `tf.gradients()` and `self.apply_gradients()` explicitly instead + of using this function. + + Args: + loss: A Tensor containing the value to minimize. + global_step: Optional Variable to increment by one after the variables + have been updated. + var_list: Optional list or tuple of Variable objects to update to + minimize loss. Defaults to the list of variables collected in + the graph under the key GraphKeys.TRAINABLE_VARIABLES. + gate_gradients: How to gate the computation of gradients. + Can be GATE_NONE, GATE_OP, or GATE_GRAPH. + aggregation_method: Specifies the method used to combine gradient terms. + Valid values are defined in the class AggregationMethod. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. + name: Optional name for the returned operation. + grad_loss: Optional. A Tensor holding the gradient computed for loss. + + Returns: + An Operation that updates the variables in var_list. + If global_step was not None, that operation also increments global_step. + + Raises: + ValueError: if no gradients are provided for any variable. + """ + grads_and_vars = self._optimizer.compute_gradients( + loss, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + grad_loss=grad_loss) + + vars_with_grad = [v for g, v in grads_and_vars if g is not None] + if not vars_with_grad: + raise ValueError( + "No gradients provided for any variable, check your graph for ops" + " that do not support gradients, between variables %s and loss %s." % + ([str(v) for _, v in grads_and_vars], loss)) + for g, v in grads_and_vars: + print("g ", g) + print("v ", v) + + return self.apply_gradients(grads_and_vars, global_step=global_step) diff --git a/tensor2tensor/utils/yellowfin_test.py b/tensor2tensor/utils/yellowfin_test.py new file mode 100644 index 000000000..c4727175b --- /dev/null +++ b/tensor2tensor/utils/yellowfin_test.py @@ -0,0 +1,213 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""YellowFin Test Module for TensorFlow.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +import numpy as np + +from tensor2tensor.utils.yellowfin import YellowFinOptimizer + +import tensorflow as tf + + +n_dim = 1000000 +n_iter = 0 + + +class YellowFinTest(tf.test.TestCase): + + def tuneEverything(self, x0squared, c, t, gmin, gmax): + # First tune based on dynamic range + if c == 0: + dr = gmax / gmin + mustar = ((np.sqrt(dr) - 1) / (np.sqrt(dr) + 1))**2 + alpha_star = (1 + np.sqrt(mustar))**2/gmax + + return alpha_star, mustar + + dist_to_opt = x0squared + grad_var = c + max_curv = gmax + min_curv = gmin + const_fact = dist_to_opt * min_curv**2 / 2 / grad_var + coef = [-1, 3, -(3 + const_fact), 1] + roots = np.roots(coef) + roots = roots[np.real(roots) > 0] + roots = roots[np.real(roots) < 1] + root = roots[np.argmin(np.imag(roots))] + + assert root > 0 and root < 1 and np.absolute(root.imag) < 1e-6 + + dr = max_curv / min_curv + assert max_curv >= min_curv + mu = max(((np.sqrt(dr) - 1) / (np.sqrt(dr) + 1))**2, root**2) + + lr_min = (1 - np.sqrt(mu))**2 / min_curv + + alpha_star = lr_min + mustar = mu + + return alpha_star, mustar + + def testMeasurement(self): + opt = YellowFinOptimizer(zero_debias=False) + w = tf.Variable(np.ones([n_dim,]), + dtype=tf.float32, + name="w", + trainable=True) + b = tf.Variable(np.ones([1,], dtype=np.float32), + dtype=tf.float32, + name="b", + trainable=True) + x = tf.constant(np.ones([n_dim,], dtype=np.float32), + dtype=tf.float32) + _ = tf.multiply(w, x) + b # loss + tvars = tf.trainable_variables() + + w_grad_val = tf.placeholder(tf.float32, shape=(n_dim,)) + b_grad_val = tf.placeholder(tf.float32, shape=(1,)) + apply_op = opt.apply_gradients(zip([w_grad_val, b_grad_val], tvars)) + + init_op = tf.global_variables_initializer() + with tf.Session() as sess: + sess.run(init_op) + target_h_max = 0.0 + target_h_min = 0.0 + g_norm_squared_avg = 0.0 + g_norm_avg = 0.0 + g_avg = 0.0 + target_dist = 0.0 + for i in range(n_iter): + feed_dict = {w_grad_val: (i + 1) * np.ones([n_dim,], dtype=np.float32), + b_grad_val: (i + 1) * np.ones([1,], dtype=np.float32)} + res = sess.run([opt._curv_win, + opt._h_max, + opt._h_min, + opt._grad_var, + opt._dist_to_opt_avg, + apply_op], feed_dict=feed_dict) + + g_norm_squared_avg = ( + 0.999 * g_norm_squared_avg + + 0.001 * np.sum(((i + 1) * np.ones([n_dim + 1,]))**2)) + g_norm_avg = (0.999 * g_norm_avg + + 0.001 * np.linalg.norm((i + 1)*np.ones([n_dim + 1,]))) + g_avg = 0.999 * g_avg + 0.001 * (i + 1) + + target_h_max = 0.999 * target_h_max + 0.001 * (i + 1)**2*(n_dim + 1) + target_h_min = (0.999 * target_h_min + + 0.001 * max(1, i + 2 - 20)**2 * (n_dim + 1)) + target_var = g_norm_squared_avg - g_avg**2 * (n_dim + 1) + target_dist = (0.999 * target_dist + + 0.001 * g_norm_avg / g_norm_squared_avg) + + assert np.abs(target_h_max - res[1]) < np.abs(target_h_max) * 1e-3 + assert np.abs(target_h_min - res[2]) < np.abs(target_h_min) * 1e-3 + assert np.abs(target_var - res[3]) < np.abs(res[3]) * 1e-3 + assert np.abs(target_dist - res[4]) < np.abs(res[4]) * 1e-3 + + def testLrMu(self): + opt = YellowFinOptimizer(learning_rate=0.5, momentum=0.5, zero_debias=False) + w = tf.Variable(np.ones([n_dim,]), + dtype=tf.float32, + name="w", + trainable=True) + b = tf.Variable(np.ones([1,], + dtype=np.float32), + dtype=tf.float32, + name="b", + trainable=True) + x = tf.constant(np.ones([n_dim,], dtype=np.float32), dtype=tf.float32) + _ = tf.multiply(w, x) + b # loss + tvars = tf.trainable_variables() + + w_grad_val = tf.Variable(np.zeros([n_dim,]), + dtype=tf.float32, + trainable=False) + b_grad_val = tf.Variable(np.zeros([1,]), + dtype=tf.float32, + trainable=False) + apply_op = opt.apply_gradients(zip([w_grad_val, b_grad_val], tvars)) + + init_op = tf.global_variables_initializer() + with tf.Session() as sess: + sess.run(init_op) + target_h_max = 0.0 + target_h_min = 0.0 + g_norm_squared_avg = 0.0 + g_norm_avg = 0.0 + g_avg = 0.0 + target_dist = 0.0 + target_lr = 0.5 + target_mu = 0.5 + for i in range(n_iter): + + sess.run(tf.assign(w_grad_val, (i + 1) * np.ones([n_dim,], + dtype=np.float32))) + sess.run(tf.assign(b_grad_val, (i + 1) * np.ones([1,], + dtype=np.float32))) + + res = sess.run([opt._curv_win, + opt._h_max, + opt._h_min, + opt._grad_var, + opt._dist_to_opt_avg, + opt._lr_var, + opt._mu_var, + apply_op]) + + res[5] = opt._lr_var.eval() + res[6] = opt._mu_var.eval() + + g_norm_squared_avg = ( + 0.999 * g_norm_squared_avg + + 0.001 * np.sum(((i + 1) * np.ones([n_dim + 1,]))**2)) + g_norm_avg = (0.999 * g_norm_avg + + 0.001 * np.linalg.norm((i + 1)*np.ones([n_dim + 1,]))) + g_avg = 0.999 * g_avg + 0.001 * (i + 1) + + target_h_max = 0.999 * target_h_max + 0.001 * (i + 1)**2 * (n_dim + 1) + target_h_min = (0.999 * target_h_min + + 0.001 * max(1, i + 2 - 20)**2 * (n_dim + 1)) + target_var = g_norm_squared_avg - g_avg**2 * (n_dim + 1) + target_dist = (0.999 * target_dist + + 0.001 * g_norm_avg / g_norm_squared_avg) + + if i > 0: + lr, mu = self.tuneEverything(target_dist**2, + target_var, + 1, + target_h_min, + target_h_max) + target_lr = 0.999 * target_lr + 0.001 * lr + target_mu = 0.999 * target_mu + 0.001 * mu + + assert np.abs(target_h_max - res[1]) < np.abs(target_h_max) * 1e-3 + assert np.abs(target_h_min - res[2]) < np.abs(target_h_min) * 1e-3 + assert np.abs(target_var - res[3]) < np.abs(res[3]) * 1e-3 + assert np.abs(target_dist - res[4]) < np.abs(res[4]) * 1e-3 + assert (target_lr == 0.0 or + (np.abs(target_lr - res[5]) < np.abs(res[5]) * 1e-3)) + assert (target_mu == 0.0 or + (np.abs(target_mu - res[6]) < np.abs(res[6]) * 5e-3)) + + +if __name__ == "__main__": + tf.test.main() From 1b1d7ed13fe71d5127cc5b50096ed548105e0113 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 14 Jul 2017 16:35:49 -0700 Subject: [PATCH 0125/4095] Port WMT en-de tokens 8k/32k to new Problem registry PiperOrigin-RevId: 162025600 --- tensor2tensor/bin/t2t-datagen | 8 -- tensor2tensor/data_generators/algorithmic.py | 8 +- .../data_generators/generator_utils.py | 11 ++ tensor2tensor/data_generators/problem.py | 28 +++-- .../data_generators/problem_hparams.py | 23 ---- tensor2tensor/data_generators/wmt.py | 106 ++++++++++++------ tensor2tensor/utils/trainer_utils_test.py | 9 +- 7 files changed, 109 insertions(+), 84 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index ca70a06b0..44e4b34d3 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -134,14 +134,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { "wmt_ende_bpe32k": ( lambda: wmt.ende_bpe_token_generator(FLAGS.tmp_dir, True), lambda: wmt.ende_bpe_token_generator(FLAGS.tmp_dir, False)), - "wmt_ende_tokens_8k": ( - lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**13), - lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**13) - ), - "wmt_ende_tokens_32k": ( - lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**15), - lambda: wmt.ende_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**15) - ), "wmt_zhen_tokens_32k": ( lambda: wmt.zhen_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**15, 2**15), diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index a9e47abe0..6ec1f28a0 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -37,12 +37,12 @@ def num_symbols(self): return 2 def generate_data(self, data_dir, _): - utils.generate_files( + utils.generate_dataset_and_shuffle( identity_generator(self.num_symbols, 40, 100000), - self.training_filepaths(data_dir, 100)) - utils.generate_files( + self.training_filepaths(data_dir, 100, shuffled=True), identity_generator(self.num_symbols, 400, 10000), - self.dev_filepaths(data_dir, 1)) + self.dev_filepaths(data_dir, 1, shuffled=True), + shuffle=False) def hparams(self, defaults, unused_model_hparams): p = defaults diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 739655301..890f92c2a 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -359,6 +359,17 @@ def write_records(records, out_filename): writer.close() +def generate_dataset_and_shuffle(train_gen, + train_paths, + dev_gen, + dev_paths, + shuffle=True): + generate_files(train_gen, train_paths) + generate_files(dev_gen, dev_paths) + if shuffle: + shuffle_dataset(train_paths + dev_paths) + + def shuffle_dataset(filenames): tf.logging.info("Shuffling data...") for fname in filenames: diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 179c2844c..28f4dcb1b 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -78,12 +78,18 @@ class Problem(object): New problems are specified by the following methods: Data generation: - * generate_data(data_dir) + * generate_data(data_dir, tmp_dir) - Generate training and dev datasets into data_dir. - Additonal files, e.g. vocabulary files, should also be written to data_dir. + - Downloads and other files can be written to tmp_dir + - If you have a training and dev generator, you can generate the + training and dev datasets with + generator_utils.generate_dataset_and_shuffle. - Use the self.training_filepaths and self.dev_filepaths functions to - get sharded filenames. + get sharded filenames. If shuffled=False, the filenames will contain + an "unshuffled" suffix; you should then shuffle the data + shard-by-shard with generator_utils.shuffle_dataset. - Subclasses must override * dataset_filename() - Base filename for problem. @@ -125,13 +131,17 @@ def feature_encoders(self, data_dir): # END SUBCLASS INTERFACE # ============================================================================ - def training_filepaths(self, data_dir, num_shards): - return utils.train_data_filenames(self.dataset_filename(), data_dir, - num_shards) - - def dev_filepaths(self, data_dir, num_shards): - return utils.dev_data_filenames(self.dataset_filename(), data_dir, - num_shards) + def training_filepaths(self, data_dir, num_shards, shuffled): + file_basename = self.dataset_filename() + if not shuffled: + file_basename += utils.UNSHUFFLED_SUFFIX + return utils.train_data_filenames(file_basename, data_dir, num_shards) + + def dev_filepaths(self, data_dir, num_shards, shuffled): + file_basename = self.dataset_filename() + if not shuffled: + file_basename += utils.UNSHUFFLED_SUFFIX + return utils.dev_data_filenames(file_basename, data_dir, num_shards) def __init__(self, was_reversed=False, was_copy=False): """Create a Problem. diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 2f7ad5cd3..70b9dada8 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -456,26 +456,6 @@ def wmt_ende_characters(unused_model_hparams): return p -def wmt_ende_tokens(model_hparams, wrong_vocab_size): - """English to German translation benchmark.""" - p = default_problem_hparams() - # This vocab file must be present within the data directory. - vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.%d" % wrong_vocab_size) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.SYMBOL, subtokenizer.vocab_size) - } - p.target_modality = (registry.Modalities.SYMBOL, subtokenizer.vocab_size) - p.vocabulary = { - "inputs": subtokenizer, - "targets": subtokenizer, - } - p.input_space_id = 3 - p.target_space_id = 8 - return p - - def wmt_zhen_tokens(model_hparams, wrong_vocab_size): """Chinese to English translation benchmark.""" p = default_problem_hparams() @@ -751,9 +731,6 @@ def img2img_imagenet(unused_model_hparams): "wmt_enfr_tokens_32k_combined": lambda p: wmt_enfr_tokens(p, 2**15), "wmt_enfr_tokens_128k": lambda p: wmt_enfr_tokens(p, 2**17), "wmt_ende_characters": wmt_ende_characters, - "wmt_ende_tokens_8k": lambda p: wmt_ende_tokens(p, 2**13), - "wmt_ende_tokens_32k": lambda p: wmt_ende_tokens(p, 2**15), - "wmt_ende_tokens_128k": lambda p: wmt_ende_tokens(p, 2**17), "wmt_ende_bpe32k": wmt_ende_bpe32k, "wmt_zhen_tokens_32k": lambda p: wmt_zhen_tokens(p, 2**15), "image_cifar10_tune": image_cifar10, diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 573a3bcad..8edab8ba2 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -24,20 +24,64 @@ # Dependency imports from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import registry import tensorflow as tf - tf.flags.DEFINE_string("ende_bpe_path", "", "Path to BPE files in tmp_dir." "Download from https://drive.google.com/open?" "id=0B_bZck-ksdkpM25jRUN2X2UxMm8") - FLAGS = tf.flags.FLAGS +@registry.register_problem("wmt_ende_tokens_8k") +class WMTEnDeTokens8k(problem.Problem): + """Problem spec for WMT En-De translation.""" + + @property + def target_vocab_size(self): + return 2**13 # 8192 + + def feature_encoders(self, data_dir): + return _default_wmt_feature_encoders(data_dir, self.target_vocab_size) + + def generate_data(self, data_dir, tmp_dir): + generator_utils.generate_dataset_and_shuffle( + ende_wordpiece_token_generator(tmp_dir, True, self.target_vocab_size), + self.training_filepaths(data_dir, 100, shuffled=False), + ende_wordpiece_token_generator(tmp_dir, False, self.target_vocab_size), + self.dev_filepaths(data_dir, 1, shuffled=False)) + + def hparams(self, defaults, unused_model_hparams): + p = defaults + vocab_size = self._encoders["inputs"].vocab_size + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} + p.target_modality = (registry.Modalities.SYMBOL, vocab_size) + p.input_space_id = problem.SpaceID.EN_TOK + p.target_space_id = problem.SpaceID.DE_TOK + + +@registry.register_problem("wmt_ende_tokens_32k") +class WMTEnDeTokens32k(WMTEnDeTokens8k): + + @property + def target_vocab_size(self): + return 2**15 # 32768 + + +def _default_wmt_feature_encoders(data_dir, target_vocab_size): + vocab_filename = os.path.join(data_dir, "tokens.vocab.%d" % target_vocab_size) + subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) + return { + "inputs": subtokenizer, + "targets": subtokenizer, + } + + # End-of-sentence marker. EOS = text_encoder.EOS_TOKEN @@ -130,7 +174,8 @@ def token_generator(source_path, target_path, token_vocab, eos=None): source, target = source_file.readline(), target_file.readline() -def bi_vocabs_token_generator(source_path, target_path, +def bi_vocabs_token_generator(source_path, + target_path, source_token_vocab, target_token_vocab, eos=None): @@ -184,8 +229,8 @@ def ende_bpe_token_generator(tmp_dir, train): train_path = _get_wmt_ende_dataset(tmp_dir, dataset_path) token_path = os.path.join(tmp_dir, "vocab.bpe.32000") token_vocab = text_encoder.TokenTextEncoder(vocab_filename=token_path) - return token_generator(train_path + ".en", train_path + ".de", - token_vocab, EOS) + return token_generator(train_path + ".en", train_path + ".de", token_vocab, + EOS) _ENDE_TRAIN_DATASETS = [ @@ -240,22 +285,15 @@ def ende_bpe_token_generator(tmp_dir, train): ], ] -_ZHEN_TRAIN_DATASETS = [ - [ - ("http://data.statmt.org/wmt17/translation-task/" - "training-parallel-nc-v12.tgz"), - ("training/news-commentary-v12.zh-en.zh", - "training/news-commentary-v12.zh-en.en") - ] -] +_ZHEN_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" + "training-parallel-nc-v12.tgz"), + ("training/news-commentary-v12.zh-en.zh", + "training/news-commentary-v12.zh-en.en")]] -_ZHEN_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.zh", - "dev/newsdev2017-zhen-ref.en") - ] -] +_ZHEN_TEST_DATASETS = [[ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newsdev2017-zhen-src.zh", "dev/newsdev2017-zhen-ref.en") +]] def _compile_data(tmp_dir, datasets, filename): @@ -317,23 +355,21 @@ def ende_character_generator(tmp_dir, train): character_vocab, EOS) -def zhen_wordpiece_token_generator(tmp_dir, train, - source_vocab_size, +def zhen_wordpiece_token_generator(tmp_dir, train, source_vocab_size, target_vocab_size): """Wordpiece generator for the WMT'17 zh-en dataset.""" datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS source_datasets = [[item[0], [item[1][0]]] for item in datasets] target_datasets = [[item[0], [item[1][1]]] for item in datasets] source_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.zh.%d" % source_vocab_size, - source_vocab_size, source_datasets) + tmp_dir, "tokens.vocab.zh.%d" % source_vocab_size, source_vocab_size, + source_datasets) target_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.en.%d" % target_vocab_size, - target_vocab_size, target_datasets) + tmp_dir, "tokens.vocab.en.%d" % target_vocab_size, target_vocab_size, + target_datasets) tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) - return bi_vocabs_token_generator(data_path + ".lang1", - data_path + ".lang2", + return bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", source_vocab, target_vocab, EOS) @@ -366,17 +402,15 @@ def parsing_character_generator(tmp_dir, train): return character_generator(text_filepath, tags_filepath, character_vocab, EOS) -def tabbed_parsing_token_generator(tmp_dir, train, prefix, - source_vocab_size, target_vocab_size): +def tabbed_parsing_token_generator(tmp_dir, train, prefix, source_vocab_size, + target_vocab_size): """Generate source and target data from a single file.""" source_vocab = generator_utils.get_or_generate_tabbed_vocab( tmp_dir, "parsing_train.pairs", 0, - prefix + "_source.tokens.vocab.%d" % source_vocab_size, - source_vocab_size) + prefix + "_source.tokens.vocab.%d" % source_vocab_size, source_vocab_size) target_vocab = generator_utils.get_or_generate_tabbed_vocab( tmp_dir, "parsing_train.pairs", 1, - prefix + "_target.tokens.vocab.%d" % target_vocab_size, - target_vocab_size) + prefix + "_target.tokens.vocab.%d" % target_vocab_size, target_vocab_size) filename = "parsing_%s" % ("train" if train else "dev") pair_filepath = os.path.join(tmp_dir, filename + ".pairs") return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) @@ -395,5 +429,5 @@ def parsing_token_generator(tmp_dir, train, vocab_size): tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size) filename = "%s_%s.trees" % (FLAGS.parsing_path, "train" if train else "dev") tree_filepath = os.path.join(tmp_dir, filename) - return wsj_parsing.token_generator(tree_filepath, - symbolizer_vocab, symbolizer_vocab, EOS) + return wsj_parsing.token_generator(tree_filepath, symbolizer_vocab, + symbolizer_vocab, EOS) diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index b11b4a81d..3ed86952b 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -34,13 +34,13 @@ @registry.register_problem class TinyAlgo(algorithmic.AlgorithmicIdentityBinary40): - def generate_data(self, data_dir): + def generate_data(self, data_dir, _): generator_utils.generate_files( algorithmic.identity_generator(self.num_symbols, 40, 100000), - self.training_filepaths(data_dir, 1), 100) + self.training_filepaths(data_dir, 1, shuffled=True), 100) generator_utils.generate_files( algorithmic.identity_generator(self.num_symbols, 400, 10000), - self.dev_filepaths(data_dir, 1), 100) + self.dev_filepaths(data_dir, 1, shuffled=True), 100) @registry.register_hparams @@ -61,7 +61,8 @@ def setUpClass(cls): # Generate a small test dataset FLAGS.problems = "tiny_algo" TrainerUtilsTest.data_dir = tf.test.get_temp_dir() - registry.problem(FLAGS.problems).generate_data(TrainerUtilsTest.data_dir) + registry.problem(FLAGS.problems).generate_data(TrainerUtilsTest.data_dir, + None) def testModelsImported(self): models = registry.list_models() From a80273f4b14b55e5a17d777ed75acc63fbdeb31a Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 14 Jul 2017 16:35:58 -0700 Subject: [PATCH 0126/4095] Added checks for making sure the key and value depths are divisible by the number of attention heads when doing multihead attention. PiperOrigin-RevId: 162025615 --- tensor2tensor/models/common_attention.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index 4470b86a0..49cd40285 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -374,7 +374,18 @@ def multihead_attention(query_antecedent, Returns: A Tensor. + + Raises: + ValueError: if the key depth or value depth are not divisible by the + number of attention heads. """ + if total_key_depth % num_heads != 0: + raise ValueError("Key depth (%d) must be divisible by the number of " + "attention heads (%d)." % (total_key_depth, num_heads)) + if total_value_depth % num_heads != 0: + raise ValueError("Value depth (%d) must be divisible by the number of " + "attention heads (%d)." % (total_value_depth, num_heads)) + with tf.variable_scope( name, default_name="multihead_attention", From c8b70000535c9b80b04ad6d071521bffe6b642bf Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 14 Jul 2017 16:37:02 -0700 Subject: [PATCH 0127/4095] Version bump 1.0.14 PiperOrigin-RevId: 162025739 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b70966986..00325cff2 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.13', + version='1.0.14', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From f7f3346aa71fc4e681b3d41ac7ae77097c1c46c7 Mon Sep 17 00:00:00 2001 From: Stefan Schweter <stefan@schweter.it> Date: Sun, 16 Jul 2017 01:04:47 +0200 Subject: [PATCH 0128/4095] Add data file urls for Macedonian-English --- tensor2tensor/data_generators/generator_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 890f92c2a..9e3e7db2f 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -244,6 +244,11 @@ def gunzip_file(gz_path, new_path): "http://www.statmt.org/wmt13/training-parallel-un.tgz", ["un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr"] ], + # Macedonian-English + [ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long + ["train.mk", "train.en"] + ], ] From f34ab16d89c33b0686662539b5776cde7756b037 Mon Sep 17 00:00:00 2001 From: Stefan Schweter <stefan@schweter.it> Date: Sun, 16 Jul 2017 01:05:46 +0200 Subject: [PATCH 0129/4095] Add id for Macedonian tokens --- tensor2tensor/data_generators/problem.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 28f4dcb1b..992aa3410 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -67,6 +67,8 @@ class SpaceID(object): ICE_TOK = 18 # Icelandic parse tokens ICE_PARSE_TOK = 19 + # Macedonian tokens + MK_TOK = 20 class Problem(object): From 48997b50f203b7429df02433ec75f3daf0ce0ea6 Mon Sep 17 00:00:00 2001 From: Stefan Schweter <stefan@schweter.it> Date: Sun, 16 Jul 2017 01:07:19 +0200 Subject: [PATCH 0130/4095] Register new problem: setimes_mken_tokens_32k for Macedonian to English translation (with SETimes corpus) --- tensor2tensor/data_generators/wmt.py | 53 ++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 8edab8ba2..504336b52 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -81,6 +81,31 @@ def _default_wmt_feature_encoders(data_dir, target_vocab_size): "targets": subtokenizer, } +@registry.register_problem("setimes_mken_tokens_32k") +class SETimesMkEnTokens32k(problem.Problem): + """Problem spec for SETimes Mk-En translation.""" + + @property + def target_vocab_size(self): + return 2**15 # 32768 + + def feature_encoders(self, data_dir): + return _default_wmt_feature_encoders(data_dir, self.target_vocab_size) + + def generate_data(self, data_dir, tmp_dir): + generator_utils.generate_dataset_and_shuffle( + mken_wordpiece_token_generator(tmp_dir, True, self.target_vocab_size), + self.training_filepaths(data_dir, 100, shuffled=False), + mken_wordpiece_token_generator(tmp_dir, False, self.target_vocab_size), + self.dev_filepaths(data_dir, 1, shuffled=False)) + + def hparams(self, defaults, unused_model_hparams): + p = defaults + vocab_size = self._encoders["inputs"].vocab_size + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} + p.target_modality = (registry.Modalities.SYMBOL, vocab_size) + p.input_space_id = problem.SpaceID.MK_TOK + p.target_space_id = problem.SpaceID.EN_TOK # End-of-sentence marker. EOS = text_encoder.EOS_TOKEN @@ -295,6 +320,21 @@ def ende_bpe_token_generator(tmp_dir, train): ("dev/newsdev2017-zhen-src.zh", "dev/newsdev2017-zhen-ref.en") ]] +# For Macedonian-English the SETimes corpus +# from http://nlp.ffzg.hr/resources/corpora/setimes/ is used. +# The original dataset has 207,777 parallel sentences. +# For training the first 205,777 sentences are used. +_MKEN_TRAIN_DATASETS = [[ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long + ("train.mk", "train.en") +]] + +# For development 1000 parallel sentences are used. +_MKEN_TEST_DATASETS = [[ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.dev.tgz", # pylint: disable=line-too-long + ("dev.mk", "dev.en") +]] + def _compile_data(tmp_dir, datasets, filename): """Concatenate all `datasets` and save to `filename`.""" @@ -393,6 +433,19 @@ def enfr_character_generator(tmp_dir, train): return character_generator(data_path + ".lang1", data_path + ".lang2", character_vocab, EOS) +def mken_wordpiece_token_generator(tmp_dir, train, vocab_size): + """Wordpiece generator for the SETimes Mk-En dataset.""" + datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size, + source_datasets + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + def parsing_character_generator(tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() From e2ed8ed3b55f64c05688cb8852f465131140fa2e Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Mon, 17 Jul 2017 17:04:31 +0000 Subject: [PATCH 0131/4095] Bug fixes in generator_utils and trainer_utils --- tensor2tensor/data_generators/generator_utils.py | 7 ++++--- tensor2tensor/utils/trainer_utils.py | 9 ++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/generator_utils.py mode change 100644 => 100755 tensor2tensor/utils/trainer_utils.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100644 new mode 100755 index 890f92c2a..cacad12fc --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -324,6 +324,7 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, return vocab # Use Tokenizer to count the word occurrences. + token_counts = defaultdict(int) filepath = os.path.join(tmp_dir, source_filename) with tf.gfile.GFile(filepath, mode="r") as source_file: for line in source_file: @@ -331,11 +332,11 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, if line and "\t" in line: parts = line.split("\t", maxsplit=1) part = parts[index].strip() - _ = tokenizer.encode(text_encoder.native_to_unicode(part)) + for tok in tokenizer.encode(text_encoder.native_to_unicode(part)): + token_counts[tok] += 1 vocab = text_encoder.SubwordTextEncoder.build_to_target_size( - vocab_size, tokenizer.token_counts, 1, - min(1e3, vocab_size + text_encoder.NUM_RESERVED_TOKENS)) + vocab_size, token_counts, 1, 1e3) vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py old mode 100644 new mode 100755 index b5894904d..66a01487c --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -585,6 +585,7 @@ def decode_from_dataset(estimator): tf.logging.info("Performing local inference.") infer_problems_data = get_datasets_for_mode(hparams.data_dir, tf.contrib.learn.ModeKeys.INFER) + infer_input_fn = get_input_fn( mode=tf.contrib.learn.ModeKeys.INFER, hparams=hparams, @@ -625,9 +626,11 @@ def log_fn(inputs, # The function predict() returns an iterable over the network's # predictions from the test input. We use it to log inputs and decodes. - for j, result in enumerate(result_iter): - inputs, targets, outputs = (result["inputs"], result["targets"], - result["outputs"]) + inputs_iter = result_iter["inputs"] + targets_iter = result_iter["targets"] + outputs_iter = result_iter["outputs"] + for j, result in enumerate(zip(inputs_iter, targets_iter, outputs_iter)): + inputs, targets, outputs = result if FLAGS.decode_return_beams: output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) for k, beam in enumerate(output_beams): From 042c44c9ed83913e71025a228d2af5c6a8208bd2 Mon Sep 17 00:00:00 2001 From: Huyen Nguyen <huyenn@netflix.com> Date: Tue, 18 Jul 2017 10:58:22 -0700 Subject: [PATCH 0132/4095] take in account FLAGS.num_shards --- tensor2tensor/bin/t2t-datagen | 2 +- tensor2tensor/data_generators/wmt.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 44e4b34d3..e70736d3e 100755 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -356,7 +356,7 @@ def generate_data_for_problem(problem): def generate_data_for_registered_problem(problem_name): problem = registry.problem(problem_name) - problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir) + problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir, FLAGS.num_shards) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 504336b52..0e7dd2005 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -49,10 +49,10 @@ def target_vocab_size(self): def feature_encoders(self, data_dir): return _default_wmt_feature_encoders(data_dir, self.target_vocab_size) - def generate_data(self, data_dir, tmp_dir): + def generate_data(self, data_dir, tmp_dir, num_shards=100): generator_utils.generate_dataset_and_shuffle( ende_wordpiece_token_generator(tmp_dir, True, self.target_vocab_size), - self.training_filepaths(data_dir, 100, shuffled=False), + self.training_filepaths(data_dir, num_shards, shuffled=False), ende_wordpiece_token_generator(tmp_dir, False, self.target_vocab_size), self.dev_filepaths(data_dir, 1, shuffled=False)) From 9709825a8f1f6c35ead0a8ead44afc81a8b3b537 Mon Sep 17 00:00:00 2001 From: Huyen Nguyen <huyenn@netflix.com> Date: Tue, 18 Jul 2017 11:10:43 -0700 Subject: [PATCH 0133/4095] more num_shards --- tensor2tensor/data_generators/algorithmic.py | 4 ++-- tensor2tensor/data_generators/problem.py | 2 +- tensor2tensor/data_generators/wmt.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 6ec1f28a0..7e522bfa0 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -36,10 +36,10 @@ class AlgorithmicIdentityBinary40(problem.Problem): def num_symbols(self): return 2 - def generate_data(self, data_dir, _): + def generate_data(self, data_dir, _, num_shards=100): utils.generate_dataset_and_shuffle( identity_generator(self.num_symbols, 40, 100000), - self.training_filepaths(data_dir, 100, shuffled=True), + self.training_filepaths(data_dir, num_shards, shuffled=True), identity_generator(self.num_symbols, 400, 10000), self.dev_filepaths(data_dir, 1, shuffled=True), shuffle=False) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 992aa3410..0eb1987fa 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -113,7 +113,7 @@ class Problem(object): # BEGIN SUBCLASS INTERFACE # ============================================================================ - def generate_data(self, data_dir, tmp_dir): + def generate_data(self, data_dir, tmp_dir, num_shards=100): raise NotImplementedError() def hparams(self, defaults, model_hparams): diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 0e7dd2005..de5a25e13 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -92,10 +92,10 @@ def target_vocab_size(self): def feature_encoders(self, data_dir): return _default_wmt_feature_encoders(data_dir, self.target_vocab_size) - def generate_data(self, data_dir, tmp_dir): + def generate_data(self, data_dir, tmp_dir, num_shards=100): generator_utils.generate_dataset_and_shuffle( mken_wordpiece_token_generator(tmp_dir, True, self.target_vocab_size), - self.training_filepaths(data_dir, 100, shuffled=False), + self.training_filepaths(data_dir, num_shards, shuffled=False), mken_wordpiece_token_generator(tmp_dir, False, self.target_vocab_size), self.dev_filepaths(data_dir, 1, shuffled=False)) From 8c850c901d111967635b96c8d6975fbe7aefbab1 Mon Sep 17 00:00:00 2001 From: Huyen Nguyen <huyenn@netflix.com> Date: Tue, 18 Jul 2017 11:20:55 -0700 Subject: [PATCH 0134/4095] add arguments num_shards to generate_data --- tensor2tensor/data_generators/algorithmic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 7e522bfa0..eebf94016 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -36,7 +36,7 @@ class AlgorithmicIdentityBinary40(problem.Problem): def num_symbols(self): return 2 - def generate_data(self, data_dir, _, num_shards=100): + def generate_data(self, data_dir, tmp_dir, num_shards=100): utils.generate_dataset_and_shuffle( identity_generator(self.num_symbols, 40, 100000), self.training_filepaths(data_dir, num_shards, shuffled=True), From c3a59b4aa00547859f902add642f19c2d933b2ae Mon Sep 17 00:00:00 2001 From: Huyen Nguyen <huyenn@netflix.com> Date: Tue, 18 Jul 2017 11:22:44 -0700 Subject: [PATCH 0135/4095] add arguments num_shards to generate_data --- tensor2tensor/data_generators/algorithmic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index eebf94016..7e522bfa0 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -36,7 +36,7 @@ class AlgorithmicIdentityBinary40(problem.Problem): def num_symbols(self): return 2 - def generate_data(self, data_dir, tmp_dir, num_shards=100): + def generate_data(self, data_dir, _, num_shards=100): utils.generate_dataset_and_shuffle( identity_generator(self.num_symbols, 40, 100000), self.training_filepaths(data_dir, num_shards, shuffled=True), From 0d250b31234378f0687ee7e94db42202dab3a99d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9=20Casas?= <noe.casas@gmail.com> Date: Tue, 18 Jul 2017 20:25:19 +0200 Subject: [PATCH 0136/4095] Support --t2t_usr_dir also in t2t-datagen (#160) * Refactor user directory loading functionality and use it also from t2t-datagen * Move flag declaration to the binary files --- tensor2tensor/bin/t2t-datagen | 9 +++++++++ tensor2tensor/bin/t2t-trainer | 20 ++----------------- tensor2tensor/utils/usr_dir.py | 35 ++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 18 deletions(-) create mode 100644 tensor2tensor/utils/usr_dir.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 44e4b34d3..63eb7e45e 100755 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -48,6 +48,7 @@ from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing from tensor2tensor.utils import registry +from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -64,6 +65,13 @@ flags.DEFINE_integer("max_cases", 0, "Maximum number of cases to generate (unbounded if 0).") flags.DEFINE_integer("random_seed", 429459, "Random seed to use.") +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_model calls, that will then be " + "available to the t2t-datagen.") + # Mapping from problems that we can generate data for to their generators. # pylint: disable=g-long-lambda _SUPPORTED_PROBLEM_GENERATORS = { @@ -273,6 +281,7 @@ def set_random_seed(): def main(_): tf.logging.set_verbosity(tf.logging.INFO) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) # Calculate the list of problems to generate. problems = sorted( diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 322957028..6b3f4de71 100755 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -36,7 +36,7 @@ import sys # Dependency imports from tensor2tensor.utils import trainer_utils as utils - +from tensor2tensor.utils import usr_dir import tensorflow as tf flags = tf.flags @@ -49,25 +49,9 @@ flags.DEFINE_string("t2t_usr_dir", "", "e.g. @registry.register_model calls, that will then be " "available to the t2t-trainer.") - -def import_usr_dir(): - """Import module at FLAGS.t2t_usr_dir, if provided.""" - if not FLAGS.t2t_usr_dir: - return - dir_path = os.path.expanduser(FLAGS.t2t_usr_dir) - if dir_path[-1] == "/": - dir_path = dir_path[:-1] - containing_dir, module_name = os.path.split(dir_path) - tf.logging.info("Importing user module %s from path %s", module_name, - containing_dir) - sys.path.insert(0, containing_dir) - importlib.import_module(module_name) - sys.path.pop(0) - - def main(_): tf.logging.set_verbosity(tf.logging.INFO) - import_usr_dir() + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) utils.log_registry() utils.validate_flags() utils.run( diff --git a/tensor2tensor/utils/usr_dir.py b/tensor2tensor/utils/usr_dir.py new file mode 100644 index 000000000..ed5623c8e --- /dev/null +++ b/tensor2tensor/utils/usr_dir.py @@ -0,0 +1,35 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utility to load code from an external directory supplied by user.""" + +import os +import sys +import importlib +import tensorflow as tf + + +def import_usr_dir(usr_dir): + """Import user module, if provided.""" + if not usr_dir: + return + dir_path = os.path.expanduser(usr_dir) + if dir_path[-1] == "/": + dir_path = dir_path[:-1] + containing_dir, module_name = os.path.split(dir_path) + tf.logging.info("Importing user module %s from path %s", module_name, + containing_dir) + sys.path.insert(0, containing_dir) + importlib.import_module(module_name) + sys.path.pop(0) From 04b4f4722b69829521af2a99456afef43edc1ead Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 17 Jul 2017 11:27:14 -0700 Subject: [PATCH 0137/4095] Update documentation for adding new Problems PiperOrigin-RevId: 162242293 --- README.md | 21 ++++-- .../docs => docs}/distributed_training.md | 0 tensor2tensor/bin/t2t-datagen | 11 +-- tensor2tensor/bin/t2t-trainer | 20 +++++- tensor2tensor/data_generators/README.md | 72 +++++++++---------- tensor2tensor/data_generators/algorithmic.py | 4 +- .../data_generators/generator_utils.py | 12 +--- tensor2tensor/data_generators/problem.py | 4 +- tensor2tensor/data_generators/text_encoder.py | 48 ++++++++----- .../data_generators/tokenizer_test.py | 2 +- tensor2tensor/data_generators/wmt.py | 57 +-------------- tensor2tensor/utils/trainer_utils.py | 9 +-- tensor2tensor/utils/usr_dir.py | 35 --------- 13 files changed, 111 insertions(+), 184 deletions(-) rename {tensor2tensor/docs => docs}/distributed_training.md (100%) mode change 100755 => 100644 tensor2tensor/bin/t2t-datagen mode change 100755 => 100644 tensor2tensor/bin/t2t-trainer mode change 100755 => 100644 tensor2tensor/data_generators/generator_utils.py mode change 100755 => 100644 tensor2tensor/data_generators/tokenizer_test.py mode change 100755 => 100644 tensor2tensor/utils/trainer_utils.py delete mode 100644 tensor2tensor/utils/usr_dir.py diff --git a/README.md b/README.md index 27bb47947..7fb18e031 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ python -c "from tensor2tensor.models.transformer import Transformer" specification. * Support for multi-GPU machines and synchronous (1 master, many workers) and asynchrounous (independent workers synchronizing through a parameter server) - distributed training. + [distributed training](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/docs/distributed_training.md). * Easily swap amongst datasets and models by command-line flag with the data generation script `t2t-datagen` and the training script `t2t-trainer`. @@ -173,8 +173,10 @@ and many common sequence datasets are already available for generation and use. **Problems** define training-time hyperparameters for the dataset and task, mainly by setting input and output **modalities** (e.g. symbol, image, audio, -label) and vocabularies, if applicable. All problems are defined in -[`problem_hparams.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem_hparams.py). +label) and vocabularies, if applicable. All problems are defined either in +[`problem_hparams.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem_hparams.py) +or are registered with `@registry.register_problem` (run `t2t-datagen` to see +the list of all available problems). **Modalities**, defined in [`modality.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/modality.py), abstract away the input and output data types so that **models** may deal with @@ -222,7 +224,7 @@ enables easily adding new ones and easily swapping amongst them by command-line flag. You can add your own components without editing the T2T codebase by specifying the `--t2t_usr_dir` flag in `t2t-trainer`. -You can currently do so for models, hyperparameter sets, and modalities. Please +You can do so for models, hyperparameter sets, modalities, and problems. Please do submit a pull request if your component might be useful to others. Here's an example with a new hyperparameter set: @@ -253,9 +255,18 @@ You'll see under the registered HParams your `transformer_my_very_own_hparams_set`, which you can directly use on the command line with the `--hparams_set` flag. +`t2t-datagen` also supports the `--t2t_usr_dir` flag for `Problem` +registrations. + ## Adding a dataset -See the [data generators +To add a new dataset, subclass +[`Problem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py) +and register it with `@registry.register_problem`. See +[`WMTEnDeTokens8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +for an example. + +Also see the [data generators README](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/README.md). --- diff --git a/tensor2tensor/docs/distributed_training.md b/docs/distributed_training.md similarity index 100% rename from tensor2tensor/docs/distributed_training.md rename to docs/distributed_training.md diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100755 new mode 100644 index cbf0a6164..44e4b34d3 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -48,7 +48,6 @@ from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing from tensor2tensor.utils import registry -from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -65,13 +64,6 @@ flags.DEFINE_integer("max_cases", 0, "Maximum number of cases to generate (unbounded if 0).") flags.DEFINE_integer("random_seed", 429459, "Random seed to use.") -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_model calls, that will then be " - "available to the t2t-datagen.") - # Mapping from problems that we can generate data for to their generators. # pylint: disable=g-long-lambda _SUPPORTED_PROBLEM_GENERATORS = { @@ -281,7 +273,6 @@ def set_random_seed(): def main(_): tf.logging.set_verbosity(tf.logging.INFO) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) # Calculate the list of problems to generate. problems = sorted( @@ -365,7 +356,7 @@ def generate_data_for_problem(problem): def generate_data_for_registered_problem(problem_name): problem = registry.problem(problem_name) - problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir, FLAGS.num_shards) + problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir) if __name__ == "__main__": diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100755 new mode 100644 index 6b3f4de71..322957028 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -36,7 +36,7 @@ import sys # Dependency imports from tensor2tensor.utils import trainer_utils as utils -from tensor2tensor.utils import usr_dir + import tensorflow as tf flags = tf.flags @@ -49,9 +49,25 @@ flags.DEFINE_string("t2t_usr_dir", "", "e.g. @registry.register_model calls, that will then be " "available to the t2t-trainer.") + +def import_usr_dir(): + """Import module at FLAGS.t2t_usr_dir, if provided.""" + if not FLAGS.t2t_usr_dir: + return + dir_path = os.path.expanduser(FLAGS.t2t_usr_dir) + if dir_path[-1] == "/": + dir_path = dir_path[:-1] + containing_dir, module_name = os.path.split(dir_path) + tf.logging.info("Importing user module %s from path %s", module_name, + containing_dir) + sys.path.insert(0, containing_dir) + importlib.import_module(module_name) + sys.path.pop(0) + + def main(_): tf.logging.set_verbosity(tf.logging.INFO) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + import_usr_dir() utils.log_registry() utils.validate_flags() utils.run( diff --git a/tensor2tensor/data_generators/README.md b/tensor2tensor/data_generators/README.md index f8495c38f..310bc39df 100644 --- a/tensor2tensor/data_generators/README.md +++ b/tensor2tensor/data_generators/README.md @@ -1,7 +1,7 @@ -# Data generators for T2T models. +# T2T Problems. -This directory contains data generators for a number of problems. We use a -naming scheme for the problems, they have names of the form +This directory contains `Problem` specifications for a number of problems. We +use a naming scheme for the problems, they have names of the form `[task-family]_[task]_[specifics]`. Data for all currently supported problems can be generated by calling the main generator binary (`t2t-datagen`). For example: @@ -20,53 +20,51 @@ All tasks produce TFRecord files of `tensorflow.Example` protocol buffers. ## Adding a new problem -1. Implement and register a Python generator for the dataset -1. Add a problem specification to `problem_hparams.py` specifying input and - output modalities +To add a new problem, subclass +[`Problem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py) +and register it with `@registry.register_problem`. See +[`WMTEnDeTokens8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +for an example. -To add a new problem, you first need to create python generators for training -and development data for the problem. The python generators should yield -dictionaries with string keys and values being lists of {int, float, str}. -Here is a very simple generator for a data-set where inputs are lists of 1s with -length upto 100 and targets are lists of length 1 with an integer denoting the -length of the input list. +`Problem`s support data generation, training, and decoding. + +Data generation is handles by `Problem.generate_data` which should produce 2 +datasets, training and dev, which should be named according to +`Problem.training_filepaths` and `Problem.dev_filepaths`. +`Problem.generate_data` should also produce any other files that may be required +for training/decoding, e.g. a vocabulary file. + +A particularly easy way to implement `Problem.generate_data` for your dataset is +to create 2 Python generators, one for the training data and another for the +dev data, and pass them to `generator_utils.generate_dataset_and_shuffle`. See +[`WMTEnDeTokens8k.generate_data`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +for an example of usage. + +The generators should yield dictionaries with string keys and values being lists +of {int, float, str}. Here is a very simple generator for a data-set where +inputs are lists of 2s with length upto 100 and targets are lists of length 1 +with an integer denoting the length of the input list. ``` def length_generator(nbr_cases): for _ in xrange(nbr_cases): length = np.random.randint(100) + 1 - yield {"inputs": [1] * length, "targets": [length]} + yield {"inputs": [2] * length, "targets": [length]} ``` -Note that our data reader uses 0 for padding, so it is a good idea to never -generate 0s, except if all your examples have the same size (in which case -they'll never be padded anyway) or if you're doing padding on your own (in which -case please use 0s for padding). When adding the python generator function, -please also add unit tests to check if the code runs. +Note that our data reader uses 0 for padding and other parts of the code assume +end-of-string (EOS) is 1, so it is a good idea to never generate 0s or 1s, +except if all your examples have the same size (in which case they'll never be +padded anyway) or if you're doing padding on your own (in which case please use +0s for padding). When adding the python generator function, please also add unit +tests to check if the code runs. The generator can do arbitrary setup before beginning to yield examples - for example, downloading data, generating vocabulary files, etc. Some examples: -* [Algorithmic generators](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/algorithmic.py) +* [Algorithmic problems](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/algorithmic.py) and their [unit tests](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/algorithmic_test.py) -* [WMT generators](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +* [WMT problems](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) and their [unit tests](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt_test.py) - -When your python generator is ready and tested, add it to the -`_SUPPORTED_PROBLEM_GENERATORS` dictionary in the -[data -generator](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t-datagen). -The keys are problem names, and the values are pairs of (training-set-generator -function, dev-set-generator function). For the generator above, one could add -the following lines: - -``` - "algorithmic_length_upto100": - (lambda: algorithmic.length_generator(10000), - lambda: algorithmic.length_generator(1000)), -``` - -Note the lambdas above: we don't want to call the generators too early. - diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 7e522bfa0..6ec1f28a0 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -36,10 +36,10 @@ class AlgorithmicIdentityBinary40(problem.Problem): def num_symbols(self): return 2 - def generate_data(self, data_dir, _, num_shards=100): + def generate_data(self, data_dir, _): utils.generate_dataset_and_shuffle( identity_generator(self.num_symbols, 40, 100000), - self.training_filepaths(data_dir, num_shards, shuffled=True), + self.training_filepaths(data_dir, 100, shuffled=True), identity_generator(self.num_symbols, 400, 10000), self.dev_filepaths(data_dir, 1, shuffled=True), shuffle=False) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100755 new mode 100644 index 20f3959d8..890f92c2a --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -244,11 +244,6 @@ def gunzip_file(gz_path, new_path): "http://www.statmt.org/wmt13/training-parallel-un.tgz", ["un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr"] ], - # Macedonian-English - [ - "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long - ["train.mk", "train.en"] - ], ] @@ -329,7 +324,6 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, return vocab # Use Tokenizer to count the word occurrences. - token_counts = defaultdict(int) filepath = os.path.join(tmp_dir, source_filename) with tf.gfile.GFile(filepath, mode="r") as source_file: for line in source_file: @@ -337,11 +331,11 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, if line and "\t" in line: parts = line.split("\t", maxsplit=1) part = parts[index].strip() - for tok in tokenizer.encode(text_encoder.native_to_unicode(part)): - token_counts[tok] += 1 + _ = tokenizer.encode(text_encoder.native_to_unicode(part)) vocab = text_encoder.SubwordTextEncoder.build_to_target_size( - vocab_size, token_counts, 1, 1e3) + vocab_size, tokenizer.token_counts, 1, + min(1e3, vocab_size + text_encoder.NUM_RESERVED_TOKENS)) vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 0eb1987fa..28f4dcb1b 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -67,8 +67,6 @@ class SpaceID(object): ICE_TOK = 18 # Icelandic parse tokens ICE_PARSE_TOK = 19 - # Macedonian tokens - MK_TOK = 20 class Problem(object): @@ -113,7 +111,7 @@ class Problem(object): # BEGIN SUBCLASS INTERFACE # ============================================================================ - def generate_data(self, data_dir, tmp_dir, num_shards=100): + def generate_data(self, data_dir, tmp_dir): raise NotImplementedError() def hparams(self, defaults, model_hparams): diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index c812ced4f..4a5a784c2 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -24,7 +24,6 @@ from __future__ import print_function from collections import defaultdict -import re # Dependency imports @@ -226,7 +225,6 @@ class SubwordTextEncoder(TextEncoder): def __init__(self, filename=None): """Initialize and read from a file, if provided.""" - self._alphabet = set() if filename is not None: self._load_from_file(filename) super(SubwordTextEncoder, self).__init__(num_reserved_ids=None) @@ -505,12 +503,6 @@ def _escape_token(self, token): ret += u"\\%d;" % ord(c) return ret - # Regular expression for unescaping token strings - # '\u' is converted to '_' - # '\\' is converted to '\' - # '\213;' is converted to unichr(213) - _UNESCAPE_REGEX = re.compile(u'|'.join([r"\\u", r"\\\\", r"\\([0-9]+);"])) - def _unescape_token(self, escaped_token): """Inverse of _escape_token(). @@ -519,14 +511,32 @@ def _unescape_token(self, escaped_token): Returns: token: a unicode string """ - def match(m): - if m.group(1) is not None: - # Convert '\213;' to unichr(213) - try: - return unichr(int(m.group(1))) - except (ValueError, OverflowError) as _: - return "" - # Convert '\u' to '_' and '\\' to '\' - return u"_" if m.group(0) == u"\\u" else u"\\" - # Cut off the trailing underscore and apply the regex substitution - return self._UNESCAPE_REGEX.sub(match, escaped_token[:-1]) + ret = u"" + escaped_token = escaped_token[:-1] + pos = 0 + while pos < len(escaped_token): + c = escaped_token[pos] + if c == "\\": + pos += 1 + if pos >= len(escaped_token): + break + c = escaped_token[pos] + if c == u"u": + ret += u"_" + pos += 1 + elif c == "\\": + ret += u"\\" + pos += 1 + else: + semicolon_pos = escaped_token.find(u";", pos) + if semicolon_pos == -1: + continue + try: + ret += unichr(int(escaped_token[pos:semicolon_pos])) + pos = semicolon_pos + 1 + except (ValueError, OverflowError) as _: + pass + else: + ret += c + pos += 1 + return ret diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py old mode 100755 new mode 100644 index 45a1f7e41..c279290ed --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +# coding=utf-8 """Tests for tensor2tensor.data_generators.tokenizer.""" from __future__ import absolute_import diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index de5a25e13..8edab8ba2 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -49,10 +49,10 @@ def target_vocab_size(self): def feature_encoders(self, data_dir): return _default_wmt_feature_encoders(data_dir, self.target_vocab_size) - def generate_data(self, data_dir, tmp_dir, num_shards=100): + def generate_data(self, data_dir, tmp_dir): generator_utils.generate_dataset_and_shuffle( ende_wordpiece_token_generator(tmp_dir, True, self.target_vocab_size), - self.training_filepaths(data_dir, num_shards, shuffled=False), + self.training_filepaths(data_dir, 100, shuffled=False), ende_wordpiece_token_generator(tmp_dir, False, self.target_vocab_size), self.dev_filepaths(data_dir, 1, shuffled=False)) @@ -81,31 +81,6 @@ def _default_wmt_feature_encoders(data_dir, target_vocab_size): "targets": subtokenizer, } -@registry.register_problem("setimes_mken_tokens_32k") -class SETimesMkEnTokens32k(problem.Problem): - """Problem spec for SETimes Mk-En translation.""" - - @property - def target_vocab_size(self): - return 2**15 # 32768 - - def feature_encoders(self, data_dir): - return _default_wmt_feature_encoders(data_dir, self.target_vocab_size) - - def generate_data(self, data_dir, tmp_dir, num_shards=100): - generator_utils.generate_dataset_and_shuffle( - mken_wordpiece_token_generator(tmp_dir, True, self.target_vocab_size), - self.training_filepaths(data_dir, num_shards, shuffled=False), - mken_wordpiece_token_generator(tmp_dir, False, self.target_vocab_size), - self.dev_filepaths(data_dir, 1, shuffled=False)) - - def hparams(self, defaults, unused_model_hparams): - p = defaults - vocab_size = self._encoders["inputs"].vocab_size - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, vocab_size) - p.input_space_id = problem.SpaceID.MK_TOK - p.target_space_id = problem.SpaceID.EN_TOK # End-of-sentence marker. EOS = text_encoder.EOS_TOKEN @@ -320,21 +295,6 @@ def ende_bpe_token_generator(tmp_dir, train): ("dev/newsdev2017-zhen-src.zh", "dev/newsdev2017-zhen-ref.en") ]] -# For Macedonian-English the SETimes corpus -# from http://nlp.ffzg.hr/resources/corpora/setimes/ is used. -# The original dataset has 207,777 parallel sentences. -# For training the first 205,777 sentences are used. -_MKEN_TRAIN_DATASETS = [[ - "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long - ("train.mk", "train.en") -]] - -# For development 1000 parallel sentences are used. -_MKEN_TEST_DATASETS = [[ - "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.dev.tgz", # pylint: disable=line-too-long - ("dev.mk", "dev.en") -]] - def _compile_data(tmp_dir, datasets, filename): """Concatenate all `datasets` and save to `filename`.""" @@ -433,19 +393,6 @@ def enfr_character_generator(tmp_dir, train): return character_generator(data_path + ".lang1", data_path + ".lang2", character_vocab, EOS) -def mken_wordpiece_token_generator(tmp_dir, train, vocab_size): - """Wordpiece generator for the SETimes Mk-En dataset.""" - datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size, - source_datasets + target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - def parsing_character_generator(tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py old mode 100755 new mode 100644 index 66a01487c..b5894904d --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -585,7 +585,6 @@ def decode_from_dataset(estimator): tf.logging.info("Performing local inference.") infer_problems_data = get_datasets_for_mode(hparams.data_dir, tf.contrib.learn.ModeKeys.INFER) - infer_input_fn = get_input_fn( mode=tf.contrib.learn.ModeKeys.INFER, hparams=hparams, @@ -626,11 +625,9 @@ def log_fn(inputs, # The function predict() returns an iterable over the network's # predictions from the test input. We use it to log inputs and decodes. - inputs_iter = result_iter["inputs"] - targets_iter = result_iter["targets"] - outputs_iter = result_iter["outputs"] - for j, result in enumerate(zip(inputs_iter, targets_iter, outputs_iter)): - inputs, targets, outputs = result + for j, result in enumerate(result_iter): + inputs, targets, outputs = (result["inputs"], result["targets"], + result["outputs"]) if FLAGS.decode_return_beams: output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) for k, beam in enumerate(output_beams): diff --git a/tensor2tensor/utils/usr_dir.py b/tensor2tensor/utils/usr_dir.py deleted file mode 100644 index ed5623c8e..000000000 --- a/tensor2tensor/utils/usr_dir.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Utility to load code from an external directory supplied by user.""" - -import os -import sys -import importlib -import tensorflow as tf - - -def import_usr_dir(usr_dir): - """Import user module, if provided.""" - if not usr_dir: - return - dir_path = os.path.expanduser(usr_dir) - if dir_path[-1] == "/": - dir_path = dir_path[:-1] - containing_dir, module_name = os.path.split(dir_path) - tf.logging.info("Importing user module %s from path %s", module_name, - containing_dir) - sys.path.insert(0, containing_dir) - importlib.import_module(module_name) - sys.path.pop(0) From 930cb3311d5f3082b8d9bf284b3be098780855a0 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 17 Jul 2017 12:40:41 -0700 Subject: [PATCH 0138/4095] Create top-level docs dir for GitHub Pages PiperOrigin-RevId: 162253587 --- README.md | 4 ++-- docs/index.md | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 docs/index.md diff --git a/README.md b/README.md index 7fb18e031..059fbe429 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ python -c "from tensor2tensor.models.transformer import Transformer" specification. * Support for multi-GPU machines and synchronous (1 master, many workers) and asynchrounous (independent workers synchronizing through a parameter server) - [distributed training](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/docs/distributed_training.md). + [distributed training](https://github.com/tensorflow/tensor2tensor/tree/master/docs/distributed_training.md). * Easily swap amongst datasets and models by command-line flag with the data generation script `t2t-datagen` and the training script `t2t-trainer`. @@ -213,7 +213,7 @@ inference. Users can easily switch between problems, models, and hyperparameter sets by using the `--model`, `--problems`, and `--hparams_set` flags. Specific hyperparameters can be overridden with the `--hparams` flag. `--schedule` and related flags control local and distributed training/evaluation -([distributed training documentation](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/docs/distributed_training.md)). +([distributed training documentation](https://github.com/tensorflow/tensor2tensor/tree/master/docs/distributed_training.md)). --- diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 000000000..a5eeba137 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,23 @@ +# T2T: Tensor2Tensor Transformers + +Check us out on +<a href=https://github.com/tensorflow/tensor2tensor> +GitHub +<img src="https://github.com/favicon.ico" width="16"> +</a> +. + +[![PyPI +version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) +[![GitHub +Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](https://github.com/tensorflow/tensor2tensor/issues) +[![Contributions +welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) +[![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) +[![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) + +See our +[README](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/README.md) +for documentation. + +More documentation and tutorials coming soon... From 5db92b57f650210e3b601c329f31b17346656e6f Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 17 Jul 2017 12:43:36 -0700 Subject: [PATCH 0139/4095] Add t2t_usr_dir functionality to t2t-datagen PiperOrigin-RevId: 162253918 --- tensor2tensor/bin/t2t-datagen | 8 ++++++ tensor2tensor/bin/t2t-trainer | 22 ++------------ tensor2tensor/utils/registry_utils.py | 41 +++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 20 deletions(-) create mode 100644 tensor2tensor/utils/registry_utils.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 44e4b34d3..5caf4a54f 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -48,6 +48,7 @@ from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing from tensor2tensor.utils import registry +from tensor2tensor.utils import registry_utils import tensorflow as tf @@ -63,6 +64,12 @@ flags.DEFINE_integer("num_shards", 10, "How many shards to use.") flags.DEFINE_integer("max_cases", 0, "Maximum number of cases to generate (unbounded if 0).") flags.DEFINE_integer("random_seed", 429459, "Random seed to use.") +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_problem calls, that will then be " + "available to t2t-datagen.") # Mapping from problems that we can generate data for to their generators. # pylint: disable=g-long-lambda @@ -273,6 +280,7 @@ def set_random_seed(): def main(_): tf.logging.set_verbosity(tf.logging.INFO) + registry_utils.import_usr_dir(FLAGS.t2t_usr_dir) # Calculate the list of problems to generate. problems = sorted( diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 322957028..371d05702 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -29,12 +29,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import importlib -import os -import sys - # Dependency imports +from tensor2tensor.utils import registry_utils from tensor2tensor.utils import trainer_utils as utils import tensorflow as tf @@ -50,24 +47,9 @@ flags.DEFINE_string("t2t_usr_dir", "", "available to the t2t-trainer.") -def import_usr_dir(): - """Import module at FLAGS.t2t_usr_dir, if provided.""" - if not FLAGS.t2t_usr_dir: - return - dir_path = os.path.expanduser(FLAGS.t2t_usr_dir) - if dir_path[-1] == "/": - dir_path = dir_path[:-1] - containing_dir, module_name = os.path.split(dir_path) - tf.logging.info("Importing user module %s from path %s", module_name, - containing_dir) - sys.path.insert(0, containing_dir) - importlib.import_module(module_name) - sys.path.pop(0) - - def main(_): tf.logging.set_verbosity(tf.logging.INFO) - import_usr_dir() + registry_utils.import_usr_dir(FLAGS.t2t_usr_dir) utils.log_registry() utils.validate_flags() utils.run( diff --git a/tensor2tensor/utils/registry_utils.py b/tensor2tensor/utils/registry_utils.py new file mode 100644 index 000000000..05d98f2b4 --- /dev/null +++ b/tensor2tensor/utils/registry_utils.py @@ -0,0 +1,41 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for the t2t registry.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import importlib +import os +import sys + +# Dependency imports + +import tensorflow as tf + + +def import_usr_dir(usr_dir): + """Import module at usr_dir, if provided.""" + if not usr_dir: + return + dir_path = os.path.expanduser(usr_dir) + if dir_path[-1] == "/": + dir_path = dir_path[:-1] + containing_dir, module_name = os.path.split(dir_path) + tf.logging.info("Importing user module %s from path %s", module_name, + containing_dir) + sys.path.insert(0, containing_dir) + importlib.import_module(module_name) + sys.path.pop(0) From e1ecf01113c34781801fdb6ee946e47cfbfebf09 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Mon, 17 Jul 2017 13:56:31 -0700 Subject: [PATCH 0140/4095] Create a "long_answer" model for attacking the wikipedia title->article dataset. PiperOrigin-RevId: 162264770 --- .../data_generators/problem_hparams.py | 7 +- tensor2tensor/models/common_attention.py | 107 ++++++- tensor2tensor/models/common_hparams.py | 3 + tensor2tensor/models/long_answer.py | 275 ++++++++++++++++++ tensor2tensor/models/models.py | 1 + tensor2tensor/utils/trainer_utils.py | 5 +- 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 tensor2tensor/models/long_answer.py diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 70b9dada8..2890ec2a6 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -351,10 +351,9 @@ def wiki_32k(model_hparams): p = default_problem_hparams() encoder = text_encoder.SubwordTextEncoder( os.path.join(model_hparams.data_dir, "wiki_32k.subword_text_encoder")) - p.input_modality = { - "inputs": (registry.Modalities.SYMBOL, encoder.vocab_size) - } - p.target_modality = (registry.Modalities.SYMBOL, encoder.vocab_size) + modality_spec = (registry.Modalities.SYMBOL, encoder.vocab_size) + p.input_modality = {"inputs": modality_spec} + p.target_modality = modality_spec p.vocabulary = { "inputs": encoder, "targets": encoder diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index 49cd40285..b0d0403cd 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -280,13 +280,13 @@ def attention_image_summary(attn, image_shapes=None): (query_rows, query_cols, query_channels, memory_rows, memory_cols, memory_channels). """ - num_heads = attn.get_shape().as_list()[1] + num_heads = tf.shape(attn)[1] # [batch, query_length, memory_length, num_heads] image = tf.transpose(attn, [0, 2, 3, 1]) image = tf.pow(image, 0.2) # for high-dynamic-range # Each head will correspond to one of RGB. # pad the heads to be a multiple of 3 - image = tf.pad(image, [[0, 0], [0, 0], [0, 0], [0, -num_heads % 3]]) + image = tf.pad(image, [[0, 0], [0, 0], [0, 0], [0, tf.mod(-num_heads, 3)]]) image = split_last_dimension(image, 3) image = tf.reduce_max(image, 4) if image_shapes is not None: @@ -345,6 +345,95 @@ def dot_product_attention(q, return tf.matmul(weights, v) +def masked_local_attention_1d( + q, k, v, block_length=128, summaries=True, name=None): + """Attention to the source position and a neigborhood to the left of it. + + The sequence is divided into blocks of length block_size. + Attention for a given query position can only see memory positions + less than or equal to the query position, in the corresponding block + and the previous block. + + If mask_right is True, then a target position cannot see greater source + positions. + + Args: + q: a Tensor with shape [batch, heads, length, depth_k] + k: a Tensor with shape [batch, heads, length, depth_k] + v: a Tensor with shape [batch, heads, length, depth_v] + block_length: an integer + summaries: a boolean + name: an optional string + + Returns: + a Tensor of shape [batch, heads, length, depth_v] + """ + with tf.variable_scope(name, default_name="local_attention_1d", + values=[q, k, v]): + v_shape = v.get_shape() + batch = tf.shape(q)[0] + heads = tf.shape(q)[1] + length = tf.shape(q)[2] + # If (length < 2 * block_length), then we use only one block. + block_length = tf.where(tf.less(length, block_length * 2), + length, block_length) + depth_k = tf.shape(q)[3] + depth_v = tf.shape(v)[3] + original_length = length + padding_size = tf.mod(-length, block_length) + length += padding_size + padding = [[0, 0], [0, 0], [0, padding_size], [0, 0]] + q = tf.pad(q, padding) + k = tf.pad(k, padding) + v = tf.pad(v, padding) + num_blocks = tf.div(length, block_length) + + # compute attention for the first query block. + first_q = tf.slice(q, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_k = tf.slice(k, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_v = tf.slice(v, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_output = dot_product_attention( + first_q, first_k, first_v, attention_bias_lower_triangle(block_length), + summaries=summaries, name="fist_block") + + # compute attention for all subsequent query blocks. + q = tf.reshape(q, [batch, heads, num_blocks, block_length, depth_k]) + k = tf.reshape(k, [batch, heads, num_blocks, block_length, depth_k]) + v = tf.reshape(v, [batch, heads, num_blocks, block_length, depth_v]) + + def local(x): + """Create a local version of the keys or values.""" + prev_block = tf.slice( + x, [0, 0, 0, 0, 0], [-1, -1, num_blocks - 1, -1, -1]) + cur_block = tf.slice( + x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) + return tf.concat([prev_block, cur_block], 3) + local_k = local(k) + local_v = local(v) + tail_q = tf.slice(q, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) + + local_length = tf.shape(local_k)[3] + + # [batch, heads, num_blocks - 1, block_length, local_length] + attention = tf.matmul(tail_q, local_k, transpose_b=True) + + # make sure source_pos <= target_pos + good_part = tf.matrix_band_part( + tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) + mask = (1.0 - good_part) * -1e9 + attention += tf.reshape(mask, [1, 1, 1, block_length, local_length]) + attention = tf.nn.softmax(attention) + # TODO(noam): figure out how to show a summary for the remaining blocks. + # The naive way currently causes errors due to empty tensors. + # output: [batch, heads, num_blocks-1, block_length, depth_v] + output = tf.matmul(attention, local_v) + output = tf.reshape(output, [batch, heads, -1, depth_v]) + output = tf.concat([first_output, output], axis=2) + output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) + output.set_shape(v_shape) + return output + + def multihead_attention(query_antecedent, memory_antecedent, bias, @@ -355,6 +444,8 @@ def multihead_attention(query_antecedent, dropout_rate, summaries=False, image_shapes=None, + attention_type="dot_product", + block_length=128, name=None): """Multihead scaled-dot-product attention with input/output transformations. @@ -370,6 +461,8 @@ def multihead_attention(query_antecedent, summaries: a boolean image_shapes: optional tuple of integer scalars. see comments for attention_image_summary() + attention_type: a string, either "dot_product" or "local_mask_right" + block_length: an integer - relevent for "local_mask_right" name: an optional string Returns: @@ -414,8 +507,14 @@ def multihead_attention(query_antecedent, v = split_heads(v, num_heads) key_depth_per_head = total_key_depth // num_heads q *= key_depth_per_head**-0.5 - x = dot_product_attention( - q, k, v, bias, dropout_rate, summaries, image_shapes) + if attention_type == "dot_product": + x = dot_product_attention( + q, k, v, bias, dropout_rate, summaries, image_shapes) + else: + assert attention_type == "local_mask_right" + x = masked_local_attention_1d(q, k, v, + block_length=block_length, + summaries=summaries) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") return x diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index f067b724e..ff856968b 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -72,6 +72,9 @@ def basic_params1(): # setting the max length in a minibatch. 0 means default behavior, # max_length = hparams.batch_size * length_multiplier max_length=0, + # If set to True, drop sequences longer than max_length during eval. + # This affects the validity of the evaluation metrics. + eval_drop_long_sequences=int(False), # in SymbolModality, share the output embeddings and the softmax # variables. # You can also share the input embeddings with the output embeddings diff --git a/tensor2tensor/models/long_answer.py b/tensor2tensor/models/long_answer.py new file mode 100644 index 000000000..15067e120 --- /dev/null +++ b/tensor2tensor/models/long_answer.py @@ -0,0 +1,275 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Model to generate long answers to short questions. + +E.g. wiki_32k title->article dataset. + +Variant on attention_lm_moe.py + - prepend the inputs to the targets. + - use masked local attention to avoid quadratic space and time blowup for + long sequences. + +This model is still highly experimental and under rapid iteration. + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.models import common_attention +from tensor2tensor.models import common_hparams +from tensor2tensor.models import common_layers +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +@registry.register_model +class LongAnswer(t2t_model.T2TModel): + """Attention net. See file docstring.""" + + def model_fn_body_sharded(self, sharded_features): + # Remove dropout if not training + hparams = self._hparams + dp = self._data_parallelism + targets = sharded_features["targets"] + targets = dp(tf.squeeze, targets, 2) + inputs = sharded_features["inputs"] + inputs = dp(tf.squeeze, inputs, 2) + + decoder_input = dp(long_answer_prepare_decoder, inputs, targets, hparams) + + def residual_fn(x, y): + return common_layers.layer_norm(x + tf.nn.dropout( + y, 1.0 - hparams.residual_dropout)) + + x = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.residual_dropout) + extra_loss = 0.0 + for layer in xrange(hparams.num_hidden_layers): + with tf.variable_scope("layer_%d" % layer): + with tf.variable_scope("attention"): + y = dp(common_attention.multihead_attention, + x, + None, + None, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + summaries=True, + attention_type="local_mask_right", + block_length=hparams.block_length, + name="decoder_self_attention") + x = dp(residual_fn, x, y) + with tf.variable_scope("ffn"): + if str(layer) in hparams.moe_layers.split(","): + y, loss = common_layers.moe_layer( + dp, self._ps_devices, x, + hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + hparams.hidden_size, + hparams.moe_hidden_size, hparams.moe_n1, hparams.moe_n2, + hparams.moe_loss_coef) + extra_loss += loss + else: + y = dp(common_layers.conv_hidden_relu, + x, + hparams.filter_size, + hparams.hidden_size, + dropout=hparams.relu_dropout) + x = dp(residual_fn, x, y) + x = dp(long_answer_output, x, inputs) + return x, extra_loss + + +def long_answer_prepare_decoder(inputs, targets, hparams): + """Prepare one shard of the model for the decoder. + + Args: + inputs: a Tensor. + targets: a Tensor. + hparams: run hyperparameters + + Returns: + decoder_input: a Tensor, bottom of decoder stack + """ + decoder_input = tf.concat([ + length_embedding(targets, hparams), inputs, + common_layers.shift_left_3d(targets)], 1) + if hparams.pos == "timing": + decoder_input = common_attention.add_timing_signal_1d(decoder_input) + return decoder_input + + +def length_embedding(targets, hparams): + """An embedding indicating approximate target length. + + This is a bit of a hack, where we want to be able to request a particular + target length during inference. + During training, we sometimes provide a target length. + During eval, we never provide a target length. + + Args: + targets: a Tensor. + hparams: run hyperparameters + + Returns: + a Tensor with shape [batch, 1, hparams.hidden_size] + """ + # encode the approx target length in case we want to specify it + # during inference. + batch = tf.shape(targets)[0] + padded_target_length = tf.shape(targets)[1] + if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN: + lengths = padded_target_length * tf.to_int32( + tf.less(tf.random_uniform([batch]), + hparams.answer_length_prob_train)) + elif hparams.mode == tf.contrib.learn.ModeKeys.EVAL: + lengths = 0 + else: + assert hparams.mode == tf.contrib.learn.ModeKeys.INFER + lengths = hparams.answer_length_infer + lengths = tf.to_int32(tf.log(tf.to_float(lengths + 1))) + lengths = tf.zeros([batch], dtype=tf.int32) + lengths + ret = tf.gather( + tf.get_variable("answer_length", [100, hparams.hidden_size]), lengths) + return tf.expand_dims(ret, 1) + + +def long_answer_output(x, inputs): + """Strip initial part corresponding to the inputs and the length embedding.""" + x = tf.slice(x, [0, tf.shape(inputs)[1] + 1, 0], [-1, -1, -1]) + x = tf.expand_dims(x, 2) + return x + + +@registry.register_hparams +def long_answer_base(): + """Set of hyperparameters. + + Returns: + a hparams object + """ + hparams = common_hparams.basic_params1() + hparams.hidden_size = 1024 + hparams.batch_size = 8192 + hparams.max_length = 8192 + hparams.dropout = 0.0 + hparams.batching_mantissa_bits = 3 + hparams.clip_grad_norm = 0. # i.e. no gradient clipping + hparams.optimizer_adam_epsilon = 1e-9 + hparams.learning_rate_decay_scheme = "noam" + hparams.learning_rate = 0.1 + hparams.learning_rate_warmup_steps = 1000 + hparams.initializer_gain = 1.0 + hparams.num_hidden_layers = 4 + hparams.initializer = "uniform_unit_scaling" + hparams.weight_decay = 0.0 + hparams.optimizer_adam_beta1 = 0.9 + hparams.optimizer_adam_beta2 = 0.98 + hparams.num_sampled_classes = 0 + hparams.label_smoothing = 0.0 + hparams.shared_embedding_and_softmax_weights = int(True) + hparams.sampling_method = "random" + hparams.add_hparam("filter_size", 2048) # Add new ones like this. + # comma-separated list of layer numbers. + # At each of these layers, we replace the ffn with a mixture of experts. + hparams.add_hparam("moe_layers", "2") + # If moe_n2 is None, then use a flat MoE with moe_n1 experts. + # If moe_n2 is an integer, then use a hierarchical MoE + # consisting of moe_n1 groups of moe_n2 experts each. + hparams.add_hparam("moe_n1", 64) + hparams.add_hparam("moe_n2", 0) + hparams.add_hparam("moe_hidden_size", 2048) + hparams.add_hparam("moe_loss_coef", 1e-2) + # attention-related flags + hparams.add_hparam("num_heads", 8) + hparams.add_hparam("attention_key_channels", 0) + hparams.add_hparam("attention_value_channels", 0) + # All hyperparameters ending in "dropout" are automatically set to 0.0 + # when not in training mode. + hparams.add_hparam("attention_dropout", 0.0) + hparams.add_hparam("relu_dropout", 0.0) + hparams.add_hparam("residual_dropout", 0.0) + hparams.add_hparam("pos", "timing") # timing, none + hparams.add_hparam("block_length", 512) + hparams.add_hparam("answer_length_prob_train", 0.5) + hparams.add_hparam("answer_length_infer", 1000) + # We cannot handle long sequence at this point, so drop them, during eval. + # This affects evaluation metrics. + # TODO(noam): find a different workaround + hparams.eval_drop_long_sequences = int(True) + return hparams + + +@registry.register_hparams +def long_answer_tiny(): + """Cheap model for validation. + + Returns: + an hparams object. + """ + hparams = long_answer_base() + hparams.num_hidden_layers = 3 + hparams.hidden_size = 512 + hparams.filter_size = 1024 + hparams.moe_layers = "2" + hparams.moe_hidden_size = 1024 + hparams.block_length = 128 + hparams.moe_n1 = 8 + hparams.batch_size = 2048 + hparams.max_length = 2048 + return hparams + + +@registry.register_hparams +def long_answer_small(): + """Cheap model for single-gpu training. + + Returns: + an hparams object. + """ + hparams = long_answer_base() + hparams.num_hidden_layers = 4 + hparams.hidden_size = 512 + hparams.filter_size = 2048 + hparams.moe_n1 = 128 + hparams.moe_layers = "2" + hparams.moe_hidden_size = 2048 + return hparams + + +@registry.register_hparams +def long_answer_large(): + """Large model for distributed training. + + Returns: + an hparams object. + """ + hparams = long_answer_base() + hparams.num_hidden_layers = 5 + hparams.moe_layers = "3" + hparams.hidden_size = 1024 + hparams.filter_size = 4096 + hparams.moe_hidden_size = 4096 + hparams.moe_n1 = 128 + hparams.block_length = 1024 + return hparams diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index 0ca11996e..2cf639426 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -26,6 +26,7 @@ from tensor2tensor.models import attention_lm_moe from tensor2tensor.models import bluenet from tensor2tensor.models import bytenet +from tensor2tensor.models import long_answer from tensor2tensor.models import lstm from tensor2tensor.models import modalities from tensor2tensor.models import multimodel diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index b5894904d..7c486df8d 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -1034,7 +1034,10 @@ def input_fn(): capacity *= num_datashards examples = data_reader.input_pipeline(data_file_patterns[n], capacity, mode) - drop_long_sequences = mode == tf.contrib.learn.ModeKeys.TRAIN + if mode == tf.contrib.learn.ModeKeys.TRAIN: + drop_long_sequences = True + else: + drop_long_sequences = hparams.eval_drop_long_sequences batch_size_multiplier = hparams.problems[n].batch_size_multiplier feature_map = data_reader.batch_examples( examples, From ecacf770fc2f6aabc81c1cc568fda4dd033619a4 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 18 Jul 2017 12:49:33 -0700 Subject: [PATCH 0141/4095] Internal merge #160 PiperOrigin-RevId: 162389811 --- tensor2tensor/bin/t2t-datagen | 4 ++-- tensor2tensor/bin/t2t-trainer | 4 ++-- tensor2tensor/utils/{registry_utils.py => usr_dir.py} | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename tensor2tensor/utils/{registry_utils.py => usr_dir.py} (94%) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 5caf4a54f..4408b2aeb 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -48,7 +48,7 @@ from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing from tensor2tensor.utils import registry -from tensor2tensor.utils import registry_utils +from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -280,7 +280,7 @@ def set_random_seed(): def main(_): tf.logging.set_verbosity(tf.logging.INFO) - registry_utils.import_usr_dir(FLAGS.t2t_usr_dir) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) # Calculate the list of problems to generate. problems = sorted( diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 371d05702..8a801e70e 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -31,8 +31,8 @@ from __future__ import print_function # Dependency imports -from tensor2tensor.utils import registry_utils from tensor2tensor.utils import trainer_utils as utils +from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -49,7 +49,7 @@ flags.DEFINE_string("t2t_usr_dir", "", def main(_): tf.logging.set_verbosity(tf.logging.INFO) - registry_utils.import_usr_dir(FLAGS.t2t_usr_dir) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) utils.log_registry() utils.validate_flags() utils.run( diff --git a/tensor2tensor/utils/registry_utils.py b/tensor2tensor/utils/usr_dir.py similarity index 94% rename from tensor2tensor/utils/registry_utils.py rename to tensor2tensor/utils/usr_dir.py index 05d98f2b4..0a2d0d15c 100644 --- a/tensor2tensor/utils/registry_utils.py +++ b/tensor2tensor/utils/usr_dir.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Utilities for the t2t registry.""" +"""Utility to load code from an external user-supplied directory.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function From 4f67b7bb5956b9f0a6198763b8bd09af99ba0f2c Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 18 Jul 2017 16:56:29 -0700 Subject: [PATCH 0142/4095] Move algorithmic and WMT problems to Problem class, correct summaries. PiperOrigin-RevId: 162424062 --- tensor2tensor/bin/t2t-datagen | 59 +-- tensor2tensor/data_generators/algorithmic.py | 268 ++++++++++--- .../data_generators/algorithmic_test.py | 22 +- .../data_generators/generator_utils.py | 11 +- tensor2tensor/data_generators/problem.py | 5 +- .../data_generators/problem_hparams.py | 116 ------ tensor2tensor/data_generators/text_encoder.py | 53 ++- tensor2tensor/data_generators/wmt.py | 364 +++++++++++++++--- tensor2tensor/models/multimodel.py | 3 +- tensor2tensor/models/transformer.py | 14 +- tensor2tensor/models/transformer_test.py | 1 + tensor2tensor/utils/data_reader.py | 7 +- tensor2tensor/utils/trainer_utils.py | 9 +- 13 files changed, 600 insertions(+), 332 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 4408b2aeb..b0fd816a2 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -35,7 +35,6 @@ import tempfile import numpy as np -from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import from tensor2tensor.data_generators import audio @@ -60,6 +59,8 @@ flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", "Temporary storage directory.") flags.DEFINE_string("problem", "", "The name of the problem to generate data for.") +flags.DEFINE_string("exclude_problems", "", + "Comma-separates list of problems to exclude.") flags.DEFINE_integer("num_shards", 10, "How many shards to use.") flags.DEFINE_integer("max_cases", 0, "Maximum number of cases to generate (unbounded if 0).") @@ -74,37 +75,6 @@ flags.DEFINE_string("t2t_usr_dir", "", # Mapping from problems that we can generate data for to their generators. # pylint: disable=g-long-lambda _SUPPORTED_PROBLEM_GENERATORS = { - "algorithmic_shift_decimal40": ( - lambda: algorithmic.shift_generator(20, 10, 40, 100000), - lambda: algorithmic.shift_generator(20, 10, 80, 10000)), - "algorithmic_reverse_binary40": ( - lambda: algorithmic.reverse_generator(2, 40, 100000), - lambda: algorithmic.reverse_generator(2, 400, 10000)), - "algorithmic_reverse_decimal40": ( - lambda: algorithmic.reverse_generator(10, 40, 100000), - lambda: algorithmic.reverse_generator(10, 400, 10000)), - "algorithmic_addition_binary40": ( - lambda: algorithmic.addition_generator(2, 40, 100000), - lambda: algorithmic.addition_generator(2, 400, 10000)), - "algorithmic_addition_decimal40": ( - lambda: algorithmic.addition_generator(10, 40, 100000), - lambda: algorithmic.addition_generator(10, 400, 10000)), - "algorithmic_multiplication_binary40": ( - lambda: algorithmic.multiplication_generator(2, 40, 100000), - lambda: algorithmic.multiplication_generator(2, 400, 10000)), - "algorithmic_multiplication_decimal40": ( - lambda: algorithmic.multiplication_generator(10, 40, 100000), - lambda: algorithmic.multiplication_generator(10, 400, 10000)), - "algorithmic_reverse_nlplike_decimal8K": ( - lambda: algorithmic.reverse_generator_nlplike(8000, 70, 100000, - 10, 1.300), - lambda: algorithmic.reverse_generator_nlplike(8000, 70, 10000, - 10, 1.300)), - "algorithmic_reverse_nlplike_decimal32K": ( - lambda: algorithmic.reverse_generator_nlplike(32000, 70, 100000, - 10, 1.050), - lambda: algorithmic.reverse_generator_nlplike(32000, 70, 10000, - 10, 1.050)), "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), @@ -124,29 +94,9 @@ _SUPPORTED_PROBLEM_GENERATORS = { 2**14, 2**9), lambda: wsj_parsing.parsing_token_generator(FLAGS.tmp_dir, False, 2**14, 2**9)), - "wmt_enfr_characters": ( - lambda: wmt.enfr_character_generator(FLAGS.tmp_dir, True), - lambda: wmt.enfr_character_generator(FLAGS.tmp_dir, False)), - "wmt_enfr_tokens_8k": ( - lambda: wmt.enfr_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**13), - lambda: wmt.enfr_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**13) - ), - "wmt_enfr_tokens_32k": ( - lambda: wmt.enfr_wordpiece_token_generator(FLAGS.tmp_dir, True, 2**15), - lambda: wmt.enfr_wordpiece_token_generator(FLAGS.tmp_dir, False, 2**15) - ), - "wmt_ende_characters": ( - lambda: wmt.ende_character_generator(FLAGS.tmp_dir, True), - lambda: wmt.ende_character_generator(FLAGS.tmp_dir, False)), "wmt_ende_bpe32k": ( lambda: wmt.ende_bpe_token_generator(FLAGS.tmp_dir, True), lambda: wmt.ende_bpe_token_generator(FLAGS.tmp_dir, False)), - "wmt_zhen_tokens_32k": ( - lambda: wmt.zhen_wordpiece_token_generator(FLAGS.tmp_dir, True, - 2**15, 2**15), - lambda: wmt.zhen_wordpiece_token_generator(FLAGS.tmp_dir, False, - 2**15, 2**15) - ), "lm1b_32k": ( lambda: lm1b.generator(FLAGS.tmp_dir, True), lambda: lm1b.generator(FLAGS.tmp_dir, False) @@ -285,6 +235,9 @@ def main(_): # Calculate the list of problems to generate. problems = sorted( list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems()) + for exclude in FLAGS.exclude_problems.split(","): + if exclude: + problems = [p for p in problems if exclude not in p] if FLAGS.problem and FLAGS.problem[-1] == "*": problems = [p for p in problems if p.startswith(FLAGS.problem[:-1])] elif FLAGS.problem: @@ -364,7 +317,7 @@ def generate_data_for_problem(problem): def generate_data_for_registered_problem(problem_name): problem = registry.problem(problem_name) - problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir) + problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir, FLAGS.num_shards) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 6ec1f28a0..2169e1910 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -25,48 +25,86 @@ from tensor2tensor.data_generators import generator_utils as utils from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder from tensor2tensor.utils import registry -@registry.register_problem -class AlgorithmicIdentityBinary40(problem.Problem): - """Problem spec for algorithmic binary identity task.""" +class AlgorithmicProblem(problem.Problem): + """Base class for algorithmic problems.""" @property def num_symbols(self): - return 2 + raise NotImplementedError() + + @property + def train_generator(self): + """Generator; takes 3 args: nbr_symbols, max_length, nbr_cases.""" + raise NotImplementedError() + + @property + def dev_generator(self): + return self.train_generator + + @property + def train_length(self): + return 40 + + @property + def dev_length(self): + return 400 + + @property + def train_size(self): + return 100000 + + @property + def dev_size(self): + return 10000 + + @property + def num_shards(self): + return 10 + + def generate_data(self, data_dir, _, num_shards=None): + if num_shards is None: + num_shards = self.num_shards + + def generator_eos(generator): + """Shift by NUM_RESERVED_IDS and append EOS token.""" + for case in generator: + new_case = {} + for feature in case: + new_case[feature] = [i + text_encoder.NUM_RESERVED_TOKENS + for i in case[feature]] + [text_encoder.EOS_ID] + yield new_case + + train_generator_eos = lambda: generator_eos( # pylint: disable=g-long-lambda + self.train_generator(self.num_symbols, + self.train_length, self.train_size)) + dev_generator_eos = lambda: generator_eos( # pylint: disable=g-long-lambda + self.dev_generator(self.num_symbols, self.dev_length, self.dev_size)) - def generate_data(self, data_dir, _): utils.generate_dataset_and_shuffle( - identity_generator(self.num_symbols, 40, 100000), - self.training_filepaths(data_dir, 100, shuffled=True), - identity_generator(self.num_symbols, 400, 10000), + train_generator_eos(), + self.training_filepaths(data_dir, num_shards, shuffled=True), + dev_generator_eos(), self.dev_filepaths(data_dir, 1, shuffled=True), shuffle=False) def hparams(self, defaults, unused_model_hparams): p = defaults - vocab_size = self.num_symbols + self._encoders["inputs"].num_reserved_ids + vocab_size = self.num_symbols + text_encoder.NUM_RESERVED_TOKENS p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} p.target_modality = (registry.Modalities.SYMBOL, vocab_size) p.input_space_id = problem.SpaceID.DIGIT_0 p.target_space_id = problem.SpaceID.DIGIT_1 -@registry.register_problem -class AlgorithmicIdentityDecimal40(AlgorithmicIdentityBinary40): - """Problem spec for algorithmic decimal identity task.""" - - @property - def num_symbols(self): - return 10 - - def identity_generator(nbr_symbols, max_length, nbr_cases): """Generator for the identity (copy) task on sequences of symbols. The length of the sequence is drawn uniformly at random from [1, max_length] - and then symbols are drawn uniformly at random from [2, nbr_symbols + 2) until + and then symbols are drawn uniformly at random from [0, nbr_symbols) until nbr_cases sequences have been produced. Args: @@ -80,15 +118,37 @@ def identity_generator(nbr_symbols, max_length, nbr_cases): """ for _ in xrange(nbr_cases): l = np.random.randint(max_length) + 1 - inputs = [np.random.randint(nbr_symbols) + 2 for _ in xrange(l)] - yield {"inputs": inputs, "targets": inputs + [1]} # [1] for EOS + inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)] + yield {"inputs": inputs, "targets": inputs} + + +@registry.register_problem +class AlgorithmicIdentityBinary40(AlgorithmicProblem): + """Problem spec for algorithmic binary identity task.""" + + @property + def num_symbols(self): + return 2 + + @property + def train_generator(self): + return identity_generator + + +@registry.register_problem +class AlgorithmicIdentityDecimal40(AlgorithmicIdentityBinary40): + """Problem spec for algorithmic decimal identity task.""" + + @property + def num_symbols(self): + return 10 def shift_generator(nbr_symbols, shift, max_length, nbr_cases): """Generator for the shift task on sequences of symbols. The length of the sequence is drawn uniformly at random from [1, max_length] - and then symbols are drawn uniformly at random from [2, nbr_symbols - shift] + and then symbols are drawn uniformly at random from [0, nbr_symbols - shift] until nbr_cases sequences have been produced (output[i] = input[i] + shift). Args: @@ -103,18 +163,35 @@ def shift_generator(nbr_symbols, shift, max_length, nbr_cases): """ for _ in xrange(nbr_cases): l = np.random.randint(max_length) + 1 - inputs = [np.random.randint(nbr_symbols - shift) + 2 for _ in xrange(l)] + inputs = [np.random.randint(nbr_symbols - shift) for _ in xrange(l)] yield { "inputs": inputs, - "targets": [i + shift for i in inputs] + [1] - } # [1] for EOS + "targets": [i + shift for i in inputs] + } + + +@registry.register_problem +class AlgorithmicShiftDecimal40(AlgorithmicProblem): + """Problem spec for algorithmic decimal shift task.""" + + @property + def num_symbols(self): + return 20 + + @property + def train_generator(self): + return lambda nbr_sym, l, size: shift_generator(nbr_sym, 10, l, size) + + @property + def dev_length(self): + return 80 def reverse_generator(nbr_symbols, max_length, nbr_cases): """Generator for the reversing task on sequences of symbols. The length of the sequence is drawn uniformly at random from [1, max_length] - and then symbols are drawn uniformly at random from [2, nbr_symbols] until + and then symbols are drawn uniformly at random from [0, nbr_symbols) until nbr_cases sequences have been produced. Args: @@ -128,11 +205,33 @@ def reverse_generator(nbr_symbols, max_length, nbr_cases): """ for _ in xrange(nbr_cases): l = np.random.randint(max_length) + 1 - inputs = [np.random.randint(nbr_symbols) + 2 for _ in xrange(l)] + inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)] yield { "inputs": inputs, - "targets": list(reversed(inputs)) + [1] - } # [1] for EOS + "targets": list(reversed(inputs)) + } + + +@registry.register_problem +class AlgorithmicReverseBinary40(AlgorithmicProblem): + """Problem spec for algorithmic binary reversing task.""" + + @property + def num_symbols(self): + return 2 + + @property + def train_generator(self): + return reverse_generator + + +@registry.register_problem +class AlgorithmicReverseDecimal40(AlgorithmicReverseBinary40): + """Problem spec for algorithmic decimal reversing task.""" + + @property + def num_symbols(self): + return 10 def zipf_distribution(nbr_symbols, alpha): @@ -166,11 +265,8 @@ def zipf_random_sample(distr_map, sample_len): """ u = np.random.random(sample_len) # Random produces values in range [0.0,1.0); even if it is almost - # improbable(but possible) that it can generate a clear 0.000..0, - # we have made a sanity check to overcome this issue. On the other hand, - # t+1 is enough from saving us to generate PAD(0) and EOS(1) which are - # reservated symbols. - return [t + 1 if t > 0 else t + 2 for t in np.searchsorted(distr_map, u)] + # improbable(but possible) that it can generate a clear 0.000..0. + return list(np.searchsorted(distr_map, u)) def reverse_generator_nlplike(nbr_symbols, @@ -182,7 +278,7 @@ def reverse_generator_nlplike(nbr_symbols, The length of the sequence is drawn from a Gaussian(Normal) distribution at random from [1, max_length] and with std deviation of 1%, - then symbols are drawn from Zipf's law at random from [2, nbr_symbols] until + then symbols are drawn from Zipf's law at random from [0, nbr_symbols) until nbr_cases sequences have been produced. Args: @@ -206,8 +302,44 @@ def reverse_generator_nlplike(nbr_symbols, inputs = zipf_random_sample(distr_map, l) yield { "inputs": inputs, - "targets": list(reversed(inputs)) + [1] - } # [1] for EOS + "targets": list(reversed(inputs)) + } + + +@registry.register_problem +class AlgorithmicReverseNlplike8K(AlgorithmicProblem): + """Problem spec for algorithmic nlp-like reversing task.""" + + @property + def num_symbols(self): + return 8000 + + @property + def train_generator(self): + return lambda nbr_sym, length, size: reverse_generator_nlplike( # pylint: disable=g-long-lambda + nbr_sym, length, size, 10, 1.300) + + @property + def train_length(self): + return 70 + + @property + def dev_length(self): + return 70 + + +@registry.register_problem +class AlgorithmicReverseNlplike32K(AlgorithmicReverseNlplike8K): + """Problem spec for algorithmic nlp-like reversing task, 32K vocab.""" + + @property + def num_symbols(self): + return 32000 + + @property + def train_generator(self): + return lambda nbr_sym, length, size: reverse_generator_nlplike( # pylint: disable=g-long-lambda + nbr_sym, length, size, 10, 1.050) def lower_endian_to_number(l, base): @@ -235,7 +367,7 @@ def addition_generator(base, max_length, nbr_cases): The length of each number is drawn uniformly at random from [1, max_length/2] and then digits are drawn uniformly at random. The numbers are added and - separated by [base+1] in the input. Stops at nbr_cases. + separated by [base] in the input. Stops at nbr_cases. Args: base: in which base are the numbers. @@ -257,10 +389,31 @@ def addition_generator(base, max_length, nbr_cases): n1 = random_number_lower_endian(l1, base) n2 = random_number_lower_endian(l2, base) result = lower_endian_to_number(n1, base) + lower_endian_to_number(n2, base) - # We shift digits by 1 on input and output to leave 0 for padding. - inputs = [i + 2 for i in n1] + [base + 2] + [i + 2 for i in n2] - targets = [i + 2 for i in number_to_lower_endian(result, base)] - yield {"inputs": inputs, "targets": targets + [1]} # [1] for EOS + inputs = n1 + [base] + n2 + targets = number_to_lower_endian(result, base) + yield {"inputs": inputs, "targets": targets} + + +@registry.register_problem +class AlgorithmicAdditionBinary40(AlgorithmicProblem): + """Problem spec for algorithmic binary addition task.""" + + @property + def num_symbols(self): + return 2 + + @property + def train_generator(self): + return addition_generator + + +@registry.register_problem +class AlgorithmicAdditionDecimal40(AlgorithmicAdditionBinary40): + """Problem spec for algorithmic decimal addition task.""" + + @property + def num_symbols(self): + return 10 def multiplication_generator(base, max_length, nbr_cases): @@ -268,7 +421,7 @@ def multiplication_generator(base, max_length, nbr_cases): The length of each number is drawn uniformly at random from [1, max_length/2] and then digits are drawn uniformly at random. The numbers are multiplied - and separated by [base+1] in the input. Stops at nbr_cases. + and separated by [base] in the input. Stops at nbr_cases. Args: base: in which base are the numbers. @@ -291,7 +444,28 @@ def multiplication_generator(base, max_length, nbr_cases): n1 = random_number_lower_endian(l1, base) n2 = random_number_lower_endian(l2, base) result = lower_endian_to_number(n1, base) * lower_endian_to_number(n2, base) - # We shift digits by 1 on input and output to leave 0 for padding. - inputs = [i + 2 for i in n1] + [base + 2] + [i + 2 for i in n2] - targets = [i + 2 for i in number_to_lower_endian(result, base)] - yield {"inputs": inputs, "targets": targets + [1]} # [1] for EOS + inputs = n1 + [base] + n2 + targets = number_to_lower_endian(result, base) + yield {"inputs": inputs, "targets": targets} + + +@registry.register_problem +class AlgorithmicMultiplicationBinary40(AlgorithmicProblem): + """Problem spec for algorithmic binary multiplication task.""" + + @property + def num_symbols(self): + return 2 + + @property + def train_generator(self): + return multiplication_generator + + +@registry.register_problem +class AlgorithmicMultiplicationDecimal40(AlgorithmicMultiplicationBinary40): + """Problem spec for algorithmic decimal multiplication task.""" + + @property + def num_symbols(self): + return 10 diff --git a/tensor2tensor/data_generators/algorithmic_test.py b/tensor2tensor/data_generators/algorithmic_test.py index 9961e6173..fb8ff6719 100644 --- a/tensor2tensor/data_generators/algorithmic_test.py +++ b/tensor2tensor/data_generators/algorithmic_test.py @@ -31,14 +31,14 @@ def testIdentityGenerator(self): counter = 0 for d in algorithmic.identity_generator(3, 8, 10): counter += 1 - self.assertEqual(d["inputs"] + [1], d["targets"]) + self.assertEqual(d["inputs"], d["targets"]) self.assertEqual(counter, 10) def testReverseGenerator(self): counter = 0 for d in algorithmic.reverse_generator(3, 8, 10): counter += 1 - self.assertEqual(list(reversed(d["inputs"])) + [1], d["targets"]) + self.assertEqual(list(reversed(d["inputs"])), d["targets"]) self.assertEqual(counter, 10) def testZipfDistribution(self): @@ -53,7 +53,7 @@ def testReverseGeneratorNlpLike(self): counter = 0 for d in algorithmic.reverse_generator_nlplike(3, 8, 10): counter += 1 - self.assertEqual(list(reversed(d["inputs"])) + [1], d["targets"]) + self.assertEqual(list(reversed(d["inputs"])), d["targets"]) self.assertEqual(counter, 10) def testLowerEndianToNumber(self): @@ -78,20 +78,20 @@ def testAdditionGenerator(self): counter = 0 for d in algorithmic.addition_generator(4, 8, 10): counter += 1 - self.assertEqual(d["inputs"].count(6), 1) - self.assertEqual(d["inputs"].count(0), 0) - self.assertEqual(d["targets"].count(6), 0) - self.assertEqual(d["targets"].count(0), 0) + self.assertEqual(d["inputs"].count(4), 1) + self.assertEqual(d["inputs"].count(5), 0) + self.assertEqual(d["targets"].count(4), 0) + self.assertEqual(d["targets"].count(5), 0) self.assertEqual(counter, 10) def testMultiplicationGenerator(self): counter = 0 for d in algorithmic.multiplication_generator(4, 8, 10): counter += 1 - self.assertEqual(d["inputs"].count(6), 1) - self.assertEqual(d["inputs"].count(0), 0) - self.assertEqual(d["targets"].count(6), 0) - self.assertEqual(d["targets"].count(0), 0) + self.assertEqual(d["inputs"].count(4), 1) + self.assertEqual(d["inputs"].count(5), 0) + self.assertEqual(d["targets"].count(4), 0) + self.assertEqual(d["targets"].count(5), 0) self.assertEqual(counter, 10) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 890f92c2a..b34a87138 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -244,6 +244,10 @@ def gunzip_file(gz_path, new_path): "http://www.statmt.org/wmt13/training-parallel-un.tgz", ["un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr"] ], + [ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long + ["train.mk", "train.en"] + ], ] @@ -324,6 +328,7 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, return vocab # Use Tokenizer to count the word occurrences. + token_counts = defaultdict(int) filepath = os.path.join(tmp_dir, source_filename) with tf.gfile.GFile(filepath, mode="r") as source_file: for line in source_file: @@ -331,11 +336,11 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, if line and "\t" in line: parts = line.split("\t", maxsplit=1) part = parts[index].strip() - _ = tokenizer.encode(text_encoder.native_to_unicode(part)) + for tok in tokenizer.encode(text_encoder.native_to_unicode(part)): + token_counts[tok] += 1 vocab = text_encoder.SubwordTextEncoder.build_to_target_size( - vocab_size, tokenizer.token_counts, 1, - min(1e3, vocab_size + text_encoder.NUM_RESERVED_TOKENS)) + vocab_size, token_counts, 1, 1e3) vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 28f4dcb1b..1182ed7d1 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -67,6 +67,8 @@ class SpaceID(object): ICE_TOK = 18 # Icelandic parse tokens ICE_PARSE_TOK = 19 + # Macedonian tokens + MK_TOK = 20 class Problem(object): @@ -90,6 +92,7 @@ class Problem(object): get sharded filenames. If shuffled=False, the filenames will contain an "unshuffled" suffix; you should then shuffle the data shard-by-shard with generator_utils.shuffle_dataset. + - Allows to specify the number of shards, optionally (can be omitted). - Subclasses must override * dataset_filename() - Base filename for problem. @@ -111,7 +114,7 @@ class Problem(object): # BEGIN SUBCLASS INTERFACE # ============================================================================ - def generate_data(self, data_dir, tmp_dir): + def generate_data(self, data_dir, tmp_dir, num_shards=None): raise NotImplementedError() def hparams(self, defaults, model_hparams): diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 2890ec2a6..5922ab59a 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -217,20 +217,6 @@ def test_problem_hparams(unused_model_hparams, input_vocab_size, return p -def algorithmic(vocab_size, unused_model_hparams): - """Default parameters for algorithmic tasks.""" - p = default_problem_hparams() - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, vocab_size) - p.vocabulary = { - "inputs": text_encoder.TextEncoder(), - "targets": text_encoder.TextEncoder(), - } - p.input_space_id = 10 - p.target_space_id = 11 - return p - - def audio_timit_characters(unused_model_hparams): """English audio transcription benchmark.""" p = default_problem_hparams() @@ -377,50 +363,6 @@ def lmptb_10k(model_hparams): return p -def wmt_enfr_characters(unused_model_hparams): - """English to French translation benchmark.""" - p = default_problem_hparams() - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, 256)} - p.target_modality = (registry.Modalities.SYMBOL, 256) - p.vocabulary = { - "inputs": text_encoder.ByteTextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - p.loss_multiplier = 2.0 - p.input_space_id = 2 - p.target_space_id = 5 - return p - - -def wmt_enfr_tokens(model_hparams, wrong_vocab_size): - """English to French translation benchmark. - - Args: - model_hparams: a tf.contrib.training.HParams - wrong_vocab_size: a number used in the filename indicating the approximate - vocabulary size. This is not to be confused with the actual vocabulary - size. - Returns: - a tf.contrib.training.HParams - """ - p = default_problem_hparams() - # This vocab file must be present within the data directory. - vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.%d" % wrong_vocab_size) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.SYMBOL, subtokenizer.vocab_size) - } - p.target_modality = (registry.Modalities.SYMBOL, subtokenizer.vocab_size) - p.vocabulary = { - "inputs": subtokenizer, - "targets": subtokenizer, - } - p.input_space_id = 3 - p.target_space_id = 6 - return p - - def wmt_ende_bpe32k(model_hparams): """English to German translation benchmark.""" p = default_problem_hparams() @@ -440,47 +382,6 @@ def wmt_ende_bpe32k(model_hparams): return p -def wmt_ende_characters(unused_model_hparams): - """English to German translation benchmark.""" - p = default_problem_hparams() - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, 256)} - p.target_modality = (registry.Modalities.SYMBOL, 256) - p.vocabulary = { - "inputs": text_encoder.ByteTextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - p.loss_multiplier = 2.0 - p.input_space_id = 2 - p.target_space_id = 7 - return p - - -def wmt_zhen_tokens(model_hparams, wrong_vocab_size): - """Chinese to English translation benchmark.""" - p = default_problem_hparams() - # This vocab file must be present within the data directory. - if model_hparams.shared_embedding_and_softmax_weights == 1: - model_hparams.shared_embedding_and_softmax_weights = 0 - source_vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.zh.%d" % wrong_vocab_size) - target_vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.en.%d" % wrong_vocab_size) - source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.SYMBOL, source_token.vocab_size) - } - p.target_modality = (registry.Modalities.SYMBOL, target_token.vocab_size) - p.vocabulary = { - "inputs": source_token, - "targets": target_token, - } - p.loss_multiplier = 1.4 - p.input_space_id = 16 - p.target_space_id = 4 - return p - - def wmt_parsing_characters(model_hparams): """English to parse tree translation benchmark.""" del model_hparams # Unused. @@ -698,15 +599,6 @@ def img2img_imagenet(unused_model_hparams): # Dictionary of named hyperparameter settings for various problems. # This is only accessed through the problem_hparams function below. PROBLEM_HPARAMS_MAP = { - "algorithmic_addition_binary40": lambda p: algorithmic(4, p), - "algorithmic_addition_decimal40": lambda p: algorithmic(12, p), - "algorithmic_multiplication_binary40": lambda p: algorithmic(4, p), - "algorithmic_multiplication_decimal40": lambda p: algorithmic(12, p), - "algorithmic_reverse_binary40": lambda p: algorithmic(4, p), - "algorithmic_reverse_decimal40": lambda p: algorithmic(12, p), - "algorithmic_reverse_nlplike_decimal8K": lambda p: algorithmic(8002, p), - "algorithmic_reverse_nlplike_decimal32K": lambda p: algorithmic(32002, p), - "algorithmic_shift_decimal40": lambda p: algorithmic(22, p), "audio_timit_characters_tune": audio_timit_characters, "audio_timit_characters_test": audio_timit_characters, "audio_timit_tokens_8k_tune": lambda p: audio_timit_tokens(p, 2**13), @@ -723,15 +615,7 @@ def img2img_imagenet(unused_model_hparams): "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens( # pylint: disable=g-long-lambda p, "wsj", 2**14, 2**9), - "wmt_enfr_characters": wmt_enfr_characters, - "wmt_enfr_tokens_8k": lambda p: wmt_enfr_tokens(p, 2**13), - "wmt_enfr_tokens_32k": lambda p: wmt_enfr_tokens(p, 2**15), - "wmt_enfr_tokens_32k_shuffled": lambda p: wmt_enfr_tokens(p, 2**15), - "wmt_enfr_tokens_32k_combined": lambda p: wmt_enfr_tokens(p, 2**15), - "wmt_enfr_tokens_128k": lambda p: wmt_enfr_tokens(p, 2**17), - "wmt_ende_characters": wmt_ende_characters, "wmt_ende_bpe32k": wmt_ende_bpe32k, - "wmt_zhen_tokens_32k": lambda p: wmt_zhen_tokens(p, 2**15), "image_cifar10_tune": image_cifar10, "image_cifar10_test": image_cifar10, "image_mnist_tune": image_mnist, diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 4a5a784c2..e0ac1901e 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -24,6 +24,7 @@ from __future__ import print_function from collections import defaultdict +import re # Dependency imports @@ -41,8 +42,8 @@ EOS = "<EOS>" RESERVED_TOKENS = [PAD, EOS] NUM_RESERVED_TOKENS = len(RESERVED_TOKENS) -PAD_TOKEN = RESERVED_TOKENS.index(PAD) # Normally 0 -EOS_TOKEN = RESERVED_TOKENS.index(EOS) # Normally 1 +PAD_ID = RESERVED_TOKENS.index(PAD) # Normally 0 +EOS_ID = RESERVED_TOKENS.index(EOS) # Normally 1 if PY2: RESERVED_TOKENS_BYTES = RESERVED_TOKENS @@ -50,6 +51,13 @@ RESERVED_TOKENS_BYTES = [bytes(PAD, "ascii"), bytes(EOS, "ascii")] +# Regular expression for unescaping token strings. +# '\u' is converted to '_' +# '\\' is converted to '\' +# '\213;' is converted to unichr(213) +_UNESCAPE_REGEX = re.compile(u"|".join([r"\\u", r"\\\\", r"\\([0-9]+);"])) + + def native_to_unicode_py2(s): """Python 2: transform native string to Unicode.""" if isinstance(s, unicode): @@ -225,6 +233,7 @@ class SubwordTextEncoder(TextEncoder): def __init__(self, filename=None): """Initialize and read from a file, if provided.""" + self._alphabet = set() if filename is not None: self._load_from_file(filename) super(SubwordTextEncoder, self).__init__(num_reserved_ids=None) @@ -511,32 +520,14 @@ def _unescape_token(self, escaped_token): Returns: token: a unicode string """ - ret = u"" - escaped_token = escaped_token[:-1] - pos = 0 - while pos < len(escaped_token): - c = escaped_token[pos] - if c == "\\": - pos += 1 - if pos >= len(escaped_token): - break - c = escaped_token[pos] - if c == u"u": - ret += u"_" - pos += 1 - elif c == "\\": - ret += u"\\" - pos += 1 - else: - semicolon_pos = escaped_token.find(u";", pos) - if semicolon_pos == -1: - continue - try: - ret += unichr(int(escaped_token[pos:semicolon_pos])) - pos = semicolon_pos + 1 - except (ValueError, OverflowError) as _: - pass - else: - ret += c - pos += 1 - return ret + def match(m): + if m.group(1) is not None: + # Convert '\213;' to unichr(213) + try: + return unichr(int(m.group(1))) + except (ValueError, OverflowError) as _: + return "" + # Convert '\u' to '_' and '\\' to '\' + return u"_" if m.group(0) == u"\\u" else u"\\" + # Cut off the trailing underscore and apply the regex substitution + return self._UNESCAPE_REGEX.sub(match, escaped_token[:-1]) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 8edab8ba2..2e1f1e8af 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -38,52 +38,97 @@ FLAGS = tf.flags.FLAGS -@registry.register_problem("wmt_ende_tokens_8k") -class WMTEnDeTokens8k(problem.Problem): - """Problem spec for WMT En-De translation.""" +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + + +def _default_token_feature_encoders(data_dir, target_vocab_size): + vocab_filename = os.path.join(data_dir, "tokens.vocab.%d" % target_vocab_size) + subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) + return { + "inputs": subtokenizer, + "targets": subtokenizer, + } + + +def _default_character_feature_encoders(): + return { + "inputs": text_encoder.ByteTextEncoder(), + "targets": text_encoder.ByteTextEncoder(), + } + + +class WMTProblem(problem.Problem): + """Base class for WMT problems.""" @property - def target_vocab_size(self): - return 2**13 # 8192 + def is_character_level(self): + return False - def feature_encoders(self, data_dir): - return _default_wmt_feature_encoders(data_dir, self.target_vocab_size) + @property + def targeted_vocab_size(self): + raise NotImplementedError() # Not needed if self.is_character_level. - def generate_data(self, data_dir, tmp_dir): - generator_utils.generate_dataset_and_shuffle( - ende_wordpiece_token_generator(tmp_dir, True, self.target_vocab_size), - self.training_filepaths(data_dir, 100, shuffled=False), - ende_wordpiece_token_generator(tmp_dir, False, self.target_vocab_size), - self.dev_filepaths(data_dir, 1, shuffled=False)) + @property + def train_generator(self): + """Generator; takes tmp_dir, is_training, possibly targeted_vocab_size.""" + raise NotImplementedError() - def hparams(self, defaults, unused_model_hparams): - p = defaults - vocab_size = self._encoders["inputs"].vocab_size - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, vocab_size) - p.input_space_id = problem.SpaceID.EN_TOK - p.target_space_id = problem.SpaceID.DE_TOK + @property + def dev_generator(self): + return self.train_generator + @property + def input_space_id(self): + raise NotImplementedError() -@registry.register_problem("wmt_ende_tokens_32k") -class WMTEnDeTokens32k(WMTEnDeTokens8k): + @property + def target_space_id(self): + raise NotImplementedError() @property - def target_vocab_size(self): - return 2**15 # 32768 + def num_shards(self): + return 100 + + def generate_data(self, data_dir, tmp_dir, num_shards=None): + if num_shards is None: + num_shards = self.num_shards + if self.is_character_level: + generator_utils.generate_dataset_and_shuffle( + self.train_generator(tmp_dir, True), + self.training_filepaths(data_dir, num_shards, shuffled=False), + self.dev_generator(tmp_dir, False), + self.dev_filepaths(data_dir, 1, shuffled=False)) + else: + generator_utils.generate_dataset_and_shuffle( + self.train_generator(tmp_dir, True, self.targeted_vocab_size), + self.training_filepaths(data_dir, num_shards, shuffled=False), + self.dev_generator(tmp_dir, False, self.targeted_vocab_size), + self.dev_filepaths(data_dir, 1, shuffled=False)) + def feature_encoders(self, data_dir): + if self.is_character_level: + return _default_character_feature_encoders() + return _default_token_feature_encoders(data_dir, self.targeted_vocab_size) -def _default_wmt_feature_encoders(data_dir, target_vocab_size): - vocab_filename = os.path.join(data_dir, "tokens.vocab.%d" % target_vocab_size) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - return { - "inputs": subtokenizer, - "targets": subtokenizer, - } + def hparams(self, defaults, unused_model_hparams): + p = defaults + if self.is_character_level: + source_vocab_size = 256 + target_vocab_size = 256 + else: + source_vocab_size = self._encoders["inputs"].vocab_size + target_vocab_size = self._encoders["targets"].vocab_size + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, + source_vocab_size)} + p.target_modality = (registry.Modalities.SYMBOL, target_vocab_size) + p.input_space_id = self.input_space_id + p.target_space_id = self.target_space_id + if self.is_character_level: + p.loss_multiplier = 2.0 -# End-of-sentence marker. -EOS = text_encoder.EOS_TOKEN +# Generic generators used later for multiple problems. def character_generator(source_path, target_path, character_vocab, eos=None): @@ -208,29 +253,7 @@ def bi_vocabs_token_generator(source_path, source, target = source_file.readline(), target_file.readline() -def _get_wmt_ende_dataset(directory, filename): - """Extract the WMT en-de corpus `filename` to directory unless it's there.""" - train_path = os.path.join(directory, filename) - if not (tf.gfile.Exists(train_path + ".de") and - tf.gfile.Exists(train_path + ".en")): - # We expect that this file has been downloaded from: - # https://drive.google.com/open?id=0B_bZck-ksdkpM25jRUN2X2UxMm8 and placed - # in `directory`. - corpus_file = os.path.join(directory, FLAGS.ende_bpe_path) - with tarfile.open(corpus_file, "r:gz") as corpus_tar: - corpus_tar.extractall(directory) - return train_path - - -def ende_bpe_token_generator(tmp_dir, train): - """Instance of token generator for the WMT en->de task, training set.""" - dataset_path = ("train.tok.clean.bpe.32000" - if train else "newstest2013.tok.bpe.32000") - train_path = _get_wmt_ende_dataset(tmp_dir, dataset_path) - token_path = os.path.join(tmp_dir, "vocab.bpe.32000") - token_vocab = text_encoder.TokenTextEncoder(vocab_filename=token_path) - return token_generator(train_path + ".en", train_path + ".de", token_vocab, - EOS) +# Data-set URLs. _ENDE_TRAIN_DATASETS = [ @@ -295,6 +318,49 @@ def ende_bpe_token_generator(tmp_dir, train): ("dev/newsdev2017-zhen-src.zh", "dev/newsdev2017-zhen-ref.en") ]] +# For Macedonian-English the SETimes corpus +# from http://nlp.ffzg.hr/resources/corpora/setimes/ is used. +# The original dataset has 207,777 parallel sentences. +# For training the first 205,777 sentences are used. +_MKEN_TRAIN_DATASETS = [[ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long + ("train.mk", "train.en") +]] + +# For development 1000 parallel sentences are used. +_MKEN_TEST_DATASETS = [[ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.dev.tgz", # pylint: disable=line-too-long + ("dev.mk", "dev.en") +]] + + +# Generators. + + +def _get_wmt_ende_dataset(directory, filename): + """Extract the WMT en-de corpus `filename` to directory unless it's there.""" + train_path = os.path.join(directory, filename) + if not (tf.gfile.Exists(train_path + ".de") and + tf.gfile.Exists(train_path + ".en")): + # We expect that this file has been downloaded from: + # https://drive.google.com/open?id=0B_bZck-ksdkpM25jRUN2X2UxMm8 and placed + # in `directory`. + corpus_file = os.path.join(directory, FLAGS.ende_bpe_path) + with tarfile.open(corpus_file, "r:gz") as corpus_tar: + corpus_tar.extractall(directory) + return train_path + + +def ende_bpe_token_generator(tmp_dir, train): + """Instance of token generator for the WMT en->de task, training set.""" + dataset_path = ("train.tok.clean.bpe.32000" + if train else "newstest2013.tok.bpe.32000") + train_path = _get_wmt_ende_dataset(tmp_dir, dataset_path) + token_path = os.path.join(tmp_dir, "vocab.bpe.32000") + token_vocab = text_encoder.TokenTextEncoder(vocab_filename=token_path) + return token_generator(train_path + ".en", train_path + ".de", token_vocab, + EOS) + def _compile_data(tmp_dir, datasets, filename): """Concatenate all `datasets` and save to `filename`.""" @@ -346,6 +412,35 @@ def ende_wordpiece_token_generator(tmp_dir, train, vocab_size): symbolizer_vocab, EOS) +@registry.register_problem("wmt_ende_tokens_8k") +class WMTEnDeTokens8k(WMTProblem): + """Problem spec for WMT En-De translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def train_generator(self): + return ende_wordpiece_token_generator + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.DE_TOK + + +@registry.register_problem("wmt_ende_tokens_32k") +class WMTEnDeTokens32k(WMTEnDeTokens8k): + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + def ende_character_generator(tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS @@ -355,8 +450,29 @@ def ende_character_generator(tmp_dir, train): character_vocab, EOS) -def zhen_wordpiece_token_generator(tmp_dir, train, source_vocab_size, - target_vocab_size): +@registry.register_problem("wmt_ende_characters") +class WMTEnDeCharacters(WMTProblem): + """Problem spec for WMT En-De translation.""" + + @property + def is_character_level(self): + return True + + @property + def train_generator(self): + return ende_character_generator + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.DE_CHR + + +def zhen_wordpiece_token_bigenerator(tmp_dir, train, source_vocab_size, + target_vocab_size): """Wordpiece generator for the WMT'17 zh-en dataset.""" datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS source_datasets = [[item[0], [item[1][0]]] for item in datasets] @@ -373,6 +489,53 @@ def zhen_wordpiece_token_generator(tmp_dir, train, source_vocab_size, source_vocab, target_vocab, EOS) +def zhen_wordpiece_token_generator(tmp_dir, train, vocab_size): + return zhen_wordpiece_token_bigenerator(tmp_dir, train, + vocab_size, vocab_size) + + +@registry.register_problem("wmt_zhen_tokens_8k") +class WMTZhEnTokens8k(WMTProblem): + """Problem spec for WMT Zh-En translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def train_generator(self): + return zhen_wordpiece_token_generator + + @property + def input_space_id(self): + return problem.SpaceID.ZH_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + def feature_encoders(self, data_dir): + vocab_size = self.targeted_vocab_size + source_vocab_filename = os.path.join(data_dir, + "tokens.vocab.zh.%d" % vocab_size) + target_vocab_filename = os.path.join(data_dir, + "tokens.vocab.en.%d" % vocab_size) + source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_token, + "targets": target_token, + } + + +@registry.register_problem("wmt_zhen_tokens_32k") +class WMTZhEnTokens32k(WMTZhEnTokens8k): + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): """Instance of token generator for the WMT en->fr task.""" symbolizer_vocab = generator_utils.get_or_generate_vocab( @@ -384,6 +547,35 @@ def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): symbolizer_vocab, EOS) +@registry.register_problem("wmt_enfr_tokens_8k") +class WMTEnFrTokens8k(WMTProblem): + """Problem spec for WMT En-Fr translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def train_generator(self): + return enfr_wordpiece_token_generator + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.FR_TOK + + +@registry.register_problem("wmt_enfr_tokens_32k") +class WMTEnFrTokens32k(WMTEnFrTokens8k): + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + def enfr_character_generator(tmp_dir, train): """Instance of character generator for the WMT en->fr task.""" character_vocab = text_encoder.ByteTextEncoder() @@ -394,6 +586,62 @@ def enfr_character_generator(tmp_dir, train): character_vocab, EOS) +@registry.register_problem("wmt_enfr_characters") +class WMTEnFrCharacters(WMTProblem): + """Problem spec for WMT En-Fr translation.""" + + @property + def is_character_level(self): + return True + + @property + def train_generator(self): + return enfr_character_generator + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.FR_CHR + + +def mken_wordpiece_token_generator(tmp_dir, train, vocab_size): + """Wordpiece generator for the SETimes Mk-En dataset.""" + datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size, + source_datasets + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + +@registry.register_problem("setimes_mken_tokens_32k") +class SETimesMkEnTokens32k(WMTProblem): + """Problem spec for SETimes Mk-En translation.""" + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + @property + def train_generator(self): + return mken_wordpiece_token_generator + + @property + def input_space_id(self): + return problem.SpaceID.MK_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + def parsing_character_generator(tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() filename = "parsing_%s" % ("train" if train else "dev") diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index 6f12db86d..ee079fa6d 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -164,7 +164,8 @@ def flatten(inputs): x, hparams.filter_size, hparams.hidden_size, - dropout=hparams.dropout) + dropout=hparams.dropout, + summaries=False) x = dp(residual_fn2, x, y, hparams) x = dp(tf.expand_dims, x, 2) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index b341d6fe0..0bb01c0f8 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -60,8 +60,6 @@ def residual_fn(x, y): return common_layers.layer_norm(x + tf.nn.dropout( y, 1.0 - hparams.residual_dropout)) - # encoder_input = tf.squeeze(encoder_input, 2) - # decoder_input = tf.squeeze(decoder_input, 2) encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) encoder_output = transformer_encoder(encoder_input, residual_fn, @@ -146,7 +144,7 @@ def transformer_encoder(encoder_input, """ x = encoder_input # Summaries don't work in multi-problem setting yet. - summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 + summaries = len(hparams.problems) < 2 with tf.variable_scope(name): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): @@ -192,7 +190,7 @@ def transformer_decoder(decoder_input, """ x = decoder_input # Summaries don't work in multi-problem setting yet. - summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 + summaries = len(hparams.problems) < 2 with tf.variable_scope(name): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): @@ -236,12 +234,15 @@ def transformer_ffn_layer(x, hparams): Returns: a Tensor of shape [batch_size, length, hparams.hidden_size] """ + # Summaries don't work in multi-problem setting yet. + summaries = len(hparams.problems) < 2 if hparams.ffn_layer == "conv_hidden_relu": return common_layers.conv_hidden_relu( x, hparams.filter_size, hparams.hidden_size, - dropout=hparams.relu_dropout) + dropout=hparams.relu_dropout, + summaries=summaries) elif hparams.ffn_layer == "parameter_attention": return common_attention.parameter_attention( x, @@ -259,7 +260,8 @@ def transformer_ffn_layer(x, hparams): kernel_size=(3, 1), second_kernel_size=(31, 1), padding="LEFT", - dropout=hparams.relu_dropout) + dropout=hparams.relu_dropout, + summaries=summaries) else: assert hparams.ffn_layer == "none" return x diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index ca099c653..997b5d172 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -38,6 +38,7 @@ def _testTransformer(self, net): hparams = transformer.transformer_tiny() p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, vocab_size) + hparams.problems = [p_hparams] inputs = -1 + np.random.random_integers( vocab_size, size=(batch_size, input_length, 1, 1)) targets = -1 + np.random.random_integers( diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index a3e9835ac..cb84b9e3e 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -181,10 +181,13 @@ def input_pipeline(data_file_pattern, capacity, mode): """Input pipeline, returns a dictionary of tensors from queues.""" # Read from image TFRecords if the file has "image" in its name. if data_file_pattern and "image" in data_file_pattern: + label_key = "image/class/label" + if "fsns" in data_file_pattern: + label_key = "image/unpadded_label" data_fields = { "image/encoded": tf.FixedLenFeature((), tf.string), "image/format": tf.FixedLenFeature((), tf.string), - "image/class/label": tf.VarLenFeature(tf.int64) + label_key: tf.VarLenFeature(tf.int64) } data_items_to_decoders = { "inputs": @@ -193,7 +196,7 @@ def input_pipeline(data_file_pattern, capacity, mode): format_key="image/format", channels=1 if "mnist" in data_file_pattern else 3), "targets": - tf.contrib.slim.tfexample_decoder.Tensor("image/class/label"), + tf.contrib.slim.tfexample_decoder.Tensor(label_key), } elif data_file_pattern and "audio" in data_file_pattern: data_type = tf.int64 if "timit" in data_file_pattern else tf.float32 diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 7c486df8d..08faeed2c 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -585,6 +585,7 @@ def decode_from_dataset(estimator): tf.logging.info("Performing local inference.") infer_problems_data = get_datasets_for_mode(hparams.data_dir, tf.contrib.learn.ModeKeys.INFER) + infer_input_fn = get_input_fn( mode=tf.contrib.learn.ModeKeys.INFER, hparams=hparams, @@ -625,9 +626,11 @@ def log_fn(inputs, # The function predict() returns an iterable over the network's # predictions from the test input. We use it to log inputs and decodes. - for j, result in enumerate(result_iter): - inputs, targets, outputs = (result["inputs"], result["targets"], - result["outputs"]) + inputs_iter = result_iter["inputs"] + targets_iter = result_iter["targets"] + outputs_iter = result_iter["outputs"] + for j, result in enumerate(zip(inputs_iter, targets_iter, outputs_iter)): + inputs, targets, outputs = result if FLAGS.decode_return_beams: output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) for k, beam in enumerate(output_beams): From d3502cbea1d8800faafd33bdd5af54e1e106cff9 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 18 Jul 2017 16:58:57 -0700 Subject: [PATCH 0143/4095] v1.1.0 PiperOrigin-RevId: 162424323 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 00325cff2..d8fd19cf4 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.0.14', + version='1.1.0', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From f703629068ae3d9f984044e241f2686f481a948a Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 18 Jul 2017 17:47:09 -0700 Subject: [PATCH 0144/4095] Change summary generation to work better in multi-model case. PiperOrigin-RevId: 162429483 --- tensor2tensor/models/attention_lm.py | 3 --- tensor2tensor/models/attention_lm_moe.py | 1 - tensor2tensor/models/common_attention.py | 17 ++++-------- tensor2tensor/models/common_layers.py | 26 +++++++------------ tensor2tensor/models/long_answer.py | 1 - tensor2tensor/models/multimodel.py | 5 +--- tensor2tensor/models/slicenet.py | 8 +++--- tensor2tensor/models/transformer.py | 15 ++--------- .../models/transformer_alternative.py | 3 --- tensor2tensor/utils/trainer_utils.py | 7 +++++ 10 files changed, 27 insertions(+), 59 deletions(-) diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 947dc9306..752de038e 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -101,8 +101,6 @@ def attention_lm_decoder(decoder_input, y: a Tensors """ x = decoder_input - # Summaries don't work in multi-problem setting yet. - summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 with tf.variable_scope(name): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): @@ -117,7 +115,6 @@ def attention_lm_decoder(decoder_input, hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - summaries=summaries, name="decoder_self_attention")) x = residual_fn(x, common_layers.conv_hidden_relu( diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 952ff1a71..2754e8366 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -69,7 +69,6 @@ def residual_fn(x, y): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - summaries=True, name="decoder_self_attention") x = dp(residual_fn, x, y) with tf.variable_scope("ffn"): diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index b0d0403cd..6aa8a2a07 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -312,7 +312,6 @@ def dot_product_attention(q, v, bias, dropout_rate=0.0, - summaries=False, image_shapes=None, name=None): """dot-product attention. @@ -323,7 +322,6 @@ def dot_product_attention(q, v: a Tensor with shape [batch, heads, length_kv, depth_v] bias: bias Tensor (see attention_bias()) dropout_rate: a floating point number - summaries: a boolean image_shapes: optional tuple of integer scalars. see comments for attention_image_summary() name: an optional string @@ -340,13 +338,13 @@ def dot_product_attention(q, weights = tf.nn.softmax(logits, name="attention_weights") # dropping out the attention links for each of the heads weights = tf.nn.dropout(weights, 1.0 - dropout_rate) - if summaries and not tf.get_variable_scope().reuse: + if not tf.get_variable_scope().reuse: attention_image_summary(weights, image_shapes) return tf.matmul(weights, v) def masked_local_attention_1d( - q, k, v, block_length=128, summaries=True, name=None): + q, k, v, block_length=128, name=None): """Attention to the source position and a neigborhood to the left of it. The sequence is divided into blocks of length block_size. @@ -362,7 +360,6 @@ def masked_local_attention_1d( k: a Tensor with shape [batch, heads, length, depth_k] v: a Tensor with shape [batch, heads, length, depth_v] block_length: an integer - summaries: a boolean name: an optional string Returns: @@ -394,7 +391,7 @@ def masked_local_attention_1d( first_v = tf.slice(v, [0, 0, 0, 0], [-1, -1, block_length, -1]) first_output = dot_product_attention( first_q, first_k, first_v, attention_bias_lower_triangle(block_length), - summaries=summaries, name="fist_block") + name="fist_block") # compute attention for all subsequent query blocks. q = tf.reshape(q, [batch, heads, num_blocks, block_length, depth_k]) @@ -442,7 +439,6 @@ def multihead_attention(query_antecedent, output_depth, num_heads, dropout_rate, - summaries=False, image_shapes=None, attention_type="dot_product", block_length=128, @@ -458,7 +454,6 @@ def multihead_attention(query_antecedent, output_depth: an integer num_heads: an integer dividing total_key_depth and total_value_depth dropout_rate: a floating point number - summaries: a boolean image_shapes: optional tuple of integer scalars. see comments for attention_image_summary() attention_type: a string, either "dot_product" or "local_mask_right" @@ -509,12 +504,10 @@ def multihead_attention(query_antecedent, q *= key_depth_per_head**-0.5 if attention_type == "dot_product": x = dot_product_attention( - q, k, v, bias, dropout_rate, summaries, image_shapes) + q, k, v, bias, dropout_rate, image_shapes) else: assert attention_type == "local_mask_right" - x = masked_local_attention_1d(q, k, v, - block_length=block_length, - summaries=summaries) + x = masked_local_attention_1d(q, k, v, block_length=block_length) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") return x diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 1e7050570..638535aa2 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -777,7 +777,7 @@ def moe_layer(data_parallelism, xs_2d = dp(tf.reshape, xs, [[-1, model_hidden_size]] * dp.n) # Call the MoE moe_out_2d, importance, load, _, _ = moe.Eval( - dp.devices, xs_2d, train, identifiers=None, summaries=True) + dp.devices, xs_2d, train, identifiers=None) # Reshape the output to the original shape. moe_out = dp(tf.reshape, moe_out_2d, dp(tf.shape, xs)) # These losses encourage equal load on the different experts. @@ -785,7 +785,7 @@ def moe_layer(data_parallelism, return moe_out, loss -def simple_attention(target, source, bias=None, summaries=True): +def simple_attention(target, source, bias=None): """A simple attention function. Args: @@ -795,7 +795,6 @@ def simple_attention(target, source, bias=None, summaries=True): `[batch, source_timesteps_1, source_timesteps_2, depth]` bias: an optional `Tensor` with shape `[batch, timesteps, 1, 1]` used to mask the attention to not attend to padding of input. - summaries: Boolean, whether to output summaries. Returns: a `Tensor` with same shape as `target` @@ -814,7 +813,7 @@ def simple_attention(target, source, bias=None, summaries=True): if bias is not None: attention += tf.expand_dims(tf.squeeze(bias, axis=[2, 3]), axis=1) attention = tf.nn.softmax(attention) - if summaries and not tf.get_variable_scope().reuse: + if not tf.get_variable_scope().reuse: tf.summary.image("attention", tf.expand_dims(attention, 3), max_outputs=5) attended = tf.matmul(attention, source) return tf.reshape(attended, target_shape) @@ -861,8 +860,7 @@ def multiscale_conv_sum(inputs, output_size, dilation_rates_and_kernel_sizes, def multiscale_conv_and_attention(x, padding, hparams, - source=None, - summaries=True): + source=None): """A common part of t2t layers. First, do a linear multiscale convolution @@ -875,7 +873,6 @@ def multiscale_conv_and_attention(x, padding: a padding type hparams: hyperparameters for model source: optional source tensor for attention. (encoder output) - summaries: Boolean, whether to output summaries. Returns: a Tensor. @@ -893,7 +890,7 @@ def multiscale_conv_and_attention(x, x = conv(x, hparams.hidden_size, (1, 1)) x = noam_norm(x + conv_sum) if source is not None: - x = noam_norm(x + simple_attention(x, source, summaries=summaries)) + x = noam_norm(x + simple_attention(x, source)) return x @@ -930,8 +927,7 @@ def conv_with_pools(inputs, output_size, kernel_size, pool_sizes, pooling_type, def conv_with_pools_and_attention(x, padding, hparams, - source=None, - summaries=True): + source=None): """A common part of t2t layers. First, do conv_with_pools @@ -944,7 +940,6 @@ def conv_with_pools_and_attention(x, padding: a padding type hparams: hyperparameters for model source: optional source tensor for attention. (encoder output) - summaries: Boolean, whether to output summaries. Returns: a Tensor. @@ -959,7 +954,7 @@ def conv_with_pools_and_attention(x, conv_sum += x x = noam_norm(conv_sum) if source is not None: - x = noam_norm(x + simple_attention(x, source, summaries=summaries)) + x = noam_norm(x + simple_attention(x, source)) return x @@ -1057,7 +1052,6 @@ def attention_1d_v0(source, transform_source=True, transform_target=True, transform_output=True, - summaries=True, name=None): """multi-headed attention. @@ -1075,7 +1069,6 @@ def attention_1d_v0(source, transform_source: a boolean transform_target: a boolean transform_output: a boolean - summaries: a boolean name: an optional string Returns: @@ -1116,7 +1109,7 @@ def _maybe_transform(t, size, should_transform, name): mask = (1.0 - mask) * -1e9 attention += mask attention = tf.nn.softmax(attention) - if summaries and not tf.get_variable_scope().reuse: + if not tf.get_variable_scope().reuse: # Compute a color image summary. image = tf.reshape(attention, [batch, num_heads, target_length, source_length]) @@ -1162,7 +1155,6 @@ def conv_hidden_relu(inputs, output_size, kernel_size=(1, 1), second_kernel_size=(1, 1), - summaries=True, dropout=0.0, **kwargs): """Hidden layer with RELU activation followed by linear projection.""" @@ -1183,7 +1175,7 @@ def conv_hidden_relu(inputs, **kwargs) if dropout != 0.0: h = tf.nn.dropout(h, 1.0 - dropout) - if summaries and not tf.get_variable_scope().reuse: + if not tf.get_variable_scope().reuse: tf.summary.histogram("hidden_density_logit", relu_density_logit( h, list(range(inputs.shape.ndims - 1)))) diff --git a/tensor2tensor/models/long_answer.py b/tensor2tensor/models/long_answer.py index 15067e120..7bb6a4a55 100644 --- a/tensor2tensor/models/long_answer.py +++ b/tensor2tensor/models/long_answer.py @@ -75,7 +75,6 @@ def residual_fn(x, y): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - summaries=True, attention_type="local_mask_right", block_length=hparams.block_length, name="decoder_self_attention") diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index ee079fa6d..bf06dfd65 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -138,7 +138,6 @@ def flatten(inputs): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - summaries=False, name="decoder_self_attention") z = dp(common_attention.multihead_attention, y, @@ -149,7 +148,6 @@ def flatten(inputs): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - summaries=False, name="encdec_attention") x = dp(residual_fn3, x, y, z, hparams) with tf.variable_scope("ffn"): @@ -164,8 +162,7 @@ def flatten(inputs): x, hparams.filter_size, hparams.hidden_size, - dropout=hparams.dropout, - summaries=False) + dropout=hparams.dropout) x = dp(residual_fn2, x, y, hparams) x = dp(tf.expand_dims, x, 2) diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index 43913eab1..2ad4c89d1 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -64,8 +64,7 @@ def attention(targets_shifted, inputs_encoded, norm_fn, hparams, bias=None): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - name="self_attention", - summaries=False) + name="self_attention") qv = common_attention.multihead_attention( qv, inputs_encoded, @@ -75,12 +74,11 @@ def attention(targets_shifted, inputs_encoded, norm_fn, hparams, bias=None): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - name="encdec_attention", - summaries=False) + name="encdec_attention") return tf.expand_dims(qv, 2) elif hparams.attention_type == "simple": targets_with_attention = common_layers.simple_attention( - targets_timed, inputs_encoded, bias=bias, summaries=False) + targets_timed, inputs_encoded, bias=bias) return norm_fn(targets_shifted + targets_with_attention, name="attn_norm") diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 0bb01c0f8..b24f7fa50 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -143,8 +143,6 @@ def transformer_encoder(encoder_input, y: a Tensors """ x = encoder_input - # Summaries don't work in multi-problem setting yet. - summaries = len(hparams.problems) < 2 with tf.variable_scope(name): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): @@ -159,7 +157,6 @@ def transformer_encoder(encoder_input, hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - summaries=summaries, name="encoder_self_attention")) x = residual_fn(x, transformer_ffn_layer(x, hparams)) return x @@ -189,8 +186,6 @@ def transformer_decoder(decoder_input, y: a Tensors """ x = decoder_input - # Summaries don't work in multi-problem setting yet. - summaries = len(hparams.problems) < 2 with tf.variable_scope(name): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): @@ -205,7 +200,6 @@ def transformer_decoder(decoder_input, hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - summaries=summaries, name="decoder_self_attention")) x = residual_fn( x, @@ -218,7 +212,6 @@ def transformer_decoder(decoder_input, hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - summaries=summaries, name="encdec_attention")) x = residual_fn(x, transformer_ffn_layer(x, hparams)) return x @@ -234,15 +227,12 @@ def transformer_ffn_layer(x, hparams): Returns: a Tensor of shape [batch_size, length, hparams.hidden_size] """ - # Summaries don't work in multi-problem setting yet. - summaries = len(hparams.problems) < 2 if hparams.ffn_layer == "conv_hidden_relu": return common_layers.conv_hidden_relu( x, hparams.filter_size, hparams.hidden_size, - dropout=hparams.relu_dropout, - summaries=summaries) + dropout=hparams.relu_dropout) elif hparams.ffn_layer == "parameter_attention": return common_attention.parameter_attention( x, @@ -260,8 +250,7 @@ def transformer_ffn_layer(x, hparams): kernel_size=(3, 1), second_kernel_size=(31, 1), padding="LEFT", - dropout=hparams.relu_dropout, - summaries=summaries) + dropout=hparams.relu_dropout) else: assert hparams.ffn_layer == "none" return x diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py index aed074d56..280dbc713 100644 --- a/tensor2tensor/models/transformer_alternative.py +++ b/tensor2tensor/models/transformer_alternative.py @@ -140,8 +140,6 @@ def alt_transformer_decoder(decoder_input, """Alternative decoder.""" x = decoder_input - # Summaries don't work in multi-problem setting yet. - summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 with tf.variable_scope(name): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): @@ -155,7 +153,6 @@ def alt_transformer_decoder(decoder_input, hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - summaries=summaries, name="encdec_attention") x_ = residual_fn(x_, composite_layer(x_, mask, hparams)) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 08faeed2c..f7d3010a9 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -550,6 +550,13 @@ def nth_model(n): optimizer=opt, colocate_gradients_with_ops=True) + # Remove summaries that will fail to run because they are in conditionals. + # TODO(cwhipkey): Test with this code removed, later in 2017. + summaries = tf.get_collection_ref(tf.GraphKeys.SUMMARIES) + for i in range(len(summaries)-1, -1, -1): + if summaries[i].name.startswith("cond_"): + del summaries[i] + tf.logging.info("Global model_fn finished.") return run_info, total_loss, train_op From 80dba8903f053314510ae4042e0171ec970e2f83 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Tue, 18 Jul 2017 21:52:38 -0700 Subject: [PATCH 0145/4095] Add celeba dataset, add to problems PiperOrigin-RevId: 162444538 --- tensor2tensor/bin/t2t-datagen | 3 ++ tensor2tensor/data_generators/image.py | 38 +++++++++++++++++++ .../data_generators/problem_hparams.py | 13 +++++++ tensor2tensor/utils/data_reader.py | 4 ++ 4 files changed, 58 insertions(+) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index b0fd816a2..f0aa26ceb 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -120,6 +120,9 @@ _SUPPORTED_PROBLEM_GENERATORS = { "image_mscoco_characters_test": ( lambda: image.mscoco_generator(FLAGS.tmp_dir, True, 80000), lambda: image.mscoco_generator(FLAGS.tmp_dir, False, 40000)), + "image_celeba_tune": ( + lambda: image.celeba_generator(FLAGS.tmp_dir, 162770), + lambda: image.celeba_generator(FLAGS.tmp_dir, 19867, 162770)), "image_mscoco_tokens_8k_test": ( lambda: image.mscoco_generator( FLAGS.tmp_dir, diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 0cba1800b..79bb51f3c 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -347,3 +347,41 @@ def hparams(self, defaults, model_hparams): p.target_modality = (registry.Modalities.SYMBOL, vocab_size) p.input_space_id = problem.SpaceID.DIGIT_0 p.target_space_id = problem.SpaceID.DIGIT_1 + + +# Filename for CELEBA data. +_CELEBA_NAME = "img_align_celeba" + + +def _get_celeba(directory): + """Download and extract CELEBA to directory unless it is there.""" + path = os.path.join(directory, _CELEBA_NAME) + if not tf.gfile.Exists(path): + # We expect that this file has been downloaded from: + # https://drive.google.com/uc?export=download&id=0B7EVK8r0v71pZjFTYXZWM3FlRnM + # and placed in `directory`. + zipfile.ZipFile(path+".zip", "r").extractall(directory) + + +def celeba_generator(tmp_dir, how_many, start_from=0): + """Image generator for CELEBA dataset. + + Args: + tmp_dir: path to temporary storage directory. + how_many: how many images and labels to generate. + start_from: from which image to start. + + Yields: + A dictionary representing the images with the following fields: + * image/encoded: the string encoding the image as JPEG, + * image/format: the string "jpeg" representing image format, + """ + _get_celeba(tmp_dir) + image_files = tf.gfile.Glob(tmp_dir + "/*.jpg") + for filename in image_files[start_from:start_from+how_many]: + with tf.gfile.Open(filename, "r") as f: + encoded_image_data = f.read() + yield { + "image/encoded": [encoded_image_data], + "image/format": ["jpeg"], + } diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 5922ab59a..3347fe4f6 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -596,6 +596,18 @@ def img2img_imagenet(unused_model_hparams): return p +def image_celeba(unused_model_hparams): + """Image CelebA dataset.""" + p = default_problem_hparams() + p.input_modality = {"inputs": ("image:identity_no_pad", None)} + p.target_modality = ("image:identity_no_pad", None) + p.batch_size_multiplier = 256 + p.max_expected_batch_size_per_shard = 4 + p.input_space_id = 1 + p.target_space_id = 1 + return p + + # Dictionary of named hyperparameter settings for various problems. # This is only accessed through the problem_hparams function below. PROBLEM_HPARAMS_MAP = { @@ -620,6 +632,7 @@ def img2img_imagenet(unused_model_hparams): "image_cifar10_test": image_cifar10, "image_mnist_tune": image_mnist, "image_mnist_test": image_mnist, + "image_celeba_tune": image_celeba, "image_mscoco_characters_tune": image_mscoco_characters, "image_mscoco_characters_test": image_mscoco_characters, "image_mscoco_tokens_8k_test": lambda p: image_mscoco_tokens(p, 2**13), diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index cb84b9e3e..cd8e6c2d3 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -161,6 +161,10 @@ def preprocess(img): inputs = examples["inputs"] examples["inputs"] = resize(inputs, 16) examples["targets"] = resize(inputs, 64) + elif "image_celeba" in data_file_pattern: + inputs = examples["inputs"] + examples["inputs"] = resize(inputs, 8) + examples["targets"] = resize(inputs, 32) elif "audio" in data_file_pattern: # Reshape audio to proper shape From 78acdb4f3b0908bbdf32fea8b98eee5b65641ef9 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 19 Jul 2017 15:13:13 -0700 Subject: [PATCH 0146/4095] Fix a bug in text_encoder. "self._UNESCAPE_REGEX -> _UNESCAPE_REGEX" PiperOrigin-RevId: 162542600 --- tensor2tensor/data_generators/text_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index e0ac1901e..8be22ce0b 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -530,4 +530,4 @@ def match(m): # Convert '\u' to '_' and '\\' to '\' return u"_" if m.group(0) == u"\\u" else u"\\" # Cut off the trailing underscore and apply the regex substitution - return self._UNESCAPE_REGEX.sub(match, escaped_token[:-1]) + return _UNESCAPE_REGEX.sub(match, escaped_token[:-1]) From 293b5f6ef63a7a6f5ae546e050967cf79c74b4d2 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 19 Jul 2017 16:36:48 -0700 Subject: [PATCH 0147/4095] Add genetics dataset - data generation only PiperOrigin-RevId: 162553677 --- tensor2tensor/bin/t2t-datagen | 5 +- tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/genetics.py | 212 ++++++++++++++++++ tensor2tensor/data_generators/problem.py | 6 + 4 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 tensor2tensor/data_generators/genetics.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index f0aa26ceb..1ba354695 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -28,6 +28,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import random import tempfile @@ -320,7 +321,9 @@ def generate_data_for_problem(problem): def generate_data_for_registered_problem(problem_name): problem = registry.problem(problem_name) - problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir, FLAGS.num_shards) + problem.generate_data(os.path.expanduser(FLAGS.data_dir), + os.path.expanduser(FLAGS.tmp_dir), + FLAGS.num_shards) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 364c252a7..0a2503bd2 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -21,6 +21,7 @@ from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import audio +from tensor2tensor.data_generators import genetics from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import ptb diff --git a/tensor2tensor/data_generators/genetics.py b/tensor2tensor/data_generators/genetics.py new file mode 100644 index 000000000..255e0caf9 --- /dev/null +++ b/tensor2tensor/data_generators/genetics.py @@ -0,0 +1,212 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Genetics problems. + +Inputs are bases ACTG (with indices assigned in that order). + +Requires the h5py library. + +File format expected: + * h5 file + * h5 datasets should include {train, valid, test}_{in, na, out}, which will + map to inputs, targets mask, and targets for the train, dev, and test + datasets. + * Each record in *_in is a bool 2-D numpy array with one-hot encoded base + pairs with shape [num_input_timesteps, 4]. The base order is ACTG. + * Each record in *_na is a bool 1-D numpy array with shape + [num_output_timesteps]. + * Each record in *_out is a float 2-D numpy array with shape + [num_output_timesteps, num_predictions]. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import multiprocessing as mp +import os + +# Dependency imports + +import h5py +import numpy as np + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import registry + +_bases = list("ACTG") +BASE_TO_ID = dict(zip(_bases, range(len(_bases)))) +ID_TO_BASE = dict(zip(range(len(_bases)), _bases)) +UNK_ID = len(_bases) + + +# TODO(rsepassi): +# * DataEncoder for genetic bases +# * GeneticModality and problem hparams +# * Training preprocessing + + +class GeneticsProblem(problem.Problem): + + @property + def download_url(self): + raise NotImplementedError() + + @property + def h5_file(self): + raise NotImplementedError() + + def generate_data(self, data_dir, tmp_dir, num_shards=None): + if num_shards is None: + num_shards = 100 + + # Download source data + h5_filepath = generator_utils.maybe_download(tmp_dir, self.h5_file, + self.download_url) + with h5py.File(h5_filepath, "r") as h5_file: + num_train_examples = h5_file["train_in"].len() + num_dev_examples = h5_file["valid_in"].len() + num_test_examples = h5_file["test_in"].len() + + # Collect all_filepaths to later shuffle + all_filepaths = [] + # Collect created shard processes to start and join + processes = [] + + datasets = [(self.training_filepaths, num_shards, "train", + num_train_examples), (self.dev_filepaths, 1, "valid", + num_dev_examples), + (self.test_filepaths, 1, "test", num_test_examples)] + for fname_fn, nshards, key_prefix, num_examples in datasets: + outfiles = fname_fn(data_dir, nshards, shuffled=False) + all_filepaths.extend(outfiles) + for start_idx, end_idx, outfile in generate_shard_args( + outfiles, num_examples): + p = mp.Process( + target=generate_dataset, + args=(h5_filepath, key_prefix, [outfile], start_idx, end_idx)) + processes.append(p) + + # Start and wait for processes + assert len(processes) == num_shards + 2 # 1 per training shard + dev + test + for p in processes: + p.start() + for p in processes: + p.join() + + # Shuffle + generator_utils.shuffle_dataset(all_filepaths) + + +@registry.register_problem("genetics_cage10") +class GeneticsCAGE10(GeneticsProblem): + + @property + def download_url(self): + return "https://storage.googleapis.com/262k_binned/cage10_l262k_w128.h5" + + @property + def h5_file(self): + return "cage10.h5" + + +@registry.register_problem("genetics_gm12878") +class GeneticsGM12878(GeneticsProblem): + + @property + def download_url(self): + return "https://storage.googleapis.com/262k_binned/gm12878_l262k_w128.h5" + + @property + def h5_file(self): + return "gm12878.h5" + + +def generate_shard_args(outfiles, num_examples): + """Generate start and end indices per outfile.""" + num_shards = len(outfiles) + num_examples_per_shard = num_examples // num_shards + start_idxs = [i * num_examples_per_shard for i in xrange(num_shards)] + end_idxs = list(start_idxs) + end_idxs.pop(0) + end_idxs.append(num_examples) + return zip(start_idxs, end_idxs, outfiles) + + +def generate_dataset(h5_filepath, + key_prefix, + out_filepaths, + start_idx=None, + end_idx=None): + print("PID: %d, Key: %s, (Start, End): (%s, %s)" % (os.getpid(), key_prefix, + start_idx, end_idx)) + generator_utils.generate_files( + dataset_generator(h5_filepath, key_prefix, start_idx, end_idx), + out_filepaths) + + +def dataset_generator(filepath, dataset, start_idx=None, end_idx=None): + with h5py.File(filepath, "r") as h5_file: + # Get input keys from h5_file + src_keys = [s % dataset for s in ["%s_in", "%s_na", "%s_out"]] + src_values = [h5_file[k] for k in src_keys] + inp_data, mask_data, out_data = src_values + assert len(set([v.len() for v in src_values])) == 1 + + if start_idx is None: + start_idx = 0 + if end_idx is None: + end_idx = inp_data.len() + + for i in xrange(start_idx, end_idx): + if i % 100 == 0: + print("Generating example %d for %s" % (i, dataset)) + inputs, mask, outputs = inp_data[i], mask_data[i], out_data[i] + yield to_example_dict(inputs, mask, outputs) + + +def to_example_dict(inputs, mask, outputs): + """Convert single h5 record to an example dict.""" + # Inputs + input_ids = [] + last_idx = -1 + for row in np.argwhere(inputs): + idx, base_id = row + idx, base_id = int(idx), int(base_id) + assert idx > last_idx # if not, means 2 True values in 1 row + # Some rows are all False. Those rows are mapped to UNK_ID. + while idx != last_idx + 1: + input_ids.append(UNK_ID + text_encoder.NUM_RESERVED_TOKENS) + last_idx += 1 + input_ids.append(base_id + text_encoder.NUM_RESERVED_TOKENS) + last_idx = idx + assert len(inputs) == len(input_ids) + input_ids.append(text_encoder.EOS_ID) + + # Targets: mask and output + targets_mask = [float(v) for v in mask] + # The output is (n, m); store targets_shape so that it can be reshaped + # properly on the other end. + targets = [float(v) for v in outputs.flatten()] + targets_shape = [int(dim) for dim in outputs.shape] + assert mask.shape[0] == outputs.shape[0] + + example_keys = ["inputs", "targets_mask", "targets", "targets_shape"] + ex_dict = dict( + zip(example_keys, [input_ids, targets_mask, targets, targets_shape])) + return ex_dict diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 1182ed7d1..e93039b71 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -146,6 +146,12 @@ def dev_filepaths(self, data_dir, num_shards, shuffled): file_basename += utils.UNSHUFFLED_SUFFIX return utils.dev_data_filenames(file_basename, data_dir, num_shards) + def test_filepaths(self, data_dir, num_shards, shuffled): + file_basename = self.dataset_filename() + if not shuffled: + file_basename += utils.UNSHUFFLED_SUFFIX + return utils.test_data_filenames(file_basename, data_dir, num_shards) + def __init__(self, was_reversed=False, was_copy=False): """Create a Problem. From 84445cc6eaabc338285b6a96135c78b0e1e4b26c Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Wed, 19 Jul 2017 19:31:47 -0700 Subject: [PATCH 0148/4095] Change the format of generated vocab files to include languages, put them in data_dir, add --generate_data option. PiperOrigin-RevId: 162569315 --- README.md | 17 ++- tensor2tensor/bin/t2t-datagen | 129 ++++++------------ tensor2tensor/bin/t2t-trainer | 24 +++- tensor2tensor/data_generators/audio.py | 6 +- .../data_generators/generator_utils.py | 14 +- tensor2tensor/data_generators/image.py | 6 +- tensor2tensor/data_generators/inspect.py | 2 +- .../data_generators/problem_hparams.py | 16 +-- tensor2tensor/data_generators/wmt.py | 73 +++++----- tensor2tensor/data_generators/wsj_parsing.py | 11 +- tensor2tensor/models/transformer.py | 2 +- tensor2tensor/utils/trainer_utils.py | 8 +- 12 files changed, 150 insertions(+), 158 deletions(-) diff --git a/README.md b/README.md index 059fbe429..0564a9c99 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,21 @@ issues](https://github.com/tensorflow/tensor2tensor/issues). And chat with us and other users on [Gitter](https://gitter.im/tensor2tensor/Lobby). +Here is a one-command version that installs tensor2tensor, downloads the data, +trains an English-German translation model, and lets you use it interactively: +``` +pip install tensor2tensor && t2t-trainer \ + --generate_data \ + --data_dir=~/t2t_data \ + --problems=wmt_ende_tokens_32k \ + --model=transformer \ + --hparams_set=transformer_base_single_gpu \ + --output_dir=~/t2t_train/base \ + --decode_interactive +``` + +See the [Walkthrough](#walkthrough) below for more details on each step. + ### Contents * [Walkthrough](#walkthrough) @@ -72,8 +87,6 @@ t2t-datagen \ --num_shards=100 \ --problem=$PROBLEM -cp $TMP_DIR/tokens.vocab.* $DATA_DIR - # Train # * If you run out of memory, add --hparams='batch_size=2048' or even 1024. t2t-trainer \ diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 1ba354695..af5b47f8c 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -80,24 +80,30 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), "ice_parsing_tokens": ( - lambda: wmt.tabbed_parsing_token_generator(FLAGS.tmp_dir, - True, "ice", 2**13, 2**8), - lambda: wmt.tabbed_parsing_token_generator(FLAGS.tmp_dir, - False, "ice", 2**13, 2**8)), + lambda: wmt.tabbed_parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, "ice", 2**13, 2**8), + lambda: wmt.tabbed_parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, "ice", 2**13, 2**8)), "ice_parsing_characters": ( - lambda: wmt.tabbed_parsing_character_generator(FLAGS.tmp_dir, True), - lambda: wmt.tabbed_parsing_character_generator(FLAGS.tmp_dir, False)), + lambda: wmt.tabbed_parsing_character_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True), + lambda: wmt.tabbed_parsing_character_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False)), "wmt_parsing_tokens_8k": ( - lambda: wmt.parsing_token_generator(FLAGS.tmp_dir, True, 2**13), - lambda: wmt.parsing_token_generator(FLAGS.tmp_dir, False, 2**13)), + lambda: wmt.parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13), + lambda: wmt.parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13)), "wsj_parsing_tokens_16k": ( - lambda: wsj_parsing.parsing_token_generator(FLAGS.tmp_dir, True, - 2**14, 2**9), - lambda: wsj_parsing.parsing_token_generator(FLAGS.tmp_dir, False, - 2**14, 2**9)), + lambda: wsj_parsing.parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9), + lambda: wsj_parsing.parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, 2**14, 2**9)), "wmt_ende_bpe32k": ( - lambda: wmt.ende_bpe_token_generator(FLAGS.tmp_dir, True), - lambda: wmt.ende_bpe_token_generator(FLAGS.tmp_dir, False)), + lambda: wmt.ende_bpe_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True), + lambda: wmt.ende_bpe_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False)), "lm1b_32k": ( lambda: lm1b.generator(FLAGS.tmp_dir, True), lambda: lm1b.generator(FLAGS.tmp_dir, False) @@ -119,101 +125,50 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: image.cifar10_generator(FLAGS.tmp_dir, True, 50000), lambda: image.cifar10_generator(FLAGS.tmp_dir, False, 10000)), "image_mscoco_characters_test": ( - lambda: image.mscoco_generator(FLAGS.tmp_dir, True, 80000), - lambda: image.mscoco_generator(FLAGS.tmp_dir, False, 40000)), + lambda: image.mscoco_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, 80000), + lambda: image.mscoco_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, 40000)), "image_celeba_tune": ( lambda: image.celeba_generator(FLAGS.tmp_dir, 162770), lambda: image.celeba_generator(FLAGS.tmp_dir, 19867, 162770)), "image_mscoco_tokens_8k_test": ( lambda: image.mscoco_generator( - FLAGS.tmp_dir, - True, - 80000, - vocab_filename="tokens.vocab.%d" % 2**13, - vocab_size=2**13), + FLAGS.data_dir, FLAGS.tmp_dir, True, 80000, + vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13), lambda: image.mscoco_generator( - FLAGS.tmp_dir, - False, - 40000, - vocab_filename="tokens.vocab.%d" % 2**13, - vocab_size=2**13)), + FLAGS.data_dir, FLAGS.tmp_dir, False, 40000, + vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13)), "image_mscoco_tokens_32k_test": ( lambda: image.mscoco_generator( - FLAGS.tmp_dir, - True, - 80000, - vocab_filename="tokens.vocab.%d" % 2**15, - vocab_size=2**15), + FLAGS.data_dir, FLAGS.tmp_dir, True, 80000, + vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15), lambda: image.mscoco_generator( - FLAGS.tmp_dir, - False, - 40000, - vocab_filename="tokens.vocab.%d" % 2**15, - vocab_size=2**15)), + FLAGS.data_dir, FLAGS.tmp_dir, False, 40000, + vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), "snli_32k": ( lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), ), - "audio_timit_characters_tune": ( - lambda: audio.timit_generator(FLAGS.tmp_dir, True, 1374), - lambda: audio.timit_generator(FLAGS.tmp_dir, True, 344, 1374)), "audio_timit_characters_test": ( - lambda: audio.timit_generator(FLAGS.tmp_dir, True, 1718), - lambda: audio.timit_generator(FLAGS.tmp_dir, False, 626)), - "audio_timit_tokens_8k_tune": ( lambda: audio.timit_generator( - FLAGS.tmp_dir, - True, - 1374, - vocab_filename="tokens.vocab.%d" % 2**13, - vocab_size=2**13), + FLAGS.data_dir, FLAGS.tmp_dir, True, 1718), lambda: audio.timit_generator( - FLAGS.tmp_dir, - True, - 344, - 1374, - vocab_filename="tokens.vocab.%d" % 2**13, - vocab_size=2**13)), + FLAGS.data_dir, FLAGS.tmp_dir, False, 626)), "audio_timit_tokens_8k_test": ( lambda: audio.timit_generator( - FLAGS.tmp_dir, - True, - 1718, - vocab_filename="tokens.vocab.%d" % 2**13, - vocab_size=2**13), - lambda: audio.timit_generator( - FLAGS.tmp_dir, - False, - 626, - vocab_filename="tokens.vocab.%d" % 2**13, - vocab_size=2**13)), - "audio_timit_tokens_32k_tune": ( - lambda: audio.timit_generator( - FLAGS.tmp_dir, - True, - 1374, - vocab_filename="tokens.vocab.%d" % 2**15, - vocab_size=2**15), + FLAGS.data_dir, FLAGS.tmp_dir, True, 1718, + vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13), lambda: audio.timit_generator( - FLAGS.tmp_dir, - True, - 344, - 1374, - vocab_filename="tokens.vocab.%d" % 2**15, - vocab_size=2**15)), + FLAGS.data_dir, FLAGS.tmp_dir, False, 626, + vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13)), "audio_timit_tokens_32k_test": ( lambda: audio.timit_generator( - FLAGS.tmp_dir, - True, - 1718, - vocab_filename="tokens.vocab.%d" % 2**15, - vocab_size=2**15), + FLAGS.data_dir, FLAGS.tmp_dir, True, 1718, + vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15), lambda: audio.timit_generator( - FLAGS.tmp_dir, - False, - 626, - vocab_filename="tokens.vocab.%d" % 2**15, - vocab_size=2**15)), + FLAGS.data_dir, FLAGS.tmp_dir, False, 626, + vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), "lmptb_10k": ( lambda: ptb.train_generator( FLAGS.tmp_dir, diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 8a801e70e..a37767258 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -31,7 +31,8 @@ from __future__ import print_function # Dependency imports -from tensor2tensor.utils import trainer_utils as utils +from tensor2tensor.utils import registry +from tensor2tensor.utils import trainer_utils from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -45,14 +46,29 @@ flags.DEFINE_string("t2t_usr_dir", "", "The imported files should contain registrations, " "e.g. @registry.register_model calls, that will then be " "available to the t2t-trainer.") +flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", + "Temporary storage directory.") +flags.DEFINE_bool("generate_data", False, "Generate data before training?") def main(_): tf.logging.set_verbosity(tf.logging.INFO) usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - utils.log_registry() - utils.validate_flags() - utils.run( + trainer_utils.log_registry() + trainer_utils.validate_flags() + tf.gfile.MakeDirs(FLAGS.output_dir) + + # Generate data if requested. + if FLAGS.generate_data: + tf.gfile.MakeDirs(FLAGS.data_dir) + tf.gfile.MakeDirs(FLAGS.tmp_dir) + for problem_name in FLAGS.problems.split("-"): + tf.logging.info("Generating data for %s" % problem_name) + problem = registry.problem(problem_name) + problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir) + + # Run the trainer. + trainer_utils.run( data_dir=FLAGS.data_dir, model=FLAGS.model, output_dir=FLAGS.output_dir, diff --git a/tensor2tensor/data_generators/audio.py b/tensor2tensor/data_generators/audio.py index 81cfde008..4f8c096a5 100644 --- a/tensor2tensor/data_generators/audio.py +++ b/tensor2tensor/data_generators/audio.py @@ -97,7 +97,8 @@ def _get_text_data(filepath): return " ".join(words) -def timit_generator(tmp_dir, +def timit_generator(data_dir, + tmp_dir, training, how_many, start_from=0, @@ -107,6 +108,7 @@ def timit_generator(tmp_dir, """Data generator for TIMIT transcription problem. Args: + data_dir: path to the data directory. tmp_dir: path to temporary storage directory. training: a Boolean; if true, we use the train set, otherwise the test set. how_many: how many inputs and labels to generate. @@ -128,7 +130,7 @@ def timit_generator(tmp_dir, eos_list = [1] if eos_list is None else eos_list if vocab_filename is not None: vocab_symbolizer = generator_utils.get_or_generate_vocab( - tmp_dir, vocab_filename, vocab_size) + data_dir, tmp_dir, vocab_filename, vocab_size) _get_timit(tmp_dir) datasets = (_TIMIT_TRAIN_DATASETS if training else _TIMIT_TEST_DATASETS) i = 0 diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index b34a87138..5c0c94bce 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -244,16 +244,13 @@ def gunzip_file(gz_path, new_path): "http://www.statmt.org/wmt13/training-parallel-un.tgz", ["un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr"] ], - [ - "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long - ["train.mk", "train.en"] - ], ] -def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): +def get_or_generate_vocab(data_dir, tmp_dir, + vocab_filename, vocab_size, sources=None): """Generate a vocabulary from the datasets in sources (_DATA_FILE_URLS).""" - vocab_filepath = os.path.join(tmp_dir, vocab_filename) + vocab_filepath = os.path.join(data_dir, vocab_filename) if tf.gfile.Exists(vocab_filepath): tf.logging.info("Found vocab file: %s", vocab_filepath) vocab = text_encoder.SubwordTextEncoder(vocab_filepath) @@ -304,7 +301,7 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): return vocab -def get_or_generate_tabbed_vocab(tmp_dir, source_filename, +def get_or_generate_tabbed_vocab(data_dir, tmp_dir, source_filename, index, vocab_filename, vocab_size): r"""Generate a vocabulary from a tabbed source file. @@ -313,6 +310,7 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, The index parameter specifies 0 for the source or 1 for the target. Args: + data_dir: path to the data directory. tmp_dir: path to the temporary directory. source_filename: the name of the tab-separated source file. index: index. @@ -322,7 +320,7 @@ def get_or_generate_tabbed_vocab(tmp_dir, source_filename, Returns: The vocabulary. """ - vocab_filepath = os.path.join(tmp_dir, vocab_filename) + vocab_filepath = os.path.join(data_dir, vocab_filename) if os.path.exists(vocab_filepath): vocab = text_encoder.SubwordTextEncoder(vocab_filepath) return vocab diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 79bb51f3c..e3567d78f 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -230,7 +230,8 @@ def _get_mscoco(directory): zipfile.ZipFile(path, "r").extractall(directory) -def mscoco_generator(tmp_dir, +def mscoco_generator(data_dir, + tmp_dir, training, how_many, start_from=0, @@ -240,6 +241,7 @@ def mscoco_generator(tmp_dir, """Image generator for MSCOCO captioning problem with token-wise captions. Args: + data_dir: path to the data directory. tmp_dir: path to temporary storage directory. training: a Boolean; if true, we use the train set, otherwise the test set. how_many: how many images and labels to generate. @@ -261,7 +263,7 @@ def mscoco_generator(tmp_dir, eos_list = [1] if eos_list is None else eos_list if vocab_filename is not None: vocab_symbolizer = generator_utils.get_or_generate_vocab( - tmp_dir, vocab_filename, vocab_size) + data_dir, tmp_dir, vocab_filename, vocab_size) _get_mscoco(tmp_dir) caption_filepath = (_MSCOCO_TRAIN_CAPTION_FILE if training else _MSCOCO_EVAL_CAPTION_FILE) diff --git a/tensor2tensor/data_generators/inspect.py b/tensor2tensor/data_generators/inspect.py index fba3c6492..dad0c1c83 100644 --- a/tensor2tensor/data_generators/inspect.py +++ b/tensor2tensor/data_generators/inspect.py @@ -17,7 +17,7 @@ python data_generators/inspect.py \ --logtostderr \ --print_targets \ - --subword_text_encoder_filename=$DATA_DIR/tokens.vocab.8192 \ + --subword_text_encoder_filename=$DATA_DIR/vocab.endefr.8192 \ --input_filename=$DATA_DIR/wmt_ende_tokens_8k-train-00000-of-00100 """ diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 3347fe4f6..8e6d032d5 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -249,7 +249,7 @@ def audio_timit_tokens(model_hparams, wrong_vocab_size): p = default_problem_hparams() # This vocab file must be present within the data directory. vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.%d" % wrong_vocab_size) + "vocab.endefr.%d" % wrong_vocab_size) subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) p.input_modality = { "inputs": (registry.Modalities.AUDIO, None), @@ -298,7 +298,7 @@ def audio_wsj_tokens(model_hparams, wrong_vocab_size): p = default_problem_hparams() # This vocab file must be present within the data directory. vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.%d" % wrong_vocab_size) + "vocab.endefr.%d" % wrong_vocab_size) subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) p.input_modality = { "inputs": (registry.Modalities.AUDIO, None), @@ -412,7 +412,7 @@ def wmt_parsing_tokens(model_hparams, wrong_vocab_size): p = default_problem_hparams() # This vocab file must be present within the data directory. vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.%d" % wrong_vocab_size) + "vocab.endefr.%d" % wrong_vocab_size) subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) p.input_modality = { "inputs": (registry.Modalities.SYMBOL, subtokenizer.vocab_size) @@ -449,10 +449,10 @@ def wsj_parsing_tokens(model_hparams, # This vocab file must be present within the data directory. source_vocab_filename = os.path.join( model_hparams.data_dir, - prefix + "_source.tokens.vocab.%d" % wrong_source_vocab_size) + prefix + "_source.vocab.%d" % wrong_source_vocab_size) target_vocab_filename = os.path.join( model_hparams.data_dir, - prefix + "_target.tokens.vocab.%d" % wrong_target_vocab_size) + prefix + "_target.vocab.%d" % wrong_target_vocab_size) source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) p.input_modality = { @@ -485,10 +485,10 @@ def ice_parsing_tokens(model_hparams, wrong_source_vocab_size): # This vocab file must be present within the data directory. source_vocab_filename = os.path.join( model_hparams.data_dir, - "ice_source.tokens.vocab.%d" % wrong_source_vocab_size) + "ice_source.vocab.%d" % wrong_source_vocab_size) target_vocab_filename = os.path.join( model_hparams.data_dir, - "ice_target.tokens.vocab.256") + "ice_target.vocab.256") source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) p.input_modality = { @@ -573,7 +573,7 @@ def image_mscoco_tokens(model_hparams, vocab_count): p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} # This vocab file must be present within the data directory. vocab_filename = os.path.join(model_hparams.data_dir, - "tokens.vocab.%d" % vocab_count) + "vocab.endefr.%d" % vocab_count) subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) p.target_modality = (registry.Modalities.SYMBOL, subtokenizer.vocab_size) p.vocabulary = { diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 2e1f1e8af..4d134caf1 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -43,7 +43,8 @@ def _default_token_feature_encoders(data_dir, target_vocab_size): - vocab_filename = os.path.join(data_dir, "tokens.vocab.%d" % target_vocab_size) + vocab_filename = os.path.join(data_dir, + "vocab.endefr.%d" % target_vocab_size) subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) return { "inputs": subtokenizer, @@ -71,7 +72,7 @@ def targeted_vocab_size(self): @property def train_generator(self): - """Generator; takes tmp_dir, is_training, possibly targeted_vocab_size.""" + """Generator; takes data_dir, tmp_dir, is_training, targeted_vocab_size.""" raise NotImplementedError() @property @@ -101,9 +102,11 @@ def generate_data(self, data_dir, tmp_dir, num_shards=None): self.dev_filepaths(data_dir, 1, shuffled=False)) else: generator_utils.generate_dataset_and_shuffle( - self.train_generator(tmp_dir, True, self.targeted_vocab_size), + self.train_generator(data_dir, tmp_dir, True, + self.targeted_vocab_size), self.training_filepaths(data_dir, num_shards, shuffled=False), - self.dev_generator(tmp_dir, False, self.targeted_vocab_size), + self.dev_generator(data_dir, tmp_dir, False, + self.targeted_vocab_size), self.dev_filepaths(data_dir, 1, shuffled=False)) def feature_encoders(self, data_dir): @@ -351,12 +354,14 @@ def _get_wmt_ende_dataset(directory, filename): return train_path -def ende_bpe_token_generator(tmp_dir, train): +def ende_bpe_token_generator(data_dir, tmp_dir, train): """Instance of token generator for the WMT en->de task, training set.""" dataset_path = ("train.tok.clean.bpe.32000" if train else "newstest2013.tok.bpe.32000") train_path = _get_wmt_ende_dataset(tmp_dir, dataset_path) - token_path = os.path.join(tmp_dir, "vocab.bpe.32000") + token_tmp_path = os.path.join(tmp_dir, "vocab.bpe.32000") + token_path = os.path.join(data_dir, "vocab.bpe.32000") + tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) token_vocab = text_encoder.TokenTextEncoder(vocab_filename=token_path) return token_generator(train_path + ".en", train_path + ".de", token_vocab, EOS) @@ -402,9 +407,9 @@ def _compile_data(tmp_dir, datasets, filename): return filename -def ende_wordpiece_token_generator(tmp_dir, train, vocab_size): +def ende_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): symbolizer_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size) + data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) @@ -471,26 +476,26 @@ def target_space_id(self): return problem.SpaceID.DE_CHR -def zhen_wordpiece_token_bigenerator(tmp_dir, train, source_vocab_size, - target_vocab_size): +def zhen_wordpiece_token_bigenerator(data_dir, tmp_dir, train, + source_vocab_size, target_vocab_size): """Wordpiece generator for the WMT'17 zh-en dataset.""" datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] + source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] source_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.zh.%d" % source_vocab_size, source_vocab_size, - source_datasets) + data_dir, tmp_dir, "vocab.zh.%d" % source_vocab_size, + source_vocab_size, source_datasets) target_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.en.%d" % target_vocab_size, target_vocab_size, - target_datasets) + data_dir, tmp_dir, "vocab.en.%d" % target_vocab_size, + target_vocab_size, target_datasets) tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) return bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", source_vocab, target_vocab, EOS) -def zhen_wordpiece_token_generator(tmp_dir, train, vocab_size): - return zhen_wordpiece_token_bigenerator(tmp_dir, train, +def zhen_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): + return zhen_wordpiece_token_bigenerator(data_dir, tmp_dir, train, vocab_size, vocab_size) @@ -517,9 +522,9 @@ def target_space_id(self): def feature_encoders(self, data_dir): vocab_size = self.targeted_vocab_size source_vocab_filename = os.path.join(data_dir, - "tokens.vocab.zh.%d" % vocab_size) + "vocab.zh.%d" % vocab_size) target_vocab_filename = os.path.join(data_dir, - "tokens.vocab.en.%d" % vocab_size) + "vocab.en.%d" % vocab_size) source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) return { @@ -536,10 +541,10 @@ def targeted_vocab_size(self): return 2**15 # 32768 -def enfr_wordpiece_token_generator(tmp_dir, train, vocab_size): +def enfr_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): """Instance of token generator for the WMT en->fr task.""" symbolizer_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size) + data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) @@ -607,13 +612,13 @@ def target_space_id(self): return problem.SpaceID.FR_CHR -def mken_wordpiece_token_generator(tmp_dir, train, vocab_size): +def mken_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): """Wordpiece generator for the SETimes Mk-En dataset.""" datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] + source_datasets = [[item[0], [item[1][0]]] for item in _MKEN_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _MKEN_TRAIN_DATASETS] symbolizer_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size, + data_dir, tmp_dir, "vocab.mken.%d" % vocab_size, vocab_size, source_datasets + target_datasets) tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) @@ -650,15 +655,15 @@ def parsing_character_generator(tmp_dir, train): return character_generator(text_filepath, tags_filepath, character_vocab, EOS) -def tabbed_parsing_token_generator(tmp_dir, train, prefix, source_vocab_size, - target_vocab_size): +def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix, + source_vocab_size, target_vocab_size): """Generate source and target data from a single file.""" source_vocab = generator_utils.get_or_generate_tabbed_vocab( - tmp_dir, "parsing_train.pairs", 0, - prefix + "_source.tokens.vocab.%d" % source_vocab_size, source_vocab_size) + data_dir, tmp_dir, "parsing_train.pairs", 0, + prefix + "_source.vocab.%d" % source_vocab_size, source_vocab_size) target_vocab = generator_utils.get_or_generate_tabbed_vocab( - tmp_dir, "parsing_train.pairs", 1, - prefix + "_target.tokens.vocab.%d" % target_vocab_size, target_vocab_size) + data_dir, tmp_dir, "parsing_train.pairs", 1, + prefix + "_target.vocab.%d" % target_vocab_size, target_vocab_size) filename = "parsing_%s" % ("train" if train else "dev") pair_filepath = os.path.join(tmp_dir, filename + ".pairs") return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) @@ -672,9 +677,9 @@ def tabbed_parsing_character_generator(tmp_dir, train): return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) -def parsing_token_generator(tmp_dir, train, vocab_size): +def parsing_token_generator(data_dir, tmp_dir, train, vocab_size): symbolizer_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "tokens.vocab.%d" % vocab_size, vocab_size) + data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) filename = "%s_%s.trees" % (FLAGS.parsing_path, "train" if train else "dev") tree_filepath = os.path.join(tmp_dir, filename) return wsj_parsing.token_generator(tree_filepath, symbolizer_vocab, diff --git a/tensor2tensor/data_generators/wsj_parsing.py b/tensor2tensor/data_generators/wsj_parsing.py index 7734db646..200754e16 100644 --- a/tensor2tensor/data_generators/wsj_parsing.py +++ b/tensor2tensor/data_generators/wsj_parsing.py @@ -86,7 +86,7 @@ def token_generator(tree_path, source_token_vocab, target_token_vocab, tree_line = tree_file.readline() -def parsing_token_generator(tmp_dir, train, source_vocab_size, +def parsing_token_generator(data_dir, tmp_dir, train, source_vocab_size, target_vocab_size): """Generator for parsing as a sequence-to-sequence task that uses tokens. @@ -94,8 +94,9 @@ def parsing_token_generator(tmp_dir, train, source_vocab_size, trees in wsj format. Args: - tmp_dir: path to the file with source sentences. - train: path to the file with target sentences. + data_dir: path to the data directory. + tmp_dir: path to temporary storage directory. + train: whether we're training or not. source_vocab_size: source vocab size. target_vocab_size: target vocab size. @@ -103,10 +104,10 @@ def parsing_token_generator(tmp_dir, train, source_vocab_size, A generator to a dictionary of inputs and outputs. """ source_symbolizer_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "wsj_source.tokens.vocab.%d" % source_vocab_size, + data_dir, tmp_dir, "wsj_source.vocab.%d" % source_vocab_size, source_vocab_size) target_symbolizer_vocab = generator_utils.get_or_generate_vocab( - tmp_dir, "wsj_target.tokens.vocab.%d" % target_vocab_size, + data_dir, tmp_dir, "wsj_target.vocab.%d" % target_vocab_size, target_vocab_size) filename = "%s_%s.trees" % (FLAGS.parsing_path, "train" if train else "dev") tree_filepath = os.path.join(tmp_dir, filename) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index b24f7fa50..c693d1ca3 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -324,7 +324,7 @@ def transformer_big_single_gpu(): def transformer_base_single_gpu(): """HParams for transformer base model for single gpu.""" hparams = transformer_base() - hparams.batch_size = 8192 + hparams.batch_size = 2048 hparams.learning_rate_warmup_steps = 16000 hparams.batching_mantissa_bits = 2 return hparams diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index f7d3010a9..9b0e10fcb 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -126,10 +126,10 @@ def _save_until_eos(hyp): """Strips everything after the first <EOS> token, which is normally 1.""" try: - index = list(hyp).index(text_encoder.EOS_TOKEN) + index = list(hyp).index(text_encoder.EOS_ID) return hyp[0:index] except ValueError: - # No EOS_TOKEN: return the array as-is. + # No EOS_ID: return the array as-is. return hyp @@ -745,7 +745,7 @@ def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, for inputs in sorted_inputs[b * FLAGS.decode_batch_size: (b + 1) * FLAGS.decode_batch_size]: input_ids = vocabulary.encode(inputs) - input_ids.append(text_encoder.EOS_TOKEN) + input_ids.append(text_encoder.EOS_ID) batch_inputs.append(input_ids) if len(input_ids) > batch_length: batch_length = len(input_ids) @@ -838,7 +838,7 @@ def _interactive_input_fn(hparams): if input_type == "text": input_ids = vocabulary.encode(input_string) if has_input: - input_ids.append(text_encoder.EOS_TOKEN) + input_ids.append(text_encoder.EOS_ID) x = [num_samples, decode_length, len(input_ids)] + input_ids assert len(x) < const_array_size x += [0] * (const_array_size - len(x)) From 60dd5e0c333b4631db392745ddcdab23b95f4da0 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 19 Jul 2017 19:35:47 -0700 Subject: [PATCH 0149/4095] Add tests for genetics problems PiperOrigin-RevId: 162569505 --- .../data_generators/genetics_test.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 tensor2tensor/data_generators/genetics_test.py diff --git a/tensor2tensor/data_generators/genetics_test.py b/tensor2tensor/data_generators/genetics_test.py new file mode 100644 index 000000000..70b4fe495 --- /dev/null +++ b/tensor2tensor/data_generators/genetics_test.py @@ -0,0 +1,65 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Genetics problems.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +import numpy as np + +from tensor2tensor.data_generators import genetics + +import tensorflow as tf + + +class GeneticsTest(tf.test.TestCase): + + def _oneHotBases(self, bases): + one_hots = [] + for base_id in bases: + one_hot = [False] * 4 + if base_id < 4: + one_hot[base_id] = True + one_hots.append(one_hot) + return np.array(one_hots) + + def testRecordToExample(self): + inputs = self._oneHotBases([0, 1, 3, 4, 1, 0]) + mask = np.array([True, False, True]) + outputs = np.array([[1.0, 2.0, 3.0], [5.0, 1.0, 0.2], [5.1, 2.3, 2.3]]) + ex_dict = genetics.to_example_dict(inputs, mask, outputs) + + self.assertAllEqual([2, 3, 5, 6, 3, 2, 1], ex_dict["inputs"]) + self.assertAllEqual([1.0, 0.0, 1.0], ex_dict["targets_mask"]) + self.assertAllEqual([1.0, 2.0, 3.0, 5.0, 1.0, 0.2, 5.1, 2.3, 2.3], + ex_dict["targets"]) + self.assertAllEqual([3, 3], ex_dict["targets_shape"]) + + def testGenerateShardArgs(self): + num_examples = 37 + num_shards = 4 + outfiles = [str(i) for i in range(num_shards)] + shard_args = genetics.generate_shard_args(outfiles, num_examples) + + starts, ends, fnames = zip(*shard_args) + self.assertAllEqual([0, 9, 18, 27], starts) + self.assertAllEqual([9, 18, 27, 37], ends) + self.assertAllEqual(fnames, outfiles) + + +if __name__ == "__main__": + tf.test.main() From a7339cdf81d1dc134a6116e2ca1413731eb5eddd Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 19 Jul 2017 19:36:10 -0700 Subject: [PATCH 0150/4095] v1.1.1 PiperOrigin-RevId: 162569525 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d8fd19cf4..9da5293b9 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.0', + version='1.1.1', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 2fd79ec8b708101956b03890ac8d760b309e2683 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 19 Jul 2017 20:01:18 -0700 Subject: [PATCH 0151/4095] Update readme and make genetics module optional PiperOrigin-RevId: 162570620 --- README.md | 6 ++++-- tensor2tensor/data_generators/all_problems.py | 10 +++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0564a9c99..c0e34e0fe 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,10 @@ send along a pull request to add your dataset or model. See [our contribution doc](CONTRIBUTING.md) for details and our [open issues](https://github.com/tensorflow/tensor2tensor/issues). -And chat with us and other users on -[Gitter](https://gitter.im/tensor2tensor/Lobby). +You can chat with us and other users on +[Gitter](https://gitter.im/tensor2tensor/Lobby) and please join our +[Google Group](https://groups.google.com/forum/#!forum/tensor2tensor) to keep up +with T2T announcements. Here is a one-command version that installs tensor2tensor, downloads the data, trains an English-German translation model, and lets you use it interactively: diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 0a2503bd2..93a8a06a2 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -21,7 +21,6 @@ from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import audio -from tensor2tensor.data_generators import genetics from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import ptb @@ -29,4 +28,13 @@ from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing + +# Problem modules that require optional dependencies +# pylint: disable=g-import-not-at-top +try: + # Requires h5py + from tensor2tensor.data_generators import genetics +except ImportError: + pass +# pylint: enable=g-import-not-at-top # pylint: enable=unused-import From ac038c5429be5b5495b8ec0c95f46452e83df122 Mon Sep 17 00:00:00 2001 From: Shanbo Cheng <cshanbo@gmail.com> Date: Thu, 20 Jul 2017 14:19:24 +0800 Subject: [PATCH 0152/4095] fix positional embedding --- tensor2tensor/models/common_attention.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index 6aa8a2a07..c8b4a6068 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -65,6 +65,9 @@ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): tf.to_float(tf.range(num_timescales)) * -log_timescale_increment) scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0) signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1) + signal = tf.reshape(signal, [length, 2, num_timescales]) + signal = tf.transpose(signal, perm=[0, 2, 1]) + signal = tf.reshape(signal, [length, channels]) signal = tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]]) signal = tf.reshape(signal, [1, length, channels]) return x + signal From 93177b02cb72e1b0dd585b6f3a40f7382eefd47a Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Thu, 20 Jul 2017 12:37:13 +0000 Subject: [PATCH 0153/4095] Added ice_parsing_tokens to problem registry --- tensor2tensor/bin/t2t-datagen | 5 -- .../data_generators/generator_utils.py | 4 +- .../data_generators/problem_hparams.py | 36 ------------- tensor2tensor/data_generators/wmt.py | 53 +++++++++++++++++-- tensor2tensor/models/transformer.py | 14 +++++ tensor2tensor/utils/registry.py | 6 +-- 6 files changed, 69 insertions(+), 49 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/problem_hparams.py mode change 100644 => 100755 tensor2tensor/data_generators/wmt.py mode change 100644 => 100755 tensor2tensor/models/transformer.py mode change 100644 => 100755 tensor2tensor/utils/registry.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index cbf0a6164..2f8a418e2 100755 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -109,11 +109,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), - "ice_parsing_tokens": ( - lambda: wmt.tabbed_parsing_token_generator(FLAGS.tmp_dir, - True, "ice", 2**13, 2**8), - lambda: wmt.tabbed_parsing_token_generator(FLAGS.tmp_dir, - False, "ice", 2**13, 2**8)), "ice_parsing_characters": ( lambda: wmt.tabbed_parsing_character_generator(FLAGS.tmp_dir, True), lambda: wmt.tabbed_parsing_character_generator(FLAGS.tmp_dir, False)), diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 20f3959d8..51c8a5899 100755 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -266,7 +266,7 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): for source in sources: url = source[0] filename = os.path.basename(url) - read_type = "r:gz" if "tgz" in filename else "r" + read_type = "r:gz" if filename.endswith(".tgz") else "r" compressed_file = maybe_download(tmp_dir, filename, url) @@ -278,7 +278,7 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size, sources=None): filepath = os.path.join(tmp_dir, lang_file) # For some datasets a second extraction is necessary. - if ".gz" in lang_file: + if lang_file.endswith(".gz"): new_filepath = os.path.join(tmp_dir, lang_file[:-3]) if tf.gfile.Exists(new_filepath): tf.logging.info( diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py old mode 100644 new mode 100755 index 70b9dada8..e071ba60d --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -569,41 +569,6 @@ def wsj_parsing_tokens(model_hparams, return p -def ice_parsing_tokens(model_hparams, wrong_source_vocab_size): - """Icelandic to parse tree translation benchmark. - - Args: - model_hparams: a tf.contrib.training.HParams - wrong_source_vocab_size: a number used in the filename indicating the - approximate vocabulary size. This is not to be confused with the actual - vocabulary size. - - Returns: - A tf.contrib.training.HParams object. - """ - p = default_problem_hparams() - # This vocab file must be present within the data directory. - source_vocab_filename = os.path.join( - model_hparams.data_dir, - "ice_source.tokens.vocab.%d" % wrong_source_vocab_size) - target_vocab_filename = os.path.join( - model_hparams.data_dir, - "ice_target.tokens.vocab.256") - source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.SYMBOL, source_subtokenizer.vocab_size) - } - p.target_modality = (registry.Modalities.SYMBOL, 256) - p.vocabulary = { - "inputs": source_subtokenizer, - "targets": target_subtokenizer, - } - p.input_space_id = 18 # Icelandic tokens - p.target_space_id = 19 # Icelandic parse tokens - return p - - def image_cifar10(unused_model_hparams): """CIFAR-10.""" p = default_problem_hparams() @@ -720,7 +685,6 @@ def img2img_imagenet(unused_model_hparams): "wiki_32k": wiki_32k, "lmptb_10k": lmptb_10k, "ice_parsing_characters": wmt_parsing_characters, - "ice_parsing_tokens": lambda p: ice_parsing_tokens(p, 2**13), "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens( # pylint: disable=g-long-lambda p, "wsj", 2**14, 2**9), diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py old mode 100644 new mode 100755 index de5a25e13..f8b20a0e4 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -37,6 +37,9 @@ FLAGS = tf.flags.FLAGS +# End-of-sentence marker. +EOS = text_encoder.EOS_TOKEN + @registry.register_problem("wmt_ende_tokens_8k") class WMTEnDeTokens8k(problem.Problem): @@ -81,6 +84,53 @@ def _default_wmt_feature_encoders(data_dir, target_vocab_size): "targets": subtokenizer, } + +@registry.register_problem("ice_parsing_tokens") +class IceParsingTokens(problem.Problem): + """Problem spec for parsing tokenized Icelandic text to + constituency trees, also tokenized but to a smaller vocabulary.""" + + @property + def source_vocab_size(self): + return 2**13 # 8192 + + @property + def target_vocab_size(self): + return 2**8 # 256 + + def feature_encoders(self, data_dir): + source_vocab_filename = os.path.join( + data_dir, "ice_source.tokens.vocab.%d" % self.source_vocab_size) + target_vocab_filename = os.path.join( + data_dir, "ice_target.tokens.vocab.%d" % self.target_vocab_size) + source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_subtokenizer, + "targets": target_subtokenizer, + } + + def generate_data(self, data_dir, tmp_dir, num_shards=100): + generator_utils.generate_dataset_and_shuffle( + tabbed_parsing_token_generator(tmp_dir, True, "ice", + self.source_vocab_size, + self.target_vocab_size), + self.training_filepaths(data_dir, num_shards, shuffled=False), + tabbed_parsing_token_generator(tmp_dir, False, "ice", + self.source_vocab_size, + self.target_vocab_size), + self.dev_filepaths(data_dir, 1, shuffled=False)) + + def hparams(self, defaults, unused_model_hparams): + p = defaults + source_vocab_size = self._encoders["inputs"].vocab_size + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, source_vocab_size)} + p.target_modality = (registry.Modalities.SYMBOL, self.target_vocab_size) + p.input_space_id = problem.SpaceID.ICE_TOK + p.target_space_id = problem.SpaceID.ICE_PARSE_TOK + p.loss_multiplier = 2.5 # Rough estimate of avg number of tokens per word + + @registry.register_problem("setimes_mken_tokens_32k") class SETimesMkEnTokens32k(problem.Problem): """Problem spec for SETimes Mk-En translation.""" @@ -107,9 +157,6 @@ def hparams(self, defaults, unused_model_hparams): p.input_space_id = problem.SpaceID.MK_TOK p.target_space_id = problem.SpaceID.EN_TOK -# End-of-sentence marker. -EOS = text_encoder.EOS_TOKEN - def character_generator(source_path, target_path, character_vocab, eos=None): """Generator for sequence-to-sequence tasks that just uses characters. diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py old mode 100644 new mode 100755 index b341d6fe0..042ce797e --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -375,6 +375,20 @@ def transformer_parsing_ice(): return hparams +@registry.register_hparams +def transformer_parsing_ice_big(): + """Hparams for parsing Icelandic text, bigger model.""" + hparams = transformer_parsing_ice() + hparams.batch_size = 2048 # 4096 gives Out-of-memory on 8 GB 1080 GTX GPU + hparams.attention_dropout = 0.2 + hparams.residual_dropout = 0.2 + hparams.max_length = 512 + hparams.learning_rate_warmup_steps = 16000 + hparams.hidden_size = 1024 + hparams.learning_rate = 0.05 + return hparams + + @registry.register_hparams def transformer_tiny(): hparams = transformer_base() diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py old mode 100644 new mode 100755 index 5a8823510..c9adfb692 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -222,10 +222,10 @@ def parse_problem_name(problem_name): was_copy: A boolean. """ # Recursively strip tags until we reach a base name. - if len(problem_name) > 4 and problem_name[-4:] == "_rev": + if problem_name.endswith("_rev"): base, _, was_copy = parse_problem_name(problem_name[:-4]) return base, True, was_copy - elif len(problem_name) > 5 and problem_name[-5:] == "_copy": + elif problem_name.endswith("_copy"): base, was_reversed, _ = parse_problem_name(problem_name[:-5]) return base, was_reversed, True else: @@ -338,7 +338,7 @@ def list_modalities(): def parse_modality_name(name): - name_parts = name.split(":") + name_parts = name.split(":", maxsplit=1) if len(name_parts) < 2: name_parts.append("default") modality_type, modality_name = name_parts From fa92cbe2f293e1189d7481fdab656f724f4bc977 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Thu, 20 Jul 2017 19:28:00 +0000 Subject: [PATCH 0154/4095] Adaptation to upstream changes --- tensor2tensor/bin/t2t-trainer | 0 tensor2tensor/data_generators/wmt.py | 98 +++++++++++++--------------- 2 files changed, 47 insertions(+), 51 deletions(-) mode change 100644 => 100755 tensor2tensor/bin/t2t-trainer diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100644 new mode 100755 diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 2d0902dac..d0f7abaec 100755 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -75,57 +75,6 @@ def train_generator(self): """Generator; takes data_dir, tmp_dir, is_training, targeted_vocab_size.""" raise NotImplementedError() - -@registry.register_problem("ice_parsing_tokens") -class IceParsingTokens(problem.Problem): - """Problem spec for parsing tokenized Icelandic text to - constituency trees, also tokenized but to a smaller vocabulary.""" - - @property - def source_vocab_size(self): - return 2**13 # 8192 - - @property - def target_vocab_size(self): - return 2**8 # 256 - - def feature_encoders(self, data_dir): - source_vocab_filename = os.path.join( - data_dir, "ice_source.tokens.vocab.%d" % self.source_vocab_size) - target_vocab_filename = os.path.join( - data_dir, "ice_target.tokens.vocab.%d" % self.target_vocab_size) - source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) - return { - "inputs": source_subtokenizer, - "targets": target_subtokenizer, - } - - def generate_data(self, data_dir, tmp_dir, num_shards=100): - generator_utils.generate_dataset_and_shuffle( - tabbed_parsing_token_generator(tmp_dir, True, "ice", - self.source_vocab_size, - self.target_vocab_size), - self.training_filepaths(data_dir, num_shards, shuffled=False), - tabbed_parsing_token_generator(tmp_dir, False, "ice", - self.source_vocab_size, - self.target_vocab_size), - self.dev_filepaths(data_dir, 1, shuffled=False)) - - def hparams(self, defaults, unused_model_hparams): - p = defaults - source_vocab_size = self._encoders["inputs"].vocab_size - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, source_vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, self.target_vocab_size) - p.input_space_id = problem.SpaceID.ICE_TOK - p.target_space_id = problem.SpaceID.ICE_PARSE_TOK - p.loss_multiplier = 2.5 # Rough estimate of avg number of tokens per word - - -@registry.register_problem("setimes_mken_tokens_32k") -class SETimesMkEnTokens32k(problem.Problem): - """Problem spec for SETimes Mk-En translation.""" - @property def dev_generator(self): return self.train_generator @@ -734,3 +683,50 @@ def parsing_token_generator(data_dir, tmp_dir, train, vocab_size): tree_filepath = os.path.join(tmp_dir, filename) return wsj_parsing.token_generator(tree_filepath, symbolizer_vocab, symbolizer_vocab, EOS) + + +@registry.register_problem("ice_parsing_tokens") +class IceParsingTokens(problem.Problem): + """Problem spec for parsing tokenized Icelandic text to + constituency trees, also tokenized but to a smaller vocabulary.""" + + @property + def source_vocab_size(self): + return 2**13 # 8192 + + @property + def target_vocab_size(self): + return 2**8 # 256 + + def feature_encoders(self, data_dir): + source_vocab_filename = os.path.join( + data_dir, "ice_source.tokens.vocab.%d" % self.source_vocab_size) + target_vocab_filename = os.path.join( + data_dir, "ice_target.tokens.vocab.%d" % self.target_vocab_size) + source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_subtokenizer, + "targets": target_subtokenizer, + } + + def generate_data(self, data_dir, tmp_dir, num_shards=100): + generator_utils.generate_dataset_and_shuffle( + tabbed_parsing_token_generator(tmp_dir, True, "ice", + self.source_vocab_size, + self.target_vocab_size), + self.training_filepaths(data_dir, num_shards, shuffled=False), + tabbed_parsing_token_generator(tmp_dir, False, "ice", + self.source_vocab_size, + self.target_vocab_size), + self.dev_filepaths(data_dir, 1, shuffled=False)) + + def hparams(self, defaults, unused_model_hparams): + p = defaults + source_vocab_size = self._encoders["inputs"].vocab_size + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, source_vocab_size)} + p.target_modality = (registry.Modalities.SYMBOL, self.target_vocab_size) + p.input_space_id = problem.SpaceID.ICE_TOK + p.target_space_id = problem.SpaceID.ICE_PARSE_TOK + p.loss_multiplier = 2.5 # Rough estimate of avg number of tokens per word + From 10ba26822689cc0428e239426f5f1062b3c7f4cf Mon Sep 17 00:00:00 2001 From: William Woof <awwoof@hotmail.com> Date: Fri, 21 Jul 2017 14:01:30 +0100 Subject: [PATCH 0155/4095] Update notes.md --- notes.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/notes.md b/notes.md index 3c082f6e9..a08b96244 100644 --- a/notes.md +++ b/notes.md @@ -73,3 +73,36 @@ def magic_decoder(decoder_input, x = residual_fn(x, transformer.transformer_ffn_layer(x, hparams)) return x ``` + +``` +def sliding_window(q, + k, + v, + bias, + window_size=None, + dropout_rate=0.0, + summaries=False, + name=None): + + def single(index, size, q, k, v, **kwargs): + # q initially of form batch x heads x depth + + length = tf.shape(k)[2] + index_begin = tf.maximum(0, index-size) + index_end = tf.minimum(length-1, index+size) + + q = tf.expand_dims(q, 2) + k = k[:,:,index_begin:index_end,:] + v = v[:,:,index_begin:index_end,:] + out = dot_product_attention(q, k, v, **kwargs) + out = tf.squeeze(out, 2) + return out + + q = tf.transpose(q, [2, 0, 1, 3]) + indices = tf.range(tf.shape(q)[0]) + + out = tf.map_fn(lambda ii: single(ii, 10, q[ii], k, v, bias=None), indices, dtype=tf.float32) + out = tf.transpose(out, [1, 2, 0, 3]) + + return out +``` From 73fa681f5a0caa722f71c07f5de5ea99dc77d2b3 Mon Sep 17 00:00:00 2001 From: William <awwoof@hotmail.com> Date: Fri, 21 Jul 2017 21:18:42 +0100 Subject: [PATCH 0156/4095] Added running pooling and sliding window attention. --- notes.md | 108 ------------------ tensor2tensor/models/common_attention.py | 62 +++++++++- tensor2tensor/models/common_layers.py | 45 +++++--- tensor2tensor/models/transformer.py | 3 +- .../models/transformer_alternative.py | 25 ++-- 5 files changed, 104 insertions(+), 139 deletions(-) delete mode 100644 notes.md diff --git a/notes.md b/notes.md deleted file mode 100644 index a08b96244..000000000 --- a/notes.md +++ /dev/null @@ -1,108 +0,0 @@ - -```python - -def unmagic_encoder(encoder_input, - hparams, - name="encoder"): - x = encoder_input - - # Summaries don't work in multi-problem setting yet. - summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 - - with tf.variable_scope(name): - pass - return x - -def magic_decoder(decoder_input, - encoder_output, - residual_fn, - encoder_self_attention_bias, - decoder_self_attention_bias, - encoder_decoder_attention_bias, - hparams, - name="decoder"): - x = decoder_input - y = encoder_output - # Summaries don't work in multi-problem setting yet. - summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 - with tf.variable_scope(name): - for layer in xrange(hparams.num_hidden_layers): - with tf.variable_scope("layer_%d" % layer): - x = residual_fn( - x, - common_attention.multihead_attention( - x, - None, - decoder_self_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - summaries=summaries, - name="decoder_self_attention")) - with tf.variable_scope("enc"): - y = residual_fn( - y, - common_attention.multihead_attention( - y, - None, - encoder_self_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - summaries=summaries, - name="encoder_self_attention")) - y = residual_fn(y, transformer.transformer_ffn_layer(y, hparams)) - - x = residual_fn( - x, - common_attention.multihead_attention( - x, - y, - encoder_decoder_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - summaries=summaries, - name="encdec_attention")) - x = residual_fn(x, transformer.transformer_ffn_layer(x, hparams)) - return x -``` - -``` -def sliding_window(q, - k, - v, - bias, - window_size=None, - dropout_rate=0.0, - summaries=False, - name=None): - - def single(index, size, q, k, v, **kwargs): - # q initially of form batch x heads x depth - - length = tf.shape(k)[2] - index_begin = tf.maximum(0, index-size) - index_end = tf.minimum(length-1, index+size) - - q = tf.expand_dims(q, 2) - k = k[:,:,index_begin:index_end,:] - v = v[:,:,index_begin:index_end,:] - out = dot_product_attention(q, k, v, **kwargs) - out = tf.squeeze(out, 2) - return out - - q = tf.transpose(q, [2, 0, 1, 3]) - indices = tf.range(tf.shape(q)[0]) - - out = tf.map_fn(lambda ii: single(ii, 10, q[ii], k, v, bias=None), indices, dtype=tf.float32) - out = tf.transpose(out, [1, 2, 0, 3]) - - return out -``` diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index b6a5e09d6..e8700433a 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -345,6 +345,57 @@ def dot_product_attention(q, return tf.matmul(weights, v) +def sliding_window_attention(window_size, + q, + k, + v, + bias, + *args): + """ Sliding window wrapper for dot product attention. Each element only + attends to the elements (window_size/2) before and after it. This reduces + the computational complexity for long sequences at the expense of eliminating + long-term dependencies. + + N.B: For short input sequences this is much slower than just using + un-windowed attention. use only for long sequences. + + Args: + window_size: an integer + q: a Tensor with shape [batch, heads, length_q, depth_k] + k: a Tensor with shape [batch, heads, length_kv, depth_k] + v: a Tensor with shape [batch, heads, length_kv, depth_v] + bias: bias Tensor (see attention_bias()) + + Returns: + A Tensor. + """ + + half_size = window_size // 2 + + # Wrapper function for dot product attention with a single query vector + def single(index, size, q, k, v, bias, **kwargs): + length_kv = tf.shape(k)[2] + index_begin = tf.maximum(0, index-size) + index_end = tf.minimum(length_kv-1, index+size) + q = tf.expand_dims(q, 2) + bias = tf.expand_dims(bias, 3) + k = k[:,:,index_begin:index_end,:] + v = v[:,:,index_begin:index_end,:] + out = dot_product_attention(q, k, v, bias, *args) + out = tf.squeeze(out, 2) + return out + + # We'll loop over each element of q, computing it's corresponding output. + q = tf.transpose(q, [2, 0, 1, 3]) + indices = tf.range(tf.shape(q)[0]) + out = tf.map_fn( + lambda ii: single(ii, half_size, q[ii], k, v, bias[:,:,:,ii]), + indices, + dtype=tf.float32) + out = tf.transpose(out, [1, 2, 0, 3]) + return out + + def multihead_attention(query_antecedent, memory_antecedent, bias, @@ -355,6 +406,7 @@ def multihead_attention(query_antecedent, dropout_rate, summaries=False, image_shapes=None, + window_size=None, name=None): """Multihead scaled-dot-product attention with input/output transformations. @@ -370,6 +422,8 @@ def multihead_attention(query_antecedent, summaries: a boolean image_shapes: optional tuple of integer scalars. see comments for attention_image_summary() + window_size: option size of window for attention. Useful only for very long + sequence lengths. name: an optional string Returns: @@ -403,8 +457,12 @@ def multihead_attention(query_antecedent, v = split_heads(v, num_heads) key_depth_per_head = total_key_depth // num_heads q *= key_depth_per_head**-0.5 - x = dot_product_attention( - q, k, v, bias, dropout_rate, summaries, image_shapes) + if window_size is None: + x = dot_product_attention( + q, k, v, bias, dropout_rate, summaries, image_shapes) + else: + x = sliding_window_attention( + window_size, q, k, v, bias, dropout_rate, False, image_shapes) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") return x diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 2597ccf7a..1c93077aa 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1379,11 +1379,13 @@ def smoothing_cross_entropy(logits, labels, vocab_size, confidence): logits=logits, labels=soft_targets) return xentropy - normalizing + def global_pool_1d(inputs, pooling_type='MAX', mask=None): """ Pools elements across the last dimension. Useful to a list of vectors into a single vector to get a representation of a set. + Concatenating Args inputs: A tensor of dimensions batch_size x sequence_length x input_dims @@ -1415,18 +1417,19 @@ def global_pool_1d(inputs, pooling_type='MAX', mask=None): output = tf.reduce_mean(inputs, axis=1) return output - -def running_global_pool_1d(inputs): + +def running_global_pool_1d(inputs, pooling_type='MAX'): """ Same global pool, but only for the elements up to the current element. Useful for outputs where the state of future elements is not known. Takes no mask as all elements up to the current element are assumed to exist. - Currently only supports maximum. + Currently only supports maximum. Equivalent to using a lower triangle bias. Args inputs: A tensor of dimensions batch_size x sequence_length x input_dims containing the sequences of input vectors. + pooling_type: Pooling type to use. Currently only supports 'MAX'. Outputs output: A tensor of dimensions batch_size x sequence_length x input_dims dimension containing the running 'totals'. @@ -1438,7 +1441,7 @@ def running_global_pool_1d(inputs): # Permute inputs so seq_length is first elems = tf.transpose(inputs, [1, 0, 2]) - # Perform scan + # Perform scan cumulatives = tf.scan(scan_fct, elems, swap_memory=True) # Permute output to get back to original order @@ -1446,7 +1449,7 @@ def running_global_pool_1d(inputs): return output - + def linear_set_layer(layer_size, inputs, context=None, @@ -1464,15 +1467,14 @@ def linear_set_layer(layer_size, layer_size: Dimension to transform the input vectors to inputs: A tensor of dimensions batch_size x sequence_length x input_dims containing the sequences of input vectors. - context: A tensor of dimensions batch_size x context_dims - containing a global statistic about the set. + context: A tensor of dimensions batch_size x context_dims or batch_size x + sequence_length x context_dims containing a global statistic about the + set. dropout: Dropout probability. activation_fn: The activation function to use. Outputs output: A tensor of dimensions batch_size x sequence_length x output_dims dimension containing the sequences of transformed vectors. - - TODO: Add bias add. """ with tf.variable_scope(name, "linear_set_layer", [inputs]): @@ -1500,10 +1502,12 @@ def linear_set_layer(layer_size, return outputs - + + def ravanbakhsh_set_layer(layer_size, inputs, mask=None, + sequential=False, activation_fn=tf.nn.tanh, dropout=0.0, name=None): @@ -1518,18 +1522,27 @@ def ravanbakhsh_set_layer(layer_size, containing the sequences of input vectors. mask: A tensor of dimensions batch_size x sequence_length containing a mask for the inputs with 1's for existing elements, and 0's elsewhere. - activation_fn: The activation function to use. + sequential: If true, will use a running global pool so each element will + only depend on those before it. Set true if this layer is being used in + an ouput sequence. Outputs output: A tensor of dimensions batch_size x sequence_length x vector dimension containing the sequences of transformed vectors. """ with tf.variable_scope(name, "ravanbakhsh_set_layer", [inputs]): - output = linear_set_layer( - layer_size, - inputs - tf.expand_dims(global_pool_1d(inputs, mask=mask), axis=1), - activation_fn=activation_fn, - name=name) + if sequential: + output = linear_set_layer( + layer_size, + inputs - running_global_pool_1d(inputs), + activation_fn=activation_fn, + name=name) + else: + output = linear_set_layer( + layer_size, + inputs - tf.expand_dims(global_pool_1d(inputs, mask=mask), axis=1), + activation_fn=activation_fn, + name=name) return output diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 544035efd..0b6c97153 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -162,7 +162,8 @@ def transformer_encoder(encoder_input, hparams.num_heads, hparams.attention_dropout, summaries=summaries, - name="encoder_self_attention")) + name="encoder_self_attention", + window_size=20)) x = residual_fn(x, transformer_ffn_layer(x, hparams)) return x diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py index b6c2adc74..5ea6942a4 100644 --- a/tensor2tensor/models/transformer_alternative.py +++ b/tensor2tensor/models/transformer_alternative.py @@ -59,9 +59,8 @@ def model_fn_body(self, features): transformer_prepare_encoder(inputs, target_space, hparams) ) (decoder_input, decoder_self_attention_bias) = transformer.\ transformer_prepare_decoder(targets, hparams) - + encoder_mask = bias_to_mask(encoder_attention_bias) - decoder_mask = bias_to_mask(decoder_self_attention_bias) def residual_fn(x, y): return common_layers.layer_norm(x + tf.nn.dropout( @@ -69,11 +68,12 @@ def residual_fn(x, y): encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) + encoder_output = alt_transformer_encoder( encoder_input, residual_fn, encoder_mask, hparams) decoder_output = alt_transformer_decoder( - decoder_input, encoder_output, residual_fn, decoder_mask, + decoder_input, encoder_output, residual_fn, encoder_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) @@ -93,6 +93,7 @@ def composite_layer(inputs, mask, hparams, for_output=False): hparams.hidden_size, x, mask=mask, + sequential=for_output, dropout=hparams.relu_dropout) # Transforms elements to get a context, and then uses this in a final layer @@ -127,12 +128,11 @@ def alt_transformer_encoder(encoder_input, hparams, name="encoder"): - x = encoder_input - # Summaries don't work in multi-problem setting yet. summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 - + with tf.variable_scope(name): + x = encoder_input for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): x = residual_fn(x, composite_layer(x, mask, hparams)) @@ -143,16 +143,15 @@ def alt_transformer_encoder(encoder_input, def alt_transformer_decoder(decoder_input, encoder_output, residual_fn, - mask, encoder_decoder_attention_bias, hparams, name="decoder"): - x = decoder_input - # Summaries don't work in multi-problem setting yet. summaries = "problems" not in hparams.values() or len(hparams.problems) == 1 + with tf.variable_scope(name): + x = decoder_input for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): @@ -168,7 +167,7 @@ def alt_transformer_decoder(decoder_input, summaries=summaries, name="encdec_attention") - x_ = residual_fn(x_, composite_layer(x_, mask, hparams, for_output=True)) + x_ = residual_fn(x_, composite_layer(x_, None, hparams, for_output=True)) x = residual_fn(x, x_) return x @@ -177,6 +176,7 @@ def bias_to_mask(bias): # We need masks of the form batch size x input sequences # Biases seem to be of the form batch_size x 1 x input sequences x vec dim # Squeeze out dim one, and get the first element of each vector + bias = tf.squeeze(bias, [1])[:,:,0] bias = - tf.clip_by_value(bias, -1.0, 1.0) mask = 1 - bias @@ -189,8 +189,9 @@ def transformer_alt(): """Set of hyperparameters.""" hparams = transformer.transformer_base() hparams.batch_size = 2048 + hparams.num_hidden_layers = 3 hparams.add_hparam("layers_per_layer", 4) - #hparams.add_hparam("composite_layer_type", "ravanbakhsh") #ravanbakhsh or reembedding - hparams.add_hparam("composite_layer_type", "reembedding") + hparams.add_hparam("composite_layer_type", "ravanbakhsh") #ravanbakhsh or reembedding + #hparams.add_hparam("composite_layer_type", "reembedding") return hparams From 422aac7430dc62c3783b09e44d9468a73bb8e0ee Mon Sep 17 00:00:00 2001 From: Stefan Schweter <stefan@schweter.it> Date: Sun, 23 Jul 2017 00:11:29 +0200 Subject: [PATCH 0157/4095] trainer-utils: do not show input sentence in decoded file --- tensor2tensor/utils/trainer_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 9b0e10fcb..878dbe107 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -703,8 +703,7 @@ def log_fn(inputs, outputs): tf.logging.info("Writing decodes into %s" % decode_filename) outfile = tf.gfile.Open(decode_filename, "w") for index in range(len(sorted_inputs)): - outfile.write("%s\t%s\n" % (decodes[sorted_keys[index]], - sorted_inputs[sorted_keys[index]])) + outfile.write("%s\n" % (decodes[sorted_keys[index]])) def decode_interactively(estimator): From 5418429e6caa889a396a0dad9c020c90dd2ba366 Mon Sep 17 00:00:00 2001 From: Stefan Schweter <stefan@schweter.it> Date: Sun, 23 Jul 2017 00:13:39 +0200 Subject: [PATCH 0158/4095] utils: input sentence is no longer shown in decoded file (correct bleu calculation script for en-de) --- tensor2tensor/utils/get_ende_bleu.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensor2tensor/utils/get_ende_bleu.sh b/tensor2tensor/utils/get_ende_bleu.sh index 09078414f..3493af74c 100755 --- a/tensor2tensor/utils/get_ende_bleu.sh +++ b/tensor2tensor/utils/get_ende_bleu.sh @@ -5,10 +5,8 @@ tok_gold_targets=newstest2013.tok.de decodes_file=$1 -cut -d' ' -f1 $decodes_file > $decodes_file.target - # Tokenize. -perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l de < $decodes_file.target > $decodes_file.tok +perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l de < $decodes_file > $decodes_file.tok # Put compounds in ATAT format (comparable to papers like GNMT, ConvS2S). # See https://nlp.stanford.edu/projects/nmt/ : From f793de2b28cc86bdc4e212c9cbe29a585d6409ab Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Mon, 24 Jul 2017 23:58:55 +0200 Subject: [PATCH 0159/4095] Simplify adding new translation language pairs * simplify wmt.py: `train_generator` is now a method (not a property returning a function) and WMTProblem subclasses implement it directly (not via one-purpose functions) * add WMT English-Czech translation problems * introduce `vocab_name` property, so e.g. `tokens.vocab-en-cs.32768` is used instead of `tokens.vocab.32768` if only English+Czech datasets were used --- tensor2tensor/data_generators/problem.py | 4 + .../data_generators/problem_hparams.py | 3 + tensor2tensor/data_generators/wmt.py | 304 ++++++++++-------- 3 files changed, 176 insertions(+), 135 deletions(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index e93039b71..5beb0385f 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -69,6 +69,10 @@ class SpaceID(object): ICE_PARSE_TOK = 19 # Macedonian tokens MK_TOK = 20 + # Czech tokens + CS_TOK = 21 + # Czech characters + CS_CHR = 22 class Problem(object): diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 8e6d032d5..51bc0ba62 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -180,6 +180,9 @@ def default_problem_hparams(): # 17: Icelandic characters # 18: Icelandic tokens # 19: Icelandic parse tokens + # 20: Macedonian tokens + # 21: Czech tokens + # 22: Czech characters # Add more above if needed. input_space_id=0, target_space_id=0, diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 4d134caf1..456970e62 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -42,23 +42,6 @@ EOS = text_encoder.EOS_ID -def _default_token_feature_encoders(data_dir, target_vocab_size): - vocab_filename = os.path.join(data_dir, - "vocab.endefr.%d" % target_vocab_size) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - return { - "inputs": subtokenizer, - "targets": subtokenizer, - } - - -def _default_character_feature_encoders(): - return { - "inputs": text_encoder.ByteTextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - - class WMTProblem(problem.Problem): """Base class for WMT problems.""" @@ -70,14 +53,13 @@ def is_character_level(self): def targeted_vocab_size(self): raise NotImplementedError() # Not needed if self.is_character_level. - @property - def train_generator(self): - """Generator; takes data_dir, tmp_dir, is_training, targeted_vocab_size.""" + def train_generator(self, data_dir, tmp_dir, is_training): + """Generator of the training data.""" raise NotImplementedError() - @property - def dev_generator(self): - return self.train_generator + def dev_generator(self, data_dir, tmp_dir, is_training): + """Generator of the development data.""" + return self.train_generator(data_dir, tmp_dir, is_training) @property def input_space_id(self): @@ -91,28 +73,35 @@ def target_space_id(self): def num_shards(self): return 100 + @property + def vocab_name(self): + return "vocab.endefr" + + @property + def vocab_file(self): + return "%s.%d" % (self.vocab_name, self.targeted_vocab_size) + def generate_data(self, data_dir, tmp_dir, num_shards=None): if num_shards is None: num_shards = self.num_shards - if self.is_character_level: - generator_utils.generate_dataset_and_shuffle( - self.train_generator(tmp_dir, True), - self.training_filepaths(data_dir, num_shards, shuffled=False), - self.dev_generator(tmp_dir, False), - self.dev_filepaths(data_dir, 1, shuffled=False)) - else: - generator_utils.generate_dataset_and_shuffle( - self.train_generator(data_dir, tmp_dir, True, - self.targeted_vocab_size), - self.training_filepaths(data_dir, num_shards, shuffled=False), - self.dev_generator(data_dir, tmp_dir, False, - self.targeted_vocab_size), - self.dev_filepaths(data_dir, 1, shuffled=False)) + generator_utils.generate_dataset_and_shuffle( + self.train_generator(data_dir, tmp_dir, True), + self.training_filepaths(data_dir, num_shards, shuffled=False), + self.dev_generator(data_dir, tmp_dir, False), + self.dev_filepaths(data_dir, 1, shuffled=False)) def feature_encoders(self, data_dir): if self.is_character_level: - return _default_character_feature_encoders() - return _default_token_feature_encoders(data_dir, self.targeted_vocab_size) + return { + "inputs": text_encoder.ByteTextEncoder(), + "targets": text_encoder.ByteTextEncoder(), + } + vocab_filename = os.path.join(data_dir, self.vocab_file) + subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) + return { + "inputs": subtokenizer, + "targets": subtokenizer, + } def hparams(self, defaults, unused_model_hparams): p = defaults @@ -174,8 +163,8 @@ def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): Args: source_path: path to the file with source and target sentences. - source_vocab: a SunwordTextEncoder to encode the source string. - target_vocab: a SunwordTextEncoder to encode the target string. + source_vocab: a SubwordTextEncoder to encode the source string. + target_vocab: a SubwordTextEncoder to encode the target string. eos: integer to append at the end of each sequence (default: None). Yields: @@ -336,6 +325,29 @@ def bi_vocabs_token_generator(source_path, ("dev.mk", "dev.en") ]] +# English-Czech datasets +_ENCS_TRAIN_DATASETS = [ + [ + "http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long + ("training-parallel-nc-v11/news-commentary-v11.cs-en.en", + "training-parallel-nc-v11/news-commentary-v11.cs-en.cs") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + ("commoncrawl.cs-en.en", "commoncrawl.cs-en.cs") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + ("training/europarl-v7.cs-en.en", "training/europarl-v7.cs-en.cs") + ], +] +_ENCS_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt16/translation-task/dev.tgz", + ("dev/newstest2013.en", "dev/newstest2013.cs") + ], +] + # Generators. @@ -407,16 +419,6 @@ def _compile_data(tmp_dir, datasets, filename): return filename -def ende_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @registry.register_problem("wmt_ende_tokens_8k") class WMTEnDeTokens8k(WMTProblem): """Problem spec for WMT En-De translation.""" @@ -425,9 +427,13 @@ class WMTEnDeTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - @property - def train_generator(self): - return ende_wordpiece_token_generator + def train_generator(self, data_dir, tmp_dir, train): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", symbolizer_vocab, EOS) @property def input_space_id(self): @@ -446,15 +452,6 @@ def targeted_vocab_size(self): return 2**15 # 32768 -def ende_character_generator(tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @registry.register_problem("wmt_ende_characters") class WMTEnDeCharacters(WMTProblem): """Problem spec for WMT En-De translation.""" @@ -463,9 +460,13 @@ class WMTEnDeCharacters(WMTProblem): def is_character_level(self): return True - @property - def train_generator(self): - return ende_character_generator + def train_generator(self, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) @property def input_space_id(self): @@ -476,29 +477,6 @@ def target_space_id(self): return problem.SpaceID.DE_CHR -def zhen_wordpiece_token_bigenerator(data_dir, tmp_dir, train, - source_vocab_size, target_vocab_size): - """Wordpiece generator for the WMT'17 zh-en dataset.""" - datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] - source_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.zh.%d" % source_vocab_size, - source_vocab_size, source_datasets) - target_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.en.%d" % target_vocab_size, - target_vocab_size, target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) - return bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", - source_vocab, target_vocab, EOS) - - -def zhen_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): - return zhen_wordpiece_token_bigenerator(data_dir, tmp_dir, train, - vocab_size, vocab_size) - - @registry.register_problem("wmt_zhen_tokens_8k") class WMTZhEnTokens8k(WMTProblem): """Problem spec for WMT Zh-En translation.""" @@ -507,9 +485,22 @@ class WMTZhEnTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - @property - def train_generator(self): - return zhen_wordpiece_token_generator + def train_generator(self, data_dir, tmp_dir, train): + source_vocab_size = self.targeted_vocab_size + target_vocab_size = self.targeted_vocab_size + datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + source_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, "vocab.zh.%d" % source_vocab_size, source_vocab_size, + source_datasets) + target_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, "vocab.en.%d" % target_vocab_size, target_vocab_size, + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) + return bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", + source_vocab, target_vocab, EOS) @property def input_space_id(self): @@ -541,17 +532,6 @@ def targeted_vocab_size(self): return 2**15 # 32768 -def enfr_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): - """Instance of token generator for the WMT en->fr task.""" - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @registry.register_problem("wmt_enfr_tokens_8k") class WMTEnFrTokens8k(WMTProblem): """Problem spec for WMT En-Fr translation.""" @@ -560,9 +540,13 @@ class WMTEnFrTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - @property - def train_generator(self): - return enfr_wordpiece_token_generator + def train_generator(self, tmp_dir, train): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", symbolizer_vocab, EOS) @property def input_space_id(self): @@ -581,16 +565,6 @@ def targeted_vocab_size(self): return 2**15 # 32768 -def enfr_character_generator(tmp_dir, train): - """Instance of character generator for the WMT en->fr task.""" - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @registry.register_problem("wmt_enfr_characters") class WMTEnFrCharacters(WMTProblem): """Problem spec for WMT En-Fr translation.""" @@ -599,9 +573,13 @@ class WMTEnFrCharacters(WMTProblem): def is_character_level(self): return True - @property - def train_generator(self): - return enfr_character_generator + def train_generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) @property def input_space_id(self): @@ -612,20 +590,6 @@ def target_space_id(self): return problem.SpaceID.FR_CHR -def mken_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): - """Wordpiece generator for the SETimes Mk-En dataset.""" - datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _MKEN_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _MKEN_TRAIN_DATASETS] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.mken.%d" % vocab_size, vocab_size, - source_datasets + target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @registry.register_problem("setimes_mken_tokens_32k") class SETimesMkEnTokens32k(WMTProblem): """Problem spec for SETimes Mk-En translation.""" @@ -635,8 +599,20 @@ def targeted_vocab_size(self): return 2**15 # 32768 @property - def train_generator(self): - return mken_wordpiece_token_generator + def vocab_name(self): + return "vocab.mken" + + def train_generator(self, data_dir, tmp_dir, train): + datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + source_datasets + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) @property def input_space_id(self): @@ -646,7 +622,65 @@ def input_space_id(self): def target_space_id(self): return problem.SpaceID.EN_TOK +@registry.register_problem("wmt_encs_tokens_32k") +class WMTEnCsTokens32k(problem.Problem): + """Problem spec for WMT English-Czech translation.""" + + @property + def target_vocab_size(self): + return 2**15 # 32768 + + @property + def vocab_name(self): + return "vocab.encs" + + def train_generator(self, data_dir, tmp_dir, train): + datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + source_datasets + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.CS_TOK + + +@registry.register_problem("wmt_encs_characters") +class WMTEnCsCharacters(WMTProblem): + """Problem spec for WMT En-Cs character-based translation.""" + + @property + def is_character_level(self): + return True + + def train_generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_encs_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.CS_CHR + +# TODO This function is not used anywhere. def parsing_character_generator(tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() filename = "parsing_%s" % ("train" if train else "dev") From 13b02cad9ae8fcc34ac0da20ba55734c09fe14eb Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 21 Jul 2017 12:30:48 -0700 Subject: [PATCH 0160/4095] Refactoring of get_or_generate_vocab* functions. PiperOrigin-RevId: 162771691 --- tensor2tensor/__init__.py | 1 + tensor2tensor/bin/t2t-datagen | 1 + tensor2tensor/bin/t2t-make-tf-configs | 1 + tensor2tensor/bin/t2t-trainer | 1 + tensor2tensor/data_generators/__init__.py | 1 + tensor2tensor/data_generators/algorithmic.py | 1 + .../data_generators/algorithmic_math.py | 1 + .../data_generators/algorithmic_math_test.py | 1 + .../data_generators/algorithmic_test.py | 1 + tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/audio.py | 1 + tensor2tensor/data_generators/audio_test.py | 1 + .../data_generators/concatenate_examples.py | 1 + .../data_generators/generator_utils.py | 148 +++++---- .../data_generators/generator_utils_test.py | 22 ++ tensor2tensor/data_generators/genetics.py | 1 + .../data_generators/genetics_test.py | 1 + tensor2tensor/data_generators/image.py | 1 + tensor2tensor/data_generators/image_test.py | 1 + tensor2tensor/data_generators/inspect.py | 1 + tensor2tensor/data_generators/lm1b.py | 1 + tensor2tensor/data_generators/problem.py | 5 +- .../data_generators/problem_hparams.py | 4 +- .../data_generators/problem_hparams_test.py | 1 + tensor2tensor/data_generators/ptb.py | 1 + tensor2tensor/data_generators/snli.py | 1 + tensor2tensor/data_generators/text_encoder.py | 1 + .../text_encoder_build_subword.py | 1 + tensor2tensor/data_generators/tokenizer.py | 1 + .../data_generators/tokenizer_test.py | 1 + tensor2tensor/data_generators/wiki.py | 1 + tensor2tensor/data_generators/wmt.py | 305 ++++++++---------- tensor2tensor/data_generators/wmt_test.py | 1 + tensor2tensor/data_generators/wsj_parsing.py | 1 + tensor2tensor/models/__init__.py | 1 + tensor2tensor/models/attention_lm.py | 1 + tensor2tensor/models/attention_lm_moe.py | 1 + tensor2tensor/models/bluenet.py | 1 + tensor2tensor/models/bluenet_test.py | 1 + tensor2tensor/models/bytenet.py | 1 + tensor2tensor/models/bytenet_test.py | 1 + tensor2tensor/models/common_attention.py | 4 +- tensor2tensor/models/common_hparams.py | 1 + tensor2tensor/models/common_layers.py | 1 + tensor2tensor/models/common_layers_test.py | 1 + tensor2tensor/models/long_answer.py | 1 + tensor2tensor/models/lstm.py | 1 + tensor2tensor/models/lstm_test.py | 1 + tensor2tensor/models/modalities.py | 1 + tensor2tensor/models/modalities_test.py | 1 + tensor2tensor/models/models.py | 1 + tensor2tensor/models/multimodel.py | 1 + tensor2tensor/models/multimodel_test.py | 1 + tensor2tensor/models/neural_gpu.py | 1 + tensor2tensor/models/neural_gpu_test.py | 1 + tensor2tensor/models/shake_shake.py | 1 + tensor2tensor/models/slicenet.py | 1 + tensor2tensor/models/slicenet_test.py | 1 + tensor2tensor/models/transformer.py | 1 + .../models/transformer_alternative.py | 1 + tensor2tensor/models/transformer_test.py | 1 + tensor2tensor/models/xception.py | 1 + tensor2tensor/models/xception_test.py | 1 + tensor2tensor/utils/__init__.py | 1 + tensor2tensor/utils/avg_checkpoints.py | 1 + tensor2tensor/utils/beam_search.py | 1 + tensor2tensor/utils/beam_search_test.py | 1 + tensor2tensor/utils/bleu_hook.py | 1 + tensor2tensor/utils/bleu_hook_test.py | 1 + tensor2tensor/utils/data_reader.py | 1 + tensor2tensor/utils/data_reader_test.py | 1 + tensor2tensor/utils/expert_utils.py | 1 + tensor2tensor/utils/get_ende_bleu.sh | 4 +- tensor2tensor/utils/metrics.py | 1 + tensor2tensor/utils/metrics_test.py | 1 + tensor2tensor/utils/modality.py | 1 + tensor2tensor/utils/registry.py | 1 + tensor2tensor/utils/registry_test.py | 1 + tensor2tensor/utils/t2t_model.py | 1 + tensor2tensor/utils/trainer_utils.py | 17 +- tensor2tensor/utils/trainer_utils_test.py | 1 + tensor2tensor/utils/usr_dir.py | 1 + tensor2tensor/utils/yellowfin.py | 1 + tensor2tensor/utils/yellowfin_test.py | 1 + 84 files changed, 341 insertions(+), 244 deletions(-) diff --git a/tensor2tensor/__init__.py b/tensor2tensor/__init__.py index eff6a2b14..3f714ce1f 100644 --- a/tensor2tensor/__init__.py +++ b/tensor2tensor/__init__.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index af5b47f8c..57e2b17fb 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs index 6a4dc8641..0b656aba6 100644 --- a/tensor2tensor/bin/t2t-make-tf-configs +++ b/tensor2tensor/bin/t2t-make-tf-configs @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index a37767258..13dd7d355 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/__init__.py b/tensor2tensor/data_generators/__init__.py index eff6a2b14..3f714ce1f 100644 --- a/tensor2tensor/data_generators/__init__.py +++ b/tensor2tensor/data_generators/__init__.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 2169e1910..676b4e45f 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/algorithmic_math.py b/tensor2tensor/data_generators/algorithmic_math.py index e65b47ff0..e061ceb0b 100644 --- a/tensor2tensor/data_generators/algorithmic_math.py +++ b/tensor2tensor/data_generators/algorithmic_math.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/algorithmic_math_test.py b/tensor2tensor/data_generators/algorithmic_math_test.py index 5f0de29fb..7cd67a83c 100644 --- a/tensor2tensor/data_generators/algorithmic_math_test.py +++ b/tensor2tensor/data_generators/algorithmic_math_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/algorithmic_test.py b/tensor2tensor/data_generators/algorithmic_test.py index fb8ff6719..57faaa80b 100644 --- a/tensor2tensor/data_generators/algorithmic_test.py +++ b/tensor2tensor/data_generators/algorithmic_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 93a8a06a2..d8007f5e3 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/audio.py b/tensor2tensor/data_generators/audio.py index 4f8c096a5..d0747a88c 100644 --- a/tensor2tensor/data_generators/audio.py +++ b/tensor2tensor/data_generators/audio.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/audio_test.py b/tensor2tensor/data_generators/audio_test.py index 1c19432c3..57e4e1ccc 100644 --- a/tensor2tensor/data_generators/audio_test.py +++ b/tensor2tensor/data_generators/audio_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/concatenate_examples.py b/tensor2tensor/data_generators/concatenate_examples.py index 158bc1b59..60ac7ea8f 100644 --- a/tensor2tensor/data_generators/concatenate_examples.py +++ b/tensor2tensor/data_generators/concatenate_examples.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 5c0c94bce..866a0f3e7 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -247,53 +248,19 @@ def gunzip_file(gz_path, new_path): ] -def get_or_generate_vocab(data_dir, tmp_dir, - vocab_filename, vocab_size, sources=None): - """Generate a vocabulary from the datasets in sources (_DATA_FILE_URLS).""" +def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, + generator_fn): + """Inner implementation for vocab generators.""" vocab_filepath = os.path.join(data_dir, vocab_filename) if tf.gfile.Exists(vocab_filepath): tf.logging.info("Found vocab file: %s", vocab_filepath) vocab = text_encoder.SubwordTextEncoder(vocab_filepath) return vocab - sources = sources or _DATA_FILE_URLS - tf.logging.info("Generating vocab from: %s", str(sources)) token_counts = defaultdict(int) - for source in sources: - url = source[0] - filename = os.path.basename(url) - read_type = "r:gz" if "tgz" in filename else "r" - - compressed_file = maybe_download(tmp_dir, filename, url) - - with tarfile.open(compressed_file, read_type) as corpus_tar: - corpus_tar.extractall(tmp_dir) - - for lang_file in source[1]: - tf.logging.info("Reading file: %s" % lang_file) - filepath = os.path.join(tmp_dir, lang_file) - - # For some datasets a second extraction is necessary. - if ".gz" in lang_file: - new_filepath = os.path.join(tmp_dir, lang_file[:-3]) - if tf.gfile.Exists(new_filepath): - tf.logging.info( - "Subdirectory %s already exists, skipping unpacking" % filepath) - else: - tf.logging.info("Unpacking subdirectory %s" % filepath) - gunzip_file(filepath, new_filepath) - filepath = new_filepath - - # Use Tokenizer to count the word occurrences. - with tf.gfile.GFile(filepath, mode="r") as source_file: - file_byte_budget = 3.5e5 if "en" in filepath else 7e5 - for line in source_file: - if file_byte_budget <= 0: - break - line = line.strip() - file_byte_budget -= len(line) - for tok in tokenizer.encode(text_encoder.native_to_unicode(line)): - token_counts[tok] += 1 + for item in generator_fn(): + for tok in tokenizer.encode(text_encoder.native_to_unicode(item)): + token_counts[tok] += 1 vocab = text_encoder.SubwordTextEncoder.build_to_target_size( vocab_size, token_counts, 1, 1e3) @@ -301,6 +268,55 @@ def get_or_generate_vocab(data_dir, tmp_dir, return vocab +def get_or_generate_vocab(data_dir, + tmp_dir, + vocab_filename, + vocab_size, + sources=None): + """Generate a vocabulary from the datasets in sources (_DATA_FILE_URLS).""" + sources = sources or _DATA_FILE_URLS + + def generate(): + tf.logging.info("Generating vocab from: %s", str(sources)) + for source in sources: + url = source[0] + filename = os.path.basename(url) + read_type = "r:gz" if "tgz" in filename else "r" + + compressed_file = maybe_download(tmp_dir, filename, url) + + with tarfile.open(compressed_file, read_type) as corpus_tar: + corpus_tar.extractall(tmp_dir) + + for lang_file in source[1]: + tf.logging.info("Reading file: %s" % lang_file) + filepath = os.path.join(tmp_dir, lang_file) + + # For some datasets a second extraction is necessary. + if ".gz" in lang_file: + new_filepath = os.path.join(tmp_dir, lang_file[:-3]) + if tf.gfile.Exists(new_filepath): + tf.logging.info( + "Subdirectory %s already exists, skipping unpacking" % filepath) + else: + tf.logging.info("Unpacking subdirectory %s" % filepath) + gunzip_file(filepath, new_filepath) + filepath = new_filepath + + # Use Tokenizer to count the word occurrences. + with tf.gfile.GFile(filepath, mode="r") as source_file: + file_byte_budget = 3.5e5 if "en" in filepath else 7e5 + for line in source_file: + if file_byte_budget <= 0: + break + line = line.strip() + file_byte_budget -= len(line) + yield line + + return get_or_generate_vocab_inner( + data_dir, vocab_filename, vocab_size, generator_fn=generate) + + def get_or_generate_tabbed_vocab(data_dir, tmp_dir, source_filename, index, vocab_filename, vocab_size): r"""Generate a vocabulary from a tabbed source file. @@ -320,27 +336,37 @@ def get_or_generate_tabbed_vocab(data_dir, tmp_dir, source_filename, Returns: The vocabulary. """ - vocab_filepath = os.path.join(data_dir, vocab_filename) - if os.path.exists(vocab_filepath): - vocab = text_encoder.SubwordTextEncoder(vocab_filepath) - return vocab - - # Use Tokenizer to count the word occurrences. - token_counts = defaultdict(int) - filepath = os.path.join(tmp_dir, source_filename) - with tf.gfile.GFile(filepath, mode="r") as source_file: - for line in source_file: - line = line.strip() - if line and "\t" in line: - parts = line.split("\t", maxsplit=1) - part = parts[index].strip() - for tok in tokenizer.encode(text_encoder.native_to_unicode(part)): - token_counts[tok] += 1 - - vocab = text_encoder.SubwordTextEncoder.build_to_target_size( - vocab_size, token_counts, 1, 1e3) - vocab.store_to_file(vocab_filepath) - return vocab + def generate(): + filepath = os.path.join(tmp_dir, source_filename) + tf.logging.info("Generating vocab from %s", filepath) + with tf.gfile.GFile(filepath, mode="r") as source_file: + for line in source_file: + line = line.strip() + if line and "\t" in line: + parts = line.split("\t", maxsplit=1) + part = parts[index].strip() + yield part + + return get_or_generate_vocab_inner( + data_dir, vocab_filename, vocab_size, generator_fn=generate) + + +def get_or_generate_txt_vocab(data_dir, vocab_filename, vocab_size, + filepatterns): + """Generate a vocabulary from txt files with example-per-line.""" + if isinstance(filepatterns, str): + filepatterns = [filepatterns] + + def generate(): + tf.logging.info("Generating vocab from %s", filepatterns) + for filepattern in filepatterns: + for filename in tf.gfile.Glob(filepattern): + with tf.gfile.GFile(filename, mode="r") as source_file: + for line in source_file: + yield line.strip() + + return get_or_generate_vocab_inner( + data_dir, vocab_filename, vocab_size, generator_fn=generate) def read_records(filename): diff --git a/tensor2tensor/data_generators/generator_utils_test.py b/tensor2tensor/data_generators/generator_utils_test.py index c776d120c..fd6e15ca3 100644 --- a/tensor2tensor/data_generators/generator_utils_test.py +++ b/tensor2tensor/data_generators/generator_utils_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -84,6 +85,27 @@ def testGunzipFile(self): os.remove(tmp_file_path + ".txt") os.remove(tmp_file_path) + def testGetOrGenerateTxtVocab(self): + data_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + test_file = os.path.join(self.get_temp_dir(), "test.txt") + with tf.gfile.Open(test_file, "w") as outfile: + outfile.write("a b c\n") + outfile.write("d e f\n") + # Create a vocab over the test file. + vocab1 = generator_utils.get_or_generate_txt_vocab( + data_dir, "test.voc", 20, test_file) + self.assertTrue(tf.gfile.Exists(os.path.join(data_dir, "test.voc"))) + self.assertIsNotNone(vocab1) + + # Append a new line to the test file which would change the vocab if + # the vocab were not being read from file. + with tf.gfile.Open(test_file, "a") as outfile: + outfile.write("g h i\n") + vocab2 = generator_utils.get_or_generate_txt_vocab( + data_dir, "test.voc", 20, test_file) + self.assertTrue(tf.gfile.Exists(os.path.join(data_dir, "test.voc"))) + self.assertIsNotNone(vocab2) + self.assertEqual(vocab1.dump(), vocab2.dump()) if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/data_generators/genetics.py b/tensor2tensor/data_generators/genetics.py index 255e0caf9..b4ad36544 100644 --- a/tensor2tensor/data_generators/genetics.py +++ b/tensor2tensor/data_generators/genetics.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/genetics_test.py b/tensor2tensor/data_generators/genetics_test.py index 70b4fe495..85d70f934 100644 --- a/tensor2tensor/data_generators/genetics_test.py +++ b/tensor2tensor/data_generators/genetics_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index e3567d78f..f8e3191a2 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/image_test.py b/tensor2tensor/data_generators/image_test.py index 6c9984265..59cad4226 100644 --- a/tensor2tensor/data_generators/image_test.py +++ b/tensor2tensor/data_generators/image_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/inspect.py b/tensor2tensor/data_generators/inspect.py index dad0c1c83..124c07017 100644 --- a/tensor2tensor/data_generators/inspect.py +++ b/tensor2tensor/data_generators/inspect.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index 78fb001bc..562435184 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 5beb0385f..690f14277 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -69,10 +70,6 @@ class SpaceID(object): ICE_PARSE_TOK = 19 # Macedonian tokens MK_TOK = 20 - # Czech tokens - CS_TOK = 21 - # Czech characters - CS_CHR = 22 class Problem(object): diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 51bc0ba62..4343afd27 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -180,9 +181,6 @@ def default_problem_hparams(): # 17: Icelandic characters # 18: Icelandic tokens # 19: Icelandic parse tokens - # 20: Macedonian tokens - # 21: Czech tokens - # 22: Czech characters # Add more above if needed. input_space_id=0, target_space_id=0, diff --git a/tensor2tensor/data_generators/problem_hparams_test.py b/tensor2tensor/data_generators/problem_hparams_test.py index ad1f0192d..df92919ef 100644 --- a/tensor2tensor/data_generators/problem_hparams_test.py +++ b/tensor2tensor/data_generators/problem_hparams_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/ptb.py b/tensor2tensor/data_generators/ptb.py index 9a7db3a78..f71f0d902 100644 --- a/tensor2tensor/data_generators/ptb.py +++ b/tensor2tensor/data_generators/ptb.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/snli.py b/tensor2tensor/data_generators/snli.py index 7322c59ff..cd4ff723d 100644 --- a/tensor2tensor/data_generators/snli.py +++ b/tensor2tensor/data_generators/snli.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 8be22ce0b..7c53784f3 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py index 093101c68..a0d5d8937 100644 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 2b1cf572c..d1faaa7b3 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py index c279290ed..189f19663 100644 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 8f905aa96..49147962a 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 456970e62..bb31d0c0f 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -42,6 +43,23 @@ EOS = text_encoder.EOS_ID +def _default_token_feature_encoders(data_dir, target_vocab_size): + vocab_filename = os.path.join(data_dir, + "vocab.endefr.%d" % target_vocab_size) + subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) + return { + "inputs": subtokenizer, + "targets": subtokenizer, + } + + +def _default_character_feature_encoders(): + return { + "inputs": text_encoder.ByteTextEncoder(), + "targets": text_encoder.ByteTextEncoder(), + } + + class WMTProblem(problem.Problem): """Base class for WMT problems.""" @@ -53,13 +71,14 @@ def is_character_level(self): def targeted_vocab_size(self): raise NotImplementedError() # Not needed if self.is_character_level. - def train_generator(self, data_dir, tmp_dir, is_training): - """Generator of the training data.""" + @property + def train_generator(self): + """Generator; takes data_dir, tmp_dir, is_training, targeted_vocab_size.""" raise NotImplementedError() - def dev_generator(self, data_dir, tmp_dir, is_training): - """Generator of the development data.""" - return self.train_generator(data_dir, tmp_dir, is_training) + @property + def dev_generator(self): + return self.train_generator @property def input_space_id(self): @@ -73,35 +92,28 @@ def target_space_id(self): def num_shards(self): return 100 - @property - def vocab_name(self): - return "vocab.endefr" - - @property - def vocab_file(self): - return "%s.%d" % (self.vocab_name, self.targeted_vocab_size) - def generate_data(self, data_dir, tmp_dir, num_shards=None): if num_shards is None: num_shards = self.num_shards - generator_utils.generate_dataset_and_shuffle( - self.train_generator(data_dir, tmp_dir, True), - self.training_filepaths(data_dir, num_shards, shuffled=False), - self.dev_generator(data_dir, tmp_dir, False), - self.dev_filepaths(data_dir, 1, shuffled=False)) + if self.is_character_level: + generator_utils.generate_dataset_and_shuffle( + self.train_generator(tmp_dir, True), + self.training_filepaths(data_dir, num_shards, shuffled=False), + self.dev_generator(tmp_dir, False), + self.dev_filepaths(data_dir, 1, shuffled=False)) + else: + generator_utils.generate_dataset_and_shuffle( + self.train_generator(data_dir, tmp_dir, True, + self.targeted_vocab_size), + self.training_filepaths(data_dir, num_shards, shuffled=False), + self.dev_generator(data_dir, tmp_dir, False, + self.targeted_vocab_size), + self.dev_filepaths(data_dir, 1, shuffled=False)) def feature_encoders(self, data_dir): if self.is_character_level: - return { - "inputs": text_encoder.ByteTextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - vocab_filename = os.path.join(data_dir, self.vocab_file) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - return { - "inputs": subtokenizer, - "targets": subtokenizer, - } + return _default_character_feature_encoders() + return _default_token_feature_encoders(data_dir, self.targeted_vocab_size) def hparams(self, defaults, unused_model_hparams): p = defaults @@ -163,8 +175,8 @@ def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): Args: source_path: path to the file with source and target sentences. - source_vocab: a SubwordTextEncoder to encode the source string. - target_vocab: a SubwordTextEncoder to encode the target string. + source_vocab: a SunwordTextEncoder to encode the source string. + target_vocab: a SunwordTextEncoder to encode the target string. eos: integer to append at the end of each sequence (default: None). Yields: @@ -325,29 +337,6 @@ def bi_vocabs_token_generator(source_path, ("dev.mk", "dev.en") ]] -# English-Czech datasets -_ENCS_TRAIN_DATASETS = [ - [ - "http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long - ("training-parallel-nc-v11/news-commentary-v11.cs-en.en", - "training-parallel-nc-v11/news-commentary-v11.cs-en.cs") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", - ("commoncrawl.cs-en.en", "commoncrawl.cs-en.cs") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", - ("training/europarl-v7.cs-en.en", "training/europarl-v7.cs-en.cs") - ], -] -_ENCS_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt16/translation-task/dev.tgz", - ("dev/newstest2013.en", "dev/newstest2013.cs") - ], -] - # Generators. @@ -419,6 +408,16 @@ def _compile_data(tmp_dir, datasets, filename): return filename +def ende_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @registry.register_problem("wmt_ende_tokens_8k") class WMTEnDeTokens8k(WMTProblem): """Problem spec for WMT En-De translation.""" @@ -427,13 +426,9 @@ class WMTEnDeTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - def train_generator(self, data_dir, tmp_dir, train): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", symbolizer_vocab, EOS) + @property + def train_generator(self): + return ende_wordpiece_token_generator @property def input_space_id(self): @@ -452,6 +447,15 @@ def targeted_vocab_size(self): return 2**15 # 32768 +def ende_character_generator(tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) + + @registry.register_problem("wmt_ende_characters") class WMTEnDeCharacters(WMTProblem): """Problem spec for WMT En-De translation.""" @@ -460,13 +464,9 @@ class WMTEnDeCharacters(WMTProblem): def is_character_level(self): return True - def train_generator(self, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) + @property + def train_generator(self): + return ende_character_generator @property def input_space_id(self): @@ -477,6 +477,29 @@ def target_space_id(self): return problem.SpaceID.DE_CHR +def zhen_wordpiece_token_bigenerator(data_dir, tmp_dir, train, + source_vocab_size, target_vocab_size): + """Wordpiece generator for the WMT'17 zh-en dataset.""" + datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] + source_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, "vocab.zh.%d" % source_vocab_size, + source_vocab_size, source_datasets) + target_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, "vocab.en.%d" % target_vocab_size, + target_vocab_size, target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) + return bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", + source_vocab, target_vocab, EOS) + + +def zhen_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): + return zhen_wordpiece_token_bigenerator(data_dir, tmp_dir, train, + vocab_size, vocab_size) + + @registry.register_problem("wmt_zhen_tokens_8k") class WMTZhEnTokens8k(WMTProblem): """Problem spec for WMT Zh-En translation.""" @@ -485,22 +508,9 @@ class WMTZhEnTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - def train_generator(self, data_dir, tmp_dir, train): - source_vocab_size = self.targeted_vocab_size - target_vocab_size = self.targeted_vocab_size - datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] - source_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.zh.%d" % source_vocab_size, source_vocab_size, - source_datasets) - target_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.en.%d" % target_vocab_size, target_vocab_size, - target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) - return bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", - source_vocab, target_vocab, EOS) + @property + def train_generator(self): + return zhen_wordpiece_token_generator @property def input_space_id(self): @@ -532,6 +542,17 @@ def targeted_vocab_size(self): return 2**15 # 32768 +def enfr_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): + """Instance of token generator for the WMT en->fr task.""" + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @registry.register_problem("wmt_enfr_tokens_8k") class WMTEnFrTokens8k(WMTProblem): """Problem spec for WMT En-Fr translation.""" @@ -540,13 +561,9 @@ class WMTEnFrTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - def train_generator(self, tmp_dir, train): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", symbolizer_vocab, EOS) + @property + def train_generator(self): + return enfr_wordpiece_token_generator @property def input_space_id(self): @@ -565,6 +582,16 @@ def targeted_vocab_size(self): return 2**15 # 32768 +def enfr_character_generator(tmp_dir, train): + """Instance of character generator for the WMT en->fr task.""" + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) + + @registry.register_problem("wmt_enfr_characters") class WMTEnFrCharacters(WMTProblem): """Problem spec for WMT En-Fr translation.""" @@ -573,13 +600,9 @@ class WMTEnFrCharacters(WMTProblem): def is_character_level(self): return True - def train_generator(self, data_dir, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) + @property + def train_generator(self): + return enfr_character_generator @property def input_space_id(self): @@ -590,6 +613,20 @@ def target_space_id(self): return problem.SpaceID.FR_CHR +def mken_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): + """Wordpiece generator for the SETimes Mk-En dataset.""" + datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in _MKEN_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _MKEN_TRAIN_DATASETS] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, "vocab.mken.%d" % vocab_size, vocab_size, + source_datasets + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @registry.register_problem("setimes_mken_tokens_32k") class SETimesMkEnTokens32k(WMTProblem): """Problem spec for SETimes Mk-En translation.""" @@ -599,20 +636,8 @@ def targeted_vocab_size(self): return 2**15 # 32768 @property - def vocab_name(self): - return "vocab.mken" - - def train_generator(self, data_dir, tmp_dir, train): - datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - source_datasets + target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) + def train_generator(self): + return mken_wordpiece_token_generator @property def input_space_id(self): @@ -622,65 +647,7 @@ def input_space_id(self): def target_space_id(self): return problem.SpaceID.EN_TOK -@registry.register_problem("wmt_encs_tokens_32k") -class WMTEnCsTokens32k(problem.Problem): - """Problem spec for WMT English-Czech translation.""" - - @property - def target_vocab_size(self): - return 2**15 # 32768 - - @property - def vocab_name(self): - return "vocab.encs" - - def train_generator(self, data_dir, tmp_dir, train): - datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - source_datasets + target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.CS_TOK - - -@registry.register_problem("wmt_encs_characters") -class WMTEnCsCharacters(WMTProblem): - """Problem spec for WMT En-Cs character-based translation.""" - - @property - def is_character_level(self): - return True - - def train_generator(self, data_dir, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_encs_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def target_space_id(self): - return problem.SpaceID.CS_CHR - -# TODO This function is not used anywhere. def parsing_character_generator(tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() filename = "parsing_%s" % ("train" if train else "dev") diff --git a/tensor2tensor/data_generators/wmt_test.py b/tensor2tensor/data_generators/wmt_test.py index 86b88e5b1..441ceef59 100644 --- a/tensor2tensor/data_generators/wmt_test.py +++ b/tensor2tensor/data_generators/wmt_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/data_generators/wsj_parsing.py b/tensor2tensor/data_generators/wsj_parsing.py index 200754e16..4b1dbdd80 100644 --- a/tensor2tensor/data_generators/wsj_parsing.py +++ b/tensor2tensor/data_generators/wsj_parsing.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index eff6a2b14..3f714ce1f 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 752de038e..3b874555f 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 2754e8366..4b37050bb 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index 95216f43d..3ac477e4b 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/bluenet_test.py b/tensor2tensor/models/bluenet_test.py index b3f18249d..d4ce85b1a 100644 --- a/tensor2tensor/models/bluenet_test.py +++ b/tensor2tensor/models/bluenet_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/bytenet.py b/tensor2tensor/models/bytenet.py index 301626dc2..28862e594 100644 --- a/tensor2tensor/models/bytenet.py +++ b/tensor2tensor/models/bytenet.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/bytenet_test.py b/tensor2tensor/models/bytenet_test.py index f1e42669e..738b84251 100644 --- a/tensor2tensor/models/bytenet_test.py +++ b/tensor2tensor/models/bytenet_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index c8b4a6068..4f694a7f9 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -65,9 +66,6 @@ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): tf.to_float(tf.range(num_timescales)) * -log_timescale_increment) scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0) signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1) - signal = tf.reshape(signal, [length, 2, num_timescales]) - signal = tf.transpose(signal, perm=[0, 2, 1]) - signal = tf.reshape(signal, [length, channels]) signal = tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]]) signal = tf.reshape(signal, [1, length, channels]) return x + signal diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index ff856968b..a86974d1f 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 638535aa2..11b6396a8 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/models/common_layers_test.py index 3a2fafd8b..8e724587b 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/models/common_layers_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/long_answer.py b/tensor2tensor/models/long_answer.py index 7bb6a4a55..be8024f63 100644 --- a/tensor2tensor/models/long_answer.py +++ b/tensor2tensor/models/long_answer.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index c3ae0a01e..ae221bdff 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index 4ddaf6b64..1e542a666 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/modalities.py b/tensor2tensor/models/modalities.py index 60df80a1c..9a6115558 100644 --- a/tensor2tensor/models/modalities.py +++ b/tensor2tensor/models/modalities.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/modalities_test.py b/tensor2tensor/models/modalities_test.py index 118db3847..4254c6b04 100644 --- a/tensor2tensor/models/modalities_test.py +++ b/tensor2tensor/models/modalities_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index 2cf639426..e92ddd3ed 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index bf06dfd65..089889ce6 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/multimodel_test.py b/tensor2tensor/models/multimodel_test.py index 958fac5d7..03990594b 100644 --- a/tensor2tensor/models/multimodel_test.py +++ b/tensor2tensor/models/multimodel_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/neural_gpu.py b/tensor2tensor/models/neural_gpu.py index 30d535098..fc9d75639 100644 --- a/tensor2tensor/models/neural_gpu.py +++ b/tensor2tensor/models/neural_gpu.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/neural_gpu_test.py b/tensor2tensor/models/neural_gpu_test.py index 1dddc1056..3d1cc0562 100644 --- a/tensor2tensor/models/neural_gpu_test.py +++ b/tensor2tensor/models/neural_gpu_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py index 26d43afb3..7fa40783a 100644 --- a/tensor2tensor/models/shake_shake.py +++ b/tensor2tensor/models/shake_shake.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index 2ad4c89d1..69e2338b6 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/slicenet_test.py b/tensor2tensor/models/slicenet_test.py index 911953445..692799571 100644 --- a/tensor2tensor/models/slicenet_test.py +++ b/tensor2tensor/models/slicenet_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index c693d1ca3..23197fcd9 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py index 280dbc713..62413c325 100644 --- a/tensor2tensor/models/transformer_alternative.py +++ b/tensor2tensor/models/transformer_alternative.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 997b5d172..a7f1fc9ae 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index d3c5a2690..61fa61235 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/models/xception_test.py b/tensor2tensor/models/xception_test.py index aa5c1c034..bf434aeac 100644 --- a/tensor2tensor/models/xception_test.py +++ b/tensor2tensor/models/xception_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/__init__.py b/tensor2tensor/utils/__init__.py index eff6a2b14..3f714ce1f 100644 --- a/tensor2tensor/utils/__init__.py +++ b/tensor2tensor/utils/__init__.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/avg_checkpoints.py b/tensor2tensor/utils/avg_checkpoints.py index a84750310..77acd4353 100644 --- a/tensor2tensor/utils/avg_checkpoints.py +++ b/tensor2tensor/utils/avg_checkpoints.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/beam_search.py b/tensor2tensor/utils/beam_search.py index 3a511907d..dd8275204 100644 --- a/tensor2tensor/utils/beam_search.py +++ b/tensor2tensor/utils/beam_search.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/beam_search_test.py b/tensor2tensor/utils/beam_search_test.py index e084f1f0e..5223989ea 100644 --- a/tensor2tensor/utils/beam_search_test.py +++ b/tensor2tensor/utils/beam_search_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 155b10c72..06d62ad1e 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/bleu_hook_test.py b/tensor2tensor/utils/bleu_hook_test.py index 8092ab979..bf08174f8 100644 --- a/tensor2tensor/utils/bleu_hook_test.py +++ b/tensor2tensor/utils/bleu_hook_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index cd8e6c2d3..d7af960ab 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index 18507ed06..f0c318e7b 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index c3becbfb4..e21f2453a 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/get_ende_bleu.sh b/tensor2tensor/utils/get_ende_bleu.sh index 3493af74c..09078414f 100755 --- a/tensor2tensor/utils/get_ende_bleu.sh +++ b/tensor2tensor/utils/get_ende_bleu.sh @@ -5,8 +5,10 @@ tok_gold_targets=newstest2013.tok.de decodes_file=$1 +cut -d' ' -f1 $decodes_file > $decodes_file.target + # Tokenize. -perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l de < $decodes_file > $decodes_file.tok +perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l de < $decodes_file.target > $decodes_file.tok # Put compounds in ATAT format (comparable to papers like GNMT, ConvS2S). # See https://nlp.stanford.edu/projects/nmt/ : diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index cf66f6af8..118e33394 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/metrics_test.py b/tensor2tensor/utils/metrics_test.py index de72d797f..0d78e632c 100644 --- a/tensor2tensor/utils/metrics_test.py +++ b/tensor2tensor/utils/metrics_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/modality.py b/tensor2tensor/utils/modality.py index 3ac6153b7..a42f35c24 100644 --- a/tensor2tensor/utils/modality.py +++ b/tensor2tensor/utils/modality.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 5a8823510..0baad2471 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/registry_test.py b/tensor2tensor/utils/registry_test.py index 1f4436b0c..3231809ea 100644 --- a/tensor2tensor/utils/registry_test.py +++ b/tensor2tensor/utils/registry_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 2a271afbf..9777568fc 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 878dbe107..96c43a5a0 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -121,6 +122,8 @@ "Whether to return 1 (False) or all (True) beams. The \n " "output file will have the format " "<beam1>\t<beam2>..\t<input>") +flags.DEFINE_integer("decode_max_input_size", -1, + "Maximum number of ids in input. Or <= 0 for no max.") def _save_until_eos(hyp): @@ -693,17 +696,22 @@ def log_fn(inputs, outputs): decodes.reverse() # Dumping inputs and outputs to file filename.decodes in # format result\tinput in the same order as original inputs + if FLAGS.decode_to_file: + output_filename = FLAGS.decode_to_file + else: + output_filename = filename if FLAGS.decode_shards > 1: - base_filename = filename + ("%.2d" % FLAGS.worker_id) + base_filename = output_filename + ("%.2d" % FLAGS.worker_id) else: - base_filename = filename + base_filename = output_filename decode_filename = (base_filename + "." + FLAGS.model + "." + FLAGS.hparams_set + ".beam" + str(FLAGS.decode_beam_size) + ".alpha" + str(FLAGS.decode_alpha) + ".decodes") tf.logging.info("Writing decodes into %s" % decode_filename) outfile = tf.gfile.Open(decode_filename, "w") for index in range(len(sorted_inputs)): - outfile.write("%s\n" % (decodes[sorted_keys[index]])) + outfile.write("%s\t%s\n" % (decodes[sorted_keys[index]], + sorted_inputs[sorted_keys[index]])) def decode_interactively(estimator): @@ -744,6 +752,9 @@ def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, for inputs in sorted_inputs[b * FLAGS.decode_batch_size: (b + 1) * FLAGS.decode_batch_size]: input_ids = vocabulary.encode(inputs) + if FLAGS.decode_max_input_size > 0: + # Subtract 1 for the EOS_ID. + input_ids = input_ids[:FLAGS.decode_max_input_size - 1] input_ids.append(text_encoder.EOS_ID) batch_inputs.append(input_ids) if len(input_ids) > batch_length: diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 3ed86952b..ea88183c9 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/usr_dir.py b/tensor2tensor/utils/usr_dir.py index 0a2d0d15c..d89745b98 100644 --- a/tensor2tensor/utils/usr_dir.py +++ b/tensor2tensor/utils/usr_dir.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/yellowfin.py b/tensor2tensor/utils/yellowfin.py index 6bbe31bf6..aeb14e76e 100644 --- a/tensor2tensor/utils/yellowfin.py +++ b/tensor2tensor/utils/yellowfin.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tensor2tensor/utils/yellowfin_test.py b/tensor2tensor/utils/yellowfin_test.py index c4727175b..2130be2b3 100644 --- a/tensor2tensor/utils/yellowfin_test.py +++ b/tensor2tensor/utils/yellowfin_test.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); From 8f624dbda8d78d0331b5cc7465cc1f39bf259de1 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 21 Jul 2017 15:15:04 -0700 Subject: [PATCH 0161/4095] Don't repeatedly concatenate strings in a loop. PiperOrigin-RevId: 162791277 --- tensor2tensor/data_generators/tokenizer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index d1faaa7b3..0f4141199 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -101,13 +101,13 @@ def decode(tokens): Returns: a unicode string """ - ret = u"" token_is_alnum = [t[0] in _ALPHANUMERIC_CHAR_SET for t in tokens] + ret = [] for i, token in enumerate(tokens): if i > 0 and token_is_alnum[i - 1] and token_is_alnum[i]: - ret += u" " - ret += token - return ret + ret.append(u" ") + ret.append(token) + return "".join(ret) def corpus_token_counts(text_filepattern, corpus_max_lines, From e43ce968f9ce9f06dc5bb83cc0bb57af848fe3ac Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 24 Jul 2017 09:27:21 -0700 Subject: [PATCH 0162/4095] Set `allow_defun` to False, allowing export to tf.SavedModel PiperOrigin-RevId: 162946551 --- tensor2tensor/models/common_layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 11b6396a8..37e791bc3 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -31,7 +31,7 @@ from tensorflow.python.framework import function # This is a global setting. When turned off, no @function.Defun is used. -allow_defun = True +allow_defun = False def saturating_sigmoid(x): From c422b989ba9963b2900b53aa5d8de8d5505ddc01 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 24 Jul 2017 09:27:44 -0700 Subject: [PATCH 0163/4095] Add task_id to Problem for possibly distributed data gen PiperOrigin-RevId: 162946584 --- tensor2tensor/bin/t2t-datagen | 5 ++++- tensor2tensor/data_generators/problem.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 57e2b17fb..ecb5175e6 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -67,6 +67,7 @@ flags.DEFINE_integer("num_shards", 10, "How many shards to use.") flags.DEFINE_integer("max_cases", 0, "Maximum number of cases to generate (unbounded if 0).") flags.DEFINE_integer("random_seed", 429459, "Random seed to use.") +flags.DEFINE_integer("task_id", -1, "For distributed data generation.") flags.DEFINE_string("t2t_usr_dir", "", "Path to a Python module that will be imported. The " "__init__.py file should include the necessary imports. " @@ -277,9 +278,11 @@ def generate_data_for_problem(problem): def generate_data_for_registered_problem(problem_name): problem = registry.problem(problem_name) + task_id = None if FLAGS.task_id < 0 else FLAGS.task_id problem.generate_data(os.path.expanduser(FLAGS.data_dir), os.path.expanduser(FLAGS.tmp_dir), - FLAGS.num_shards) + FLAGS.num_shards, + task_id=task_id) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 690f14277..99f8e97de 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -115,7 +115,7 @@ class Problem(object): # BEGIN SUBCLASS INTERFACE # ============================================================================ - def generate_data(self, data_dir, tmp_dir, num_shards=None): + def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): raise NotImplementedError() def hparams(self, defaults, model_hparams): From 7a3c35dabaedbca620e5b7915903076ae93e03a7 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 24 Jul 2017 18:03:03 -0700 Subject: [PATCH 0164/4095] GeneExpression Problem, RealModality, and Problem.preprocessing PiperOrigin-RevId: 163016460 --- tensor2tensor/bin/t2t-datagen | 2 +- tensor2tensor/data_generators/algorithmic.py | 2 +- tensor2tensor/data_generators/genetics.py | 171 +++++++++++++++--- .../data_generators/genetics_test.py | 19 +- tensor2tensor/data_generators/image.py | 32 +++- tensor2tensor/data_generators/problem.py | 27 +++ tensor2tensor/data_generators/wmt.py | 2 +- tensor2tensor/models/common_hparams.py | 2 + tensor2tensor/models/modalities.py | 38 +++- tensor2tensor/models/transformer.py | 4 +- tensor2tensor/utils/data_reader.py | 76 ++++++-- tensor2tensor/utils/modality.py | 2 +- tensor2tensor/utils/trainer_utils.py | 72 +++++--- 13 files changed, 363 insertions(+), 86 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index ecb5175e6..783906d95 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -281,7 +281,7 @@ def generate_data_for_registered_problem(problem_name): task_id = None if FLAGS.task_id < 0 else FLAGS.task_id problem.generate_data(os.path.expanduser(FLAGS.data_dir), os.path.expanduser(FLAGS.tmp_dir), - FLAGS.num_shards, + num_shards=FLAGS.num_shards, task_id=task_id) diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 676b4e45f..017bc8470 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -66,7 +66,7 @@ def dev_size(self): def num_shards(self): return 10 - def generate_data(self, data_dir, _, num_shards=None): + def generate_data(self, data_dir, _, num_shards=None, task_id=-1): if num_shards is None: num_shards = self.num_shards diff --git a/tensor2tensor/data_generators/genetics.py b/tensor2tensor/data_generators/genetics.py index b4ad36544..848c2341b 100644 --- a/tensor2tensor/data_generators/genetics.py +++ b/tensor2tensor/data_generators/genetics.py @@ -35,6 +35,7 @@ from __future__ import division from __future__ import print_function +import itertools import multiprocessing as mp import os @@ -50,19 +51,13 @@ from tensor2tensor.data_generators import text_encoder from tensor2tensor.utils import registry -_bases = list("ACTG") -BASE_TO_ID = dict(zip(_bases, range(len(_bases)))) -ID_TO_BASE = dict(zip(range(len(_bases)), _bases)) -UNK_ID = len(_bases) - +import tensorflow as tf -# TODO(rsepassi): -# * DataEncoder for genetic bases -# * GeneticModality and problem hparams -# * Training preprocessing +_bases = list("ACTG") -class GeneticsProblem(problem.Problem): +class GeneExpressionProblem(problem.Problem): + """Base Problem for gene expression datasets.""" @property def download_url(self): @@ -72,13 +67,35 @@ def download_url(self): def h5_file(self): raise NotImplementedError() - def generate_data(self, data_dir, tmp_dir, num_shards=None): + @property + def num_output_predictions(self): + """Number of float predictions per timestep.""" + return 10 + + @property + def chunk_size(self): + return 4 + + def feature_encoders(self, data_dir): + del data_dir + return { + "inputs": GeneticBaseEncoder(chunk_size=self.chunk_size), + # TODO(rsepassi): RealEncoder? + "targets": text_encoder.TextEncoder() + } + + def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): if num_shards is None: num_shards = 100 - # Download source data - h5_filepath = generator_utils.maybe_download(tmp_dir, self.h5_file, - self.download_url) + try: + # Download source data if download_url specified + h5_filepath = generator_utils.maybe_download(tmp_dir, self.h5_file, + self.download_url) + except NotImplementedError: + # Otherwise, look for it locally + h5_filepath = os.path.join(tmp_dir, self.h5_file) + with h5py.File(h5_filepath, "r") as h5_file: num_train_examples = h5_file["train_in"].len() num_dev_examples = h5_file["valid_in"].len() @@ -100,7 +117,8 @@ def generate_data(self, data_dir, tmp_dir, num_shards=None): outfiles, num_examples): p = mp.Process( target=generate_dataset, - args=(h5_filepath, key_prefix, [outfile], start_idx, end_idx)) + args=(h5_filepath, key_prefix, [outfile], self.chunk_size, + start_idx, end_idx)) processes.append(p) # Start and wait for processes @@ -113,9 +131,36 @@ def generate_data(self, data_dir, tmp_dir, num_shards=None): # Shuffle generator_utils.shuffle_dataset(all_filepaths) + def hparams(self, defaults, model_hparams): + p = defaults + vocab_size = self._encoders["inputs"].vocab_size + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} + p.target_modality = ("%s:real" % registry.Modalities.GENERIC, + self.num_output_predictions) + p.input_space_id = problem.SpaceID.DNA + p.target_space_id = problem.SpaceID.REAL + + def example_reading_spec(self): + # TODO(rsepassi): propagate and apply targets_mask to output RealModality. + data_fields = { + "inputs": tf.VarLenFeature(tf.int64), + "targets_mask": tf.VarLenFeature(tf.float32), + "targets": tf.VarLenFeature(tf.float32), + } + data_items_to_decoders = None + return (data_fields, data_items_to_decoders) + + def preprocess_examples(self, examples, mode): + del mode + + examples["targets"] = tf.reshape(examples["targets"], + [-1, 1, self.num_output_predictions]) + + return examples + @registry.register_problem("genetics_cage10") -class GeneticsCAGE10(GeneticsProblem): +class GeneticsCAGE10(GeneExpressionProblem): @property def download_url(self): @@ -127,7 +172,7 @@ def h5_file(self): @registry.register_problem("genetics_gm12878") -class GeneticsGM12878(GeneticsProblem): +class GeneticsGM12878(GeneExpressionProblem): @property def download_url(self): @@ -138,6 +183,14 @@ def h5_file(self): return "gm12878.h5" +@registry.register_problem("genetics_l262k") +class GeneticsL262k(GeneExpressionProblem): + + @property + def h5_file(self): + return "l262k_w128.h5" + + def generate_shard_args(outfiles, num_examples): """Generate start and end indices per outfile.""" num_shards = len(outfiles) @@ -152,16 +205,22 @@ def generate_shard_args(outfiles, num_examples): def generate_dataset(h5_filepath, key_prefix, out_filepaths, + chunk_size=1, start_idx=None, end_idx=None): print("PID: %d, Key: %s, (Start, End): (%s, %s)" % (os.getpid(), key_prefix, start_idx, end_idx)) generator_utils.generate_files( - dataset_generator(h5_filepath, key_prefix, start_idx, end_idx), - out_filepaths) + dataset_generator(h5_filepath, key_prefix, chunk_size, start_idx, + end_idx), out_filepaths) -def dataset_generator(filepath, dataset, start_idx=None, end_idx=None): +def dataset_generator(filepath, + dataset, + chunk_size=1, + start_idx=None, + end_idx=None): + encoder = GeneticBaseEncoder(chunk_size=chunk_size) with h5py.File(filepath, "r") as h5_file: # Get input keys from h5_file src_keys = [s % dataset for s in ["%s_in", "%s_na", "%s_out"]] @@ -178,12 +237,13 @@ def dataset_generator(filepath, dataset, start_idx=None, end_idx=None): if i % 100 == 0: print("Generating example %d for %s" % (i, dataset)) inputs, mask, outputs = inp_data[i], mask_data[i], out_data[i] - yield to_example_dict(inputs, mask, outputs) + yield to_example_dict(encoder, inputs, mask, outputs) -def to_example_dict(inputs, mask, outputs): +def to_example_dict(encoder, inputs, mask, outputs): """Convert single h5 record to an example dict.""" # Inputs + bases = [] input_ids = [] last_idx = -1 for row in np.argwhere(inputs): @@ -192,11 +252,13 @@ def to_example_dict(inputs, mask, outputs): assert idx > last_idx # if not, means 2 True values in 1 row # Some rows are all False. Those rows are mapped to UNK_ID. while idx != last_idx + 1: - input_ids.append(UNK_ID + text_encoder.NUM_RESERVED_TOKENS) + bases.append(encoder.UNK) last_idx += 1 - input_ids.append(base_id + text_encoder.NUM_RESERVED_TOKENS) + bases.append(_bases[base_id]) last_idx = idx - assert len(inputs) == len(input_ids) + assert len(inputs) == len(bases) + + input_ids = encoder.encode(bases) input_ids.append(text_encoder.EOS_ID) # Targets: mask and output @@ -211,3 +273,62 @@ def to_example_dict(inputs, mask, outputs): ex_dict = dict( zip(example_keys, [input_ids, targets_mask, targets, targets_shape])) return ex_dict + + +class GeneticBaseEncoder(text_encoder.TextEncoder): + """ACTG strings to ints and back. Optionally chunks bases into single ids. + + Uses 'X' as an unknown base. + """ + UNK = "X" + PAD = "0" + + def __init__(self, + chunk_size=1, + num_reserved_ids=text_encoder.NUM_RESERVED_TOKENS): + super(GeneticBaseEncoder, self).__init__(num_reserved_ids=num_reserved_ids) + # Build a vocabulary of chunks of size chunk_size + self._chunk_size = chunk_size + chunks = [] + for size in range(1, chunk_size + 1): + c = itertools.product(_bases + [GeneticBaseEncoder.UNK], repeat=size) + num_pad = chunk_size - size + padding = (GeneticBaseEncoder.PAD,) * num_pad + c = [el + padding for el in c] + chunks.extend(c) + chunks.sort() + ids = range(self._num_reserved_ids, len(chunks) + self._num_reserved_ids) + self._ids_to_chunk = dict(zip(ids, chunks)) + self._chunks_to_ids = dict(zip(chunks, ids)) + + @property + def vocab_size(self): + return len(self._ids_to_chunk) + self._num_reserved_ids + + def encode(self, s): + bases = list(s) + pad = [GeneticBaseEncoder.PAD] * (len(bases) % self._chunk_size) + bases.extend(pad) + assert (len(bases) % self._chunk_size) == 0 + num_chunks = len(bases) // self._chunk_size + ids = [] + for chunk_idx in xrange(num_chunks): + start_idx = chunk_idx * self._chunk_size + end_idx = start_idx + self._chunk_size + chunk = tuple(bases[start_idx:end_idx]) + if chunk not in self._chunks_to_ids: + raise ValueError("Unrecognized chunk %s" % chunk) + ids.append(self._chunks_to_ids[chunk]) + return ids + + def decode(self, ids): + bases = [] + for idx in ids: + if idx >= self._num_reserved_ids: + chunk = self._ids_to_chunk[idx] + if GeneticBaseEncoder.PAD in chunk: + chunk = chunk[:chunk.index(GeneticBaseEncoder.PAD)] + else: + chunk = [text_encoder.RESERVED_TOKENS[idx]] + bases.extend(chunk) + return "".join(bases) diff --git a/tensor2tensor/data_generators/genetics_test.py b/tensor2tensor/data_generators/genetics_test.py index 85d70f934..5eac1b249 100644 --- a/tensor2tensor/data_generators/genetics_test.py +++ b/tensor2tensor/data_generators/genetics_test.py @@ -30,21 +30,28 @@ class GeneticsTest(tf.test.TestCase): def _oneHotBases(self, bases): + ref = ["A", "C", "T", "G"] one_hots = [] - for base_id in bases: + for base in bases: one_hot = [False] * 4 - if base_id < 4: - one_hot[base_id] = True + if base in ref: + one_hot[ref.index(base)] = True one_hots.append(one_hot) return np.array(one_hots) def testRecordToExample(self): - inputs = self._oneHotBases([0, 1, 3, 4, 1, 0]) + encoder = genetics.GeneticBaseEncoder(chunk_size=2) + raw_inputs = ["A", "C", "G", "X", "C", "T"] + + # Put in numpy arrays in the same format as in the h5 file + inputs = self._oneHotBases(raw_inputs) mask = np.array([True, False, True]) outputs = np.array([[1.0, 2.0, 3.0], [5.0, 1.0, 0.2], [5.1, 2.3, 2.3]]) - ex_dict = genetics.to_example_dict(inputs, mask, outputs) + # Convert to example dict + ex_dict = genetics.to_example_dict(encoder, inputs, mask, outputs) - self.assertAllEqual([2, 3, 5, 6, 3, 2, 1], ex_dict["inputs"]) + self.assertEqual(len(raw_inputs) // 2 + 1, len(ex_dict["inputs"])) + self.assertAllEqual(encoder.encode(raw_inputs) + [1], ex_dict["inputs"]) self.assertAllEqual([1.0, 0.0, 1.0], ex_dict["targets_mask"]) self.assertAllEqual([1.0, 2.0, 3.0, 5.0, 1.0, 0.2, 5.1, 2.3, 2.3], ex_dict["targets"]) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index f8e3191a2..acb1128ed 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -307,14 +307,38 @@ def mscoco_generator(data_dir, "image/width": [width] } + +class ImageProblem(problem.Problem): + + def example_reading_spec(self, label_key=None): + if label_key is None: + label_key = "image/class/label" + + data_fields = { + "image/encoded": tf.FixedLenFeature((), tf.string), + "image/format": tf.FixedLenFeature((), tf.string), + label_key: tf.VarLenFeature(tf.int64) + } + data_items_to_decoders = { + "inputs": + tf.contrib.slim.tfexample_decoder.Image( + image_key="image/encoded", + format_key="image/format", + channels=3), + "targets": + tf.contrib.slim.tfexample_decoder.Tensor(label_key), + } + + return data_fields, data_items_to_decoders + # French street names dataset. @registry.register_problem -class ImageFSNS(problem.Problem): +class ImageFSNS(ImageProblem): """Problem spec for French Street Name recognition.""" - def generate_data(self, data_dir, tmp_dir): + def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): list_url = ("https://raw.githubusercontent.com/tensorflow/models/master/" "street/python/fsns_urls.txt") fsns_urls = generator_utils.maybe_download( @@ -351,6 +375,10 @@ def hparams(self, defaults, model_hparams): p.input_space_id = problem.SpaceID.DIGIT_0 p.target_space_id = problem.SpaceID.DIGIT_1 + def example_reading_spec(self): + label_key = "image/unpadded_label" + return super(ImageFSNS, self).example_reading_spec(self, + label_key=label_key) # Filename for CELEBA data. _CELEBA_NAME = "img_align_celeba" diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 99f8e97de..02e198c03 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -70,6 +70,10 @@ class SpaceID(object): ICE_PARSE_TOK = 19 # Macedonian tokens MK_TOK = 20 + # Genetic bases (ACTG) + DNA = 21 + # Real numbers + REAL = 22 class Problem(object): @@ -131,6 +135,18 @@ def feature_encoders(self, data_dir): "targets": text_encoder.TextEncoder() } + def example_reading_spec(self): + data_fields = { + "inputs": tf.VarLenFeature(tf.int64), + "targets": tf.VarLenFeature(tf.int64) + } + data_items_to_decoders = None + return (data_fields, data_items_to_decoders) + + def preprocess_examples(self, examples, mode): + del mode + return examples + # ============================================================================ # END SUBCLASS INTERFACE # ============================================================================ @@ -193,6 +209,17 @@ def internal_hparams(self, model_hparams): _copy_problem_hparams(hp) return hp + def maybe_reverse_features(self, feature_map): + if not self._was_reversed: + return + inputs, targets = feature_map["inputs"], feature_map["targets"] + feature_map["inputs"], feature_map["targets"] = targets, inputs + + def maybe_copy_features(self, feature_map): + if not self._was_copy: + return + feature_map["targets"] = feature_map["inputs"] + def _copy_problem_hparams(p_hparams): """Use input modality, vocab, and space id for target.""" diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index bb31d0c0f..3fc74473a 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -92,7 +92,7 @@ def target_space_id(self): def num_shards(self): return 100 - def generate_data(self, data_dir, tmp_dir, num_shards=None): + def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): if num_shards is None: num_shards = self.num_shards if self.is_character_level: diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index a86974d1f..e36b2e4e1 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -50,6 +50,8 @@ def basic_params1(): # when not in training mode. dropout=0.2, clip_grad_norm=2.0, + grad_noise_scale=0.0, + summarize_grads=int(False), initializer="orthogonal", initializer_gain=1.5, label_smoothing=0.1, diff --git a/tensor2tensor/models/modalities.py b/tensor2tensor/models/modalities.py index 9a6115558..50a3da55d 100644 --- a/tensor2tensor/models/modalities.py +++ b/tensor2tensor/models/modalities.py @@ -181,12 +181,11 @@ def top(self, body_output, _): shape = tf.shape(body_output_split[i])[:-1] body_output = tf.reshape(body_output_split[i], [-1, self._body_input_depth]) - channel_logits = tf.matmul(body_output, - output_rgb_embedding_var[i], - transpose_b=True) - rgb_channel_logits.append(tf.reshape( - channel_logits, tf.concat([shape, [self.top_dimensionality]], - 0))) + channel_logits = tf.matmul( + body_output, output_rgb_embedding_var[i], transpose_b=True) + rgb_channel_logits.append( + tf.reshape(channel_logits, + tf.concat([shape, [self.top_dimensionality]], 0))) logits = tf.concat(rgb_channel_logits, axis=3) # Reshape logits to conform to CIFAR image shapes (32 by 32 by 3) @@ -468,6 +467,33 @@ def top(self, body_output, _): return body_output +@registry.register_generic_modality("real") +class RealModality(modality.Modality): + """Modality for real (i.e. float) vectors.""" + + def bottom(self, x): + with tf.variable_scope("real"): + return tf.layers.dense(x, self._body_input_depth) + + def top(self, body_output, _): + with tf.variable_scope("real"): + return tf.layers.dense(body_output, self._vocab_size) + + def top_sharded(self, + sharded_body_output, + sharded_targets, + data_parallelism, + weights_fn=common_layers.weights_nonzero): + sharded_predictions = data_parallelism(self.top, sharded_body_output, + sharded_targets) + + def l2_loss(predictions, targets): + return tf.reduce_mean(tf.pow(predictions - targets, 2)) + + loss = data_parallelism(l2_loss, sharded_predictions, sharded_targets) + return sharded_predictions, tf.add_n(loss) + + @registry.register_image_modality("identity_no_pad") class IdentityModalityNoPad(modality.Modality): """Does nothing except making sure that there is no padding in cross-ent.""" diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 23197fcd9..c45e88577 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -46,8 +46,8 @@ def model_fn_body(self, features): # Remove dropout if not training hparams = copy.copy(self._hparams) targets = features["targets"] - inputs = features.get("inputs") - target_space = features.get("target_space_id") + inputs = features["inputs"] + target_space = features["target_space_id"] inputs = common_layers.flatten4d3d(inputs) targets = common_layers.flatten4d3d(targets) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index d7af960ab..24dd31485 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -138,10 +138,12 @@ def preprocessing(examples, data_file_pattern, mode): # Small single-example pre-processing for images. def resize(img, size): return tf.to_int64(tf.image.resize_images(img, [size, size])) + def preprocess(img): img = tf.image.resize_images(img, [360, 360]) img = common_layers.image_augmentation(tf.to_float(img) / 255.) return tf.to_int64(img * 255.) + if ("image_imagenet" in data_file_pattern or "image_mscoco" in data_file_pattern): examples["inputs"] = tf.cast(examples["inputs"], tf.int64) @@ -154,8 +156,8 @@ def preprocess(img): lambda img=inputs: resize(img, 299)) else: examples["inputs"] = tf.to_int64(resize(inputs, 299)) - elif ("image_cifar10" in data_file_pattern - and mode == tf.contrib.learn.ModeKeys.TRAIN): + elif ("image_cifar10" in data_file_pattern and + mode == tf.contrib.learn.ModeKeys.TRAIN): examples["inputs"] = common_layers.cifar_image_augmentation( examples["inputs"]) elif "img2img" in data_file_pattern: @@ -182,8 +184,62 @@ def preprocess(img): return examples -def input_pipeline(data_file_pattern, capacity, mode): +def problem_input_pipeline(problem, data_file_pattern, capacity, mode): + """Input pipeline for Problems.""" + data_fields, data_items_to_decoders = problem.example_reading_spec() + + # Create placeholders for input, rather than reading data from disk. + if data_file_pattern is None: + return feature_placeholders(data_fields) + + # Now the non-trivial case construction. + examples = examples_queue( + [data_file_pattern], + data_fields, + training=(mode == tf.contrib.learn.ModeKeys.TRAIN), + capacity=capacity, + data_items_to_decoders=data_items_to_decoders) + + examples = problem.preprocess_examples(examples, mode) + + # We do not want int64s as they are not supported on GPUs. + examples = cast_int64_to_int32(examples) + + return examples + + +def cast_int64_to_int32(features): + f = {} + for k, v in six.iteritems(features): + if v.dtype == tf.int64: + v = tf.to_int32(v) + f[k] = v + return f + + +def feature_placeholders(data_fields): + feature_map = {} + for (field, tp) in data_fields: + if not field.startswith("targets"): + feature_map[field] = tf.placeholder( + dtype=tp, shape=[None] * 4, name=field) + return feature_map + + +def input_pipeline(problem, data_file_pattern, capacity, mode): """Input pipeline, returns a dictionary of tensors from queues.""" + + if problem is not None: + # problem is not None when the problem is specified with the Problem API, + # which handles Example decoding and preprocessing. + # Otherwise the problem is specified in problem_hparams and is dealt with + # below. + # As problems are ported to the Problem API, the special handling here will + # need to be moved to Problem.example_reading_spec and + # Problem.preprocessing. + return problem_input_pipeline(problem, data_file_pattern, capacity, mode) + + data_items_to_decoders = None # Read from image TFRecords if the file has "image" in its name. if data_file_pattern and "image" in data_file_pattern: label_key = "image/class/label" @@ -211,22 +267,15 @@ def input_pipeline(data_file_pattern, capacity, mode): "audio/sample_width": tf.FixedLenFeature((), tf.int64), "targets": tf.VarLenFeature(tf.int64), } - data_items_to_decoders = None else: data_fields = { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) } - data_items_to_decoders = None # Create placeholders for input, rather than reading data from disk. if data_file_pattern is None: - feature_map = {} - for (field, tp) in data_fields: - if field != "targets": - feature_map[field] = tf.placeholder( - dtype=tp, shape=[None] * 4, name=field) - return feature_map + return feature_placeholders(data_fields) # Now the non-trivial case construction. examples = examples_queue( @@ -238,8 +287,9 @@ def input_pipeline(data_file_pattern, capacity, mode): examples = preprocessing(examples, data_file_pattern, mode) - # We do not want int64s as they do are not supported on GPUs. - return {k: tf.to_int32(v) for (k, v) in six.iteritems(examples)} + # We do not want int64s as they are not supported on GPUs. + examples = cast_int64_to_int32(examples) + return examples def batch_examples(examples, batching_scheme): diff --git a/tensor2tensor/utils/modality.py b/tensor2tensor/utils/modality.py index a42f35c24..72169be1f 100644 --- a/tensor2tensor/utils/modality.py +++ b/tensor2tensor/utils/modality.py @@ -43,7 +43,7 @@ class Modality(object): function targets_bottom represents the auto-regressive part of the network. It is applied to the already-generated part of an image, which is given to the decoder to generate the next part. In some cases, e.g., for text, it is - the same as the inputs_bottom function, as that is the default we use. But, + the same as the inputs_bottom function, and that is the default we use. But, e.g., for images, a different function might be needed to regress properly. All 3 functions have simple and sharded versions. A sub-class only needs diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 96c43a5a0..c4bdcf942 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -229,13 +229,16 @@ def create_hparams(params_id, data_dir): # Add hparams for the problems hparams.problems = [] + hparams.problem_instances = [] for problem_name in FLAGS.problems.split("-"): try: problem = registry.problem(problem_name) p_hparams = problem.internal_hparams(hparams) except ValueError: + problem = None p_hparams = problem_hparams.problem_hparams(problem_name, hparams) + hparams.problem_instances.append(problem) hparams.problems.append(p_hparams) return hparams @@ -304,9 +307,10 @@ def session_config(): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction) - config = tf.ConfigProto(allow_soft_placement=True, - graph_options=graph_options, - gpu_options=gpu_options) + config = tf.ConfigProto( + allow_soft_placement=True, + graph_options=graph_options, + gpu_options=gpu_options) return config @@ -422,8 +426,12 @@ def model_fn(features, targets, mode): def nth_model(n): """Build the model for the n-th problem, plus some added variables.""" model_class = registry.model(model)( - hparams, mode, hparams.problems[n], - n, dp, _ps_devices(all_workers=True)) + hparams, + mode, + hparams.problems[n], + n, + dp, + _ps_devices(all_workers=True)) if mode == tf.contrib.learn.ModeKeys.INFER: return model_class.infer( features, @@ -485,8 +493,8 @@ def nth_model(n): if mode == tf.contrib.learn.ModeKeys.EVAL: logits = tf.concat(sharded_logits, 0) if FLAGS.eval_print: - logits = tf.Print(logits, [features["inputs"], logits], - "EVAL PRINT", summarize=10000) + logits = tf.Print( + logits, [features["inputs"], logits], "EVAL PRINT", summarize=10000) # For evaluation, return the logits layer as our predictions. run_info["predictions"] = logits train_op = None @@ -544,19 +552,24 @@ def nth_model(n): # Define the train_op for the TRAIN mode. opt = _ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) tf.logging.info("Computing gradients for global model_fn.") + opt_summaries = ["learning_rate", "loss", "global_gradient_norm"] + if hparams.summarize_grads: + opt_summaries.extend(["gradients", "gradient_norm"]) train_op = tf.contrib.layers.optimize_loss( name="training", loss=total_loss, global_step=tf.contrib.framework.get_global_step(), learning_rate=learning_rate, clip_gradients=hparams.clip_grad_norm or None, + gradient_noise_scale=hparams.grad_noise_scale or None, optimizer=opt, + summaries=opt_summaries, colocate_gradients_with_ops=True) # Remove summaries that will fail to run because they are in conditionals. # TODO(cwhipkey): Test with this code removed, later in 2017. summaries = tf.get_collection_ref(tf.GraphKeys.SUMMARIES) - for i in range(len(summaries)-1, -1, -1): + for i in range(len(summaries) - 1, -1, -1): if summaries[i].name.startswith("cond_"): del summaries[i] @@ -602,8 +615,7 @@ def decode_from_dataset(estimator): data_file_patterns=infer_problems_data, num_datashards=data_parallelism().n, fixed_problem=i) - result_iter = estimator.predict( - input_fn=infer_input_fn, as_iterable=False) + result_iter = estimator.predict(input_fn=infer_input_fn, as_iterable=False) def log_fn(inputs, targets, @@ -735,8 +747,8 @@ def decode_interactively(estimator): else: tf.logging.info(beam_string) else: - tf.logging.info(targets_vocab.decode(_save_until_eos( - result["outputs"].flatten()))) + tf.logging.info( + targets_vocab.decode(_save_until_eos(result["outputs"].flatten()))) def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, @@ -749,8 +761,8 @@ def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, tf.logging.info("Decoding batch %d" % b) batch_length = 0 batch_inputs = [] - for inputs in sorted_inputs[b * FLAGS.decode_batch_size: - (b + 1) * FLAGS.decode_batch_size]: + for inputs in sorted_inputs[b * FLAGS.decode_batch_size:( + b + 1) * FLAGS.decode_batch_size]: input_ids = vocabulary.encode(inputs) if FLAGS.decode_max_input_size > 0: # Subtract 1 for the EOS_ID. @@ -1048,12 +1060,13 @@ def input_fn(): for n in xrange(problem_count): if fixed_problem is not None and n != fixed_problem: continue + problem_instance = hparams.problem_instances[n] with tf.name_scope("problem_%d" % n): with tf.device("/cpu:0"): # Input queues are on CPU. capacity = hparams.problems[n].max_expected_batch_size_per_shard capacity *= num_datashards - examples = data_reader.input_pipeline(data_file_patterns[n], - capacity, mode) + examples = data_reader.input_pipeline( + problem_instance, data_file_patterns[n], capacity, mode) if mode == tf.contrib.learn.ModeKeys.TRAIN: drop_long_sequences = True else: @@ -1068,15 +1081,18 @@ def input_fn(): length_multiplier=batch_size_multiplier)) # Reverse inputs and targets features if the problem was reversed. - if hparams.problems[n].was_reversed: - inputs = feature_map["inputs"] - targets = feature_map["targets"] - feature_map["inputs"] = targets - feature_map["targets"] = inputs - - # Use the inputs as the targets if the problem is a copy problem. - if hparams.problems[n].was_copy: - feature_map["targets"] = feature_map["inputs"] + if problem_instance is not None: + problem_instance.maybe_reverse_features(feature_map) + problem_instance.maybe_copy_features(feature_map) + else: + if hparams.problems[n].was_reversed: + inputs = feature_map["inputs"] + targets = feature_map["targets"] + feature_map["inputs"] = targets + feature_map["targets"] = inputs + # Use the inputs as the targets if the problem is a copy problem. + if hparams.problems[n].was_copy: + feature_map["targets"] = feature_map["inputs"] # Ensure inputs and targets are proper rank. while len(feature_map["inputs"].get_shape()) != 4: @@ -1117,9 +1133,9 @@ def input_fn(): assert FLAGS.worker_replicas % problem_count == 0 problem_choice = tf.to_int32(FLAGS.worker_id % problem_count) else: - raise ValueError("Value of hparams.problem_choice is %s and must be " - "one of [uniform, adaptive, distributed]" % - hparams.problem_choice) + raise ValueError( + "Value of hparams.problem_choice is %s and must be " + "one of [uniform, adaptive, distributed]" % hparams.problem_choice) # Inputs and targets conditional on problem_choice. rand_inputs, rand_target, choice, inp_id, tgt_id = _cond_on_index( From 315647cdbf6efc78591f3047627ca064c75c31dc Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 24 Jul 2017 18:51:17 -0700 Subject: [PATCH 0165/4095] Download newer WMT dev set. PiperOrigin-RevId: 163020223 --- tensor2tensor/data_generators/problem.py | 8 +- .../data_generators/problem_hparams.py | 3 + tensor2tensor/data_generators/wmt.py | 316 ++++++++++-------- tensor2tensor/models/common_attention.py | 3 + tensor2tensor/utils/get_ende_bleu.sh | 4 +- tensor2tensor/utils/trainer_utils.py | 3 +- 6 files changed, 186 insertions(+), 151 deletions(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 02e198c03..22b6214e6 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -70,10 +70,14 @@ class SpaceID(object): ICE_PARSE_TOK = 19 # Macedonian tokens MK_TOK = 20 + # Czech tokens + CS_TOK = 21 + # Czech characters + CS_CHR = 22 # Genetic bases (ACTG) - DNA = 21 + DNA = 23 # Real numbers - REAL = 22 + REAL = 24 class Problem(object): diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 4343afd27..159ea6ac9 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -181,6 +181,9 @@ def default_problem_hparams(): # 17: Icelandic characters # 18: Icelandic tokens # 19: Icelandic parse tokens + # 20: Macedonian tokens + # 21: Czech tokens + # 22: Czech characters # Add more above if needed. input_space_id=0, target_space_id=0, diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 3fc74473a..50125ccd1 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -43,23 +43,6 @@ EOS = text_encoder.EOS_ID -def _default_token_feature_encoders(data_dir, target_vocab_size): - vocab_filename = os.path.join(data_dir, - "vocab.endefr.%d" % target_vocab_size) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - return { - "inputs": subtokenizer, - "targets": subtokenizer, - } - - -def _default_character_feature_encoders(): - return { - "inputs": text_encoder.ByteTextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - - class WMTProblem(problem.Problem): """Base class for WMT problems.""" @@ -71,14 +54,13 @@ def is_character_level(self): def targeted_vocab_size(self): raise NotImplementedError() # Not needed if self.is_character_level. - @property - def train_generator(self): - """Generator; takes data_dir, tmp_dir, is_training, targeted_vocab_size.""" + def train_generator(self, data_dir, tmp_dir, is_training): + """Generator of the training data.""" raise NotImplementedError() - @property - def dev_generator(self): - return self.train_generator + def dev_generator(self, data_dir, tmp_dir): + """Generator of the development data.""" + return self.train_generator(data_dir, tmp_dir, False) @property def input_space_id(self): @@ -92,28 +74,35 @@ def target_space_id(self): def num_shards(self): return 100 + @property + def vocab_name(self): + return "vocab.endefr" + + @property + def vocab_file(self): + return "%s.%d" % (self.vocab_name, self.targeted_vocab_size) + def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): if num_shards is None: num_shards = self.num_shards - if self.is_character_level: - generator_utils.generate_dataset_and_shuffle( - self.train_generator(tmp_dir, True), - self.training_filepaths(data_dir, num_shards, shuffled=False), - self.dev_generator(tmp_dir, False), - self.dev_filepaths(data_dir, 1, shuffled=False)) - else: - generator_utils.generate_dataset_and_shuffle( - self.train_generator(data_dir, tmp_dir, True, - self.targeted_vocab_size), - self.training_filepaths(data_dir, num_shards, shuffled=False), - self.dev_generator(data_dir, tmp_dir, False, - self.targeted_vocab_size), - self.dev_filepaths(data_dir, 1, shuffled=False)) + generator_utils.generate_dataset_and_shuffle( + self.train_generator(data_dir, tmp_dir, True), + self.training_filepaths(data_dir, num_shards, shuffled=False), + self.dev_generator(data_dir, tmp_dir), + self.dev_filepaths(data_dir, 1, shuffled=False)) def feature_encoders(self, data_dir): if self.is_character_level: - return _default_character_feature_encoders() - return _default_token_feature_encoders(data_dir, self.targeted_vocab_size) + return { + "inputs": text_encoder.ByteTextEncoder(), + "targets": text_encoder.ByteTextEncoder(), + } + vocab_filename = os.path.join(data_dir, self.vocab_file) + subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) + return { + "inputs": subtokenizer, + "targets": subtokenizer, + } def hparams(self, defaults, unused_model_hparams): p = defaults @@ -175,8 +164,8 @@ def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): Args: source_path: path to the file with source and target sentences. - source_vocab: a SunwordTextEncoder to encode the source string. - target_vocab: a SunwordTextEncoder to encode the target string. + source_vocab: a SubwordTextEncoder to encode the source string. + target_vocab: a SubwordTextEncoder to encode the target string. eos: integer to append at the end of each sequence (default: None). Yields: @@ -262,7 +251,7 @@ def bi_vocabs_token_generator(source_path, _ENDE_TRAIN_DATASETS = [ [ - "http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long ("training-parallel-nc-v11/news-commentary-v11.de-en.en", "training-parallel-nc-v11/news-commentary-v11.de-en.de") ], @@ -277,7 +266,7 @@ def bi_vocabs_token_generator(source_path, ] _ENDE_TEST_DATASETS = [ [ - "http://data.statmt.org/wmt16/translation-task/dev.tgz", + "http://data.statmt.org/wmt17/translation-task/dev.tgz", ("dev/newstest2013.en", "dev/newstest2013.de") ], ] @@ -307,7 +296,7 @@ def bi_vocabs_token_generator(source_path, ] _ENFR_TEST_DATASETS = [ [ - "http://data.statmt.org/wmt16/translation-task/dev.tgz", + "http://data.statmt.org/wmt17/translation-task/dev.tgz", ("dev/newstest2013.en", "dev/newstest2013.fr") ], ] @@ -337,6 +326,29 @@ def bi_vocabs_token_generator(source_path, ("dev.mk", "dev.en") ]] +# English-Czech datasets +_ENCS_TRAIN_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long + ("training-parallel-nc-v11/news-commentary-v11.cs-en.en", + "training-parallel-nc-v11/news-commentary-v11.cs-en.cs") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + ("commoncrawl.cs-en.en", "commoncrawl.cs-en.cs") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + ("training/europarl-v7.cs-en.en", "training/europarl-v7.cs-en.cs") + ], +] +_ENCS_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newstest2013.en", "dev/newstest2013.cs") + ], +] + # Generators. @@ -408,16 +420,6 @@ def _compile_data(tmp_dir, datasets, filename): return filename -def ende_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @registry.register_problem("wmt_ende_tokens_8k") class WMTEnDeTokens8k(WMTProblem): """Problem spec for WMT En-De translation.""" @@ -426,9 +428,14 @@ class WMTEnDeTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - @property - def train_generator(self): - return ende_wordpiece_token_generator + def train_generator(self, data_dir, tmp_dir, train): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) @property def input_space_id(self): @@ -447,15 +454,6 @@ def targeted_vocab_size(self): return 2**15 # 32768 -def ende_character_generator(tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @registry.register_problem("wmt_ende_characters") class WMTEnDeCharacters(WMTProblem): """Problem spec for WMT En-De translation.""" @@ -464,9 +462,13 @@ class WMTEnDeCharacters(WMTProblem): def is_character_level(self): return True - @property - def train_generator(self): - return ende_character_generator + def train_generator(self, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) @property def input_space_id(self): @@ -477,29 +479,6 @@ def target_space_id(self): return problem.SpaceID.DE_CHR -def zhen_wordpiece_token_bigenerator(data_dir, tmp_dir, train, - source_vocab_size, target_vocab_size): - """Wordpiece generator for the WMT'17 zh-en dataset.""" - datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] - source_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.zh.%d" % source_vocab_size, - source_vocab_size, source_datasets) - target_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.en.%d" % target_vocab_size, - target_vocab_size, target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) - return bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", - source_vocab, target_vocab, EOS) - - -def zhen_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): - return zhen_wordpiece_token_bigenerator(data_dir, tmp_dir, train, - vocab_size, vocab_size) - - @registry.register_problem("wmt_zhen_tokens_8k") class WMTZhEnTokens8k(WMTProblem): """Problem spec for WMT Zh-En translation.""" @@ -508,9 +487,22 @@ class WMTZhEnTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - @property - def train_generator(self): - return zhen_wordpiece_token_generator + def train_generator(self, data_dir, tmp_dir, train): + source_vocab_size = self.targeted_vocab_size + target_vocab_size = self.targeted_vocab_size + datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + source_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, "vocab.zh.%d" % source_vocab_size, source_vocab_size, + source_datasets) + target_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, "vocab.en.%d" % target_vocab_size, target_vocab_size, + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) + return bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", + source_vocab, target_vocab, EOS) @property def input_space_id(self): @@ -542,17 +534,6 @@ def targeted_vocab_size(self): return 2**15 # 32768 -def enfr_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): - """Instance of token generator for the WMT en->fr task.""" - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @registry.register_problem("wmt_enfr_tokens_8k") class WMTEnFrTokens8k(WMTProblem): """Problem spec for WMT En-Fr translation.""" @@ -561,9 +542,14 @@ class WMTEnFrTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - @property - def train_generator(self): - return enfr_wordpiece_token_generator + def train_generator(self, data_dir, tmp_dir, train): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) @property def input_space_id(self): @@ -582,16 +568,6 @@ def targeted_vocab_size(self): return 2**15 # 32768 -def enfr_character_generator(tmp_dir, train): - """Instance of character generator for the WMT en->fr task.""" - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @registry.register_problem("wmt_enfr_characters") class WMTEnFrCharacters(WMTProblem): """Problem spec for WMT En-Fr translation.""" @@ -600,9 +576,13 @@ class WMTEnFrCharacters(WMTProblem): def is_character_level(self): return True - @property - def train_generator(self): - return enfr_character_generator + def train_generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) @property def input_space_id(self): @@ -613,20 +593,6 @@ def target_space_id(self): return problem.SpaceID.FR_CHR -def mken_wordpiece_token_generator(data_dir, tmp_dir, train, vocab_size): - """Wordpiece generator for the SETimes Mk-En dataset.""" - datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _MKEN_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _MKEN_TRAIN_DATASETS] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.mken.%d" % vocab_size, vocab_size, - source_datasets + target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @registry.register_problem("setimes_mken_tokens_32k") class SETimesMkEnTokens32k(WMTProblem): """Problem spec for SETimes Mk-En translation.""" @@ -636,8 +602,20 @@ def targeted_vocab_size(self): return 2**15 # 32768 @property - def train_generator(self): - return mken_wordpiece_token_generator + def vocab_name(self): + return "vocab.mken" + + def train_generator(self, data_dir, tmp_dir, train): + datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + source_datasets + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) @property def input_space_id(self): @@ -648,12 +626,62 @@ def target_space_id(self): return problem.SpaceID.EN_TOK -def parsing_character_generator(tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - filename = "parsing_%s" % ("train" if train else "dev") - text_filepath = os.path.join(tmp_dir, filename + ".text") - tags_filepath = os.path.join(tmp_dir, filename + ".tags") - return character_generator(text_filepath, tags_filepath, character_vocab, EOS) +@registry.register_problem("wmt_encs_tokens_32k") +class WMTEnCsTokens32k(problem.Problem): + """Problem spec for WMT English-Czech translation.""" + + @property + def target_vocab_size(self): + return 2**15 # 32768 + + @property + def vocab_name(self): + return "vocab.encs" + + def train_generator(self, data_dir, tmp_dir, train): + datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + source_datasets + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.CS_TOK + + +@registry.register_problem("wmt_encs_characters") +class WMTEnCsCharacters(WMTProblem): + """Problem spec for WMT En-Cs character-based translation.""" + + @property + def is_character_level(self): + return True + + def train_generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_encs_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.CS_CHR def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix, diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index 4f694a7f9..624623f4c 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -66,6 +66,9 @@ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): tf.to_float(tf.range(num_timescales)) * -log_timescale_increment) scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0) signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1) + signal = tf.reshape(signal, [length, 2, num_timescales]) + signal = tf.transpose(signal, perm=[0, 2, 1]) + signal = tf.reshape(signal, [length, channels]) signal = tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]]) signal = tf.reshape(signal, [1, length, channels]) return x + signal diff --git a/tensor2tensor/utils/get_ende_bleu.sh b/tensor2tensor/utils/get_ende_bleu.sh index 09078414f..3493af74c 100755 --- a/tensor2tensor/utils/get_ende_bleu.sh +++ b/tensor2tensor/utils/get_ende_bleu.sh @@ -5,10 +5,8 @@ tok_gold_targets=newstest2013.tok.de decodes_file=$1 -cut -d' ' -f1 $decodes_file > $decodes_file.target - # Tokenize. -perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l de < $decodes_file.target > $decodes_file.tok +perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l de < $decodes_file > $decodes_file.tok # Put compounds in ATAT format (comparable to papers like GNMT, ConvS2S). # See https://nlp.stanford.edu/projects/nmt/ : diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index c4bdcf942..0943881f3 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -722,8 +722,7 @@ def log_fn(inputs, outputs): tf.logging.info("Writing decodes into %s" % decode_filename) outfile = tf.gfile.Open(decode_filename, "w") for index in range(len(sorted_inputs)): - outfile.write("%s\t%s\n" % (decodes[sorted_keys[index]], - sorted_inputs[sorted_keys[index]])) + outfile.write("%s\n" % (decodes[sorted_keys[index]])) def decode_interactively(estimator): From bea499320874dc617631c52632f43ffd782542b7 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 25 Jul 2017 09:05:13 -0700 Subject: [PATCH 0166/4095] Clean up some code around escaping/unescaping tokens and add tests. PiperOrigin-RevId: 163077617 --- tensor2tensor/data_generators/text_encoder.py | 248 +++++++++--------- .../data_generators/text_encoder_test.py | 68 +++++ 2 files changed, 199 insertions(+), 117 deletions(-) create mode 100644 tensor2tensor/data_generators/text_encoder_test.py diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 7c53784f3..afe1da9ae 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -24,15 +24,12 @@ from __future__ import division from __future__ import print_function -from collections import defaultdict +import collections import re # Dependency imports import six -from six import PY2 -from six import unichr # pylint: disable=redefined-builtin -from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import tokenizer import tensorflow as tf @@ -46,7 +43,7 @@ PAD_ID = RESERVED_TOKENS.index(PAD) # Normally 0 EOS_ID = RESERVED_TOKENS.index(EOS) # Normally 1 -if PY2: +if six.PY2: RESERVED_TOKENS_BYTES = RESERVED_TOKENS else: RESERVED_TOKENS_BYTES = [bytes(PAD, "ascii"), bytes(EOS, "ascii")] @@ -56,18 +53,17 @@ # '\u' is converted to '_' # '\\' is converted to '\' # '\213;' is converted to unichr(213) -_UNESCAPE_REGEX = re.compile(u"|".join([r"\\u", r"\\\\", r"\\([0-9]+);"])) +_UNESCAPE_REGEX = re.compile(ur"\\u|\\\\|\\([0-9]+);") +_ESCAPE_CHARS = set(u"\\_;0123456789") def native_to_unicode_py2(s): """Python 2: transform native string to Unicode.""" - if isinstance(s, unicode): - return s - return s.decode("utf-8") + return s if isinstance(s, unicode) else s.decode("utf8") # Conversion between Unicode and UTF-8, if required (on Python2) -if PY2: +if six.PY2: native_to_unicode = native_to_unicode_py2 unicode_to_native = lambda s: s.encode("utf-8") else: @@ -131,7 +127,7 @@ class ByteTextEncoder(TextEncoder): def encode(self, s): numres = self._num_reserved_ids - if PY2: + if six.PY2: return [ord(c) + numres for c in s] # Python3: explicitly convert to UTF-8 return [c + numres for c in s.encode("utf-8")] @@ -145,7 +141,7 @@ def decode(self, ids): decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) else: decoded_ids.append(int2byte(id_ - numres)) - if PY2: + if six.PY2: return "".join(decoded_ids) # Python3: join byte arrays and then decode string return b"".join(decoded_ids).decode("utf-8", "replace") @@ -199,6 +195,55 @@ def _load_vocab_from_file(self, filename): self._id_to_token[idx] = tok +def _escape_token(token, alphabet): + """Escape away underscores and OOV characters and append '_'. + + This allows the token to be experessed as the concatenation of a list + of subtokens from the vocabulary. The underscore acts as a sentinel + which allows us to invertibly concatenate multiple such lists. + + Args: + token: A unicode string to be escaped. + alphabet: A set of all characters in the vocabulary's alphabet. + + Returns: + escaped_token: An escaped unicode string. + + Raises: + ValueError: If the provided token is not unicode. + """ + if not isinstance(token, six.text_type): + raise ValueError("Expected string type for token, got %s" % type(token)) + + token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") + ret = [ + c if c in alphabet and c != u"\n" else ur"\%d;" % ord(c) + for c in token] + return u"".join(ret) + "_" + + +def _unescape_token(escaped_token): + """Inverse of _escape_token(). + + Args: + escaped_token: a unicode string + + Returns: + token: a unicode string + """ + def match(m): + if m.group(1) is None: + return u"_" if m.group(0) == u"\\u" else u"\\" + + try: + return six.unichr(int(m.group(1))) + except (ValueError, OverflowError) as _: + return "" + + trimmed = escaped_token[:-1] if escaped_token.endswith("_") else escaped_token + return _UNESCAPE_REGEX.sub(match, trimmed) + + class SubwordTextEncoder(TextEncoder): """Class for invertibly encoding text using a limited vocabulary. @@ -276,7 +321,8 @@ def _tokens_to_subtokens(self, tokens): """ ret = [] for token in tokens: - ret.extend(self._escaped_token_to_subtokens(self._escape_token(token))) + ret.extend(self._escaped_token_to_subtokens( + _escape_token(token, self._alphabet))) return ret def _subtokens_to_tokens(self, subtokens): @@ -290,7 +336,7 @@ def _subtokens_to_tokens(self, subtokens): concatenated = "".join( [self._subtoken_to_subtoken_string(s) for s in subtokens]) split = concatenated.split("_") - return [self._unescape_token(t + "_") for t in split if t] + return [_unescape_token(t + "_") for t in split if t] def _subtoken_to_subtoken_string(self, subtoken): """Subtoken_String (string) corresponding to the given subtoken (id).""" @@ -312,12 +358,17 @@ def _escaped_token_to_subtokens(self, escaped_token): while pos < lesc: end = min(lesc, pos + self._max_subtoken_len) while end > pos: - subtoken = self._subtoken_string_to_id.get(escaped_token[pos:end], -1) - if subtoken != -1: + subtoken_id = self._subtoken_string_to_id.get(escaped_token[pos:end]) + if subtoken_id is not None: break end -= 1 - assert end > pos - ret.append(subtoken) + + # If there is no possible encoding of the escaped token then one of the + # characters in the token is not in the alphabet. This should be + # impossible and would be indicative of a bug. + assert subtoken_id is not None + + ret.append(subtoken_id) pos = end return ret @@ -331,27 +382,37 @@ def build_to_target_size(cls, num_iterations=4): """Builds a SubwordTextEncoder that has `vocab_size` near `target_size`. - Uses simple recursive binary search to find a `min_count` value that most + Uses simple recursive binary search to find a minimum token count that most closely matches the `target_size`. Args: - target_size: desired vocab_size to approximate. - token_counts: a dictionary of string to int. - min_val: an integer - lower bound for `min_count`. - max_val: an integer - upper bound for `min_count`. - num_iterations: an integer. how many iterations of refinement. + target_size: Desired vocab_size to approximate. + token_counts: A dictionary of token counts, mapping string to int. + min_val: An integer; lower bound for the minimum token count. + max_val: An integer; upper bound for the minimum token count. + num_iterations: An integer; how many iterations of refinement. Returns: - a SubwordTextEncoder instance. + A SubwordTextEncoder instance. + + Raises: + ValueError: If `min_val` is greater than `max_val`. """ + if min_val > max_val: + raise ValueError( + "Lower bound for the minimum token count " + "is greater than the upper bound.") + def bisect(min_val, max_val): """Bisection to find the right size.""" present_count = (max_val + min_val) // 2 tf.logging.info("Trying min_count %d" % present_count) subtokenizer = cls() - subtokenizer.build_from_token_counts(token_counts, - present_count, num_iterations) - if min_val >= max_val or subtokenizer.vocab_size == target_size: + subtokenizer.build_from_token_counts( + token_counts, present_count, num_iterations) + + # If min_val == max_val, we can't do any better than this. + if subtokenizer.vocab_size == target_size or min_val == max_val: return subtokenizer if subtokenizer.vocab_size > target_size: @@ -382,34 +443,27 @@ def build_from_token_counts(self, num_iterations: an integer. how many iterations of refinement. num_reserved_ids: an integer. how many ids to reserve for special tokens. """ - # first determine the alphabet to include all characters with count at - # least min_count in the dataset. - char_counts = defaultdict(int) - for token, count in six.iteritems(token_counts): - for c in token: - char_counts[c] += count - self._alphabet = set() - for c, count in six.iteritems(char_counts): - if count >= min_count: - self._alphabet.add(c) - # Make sure all characters needed for escaping are included - for c in u"\\_;0123456789": - self._alphabet.add(c) + self._init_alphabet_from_tokens(six.iterkeys(token_counts)) + + # Bootstrap the initial list of subtokens with the characters from the + # alphabet plus the escaping characters. + self._init_subtokens_from_list( + list(self._alphabet), reserved=num_reserved_ids) # We build iteratively. On each iteration, we segment all the words, # then count the resulting potential subtokens, keeping the ones # with high enough counts for our new vocabulary. if min_count < 1: min_count = 1 - for i in xrange(num_iterations): + for i in six.moves.range(num_iterations): tf.logging.info("Iteration {0}".format(i)) - counts = defaultdict(int) + counts = collections.defaultdict(int) for token, count in six.iteritems(token_counts): - escaped_token = self._escape_token(token) + escaped_token = _escape_token(token, self._alphabet) # we will count all tails of the escaped_token, starting from boundaries # determined by our current segmentation. if i == 0: - starts = xrange(len(escaped_token)) + starts = six.moves.range(len(escaped_token)) else: subtokens = self._escaped_token_to_subtokens(escaped_token) pos = 0 @@ -418,48 +472,43 @@ def build_from_token_counts(self, starts.append(pos) pos += len(self._all_subtoken_strings[subtoken]) for start in starts: - for end in xrange(start + 1, len(escaped_token) + 1): + for end in six.moves.range(start + 1, len(escaped_token) + 1): subtoken_string = escaped_token[start:end] counts[subtoken_string] += count - # Make sure all characters needed for escaping are included - for c in self._alphabet: - counts[c] += min_count # Array of sets of candidate subtoken strings, by length len_to_subtoken_strings = [] for subtoken_string, count in six.iteritems(counts): lsub = len(subtoken_string) - if count >= min_count: + # Always include all the alphabet characters or some strings will + # be unencodeable. + if count >= min_count or subtoken_string in self._alphabet: # Add this subtoken string to its length set while len(len_to_subtoken_strings) <= lsub: len_to_subtoken_strings.append(set()) len_to_subtoken_strings[lsub].add(subtoken_string) new_subtoken_strings = [] - # consider the candidates longest to shortest, so that if we accept + # Consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. - for lsub in reversed(range(1, len(len_to_subtoken_strings))): + for lsub in six.moves.range(len(len_to_subtoken_strings)-1, 0, -1): subtoken_strings = len_to_subtoken_strings[lsub] for subtoken_string in subtoken_strings: count = counts[subtoken_string] - if count >= min_count: - new_subtoken_strings.append((count, subtoken_string)) - for l in xrange(1, lsub): + if count >= min_count or subtoken_string in self._alphabet: + # Exclude alphabet tokens here, as they must be included later + # explicitly, regardless of count. + if subtoken_string not in self._alphabet: + new_subtoken_strings.append((count, subtoken_string)) + for l in six.moves.range(1, lsub): counts[subtoken_string[:l]] -= count - # Sort in decreasing order by count new_subtoken_strings.sort(reverse=True) - # Now we have a candidate vocabulary - old_alphabet = self._alphabet - self._init_from_list([u""] * num_reserved_ids + - [p[1] for p in new_subtoken_strings]) - assert old_alphabet == self._alphabet - tf.logging.info("vocab_size = %d" % self.vocab_size) - original = "This sentence was encoded by the SubwordTextEncoder." - encoded = self.encode(original) - print(encoded) - print([self._subtoken_to_subtoken_string(s) for s in encoded]) - decoded = self.decode(encoded) - print(decoded) - assert decoded == original + # Reinitialize to the candidate vocabulary, including the alphabet + # explicitly as the highest priority. + self._init_subtokens_from_list( + list(self._alphabet) + + [subtoken for _, subtoken in new_subtoken_strings], + reserved=num_reserved_ids) + tf.logging.info("vocab_size = %d" % self.vocab_size) def dump(self): """Debugging dump of the current subtoken vocabulary.""" @@ -468,15 +517,21 @@ def dump(self): print(u", ".join(u"{0} : '{1}'".format(i, s) for i, s in sorted(subtoken_strings))) - def _init_from_list(self, subtoken_strings): - """Initialize from a list of subtoken strings.""" - self._all_subtoken_strings = subtoken_strings + def _init_subtokens_from_list(self, subtoken_strings, reserved=0): + """Initialize token information from a list of subtoken strings.""" + self._all_subtoken_strings = [u""] * reserved + subtoken_strings # we remember the maximum length of any subtoken to avoid having to # check arbitrarily long strings. self._max_subtoken_len = max([len(s) for s in subtoken_strings]) self._subtoken_string_to_id = { - s: i for i, s in enumerate(subtoken_strings) if s} - self._alphabet = set([c for c in subtoken_strings if len(c) == 1]) + s: i+reserved for i, s in enumerate(subtoken_strings) if s} + + def _init_alphabet_from_tokens(self, tokens): + """Initialize alphabet from an iterable of token or subtoken strings.""" + # Include all characters from all tokens in the alphabet to guarantee that + # any token can be encoded. Additionally, include all escaping characters. + self._alphabet = {c for token in tokens for c in token} + self._alphabet |= _ESCAPE_CHARS def _load_from_file(self, filename): """Load from a file.""" @@ -484,51 +539,10 @@ def _load_from_file(self, filename): with tf.gfile.Open(filename) as f: for line in f: subtoken_strings.append(native_to_unicode(line.strip()[1:-1])) - self._init_from_list(subtoken_strings) + self._init_subtokens_from_list(subtoken_strings) + self._init_alphabet_from_tokens(subtoken_strings) def store_to_file(self, filename): with tf.gfile.Open(filename, "w") as f: for subtoken_string in self._all_subtoken_strings: f.write("'" + unicode_to_native(subtoken_string) + "'\n") - - def _escape_token(self, token): - """Escape away underscores and OOV characters and append '_'. - - This allows the token to be experessed as the concatenation of a list - of subtokens from the vocabulary. The underscore acts as a sentinel - which allows us to invertibly concatenate multiple such lists. - - Args: - token: a unicode string - Returns: - escaped_token: a unicode string - """ - assert isinstance(token, six.text_type) - token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") + u"_" - ret = u"" - for c in token: - if c in self._alphabet and c != u"\n": - ret += c - else: - ret += u"\\%d;" % ord(c) - return ret - - def _unescape_token(self, escaped_token): - """Inverse of _escape_token(). - - Args: - escaped_token: a unicode string - Returns: - token: a unicode string - """ - def match(m): - if m.group(1) is not None: - # Convert '\213;' to unichr(213) - try: - return unichr(int(m.group(1))) - except (ValueError, OverflowError) as _: - return "" - # Convert '\u' to '_' and '\\' to '\' - return u"_" if m.group(0) == u"\\u" else u"\\" - # Cut off the trailing underscore and apply the regex substitution - return _UNESCAPE_REGEX.sub(match, escaped_token[:-1]) diff --git a/tensor2tensor/data_generators/text_encoder_test.py b/tensor2tensor/data_generators/text_encoder_test.py new file mode 100644 index 000000000..7ac2ba911 --- /dev/null +++ b/tensor2tensor/data_generators/text_encoder_test.py @@ -0,0 +1,68 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for tensor2tensor.data_generators.text_encoder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.data_generators import text_encoder +import tensorflow as tf + + +class EscapeUnescapeTokenTest(tf.test.TestCase): + + def test_escape_token(self): + escaped = text_encoder._escape_token( + u'Foo! Bar.\nunder_score back\\slash', + set('abcdefghijklmnopqrstuvwxyz .\n') | text_encoder._ESCAPE_CHARS) + + self.assertEqual( + u'\\70;oo\\33; \\66;ar.\\10;under\\uscore back\\\\slash_', escaped) + + def test_unescape_token(self): + unescaped = text_encoder._unescape_token( + u'\\70;oo\\33; \\66;ar.\\10;under\\uscore back\\\\slash_') + + self.assertEqual( + u'Foo! Bar.\nunder_score back\\slash', unescaped) + + +class SubwordTextEncoderTest(tf.test.TestCase): + + def test_encode_decode(self): + token_counts = { + u'this': 9, + u'sentence': 14, + u'the': 100, + u'encoded': 1, + u'was': 20, + u'by': 50, + } + encoder = text_encoder.SubwordTextEncoder.build_to_target_size( + 50, token_counts, 2, 10) + encoder.build_from_token_counts(token_counts, min_count=2) + + original = 'This sentence was encoded by the SubwordTextEncoder.' + encoded = encoder.encode(original) + decoded = encoder.decode(encoded) + self.assertEqual(original, decoded) + + +if __name__ == '__main__': + tf.test.main() From 83a757dd43df099b3c545e5cd2e9f9f9f0aed50b Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Tue, 25 Jul 2017 12:04:03 -0700 Subject: [PATCH 0167/4095] Adding encoder conv attention. A query block attends to a neighborhood to the left and the right of it. Pair programmed (Ashish + Niki) PiperOrigin-RevId: 163103460 --- tensor2tensor/models/common_attention.py | 97 +++++++++++++++++++++++- 1 file changed, 93 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index 624623f4c..98a198f85 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -435,6 +435,91 @@ def local(x): return output +def unmasked_local_attention_1d(q, k, v, block_length=128, filter_width=100, + name=None): + """strided block local self-attention. + + Args: + q: a Tensor with shape [batch, heads, length, depth_k] + k: a Tensor with shape [batch, heads, length, depth_k] + v: a Tensor with shape [batch, heads, length, depth_v] + block_length: an integer + filter_width: an integer indicating how much to look left. + name: an optional string + + Returns: + a Tensor of shape [batch, heads, length, depth_v] + """ + with tf.variable_scope(name, default_name="local_self_attention_1d", + values=[q, k, v]): + v_shape = v.get_shape() + depth_v = tf.shape(v)[3] + batch_size = tf.shape(q)[0] + num_heads = tf.shape(q)[1] + original_length = tf.shape(q)[2] + # making sure q is a multiple of d + def pad_to_multiple(x, pad_length): + x_length = tf.shape(x)[2] + return tf.pad(x, [[0, 0], [0, 0], [0, -x_length % pad_length], [0, 0]]) + def pad_l_and_r(x, pad_length): + return tf.pad(x, [[0, 0], [0, 0], [pad_length, pad_length], [0, 0]]) + q = pad_to_multiple(q, block_length) + k = pad_to_multiple(k, block_length) + v = pad_to_multiple(v, block_length) + + # Setting up q blocks + new_q_shape = tf.shape(q) + # Setting up q blocks + q = tf.reshape(q, [new_q_shape[0], new_q_shape[1], + new_q_shape[2]//block_length, + block_length, new_q_shape[3]]) + + # Setting up k and v values + k = pad_l_and_r(k, filter_width) + v = pad_l_and_r(v, filter_width) + + length = tf.shape(k)[2] + full_filter_width = block_length + 2*filter_width + # getting gather indices + indices = tf.range(0, length, delta=1, name="index_range") + # making indices [1, length, 1] to appy convs + indices = tf.reshape(indices, [1, -1, 1]) + kernel = tf.expand_dims(tf.eye(full_filter_width), axis=1) + gather_indices = tf.nn.conv1d( + tf.cast(indices, tf.float32), + kernel, + block_length, + padding="VALID", + name="gather_conv") + + gather_indices = tf.squeeze(tf.cast(gather_indices, tf.int32), axis=0) + + # [length, batch, heads, dim] + k_t = tf.transpose(k, [2, 0, 1, 3]) + k_new = tf.gather(k_t, gather_indices) + + # [batch, heads, blocks, block_length, dim] + k_new = tf.transpose(k_new, [2, 3, 0, 1, 4]) + + attention_bias = tf.expand_dims( + tf.to_float(embedding_to_padding(k_new)) * -1e9, axis=-2) + + v_t = tf.transpose(v, [2, 0, 1, 3]) + v_new = tf.gather(v_t, gather_indices) + v_new = tf.transpose(v_new, [2, 3, 0, 1, 4]) + + logits = tf.matmul(q, k_new, transpose_b=True) + + attention = tf.nn.softmax(logits+attention_bias) + output = tf.matmul(attention, v_new) + + output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) + # Remove the padding if introduced + output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) + output.set_shape(v_shape) + return output + + def multihead_attention(query_antecedent, memory_antecedent, bias, @@ -460,8 +545,9 @@ def multihead_attention(query_antecedent, dropout_rate: a floating point number image_shapes: optional tuple of integer scalars. see comments for attention_image_summary() - attention_type: a string, either "dot_product" or "local_mask_right" - block_length: an integer - relevent for "local_mask_right" + attention_type: a string, either "dot_product" or "local_mask_right" or + "local_unmasked" + block_length: an integer - relevant for "local_mask_right" name: an optional string Returns: @@ -509,9 +595,11 @@ def multihead_attention(query_antecedent, if attention_type == "dot_product": x = dot_product_attention( q, k, v, bias, dropout_rate, image_shapes) - else: - assert attention_type == "local_mask_right" + elif attention_type == "local_mask_right": x = masked_local_attention_1d(q, k, v, block_length=block_length) + else: + assert attention_type == "local_unmasked" + x = unmasked_local_attention_1d(q, k, v, block_length=block_length) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") return x @@ -652,4 +740,5 @@ def parameter_attention(x, y = tf.reshape(y, [batch_size, length, total_value_depth]) y.set_shape([None, None, total_value_depth]) y = common_layers.conv1d(y, output_depth, 1, name="output_transform") + return y From d190b79861d849569e42d8ad892b337983df39eb Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 25 Jul 2017 15:20:01 -0700 Subject: [PATCH 0168/4095] Update inspect.py to allow decoding with TokenTextEncoder and ByteTextEncoder. PiperOrigin-RevId: 163131045 --- tensor2tensor/data_generators/inspect.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensor2tensor/data_generators/inspect.py b/tensor2tensor/data_generators/inspect.py index 124c07017..6ba054d3c 100644 --- a/tensor2tensor/data_generators/inspect.py +++ b/tensor2tensor/data_generators/inspect.py @@ -34,6 +34,10 @@ tf.app.flags.DEFINE_string("subword_text_encoder_filename", "", "SubwordTextEncoder vocabulary file") +tf.app.flags.DEFINE_string("token_text_encoder_filename", "", + "TokenTextEncoder vocabulary file") +tf.app.flags.DEFINE_bool("byte_text_encoder", False, + "use a ByteTextEncoder") tf.app.flags.DEFINE_string("input_filename", "", "input filename") tf.app.flags.DEFINE_bool("print_inputs", False, "Print decoded inputs to stdout") @@ -48,6 +52,11 @@ def main(_): if FLAGS.subword_text_encoder_filename: encoder = text_encoder.SubwordTextEncoder( FLAGS.subword_text_encoder_filename) + elif FLAGS.token_text_encoder_filename: + encoder = text_encoder.TokenTextEncoder( + FLAGS.token_text_encoder_filename) + elif FLAGS.byte_text_encoder: + encoder = text_encoder.ByteTextEncoder() else: encoder = None reader = tf.python_io.tf_record_iterator(FLAGS.input_filename) From 7de63bd1dac3482d6c2388b715b958d3726870c9 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 25 Jul 2017 15:58:10 -0700 Subject: [PATCH 0169/4095] Character-level version of lm1b. PiperOrigin-RevId: 163136520 --- tensor2tensor/bin/t2t-datagen | 4 ++++ tensor2tensor/data_generators/lm1b.py | 10 +++++++--- .../data_generators/problem_hparams.py | 17 +++++++++++++++++ tensor2tensor/data_generators/text_encoder.py | 2 ++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 783906d95..a9fa12255 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -110,6 +110,10 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: lm1b.generator(FLAGS.tmp_dir, True), lambda: lm1b.generator(FLAGS.tmp_dir, False) ), + "lm1b_characters": ( + lambda: lm1b.generator(FLAGS.tmp_dir, True, characters=True), + lambda: lm1b.generator(FLAGS.tmp_dir, False, characters=True) + ), "wiki_32k": ( lambda: wiki.generator(FLAGS.tmp_dir, True), 1000 diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index 562435184..a436e0e6e 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -63,7 +63,7 @@ def _original_vocab(tmp_dir): def _replace_oov(original_vocab, line): """Replace out-of-vocab words with "UNK". - This maintains compatability with published results. + This maintains compatibility with published results. Args: original_vocab: a set of strings (The standard vocabulary for the dataset) @@ -138,12 +138,13 @@ def _get_or_build_subword_text_encoder(tmp_dir): return ret -def generator(tmp_dir, train): +def generator(tmp_dir, train, characters=False): """Generator for lm1b sentences. Args: tmp_dir: a string. train: a boolean. + characters: a boolean Yields: A dictionary {"inputs": [0], "targets": [<subword ids>]} @@ -152,7 +153,10 @@ def generator(tmp_dir, train): original_vocab = _original_vocab(tmp_dir) files = (_train_data_filenames(tmp_dir) if train else [_dev_data_filename(tmp_dir)]) - encoder = _get_or_build_subword_text_encoder(tmp_dir) + if characters: + encoder = text_encoder.ByteTextEncoder() + else: + encoder = _get_or_build_subword_text_encoder(tmp_dir) for filepath in files: tf.logging.info("filepath = %s", filepath) for line in tf.gfile.Open(filepath): diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 159ea6ac9..2792c79e9 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -336,6 +336,22 @@ def lm1b_32k(model_hparams): return p +def lm1b_characters(unused_model_hparams): + """Billion-word language-modeling benchmark, 32k subword vocabulary.""" + p = default_problem_hparams() + # ratio of dev tokens (including eos) to dev words (including eos) + # 826189 / 159658 = 5.174742 + p.perplexity_exponent = 5.174742 + p.input_modality = {} + encoder = text_encoder.ByteTextEncoder() + p.target_modality = (registry.Modalities.SYMBOL, encoder.vocab_size) + p.vocabulary = { + "targets": encoder + } + p.target_space_id = 2 + return p + + def wiki_32k(model_hparams): """Wikipedia title to article. 32k subtoken vocabulary.""" p = default_problem_hparams() @@ -623,6 +639,7 @@ def image_celeba(unused_model_hparams): "audio_wsj_characters_test": audio_wsj_characters, "audio_wsj_tokens_8k_tune": lambda p: audio_wsj_tokens(p, 2**13), "audio_wsj_tokens_8k_test": lambda p: audio_wsj_tokens(p, 2**13), + "lm1b_characters": lm1b_characters, "lm1b_32k": lm1b_32k, "wiki_32k": wiki_32k, "lmptb_10k": lmptb_10k, diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index afe1da9ae..6b01e3a35 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -128,6 +128,8 @@ class ByteTextEncoder(TextEncoder): def encode(self, s): numres = self._num_reserved_ids if six.PY2: + if isinstance(s, unicode): + s = s.encode("utf-8") return [ord(c) + numres for c in s] # Python3: explicitly convert to UTF-8 return [c + numres for c in s.encode("utf-8")] From fd1a87d214861ea8d8ec3079cd636b145aad7630 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 25 Jul 2017 17:06:12 -0700 Subject: [PATCH 0170/4095] Problem.eval_metrics PiperOrigin-RevId: 163145397 --- tensor2tensor/data_generators/genetics.py | 7 +- tensor2tensor/data_generators/problem.py | 18 ++++ tensor2tensor/data_generators/wmt.py | 15 +++ tensor2tensor/utils/metrics.py | 109 ++++++++++++++-------- tensor2tensor/utils/t2t_model.py | 4 +- tensor2tensor/utils/trainer_utils.py | 26 ++++-- 6 files changed, 129 insertions(+), 50 deletions(-) diff --git a/tensor2tensor/data_generators/genetics.py b/tensor2tensor/data_generators/genetics.py index 848c2341b..309580d53 100644 --- a/tensor2tensor/data_generators/genetics.py +++ b/tensor2tensor/data_generators/genetics.py @@ -49,6 +49,7 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import metrics from tensor2tensor.utils import registry import tensorflow as tf @@ -141,7 +142,8 @@ def hparams(self, defaults, model_hparams): p.target_space_id = problem.SpaceID.REAL def example_reading_spec(self): - # TODO(rsepassi): propagate and apply targets_mask to output RealModality. + # TODO(rsepassi): propagate and apply targets_mask to output RealModality + # and to eval metrics (weights_fn?). data_fields = { "inputs": tf.VarLenFeature(tf.int64), "targets_mask": tf.VarLenFeature(tf.float32), @@ -158,6 +160,9 @@ def preprocess_examples(self, examples, mode): return examples + def eval_metrics(self): + return [metrics.Metrics.RMSE] + @registry.register_problem("genetics_cage10") class GeneticsCAGE10(GeneExpressionProblem): diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 22b6214e6..69d81e58e 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -22,6 +22,7 @@ from tensor2tensor.data_generators import generator_utils as utils from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import metrics import tensorflow as tf @@ -111,6 +112,17 @@ class Problem(object): * hparams(defaults, model_hparams) - Specify the problem hyperparameters (see _default_hparams) - Mutate defaults as needed + * example_reading_spec + - Specify the names and types of the features on disk. + - Specify tf.contrib.slim.tfexample_decoder + * preprocess_examples(examples, mode) + - Preprocess the example feature dict from feature name to Tensor or + SparseTensor. + - Used in training, eval, and inference (specified by mode). + + Eval: + * eval_metrics + - Specify the set of evaluation metrics for this problem. Inference: * feature_encoders(data_dir) @@ -151,6 +163,12 @@ def preprocess_examples(self, examples, mode): del mode return examples + def eval_metrics(self): + return [ + metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5, + metrics.Metrics.ACC_PER_SEQ, metrics.Metrics.NEG_LOG_PERPLEXITY + ] + # ============================================================================ # END SUBCLASS INTERFACE # ============================================================================ diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 50125ccd1..519d55996 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -28,6 +28,7 @@ from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import metrics from tensor2tensor.utils import registry import tensorflow as tf @@ -120,6 +121,13 @@ def hparams(self, defaults, unused_model_hparams): if self.is_character_level: p.loss_multiplier = 2.0 + def eval_metrics(self): + return [ + metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5, + metrics.Metrics.ACC_PER_SEQ, metrics.Metrics.NEG_LOG_PERPLEXITY, + metrics.Metrics.APPROX_BLEU + ] + # Generic generators used later for multiple problems. @@ -658,6 +666,13 @@ def input_space_id(self): def target_space_id(self): return problem.SpaceID.CS_TOK + def eval_metrics(self): + return [ + metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5, + metrics.Metrics.ACC_PER_SEQ, metrics.Metrics.NEG_LOG_PERPLEXITY, + metrics.Metrics.APPROX_BLEU + ] + @registry.register_problem("wmt_encs_characters") class WMTEnCsCharacters(WMTProblem): diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 118e33394..29f44b574 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -18,8 +18,6 @@ from __future__ import division from __future__ import print_function -import functools - # Dependency imports import six @@ -29,7 +27,24 @@ import tensorflow as tf -FLAGS = tf.flags.FLAGS + +class Metrics(object): + """Available evaluation metrics.""" + # Entries here should match the keys in METRICS_FN below + ACC = "accuracy" + ACC_TOP5 = "accuracy_top5" + ACC_PER_SEQ = "accuracy_per_sequence" + NEG_LOG_PERPLEXITY = "neg_log_perplexity" + APPROX_BLEU = "approx_bleu_score" + RMSE = "rmse" + + +def padded_rmse(predictions, labels, weights_fn=common_layers.weights_all): + predictions, labels = common_layers.pad_with_zeros(predictions, labels) + targets = labels + weights = weights_fn(targets) + error = tf.sqrt(tf.pow(predictions - labels, 2)) + return tf.reduce_sum(error * weights), tf.reduce_sum(weights) def padded_accuracy_topk(predictions, @@ -98,62 +113,76 @@ def create_evaluation_metrics(problems): """Creates the evaluation metrics for the model. Args: - problems: List of strings containing the name of the problems. + problems: List of tuples (problem name, problem instance). Returns: A dictionary with keys that are strings naming the evaluation metrics and values that are functions taking arguments of (predictions, targets), returning a tuple of a tensor of the metric's value together with an op to update the metric's value. + + Raises: + ValueError: if the metrics specified by a problem are not recognized (i.e. + are not defined in the Metrics enum. """ - def append_metric_fns(metric_tup, eval_metrics): - """Append problem-specific and global metrics to eval_metrics.""" - metric_name, metric_function = metric_tup - def fn(predictions, labels, weights, idx, weights_fn): - # The 'weights' argument represents problem-choice here, - # we need to keep this name because MetricSpecs checks it. + def make_problem_specific_metric_fn(metric_fn, problem_idx, weights_fn): + """Create a metric fn conditioned on problem_idx.""" + + def problem_metric_fn(predictions, labels, weights): problem_choice = weights (scores, weights) = tf.cond( - tf.equal(idx, problem_choice), # pylint: disable=cell-var-from-loop - lambda: metric_function(predictions, labels, weights_fn=weights_fn), + tf.equal(problem_idx, problem_choice), + lambda: metric_fn(predictions, labels, weights_fn=weights_fn), lambda: (tf.constant(0.0), tf.constant(0.0))) # The tf.metrics.mean function assures correct aggregation. return tf.metrics.mean(scores, weights) - for i, problem in enumerate(problems): - name = "metrics-%s/%s" % (problem, metric_name) - class_output = "image" in problem and "coco" not in problem - weights_fn = (common_layers.weights_all if class_output - else common_layers.weights_nonzero) - eval_metrics[name] = functools.partial(fn, idx=i, weights_fn=weights_fn) - - def global_fn(predictions, labels, weights): - (scores, weights) = metric_function(predictions, labels) - return tf.metrics.mean(scores, weights) - - eval_metrics["metrics/%s" % metric_name] = global_fn + return problem_metric_fn eval_metrics = dict() - - # Metrics are functions that take predictions and labels and return - # a tensor of metrics and a tensor of weights. - # The results are passed to tf.metrics.mean to accumulate properly. - metrics_list = [("accuracy", padded_accuracy), ("accuracy_top5", - padded_accuracy_top5), - ("accuracy_per_sequence", padded_sequence_accuracy), - ("neg_log_perplexity", padded_neg_log_perplexity)] - - # TODO(nikip): Extend this to support use of custom metrics for problems. - for problem in problems: - if "wmt" in problem: - metrics_list.append(("approx_bleu_score", bleu_hook.bleu_score)) - - for metric in metrics_list: - append_metric_fns(metric, eval_metrics) + for problem_idx, (problem_name, problem_instance) in enumerate(problems): + if problem_instance is None: + # For problems in problem_hparams + metrics = [ + Metrics.ACC, Metrics.ACC_TOP5, Metrics.ACC_PER_SEQ, + Metrics.NEG_LOG_PERPLEXITY + ] + if "wmt" in problem_name: + metrics.append(Metrics.APPROX_BLEU) + else: + # For registered Problems + metrics = problem_instance.eval_metrics() + if not all([m in METRICS_FNS for m in metrics]): + raise ValueError("Unrecognized metric. Problem %s specified metrics " + "%s. Recognized metrics are %s." % + (problem_name, metrics, METRICS_FNS.keys())) + + class_output = "image" in problem_name and "coco" not in problem_name + weights_fn = (common_layers.weights_all + if class_output else common_layers.weights_nonzero) + + for metric in metrics: + metric_fn = METRICS_FNS[metric] + problem_metric_fn = make_problem_specific_metric_fn( + metric_fn, problem_idx, weights_fn) + eval_metrics["metrics-%s/%s" % (problem_name, metric)] = problem_metric_fn return { k: tf.contrib.learn.MetricSpec( v, prediction_key="predictions", weight_key="problem_choice") for (k, v) in six.iteritems(eval_metrics) } + + +# Metrics are functions that take predictions and labels and return +# a tensor of metrics and a tensor of weights. +# The results are passed to tf.metrics.mean to accumulate properly. +METRICS_FNS = { + Metrics.ACC: padded_accuracy, + Metrics.ACC_TOP5: padded_accuracy_top5, + Metrics.ACC_PER_SEQ: padded_sequence_accuracy, + Metrics.NEG_LOG_PERPLEXITY: padded_neg_log_perplexity, + Metrics.APPROX_BLEU: bleu_hook.bleu_score, + Metrics.RMSE: padded_rmse, +} diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 9777568fc..66e40d495 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -164,6 +164,8 @@ def infer(self, Returns: samples: an integer `Tensor`. """ + # TODO(rsepassi): Make decoding work with real-valued model outputs + # (i.e. if the target modality is RealModality). if not self.has_input: # since there is no input, it is more interesting to see randomly # generated sequences, than to see the most likely sequence repeatedly. @@ -500,5 +502,5 @@ def _warn_changed_modality_type(new_name, old_name, feature_name): old_type, old_name = registry.parse_modality_name(old_name) if new_type != old_type: tf.logging.warning("%s has a designated modality type %s (%s) but has been " - "overriden with a modality of type %s (%s).", + "overridden with a modality of type %s (%s).", feature_name, old_type, old_name, new_type, new_name) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 0943881f3..bf42c36cc 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -85,7 +85,7 @@ flags.DEFINE_integer("local_eval_frequency", 2000, "Run evaluation every this steps during local training.") flags.DEFINE_bool("locally_shard_to_cpu", False, - "Use CPU as a sharding device runnning locally. This allows " + "Use CPU as a sharding device running locally. This allows " "to test sharded model construction on a machine with 1 GPU.") flags.DEFINE_bool("daisy_chain_variables", True, "copy variables around in a daisy chain") @@ -103,6 +103,9 @@ flags.DEFINE_integer("ps_replicas", 0, "How many ps replicas.") # Decode flags +# Set one of {decode_from_dataset, decode_interactive, decode_from_file} to +# decode. +flags.DEFINE_bool("decode_from_dataset", False, "Decode from dataset on disk.") flags.DEFINE_bool("decode_use_last_position_only", False, "In inference, use last position only for speedup.") flags.DEFINE_bool("decode_interactive", False, @@ -152,17 +155,24 @@ def experiment_fn(output_dir): def create_experiment(output_dir, data_dir, model_name, train_steps, eval_steps): + """Create Experiment.""" hparams = create_hparams(FLAGS.hparams_set, data_dir) estimator, input_fns = create_experiment_components( hparams=hparams, output_dir=output_dir, data_dir=data_dir, model_name=model_name) + eval_metrics = metrics.create_evaluation_metrics( + zip(FLAGS.problems.split("-"), hparams.problem_instances)) + if ("autotune" in FLAGS and FLAGS.autotune and + FLAGS.objective not in eval_metrics): + raise ValueError("Tuning objective %s not among evaluation metrics %s" % + (FLAGS.objective, eval_metrics.keys())) return tf.contrib.learn.Experiment( estimator=estimator, train_input_fn=input_fns["train"], eval_input_fn=input_fns["eval"], - eval_metrics=metrics.create_evaluation_metrics(FLAGS.problems.split("-")), + eval_metrics=eval_metrics, train_steps=train_steps, eval_steps=eval_steps, min_eval_frequency=FLAGS.local_eval_frequency, @@ -585,18 +595,18 @@ def run_locally(exp): Args: exp: Experiment. """ - if exp.train_steps > 0: - # Train - tf.logging.info("Performing local training.") + if exp.train_steps > 0 or exp.eval_steps > 0: + tf.logging.info("Performing local training and evaluation.") exp.train_and_evaluate() + decode(exp.estimator) - # Predict - estimator = exp.estimator + +def decode(estimator): if FLAGS.decode_interactive: decode_interactively(estimator) elif FLAGS.decode_from_file is not None: decode_from_file(estimator, FLAGS.decode_from_file) - else: + elif FLAGS.decode_from_dataset: decode_from_dataset(estimator) From ca08ad9bf1ec957646a17eda089d3b530fb77d93 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 25 Jul 2017 17:26:22 -0700 Subject: [PATCH 0171/4095] Un-reorder of timing signals to make trained models work. PiperOrigin-RevId: 163147659 --- tensor2tensor/models/common_attention.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index 98a198f85..1a8b2c79d 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -66,9 +66,6 @@ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): tf.to_float(tf.range(num_timescales)) * -log_timescale_increment) scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0) signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1) - signal = tf.reshape(signal, [length, 2, num_timescales]) - signal = tf.transpose(signal, perm=[0, 2, 1]) - signal = tf.reshape(signal, [length, channels]) signal = tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]]) signal = tf.reshape(signal, [1, length, channels]) return x + signal From b9fcd66f14ecded01cc257932655f5b1f493e3b9 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 25 Jul 2017 18:12:16 -0700 Subject: [PATCH 0172/4095] Back to wmt16 on one set not downloadable from wmt17, internal merges. PiperOrigin-RevId: 163152415 --- tensor2tensor/bin/t2t-datagen | 1 + tensor2tensor/data_generators/text_encoder.py | 9 +++++---- tensor2tensor/data_generators/wmt.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index a9fa12255..629014713 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -281,6 +281,7 @@ def generate_data_for_problem(problem): def generate_data_for_registered_problem(problem_name): + tf.logging.info("Generating training data for %s.", problem_name) problem = registry.problem(problem_name) task_id = None if FLAGS.task_id < 0 else FLAGS.task_id problem.generate_data(os.path.expanduser(FLAGS.data_dir), diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 6b01e3a35..9fc9eed88 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -30,6 +30,7 @@ # Dependency imports import six +from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import tokenizer import tensorflow as tf @@ -457,7 +458,7 @@ def build_from_token_counts(self, # with high enough counts for our new vocabulary. if min_count < 1: min_count = 1 - for i in six.moves.range(num_iterations): + for i in xrange(num_iterations): tf.logging.info("Iteration {0}".format(i)) counts = collections.defaultdict(int) for token, count in six.iteritems(token_counts): @@ -474,7 +475,7 @@ def build_from_token_counts(self, starts.append(pos) pos += len(self._all_subtoken_strings[subtoken]) for start in starts: - for end in six.moves.range(start + 1, len(escaped_token) + 1): + for end in xrange(start + 1, len(escaped_token) + 1): subtoken_string = escaped_token[start:end] counts[subtoken_string] += count # Array of sets of candidate subtoken strings, by length @@ -491,7 +492,7 @@ def build_from_token_counts(self, new_subtoken_strings = [] # Consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. - for lsub in six.moves.range(len(len_to_subtoken_strings)-1, 0, -1): + for lsub in xrange(len(len_to_subtoken_strings)-1, 0, -1): subtoken_strings = len_to_subtoken_strings[lsub] for subtoken_string in subtoken_strings: count = counts[subtoken_string] @@ -500,7 +501,7 @@ def build_from_token_counts(self, # explicitly, regardless of count. if subtoken_string not in self._alphabet: new_subtoken_strings.append((count, subtoken_string)) - for l in six.moves.range(1, lsub): + for l in xrange(1, lsub): counts[subtoken_string[:l]] -= count new_subtoken_strings.sort(reverse=True) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 519d55996..9587d4d2a 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -259,7 +259,7 @@ def bi_vocabs_token_generator(source_path, _ENDE_TRAIN_DATASETS = [ [ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long + "http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long ("training-parallel-nc-v11/news-commentary-v11.de-en.en", "training-parallel-nc-v11/news-commentary-v11.de-en.de") ], From 92101af0f2fbc4e16557fd688bde9cd9cc33a452 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Wed, 26 Jul 2017 09:58:41 -0700 Subject: [PATCH 0173/4095] Bug fix, specify axis for squeeze when computing BLEU score PiperOrigin-RevId: 163219501 --- tensor2tensor/utils/bleu_hook.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 06d62ad1e..20a7c8426 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -92,7 +92,6 @@ def compute_bleu(reference_corpus, matches_by_order[len(ngram) - 1] += overlap[ngram] for ngram in translation_ngram_counts: possible_matches_by_order[len(ngram)-1] += translation_ngram_counts[ngram] - precisions = [0] * max_order for i in xrange(0, max_order): if possible_matches_by_order[i] > 0: @@ -107,7 +106,6 @@ def compute_bleu(reference_corpus, if use_bp: ratio = translation_length / reference_length bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0 - bleu = geo_mean * bp return np.float32(bleu) @@ -128,8 +126,8 @@ def bleu_score(predictions, labels, **unused_kwargs): """ outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) # Convert the outputs and labels to a [batch_size, input_length] tensor. - outputs = tf.squeeze(outputs) - labels = tf.squeeze(labels) + outputs = tf.squeeze(outputs, axis=[-1, -2]) + labels = tf.squeeze(labels, axis=[-1, -2]) bleu = tf.py_func(compute_bleu, (labels, outputs), tf.float32) return bleu, tf.constant(1.0) From 28eb48f9d1799fbe83ae54c88c02fa4301f97120 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 26 Jul 2017 12:42:01 -0700 Subject: [PATCH 0174/4095] Limit number of concurrent processes in GeneExpressionProblem PiperOrigin-RevId: 163241281 --- tensor2tensor/data_generators/genetics.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/data_generators/genetics.py b/tensor2tensor/data_generators/genetics.py index 309580d53..88b82cb49 100644 --- a/tensor2tensor/data_generators/genetics.py +++ b/tensor2tensor/data_generators/genetics.py @@ -36,6 +36,7 @@ from __future__ import print_function import itertools +import math import multiprocessing as mp import os @@ -54,6 +55,7 @@ import tensorflow as tf +MAX_CONCURRENT_PROCESSES = 10 _bases = list("ACTG") @@ -122,12 +124,19 @@ def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): start_idx, end_idx)) processes.append(p) - # Start and wait for processes + # Start and wait for processes in batches assert len(processes) == num_shards + 2 # 1 per training shard + dev + test - for p in processes: - p.start() - for p in processes: - p.join() + + num_batches = int( + math.ceil(float(len(processes)) / MAX_CONCURRENT_PROCESSES)) + for i in xrange(num_batches): + start = i * MAX_CONCURRENT_PROCESSES + end = start + MAX_CONCURRENT_PROCESSES + current = processes[start:end] + for p in current: + p.start() + for p in current: + p.join() # Shuffle generator_utils.shuffle_dataset(all_filepaths) From cff9f4367095e62b415637d6fb839db7bdc8a28d Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 26 Jul 2017 13:52:47 -0700 Subject: [PATCH 0175/4095] Allow building a subword vocab from a word vocab file and add tests. PiperOrigin-RevId: 163250427 --- .../data_generators/test_data/corpus-1.txt | 4 + .../data_generators/test_data/corpus-2.txt | 3 + .../data_generators/test_data/vocab-1.txt | 2 + .../data_generators/test_data/vocab-2.txt | 3 + tensor2tensor/data_generators/text_encoder.py | 123 ++++++++-------- .../text_encoder_build_subword.py | 36 +++-- .../data_generators/text_encoder_test.py | 107 ++++++++++++-- tensor2tensor/data_generators/tokenizer.py | 124 +++++++++++------ .../data_generators/tokenizer_test.py | 131 +++++++++++++++--- 9 files changed, 387 insertions(+), 146 deletions(-) create mode 100644 tensor2tensor/data_generators/test_data/corpus-1.txt create mode 100644 tensor2tensor/data_generators/test_data/corpus-2.txt create mode 100644 tensor2tensor/data_generators/test_data/vocab-1.txt create mode 100644 tensor2tensor/data_generators/test_data/vocab-2.txt diff --git a/tensor2tensor/data_generators/test_data/corpus-1.txt b/tensor2tensor/data_generators/test_data/corpus-1.txt new file mode 100644 index 000000000..c05e47f90 --- /dev/null +++ b/tensor2tensor/data_generators/test_data/corpus-1.txt @@ -0,0 +1,4 @@ +One morning I shot an elephant in my pajamas. How he got in my pajamas, I don't +know. + +Groucho Marx diff --git a/tensor2tensor/data_generators/test_data/corpus-2.txt b/tensor2tensor/data_generators/test_data/corpus-2.txt new file mode 100644 index 000000000..f45577c4b --- /dev/null +++ b/tensor2tensor/data_generators/test_data/corpus-2.txt @@ -0,0 +1,3 @@ +I haven't slept for 10 days... because that would be too long. + +Mitch Hedberg diff --git a/tensor2tensor/data_generators/test_data/vocab-1.txt b/tensor2tensor/data_generators/test_data/vocab-1.txt new file mode 100644 index 000000000..d34d3d957 --- /dev/null +++ b/tensor2tensor/data_generators/test_data/vocab-1.txt @@ -0,0 +1,2 @@ +lollipop,8 +reverberated,12 diff --git a/tensor2tensor/data_generators/test_data/vocab-2.txt b/tensor2tensor/data_generators/test_data/vocab-2.txt new file mode 100644 index 000000000..7793af4f6 --- /dev/null +++ b/tensor2tensor/data_generators/test_data/vocab-2.txt @@ -0,0 +1,3 @@ +kattywampus,11 +balderdash,10 +jiggery-pokery,14 diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 9fc9eed88..69d29779a 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -30,11 +30,11 @@ # Dependency imports import six -from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import tokenizer import tensorflow as tf +xrange = six.moves.xrange # pylint: disable=redefined-builtin # Reserved tokens for things like padding and EOS symbols. PAD = "<pad>" @@ -295,7 +295,7 @@ def encode(self, raw_text): Returns: a list of integers in the range [0, vocab_size) """ - return self._tokens_to_subtokens(tokenizer.encode( + return self._tokens_to_subtoken_ids(tokenizer.encode( native_to_unicode(raw_text))) def decode(self, subtokens): @@ -307,14 +307,14 @@ def decode(self, subtokens): a native string """ return unicode_to_native(tokenizer.decode( - self._subtokens_to_tokens(subtokens))) + self._subtoken_ids_to_tokens(subtokens))) @property def vocab_size(self): """The subtoken vocabulary size.""" return len(self._all_subtoken_strings) - def _tokens_to_subtokens(self, tokens): + def _tokens_to_subtoken_ids(self, tokens): """Converts a list of tokens to a list of subtoken ids. Args: @@ -324,11 +324,11 @@ def _tokens_to_subtokens(self, tokens): """ ret = [] for token in tokens: - ret.extend(self._escaped_token_to_subtokens( + ret.extend(self._escaped_token_to_subtoken_ids( _escape_token(token, self._alphabet))) return ret - def _subtokens_to_tokens(self, subtokens): + def _subtoken_ids_to_tokens(self, subtokens): """Converts a list of subtoken ids to a list of tokens. Args: @@ -337,45 +337,58 @@ def _subtokens_to_tokens(self, subtokens): a list of strings. """ concatenated = "".join( - [self._subtoken_to_subtoken_string(s) for s in subtokens]) + [self._subtoken_id_to_subtoken_string(s) for s in subtokens]) split = concatenated.split("_") return [_unescape_token(t + "_") for t in split if t] - def _subtoken_to_subtoken_string(self, subtoken): - """Subtoken_String (string) corresponding to the given subtoken (id).""" + def _subtoken_id_to_subtoken_string(self, subtoken): + """Converts a subtoken integer ID to a subtoken string.""" if 0 <= subtoken < self.vocab_size: return self._all_subtoken_strings[subtoken] return u"" - def _escaped_token_to_subtokens(self, escaped_token): - """Converts an escaped token string to a list of subtokens. + def _escaped_token_to_subtoken_strings(self, escaped_token): + """Converts an escaped token string to a list of subtoken strings. Args: - escaped_token: an escaped token + escaped_token: An escaped token as a unicode string. Returns: - a list of one or more integers. + A list of subtokens as unicode strings. """ + # NOTE: This algorithm is greedy; it won't necessarily produce the "best" + # list of subtokens. ret = [] - pos = 0 - lesc = len(escaped_token) - while pos < lesc: - end = min(lesc, pos + self._max_subtoken_len) - while end > pos: - subtoken_id = self._subtoken_string_to_id.get(escaped_token[pos:end]) - if subtoken_id is not None: + start = 0 + token_len = len(escaped_token) + while start < token_len: + for end in xrange( + min(token_len, start + self._max_subtoken_len), start, -1): + subtoken = escaped_token[start:end] + if subtoken in self._subtoken_string_to_id: + ret.append(subtoken) + start = end break - end -= 1 - # If there is no possible encoding of the escaped token then one of the - # characters in the token is not in the alphabet. This should be - # impossible and would be indicative of a bug. - assert subtoken_id is not None - - ret.append(subtoken_id) - pos = end + else: # Did not break + # If there is no possible encoding of the escaped token then one of the + # characters in the token is not in the alphabet. This should be + # impossible and would be indicative of a bug. + assert False, "Token substring not found in subtoken vocabulary." return ret + def _escaped_token_to_subtoken_ids(self, escaped_token): + """Converts an escaped token string to a list of subtoken IDs. + + Args: + escaped_token: An escaped token as a unicode string. + Returns: + A list of subtoken IDs as integers. + """ + return [ + self._subtoken_string_to_id[subtoken] + for subtoken in self._escaped_token_to_subtoken_strings(escaped_token)] + @classmethod def build_to_target_size(cls, target_size, @@ -460,55 +473,51 @@ def build_from_token_counts(self, min_count = 1 for i in xrange(num_iterations): tf.logging.info("Iteration {0}".format(i)) - counts = collections.defaultdict(int) + + # Collect all substrings of the encoded token that break along current + # subtoken boundaries. + subtoken_counts = collections.defaultdict(int) for token, count in six.iteritems(token_counts): escaped_token = _escape_token(token, self._alphabet) - # we will count all tails of the escaped_token, starting from boundaries - # determined by our current segmentation. - if i == 0: - starts = six.moves.range(len(escaped_token)) - else: - subtokens = self._escaped_token_to_subtokens(escaped_token) - pos = 0 - starts = [] - for subtoken in subtokens: - starts.append(pos) - pos += len(self._all_subtoken_strings[subtoken]) - for start in starts: + subtokens = self._escaped_token_to_subtoken_strings(escaped_token) + start = 0 + for subtoken in subtokens: for end in xrange(start + 1, len(escaped_token) + 1): - subtoken_string = escaped_token[start:end] - counts[subtoken_string] += count - # Array of sets of candidate subtoken strings, by length + new_subtoken = escaped_token[start:end] + subtoken_counts[new_subtoken] += count + start += len(subtoken) + + # Array of sets of candidate subtoken strings, by length. len_to_subtoken_strings = [] - for subtoken_string, count in six.iteritems(counts): + for subtoken_string, count in six.iteritems(subtoken_counts): lsub = len(subtoken_string) - # Always include all the alphabet characters or some strings will - # be unencodeable. - if count >= min_count or subtoken_string in self._alphabet: - # Add this subtoken string to its length set + if count >= min_count: while len(len_to_subtoken_strings) <= lsub: len_to_subtoken_strings.append(set()) len_to_subtoken_strings[lsub].add(subtoken_string) - new_subtoken_strings = [] + # Consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. + new_subtoken_strings = [] for lsub in xrange(len(len_to_subtoken_strings)-1, 0, -1): subtoken_strings = len_to_subtoken_strings[lsub] for subtoken_string in subtoken_strings: - count = counts[subtoken_string] - if count >= min_count or subtoken_string in self._alphabet: - # Exclude alphabet tokens here, as they must be included later + count = subtoken_counts[subtoken_string] + if count >= min_count: + # Exclude alphabet tokens here, as they must be included later, # explicitly, regardless of count. if subtoken_string not in self._alphabet: new_subtoken_strings.append((count, subtoken_string)) for l in xrange(1, lsub): - counts[subtoken_string[:l]] -= count + subtoken_counts[subtoken_string[:l]] -= count + + # Include the alphabet explicitly to guarantee all strings are encodable. + new_subtoken_strings.extend( + (subtoken_counts.get(a, 0), a) for a in self._alphabet) new_subtoken_strings.sort(reverse=True) - # Reinitialize to the candidate vocabulary, including the alphabet - # explicitly as the highest priority. + # Reinitialize to the candidate vocabulary. self._init_subtokens_from_list( - list(self._alphabet) + [subtoken for _, subtoken in new_subtoken_strings], reserved=num_reserved_ids) tf.logging.info("vocab_size = %d" % self.vocab_size) diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py index a0d5d8937..88dfac116 100644 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -39,10 +39,13 @@ import tensorflow as tf -tf.app.flags.DEFINE_string('output_fn', '/tmp/my.subword_text_encoder', +tf.app.flags.DEFINE_string('output_filename', '/tmp/my.subword_text_encoder', 'where to store the SubwordTextEncoder') tf.app.flags.DEFINE_string('corpus_filepattern', '', 'Corpus of one or more text files') +tf.app.flags.DEFINE_string('vocab_filepattern', '', + 'One or more vocabulary files ' + '(one word per line as "word,count")') tf.app.flags.DEFINE_integer('min_count', 5, 'Minimum subtoken count in corpus') tf.app.flags.DEFINE_integer('corpus_max_lines', 10000, 'How many lines of corpus to read') @@ -52,16 +55,27 @@ def main(unused_argv): - gs = text_encoder.SubwordTextEncoder() - if not FLAGS.corpus_filepattern: - raise ValueError('Must provide --corpus_filepattern') - token_counts = tokenizer.corpus_token_counts( - FLAGS.corpus_filepattern, FLAGS.corpus_max_lines, - split_on_newlines=FLAGS.split_on_newlines) - gs.build_from_token_counts(token_counts, - FLAGS.min_count, - FLAGS.num_iterations) - gs.store_to_file(FLAGS.output_fn) + if FLAGS.corpus_filepattern and FLAGS.vocab_filepattern: + raise ValueError( + 'Must only provide one of --corpus_filepattern or --vocab_filepattern') + + elif FLAGS.corpus_filepattern: + token_counts = tokenizer.corpus_token_counts( + FLAGS.corpus_filepattern, FLAGS.corpus_max_lines, + split_on_newlines=FLAGS.split_on_newlines) + + elif FLAGS.vocab_filepattern: + token_counts = tokenizer.vocab_token_counts( + FLAGS.vocab_filepattern, FLAGS.corpus_max_lines) + + else: + raise ValueError( + 'Must provide one of --corpus_filepattern or --vocab_filepattern') + + encoder = text_encoder.SubwordTextEncoder() + encoder.build_from_token_counts( + token_counts, FLAGS.min_count, FLAGS.num_iterations) + encoder.store_to_file(FLAGS.output_fn) if __name__ == '__main__': diff --git a/tensor2tensor/data_generators/text_encoder_test.py b/tensor2tensor/data_generators/text_encoder_test.py index 7ac2ba911..4142f8699 100644 --- a/tensor2tensor/data_generators/text_encoder_test.py +++ b/tensor2tensor/data_generators/text_encoder_test.py @@ -18,8 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from __future__ import unicode_literals + +import collections # Dependency imports +import mock from tensor2tensor.data_generators import text_encoder import tensorflow as tf @@ -29,40 +33,113 @@ class EscapeUnescapeTokenTest(tf.test.TestCase): def test_escape_token(self): escaped = text_encoder._escape_token( - u'Foo! Bar.\nunder_score back\\slash', + 'Foo! Bar.\nunder_score back\\slash', set('abcdefghijklmnopqrstuvwxyz .\n') | text_encoder._ESCAPE_CHARS) self.assertEqual( - u'\\70;oo\\33; \\66;ar.\\10;under\\uscore back\\\\slash_', escaped) + '\\70;oo\\33; \\66;ar.\\10;under\\uscore back\\\\slash_', escaped) def test_unescape_token(self): unescaped = text_encoder._unescape_token( - u'\\70;oo\\33; \\66;ar.\\10;under\\uscore back\\\\slash_') + '\\70;oo\\33; \\66;ar.\\10;under\\uscore back\\\\slash_') self.assertEqual( - u'Foo! Bar.\nunder_score back\\slash', unescaped) + 'Foo! Bar.\nunder_score back\\slash', unescaped) class SubwordTextEncoderTest(tf.test.TestCase): def test_encode_decode(self): - token_counts = { - u'this': 9, - u'sentence': 14, - u'the': 100, - u'encoded': 1, - u'was': 20, - u'by': 50, - } + corpus = ( + 'This is a corpus of text that provides a bunch of tokens from which ' + 'to build a vocabulary. It will be used when strings are encoded ' + 'with a TextEncoder subclass. The encoder was coded by a coder.') + token_counts = collections.Counter(corpus.split(' ')) + alphabet = set(corpus) ^ {' '} + + original = 'This is a coded sentence encoded by the SubwordTextEncoder.' + token_counts.update(original.split(' ')) + encoder = text_encoder.SubwordTextEncoder.build_to_target_size( - 50, token_counts, 2, 10) - encoder.build_from_token_counts(token_counts, min_count=2) + 100, token_counts, 2, 10) - original = 'This sentence was encoded by the SubwordTextEncoder.' + # Encoding should be reversible. encoded = encoder.encode(original) decoded = encoder.decode(encoded) self.assertEqual(original, decoded) + # The substrings coded and coder are frequent enough in the corpus that + # they should appear in the vocabulary even though they are substrings + # of other included strings. + subtoken_strings = {encoder._all_subtoken_strings[i] for i in encoded} + self.assertIn('encoded_', subtoken_strings) + self.assertIn('coded_', subtoken_strings) + self.assertIn('TextEncoder', encoder._all_subtoken_strings) + self.assertIn('coder', encoder._all_subtoken_strings) + + # Every character in the corpus should be in the encoder's alphabet and + # its subtoken vocabulary. + self.assertTrue(alphabet.issubset(encoder._alphabet)) + for a in alphabet: + self.assertIn(a, encoder._all_subtoken_strings) + + def test_unicode(self): + corpus = 'Cat emoticons. \U0001F638 \U0001F639 \U0001F63A \U0001F63B' + token_counts = collections.Counter(corpus.split(' ')) + + encoder = text_encoder.SubwordTextEncoder.build_to_target_size( + 100, token_counts, 2, 10) + + self.assertIn('\U0001F638', encoder._alphabet) + self.assertIn('\U0001F63B', encoder._all_subtoken_strings) + + def test_small_vocab(self): + corpus = 'The quick brown fox jumps over the lazy dog' + token_counts = collections.Counter(corpus.split(' ')) + alphabet = set(corpus) ^ {' '} + + encoder = text_encoder.SubwordTextEncoder.build_to_target_size( + 10, token_counts, 2, 10) + + # All vocabulary elements are in the alphabet and subtoken strings even + # if we requested a smaller vocabulary to assure all expected strings + # are encodable. + self.assertTrue(alphabet.issubset(encoder._alphabet)) + for a in alphabet: + self.assertIn(a, encoder._all_subtoken_strings) + + def test_encodable_when_not_in_alphabet(self): + corpus = 'the quick brown fox jumps over the lazy dog' + token_counts = collections.Counter(corpus.split(' ')) + + encoder = text_encoder.SubwordTextEncoder.build_to_target_size( + 100, token_counts, 2, 10) + original = 'This has UPPER CASE letters that are out of alphabet' + + # Early versions could have an infinite loop when breaking into subtokens + # if there was any out-of-alphabet characters in the encoded string. + encoded = encoder.encode(original) + decoded = encoder.decode(encoded) + + self.assertEqual(original, decoded) + encoded_str = ''.join(encoder._all_subtoken_strings[i] for i in encoded) + self.assertIn('\\84;', encoded_str) + + @mock.patch.object(text_encoder, '_ESCAPE_CHARS', new=set('\\_;13579')) + def test_raises_exception_when_not_encodable(self): + corpus = 'the quick brown fox jumps over the lazy dog' + token_counts = collections.Counter(corpus.split(' ')) + + # Deliberately exclude some required encoding chars from the alphabet + # and token list, making some strings unencodable. + encoder = text_encoder.SubwordTextEncoder.build_to_target_size( + 100, token_counts, 2, 10) + original = 'This has UPPER CASE letters that are out of alphabet' + + # Previously there was a bug which produced an infinite loop in this case. + with self.assertRaises(AssertionError): + encoder.encode(original) + if __name__ == '__main__': tf.test.main() diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 0f4141199..1acffc04c 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -30,7 +30,7 @@ alphanumeric character and a non-alphanumeric character. This produces a list which alternates between "alphanumeric tokens" (strings of alphanumeric characters) and "non-alphanumeric tokens" - (strings of of non-alphanumeric characters). + (strings of non-alphanumeric characters). 2. Remove every token consisting of a single space, unless it is the very first or very last token in the list. These tokens are now @@ -44,28 +44,26 @@ from __future__ import division from __future__ import print_function -from collections import defaultdict +import collections import sys import unicodedata # Dependency imports -from six import PY2 -from six import unichr # pylint: disable=redefined-builtin -from six.moves import xrange # pylint: disable=redefined-builtin - +import six import tensorflow as tf +xrange = six.moves.xrange # pylint: disable=redefined-builtin # Conversion between Unicode and UTF-8, if required (on Python2) -_native_to_unicode = (lambda s: s.decode("utf-8")) if PY2 else (lambda s: s) +_native_to_unicode = (lambda s: s.decode("utf-8")) if six.PY2 else (lambda s: s) # This set contains all letter and number characters. _ALPHANUMERIC_CHAR_SET = set( - unichr(i) for i in xrange(sys.maxunicode) - if (unicodedata.category(unichr(i)).startswith("L") or - unicodedata.category(unichr(i)).startswith("N"))) + six.unichr(i) for i in xrange(sys.maxunicode) + if (unicodedata.category(six.unichr(i)).startswith("L") or + unicodedata.category(six.unichr(i)).startswith("N"))) def encode(text): @@ -110,42 +108,86 @@ def decode(tokens): return "".join(ret) -def corpus_token_counts(text_filepattern, corpus_max_lines, - split_on_newlines=True): +def _read_filepattern(filepattern, max_lines=None, split_on_newlines=True): + """Reads files matching a wildcard pattern, yielding the contents. + + Args: + filepattern: A wildcard pattern matching one or more files. + max_lines: If set, stop reading after reading this many lines. + split_on_newlines: A boolean. If true, then split files by lines and strip + leading and trailing whitespace from each line. Otherwise, treat each + file as a single string. + + Yields: + The contents of the files as lines, if split_on_newlines is True, or + the entire contents of each file if False. + """ + filenames = tf.gfile.Glob(filepattern) + lines_read = 0 + for filename in filenames: + with tf.gfile.Open(filename) as f: + if split_on_newlines: + for line in f: + yield line.strip() + lines_read += 1 + if max_lines and lines_read >= max_lines: + return + + else: + if max_lines: + doc = [] + for line in f: + doc.append(line) + lines_read += 1 + if max_lines and lines_read >= max_lines: + yield "".join(doc) + return + yield "".join(doc) + + else: + yield f.read() + + +def corpus_token_counts( + text_filepattern, corpus_max_lines, split_on_newlines=True): """Read the corpus and compute a dictionary of token counts. Args: - text_filepattern: a pattern matching one or more files - corpus_max_lines: an integer - maximum total lines to read. - split_on_newlines: a boolean. If true, then split files by lines and strip - leading and trailing whitespace from each line. + text_filepattern: A pattern matching one or more files. + corpus_max_lines: An integer; maximum total lines to read. + split_on_newlines: A boolean. If true, then split files by lines and strip + leading and trailing whitespace from each line. Otherwise, treat each + file as a single string. Returns: - a dictionary from token to count. + a dictionary mapping token to count. """ - def read_corpus(): - """Read the corpus.""" - docs = [] - lines_read = 0 - filenames = tf.gfile.Glob(text_filepattern) - for text_filename in filenames: - with tf.gfile.Open(text_filename) as f: - if not split_on_newlines: - docs.append("") - for line in f: - if split_on_newlines: - # The tokenizer updates token_counts in encode() - docs.append(line.strip()) - else: - docs[-1] += line - lines_read += 1 - if corpus_max_lines > 0 and lines_read > corpus_max_lines: - return docs - return docs - - counts = defaultdict(int) - for doc in read_corpus(): - for tok in encode(_native_to_unicode(doc)): - counts[tok] += 1 + counts = collections.Counter() + for doc in _read_filepattern( + text_filepattern, + max_lines=corpus_max_lines, + split_on_newlines=split_on_newlines): + counts.update(encode(_native_to_unicode(doc))) + return counts + +def vocab_token_counts(text_filepattern, max_lines): + """Read a vocab file and return a dictionary of token counts. + + Reads a two-column CSV file of tokens and their frequency in a dataset. The + tokens are presumed to be generated by encode() or the equivalent. + + Args: + text_filepattern: A pattern matching one or more files. + max_lines: An integer; maximum total lines to read. + + Returns: + a dictionary mapping token to count. + """ + ret = {} + for line in _read_filepattern(text_filepattern, max_lines=max_lines): + token, count = line.rsplit(",", 1) + ret[_native_to_unicode(token)] = int(count) + + return ret diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py index 189f19663..792ef4dbb 100644 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -20,45 +20,132 @@ from __future__ import division from __future__ import print_function +import os import random # Dependency imports -from six import unichr # pylint: disable=redefined-builtin -from six.moves import xrange # pylint: disable=redefined-builtin +import six from tensor2tensor.data_generators import tokenizer - import tensorflow as tf +xrange = six.moves.xrange # pylint: disable=redefined-builtin + +FLAGS = tf.app.flags.FLAGS + +_TESTDATA = "google3/third_party/py/tensor2tensor/data_generators/test_data" + class TokenizerTest(tf.test.TestCase): - def testEncode(self): - self.assertEqual( - tokenizer.encode(u"Dude - that's so cool."), - [u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."]) - self.assertEqual( - tokenizer.encode(u"Łukasz est né en 1981."), - [u"Łukasz", u"est", u"né", u"en", u"1981", u"."]) - self.assertEqual( - tokenizer.encode(u" Spaces at the ends "), - [u" ", u"Spaces", u"at", u"the", u"ends", u" "]) - self.assertEqual(tokenizer.encode(u"802.11b"), [u"802", u".", u"11b"]) - self.assertEqual(tokenizer.encode(u"two. \nlines"), - [u"two", u". \n", u"lines"]) + def test_encode(self): + self.assertListEqual( + [u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."], + tokenizer.encode(u"Dude - that's so cool.")) + self.assertListEqual( + [u"Łukasz", u"est", u"né", u"en", u"1981", u"."], + tokenizer.encode(u"Łukasz est né en 1981.")) + self.assertListEqual( + [u" ", u"Spaces", u"at", u"the", u"ends", u" "], + tokenizer.encode(u" Spaces at the ends ")) + self.assertListEqual( + [u"802", u".", u"11b"], + tokenizer.encode(u"802.11b")) + self.assertListEqual( + [u"two", u". \n", u"lines"], + tokenizer.encode(u"two. \nlines")) - def testDecode(self): + def test_decode(self): self.assertEqual( + u"Dude - that's so cool.", tokenizer.decode( - [u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."]), - u"Dude - that's so cool.") + [u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."])) - def testInvertibilityOnRandomStrings(self): - random.seed(123) + def test_invertibility_on_random_strings(self): for _ in xrange(1000): - s = u"".join([unichr(random.randint(0, 65535)) for _ in xrange(10)]) + s = u"".join( + six.unichr(random.randint(0, 65535)) for _ in xrange(10)) self.assertEqual(s, tokenizer.decode(tokenizer.encode(s))) +class TestTokenCounts(tf.test.TestCase): + + def setUp(self): + super(TestTokenCounts, self).setUp() + self.corpus_path = os.path.join( + FLAGS.test_srcdir, _TESTDATA, "corpus-*.txt") + self.vocab_path = os.path.join( + FLAGS.test_srcdir, _TESTDATA, "vocab-*.txt") + + def test_corpus_token_counts_split_on_newlines(self): + token_counts = tokenizer.corpus_token_counts( + self.corpus_path, corpus_max_lines=0, split_on_newlines=True) + + expected = { + u"'": 2, + u".": 2, + u". ": 1, + u"... ": 1, + u"Groucho": 1, + u"Marx": 1, + u"Mitch": 1, + u"Hedberg": 1, + u"I": 3, + u"in": 2, + u"my": 2, + u"pajamas": 2, + } + self.assertDictContainsSubset(expected, token_counts) + self.assertNotIn(u".\n\n", token_counts) + self.assertNotIn(u"\n", token_counts) + + def test_corpus_token_counts_no_split_on_newlines(self): + token_counts = tokenizer.corpus_token_counts( + self.corpus_path, corpus_max_lines=0, split_on_newlines=False) + + self.assertDictContainsSubset({u".\n\n": 2, u"\n": 3}, token_counts) + + def test_corpus_token_counts_split_with_max_lines(self): + token_counts = tokenizer.corpus_token_counts( + self.corpus_path, corpus_max_lines=5, split_on_newlines=True) + + self.assertIn(u"slept", token_counts) + self.assertNotIn(u"Mitch", token_counts) + + def test_corpus_token_counts_no_split_with_max_lines(self): + token_counts = tokenizer.corpus_token_counts( + self.corpus_path, corpus_max_lines=5, split_on_newlines=False) + + self.assertIn(u"slept", token_counts) + self.assertNotIn(u"Mitch", token_counts) + self.assertDictContainsSubset( + {u".\n\n": 1, u"\n": 2, u".\n": 1}, token_counts) + + def test_vocab_token_counts(self): + token_counts = tokenizer.vocab_token_counts( + self.vocab_path, 0) + + expected = { + "lollipop": 8, + "reverberated": 12, + "kattywampus": 11, + "balderdash": 10, + "jiggery-pokery": 14, + } + self.assertDictEqual(expected, token_counts) + + def test_vocab_token_counts_with_max_lines(self): + token_counts = tokenizer.vocab_token_counts( + self.vocab_path, 4) + + expected = { + "lollipop": 8, + "reverberated": 12, + "kattywampus": 11, + "balderdash": 10, + } + self.assertDictEqual(expected, token_counts) + + if __name__ == "__main__": tf.test.main() From c01617efd2a2f321633ffaeaebc8697d46ed0dc0 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 26 Jul 2017 15:01:19 -0700 Subject: [PATCH 0176/4095] Use TensorFlow idiom for importing six.moves.xrange. PiperOrigin-RevId: 163261434 --- tensor2tensor/data_generators/text_encoder.py | 3 +-- tensor2tensor/data_generators/tokenizer.py | 3 +-- tensor2tensor/data_generators/tokenizer_test.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 69d29779a..4bb1c875d 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -30,12 +30,11 @@ # Dependency imports import six +from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import tokenizer import tensorflow as tf -xrange = six.moves.xrange # pylint: disable=redefined-builtin - # Reserved tokens for things like padding and EOS symbols. PAD = "<pad>" EOS = "<EOS>" diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 1acffc04c..5cb9fd32b 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -51,10 +51,9 @@ # Dependency imports import six +from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf -xrange = six.moves.xrange # pylint: disable=redefined-builtin - # Conversion between Unicode and UTF-8, if required (on Python2) _native_to_unicode = (lambda s: s.decode("utf-8")) if six.PY2 else (lambda s: s) diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py index 792ef4dbb..ad4a3ff04 100644 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -26,11 +26,10 @@ # Dependency imports import six +from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import tokenizer import tensorflow as tf -xrange = six.moves.xrange # pylint: disable=redefined-builtin - FLAGS = tf.app.flags.FLAGS _TESTDATA = "google3/third_party/py/tensor2tensor/data_generators/test_data" From 5242ac6e59cf553820d31485509fc527339ada92 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 26 Jul 2017 17:33:54 -0700 Subject: [PATCH 0177/4095] Rm num_shards from Problem. Problems specify sharding themselves. PiperOrigin-RevId: 163281576 --- README.md | 1 - tensor2tensor/bin/t2t-datagen | 11 +++++++---- tensor2tensor/data_generators/algorithmic.py | 11 ++++------- tensor2tensor/data_generators/genetics.py | 14 ++++++++------ tensor2tensor/data_generators/image.py | 2 +- tensor2tensor/data_generators/problem.py | 2 +- tensor2tensor/data_generators/wmt.py | 6 ++---- 7 files changed, 23 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index c0e34e0fe..edd6460d0 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,6 @@ mkdir -p $DATA_DIR $TMP_DIR $TRAIN_DIR t2t-datagen \ --data_dir=$DATA_DIR \ --tmp_dir=$TMP_DIR \ - --num_shards=100 \ --problem=$PROBLEM # Train diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 629014713..e4acb6731 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -63,7 +63,8 @@ flags.DEFINE_string("problem", "", "The name of the problem to generate data for.") flags.DEFINE_string("exclude_problems", "", "Comma-separates list of problems to exclude.") -flags.DEFINE_integer("num_shards", 10, "How many shards to use.") +flags.DEFINE_integer("num_shards", 0, "How many shards to use. Ignored for " + "registered Problems.") flags.DEFINE_integer("max_cases", 0, "Maximum number of cases to generate (unbounded if 0).") flags.DEFINE_integer("random_seed", 429459, "Random seed to use.") @@ -252,7 +253,7 @@ def generate_data_for_problem(problem): if isinstance(dev_gen, int): # The dev set and test sets are generated as extra shards using the # training generator. The integer specifies the number of training - # shards. FLAGS.num_shards is ignored. + # shards. FLAGS.num_shards is ignored. num_training_shards = dev_gen tf.logging.info("Generating data for %s.", problem) all_output_files = generator_utils.combined_data_filenames( @@ -263,10 +264,11 @@ def generate_data_for_problem(problem): else: # usual case - train data and dev data are generated using separate # generators. + num_shards = FLAGS.num_shards or 10 tf.logging.info("Generating training data for %s.", problem) train_output_files = generator_utils.train_data_filenames( problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, - FLAGS.num_shards) + num_shards) generator_utils.generate_files(training_gen(), train_output_files, FLAGS.max_cases) tf.logging.info("Generating development data for %s.", problem) @@ -282,11 +284,12 @@ def generate_data_for_problem(problem): def generate_data_for_registered_problem(problem_name): tf.logging.info("Generating training data for %s.", problem_name) + if FLAGS.num_shards: + raise ValueError("--num_shards should not be set for registered Problem.") problem = registry.problem(problem_name) task_id = None if FLAGS.task_id < 0 else FLAGS.task_id problem.generate_data(os.path.expanduser(FLAGS.data_dir), os.path.expanduser(FLAGS.tmp_dir), - num_shards=FLAGS.num_shards, task_id=task_id) diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index 017bc8470..c115a1ebe 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -66,10 +66,7 @@ def dev_size(self): def num_shards(self): return 10 - def generate_data(self, data_dir, _, num_shards=None, task_id=-1): - if num_shards is None: - num_shards = self.num_shards - + def generate_data(self, data_dir, _, task_id=-1): def generator_eos(generator): """Shift by NUM_RESERVED_IDS and append EOS token.""" for case in generator: @@ -87,7 +84,7 @@ def generator_eos(generator): utils.generate_dataset_and_shuffle( train_generator_eos(), - self.training_filepaths(data_dir, num_shards, shuffled=True), + self.training_filepaths(data_dir, self.num_shards, shuffled=True), dev_generator_eos(), self.dev_filepaths(data_dir, 1, shuffled=True), shuffle=False) @@ -254,7 +251,7 @@ def zipf_distribution(nbr_symbols, alpha): def zipf_random_sample(distr_map, sample_len): - """Helper function: Generate a random Zipf sample of given lenght. + """Helper function: Generate a random Zipf sample of given length. Args: distr_map: list of float, Zipf's distribution over nbr_symbols. @@ -287,7 +284,7 @@ def reverse_generator_nlplike(nbr_symbols, max_length: integer, maximum length of sequences to generate. nbr_cases: the number of cases to generate. scale_std_dev: float, Normal distribution's standard deviation scale factor - used to draw the lenght of sequence. Default = 1% of the max_length. + used to draw the length of sequence. Default = 1% of the max_length. alpha: float, Zipf's Law Distribution parameter. Default = 1.5. Usually for modelling natural text distribution is in the range [1.1-1.6]. diff --git a/tensor2tensor/data_generators/genetics.py b/tensor2tensor/data_generators/genetics.py index 88b82cb49..4e8a6d987 100644 --- a/tensor2tensor/data_generators/genetics.py +++ b/tensor2tensor/data_generators/genetics.py @@ -87,10 +87,11 @@ def feature_encoders(self, data_dir): "targets": text_encoder.TextEncoder() } - def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): - if num_shards is None: - num_shards = 100 + @property + def num_shards(self): + return 100 + def generate_data(self, data_dir, tmp_dir, task_id=-1): try: # Download source data if download_url specified h5_filepath = generator_utils.maybe_download(tmp_dir, self.h5_file, @@ -109,7 +110,7 @@ def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): # Collect created shard processes to start and join processes = [] - datasets = [(self.training_filepaths, num_shards, "train", + datasets = [(self.training_filepaths, self.num_shards, "train", num_train_examples), (self.dev_filepaths, 1, "valid", num_dev_examples), (self.test_filepaths, 1, "test", num_test_examples)] @@ -124,9 +125,10 @@ def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): start_idx, end_idx)) processes.append(p) - # Start and wait for processes in batches - assert len(processes) == num_shards + 2 # 1 per training shard + dev + test + # 1 per training shard + dev + test + assert len(processes) == self.num_shards + 2 + # Start and wait for processes in batches num_batches = int( math.ceil(float(len(processes)) / MAX_CONCURRENT_PROCESSES)) for i in xrange(num_batches): diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index acb1128ed..fdad8d432 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -338,7 +338,7 @@ def example_reading_spec(self, label_key=None): class ImageFSNS(ImageProblem): """Problem spec for French Street Name recognition.""" - def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): + def generate_data(self, data_dir, tmp_dir, task_id=-1): list_url = ("https://raw.githubusercontent.com/tensorflow/models/master/" "street/python/fsns_urls.txt") fsns_urls = generator_utils.maybe_download( diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 69d81e58e..67e3c6f90 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -135,7 +135,7 @@ class Problem(object): # BEGIN SUBCLASS INTERFACE # ============================================================================ - def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): + def generate_data(self, data_dir, tmp_dir, task_id=-1): raise NotImplementedError() def hparams(self, defaults, model_hparams): diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 9587d4d2a..97b191096 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -83,12 +83,10 @@ def vocab_name(self): def vocab_file(self): return "%s.%d" % (self.vocab_name, self.targeted_vocab_size) - def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1): - if num_shards is None: - num_shards = self.num_shards + def generate_data(self, data_dir, tmp_dir, task_id=-1): generator_utils.generate_dataset_and_shuffle( self.train_generator(data_dir, tmp_dir, True), - self.training_filepaths(data_dir, num_shards, shuffled=False), + self.training_filepaths(data_dir, self.num_shards, shuffled=False), self.dev_generator(data_dir, tmp_dir), self.dev_filepaths(data_dir, 1, shuffled=False)) From 93b325f420d85d934d6280b316a248eca982c192 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 26 Jul 2017 18:18:04 -0700 Subject: [PATCH 0178/4095] Baseline model for GeneExpression problem PiperOrigin-RevId: 163286026 --- tensor2tensor/data_generators/all_problems.py | 2 +- .../{genetics.py => gene_expression.py} | 39 +++--- ...netics_test.py => gene_expression_test.py} | 8 +- tensor2tensor/models/common_layers.py | 31 ++-- tensor2tensor/models/gene_expression.py | 132 ++++++++++++++++++ tensor2tensor/models/gene_expression_test.py | 79 +++++++++++ tensor2tensor/models/modalities.py | 15 +- tensor2tensor/models/models.py | 1 + tensor2tensor/utils/metrics.py | 2 +- tensor2tensor/utils/trainer_utils.py | 12 +- 10 files changed, 279 insertions(+), 42 deletions(-) rename tensor2tensor/data_generators/{genetics.py => gene_expression.py} (90%) rename tensor2tensor/data_generators/{genetics_test.py => gene_expression_test.py} (89%) create mode 100644 tensor2tensor/models/gene_expression.py create mode 100644 tensor2tensor/models/gene_expression_test.py diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index d8007f5e3..6830cf0bf 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -34,7 +34,7 @@ # pylint: disable=g-import-not-at-top try: # Requires h5py - from tensor2tensor.data_generators import genetics + from tensor2tensor.data_generators import gene_expression except ImportError: pass # pylint: enable=g-import-not-at-top diff --git a/tensor2tensor/data_generators/genetics.py b/tensor2tensor/data_generators/gene_expression.py similarity index 90% rename from tensor2tensor/data_generators/genetics.py rename to tensor2tensor/data_generators/gene_expression.py index 4e8a6d987..31d1cd150 100644 --- a/tensor2tensor/data_generators/genetics.py +++ b/tensor2tensor/data_generators/gene_expression.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Genetics problems. +"""Gene expression problems. Inputs are bases ACTG (with indices assigned in that order). @@ -82,7 +82,7 @@ def chunk_size(self): def feature_encoders(self, data_dir): del data_dir return { - "inputs": GeneticBaseEncoder(chunk_size=self.chunk_size), + "inputs": DNAEncoder(chunk_size=self.chunk_size), # TODO(rsepassi): RealEncoder? "targets": text_encoder.TextEncoder() } @@ -166,8 +166,15 @@ def example_reading_spec(self): def preprocess_examples(self, examples, mode): del mode + # Reshape targets examples["targets"] = tf.reshape(examples["targets"], [-1, 1, self.num_output_predictions]) + examples["targets_mask"] = tf.reshape(examples["targets_mask"], [-1, 1, 1]) + + # Set masked targets to 0 (i.e. pad) so that loss and metrics ignore them. + # Add epsilon because some unmasked labels are actually 0. + examples["targets"] += 1e-6 + examples["targets"] *= examples["targets_mask"] return examples @@ -175,8 +182,8 @@ def eval_metrics(self): return [metrics.Metrics.RMSE] -@registry.register_problem("genetics_cage10") -class GeneticsCAGE10(GeneExpressionProblem): +@registry.register_problem("gene_expression_cage10") +class GeneExpressionCAGE10(GeneExpressionProblem): @property def download_url(self): @@ -187,8 +194,8 @@ def h5_file(self): return "cage10.h5" -@registry.register_problem("genetics_gm12878") -class GeneticsGM12878(GeneExpressionProblem): +@registry.register_problem("gene_expression_gm12878") +class GeneExpressionGM12878(GeneExpressionProblem): @property def download_url(self): @@ -199,8 +206,8 @@ def h5_file(self): return "gm12878.h5" -@registry.register_problem("genetics_l262k") -class GeneticsL262k(GeneExpressionProblem): +@registry.register_problem("gene_expression_l262k") +class GeneExpressionL262k(GeneExpressionProblem): @property def h5_file(self): @@ -236,7 +243,7 @@ def dataset_generator(filepath, chunk_size=1, start_idx=None, end_idx=None): - encoder = GeneticBaseEncoder(chunk_size=chunk_size) + encoder = DNAEncoder(chunk_size=chunk_size) with h5py.File(filepath, "r") as h5_file: # Get input keys from h5_file src_keys = [s % dataset for s in ["%s_in", "%s_na", "%s_out"]] @@ -291,7 +298,7 @@ def to_example_dict(encoder, inputs, mask, outputs): return ex_dict -class GeneticBaseEncoder(text_encoder.TextEncoder): +class DNAEncoder(text_encoder.TextEncoder): """ACTG strings to ints and back. Optionally chunks bases into single ids. Uses 'X' as an unknown base. @@ -302,14 +309,14 @@ class GeneticBaseEncoder(text_encoder.TextEncoder): def __init__(self, chunk_size=1, num_reserved_ids=text_encoder.NUM_RESERVED_TOKENS): - super(GeneticBaseEncoder, self).__init__(num_reserved_ids=num_reserved_ids) + super(DNAEncoder, self).__init__(num_reserved_ids=num_reserved_ids) # Build a vocabulary of chunks of size chunk_size self._chunk_size = chunk_size chunks = [] for size in range(1, chunk_size + 1): - c = itertools.product(_bases + [GeneticBaseEncoder.UNK], repeat=size) + c = itertools.product(_bases + [DNAEncoder.UNK], repeat=size) num_pad = chunk_size - size - padding = (GeneticBaseEncoder.PAD,) * num_pad + padding = (DNAEncoder.PAD,) * num_pad c = [el + padding for el in c] chunks.extend(c) chunks.sort() @@ -323,7 +330,7 @@ def vocab_size(self): def encode(self, s): bases = list(s) - pad = [GeneticBaseEncoder.PAD] * (len(bases) % self._chunk_size) + pad = [DNAEncoder.PAD] * (len(bases) % self._chunk_size) bases.extend(pad) assert (len(bases) % self._chunk_size) == 0 num_chunks = len(bases) // self._chunk_size @@ -342,8 +349,8 @@ def decode(self, ids): for idx in ids: if idx >= self._num_reserved_ids: chunk = self._ids_to_chunk[idx] - if GeneticBaseEncoder.PAD in chunk: - chunk = chunk[:chunk.index(GeneticBaseEncoder.PAD)] + if DNAEncoder.PAD in chunk: + chunk = chunk[:chunk.index(DNAEncoder.PAD)] else: chunk = [text_encoder.RESERVED_TOKENS[idx]] bases.extend(chunk) diff --git a/tensor2tensor/data_generators/genetics_test.py b/tensor2tensor/data_generators/gene_expression_test.py similarity index 89% rename from tensor2tensor/data_generators/genetics_test.py rename to tensor2tensor/data_generators/gene_expression_test.py index 5eac1b249..2d7bbe832 100644 --- a/tensor2tensor/data_generators/genetics_test.py +++ b/tensor2tensor/data_generators/gene_expression_test.py @@ -22,7 +22,7 @@ import numpy as np -from tensor2tensor.data_generators import genetics +from tensor2tensor.data_generators import gene_expression import tensorflow as tf @@ -40,7 +40,7 @@ def _oneHotBases(self, bases): return np.array(one_hots) def testRecordToExample(self): - encoder = genetics.GeneticBaseEncoder(chunk_size=2) + encoder = gene_expression.DNAEncoder(chunk_size=2) raw_inputs = ["A", "C", "G", "X", "C", "T"] # Put in numpy arrays in the same format as in the h5 file @@ -48,7 +48,7 @@ def testRecordToExample(self): mask = np.array([True, False, True]) outputs = np.array([[1.0, 2.0, 3.0], [5.0, 1.0, 0.2], [5.1, 2.3, 2.3]]) # Convert to example dict - ex_dict = genetics.to_example_dict(encoder, inputs, mask, outputs) + ex_dict = gene_expression.to_example_dict(encoder, inputs, mask, outputs) self.assertEqual(len(raw_inputs) // 2 + 1, len(ex_dict["inputs"])) self.assertAllEqual(encoder.encode(raw_inputs) + [1], ex_dict["inputs"]) @@ -61,7 +61,7 @@ def testGenerateShardArgs(self): num_examples = 37 num_shards = 4 outfiles = [str(i) for i in range(num_shards)] - shard_args = genetics.generate_shard_args(outfiles, num_examples) + shard_args = gene_expression.generate_shard_args(outfiles, num_examples) starts, ends, fnames = zip(*shard_args) self.assertAllEqual([0, 9, 18, 27], starts) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index 37e791bc3..e98531d88 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -469,7 +469,10 @@ def get_norm(norm_type): "'noam', 'none'.") -def residual_fn(x, y, norm_type, residual_dropout, +def residual_fn(x, + y, + norm_type, + residual_dropout, filters=None, epsilon=1e-16, name="residual"): @@ -559,11 +562,17 @@ def conv_block_internal(conv_fn, def conv_block(inputs, filters, dilation_rates_and_kernel_sizes, **kwargs): - """A block of standard convolutions.""" + """A block of standard 2d convolutions.""" return conv_block_internal(conv, inputs, filters, dilation_rates_and_kernel_sizes, **kwargs) +def conv1d_block(inputs, filters, dilation_rates_and_kernel_sizes, **kwargs): + """A block of standard 1d convolutions.""" + return conv_block_internal(conv1d, inputs, filters, + dilation_rates_and_kernel_sizes, **kwargs) + + def separable_conv_block(inputs, filters, dilation_rates_and_kernel_sizes, **kwargs): """A block of separable convolutions.""" @@ -858,10 +867,7 @@ def multiscale_conv_sum(inputs, output_size, dilation_rates_and_kernel_sizes, return tf.add_n(results) * (len(results)**-0.5) -def multiscale_conv_and_attention(x, - padding, - hparams, - source=None): +def multiscale_conv_and_attention(x, padding, hparams, source=None): """A common part of t2t layers. First, do a linear multiscale convolution @@ -925,10 +931,7 @@ def conv_with_pools(inputs, output_size, kernel_size, pool_sizes, pooling_type, return tf.add_n(results) * (len(results)**-0.5) -def conv_with_pools_and_attention(x, - padding, - hparams, - source=None): +def conv_with_pools_and_attention(x, padding, hparams, source=None): """A common part of t2t layers. First, do conv_with_pools @@ -1389,8 +1392,8 @@ def padded_cross_entropy(logits, vocab_size = tf.shape(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): pad_logits, pad_labels = pad_with_zeros(logits, labels) - xent = smoothing_cross_entropy(pad_logits, pad_labels, - vocab_size, confidence) + xent = smoothing_cross_entropy(pad_logits, pad_labels, vocab_size, + confidence) weights = weights_fn(pad_labels) if not reduce_sum: return xent * weights, weights @@ -1493,8 +1496,8 @@ def linear_set_layer(layer_size, # Unfortunately tf doesn't support broadcasting via concat, but we can # simply add the transformed context to get the same effect. context = tf.expand_dims(context, axis=1) - cont_tfm = conv1d(context, layer_size, 1, - activation=None, name="cont_conv") + cont_tfm = conv1d( + context, layer_size, 1, activation=None, name="cont_conv") outputs += cont_tfm if activation_fn is not None: diff --git a/tensor2tensor/models/gene_expression.py b/tensor2tensor/models/gene_expression.py new file mode 100644 index 000000000..bdb93509b --- /dev/null +++ b/tensor2tensor/models/gene_expression.py @@ -0,0 +1,132 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Models for gene expression from DNA.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.models import common_hparams +from tensor2tensor.models import common_layers +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +@registry.register_model +class GeneExpressionConv(t2t_model.T2TModel): + """Gene expression conv net. + + Based on "Basenji" model from + http://www.biorxiv.org/content/early/2017/07/10/161851 + + Uses layer_norm instead of batch_norm. + """ + + def model_fn_body(self, features): + inputs = features["inputs"] + inputs.get_shape().assert_has_rank(4) + + hp = self._hparams + + out = inputs + out = common_layers.flatten4d3d(out) + + # Conv layers + for i in xrange(hp.num_conv_layers): + out = conv_layer( + out, + hp.hidden_size, + hp.kernel_width, + hp.stride, + hp.pooling_windows[i], + hp.dropout, + 1, + name="conv_%d" % (i + 1)) + + # Dense dilated conv layers + for i in xrange(hp.num_dconv_layers): + dilation_rate = 2**(i + 1) + dconv_out = conv_layer( + out, + hp.hidden_size, + hp.kernel_width, + 1, + 0, + hp.dropout, + dilation_rate, + name="dconv_%d" % (i + 1)) + out = tf.concat([out, dconv_out], axis=2) + + # Fully connected layer + out = fc_layer(out, hp.hidden_size, hp.dropout, name="fc") + + out.get_shape().assert_has_rank(3) + out = tf.expand_dims(out, 2) + return out + + +def conv_layer(x, + hidden_size, + kernel_size, + stride, + pooling_window, + dropout_rate, + dilation_rate, + name="conv"): + with tf.variable_scope(name): + out = x + out = common_layers.conv1d_block( + out, + hidden_size, [(dilation_rate, kernel_size)], + strides=stride, + first_relu=False, + padding="same") + out = tf.nn.relu(out) + if pooling_window: + out = tf.layers.max_pooling1d( + out, pooling_window, pooling_window, padding="same") + out = tf.layers.dropout(out, dropout_rate) + return out + + +def fc_layer(x, num_out, dropout_rate, name="fc"): + with tf.variable_scope(name): + out = x + out = tf.layers.dense(out, num_out) + out = tf.contrib.layers.layer_norm(out) + out = tf.nn.relu(out) + out = tf.layers.dropout(out, dropout_rate) + return out + + +@registry.register_hparams +def gene_expression_conv_base(): + """Hparams for GeneExpressionConv model.""" + hparams = common_hparams.basic_params1() + hparams.add_hparam("num_conv_layers", 4) + hparams.add_hparam("num_dconv_layers", 7) + hparams.add_hparam("pooling_windows", [2, 4, 4, 4]) + + # TODO(rsepassi): Correct the values of these hyperparameters + hparams.hidden_size = 128 + hparams.kernel_width = 128 + hparams.add_hparam("stride", 1) + return hparams diff --git a/tensor2tensor/models/gene_expression_test.py b/tensor2tensor/models/gene_expression_test.py new file mode 100644 index 000000000..bec5268fd --- /dev/null +++ b/tensor2tensor/models/gene_expression_test.py @@ -0,0 +1,79 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Gene Expression models.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +import numpy as np + +from tensor2tensor.data_generators import gene_expression as gene_data +from tensor2tensor.models import gene_expression +from tensor2tensor.models import modalities # pylint: disable=unused-import + +import tensorflow as tf + + +def gene_expression_conv_test(): + hparams = gene_expression.gene_expression_conv_base() + hparams.hidden_size = 8 + hparams.num_dconv_layers = 2 + return hparams + + +class GeneExpressionModelsTest(tf.test.TestCase): + + def _testModel(self, hparams, model_cls): + batch_size = 3 + target_length = 6 + target_out = 10 # GeneExpressionProblem.num_output_predictions + input_length = target_length * 128 + input_vocab_size = 5 + + inputs = np.random.random_integers( + input_vocab_size, size=(batch_size, input_length, 1, 1)) + targets = np.random.random_sample((batch_size, target_length, 1, + target_out)) + + features = { + "inputs": tf.constant(inputs, dtype=tf.int32), + "targets": tf.constant(targets, dtype=tf.float32), + } + p_hparams, = hparams.problems + sharded_logits, _, _ = model_cls(hparams, tf.contrib.learn.ModeKeys.TRAIN, + p_hparams).model_fn(features) + logits = tf.concat(sharded_logits, 0) + + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + res = sess.run(logits) + + self.assertEqual(res.shape, (batch_size, target_length, 1, target_out)) + + def testGeneExpressionModels(self): + models_hparams = [(gene_expression.GeneExpressionConv, + gene_expression_conv_test())] + for model_cls, hparams in models_hparams: + hparams.add_hparam("data_dir", None) + p_hparams = gene_data.GeneExpressionCAGE10().internal_hparams(hparams) + hparams.problems = [p_hparams] + self._testModel(hparams, model_cls) + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensor2tensor/models/modalities.py b/tensor2tensor/models/modalities.py index 50a3da55d..20464c0a2 100644 --- a/tensor2tensor/models/modalities.py +++ b/tensor2tensor/models/modalities.py @@ -166,7 +166,7 @@ def targets_bottom(self, inputs): def top(self, body_output, _): with tf.variable_scope("rgb_softmax"): - # seperate embedding for each channel + # separate embedding for each channel # assuming the body output returns a tensor of shape # [batch_size, rows, cols, channels, self._body_input_depth] body_output_split = tf.split(body_output, self._channels, axis=3) @@ -488,10 +488,15 @@ def top_sharded(self, sharded_targets) def l2_loss(predictions, targets): - return tf.reduce_mean(tf.pow(predictions - targets, 2)) - - loss = data_parallelism(l2_loss, sharded_predictions, sharded_targets) - return sharded_predictions, tf.add_n(loss) + with tf.name_scope("l2"): + weights = weights_fn(targets) + l2 = tf.pow(predictions - targets, 2) + return tf.reduce_sum(l2 * weights), tf.reduce_sum(weights) + + loss_num, loss_den = data_parallelism(l2_loss, sharded_predictions, + sharded_targets) + loss = tf.add_n(loss_num) / tf.maximum(1.0, tf.add_n(loss_den)) + return sharded_predictions, loss @registry.register_image_modality("identity_no_pad") diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index e92ddd3ed..907a801cf 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -27,6 +27,7 @@ from tensor2tensor.models import attention_lm_moe from tensor2tensor.models import bluenet from tensor2tensor.models import bytenet +from tensor2tensor.models import gene_expression from tensor2tensor.models import long_answer from tensor2tensor.models import lstm from tensor2tensor.models import modalities diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 29f44b574..ae9ce3882 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -39,7 +39,7 @@ class Metrics(object): RMSE = "rmse" -def padded_rmse(predictions, labels, weights_fn=common_layers.weights_all): +def padded_rmse(predictions, labels, weights_fn=common_layers.weights_nonzero): predictions, labels = common_layers.pad_with_zeros(predictions, labels) targets = labels weights = weights_fn(targets) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index bf42c36cc..1dbb84d4f 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -45,6 +45,7 @@ import tensorflow as tf from tensorflow.contrib.learn.python.learn import learn_runner +from tensorflow.python import debug from tensorflow.python.ops import init_ops # Number of samples to draw for an image input (in such cases as captioning) @@ -55,6 +56,8 @@ flags.DEFINE_bool("registry_help", False, "If True, logs the contents of the registry and exits.") +flags.DEFINE_bool("tfdbg", False, + "If True, use the TF debugger CLI on train/eval.") flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("model", "", "Which model to use.") flags.DEFINE_string("hparams_set", "", "Which parameters to use.") @@ -168,6 +171,12 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, FLAGS.objective not in eval_metrics): raise ValueError("Tuning objective %s not among evaluation metrics %s" % (FLAGS.objective, eval_metrics.keys())) + train_monitors = [] + eval_hooks = [] + if FLAGS.tfdbg: + hook = debug.LocalCLIDebugHook() + train_monitors.append(hook) + eval_hooks.append(hook) return tf.contrib.learn.Experiment( estimator=estimator, train_input_fn=input_fns["train"], @@ -176,7 +185,8 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, train_steps=train_steps, eval_steps=eval_steps, min_eval_frequency=FLAGS.local_eval_frequency, - train_monitors=[]) + train_monitors=train_monitors, + eval_hooks=eval_hooks) def create_experiment_components(hparams, output_dir, data_dir, model_name): From 175a125927961a366a023fa4925c15e39561e003 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 26 Jul 2017 19:19:52 -0700 Subject: [PATCH 0179/4095] v1.1.2 PiperOrigin-RevId: 163290663 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9da5293b9..66d51d7e1 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.1', + version='1.1.2', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 36766d84aa3da941be1f74efb10fbc4b409500d4 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 27 Jul 2017 11:49:11 -0700 Subject: [PATCH 0180/4095] internal-external fixes and enable tests PiperOrigin-RevId: 163370562 --- setup.py | 3 + .../data_generators/concatenate_examples.py | 21 +++--- tensor2tensor/data_generators/inspect.py | 24 +++---- .../text_encoder_build_subword.py | 36 +++++----- tensor2tensor/data_generators/tokenizer.py | 2 +- .../data_generators/tokenizer_test.py | 65 +++++++++---------- tensor2tensor/utils/trainer_utils.py | 4 +- 7 files changed, 74 insertions(+), 81 deletions(-) diff --git a/setup.py b/setup.py index 66d51d7e1..6be9aba04 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ url='http://github.com/tensorflow/tensor2tensor', license='Apache 2.0', packages=find_packages(), + package_data={'tensor2tensor.data_generators': ['test_data/*']}, scripts=[ 'tensor2tensor/bin/t2t-trainer', 'tensor2tensor/bin/t2t-datagen', @@ -26,6 +27,8 @@ 'tensorflow': ['tensorflow>=1.2.0rc1'], 'tensorflow_gpu': ['tensorflow-gpu>=1.2.0rc1'], }, + tests_require=['nose'], + test_suite='nose.collector', classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', diff --git a/tensor2tensor/data_generators/concatenate_examples.py b/tensor2tensor/data_generators/concatenate_examples.py index 60ac7ea8f..9d7678fc4 100644 --- a/tensor2tensor/data_generators/concatenate_examples.py +++ b/tensor2tensor/data_generators/concatenate_examples.py @@ -34,7 +34,7 @@ + subtokenizer.encode("target French Je t'aime.") + [1]) } -We add a dummy feature "inputs"=[0] for compatability with seq-to-seq models. +We add a dummy feature "inputs"=[0] for compatibility with seq-to-seq models. If FLAGS.combine_to_length is nonzero, then we combine multiple examples into examples of a constant length, possibly with some padding at the end. @@ -53,34 +53,33 @@ from tensor2tensor.data_generators import text_encoder import tensorflow as tf -tf.app.flags.DEFINE_string("vocab_file", "", - "SubwordTextEncoder vocabulary file") +tf.flags.DEFINE_string("vocab_file", "", "SubwordTextEncoder vocabulary file") -tf.app.flags.DEFINE_boolean( +tf.flags.DEFINE_boolean( "random_reverse", False, "If true, write half of the example with source/target reversed") -tf.app.flags.DEFINE_boolean( +tf.flags.DEFINE_boolean( "count_everything", False, "If true, assign positive weights to designators, source and target. " "If false, assign positive weights only to target.") -tf.app.flags.DEFINE_string("source_domain_string", "English", "") -tf.app.flags.DEFINE_string("target_domain_string", "French", "") +tf.flags.DEFINE_string("source_domain_string", "English", "") +tf.flags.DEFINE_string("target_domain_string", "French", "") -tf.app.flags.DEFINE_integer( +tf.flags.DEFINE_integer( "combine_to_length", 0, "If positive, concatenate examples to form examples with target length " " equal to this value. Targets are padded with subtoken id=0.") -tf.app.flags.DEFINE_string("in_file", "", "input filename") +tf.flags.DEFINE_string("in_file", "", "input filename") -tf.app.flags.DEFINE_string( +tf.flags.DEFINE_string( "out_prefix", "/usr/local/google/tmp/concat", "The output filename is equal to out_prefix plus " "the last 15 characters of in_file. (e.g. -00001-of-00100)") -FLAGS = tf.app.flags.FLAGS +FLAGS = tf.flags.FLAGS def _make_example(ids, weights, raw_num_bytes): diff --git a/tensor2tensor/data_generators/inspect.py b/tensor2tensor/data_generators/inspect.py index 6ba054d3c..848b74a2d 100644 --- a/tensor2tensor/data_generators/inspect.py +++ b/tensor2tensor/data_generators/inspect.py @@ -32,19 +32,16 @@ import tensorflow as tf -tf.app.flags.DEFINE_string("subword_text_encoder_filename", "", - "SubwordTextEncoder vocabulary file") -tf.app.flags.DEFINE_string("token_text_encoder_filename", "", - "TokenTextEncoder vocabulary file") -tf.app.flags.DEFINE_bool("byte_text_encoder", False, - "use a ByteTextEncoder") -tf.app.flags.DEFINE_string("input_filename", "", "input filename") -tf.app.flags.DEFINE_bool("print_inputs", False, - "Print decoded inputs to stdout") -tf.app.flags.DEFINE_bool("print_targets", False, - "Print decoded targets to stdout") +tf.flags.DEFINE_string("subword_text_encoder_filename", "", + "SubwordTextEncoder vocabulary file") +tf.flags.DEFINE_string("token_text_encoder_filename", "", + "TokenTextEncoder vocabulary file") +tf.flags.DEFINE_bool("byte_text_encoder", False, "use a ByteTextEncoder") +tf.flags.DEFINE_string("input_filename", "", "input filename") +tf.flags.DEFINE_bool("print_inputs", False, "Print decoded inputs to stdout") +tf.flags.DEFINE_bool("print_targets", False, "Print decoded targets to stdout") -FLAGS = tf.app.flags.FLAGS +FLAGS = tf.flags.FLAGS def main(_): @@ -53,8 +50,7 @@ def main(_): encoder = text_encoder.SubwordTextEncoder( FLAGS.subword_text_encoder_filename) elif FLAGS.token_text_encoder_filename: - encoder = text_encoder.TokenTextEncoder( - FLAGS.token_text_encoder_filename) + encoder = text_encoder.TokenTextEncoder(FLAGS.token_text_encoder_filename) elif FLAGS.byte_text_encoder: encoder = text_encoder.ByteTextEncoder() else: diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py index 88dfac116..47e82a176 100644 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -39,19 +39,18 @@ import tensorflow as tf -tf.app.flags.DEFINE_string('output_filename', '/tmp/my.subword_text_encoder', - 'where to store the SubwordTextEncoder') -tf.app.flags.DEFINE_string('corpus_filepattern', '', - 'Corpus of one or more text files') -tf.app.flags.DEFINE_string('vocab_filepattern', '', - 'One or more vocabulary files ' - '(one word per line as "word,count")') -tf.app.flags.DEFINE_integer('min_count', 5, 'Minimum subtoken count in corpus') -tf.app.flags.DEFINE_integer('corpus_max_lines', 10000, - 'How many lines of corpus to read') -tf.app.flags.DEFINE_integer('num_iterations', 4, 'Number of iterations') -tf.app.flags.DEFINE_bool('split_on_newlines', True, 'Break corpus into lines.') -FLAGS = tf.app.flags.FLAGS +tf.flags.DEFINE_string('output_filename', '/tmp/my.subword_text_encoder', + 'where to store the SubwordTextEncoder') +tf.flags.DEFINE_string('corpus_filepattern', '', + 'Corpus of one or more text files') +tf.flags.DEFINE_string('vocab_filepattern', '', 'One or more vocabulary files ' + '(one word per line as "word,count")') +tf.flags.DEFINE_integer('min_count', 5, 'Minimum subtoken count in corpus') +tf.flags.DEFINE_integer('corpus_max_lines', 10000, + 'How many lines of corpus to read') +tf.flags.DEFINE_integer('num_iterations', 4, 'Number of iterations') +tf.flags.DEFINE_bool('split_on_newlines', True, 'Break corpus into lines.') +FLAGS = tf.flags.FLAGS def main(unused_argv): @@ -61,20 +60,21 @@ def main(unused_argv): elif FLAGS.corpus_filepattern: token_counts = tokenizer.corpus_token_counts( - FLAGS.corpus_filepattern, FLAGS.corpus_max_lines, + FLAGS.corpus_filepattern, + FLAGS.corpus_max_lines, split_on_newlines=FLAGS.split_on_newlines) elif FLAGS.vocab_filepattern: - token_counts = tokenizer.vocab_token_counts( - FLAGS.vocab_filepattern, FLAGS.corpus_max_lines) + token_counts = tokenizer.vocab_token_counts(FLAGS.vocab_filepattern, + FLAGS.corpus_max_lines) else: raise ValueError( 'Must provide one of --corpus_filepattern or --vocab_filepattern') encoder = text_encoder.SubwordTextEncoder() - encoder.build_from_token_counts( - token_counts, FLAGS.min_count, FLAGS.num_iterations) + encoder.build_from_token_counts(token_counts, FLAGS.min_count, + FLAGS.num_iterations) encoder.store_to_file(FLAGS.output_fn) diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 5cb9fd32b..0e8daa75f 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -121,7 +121,7 @@ def _read_filepattern(filepattern, max_lines=None, split_on_newlines=True): The contents of the files as lines, if split_on_newlines is True, or the entire contents of each file if False. """ - filenames = tf.gfile.Glob(filepattern) + filenames = sorted(tf.gfile.Glob(filepattern)) lines_read = 0 for filename in filenames: with tf.gfile.Open(filename) as f: diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py index ad4a3ff04..0c299bd0b 100644 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -30,9 +30,10 @@ from tensor2tensor.data_generators import tokenizer import tensorflow as tf -FLAGS = tf.app.flags.FLAGS +FLAGS = tf.flags.FLAGS -_TESTDATA = "google3/third_party/py/tensor2tensor/data_generators/test_data" +pkg_dir, _ = os.path.split(__file__) +_TESTDATA = os.path.join(pkg_dir, "test_data") class TokenizerTest(tf.test.TestCase): @@ -41,18 +42,13 @@ def test_encode(self): self.assertListEqual( [u"Dude", u" - ", u"that", u"'", u"s", u"so", u"cool", u"."], tokenizer.encode(u"Dude - that's so cool.")) - self.assertListEqual( - [u"Łukasz", u"est", u"né", u"en", u"1981", u"."], - tokenizer.encode(u"Łukasz est né en 1981.")) - self.assertListEqual( - [u" ", u"Spaces", u"at", u"the", u"ends", u" "], - tokenizer.encode(u" Spaces at the ends ")) - self.assertListEqual( - [u"802", u".", u"11b"], - tokenizer.encode(u"802.11b")) - self.assertListEqual( - [u"two", u". \n", u"lines"], - tokenizer.encode(u"two. \nlines")) + self.assertListEqual([u"Łukasz", u"est", u"né", u"en", u"1981", u"."], + tokenizer.encode(u"Łukasz est né en 1981.")) + self.assertListEqual([u" ", u"Spaces", u"at", u"the", u"ends", u" "], + tokenizer.encode(u" Spaces at the ends ")) + self.assertListEqual([u"802", u".", u"11b"], tokenizer.encode(u"802.11b")) + self.assertListEqual([u"two", u". \n", u"lines"], + tokenizer.encode(u"two. \nlines")) def test_decode(self): self.assertEqual( @@ -62,8 +58,7 @@ def test_decode(self): def test_invertibility_on_random_strings(self): for _ in xrange(1000): - s = u"".join( - six.unichr(random.randint(0, 65535)) for _ in xrange(10)) + s = u"".join(six.unichr(random.randint(0, 65535)) for _ in xrange(10)) self.assertEqual(s, tokenizer.decode(tokenizer.encode(s))) @@ -71,10 +66,8 @@ class TestTokenCounts(tf.test.TestCase): def setUp(self): super(TestTokenCounts, self).setUp() - self.corpus_path = os.path.join( - FLAGS.test_srcdir, _TESTDATA, "corpus-*.txt") - self.vocab_path = os.path.join( - FLAGS.test_srcdir, _TESTDATA, "vocab-*.txt") + self.corpus_path = os.path.join(_TESTDATA, "corpus-*.txt") + self.vocab_path = os.path.join(_TESTDATA, "vocab-*.txt") def test_corpus_token_counts_split_on_newlines(self): token_counts = tokenizer.corpus_token_counts( @@ -117,31 +110,33 @@ def test_corpus_token_counts_no_split_with_max_lines(self): self.assertIn(u"slept", token_counts) self.assertNotIn(u"Mitch", token_counts) - self.assertDictContainsSubset( - {u".\n\n": 1, u"\n": 2, u".\n": 1}, token_counts) + self.assertDictContainsSubset({ + u".\n\n": 1, + u"\n": 2, + u".\n": 1 + }, token_counts) def test_vocab_token_counts(self): - token_counts = tokenizer.vocab_token_counts( - self.vocab_path, 0) + token_counts = tokenizer.vocab_token_counts(self.vocab_path, 0) expected = { - "lollipop": 8, - "reverberated": 12, - "kattywampus": 11, - "balderdash": 10, - "jiggery-pokery": 14, + u"lollipop": 8, + u"reverberated": 12, + u"kattywampus": 11, + u"balderdash": 10, + u"jiggery-pokery": 14, } self.assertDictEqual(expected, token_counts) def test_vocab_token_counts_with_max_lines(self): - token_counts = tokenizer.vocab_token_counts( - self.vocab_path, 4) + # vocab-1 has 2 lines, vocab-2 has 3 + token_counts = tokenizer.vocab_token_counts(self.vocab_path, 4) expected = { - "lollipop": 8, - "reverberated": 12, - "kattywampus": 11, - "balderdash": 10, + u"lollipop": 8, + u"reverberated": 12, + u"kattywampus": 11, + u"balderdash": 10, } self.assertDictEqual(expected, token_counts) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 1dbb84d4f..bf105c5ae 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -167,7 +167,7 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, model_name=model_name) eval_metrics = metrics.create_evaluation_metrics( zip(FLAGS.problems.split("-"), hparams.problem_instances)) - if ("autotune" in FLAGS and FLAGS.autotune and + if (hasattr(FLAGS, "autotune") and FLAGS.autotune and FLAGS.objective not in eval_metrics): raise ValueError("Tuning objective %s not among evaluation metrics %s" % (FLAGS.objective, eval_metrics.keys())) @@ -572,7 +572,7 @@ def nth_model(n): # Define the train_op for the TRAIN mode. opt = _ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) tf.logging.info("Computing gradients for global model_fn.") - opt_summaries = ["learning_rate", "loss", "global_gradient_norm"] + opt_summaries = ["learning_rate", "loss"] if hparams.summarize_grads: opt_summaries.extend(["gradients", "gradient_norm"]) train_op = tf.contrib.layers.optimize_loss( From 2e55ec24d728be1323ba3b20b08facb4abf8004e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 27 Jul 2017 12:30:18 -0700 Subject: [PATCH 0181/4095] Modality.loss PiperOrigin-RevId: 163376624 --- tensor2tensor/models/modalities.py | 68 ++++++----------------- tensor2tensor/models/modalities_test.py | 10 ++-- tensor2tensor/utils/modality.py | 73 +++++++++++++------------ tensor2tensor/utils/t2t_model.py | 13 +++-- 4 files changed, 69 insertions(+), 95 deletions(-) diff --git a/tensor2tensor/models/modalities.py b/tensor2tensor/models/modalities.py index 20464c0a2..c57a97905 100644 --- a/tensor2tensor/models/modalities.py +++ b/tensor2tensor/models/modalities.py @@ -96,12 +96,11 @@ def targets_bottom(self, x): else: return self.bottom_simple(x, "target_emb", reuse=None) - def top(self, body_output, targets): + def top(self, body_output, _): """Generate logits. Args: body_output: A Tensor with shape [batch, p0, p1, body_input_depth] - targets: A Tensor with shape [batch, p0, p1, 1] Returns: logits: A Tensor with shape [batch, p0, p1, ?, vocab_size]. """ @@ -192,18 +191,11 @@ def top(self, body_output, _): return logits - def top_sharded(self, - sharded_body_output, - sharded_targets, - data_parallelism, - weights_fn=common_layers.weights_all): + def loss(self, top_out, targets, weights_fn=common_layers.weights_all): # Call the default implementation, but weight 1.0 on 0s by default. # (Since we're processing images and so have no padding and some pixel 0s.) - return super(SmallImageModality, self).top_sharded( - sharded_body_output, - sharded_targets, - data_parallelism, - weights_fn=weights_fn) + return super(SmallImageModality, self).loss( + top_out, targets, weights_fn=weights_fn) @registry.register_image_modality("default") @@ -425,18 +417,11 @@ def top(self, body_output, _): res = common_layers.conv(x, self._vocab_size, (1, 1)) return tf.expand_dims(res, 3) - def top_sharded(self, - sharded_body_output, - sharded_targets, - data_parallelism, - weights_fn=common_layers.weights_all): + def loss(self, top_out, targets, weights_fn=common_layers.weights_all): # Call the default implementation, but weight 1.0 on 0s by default. - # (Since we're processing images and so have no padding and some labels 0.) - return super(ClassLabelModality, self).top_sharded( - sharded_body_output, - sharded_targets, - data_parallelism, - weights_fn=weights_fn) + # (Since we're processing images and so have no padding and some pixel 0s.) + return super(ClassLabelModality, self).loss( + top_out, targets, weights_fn=weights_fn) @registry.register_class_label_modality("class_label_2d") @@ -479,24 +464,12 @@ def top(self, body_output, _): with tf.variable_scope("real"): return tf.layers.dense(body_output, self._vocab_size) - def top_sharded(self, - sharded_body_output, - sharded_targets, - data_parallelism, - weights_fn=common_layers.weights_nonzero): - sharded_predictions = data_parallelism(self.top, sharded_body_output, - sharded_targets) - - def l2_loss(predictions, targets): - with tf.name_scope("l2"): - weights = weights_fn(targets) - l2 = tf.pow(predictions - targets, 2) - return tf.reduce_sum(l2 * weights), tf.reduce_sum(weights) - - loss_num, loss_den = data_parallelism(l2_loss, sharded_predictions, - sharded_targets) - loss = tf.add_n(loss_num) / tf.maximum(1.0, tf.add_n(loss_den)) - return sharded_predictions, loss + def loss(self, top_out, targets, weights_fn=common_layers.weights_nonzero): + predictions = top_out + with tf.name_scope("l2"): + weights = weights_fn(targets) + l2 = tf.pow(predictions - targets, 2) + return tf.reduce_sum(l2 * weights), tf.reduce_sum(weights) @registry.register_image_modality("identity_no_pad") @@ -513,15 +486,8 @@ def bottom(self, x): def top(self, body_output, _): return body_output - def top_sharded(self, - sharded_body_output, - sharded_targets, - data_parallelism, - weights_fn=common_layers.weights_all): + def loss(self, top_out, targets, weights_fn=common_layers.weights_all): # Call the default implementation, but weight 1.0 on 0s by default. # (Since we're processing images and so have no padding and some pixel 0s.) - return super(IdentityModalityNoPad, self).top_sharded( - sharded_body_output, - sharded_targets, - data_parallelism, - weights_fn=weights_fn) + return super(IdentityModalityNoPad, self).loss( + top_out, targets, weights_fn=weights_fn) diff --git a/tensor2tensor/models/modalities_test.py b/tensor2tensor/models/modalities_test.py index 4254c6b04..9130613b9 100644 --- a/tensor2tensor/models/modalities_test.py +++ b/tensor2tensor/models/modalities_test.py @@ -41,8 +41,8 @@ def testSymbolModalityInputs(self): hidden_size=hidden_size, multiply_embedding_mode="sqrt_depth", shared_embedding_and_softmax_weights=0) - x = -1 + np.random.random_integers(vocab_size, size=( - batch_size, length, 1, 1)) + x = -1 + np.random.random_integers( + vocab_size, size=(batch_size, length, 1, 1)) m = modalities.SymbolModality(model_hparams, vocab_size) data_parallelism = expert_utils.Parallelism( ["/device:CPU:0"] * num_datashards, reuse=True) @@ -76,8 +76,10 @@ def testSymbolModalityTargets(self): with self.test_session() as session: sharded_body_output = tf.split(tf.to_float(body_output), num_datashards) sharded_targets = tf.split(targets, num_datashards) - sharded_logits, train_loss = m.top_sharded( - sharded_body_output, sharded_targets, data_parallelism) + sharded_logits = m.top_sharded(sharded_body_output, sharded_targets, + data_parallelism) + train_loss = m.loss_sharded(sharded_logits, sharded_targets, + data_parallelism) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res1, res2 = session.run((logits, train_loss)) diff --git a/tensor2tensor/utils/modality.py b/tensor2tensor/utils/modality.py index 72169be1f..5c596e10f 100644 --- a/tensor2tensor/utils/modality.py +++ b/tensor2tensor/utils/modality.py @@ -31,23 +31,26 @@ class Modality(object): """Abstract Modality class for data transformations. An abstract class representing modalities for transforming data to a space - interpretable by sequence models. It has 3 functions: - * bottom: called on inputs entering the model. + interpretable by T2T models. It has 4 functions: + * bottom: called on inputs entering the model. * targets_bottom: called on targets entering the model (e.g., the decoder). - * top: called on targets to generate predictions. - - For example, think about a modality for images. The inputs_bottom function - represents the part of the model applied to an incoming image, e.g., an entry - flow of a convolutional network. The targets_top function represents the top - part of a model that is generating images, e.g., a PixelCNN network. The final - function targets_bottom represents the auto-regressive part of the network. - It is applied to the already-generated part of an image, which is given to - the decoder to generate the next part. In some cases, e.g., for text, it is - the same as the inputs_bottom function, and that is the default we use. But, - e.g., for images, a different function might be needed to regress properly. - - All 3 functions have simple and sharded versions. A sub-class only needs - to implement the simple version, the default sharding will be used then. + * top: called on model outputs to generate predictions (e.g., logits). + * loss: called on predictions (outputs of top) and targets. + + For example, think about a modality for images: + * `bottom` represents the part of the model applied to an incoming image, + e.g., an entry flow of a convolutional network. + * `top` represents the top part of a model that is generating images, e.g., a + PixelCNN network. + * `targets_bottom` represents the auto-regressive part of the network. It is + applied to the already-generated part of an image, which is given to the + decoder to generate the next part. In some cases, e.g., for text, it is the + same as the `bottom` function, and that is the default we use. But, e.g., + for images, a different function might be needed to regress properly. + * `loss` would compare the generated image to the target image and score it. + + All the functions have simple and sharded versions. A sub-class only needs to + implement the simple version, the default sharding will be used then. """ def __init__(self, model_hparams, vocab_size=None): @@ -116,7 +119,7 @@ def targets_bottom_sharded(self, xs, data_parallelism): return data_parallelism(self.targets_bottom, xs) def top(self, body_output, targets): - """Transform one shard of output. + """Generate predictions/logits for one shard of output. Most classes will override this function. @@ -129,12 +132,8 @@ def top(self, body_output, targets): """ raise NotImplementedError("Abstract Method") - def top_sharded(self, - sharded_body_output, - sharded_targets, - data_parallelism, - weights_fn=common_layers.weights_nonzero): - """Transform all shards of targets. + def top_sharded(self, sharded_body_output, sharded_targets, data_parallelism): + """Generate predictions/logits for all shards. Classes with cross-shard interaction will override this function. @@ -142,18 +141,24 @@ def top_sharded(self, sharded_body_output: A list of Tensors. sharded_targets: A list of Tensors. data_parallelism: a expert_utils.Parallelism object. - weights_fn: function from targets to target weights. Returns: - shaded_logits: A list of Tensors. - training_loss: a Scalar. + sharded_logits: A list of Tensors. """ - sharded_logits = data_parallelism(self.top, sharded_body_output, - sharded_targets) - loss_num, loss_den = data_parallelism( - common_layers.padded_cross_entropy, - sharded_logits, - sharded_targets, + return data_parallelism(self.top, sharded_body_output, sharded_targets) + + def loss(self, top_out, targets, weights_fn=common_layers.weights_nonzero): + """Compute loss numerator and denominator for one shard of output.""" + logits = top_out + return common_layers.padded_cross_entropy( + logits, + targets, self._model_hparams.label_smoothing, weights_fn=weights_fn) - loss = tf.add_n(loss_num) / tf.maximum(1.0, tf.add_n(loss_den)) - return sharded_logits, loss + + def loss_sharded(self, sharded_top_out, sharded_targets, data_parallelism): + """Compute loss for all shards.""" + sharded_loss_num, sharded_loss_den = data_parallelism( + self.loss, sharded_top_out, sharded_targets) + loss = tf.add_n(sharded_loss_num) / tf.maximum(1.0, + tf.add_n(sharded_loss_den)) + return loss diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 66e40d495..f67cc9540 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -424,8 +424,10 @@ def model_fn(self, features, skip=False, last_position_only=False): with tf.variable_scope(target_modality.name, reuse=target_reuse): if not last_position_only: - sharded_logits, training_loss = (target_modality.top_sharded( - body_outputs, sharded_features["targets"], self._data_parallelism)) + sharded_logits = target_modality.top_sharded( + body_outputs, sharded_features["targets"], self._data_parallelism) + training_loss = target_modality.loss_sharded( + sharded_logits, sharded_features["targets"], self._data_parallelism) training_loss *= self._problem_hparams.loss_multiplier else: @@ -439,10 +441,9 @@ def model_fn(self, features, skip=False, last_position_only=False): tf.expand_dims(target_shard[:, -1:, :, :], axis=[1]) for target_shard in sharded_features["targets"] ] - sharded_logits, training_loss = (target_modality.top_sharded( - last_position_body_outputs, last_position_targets, - self._data_parallelism)) - + sharded_logits = target_modality.top_sharded(last_position_body_outputs, + last_position_targets, + self._data_parallelism) training_loss = None tf.logging.info("This model_fn took %.3f sec." % (time.time() - start_time)) From b82bdfd13a94a05a82dc6755126c1ad1bcc6c02c Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 27 Jul 2017 16:05:10 -0700 Subject: [PATCH 0182/4095] correct metrics and some generator and python3 corrections. PiperOrigin-RevId: 163402917 --- tensor2tensor/data_generators/gene_expression.py | 12 ++++++------ tensor2tensor/data_generators/generator_utils.py | 2 +- tensor2tensor/data_generators/text_encoder.py | 4 ++-- tensor2tensor/data_generators/wmt.py | 7 ++++--- tensor2tensor/utils/metrics.py | 3 +++ 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/data_generators/gene_expression.py b/tensor2tensor/data_generators/gene_expression.py index 31d1cd150..1bb9d4ab3 100644 --- a/tensor2tensor/data_generators/gene_expression.py +++ b/tensor2tensor/data_generators/gene_expression.py @@ -110,10 +110,10 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): # Collect created shard processes to start and join processes = [] - datasets = [(self.training_filepaths, self.num_shards, "train", - num_train_examples), (self.dev_filepaths, 1, "valid", - num_dev_examples), - (self.test_filepaths, 1, "test", num_test_examples)] + datasets = [ + (self.training_filepaths, self.num_shards, "train", num_train_examples), + (self.dev_filepaths, 10, "valid", num_dev_examples), + (self.test_filepaths, 10, "test", num_test_examples)] for fname_fn, nshards, key_prefix, num_examples in datasets: outfiles = fname_fn(data_dir, nshards, shuffled=False) all_filepaths.extend(outfiles) @@ -125,8 +125,8 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): start_idx, end_idx)) processes.append(p) - # 1 per training shard + dev + test - assert len(processes) == self.num_shards + 2 + # 1 per training shard + 10 for dev + 10 for test + assert len(processes) == self.num_shards + 20 # Start and wait for processes in batches num_batches = int( diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 866a0f3e7..5c7f9f2a1 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -305,7 +305,7 @@ def generate(): # Use Tokenizer to count the word occurrences. with tf.gfile.GFile(filepath, mode="r") as source_file: - file_byte_budget = 3.5e5 if "en" in filepath else 7e5 + file_byte_budget = 3.5e5 if filepath.endswith("en") else 7e5 for line in source_file: if file_byte_budget <= 0: break diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 4bb1c875d..ff284bcc6 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -53,7 +53,7 @@ # '\u' is converted to '_' # '\\' is converted to '\' # '\213;' is converted to unichr(213) -_UNESCAPE_REGEX = re.compile(ur"\\u|\\\\|\\([0-9]+);") +_UNESCAPE_REGEX = re.compile(r"\\u|\\\\|\\([0-9]+);") _ESCAPE_CHARS = set(u"\\_;0123456789") @@ -219,7 +219,7 @@ def _escape_token(token, alphabet): token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") ret = [ - c if c in alphabet and c != u"\n" else ur"\%d;" % ord(c) + c if c in alphabet and c != u"\n" else r"\%d;" % ord(c) for c in token] return u"".join(ret) + "_" diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 97b191096..7fde9b3b4 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -404,14 +404,15 @@ def _compile_data(tmp_dir, datasets, filename): generator_utils.maybe_download(tmp_dir, compressed_filename, url) if not (os.path.exists(lang1_filepath) and os.path.exists(lang2_filepath)): - mode = "r:gz" if "gz" in compressed_filepath else "r" + # For .tar.gz and .tgz files, we read compressed. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" with tarfile.open(compressed_filepath, mode) as corpus_tar: corpus_tar.extractall(tmp_dir) - if ".gz" in lang1_filepath: + if lang1_filepath.endswith(".gz"): new_filepath = lang1_filepath.strip(".gz") generator_utils.gunzip_file(lang1_filepath, new_filepath) lang1_filepath = new_filepath - if ".gz" in lang2_filepath: + if lang2_filepath.endswith(".gz"): new_filepath = lang2_filepath.strip(".gz") generator_utils.gunzip_file(lang2_filepath, new_filepath) lang2_filepath = new_filepath diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index ae9ce3882..4435707cd 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -59,6 +59,7 @@ def padded_accuracy_topk(predictions, effective_k = tf.minimum(k, tf.shape(padded_predictions)[-1]) _, outputs = tf.nn.top_k(padded_predictions, k=effective_k) outputs = tf.to_int32(outputs) + padded_labels = tf.to_int32(padded_labels) padded_labels = tf.expand_dims(padded_labels, axis=-1) padded_labels += tf.zeros_like(outputs) # Pad to same shape. same = tf.to_float(tf.equal(outputs, padded_labels)) @@ -82,6 +83,7 @@ def padded_sequence_accuracy(predictions, predictions, labels) weights = weights_fn(padded_labels) outputs = tf.to_int32(tf.argmax(padded_predictions, axis=-1)) + padded_labels = tf.to_int32(padded_labels) not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights axis = list(range(1, len(outputs.get_shape()))) correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis)) @@ -106,6 +108,7 @@ def padded_accuracy(predictions, predictions, labels) weights = weights_fn(padded_labels) outputs = tf.to_int32(tf.argmax(padded_predictions, axis=-1)) + padded_labels = tf.to_int32(padded_labels) return tf.to_float(tf.equal(outputs, padded_labels)), weights From 8ad79b60d29bef80c7724f5d5d5dfa0ff2ff8cab Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 27 Jul 2017 16:39:45 -0700 Subject: [PATCH 0183/4095] Move examples reading to new Datasets API PiperOrigin-RevId: 163407588 --- .../data_generators/gene_expression.py | 4 +- tensor2tensor/utils/data_reader.py | 90 +++++++++---------- tensor2tensor/utils/data_reader_test.py | 8 +- tensor2tensor/utils/trainer_utils.py | 35 ++++---- 4 files changed, 64 insertions(+), 73 deletions(-) diff --git a/tensor2tensor/data_generators/gene_expression.py b/tensor2tensor/data_generators/gene_expression.py index 1bb9d4ab3..60e38a90f 100644 --- a/tensor2tensor/data_generators/gene_expression.py +++ b/tensor2tensor/data_generators/gene_expression.py @@ -168,8 +168,8 @@ def preprocess_examples(self, examples, mode): # Reshape targets examples["targets"] = tf.reshape(examples["targets"], - [-1, 1, self.num_output_predictions]) - examples["targets_mask"] = tf.reshape(examples["targets_mask"], [-1, 1, 1]) + [-1, self.num_output_predictions]) + examples["targets_mask"] = tf.reshape(examples["targets_mask"], [-1, 1]) # Set masked targets to 0 (i.e. pad) so that loss and metrics ignore them. # Add epsilon because some unmasked labels are actually 0. diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 24dd31485..e78e22344 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -20,6 +20,7 @@ import math import os +import random # Dependency imports @@ -33,19 +34,15 @@ import tensorflow as tf -def examples_queue(data_sources, - data_fields_to_features, - training, - capacity=32, - data_items_to_decoders=None, - data_items_to_decode=None): - """Contruct a queue of training or evaluation examples. +def examples_reader(data_sources, + data_fields_to_features, + training, + capacity=32, + data_items_to_decoders=None, + data_items_to_decode=None): + """Reads Examples from data_sources and decodes to Tensors. - This function will create a reader from files given by data_sources, - then enqueue the tf.Examples from these files, shuffling if training - is true, and finally parse these tf.Examples to tensors. - - The dictionary data_fields_to_features for an image dataset can be this: + The dictionary data_fields_to_features for an image dataset can be: data_fields_to_features = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), @@ -54,7 +51,7 @@ def examples_queue(data_sources, [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)), } - and for a simple algorithmic dataset with variable-length data it is this: + and for a simple algorithmic dataset with variable-length data it is: data_fields_to_features = { 'inputs': tf.VarLenFeature(tf.int64), @@ -63,7 +60,7 @@ def examples_queue(data_sources, The data_items_to_decoders dictionary argument can be left as None if there is no decoding to be performed. But, e.g. for images, it should be set so that - the images are decoded from the features, e.g., like this for MNIST: + the images are decoded from the features, e.g., for MNIST: data_items_to_decoders = { 'image': tfexample_decoder.Image( @@ -83,7 +80,7 @@ def examples_queue(data_sources, data_fields_to_features: a dictionary from data fields in the data sources to features, such as tf.VarLenFeature(tf.int64), see above for examples. training: a Boolean, whether to read for training or evaluation. - capacity: integer, queue capacity; set to 2 * max_batch_size or more. + capacity: integer, buffer capacity; set to 2 * max_batch_size or more. data_items_to_decoders: a dictionary mapping data items (that will be in the returned result) to decoders that will decode them using features defined in data_fields_to_features; see above for examples. By default @@ -93,43 +90,40 @@ def examples_queue(data_sources, Returns: A dictionary mapping each data_field to a corresponding 1D int64 tensor - read from the created queue. - - Raises: - ValueError: if no files are found with the provided data_prefix or no data - fields were provided. + read from the created Dataset. """ - with tf.name_scope("examples_queue"): - # Read serialized examples using slim parallel_reader. - num_epochs = None if training else 1 - data_files = tf.contrib.slim.parallel_reader.get_data_files(data_sources) - num_readers = min(4 if training else 1, len(data_files)) - _, example_serialized = tf.contrib.slim.parallel_reader.parallel_read( - data_sources, - tf.TFRecordReader, - num_epochs=num_epochs, - shuffle=training, - capacity=2 * capacity, - min_after_dequeue=capacity, - num_readers=num_readers) - - if data_items_to_decoders is None: - data_items_to_decoders = { + + def decode_record(record): + """Serialized Example to dict of <feature name, Tensor>.""" + example_serialized = record + item_decoders = data_items_to_decoders + if item_decoders is None: + item_decoders = { field: tf.contrib.slim.tfexample_decoder.Tensor(field) for field in data_fields_to_features } decoder = tf.contrib.slim.tfexample_decoder.TFExampleDecoder( - data_fields_to_features, data_items_to_decoders) + data_fields_to_features, item_decoders) - if data_items_to_decode is None: - data_items_to_decode = list(data_items_to_decoders) + decode_items = data_items_to_decode + if decode_items is None: + decode_items = list(item_decoders) - decoded = decoder.decode(example_serialized, items=data_items_to_decode) - return { - field: tensor - for (field, tensor) in zip(data_items_to_decode, decoded) - } + decoded = decoder.decode(example_serialized, items=decode_items) + return dict(zip(decode_items, decoded)) + + with tf.name_scope("examples_in"): + data_files = tf.contrib.slim.parallel_reader.get_data_files(data_sources) + random.shuffle(data_files) + dataset = tf.contrib.data.TFRecordDataset(data_files) + num_readers = min(4 if training else 1, len(data_files)) + dataset = dataset.map(decode_record, num_threads=num_readers) + if training: + dataset = dataset.shuffle(capacity) + dataset = dataset.repeat(None if training else 1) + it = dataset.make_one_shot_iterator() + return it.get_next() def preprocessing(examples, data_file_pattern, mode): @@ -193,7 +187,7 @@ def problem_input_pipeline(problem, data_file_pattern, capacity, mode): return feature_placeholders(data_fields) # Now the non-trivial case construction. - examples = examples_queue( + examples = examples_reader( [data_file_pattern], data_fields, training=(mode == tf.contrib.learn.ModeKeys.TRAIN), @@ -278,7 +272,7 @@ def input_pipeline(problem, data_file_pattern, capacity, mode): return feature_placeholders(data_fields) # Now the non-trivial case construction. - examples = examples_queue( + examples = examples_reader( [data_file_pattern], data_fields, training=(mode == tf.contrib.learn.ModeKeys.TRAIN), @@ -296,7 +290,7 @@ def batch_examples(examples, batching_scheme): """Given a queue of examples, create batches of examples with similar lengths. We assume that examples is a dictionary with string keys and tensor values, - possibly coming from a queue, e.g., constructed by examples_queue above. + possibly coming from a queue, e.g., constructed by examples_reader above. Each tensor in examples is assumed to be 1D. We will put tensors of similar length into batches togeter. We return a dictionary with the same keys as examples, and with values being batches of size batch_size. If elements have @@ -407,7 +401,7 @@ def constant_batching_scheme(constant_batch_size_in_sequences): } -def get_datasets(problems, data_dir, mode): +def get_data_filepatterns(problems, data_dir, mode): """Return the location of a dataset for a given mode.""" datasets = [] for problem in problems.split("-"): diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index f0c318e7b..ea98da06d 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -50,13 +50,13 @@ def test_generator(): generator_utils.generate_files(test_generator(), filenames) self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) - examples_train = data_reader.examples_queue( + examples_train = data_reader.examples_reader( [tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) }, training=True) - examples_eval = data_reader.examples_queue( + examples_eval = data_reader.examples_reader( [tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64), @@ -103,12 +103,12 @@ def test_generator(): generator_utils.generate_files(test_generator(), filenames) self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) - examples_train = data_reader.examples_queue([tmp_file_path + "*"], { + examples_train = data_reader.examples_reader([tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) }, True) batch_train = data_reader.batch_examples(examples_train, 4) - examples_eval = data_reader.examples_queue([tmp_file_path + "*"], { + examples_eval = data_reader.examples_reader([tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) }, False) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index bf105c5ae..260ec6a00 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -197,14 +197,14 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): train_input_fn = get_input_fn( mode=tf.contrib.learn.ModeKeys.TRAIN, hparams=hparams, - data_file_patterns=get_datasets_for_mode(data_dir, + data_file_patterns=get_data_filepatterns(data_dir, tf.contrib.learn.ModeKeys.TRAIN), num_datashards=num_datashards) eval_input_fn = get_input_fn( mode=tf.contrib.learn.ModeKeys.EVAL, hparams=hparams, - data_file_patterns=get_datasets_for_mode(data_dir, + data_file_patterns=get_data_filepatterns(data_dir, tf.contrib.learn.ModeKeys.EVAL), num_datashards=num_datashards) estimator = tf.contrib.learn.Estimator( @@ -626,7 +626,7 @@ def decode_from_dataset(estimator): inputs_vocab = hparams.problems[i].vocabulary.get("inputs", None) targets_vocab = hparams.problems[i].vocabulary["targets"] tf.logging.info("Performing local inference.") - infer_problems_data = get_datasets_for_mode(hparams.data_dir, + infer_problems_data = get_data_filepatterns(hparams.data_dir, tf.contrib.learn.ModeKeys.INFER) infer_input_fn = get_input_fn( @@ -801,8 +801,8 @@ def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, } -def get_datasets_for_mode(data_dir, mode): - return data_reader.get_datasets(FLAGS.problems, data_dir, mode) +def get_data_filepatterns(data_dir, mode): + return data_reader.get_data_filepatterns(FLAGS.problems, data_dir, mode) def _cond_on_index(fn, index_tensor, cur_idx, max_idx): @@ -1075,42 +1075,39 @@ def input_fn(): ValueError: if one of the parameters has an unsupported value. """ problem_count, batches = len(data_file_patterns), [] - with tf.name_scope("input_queues"): + with tf.name_scope("input_reader"): for n in xrange(problem_count): if fixed_problem is not None and n != fixed_problem: continue problem_instance = hparams.problem_instances[n] + p_hparams = hparams.problems[n] with tf.name_scope("problem_%d" % n): - with tf.device("/cpu:0"): # Input queues are on CPU. - capacity = hparams.problems[n].max_expected_batch_size_per_shard + with tf.device("/cpu:0"): # Input reading on CPU + capacity = p_hparams.max_expected_batch_size_per_shard capacity *= num_datashards examples = data_reader.input_pipeline( problem_instance, data_file_patterns[n], capacity, mode) - if mode == tf.contrib.learn.ModeKeys.TRAIN: - drop_long_sequences = True - else: - drop_long_sequences = hparams.eval_drop_long_sequences - batch_size_multiplier = hparams.problems[n].batch_size_multiplier feature_map = data_reader.batch_examples( examples, data_reader.hparams_to_batching_scheme( hparams, shard_multiplier=num_datashards, - drop_long_sequences=drop_long_sequences, - length_multiplier=batch_size_multiplier)) + drop_long_sequences=(mode == tf.contrib.learn.ModeKeys.TRAIN + or hparams.eval_drop_long_sequences), + length_multiplier=(p_hparams.batch_size_multiplier))) # Reverse inputs and targets features if the problem was reversed. if problem_instance is not None: problem_instance.maybe_reverse_features(feature_map) problem_instance.maybe_copy_features(feature_map) else: - if hparams.problems[n].was_reversed: + if p_hparams.was_reversed: inputs = feature_map["inputs"] targets = feature_map["targets"] feature_map["inputs"] = targets feature_map["targets"] = inputs # Use the inputs as the targets if the problem is a copy problem. - if hparams.problems[n].was_copy: + if p_hparams.was_copy: feature_map["targets"] = feature_map["inputs"] # Ensure inputs and targets are proper rank. @@ -1122,8 +1119,8 @@ def input_fn(): batches.append( (feature_map["inputs"], feature_map["targets"], tf.constant(n), - tf.constant(hparams.problems[n].input_space_id), - tf.constant(hparams.problems[n].target_space_id))) + tf.constant(p_hparams.input_space_id), + tf.constant(p_hparams.target_space_id))) # We choose which problem to process. loss_moving_avgs = [] # Need loss moving averages for that. From daea72a7d81b8d1559bfdc5202a7cb237ac17d0e Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 27 Jul 2017 18:18:05 -0700 Subject: [PATCH 0184/4095] Make a generic Text2TextProblem class, use in WMT, move PTB. PiperOrigin-RevId: 163417898 --- tensor2tensor/bin/t2t-datagen | 7 -- tensor2tensor/data_generators/problem.py | 114 ++++++++++++++++-- .../data_generators/problem_hparams.py | 16 --- tensor2tensor/data_generators/ptb.py | 103 ++++++++++------ tensor2tensor/data_generators/wmt.py | 79 +----------- tensor2tensor/utils/data_reader.py | 3 +- 6 files changed, 177 insertions(+), 145 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index e4acb6731..1f876c981 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -43,7 +43,6 @@ from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b -from tensor2tensor.data_generators import ptb from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt @@ -176,12 +175,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: audio.timit_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 626, vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), - "lmptb_10k": ( - lambda: ptb.train_generator( - FLAGS.tmp_dir, - FLAGS.data_dir, - False), - ptb.valid_generator), } # pylint: enable=g-long-lambda diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 67e3c6f90..9623791f5 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -18,11 +18,14 @@ from __future__ import division from __future__ import print_function +import os + # Dependency imports -from tensor2tensor.data_generators import generator_utils as utils +from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder from tensor2tensor.utils import metrics +from tensor2tensor.utils import registry import tensorflow as tf @@ -176,20 +179,23 @@ def eval_metrics(self): def training_filepaths(self, data_dir, num_shards, shuffled): file_basename = self.dataset_filename() if not shuffled: - file_basename += utils.UNSHUFFLED_SUFFIX - return utils.train_data_filenames(file_basename, data_dir, num_shards) + file_basename += generator_utils.UNSHUFFLED_SUFFIX + return generator_utils.train_data_filenames( + file_basename, data_dir, num_shards) def dev_filepaths(self, data_dir, num_shards, shuffled): file_basename = self.dataset_filename() if not shuffled: - file_basename += utils.UNSHUFFLED_SUFFIX - return utils.dev_data_filenames(file_basename, data_dir, num_shards) + file_basename += generator_utils.UNSHUFFLED_SUFFIX + return generator_utils.dev_data_filenames( + file_basename, data_dir, num_shards) def test_filepaths(self, data_dir, num_shards, shuffled): file_basename = self.dataset_filename() if not shuffled: - file_basename += utils.UNSHUFFLED_SUFFIX - return utils.test_data_filenames(file_basename, data_dir, num_shards) + file_basename += generator_utils.UNSHUFFLED_SUFFIX + return generator_utils.test_data_filenames( + file_basename, data_dir, num_shards) def __init__(self, was_reversed=False, was_copy=False): """Create a Problem. @@ -323,3 +329,97 @@ def _default_hparams(): # class. input_space_id=SpaceID.GENERIC, target_space_id=SpaceID.GENERIC) + + +class Text2TextProblem(Problem): + """Base class for text-to-text problems.""" + + @property + def is_character_level(self): + raise NotImplementedError() + + @property + def targeted_vocab_size(self): + raise NotImplementedError() # Not needed if self.is_character_level. + + def train_generator(self, data_dir, tmp_dir, is_training): + """Generator of the training data.""" + raise NotImplementedError() + + def dev_generator(self, data_dir, tmp_dir): + """Generator of the development data.""" + return self.train_generator(data_dir, tmp_dir, False) + + @property + def input_space_id(self): + raise NotImplementedError() + + @property + def target_space_id(self): + raise NotImplementedError() + + @property + def num_shards(self): + raise NotImplementedError() + + @property + def vocab_name(self): + raise NotImplementedError() + + @property + def vocab_file(self): + return "%s.%d" % (self.vocab_name, self.targeted_vocab_size) + + @property + def use_subword_tokenizer(self): + raise NotImplementedError() + + @property + def has_inputs(self): + return True # Set to False for language models. + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + generator_utils.generate_dataset_and_shuffle( + self.train_generator(data_dir, tmp_dir, True), + self.training_filepaths(data_dir, self.num_shards, shuffled=False), + self.dev_generator(data_dir, tmp_dir), + self.dev_filepaths(data_dir, 1, shuffled=False)) + + def feature_encoders(self, data_dir): + vocab_filename = os.path.join(data_dir, self.vocab_file) + if self.is_character_level: + encoder = text_encoder.ByteTextEncoder(), + elif self.use_subword_tokenizer: + encoder = text_encoder.SubwordTextEncoder(vocab_filename) + else: + encoder = text_encoder.TokenTextEncoder(vocab_filename) + if self.has_inputs: + return {"inputs": encoder, "targets": encoder} + return {"targets": encoder} + + def hparams(self, defaults, unused_model_hparams): + p = defaults + if self.is_character_level: + source_vocab_size = 256 + target_vocab_size = 256 + else: + target_vocab_size = self._encoders["targets"].vocab_size + if self.has_inputs: + source_vocab_size = self._encoders["inputs"].vocab_size + + if self.has_inputs: + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, + source_vocab_size)} + p.target_modality = (registry.Modalities.SYMBOL, target_vocab_size) + if self.has_inputs: + p.input_space_id = self.input_space_id + p.target_space_id = self.target_space_id + if self.is_character_level: + p.loss_multiplier = 2.0 + + def eval_metrics(self): + return [ + metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5, + metrics.Metrics.ACC_PER_SEQ, metrics.Metrics.NEG_LOG_PERPLEXITY, + metrics.Metrics.APPROX_BLEU + ] diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 2792c79e9..3c829eeac 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -368,21 +368,6 @@ def wiki_32k(model_hparams): return p -def lmptb_10k(model_hparams): - """Penn Tree Bank language-modeling benchmark, 10k token vocabulary.""" - p = default_problem_hparams() - p.input_modality = {} - p.target_modality = (registry.Modalities.SYMBOL, 10000) - vocabulary = text_encoder.TokenTextEncoder( - os.path.join(model_hparams.data_dir, "lmptb_10k.vocab")) - p.vocabulary = { - "targets": vocabulary, - } - p.input_space_id = 3 - p.target_space_id = 3 - return p - - def wmt_ende_bpe32k(model_hparams): """English to German translation benchmark.""" p = default_problem_hparams() @@ -642,7 +627,6 @@ def image_celeba(unused_model_hparams): "lm1b_characters": lm1b_characters, "lm1b_32k": lm1b_32k, "wiki_32k": wiki_32k, - "lmptb_10k": lmptb_10k, "ice_parsing_characters": wmt_parsing_characters, "ice_parsing_tokens": lambda p: ice_parsing_tokens(p, 2**13), "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), diff --git a/tensor2tensor/data_generators/ptb.py b/tensor2tensor/data_generators/ptb.py index f71f0d902..18aedd640 100644 --- a/tensor2tensor/data_generators/ptb.py +++ b/tensor2tensor/data_generators/ptb.py @@ -27,7 +27,9 @@ # Dependency imports from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import registry import tensorflow as tf @@ -48,7 +50,7 @@ def _read_words(filename): def _build_vocab(filename, vocab_path, vocab_size): """Reads a file to build a vocabulary of `vocab_size` most common words. - The vocabulary is sorted by occurence count and has one word per line. + The vocabulary is sorted by occurrence count and has one word per line. Originally from: https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/reader.py @@ -66,26 +68,47 @@ def _build_vocab(filename, vocab_path, vocab_size): f.write("\n".join(words)) -def _get_token_encoder(vocab_dir, filename): +def _get_token_encoder(vocab_dir, vocab_name, filename): """Reads from file and returns a `TokenTextEncoder` for the vocabulary.""" - vocab_name = "lmptb_10k.vocab" vocab_path = os.path.join(vocab_dir, vocab_name) - _build_vocab(filename, vocab_path, 10000) + if not tf.gfile.Exists(vocab_path): + _build_vocab(filename, vocab_path, 10000) return text_encoder.TokenTextEncoder(vocab_path) -class PTB(object): +class PTBProblem(problem.Text2TextProblem): """A class for generating PTB data.""" - def __init__(self, tmp_dir, data_dir, char=False): - assert not char, "char mode for PTB is not yet implemented" - self.char = char - self.data_dir = data_dir + @property + def has_inputs(self): + return False - url = PTB_URL - filename = os.path.basename(url) + @property + def target_space_id(self): + if self.is_character_level: + return problem.SpaceID.EN_CHR + return problem.SpaceID.EN_TOK + + @property + def num_shards(self): + return 10 + + @property + def vocab_name(self): + return "vocab.lmptb_10k" + + @property + def use_subword_tokenizer(self): + return False + + @property + def targeted_vocab_size(self): + return 10000 + + def train_generator(self, data_dir, tmp_dir, train): + filename = os.path.basename(PTB_URL) compressed_filepath = generator_utils.maybe_download( - tmp_dir, filename, url) + tmp_dir, filename, PTB_URL) ptb_files = [] ptb_char_files = [] with tarfile.open(compressed_filepath, "r:gz") as tgz: @@ -101,50 +124,52 @@ def __init__(self, tmp_dir, data_dir, char=False): tgz.extractall(tmp_dir, members=files) - if self.char: + if self.is_character_level: files = ptb_char_files else: files = ptb_files - files = files + train_file, valid_file = None, None for filename in files: if "train" in filename: - self.train = os.path.join(tmp_dir, filename) + train_file = os.path.join(tmp_dir, filename) elif "valid" in filename: - self.valid = os.path.join(tmp_dir, filename) + valid_file = os.path.join(tmp_dir, filename) - assert hasattr(self, "train"), "Training file not found" - assert hasattr(self, "valid"), "Validation file not found" - self.encoder = _get_token_encoder(data_dir, self.train) + assert train_file, "Training file not found" + assert valid_file, "Validation file not found" - def train_generator(self): - return self._generator(self.train) + if self.is_character_level: + encoder = text_encoder.ByteTextEncoder() + else: + encoder = _get_token_encoder(data_dir, self.vocab_file, train_file) - def valid_generator(self): - return self._generator(self.valid) + if train: + return self._generator(train_file, encoder) + return self._generator(valid_file, encoder) - def _generator(self, filename): + def _generator(self, filename, encoder): with tf.gfile.GFile(filename, "r") as f: for line in f: line = " ".join(line.replace("\n", EOS).split()) - tok = self.encoder.encode(line) - yield {"inputs": tok[:-1], "targets": tok[1:]} + tok = encoder.encode(line) + if tok: + yield {"inputs": [0], "targets": tok} -# Using a object "singleton" -# `train_generator` must be called before -# `valid_generator` in order to work -_ptb = {} +@registry.register_problem("lm_ptb_10k") +class LmPtb10k(PTBProblem): + """A class for generating PTB data, 10k vocab.""" + @property + def is_character_level(self): + return False -def train_generator(*args, **kwargs): - """The train data generator to be called.""" - global _ptb - _ptb = PTB(*args, **kwargs) - return _ptb.train_generator() +@registry.register_problem +class LmPtbCharacters(PTBProblem): + """A class for generating PTB data, character-level.""" -def valid_generator(): - """Validation (aka. dev) data generator.""" - global _ptb # pylint:disable=global-variable-not-assigned - return _ptb.valid_generator() + @property + def is_character_level(self): + return True diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 7fde9b3b4..456f36321 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -28,7 +28,6 @@ from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import metrics from tensor2tensor.utils import registry import tensorflow as tf @@ -44,33 +43,13 @@ EOS = text_encoder.EOS_ID -class WMTProblem(problem.Problem): +class WMTProblem(problem.Text2TextProblem): """Base class for WMT problems.""" @property def is_character_level(self): return False - @property - def targeted_vocab_size(self): - raise NotImplementedError() # Not needed if self.is_character_level. - - def train_generator(self, data_dir, tmp_dir, is_training): - """Generator of the training data.""" - raise NotImplementedError() - - def dev_generator(self, data_dir, tmp_dir): - """Generator of the development data.""" - return self.train_generator(data_dir, tmp_dir, False) - - @property - def input_space_id(self): - raise NotImplementedError() - - @property - def target_space_id(self): - raise NotImplementedError() - @property def num_shards(self): return 100 @@ -80,51 +59,8 @@ def vocab_name(self): return "vocab.endefr" @property - def vocab_file(self): - return "%s.%d" % (self.vocab_name, self.targeted_vocab_size) - - def generate_data(self, data_dir, tmp_dir, task_id=-1): - generator_utils.generate_dataset_and_shuffle( - self.train_generator(data_dir, tmp_dir, True), - self.training_filepaths(data_dir, self.num_shards, shuffled=False), - self.dev_generator(data_dir, tmp_dir), - self.dev_filepaths(data_dir, 1, shuffled=False)) - - def feature_encoders(self, data_dir): - if self.is_character_level: - return { - "inputs": text_encoder.ByteTextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - vocab_filename = os.path.join(data_dir, self.vocab_file) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - return { - "inputs": subtokenizer, - "targets": subtokenizer, - } - - def hparams(self, defaults, unused_model_hparams): - p = defaults - if self.is_character_level: - source_vocab_size = 256 - target_vocab_size = 256 - else: - source_vocab_size = self._encoders["inputs"].vocab_size - target_vocab_size = self._encoders["targets"].vocab_size - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, - source_vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, target_vocab_size) - p.input_space_id = self.input_space_id - p.target_space_id = self.target_space_id - if self.is_character_level: - p.loss_multiplier = 2.0 - - def eval_metrics(self): - return [ - metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5, - metrics.Metrics.ACC_PER_SEQ, metrics.Metrics.NEG_LOG_PERPLEXITY, - metrics.Metrics.APPROX_BLEU - ] + def use_subword_tokenizer(self): + return True # Generic generators used later for multiple problems. @@ -634,7 +570,7 @@ def target_space_id(self): @registry.register_problem("wmt_encs_tokens_32k") -class WMTEnCsTokens32k(problem.Problem): +class WMTEnCsTokens32k(WMTProblem): """Problem spec for WMT English-Czech translation.""" @property @@ -665,13 +601,6 @@ def input_space_id(self): def target_space_id(self): return problem.SpaceID.CS_TOK - def eval_metrics(self): - return [ - metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5, - metrics.Metrics.ACC_PER_SEQ, metrics.Metrics.NEG_LOG_PERPLEXITY, - metrics.Metrics.APPROX_BLEU - ] - @registry.register_problem("wmt_encs_characters") class WMTEnCsCharacters(WMTProblem): diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index e78e22344..454e4f321 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -115,7 +115,8 @@ def decode_record(record): with tf.name_scope("examples_in"): data_files = tf.contrib.slim.parallel_reader.get_data_files(data_sources) - random.shuffle(data_files) + if training: + random.shuffle(data_files) dataset = tf.contrib.data.TFRecordDataset(data_files) num_readers = min(4 if training else 1, len(data_files)) dataset = dataset.map(decode_record, num_threads=num_readers) From 01f245fdecdf9fbdfae8a610cf9246e222c0891a Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 27 Jul 2017 18:21:21 -0700 Subject: [PATCH 0185/4095] v1.1.3 PiperOrigin-RevId: 163418167 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6be9aba04..ae028d847 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.2', + version='1.1.3', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 7c072d7b77ada142bd577d01919a9be32900dd0c Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 27 Jul 2017 19:04:00 -0700 Subject: [PATCH 0186/4095] Revert usage of Datasets API PiperOrigin-RevId: 163421122 --- tensor2tensor/utils/data_reader.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 454e4f321..ba5139433 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -20,7 +20,6 @@ import math import os -import random # Dependency imports @@ -114,17 +113,18 @@ def decode_record(record): return dict(zip(decode_items, decoded)) with tf.name_scope("examples_in"): + # Read serialized examples using slim parallel_reader. data_files = tf.contrib.slim.parallel_reader.get_data_files(data_sources) - if training: - random.shuffle(data_files) - dataset = tf.contrib.data.TFRecordDataset(data_files) num_readers = min(4 if training else 1, len(data_files)) - dataset = dataset.map(decode_record, num_threads=num_readers) - if training: - dataset = dataset.shuffle(capacity) - dataset = dataset.repeat(None if training else 1) - it = dataset.make_one_shot_iterator() - return it.get_next() + _, example_serialized = tf.contrib.slim.parallel_reader.parallel_read( + data_sources, + tf.TFRecordReader, + num_epochs=None if training else 1, + shuffle=training, + capacity=2 * capacity, + min_after_dequeue=capacity, + num_readers=num_readers) + return decode_record(example_serialized) def preprocessing(examples, data_file_pattern, mode): From 37e7dedf22063d3e1e1cc965a8b98e29ce5964a6 Mon Sep 17 00:00:00 2001 From: William <awwoof@hotmail.com> Date: Fri, 28 Jul 2017 14:06:55 +0100 Subject: [PATCH 0187/4095] Updated sliding window --- .gitignore | 1 + tensor2tensor/models/common_attention.py | 263 +++++++++++++++++++---- tensor2tensor/models/models.py | 1 + tensor2tensor/models/transformer.py | 3 +- 4 files changed, 229 insertions(+), 39 deletions(-) diff --git a/.gitignore b/.gitignore index c9dd3db88..fbd98dca5 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ _pycache__/ # PyPI distribution artifacts. build/ dist/ +data/ # Sublime project files *.sublime-project diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index e8700433a..c1d469eb1 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -345,55 +345,244 @@ def dot_product_attention(q, return tf.matmul(weights, v) -def sliding_window_attention(window_size, - q, - k, - v, - bias, - *args): - """ Sliding window wrapper for dot product attention. Each element only - attends to the elements (window_size/2) before and after it. This reduces +def masked_local_attention_1d( + q, k, v, block_length=128, name=None): + """Attention to the source position and a neigborhood to the left of it. + + The sequence is divided into blocks of length block_size. + Attention for a given query position can only see memory positions + less than or equal to the query position, in the corresponding block + and the previous block. + + If mask_right is True, then a target position cannot see greater source + positions. + + Args: + q: a Tensor with shape [batch, heads, length, depth_k] + k: a Tensor with shape [batch, heads, length, depth_k] + v: a Tensor with shape [batch, heads, length, depth_v] + block_length: an integer + name: an optional string + + Returns: + a Tensor of shape [batch, heads, length, depth_v] + """ + with tf.variable_scope(name, default_name="local_attention_1d", + values=[q, k, v]): + v_shape = v.get_shape() + batch = tf.shape(q)[0] + heads = tf.shape(q)[1] + length = tf.shape(q)[2] + # If (length < 2 * block_length), then we use only one block. + block_length = tf.where(tf.less(length, block_length * 2), + length, block_length) + depth_k = tf.shape(q)[3] + depth_v = tf.shape(v)[3] + original_length = length + padding_size = tf.mod(-length, block_length) + length += padding_size + padding = [[0, 0], [0, 0], [0, padding_size], [0, 0]] + q = tf.pad(q, padding) + k = tf.pad(k, padding) + v = tf.pad(v, padding) + num_blocks = tf.div(length, block_length) + + # compute attention for the first query block. + first_q = tf.slice(q, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_k = tf.slice(k, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_v = tf.slice(v, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_output = dot_product_attention( + first_q, first_k, first_v, attention_bias_lower_triangle(block_length), + name="fist_block") + + # compute attention for all subsequent query blocks. + q = tf.reshape(q, [batch, heads, num_blocks, block_length, depth_k]) + k = tf.reshape(k, [batch, heads, num_blocks, block_length, depth_k]) + v = tf.reshape(v, [batch, heads, num_blocks, block_length, depth_v]) + + def local(x): + """Create a local version of the keys or values.""" + prev_block = tf.slice( + x, [0, 0, 0, 0, 0], [-1, -1, num_blocks - 1, -1, -1]) + cur_block = tf.slice( + x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) + return tf.concat([prev_block, cur_block], 3) + local_k = local(k) + local_v = local(v) + tail_q = tf.slice(q, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) + + local_length = tf.shape(local_k)[3] + + # [batch, heads, num_blocks - 1, block_length, local_length] + attention = tf.matmul(tail_q, local_k, transpose_b=True) + + # make sure source_pos <= target_pos + good_part = tf.matrix_band_part( + tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) + mask = (1.0 - good_part) * -1e9 + attention += tf.reshape(mask, [1, 1, 1, block_length, local_length]) + attention = tf.nn.softmax(attention) + # TODO(noam): figure out how to show a summary for the remaining blocks. + # The naive way currently causes errors due to empty tensors. + # output: [batch, heads, num_blocks-1, block_length, depth_v] + output = tf.matmul(attention, local_v) + output = tf.reshape(output, [batch, heads, -1, depth_v]) + output = tf.concat([first_output, output], axis=2) + output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) + output.set_shape(v_shape) + return output + + +def unmasked_local_attention_1d(q, k, v, block_length=128, filter_width=100, + name=None): + """strided block local self-attention. + + Args: + q: a Tensor with shape [batch, heads, length, depth_k] + k: a Tensor with shape [batch, heads, length, depth_k] + v: a Tensor with shape [batch, heads, length, depth_v] + block_length: an integer + filter_width: an integer indicating how much to look left. + name: an optional string + + Returns: + a Tensor of shape [batch, heads, length, depth_v] + """ + with tf.variable_scope(name, default_name="local_self_attention_1d", + values=[q, k, v]): + v_shape = v.get_shape() + depth_v = tf.shape(v)[3] + batch_size = tf.shape(q)[0] + num_heads = tf.shape(q)[1] + original_length = tf.shape(q)[2] + # making sure q is a multiple of d + def pad_to_multiple(x, pad_length): + x_length = tf.shape(x)[2] + return tf.pad(x, [[0, 0], [0, 0], [0, -x_length % pad_length], [0, 0]]) + def pad_l_and_r(x, pad_length): + return tf.pad(x, [[0, 0], [0, 0], [pad_length, pad_length], [0, 0]]) + q = pad_to_multiple(q, block_length) + k = pad_to_multiple(k, block_length) + v = pad_to_multiple(v, block_length) + + # Setting up q blocks + new_q_shape = tf.shape(q) + # Setting up q blocks + q = tf.reshape(q, [new_q_shape[0], new_q_shape[1], + new_q_shape[2]//block_length, + block_length, new_q_shape[3]]) + + # Setting up k and v values + k = pad_l_and_r(k, filter_width) + v = pad_l_and_r(v, filter_width) + + length = tf.shape(k)[2] + full_filter_width = block_length + 2*filter_width + # getting gather indices + indices = tf.range(0, length, delta=1, name="index_range") + # making indices [1, length, 1] to appy convs + indices = tf.reshape(indices, [1, -1, 1]) + kernel = tf.expand_dims(tf.eye(full_filter_width), axis=1) + gather_indices = tf.nn.conv1d( + tf.cast(indices, tf.float32), + kernel, + block_length, + padding="VALID", + name="gather_conv") + + gather_indices = tf.squeeze(tf.cast(gather_indices, tf.int32), axis=0) + + # [length, batch, heads, dim] + k_t = tf.transpose(k, [2, 0, 1, 3]) + k_new = tf.gather(k_t, gather_indices) + + # [batch, heads, blocks, block_length, dim] + k_new = tf.transpose(k_new, [2, 3, 0, 1, 4]) + + attention_bias = tf.expand_dims( + tf.to_float(embedding_to_padding(k_new)) * -1e9, axis=-2) + + v_t = tf.transpose(v, [2, 0, 1, 3]) + v_new = tf.gather(v_t, gather_indices) + v_new = tf.transpose(v_new, [2, 3, 0, 1, 4]) + + logits = tf.matmul(q, k_new, transpose_b=True) + + attention = tf.nn.softmax(logits+attention_bias) + output = tf.matmul(attention, v_new) + + output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) + # Remove the padding if introduced + output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) + output.set_shape(v_shape) + return output + + +def windowed_local_attention_1d(q, + k, + v, + window_start, + window_end, + bias, + *args): + """ Local window wrapper for dot product attention. Each element only + attends to the elements from window_start to window_end. This reduces the computational complexity for long sequences at the expense of eliminating long-term dependencies. N.B: For short input sequences this is much slower than just using - un-windowed attention. use only for long sequences. + un-windowed attention. Use only for long sequences. Args: window_size: an integer q: a Tensor with shape [batch, heads, length_q, depth_k] k: a Tensor with shape [batch, heads, length_kv, depth_k] v: a Tensor with shape [batch, heads, length_kv, depth_v] + window_start: an integer Tensor with shape [length_q] + window_end: an integer Tensor with shape [length_q] bias: bias Tensor (see attention_bias()) Returns: A Tensor. """ - - half_size = window_size // 2 - - # Wrapper function for dot product attention with a single query vector - def single(index, size, q, k, v, bias, **kwargs): - length_kv = tf.shape(k)[2] - index_begin = tf.maximum(0, index-size) - index_end = tf.minimum(length_kv-1, index+size) - q = tf.expand_dims(q, 2) - bias = tf.expand_dims(bias, 3) - k = k[:,:,index_begin:index_end,:] - v = v[:,:,index_begin:index_end,:] - out = dot_product_attention(q, k, v, bias, *args) - out = tf.squeeze(out, 2) + with tf.name_scope("windowed"): + + # Wrapper function for dot product attention with a single query vector + def single(index_begin, index_end, q, k, v, bias): + #Normalise range + #Reshape to right shape + q = tf.expand_dims(q, 2) + bias = tf.expand_dims(bias, 3) + #Get slices + k = k[:,:,index_begin:index_end,:] + v = v[:,:,index_begin:index_end,:] + out = dot_product_attention(q, k, v, bias, *args) + out = tf.squeeze(out, 2) + return out + + # We'll loop over each element of q, computing its corresponding output. + q = tf.transpose(q, [2, 0, 1, 3]) + indices = tf.range(tf.shape(q)[0]) + out = tf.map_fn( + lambda ii: single( + window_start[ii], + window_end[ii], + q[ii], + k, + v, + bias[:,:,:,ii]), + indices, + dtype=tf.float32) + out = tf.transpose(out, [1, 2, 0, 3]) return out - - # We'll loop over each element of q, computing it's corresponding output. - q = tf.transpose(q, [2, 0, 1, 3]) - indices = tf.range(tf.shape(q)[0]) - out = tf.map_fn( - lambda ii: single(ii, half_size, q[ii], k, v, bias[:,:,:,ii]), - indices, - dtype=tf.float32) - out = tf.transpose(out, [1, 2, 0, 3]) - return out + + +def local_sliding_window(length, window_size, look_right=True): + indices = tf.range(length) + size = window_size + starts = tf.maximum(0, indices-size) + ends = tf.minimum(length-1, indices+size) + return starts, ends def multihead_attention(query_antecedent, @@ -420,8 +609,6 @@ def multihead_attention(query_antecedent, num_heads: an integer dividing total_key_depth and total_value_depth dropout_rate: a floating point number summaries: a boolean - image_shapes: optional tuple of integer scalars. - see comments for attention_image_summary() window_size: option size of window for attention. Useful only for very long sequence lengths. name: an optional string @@ -461,8 +648,10 @@ def multihead_attention(query_antecedent, x = dot_product_attention( q, k, v, bias, dropout_rate, summaries, image_shapes) else: - x = sliding_window_attention( - window_size, q, k, v, bias, dropout_rate, False, image_shapes) + length = tf.shape(k)[2] + window_start, window_end = local_sliding_window(length, window_size) + x = windowed_local_attention_1d( + q, k, v, window_start, window_end, bias, dropout_rate, False) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") return x diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index b8f0811e5..ae0e0da61 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -32,5 +32,6 @@ from tensor2tensor.models import neural_gpu from tensor2tensor.models import slicenet from tensor2tensor.models import transformer +from tensor2tensor.models import transformer_alternative from tensor2tensor.models import xception # pylint: enable=unused-import diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 0b6c97153..544035efd 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -162,8 +162,7 @@ def transformer_encoder(encoder_input, hparams.num_heads, hparams.attention_dropout, summaries=summaries, - name="encoder_self_attention", - window_size=20)) + name="encoder_self_attention")) x = residual_fn(x, transformer_ffn_layer(x, hparams)) return x From d69ad4d3e7619dd898e648743125517f26a43d44 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Fri, 28 Jul 2017 16:21:15 +0000 Subject: [PATCH 0188/4095] Moved Icelandic parsing to separate module --- tensor2tensor/data_generators/wmt.py | 127 ++++++----------------- tensor2tensor/ice_parsing/__init__.py | 2 + tensor2tensor/ice_parsing/ice_parsing.py | 127 +++++++++++++++++++++++ tensor2tensor/models/transformer.py | 23 ---- 4 files changed, 159 insertions(+), 120 deletions(-) create mode 100644 tensor2tensor/ice_parsing/__init__.py create mode 100755 tensor2tensor/ice_parsing/ice_parsing.py diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index d0f7abaec..3d01ab46e 100755 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -162,36 +162,6 @@ def character_generator(source_path, target_path, character_vocab, eos=None): source, target = source_file.readline(), target_file.readline() -def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): - r"""Generator for sequence-to-sequence tasks using tabbed files. - - Tokens are derived from text files where each line contains both - a source and a target string. The two strings are separated by a tab - character ('\t'). It yields dictionaries of "inputs" and "targets" where - inputs are characters from the source lines converted to integers, and - targets are characters from the target lines, also converted to integers. - - Args: - source_path: path to the file with source and target sentences. - source_vocab: a SunwordTextEncoder to encode the source string. - target_vocab: a SunwordTextEncoder to encode the target string. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from characters in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - for line in source_file: - if line and "\t" in line: - parts = line.split("\t", maxsplit=1) - source, target = parts[0].strip(), parts[1].strip() - source_ints = source_vocab.encode(source) + eos_list - target_ints = target_vocab.encode(target) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - - def token_generator(source_path, target_path, token_vocab, eos=None): """Generator for sequence-to-sequence tasks that uses tokens. @@ -255,6 +225,36 @@ def bi_vocabs_token_generator(source_path, source, target = source_file.readline(), target_file.readline() +def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): + r"""Generator for sequence-to-sequence tasks using tabbed files. + + Tokens are derived from text files where each line contains both + a source and a target string. The two strings are separated by a tab + character ('\t'). It yields dictionaries of "inputs" and "targets" where + inputs are characters from the source lines converted to integers, and + targets are characters from the target lines, also converted to integers. + + Args: + source_path: path to the file with source and target sentences. + source_vocab: a SunwordTextEncoder to encode the source string. + target_vocab: a SunwordTextEncoder to encode the target string. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from characters in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + for line in source_file: + if line and "\t" in line: + parts = line.split("\t", maxsplit=1) + source, target = parts[0].strip(), parts[1].strip() + source_ints = source_vocab.encode(source) + eos_list + target_ints = target_vocab.encode(target) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + + # Data-set URLs. @@ -654,28 +654,6 @@ def parsing_character_generator(tmp_dir, train): return character_generator(text_filepath, tags_filepath, character_vocab, EOS) -def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix, - source_vocab_size, target_vocab_size): - """Generate source and target data from a single file.""" - source_vocab = generator_utils.get_or_generate_tabbed_vocab( - data_dir, tmp_dir, "parsing_train.pairs", 0, - prefix + "_source.vocab.%d" % source_vocab_size, source_vocab_size) - target_vocab = generator_utils.get_or_generate_tabbed_vocab( - data_dir, tmp_dir, "parsing_train.pairs", 1, - prefix + "_target.vocab.%d" % target_vocab_size, target_vocab_size) - filename = "parsing_%s" % ("train" if train else "dev") - pair_filepath = os.path.join(tmp_dir, filename + ".pairs") - return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) - - -def tabbed_parsing_character_generator(tmp_dir, train): - """Generate source and target data from a single file.""" - character_vocab = text_encoder.ByteTextEncoder() - filename = "parsing_%s" % ("train" if train else "dev") - pair_filepath = os.path.join(tmp_dir, filename + ".pairs") - return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) - - def parsing_token_generator(data_dir, tmp_dir, train, vocab_size): symbolizer_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) @@ -685,48 +663,3 @@ def parsing_token_generator(data_dir, tmp_dir, train, vocab_size): symbolizer_vocab, EOS) -@registry.register_problem("ice_parsing_tokens") -class IceParsingTokens(problem.Problem): - """Problem spec for parsing tokenized Icelandic text to - constituency trees, also tokenized but to a smaller vocabulary.""" - - @property - def source_vocab_size(self): - return 2**13 # 8192 - - @property - def target_vocab_size(self): - return 2**8 # 256 - - def feature_encoders(self, data_dir): - source_vocab_filename = os.path.join( - data_dir, "ice_source.tokens.vocab.%d" % self.source_vocab_size) - target_vocab_filename = os.path.join( - data_dir, "ice_target.tokens.vocab.%d" % self.target_vocab_size) - source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) - return { - "inputs": source_subtokenizer, - "targets": target_subtokenizer, - } - - def generate_data(self, data_dir, tmp_dir, num_shards=100): - generator_utils.generate_dataset_and_shuffle( - tabbed_parsing_token_generator(tmp_dir, True, "ice", - self.source_vocab_size, - self.target_vocab_size), - self.training_filepaths(data_dir, num_shards, shuffled=False), - tabbed_parsing_token_generator(tmp_dir, False, "ice", - self.source_vocab_size, - self.target_vocab_size), - self.dev_filepaths(data_dir, 1, shuffled=False)) - - def hparams(self, defaults, unused_model_hparams): - p = defaults - source_vocab_size = self._encoders["inputs"].vocab_size - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, source_vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, self.target_vocab_size) - p.input_space_id = problem.SpaceID.ICE_TOK - p.target_space_id = problem.SpaceID.ICE_PARSE_TOK - p.loss_multiplier = 2.5 # Rough estimate of avg number of tokens per word - diff --git a/tensor2tensor/ice_parsing/__init__.py b/tensor2tensor/ice_parsing/__init__.py new file mode 100644 index 000000000..36f468dcb --- /dev/null +++ b/tensor2tensor/ice_parsing/__init__.py @@ -0,0 +1,2 @@ + +from .ice_parsing import IceParsingTokens, transformer_parsing_ice, transformer_parsing_ice_big diff --git a/tensor2tensor/ice_parsing/ice_parsing.py b/tensor2tensor/ice_parsing/ice_parsing.py new file mode 100755 index 000000000..d8dd41cf7 --- /dev/null +++ b/tensor2tensor/ice_parsing/ice_parsing.py @@ -0,0 +1,127 @@ +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This module implements the ice_parsing_* problems, which +# parse plain text into flattened parse trees and POS tags. +# The training data is stored in files named `parsing_train.pairs` +# and `parsing_dev.pairs`. These files are UTF-8 text files where +# each line contains an input sentence and a target parse tree, +# separated by a tab character. + +import os + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators.wmt import tabbed_generator +from tensor2tensor.utils import registry +from tensor2tensor.models import transformer + +import tensorflow as tf + + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + + +def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix, + source_vocab_size, target_vocab_size): + """Generate source and target data from a single file.""" + filename = "parsing_{0}.pairs".format("train" if train else "dev") + source_vocab = generator_utils.get_or_generate_tabbed_vocab( + data_dir, tmp_dir, filename, 0, + prefix + "_source.tokens.vocab.%d" % source_vocab_size, source_vocab_size) + target_vocab = generator_utils.get_or_generate_tabbed_vocab( + data_dir, tmp_dir, filename, 1, + prefix + "_target.tokens.vocab.%d" % target_vocab_size, target_vocab_size) + pair_filepath = os.path.join(tmp_dir, filename) + return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) + + +def tabbed_parsing_character_generator(tmp_dir, train): + """Generate source and target data from a single file.""" + character_vocab = text_encoder.ByteTextEncoder() + filename = "parsing_{0}.pairs".format("train" if train else "dev") + pair_filepath = os.path.join(tmp_dir, filename) + return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) + + +@registry.register_problem("ice_parsing_tokens") +class IceParsingTokens(problem.Problem): + """Problem spec for parsing tokenized Icelandic text to + constituency trees, also tokenized but to a smaller vocabulary.""" + + @property + def source_vocab_size(self): + return 2**13 # 8192 + + @property + def target_vocab_size(self): + return 2**8 # 256 + + def feature_encoders(self, data_dir): + source_vocab_filename = os.path.join( + data_dir, "ice_source.tokens.vocab.%d" % self.source_vocab_size) + target_vocab_filename = os.path.join( + data_dir, "ice_target.tokens.vocab.%d" % self.target_vocab_size) + source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_subtokenizer, + "targets": target_subtokenizer, + } + + def generate_data(self, data_dir, tmp_dir, num_shards=100): + generator_utils.generate_dataset_and_shuffle( + tabbed_parsing_token_generator(data_dir, tmp_dir, True, "ice", + self.source_vocab_size, + self.target_vocab_size), + self.training_filepaths(data_dir, num_shards, shuffled=False), + tabbed_parsing_token_generator(data_dir, tmp_dir, False, "ice", + self.source_vocab_size, + self.target_vocab_size), + self.dev_filepaths(data_dir, 1, shuffled=False)) + + def hparams(self, defaults, unused_model_hparams): + p = defaults + source_vocab_size = self._encoders["inputs"].vocab_size + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, source_vocab_size)} + p.target_modality = (registry.Modalities.SYMBOL, self.target_vocab_size) + p.input_space_id = problem.SpaceID.ICE_TOK + p.target_space_id = problem.SpaceID.ICE_PARSE_TOK + p.loss_multiplier = 2.5 # Rough estimate of avg number of tokens per word + + +@registry.register_hparams +def transformer_parsing_ice(): + """Hparams for parsing Icelandic text.""" + hparams = transformer.transformer_base_single_gpu() + hparams.batch_size = 4096 + hparams.shared_embedding_and_softmax_weights = int(False) + return hparams + + +@registry.register_hparams +def transformer_parsing_ice_big(): + """Hparams for parsing Icelandic text, bigger model.""" + hparams = transformer_parsing_ice() + hparams.batch_size = 2048 # 4096 gives Out-of-memory on 8 GB 1080 GTX GPU + hparams.attention_dropout = 0.05 + hparams.residual_dropout = 0.05 + hparams.max_length = 512 + hparams.hidden_size = 1024 + return hparams + diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index f03f173e2..0489567a0 100755 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -357,29 +357,6 @@ def transformer_parsing_big(): return hparams -@registry.register_hparams -def transformer_parsing_ice(): - """Hparams for parsing Icelandic text.""" - hparams = transformer_base_single_gpu() - hparams.batch_size = 4096 - hparams.shared_embedding_and_softmax_weights = int(False) - return hparams - - -@registry.register_hparams -def transformer_parsing_ice_big(): - """Hparams for parsing Icelandic text, bigger model.""" - hparams = transformer_parsing_ice() - hparams.batch_size = 2048 # 4096 gives Out-of-memory on 8 GB 1080 GTX GPU - hparams.attention_dropout = 0.2 - hparams.residual_dropout = 0.2 - hparams.max_length = 512 - hparams.learning_rate_warmup_steps = 16000 - hparams.hidden_size = 1024 - hparams.learning_rate = 0.05 - return hparams - - @registry.register_hparams def transformer_tiny(): hparams = transformer_base() From d6a6924886b78f1f8f75d27b523d9140fedc3e10 Mon Sep 17 00:00:00 2001 From: William <awwoof@hotmail.com> Date: Fri, 28 Jul 2017 18:10:55 +0100 Subject: [PATCH 0189/4095] Added middle window for local attention --- tensor2tensor/models/common_attention.py | 160 +++++------------- tensor2tensor/models/common_attention_test.py | 64 +++++++ 2 files changed, 110 insertions(+), 114 deletions(-) create mode 100644 tensor2tensor/models/common_attention_test.py diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index c1d469eb1..abf989402 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -346,7 +346,7 @@ def dot_product_attention(q, def masked_local_attention_1d( - q, k, v, block_length=128, name=None): + q, k, v, block_length=128, mask_right=False, name=None): """Attention to the source position and a neigborhood to the left of it. The sequence is divided into blocks of length block_size. @@ -362,6 +362,7 @@ def masked_local_attention_1d( k: a Tensor with shape [batch, heads, length, depth_k] v: a Tensor with shape [batch, heads, length, depth_v] block_length: an integer + mask_right: a bool name: an optional string Returns: @@ -373,150 +374,76 @@ def masked_local_attention_1d( batch = tf.shape(q)[0] heads = tf.shape(q)[1] length = tf.shape(q)[2] - # If (length < 2 * block_length), then we use only one block. - block_length = tf.where(tf.less(length, block_length * 2), - length, block_length) depth_k = tf.shape(q)[3] depth_v = tf.shape(v)[3] + original_length = length + + # If (length < 2 * block_length), then we use only one block. + block_length = tf.where(tf.less(length, block_length * 2), + length, block_length) padding_size = tf.mod(-length, block_length) length += padding_size + num_blocks = tf.div(length, block_length) + padding = [[0, 0], [0, 0], [0, padding_size], [0, 0]] q = tf.pad(q, padding) - k = tf.pad(k, padding) - v = tf.pad(v, padding) - num_blocks = tf.div(length, block_length) - # compute attention for the first query block. - first_q = tf.slice(q, [0, 0, 0, 0], [-1, -1, block_length, -1]) - first_k = tf.slice(k, [0, 0, 0, 0], [-1, -1, block_length, -1]) - first_v = tf.slice(v, [0, 0, 0, 0], [-1, -1, block_length, -1]) - first_output = dot_product_attention( - first_q, first_k, first_v, attention_bias_lower_triangle(block_length), - name="fist_block") + if mask_right: + #Add extra padding so we son't have to do an initial query + extra_padding = [[0, 0], [0, 0], [block_length, padding_size], [0, 0]] + else: + #We shift everything over by half a block so query is in centre + pad_right = block_length // 2 + pad_left = block_length - pad_right + extra_padding = [[0, 0], [0, 0], + [pad_left,padding_size+pad_right], [0, 0]] + + k = tf.pad(k, extra_padding) + v = tf.pad(v, extra_padding) + # compute attention for all subsequent query blocks. q = tf.reshape(q, [batch, heads, num_blocks, block_length, depth_k]) - k = tf.reshape(k, [batch, heads, num_blocks, block_length, depth_k]) - v = tf.reshape(v, [batch, heads, num_blocks, block_length, depth_v]) + k = tf.reshape(k, [batch, heads, num_blocks+1, block_length, depth_k]) + v = tf.reshape(v, [batch, heads, num_blocks+1, block_length, depth_v]) def local(x): """Create a local version of the keys or values.""" prev_block = tf.slice( - x, [0, 0, 0, 0, 0], [-1, -1, num_blocks - 1, -1, -1]) + x, [0, 0, 0, 0, 0], [-1, -1, num_blocks, -1, -1]) cur_block = tf.slice( x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) return tf.concat([prev_block, cur_block], 3) + local_k = local(k) local_v = local(v) - tail_q = tf.slice(q, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) local_length = tf.shape(local_k)[3] - # [batch, heads, num_blocks - 1, block_length, local_length] - attention = tf.matmul(tail_q, local_k, transpose_b=True) + # [batch, heads, num_blocks, block_length, local_length] + attention = tf.matmul(q, local_k, transpose_b=True) - # make sure source_pos <= target_pos good_part = tf.matrix_band_part( - tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) - mask = (1.0 - good_part) * -1e9 - attention += tf.reshape(mask, [1, 1, 1, block_length, local_length]) + tf.ones([block_length, local_length]), 0, tf.to_int64(block_length)) + + good_part = tf.cast(good_part, tf.float64) + attention *= tf.reshape(good_part, [1, 1, 1, block_length, local_length]) attention = tf.nn.softmax(attention) - # TODO(noam): figure out how to show a summary for the remaining blocks. - # The naive way currently causes errors due to empty tensors. - # output: [batch, heads, num_blocks-1, block_length, depth_v] + output = tf.matmul(attention, local_v) output = tf.reshape(output, [batch, heads, -1, depth_v]) - output = tf.concat([first_output, output], axis=2) + + # remove added padding output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) output.set_shape(v_shape) return output -def unmasked_local_attention_1d(q, k, v, block_length=128, filter_width=100, - name=None): - """strided block local self-attention. - - Args: - q: a Tensor with shape [batch, heads, length, depth_k] - k: a Tensor with shape [batch, heads, length, depth_k] - v: a Tensor with shape [batch, heads, length, depth_v] - block_length: an integer - filter_width: an integer indicating how much to look left. - name: an optional string - Returns: - a Tensor of shape [batch, heads, length, depth_v] - """ - with tf.variable_scope(name, default_name="local_self_attention_1d", - values=[q, k, v]): - v_shape = v.get_shape() - depth_v = tf.shape(v)[3] - batch_size = tf.shape(q)[0] - num_heads = tf.shape(q)[1] - original_length = tf.shape(q)[2] - # making sure q is a multiple of d - def pad_to_multiple(x, pad_length): - x_length = tf.shape(x)[2] - return tf.pad(x, [[0, 0], [0, 0], [0, -x_length % pad_length], [0, 0]]) - def pad_l_and_r(x, pad_length): - return tf.pad(x, [[0, 0], [0, 0], [pad_length, pad_length], [0, 0]]) - q = pad_to_multiple(q, block_length) - k = pad_to_multiple(k, block_length) - v = pad_to_multiple(v, block_length) - - # Setting up q blocks - new_q_shape = tf.shape(q) - # Setting up q blocks - q = tf.reshape(q, [new_q_shape[0], new_q_shape[1], - new_q_shape[2]//block_length, - block_length, new_q_shape[3]]) - - # Setting up k and v values - k = pad_l_and_r(k, filter_width) - v = pad_l_and_r(v, filter_width) - - length = tf.shape(k)[2] - full_filter_width = block_length + 2*filter_width - # getting gather indices - indices = tf.range(0, length, delta=1, name="index_range") - # making indices [1, length, 1] to appy convs - indices = tf.reshape(indices, [1, -1, 1]) - kernel = tf.expand_dims(tf.eye(full_filter_width), axis=1) - gather_indices = tf.nn.conv1d( - tf.cast(indices, tf.float32), - kernel, - block_length, - padding="VALID", - name="gather_conv") - - gather_indices = tf.squeeze(tf.cast(gather_indices, tf.int32), axis=0) - - # [length, batch, heads, dim] - k_t = tf.transpose(k, [2, 0, 1, 3]) - k_new = tf.gather(k_t, gather_indices) - - # [batch, heads, blocks, block_length, dim] - k_new = tf.transpose(k_new, [2, 3, 0, 1, 4]) - - attention_bias = tf.expand_dims( - tf.to_float(embedding_to_padding(k_new)) * -1e9, axis=-2) - - v_t = tf.transpose(v, [2, 0, 1, 3]) - v_new = tf.gather(v_t, gather_indices) - v_new = tf.transpose(v_new, [2, 3, 0, 1, 4]) - - logits = tf.matmul(q, k_new, transpose_b=True) - - attention = tf.nn.softmax(logits+attention_bias) - output = tf.matmul(attention, v_new) - - output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) - # Remove the padding if introduced - output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) - output.set_shape(v_shape) - return output +############################################################################### +### Not used, left in for reference ########################################### def windowed_local_attention_1d(q, k, @@ -556,12 +483,13 @@ def single(index_begin, index_end, q, k, v, bias): #Get slices k = k[:,:,index_begin:index_end,:] v = v[:,:,index_begin:index_end,:] - out = dot_product_attention(q, k, v, bias, *args) + out = dot_product_attention(q, k, v, *args) out = tf.squeeze(out, 2) return out # We'll loop over each element of q, computing its corresponding output. q = tf.transpose(q, [2, 0, 1, 3]) + bias = tf.transpose(bias, [3, 0, 1, 2]) indices = tf.range(tf.shape(q)[0]) out = tf.map_fn( lambda ii: single( @@ -570,13 +498,12 @@ def single(index_begin, index_end, q, k, v, bias): q[ii], k, v, - bias[:,:,:,ii]), + bias[ii]), indices, dtype=tf.float32) out = tf.transpose(out, [1, 2, 0, 3]) return out - def local_sliding_window(length, window_size, look_right=True): indices = tf.range(length) size = window_size @@ -584,6 +511,11 @@ def local_sliding_window(length, window_size, look_right=True): ends = tf.minimum(length-1, indices+size) return starts, ends +### ### +############################################################################### + + + def multihead_attention(query_antecedent, memory_antecedent, @@ -648,7 +580,7 @@ def multihead_attention(query_antecedent, x = dot_product_attention( q, k, v, bias, dropout_rate, summaries, image_shapes) else: - length = tf.shape(k)[2] + length = tf.shape(q)[2] window_start, window_end = local_sliding_window(length, window_size) x = windowed_local_attention_1d( q, k, v, window_start, window_end, bias, dropout_rate, False) diff --git a/tensor2tensor/models/common_attention_test.py b/tensor2tensor/models/common_attention_test.py new file mode 100644 index 000000000..14754794c --- /dev/null +++ b/tensor2tensor/models/common_attention_test.py @@ -0,0 +1,64 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for common layers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +import numpy as np +from tensor2tensor.models import common_attention + +import tensorflow as tf + + +class CommonAttentionTest(tf.test.TestCase): + + def testLocalAttention(self): + #q = np.array([[[ [1.0, 0.0, 0.0, 0.0], + # [1.0, 0.0, 0.0, 0.0], + # [1.0, 0.0, 0.0, 0.0], + # [1.0, 0.0, 0.0, 0.0], + # [1.0, 0.0, 0.0, 0.0], + # [1.0, 0.0, 0.0, 0.0], + # [1.0, 0.0, 0.0, 0.0], + # [1.0, 0.0, 0.0, 0.0] ]]]) + #k = np.array([[[ [0.0, 0.0, 0.0, 0.0], + # [0.0, 0.0, 0.0, 0.0], + # [0.0, 0.0, 0.0, 0.0], + # [0.0, 0.0, 0.0, 0.0], + # [0.0, 0.0, 0.0, 0.0], + # [0.0, 0.0, 0.0, 0.0], + # [0.0, 0.0, 0.0, 0.0], + # [0.0, 0.0, 0.0, 0.0] ]]]) + #v = np.ones((1, 1, 8, 1)) + + q = np.random.rand(5, 7, 13, 3) + k = np.random.rand(5, 7, 13, 3) + v = np.random.rand(5, 7, 13, 11) + + with self.test_session() as session: + q_ = tf.constant(q) + k_ = tf.constant(k) + v_ = tf.constant(v) + y = common_attention.masked_local_attention_1d(q_, k_, v_, block_length=tf.constant(3)) + res = session.run(y) + self.assertEqual(res.shape, (5, 7, 13, 11)) + + +if __name__ == "__main__": + tf.test.main() From 1bf3b449d9bffb496075b5fdae63d2fd86e4db8b Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Tue, 1 Aug 2017 11:19:46 +0200 Subject: [PATCH 0190/4095] typo --- tensor2tensor/data_generators/wmt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 456f36321..bcd29e1d4 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -574,7 +574,7 @@ class WMTEnCsTokens32k(WMTProblem): """Problem spec for WMT English-Czech translation.""" @property - def target_vocab_size(self): + def targeted_vocab_size(self): return 2**15 # 32768 @property From 8839cf96bf42095780f1414852fb945ecd295de4 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Tue, 1 Aug 2017 11:20:55 +0200 Subject: [PATCH 0191/4095] better walkthrough README The default batch_size for transformer_base_single_gpu has been decreased to 2048 in transform.py (it used to be 4096 I think). So if it is too much, the user must use a smaller value. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index edd6460d0..bb0f6f534 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ t2t-datagen \ --problem=$PROBLEM # Train -# * If you run out of memory, add --hparams='batch_size=2048' or even 1024. +# * If you run out of memory, add --hparams='batch_size=1024'. t2t-trainer \ --data_dir=$DATA_DIR \ --problems=$PROBLEM \ @@ -166,7 +166,7 @@ python -c "from tensor2tensor.models.transformer import Transformer" with `Modality` objects, which are specified per-feature in the dataset/task specification. * Support for multi-GPU machines and synchronous (1 master, many workers) and - asynchrounous (independent workers synchronizing through a parameter server) + asynchronous (independent workers synchronizing through a parameter server) [distributed training](https://github.com/tensorflow/tensor2tensor/tree/master/docs/distributed_training.md). * Easily swap amongst datasets and models by command-line flag with the data generation script `t2t-datagen` and the training script `t2t-trainer`. From 2ced78dbb2a9bb921ebd3e327c704efb790dc140 Mon Sep 17 00:00:00 2001 From: William <awwoof@hotmail.com> Date: Tue, 1 Aug 2017 11:08:24 +0100 Subject: [PATCH 0192/4095] Unify methods and started work on Bias --- tensor2tensor/models/common_attention.py | 207 ++++++++---------- tensor2tensor/models/common_attention_test.py | 62 ++++-- .../models/transformer_alternative.py | 7 +- 3 files changed, 138 insertions(+), 138 deletions(-) diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index abf989402..2004e1bac 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -345,24 +345,34 @@ def dot_product_attention(q, return tf.matmul(weights, v) -def masked_local_attention_1d( - q, k, v, block_length=128, mask_right=False, name=None): - """Attention to the source position and a neigborhood to the left of it. - The sequence is divided into blocks of length block_size. - Attention for a given query position can only see memory positions - less than or equal to the query position, in the corresponding block - and the previous block. +def local_attention_1d(q, k, v, bias=None, + block_length=128, look_right=True, use_whole_block=False, + truncate_bias=True, name=None): + """Attention to the source position and a neigborhood around it. - If mask_right is True, then a target position cannot see greater source + The sequence is divided into blocks of length block_size. Attention for a + given query position can only see memory positions within a certain number + of positions before and behind it. + + If look_right is True then each query will attend to block_length//2 + positions either side, otherwise it will attend to block_length previous positions. + If use_whole_block is True then no mask will be applied to the local blocks + meaning the full blocks are used (if look_right is True then the elements to + the right of the current position are still masked out). This allows use to + attend to more elements without additional overhead, but means we have + inconsistent window positions and sizes. + Args: - q: a Tensor with shape [batch, heads, length, depth_k] - k: a Tensor with shape [batch, heads, length, depth_k] - v: a Tensor with shape [batch, heads, length, depth_v] + q: a Tensor with shape [batch, heads, length_q, depth_k] + k: a Tensor with shape [batch, heads, length_kv, depth_k] + v: a Tensor with shape [batch, heads, length_kv, depth_v] + bias: Not currently used [batch, heads, length_q, length_k] block_length: an integer - mask_right: a bool + look_right: a bool + use_whole_block: a bool name: an optional string Returns: @@ -379,8 +389,9 @@ def masked_local_attention_1d( original_length = length - # If (length < 2 * block_length), then we use only one block. - block_length = tf.where(tf.less(length, block_length * 2), + #Pad to desired length + #If (length < 2 * block_length), then we use only one block. + block_length = tf.where(tf.less(length, block_length), length, block_length) padding_size = tf.mod(-length, block_length) length += padding_size @@ -389,25 +400,27 @@ def masked_local_attention_1d( padding = [[0, 0], [0, 0], [0, padding_size], [0, 0]] q = tf.pad(q, padding) - if mask_right: + if not look_right: #Add extra padding so we son't have to do an initial query extra_padding = [[0, 0], [0, 0], [block_length, padding_size], [0, 0]] + bp = [[0, 0], [0, 0], [0, padding_size], [block_length, padding_size]] else: #We shift everything over by half a block so query is in centre pad_right = block_length // 2 pad_left = block_length - pad_right extra_padding = [[0, 0], [0, 0], - [pad_left,padding_size+pad_right], [0, 0]] - + [pad_left, padding_size+pad_right], [0, 0]] + bp = [[0, 0], [0, 0], + [0, padding_size], [pad_left, padding_size+pad_right]] k = tf.pad(k, extra_padding) v = tf.pad(v, extra_padding) - - # compute attention for all subsequent query blocks. + # Reshape into blocks q = tf.reshape(q, [batch, heads, num_blocks, block_length, depth_k]) k = tf.reshape(k, [batch, heads, num_blocks+1, block_length, depth_k]) v = tf.reshape(v, [batch, heads, num_blocks+1, block_length, depth_v]) + # Get local blocks by slicing def local(x): """Create a local version of the keys or values.""" prev_block = tf.slice( @@ -415,108 +428,72 @@ def local(x): cur_block = tf.slice( x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) return tf.concat([prev_block, cur_block], 3) - local_k = local(k) local_v = local(v) - local_length = tf.shape(local_k)[3] # [batch, heads, num_blocks, block_length, local_length] attention = tf.matmul(q, local_k, transpose_b=True) + + # Apply bias (N.B: This is not currently working) + if bias is not None: + with tf.name_scope('bias'): + b_batch = tf.shape(bias)[0] + b_heads = tf.shape(bias)[1] + bias_ = bias + #bias = 1.0 + tf.clip_by_value(bias, -1.0, 1.0) + if truncate_bias: + # Use only the query dimension + bias = tf.expand_dims(bias[:,:,:,0], 2) + bias = tf.pad(bias, extra_padding, name='bias_pad_b')# 17, 5, 3 + bias = tf.reshape(bias, + [b_batch, b_heads, 1, num_blocks+1, block_length], + name='divide_blocks') + local_b = tf.reshape(local(bias), + [b_batch, b_heads, num_blocks, 1, -1], name='reshape_local') + else: + bias = tf.pad(bias, bp, name='pad') + bias = tf.reshape(bias, + [b_batch, b_heads, num_blocks, block_length, + num_blocks+1, block_length], name='divide_blocks') + bias = tf.transpose(bias, [4,2,0,1,3,5]) + bias = tf.reshape(bias, + [num_blocks*(num_blocks+1), b_batch, b_heads, + block_length, block_length], name='combine') + indices = (num_blocks+1)*tf.range(num_blocks) + prev_block = tf.gather(bias, indices) + cur_block = tf.gather(bias, indices+num_blocks) + local_b = tf.concat([prev_block, cur_block], 4) + local_b = tf.transpose(local_b, [1,2,0,3,4]) + return l-local_b + attention += local_b + + attention = tf.nn.softmax(attention) + + # Get local mask + if not use_whole_block: + good_part = tf.matrix_band_part( + tf.ones([block_length, local_length]), 0, tf.to_int64(block_length)) + elif not look_right: + good_part = tf.matrix_band_part( + tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) + else: + good_part = tf.ones([block_length, local_length]) - good_part = tf.matrix_band_part( - tf.ones([block_length, local_length]), 0, tf.to_int64(block_length)) - - good_part = tf.cast(good_part, tf.float64) + #good_part = tf.cast(good_part, tf.float64) attention *= tf.reshape(good_part, [1, 1, 1, block_length, local_length]) - attention = tf.nn.softmax(attention) + output = tf.matmul(attention, local_v) output = tf.reshape(output, [batch, heads, -1, depth_v]) - # remove added padding + # Remove added padding output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) output.set_shape(v_shape) return output - -############################################################################### -### Not used, left in for reference ########################################### - -def windowed_local_attention_1d(q, - k, - v, - window_start, - window_end, - bias, - *args): - """ Local window wrapper for dot product attention. Each element only - attends to the elements from window_start to window_end. This reduces - the computational complexity for long sequences at the expense of eliminating - long-term dependencies. - - N.B: For short input sequences this is much slower than just using - un-windowed attention. Use only for long sequences. - - Args: - window_size: an integer - q: a Tensor with shape [batch, heads, length_q, depth_k] - k: a Tensor with shape [batch, heads, length_kv, depth_k] - v: a Tensor with shape [batch, heads, length_kv, depth_v] - window_start: an integer Tensor with shape [length_q] - window_end: an integer Tensor with shape [length_q] - bias: bias Tensor (see attention_bias()) - - Returns: - A Tensor. - """ - with tf.name_scope("windowed"): - - # Wrapper function for dot product attention with a single query vector - def single(index_begin, index_end, q, k, v, bias): - #Normalise range - #Reshape to right shape - q = tf.expand_dims(q, 2) - bias = tf.expand_dims(bias, 3) - #Get slices - k = k[:,:,index_begin:index_end,:] - v = v[:,:,index_begin:index_end,:] - out = dot_product_attention(q, k, v, *args) - out = tf.squeeze(out, 2) - return out - - # We'll loop over each element of q, computing its corresponding output. - q = tf.transpose(q, [2, 0, 1, 3]) - bias = tf.transpose(bias, [3, 0, 1, 2]) - indices = tf.range(tf.shape(q)[0]) - out = tf.map_fn( - lambda ii: single( - window_start[ii], - window_end[ii], - q[ii], - k, - v, - bias[ii]), - indices, - dtype=tf.float32) - out = tf.transpose(out, [1, 2, 0, 3]) - return out - -def local_sliding_window(length, window_size, look_right=True): - indices = tf.range(length) - size = window_size - starts = tf.maximum(0, indices-size) - ends = tf.minimum(length-1, indices+size) - return starts, ends - -### ### -############################################################################### - - - - def multihead_attention(query_antecedent, memory_antecedent, bias, @@ -527,7 +504,8 @@ def multihead_attention(query_antecedent, dropout_rate, summaries=False, image_shapes=None, - window_size=None, + attention_type="dot_product", + block_length=128, name=None): """Multihead scaled-dot-product attention with input/output transformations. @@ -540,9 +518,11 @@ def multihead_attention(query_antecedent, output_depth: an integer num_heads: an integer dividing total_key_depth and total_value_depth dropout_rate: a floating point number - summaries: a boolean - window_size: option size of window for attention. Useful only for very long - sequence lengths. + image_shapes: optional tuple of integer scalars. + see comments for attention_image_summary() + attention_type: a string, either "dot_product" or "local" or + "local_mask_right" + block_length: an integer - relevant for "local_mask_right" name: an optional string Returns: @@ -576,14 +556,15 @@ def multihead_attention(query_antecedent, v = split_heads(v, num_heads) key_depth_per_head = total_key_depth // num_heads q *= key_depth_per_head**-0.5 - if window_size is None: + if attention_type == "dot_product": x = dot_product_attention( - q, k, v, bias, dropout_rate, summaries, image_shapes) + q, k, v, bias, dropout_rate, image_shapes) + elif attention_type == "local": + x = local_attention_1d(q, k, v, block_length=block_length) else: - length = tf.shape(q)[2] - window_start, window_end = local_sliding_window(length, window_size) - x = windowed_local_attention_1d( - q, k, v, window_start, window_end, bias, dropout_rate, False) + assert attention_type == "local_mask_right" + x = local_attention_1d( + q, k, v, block_length=block_length, look_right=False) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") return x diff --git a/tensor2tensor/models/common_attention_test.py b/tensor2tensor/models/common_attention_test.py index 14754794c..2e534ba1a 100644 --- a/tensor2tensor/models/common_attention_test.py +++ b/tensor2tensor/models/common_attention_test.py @@ -29,35 +29,53 @@ class CommonAttentionTest(tf.test.TestCase): def testLocalAttention(self): - #q = np.array([[[ [1.0, 0.0, 0.0, 0.0], - # [1.0, 0.0, 0.0, 0.0], - # [1.0, 0.0, 0.0, 0.0], - # [1.0, 0.0, 0.0, 0.0], - # [1.0, 0.0, 0.0, 0.0], - # [1.0, 0.0, 0.0, 0.0], - # [1.0, 0.0, 0.0, 0.0], - # [1.0, 0.0, 0.0, 0.0] ]]]) - #k = np.array([[[ [0.0, 0.0, 0.0, 0.0], - # [0.0, 0.0, 0.0, 0.0], - # [0.0, 0.0, 0.0, 0.0], - # [0.0, 0.0, 0.0, 0.0], - # [0.0, 0.0, 0.0, 0.0], - # [0.0, 0.0, 0.0, 0.0], - # [0.0, 0.0, 0.0, 0.0], - # [0.0, 0.0, 0.0, 0.0] ]]]) - #v = np.ones((1, 1, 8, 1)) + q = np.array([[[ [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0] ]]]) - q = np.random.rand(5, 7, 13, 3) - k = np.random.rand(5, 7, 13, 3) - v = np.random.rand(5, 7, 13, 11) + k = np.array([[[ [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0] ]]]) + + b = np.array([[[ [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] ]]]) + + #b = np.ones((1,1,8,8)) + #b = (1-b) * (-1e9) + v = np.ones((1, 1, 8, 1)) + + #q = np.random.rand(5, 7, 13, 3) + #k = np.random.rand(5, 7, 13, 3) + #v = np.random.rand(5, 7, 13, 11) + #b = np.random.rand(5, 1, 13, 1) with self.test_session() as session: q_ = tf.constant(q) k_ = tf.constant(k) v_ = tf.constant(v) - y = common_attention.masked_local_attention_1d(q_, k_, v_, block_length=tf.constant(3)) + b_ = tf.constant(b) + y = common_attention.local_attention_1d(q_, k_, v_, b_, block_length=tf.constant(2)) res = session.run(y) - self.assertEqual(res.shape, (5, 7, 13, 11)) + #print(q) + #rint(k) + print(res) + #self.assertEqual(res.shape, (5, 7, 13, 11)) if __name__ == "__main__": diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py index 5ea6942a4..d0e04f078 100644 --- a/tensor2tensor/models/transformer_alternative.py +++ b/tensor2tensor/models/transformer_alternative.py @@ -174,8 +174,9 @@ def alt_transformer_decoder(decoder_input, def bias_to_mask(bias): # We need masks of the form batch size x input sequences - # Biases seem to be of the form batch_size x 1 x input sequences x vec dim - # Squeeze out dim one, and get the first element of each vector + # Biases are of the form batch_size x num_heads x input sequences x + # output sequences. Squeeze out dim one, and get the first element of + # each vector. bias = tf.squeeze(bias, [1])[:,:,0] bias = - tf.clip_by_value(bias, -1.0, 1.0) @@ -189,7 +190,7 @@ def transformer_alt(): """Set of hyperparameters.""" hparams = transformer.transformer_base() hparams.batch_size = 2048 - hparams.num_hidden_layers = 3 + hparams.num_hidden_layers = 10 hparams.add_hparam("layers_per_layer", 4) hparams.add_hparam("composite_layer_type", "ravanbakhsh") #ravanbakhsh or reembedding #hparams.add_hparam("composite_layer_type", "reembedding") From 33d47f0ac2edf0a276ba7efc3bd7dfd893bdb9d1 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Thu, 27 Jul 2017 23:46:12 -0700 Subject: [PATCH 0193/4095] Add tests for dot product and local unmasked attention PiperOrigin-RevId: 163436731 --- .gitignore | 1 - README.md | 4 +- tensor2tensor/data_generators/wmt.py | 2 +- tensor2tensor/models/common_attention.py | 244 ++++++++++-------- tensor2tensor/models/common_attention_test.py | 84 +++--- tensor2tensor/models/common_layers.py | 146 ++++------- .../models/transformer_alternative.py | 90 +++---- 7 files changed, 265 insertions(+), 306 deletions(-) diff --git a/.gitignore b/.gitignore index fbd98dca5..c9dd3db88 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,6 @@ _pycache__/ # PyPI distribution artifacts. build/ dist/ -data/ # Sublime project files *.sublime-project diff --git a/README.md b/README.md index bb0f6f534..edd6460d0 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ t2t-datagen \ --problem=$PROBLEM # Train -# * If you run out of memory, add --hparams='batch_size=1024'. +# * If you run out of memory, add --hparams='batch_size=2048' or even 1024. t2t-trainer \ --data_dir=$DATA_DIR \ --problems=$PROBLEM \ @@ -166,7 +166,7 @@ python -c "from tensor2tensor.models.transformer import Transformer" with `Modality` objects, which are specified per-feature in the dataset/task specification. * Support for multi-GPU machines and synchronous (1 master, many workers) and - asynchronous (independent workers synchronizing through a parameter server) + asynchrounous (independent workers synchronizing through a parameter server) [distributed training](https://github.com/tensorflow/tensor2tensor/tree/master/docs/distributed_training.md). * Easily swap amongst datasets and models by command-line flag with the data generation script `t2t-datagen` and the training script `t2t-trainer`. diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index bcd29e1d4..456f36321 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -574,7 +574,7 @@ class WMTEnCsTokens32k(WMTProblem): """Problem spec for WMT English-Czech translation.""" @property - def targeted_vocab_size(self): + def target_vocab_size(self): return 2**15 # 32768 @property diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index 94d75b48d..95e982790 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -344,33 +344,23 @@ def dot_product_attention(q, return tf.matmul(weights, v) -def local_attention_1d(q, k, v, bias=None, - block_length=128, look_right=True, use_whole_block=False, - truncate_bias=True, name=None): - """Attention to the source position and a neigborhood around it. +def masked_local_attention_1d( + q, k, v, block_length=128, name=None): + """Attention to the source position and a neigborhood to the left of it. - The sequence is divided into blocks of length block_size. Attention for a - given query position can only see memory positions within a certain number - of positions before and behind it. + The sequence is divided into blocks of length block_size. + Attention for a given query position can only see memory positions + less than or equal to the query position, in the corresponding block + and the previous block. - If look_right is True then each query will attend to block_length//2 - positions either side, otherwise it will attend to block_length previous + If mask_right is True, then a target position cannot see greater source positions. - If use_whole_block is True then no mask will be applied to the local blocks - meaning the full blocks are used (if look_right is True then the elements to - the right of the current position are still masked out). This allows use to - attend to more elements without additional overhead, but means we have - inconsistent window positions and sizes. - Args: - q: a Tensor with shape [batch, heads, length_q, depth_k] - k: a Tensor with shape [batch, heads, length_kv, depth_k] - v: a Tensor with shape [batch, heads, length_kv, depth_v] - bias: Not currently used [batch, heads, length_q, length_k] + q: a Tensor with shape [batch, heads, length, depth_k] + k: a Tensor with shape [batch, heads, length, depth_k] + v: a Tensor with shape [batch, heads, length, depth_v] block_length: an integer - look_right: a bool - use_whole_block: a bool name: an optional string Returns: @@ -382,110 +372,146 @@ def local_attention_1d(q, k, v, bias=None, batch = tf.shape(q)[0] heads = tf.shape(q)[1] length = tf.shape(q)[2] + # If (length < 2 * block_length), then we use only one block. + block_length = tf.where(tf.less(length, block_length * 2), + length, block_length) depth_k = tf.shape(q)[3] depth_v = tf.shape(v)[3] - original_length = length - - #Pad to desired length - #If (length < block_length), then we use only one block. - block_length = tf.where(tf.less(length, block_length), - length, block_length) padding_size = tf.mod(-length, block_length) length += padding_size - num_blocks = tf.div(length, block_length) - padding = [[0, 0], [0, 0], [0, padding_size], [0, 0]] q = tf.pad(q, padding) + k = tf.pad(k, padding) + v = tf.pad(v, padding) + num_blocks = tf.div(length, block_length) - if not look_right: - #Add extra padding so we son't have to do an initial query - extra_padding = [[0, 0], [0, 0], [block_length, padding_size], [0, 0]] - bp = [[0, 0], [0, 0], [0, padding_size], [block_length, padding_size]] - else: - #We shift everything over by half a block so query is in centre - pad_right = block_length // 2 - pad_left = block_length - pad_right - extra_padding = [[0, 0], [0, 0], - [pad_left, padding_size+pad_right], [0, 0]] - bp = [[0, 0], [0, 0], - [0, padding_size], [pad_left, padding_size+pad_right]] - k = tf.pad(k, extra_padding) - v = tf.pad(v, extra_padding) - - # Reshape into blocks + # compute attention for the first query block. + first_q = tf.slice(q, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_k = tf.slice(k, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_v = tf.slice(v, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_output = dot_product_attention( + first_q, first_k, first_v, attention_bias_lower_triangle(block_length), + name="fist_block") + + # compute attention for all subsequent query blocks. q = tf.reshape(q, [batch, heads, num_blocks, block_length, depth_k]) - k = tf.reshape(k, [batch, heads, num_blocks+1, block_length, depth_k]) - v = tf.reshape(v, [batch, heads, num_blocks+1, block_length, depth_v]) + k = tf.reshape(k, [batch, heads, num_blocks, block_length, depth_k]) + v = tf.reshape(v, [batch, heads, num_blocks, block_length, depth_v]) - # Get local blocks by slicing def local(x): """Create a local version of the keys or values.""" prev_block = tf.slice( - x, [0, 0, 0, 0, 0], [-1, -1, num_blocks, -1, -1]) + x, [0, 0, 0, 0, 0], [-1, -1, num_blocks - 1, -1, -1]) cur_block = tf.slice( x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) return tf.concat([prev_block, cur_block], 3) local_k = local(k) local_v = local(v) - local_length = tf.shape(local_k)[3] + tail_q = tf.slice(q, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) - # [batch, heads, num_blocks, block_length, local_length] - attention = tf.matmul(q, local_k, transpose_b=True) - - # Apply bias (N.B: This is not currently working) - if bias is not None: - with tf.name_scope('bias'): - b_batch = tf.shape(bias)[0] - b_heads = tf.shape(bias)[1] - bias_ = bias - #bias = 1.0 + tf.clip_by_value(bias, -1.0, 1.0) - if truncate_bias: - # Use only the query dimension - bias = tf.expand_dims(bias[:,:,:,0], 2) - bias = tf.pad(bias, extra_padding, name='bias_pad_b')# 17, 5, 3 - bias = tf.reshape(bias, - [b_batch, b_heads, 1, num_blocks+1, block_length], - name='divide_blocks') - local_b = tf.reshape(local(bias), - [b_batch, b_heads, num_blocks, 1, -1], name='reshape_local') - else: - bias = tf.pad(bias, bp, name='pad') - bias = tf.reshape(bias, - [b_batch, b_heads, num_blocks, block_length, - num_blocks+1, block_length], name='divide_blocks') - bias = tf.transpose(bias, [4,2,0,1,3,5]) - bias = tf.reshape(bias, - [num_blocks*(num_blocks+1), b_batch, b_heads, - block_length, block_length], name='combine') - indices = (num_blocks+1)*tf.range(num_blocks) - prev_block = tf.gather(bias, indices) - cur_block = tf.gather(bias, indices+num_blocks) - local_b = tf.concat([prev_block, cur_block], 4) - local_b = tf.transpose(local_b, [1,2,0,3,4]) - return l-local_b - attention += local_b - - attention = tf.nn.softmax(attention) - - # Get local mask - if not use_whole_block: - good_part = tf.matrix_band_part( - tf.ones([block_length, local_length]), 0, tf.to_int64(block_length)) - elif not look_right: - good_part = tf.matrix_band_part( - tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) - else: - good_part = tf.ones([block_length, local_length]) + local_length = tf.shape(local_k)[3] - #good_part = tf.cast(good_part, tf.float64) - attention *= tf.reshape(good_part, [1, 1, 1, block_length, local_length]) + # [batch, heads, num_blocks - 1, block_length, local_length] + attention = tf.matmul(tail_q, local_k, transpose_b=True) - + # make sure source_pos <= target_pos + good_part = tf.matrix_band_part( + tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) + mask = (1.0 - good_part) * -1e9 + attention += tf.reshape(mask, [1, 1, 1, block_length, local_length]) + attention = tf.nn.softmax(attention) + # TODO(noam): figure out how to show a summary for the remaining blocks. + # The naive way currently causes errors due to empty tensors. + # output: [batch, heads, num_blocks-1, block_length, depth_v] output = tf.matmul(attention, local_v) output = tf.reshape(output, [batch, heads, -1, depth_v]) + output = tf.concat([first_output, output], axis=2) + output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) + output.set_shape(v_shape) + return output + + +def unmasked_local_attention_1d(q, k, v, block_length=128, filter_width=100, + name=None): + """strided block local self-attention. + + Args: + q: a Tensor with shape [batch, heads, length, depth_k] + k: a Tensor with shape [batch, heads, length, depth_k] + v: a Tensor with shape [batch, heads, length, depth_v] + block_length: an integer + filter_width: an integer indicating how much to look left. + name: an optional string - # Remove added padding + Returns: + a Tensor of shape [batch, heads, length, depth_v] + """ + with tf.variable_scope(name, default_name="local_self_attention_1d", + values=[q, k, v]): + v_shape = v.get_shape() + depth_v = tf.shape(v)[3] + batch_size = tf.shape(q)[0] + num_heads = tf.shape(q)[1] + original_length = tf.shape(q)[2] + # making sure q is a multiple of d + def pad_to_multiple(x, pad_length): + x_length = tf.shape(x)[2] + return tf.pad(x, [[0, 0], [0, 0], [0, -x_length % pad_length], [0, 0]]) + def pad_l_and_r(x, pad_length): + return tf.pad(x, [[0, 0], [0, 0], [pad_length, pad_length], [0, 0]]) + q = pad_to_multiple(q, block_length) + k = pad_to_multiple(k, block_length) + v = pad_to_multiple(v, block_length) + + # Setting up q blocks + new_q_shape = tf.shape(q) + # Setting up q blocks + q = tf.reshape(q, [new_q_shape[0], new_q_shape[1], + new_q_shape[2]//block_length, + block_length, new_q_shape[3]]) + + # Setting up k and v values + k = pad_l_and_r(k, filter_width) + v = pad_l_and_r(v, filter_width) + + length = tf.shape(k)[2] + full_filter_width = block_length + 2*filter_width + # getting gather indices + indices = tf.range(0, length, delta=1, name="index_range") + # making indices [1, length, 1] to appy convs + indices = tf.reshape(indices, [1, -1, 1]) + kernel = tf.expand_dims(tf.eye(full_filter_width), axis=1) + gather_indices = tf.nn.conv1d( + tf.cast(indices, tf.float32), + kernel, + block_length, + padding="VALID", + name="gather_conv") + + gather_indices = tf.squeeze(tf.cast(gather_indices, tf.int32), axis=0) + + # [length, batch, heads, dim] + k_t = tf.transpose(k, [2, 0, 1, 3]) + k_new = tf.gather(k_t, gather_indices) + + # [batch, heads, blocks, block_length, dim] + k_new = tf.transpose(k_new, [2, 3, 0, 1, 4]) + + attention_bias = tf.expand_dims( + tf.to_float(embedding_to_padding(k_new)) * -1e9, axis=-2) + + v_t = tf.transpose(v, [2, 0, 1, 3]) + v_new = tf.gather(v_t, gather_indices) + v_new = tf.transpose(v_new, [2, 3, 0, 1, 4]) + + logits = tf.matmul(q, k_new, transpose_b=True) + + attention = tf.nn.softmax(logits+attention_bias) + output = tf.matmul(attention, v_new) + + output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) + # Remove the padding if introduced output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) output.set_shape(v_shape) return output @@ -502,6 +528,7 @@ def multihead_attention(query_antecedent, image_shapes=None, attention_type="dot_product", block_length=128, + block_width=128, name=None): """Multihead scaled-dot-product attention with input/output transformations. @@ -516,9 +543,10 @@ def multihead_attention(query_antecedent, dropout_rate: a floating point number image_shapes: optional tuple of integer scalars. see comments for attention_image_summary() - attention_type: a string, either "dot_product" or "local" or - "local_mask_right" + attention_type: a string, either "dot_product" or "local_mask_right" or + "local_unmasked" block_length: an integer - relevant for "local_mask_right" + block_width: an integer - relevant for "local_unmasked" name: an optional string Returns: @@ -566,12 +594,12 @@ def multihead_attention(query_antecedent, if attention_type == "dot_product": x = dot_product_attention( q, k, v, bias, dropout_rate, image_shapes) - elif attention_type == "local": - x = local_attention_1d(q, k, v, block_length=block_length) + elif attention_type == "local_mask_right": + x = masked_local_attention_1d(q, k, v, block_length=block_length) else: - assert attention_type == "local_mask_right" - x = local_attention_1d( - q, k, v, block_length=block_length, look_right=False) + assert attention_type == "local_unmasked" + x = unmasked_local_attention_1d(q, k, v, block_length=block_length, + filter_width=block_width) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") return x diff --git a/tensor2tensor/models/common_attention_test.py b/tensor2tensor/models/common_attention_test.py index 2e534ba1a..78be4b645 100644 --- a/tensor2tensor/models/common_attention_test.py +++ b/tensor2tensor/models/common_attention_test.py @@ -1,4 +1,5 @@ -# Copyright 2017 Google Inc. +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for common layers.""" +"""Tests for common attention.""" from __future__ import absolute_import from __future__ import division @@ -28,54 +29,43 @@ class CommonAttentionTest(tf.test.TestCase): - def testLocalAttention(self): - q = np.array([[[ [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0] ]]]) - - k = np.array([[[ [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0] ]]]) - - b = np.array([[[ [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] ]]]) - - #b = np.ones((1,1,8,8)) - #b = (1-b) * (-1e9) - v = np.ones((1, 1, 8, 1)) + def testDotProductAttention(self): + x = np.random.rand(5, 7, 12, 32) + y = np.random.rand(5, 7, 12, 32) + with self.test_session() as session: + a = common_attention.dot_product_attention( + tf.constant(x, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), None) + session.run(tf.global_variables_initializer()) + res = session.run(a) + self.assertEqual(res.shape, (5, 7, 12, 32)) - #q = np.random.rand(5, 7, 13, 3) - #k = np.random.rand(5, 7, 13, 3) - #v = np.random.rand(5, 7, 13, 11) - #b = np.random.rand(5, 1, 13, 1) + def testLocalUnmaskedAttention(self): + x = np.random.rand(5, 4, 25, 16) + y = np.random.rand(5, 4, 25, 16) + with self.test_session() as session: + a = common_attention.unmasked_local_attention_1d( + tf.constant(x, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + block_length=4, filter_width=3) + session.run(tf.global_variables_initializer()) + res = session.run(a) + self.assertEqual(res.shape, (5, 4, 25, 16)) + def testLocalUnmaskedAttentionMatchingBlockLength(self): + x = np.random.rand(5, 4, 25, 16) + y = np.random.rand(5, 4, 25, 16) with self.test_session() as session: - q_ = tf.constant(q) - k_ = tf.constant(k) - v_ = tf.constant(v) - b_ = tf.constant(b) - y = common_attention.local_attention_1d(q_, k_, v_, b_, block_length=tf.constant(2)) - res = session.run(y) - #print(q) - #rint(k) - print(res) - #self.assertEqual(res.shape, (5, 7, 13, 11)) + a = common_attention.unmasked_local_attention_1d( + tf.constant(x, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + block_length=5, filter_width=3) + session.run(tf.global_variables_initializer()) + res = session.run(a) + self.assertEqual(res.shape, (5, 4, 25, 16)) if __name__ == "__main__": diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index ae6d0cede..e98531d88 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -1420,22 +1420,22 @@ def smoothing_cross_entropy(logits, labels, vocab_size, confidence): return xentropy - normalizing +def global_pool_1d(inputs, pooling_type="MAX", mask=None): + """Pool elements across the last dimension. -def global_pool_1d(inputs, pooling_type='MAX', mask=None): - """ - Pools elements across the last dimension. Useful to a list of vectors into a - single vector to get a representation of a set. - Concatenating - - Args - inputs: A tensor of dimensions batch_size x sequence_length x input_dims - containing the sequences of input vectors. - pooling_type: the pooling type to use, MAX or AVR - mask: A tensor of dimensions batch_size x sequence_length containing a - mask for the inputs with 1's for existing elements, and 0's elsewhere. - Returns - output: A tensor of dimensions batch_size x input_dims - dimension containing the sequences of transformed vectors. + Useful to convert a list of vectors into a single vector so as + to get a representation of a set. + + Args: + inputs: A tensor of dimensions batch_size x sequence_length x input_dims + containing the sequences of input vectors. + pooling_type: the pooling type to use, MAX or AVR + mask: A tensor of dimensions batch_size x sequence_length containing a + mask for the inputs with 1's for existing elements, and 0's elsewhere. + + Returns: + output: A tensor of dimensions batch_size x input_dims + dimension containing the sequences of transformed vectors. """ with tf.name_scope("global_pool", [inputs]): if mask is not None: @@ -1457,38 +1457,6 @@ def global_pool_1d(inputs, pooling_type='MAX', mask=None): return output - -def running_global_pool_1d(inputs, pooling_type='MAX'): - """ - Same global pool, but only for the elements up to the current element. Useful - for outputs where the state of future elements is not known. - Takes no mask as all elements up to the current element are assumed to exist. - Currently only supports maximum. Equivalent to using a lower triangle bias. - - Args - inputs: A tensor of dimensions batch_size x sequence_length x input_dims - containing the sequences of input vectors. - pooling_type: Pooling type to use. Currently only supports 'MAX'. - Returns - output: A tensor of dimensions batch_size x sequence_length x input_dims - dimension containing the running 'totals'. - """ - - with tf.name_scope("running_global_pool", [inputs]): - scan_fct = tf.maximum - - # Permute inputs so seq_length is first - elems = tf.transpose(inputs, [1, 0, 2]) - - # Perform scan - cumulatives = tf.scan(scan_fct, elems, swap_memory=True) - - # Permute output to get back to original order - output = tf.transpose(cumulatives, [1, 0, 2]) - - return output - - def linear_set_layer(layer_size, inputs, context=None, @@ -1502,19 +1470,21 @@ def linear_set_layer(layer_size, e.g. One can use global_pool_1d to get a representation of the set which can then be used as the context for the next layer. - Args - layer_size: Dimension to transform the input vectors to - inputs: A tensor of dimensions batch_size x sequence_length x input_dims - containing the sequences of input vectors. - context: A tensor of dimensions batch_size x context_dims or batch_size x - sequence_length x context_dims containing a global statistic about the - set. - dropout: Dropout probability. - activation_fn: The activation function to use. - Returns - output: A tensor of dimensions batch_size x sequence_length x output_dims - dimension containing the sequences of transformed vectors. + TODO: Add bias add (or control the biases used). + Args: + layer_size: Dimension to transform the input vectors to. + inputs: A tensor of dimensions batch_size x sequence_length x input_dims + containing the sequences of input vectors. + context: A tensor of dimensions batch_size x context_dims + containing a global statistic about the set. + activation_fn: The activation function to use. + dropout: Dropout probability. + name: name. + + Returns: + output: A tensor of dimensions batch_size x sequence_length x output_dims + dimension containing the sequences of transformed vectors. """ with tf.variable_scope(name, "linear_set_layer", [inputs]): # Apply 1D convolution to apply linear filter to each element @@ -1524,12 +1494,10 @@ def linear_set_layer(layer_size, # Apply the context if it exists. if context is not None: # Unfortunately tf doesn't support broadcasting via concat, but we can - # simply add the transformed context to get the same effect - if len(context.get_shape().as_list())==2: - context = tf.expand_dims(context, axis=1) - #context_size = context.get_shape().as_list()[-1] - cont_tfm = conv1d(context, layer_size, 1, - activation=None, name="cont_conv") + # simply add the transformed context to get the same effect. + context = tf.expand_dims(context, axis=1) + cont_tfm = conv1d( + context, layer_size, 1, activation=None, name="cont_conv") outputs += cont_tfm if activation_fn is not None: @@ -1544,7 +1512,6 @@ def linear_set_layer(layer_size, def ravanbakhsh_set_layer(layer_size, inputs, mask=None, - sequential=False, activation_fn=tf.nn.tanh, dropout=0.0, name=None): @@ -1552,35 +1519,26 @@ def ravanbakhsh_set_layer(layer_size, More parameter-efficient verstion of a linear-set-layer with context. - Args - layer_size: Dimension to transform the input vectors to. - inputs: A tensor of dimensions batch_size x sequence_length x vector - containing the sequences of input vectors. - mask: A tensor of dimensions batch_size x sequence_length containing a - mask for the inputs with 1's for existing elements, and 0's elsewhere. - sequential: If true, will use a running global pool so each element will - only depend on those before it. Set true if this layer is being used in - an ouput sequence. - Returns - output: A tensor of dimensions batch_size x sequence_length x vector - dimension containing the sequences of transformed vectors. + Args: + layer_size: Dimension to transform the input vectors to. + inputs: A tensor of dimensions batch_size x sequence_length x vector + containing the sequences of input vectors. + mask: A tensor of dimensions batch_size x sequence_length containing a + mask for the inputs with 1's for existing elements, and 0's elsewhere. + activation_fn: The activation function to use. + dropout: dropout. + name: name. + + Returns: + output: A tensor of dimensions batch_size x sequence_length x vector + dimension containing the sequences of transformed vectors. """ with tf.variable_scope(name, "ravanbakhsh_set_layer", [inputs]): - - if sequential: - output = linear_set_layer( - layer_size, - inputs - running_global_pool_1d(inputs), - activation_fn=activation_fn, - name=name) - else: - output = linear_set_layer( - layer_size, - inputs - tf.expand_dims(global_pool_1d(inputs, mask=mask), axis=1), - activation_fn=activation_fn, - name=name) - - return output - + output = linear_set_layer( + layer_size, + inputs - tf.expand_dims(global_pool_1d(inputs, mask=mask), axis=1), + activation_fn=activation_fn, + dropout=dropout, + name=name) return output diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py index 78398471a..62413c325 100644 --- a/tensor2tensor/models/transformer_alternative.py +++ b/tensor2tensor/models/transformer_alternative.py @@ -50,13 +50,17 @@ def model_fn_body(self, features): inputs = common_layers.flatten4d3d(inputs) targets = common_layers.flatten4d3d(targets) + (encoder_input, encoder_attention_bias, + _) = transformer.transformer_prepare_encoder(inputs, target_space, hparams) + (decoder_input, + decoder_self_attention_bias) = transformer.transformer_prepare_decoder( + targets, hparams) - (encoder_input, encoder_attention_bias, _) = (transformer.\ - transformer_prepare_encoder(inputs, target_space, hparams) ) - (decoder_input, decoder_self_attention_bias) = transformer.\ - transformer_prepare_decoder(targets, hparams) - - encoder_mask = bias_to_mask(encoder_attention_bias) + # We need masks of the form batch size x input sequences + # Biases seem to be of the form batch_size x 1 x input sequences x vec dim + # Squeeze out dim one, and get the first element of each vector. + encoder_mask = tf.squeeze(encoder_attention_bias, [1])[:, :, 0] + decoder_mask = tf.squeeze(decoder_self_attention_bias, [1])[:, :, 0] def residual_fn(x, y): return common_layers.layer_norm(x + tf.nn.dropout( @@ -64,20 +68,20 @@ def residual_fn(x, y): encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) - encoder_output = alt_transformer_encoder( encoder_input, residual_fn, encoder_mask, hparams) decoder_output = alt_transformer_decoder( - decoder_input, encoder_output, residual_fn, + decoder_input, encoder_output, residual_fn, decoder_mask, encoder_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) return decoder_output - -def composite_layer(inputs, mask, hparams, for_output=False): + +def composite_layer(inputs, mask, hparams): + """Composite layer.""" x = inputs # Applies ravanbakhsh on top of each other. @@ -85,32 +89,28 @@ def composite_layer(inputs, mask, hparams, for_output=False): for layer in xrange(hparams.layers_per_layer): with tf.variable_scope(".%d" % layer): x = common_layers.ravanbakhsh_set_layer( - hparams.hidden_size, - x, - mask=mask, - sequential=for_output, - dropout=hparams.relu_dropout) - - # Transforms elements to get a context, and then uses this in a final layer + hparams.hidden_size, + x, + mask=mask, + dropout=0.0) + + # Transforms elements to get a context, and then uses this in a final layer. elif hparams.composite_layer_type == "reembedding": # Transform elements n times and then pool. for layer in xrange(hparams.layers_per_layer): - with tf.variable_scope("sub_layer_%d" % layer): + with tf.variable_scope(".%d" % layer): x = common_layers.linear_set_layer( - hparams.hidden_size, - x, - dropout=hparams.relu_dropout) - if for_output: - context = common_layers.running_global_pool_1d(x) - else: - context = common_layers.global_pool_1d(x, mask=mask) - - #Final layer - x = common_layers.linear_set_layer( hparams.hidden_size, x, - context=context, - dropout=hparams.relu_dropout) + dropout=0.0) + context = common_layers.global_pool_1d(x, mask=mask) + + # Final layer. + x = common_layers.linear_set_layer( + hparams.hidden_size, + x, + context=context, + dropout=0.0) return x @@ -120,12 +120,10 @@ def alt_transformer_encoder(encoder_input, mask, hparams, name="encoder"): - """Alternative encoder.""" x = encoder_input with tf.variable_scope(name): - x = encoder_input for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): x = residual_fn(x, composite_layer(x, mask, hparams)) @@ -136,12 +134,14 @@ def alt_transformer_encoder(encoder_input, def alt_transformer_decoder(decoder_input, encoder_output, residual_fn, + mask, encoder_decoder_attention_bias, hparams, name="decoder"): + """Alternative decoder.""" + x = decoder_input with tf.variable_scope(name): - x = decoder_input for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): @@ -156,33 +156,17 @@ def alt_transformer_decoder(decoder_input, hparams.attention_dropout, name="encdec_attention") - x_ = residual_fn(x_, composite_layer(x_, None, hparams, for_output=True)) + x_ = residual_fn(x_, composite_layer(x_, mask, hparams)) x = residual_fn(x, x_) - - return x - - -def bias_to_mask(bias): - # We need masks of the form batch size x input sequences - # Biases are of the form batch_size x num_heads x input sequences x - # output sequences. Squeeze out dim one, and get the first element of - # each vector. - - bias = tf.squeeze(bias, [1])[:,:,0] - bias = - tf.clip_by_value(bias, -1.0, 1.0) - mask = 1 - bias - return mask + return x @registry.register_hparams def transformer_alt(): """Set of hyperparameters.""" hparams = transformer.transformer_base() - hparams.batch_size = 2048 - hparams.num_hidden_layers = 10 + hparams.batch_size = 64 hparams.add_hparam("layers_per_layer", 4) - hparams.add_hparam("composite_layer_type", "ravanbakhsh") #ravanbakhsh or reembedding - #hparams.add_hparam("composite_layer_type", "reembedding") - + hparams.add_hparam("composite_layer_type", "reembedding") return hparams From 7db5ee881c6e9b961ff83fd316c6e983951fdf76 Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Fri, 28 Jul 2017 14:12:04 -0700 Subject: [PATCH 0194/4095] Forgot to change trainer_utils. Added an option for "label" input type and printing out image decodes without decoding with the vocabulary. PiperOrigin-RevId: 163516796 --- tensor2tensor/utils/trainer_utils.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 260ec6a00..91a638529 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -130,6 +130,7 @@ "<beam1>\t<beam2>..\t<input>") flags.DEFINE_integer("decode_max_input_size", -1, "Maximum number of ids in input. Or <= 0 for no max.") +flags.DEFINE_bool("identity_output", False, "To print the output as identity") def _save_until_eos(hyp): @@ -766,8 +767,11 @@ def decode_interactively(estimator): else: tf.logging.info(beam_string) else: - tf.logging.info( - targets_vocab.decode(_save_until_eos(result["outputs"].flatten()))) + if FLAGS.identity_output: + tf.logging.info(" ".join(map(str, result["outputs"].flatten()))) + else: + tf.logging.info(targets_vocab.decode(_save_until_eos( + result["outputs"].flatten()))) def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, @@ -843,7 +847,7 @@ def _interactive_input_fn(hparams): const_array_size = 10000 while True: prompt = ("INTERACTIVE MODE num_samples=%d decode_length=%d \n" - " it=<input_type> ('text' or 'image')\n" + " it=<input_type> ('text' or 'image' or 'label')\n" " pr=<problem_num> (set the problem number)\n" " in=<input_problem> (set the input problem number)\n" " ou=<output_problem> (set the output problem number)\n" @@ -894,6 +898,13 @@ def _interactive_input_fn(hparams): "inputs": img, "problem_choice": np.array(problem_id) } + elif input_type == "label": + input_ids = [int(input_string)] + x = [num_samples, decode_length, len(input_ids)] + input_ids + yield problem_id, { + "inputs": np.array(x), + "problem_choice": np.array(problem_id) + } else: raise Exception("Unsupported input type.") From 0416d957cc04f6778cb18d11e4426522e1c69b1d Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 28 Jul 2017 14:54:37 -0700 Subject: [PATCH 0195/4095] Prevent extremely unlikely bug around escaping characters. PiperOrigin-RevId: 163522446 --- tensor2tensor/data_generators/text_encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index ff284bcc6..21215472d 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -54,7 +54,7 @@ # '\\' is converted to '\' # '\213;' is converted to unichr(213) _UNESCAPE_REGEX = re.compile(r"\\u|\\\\|\\([0-9]+);") -_ESCAPE_CHARS = set(u"\\_;0123456789") +_ESCAPE_CHARS = set(u"\\_u;0123456789") def native_to_unicode_py2(s): From d79d67ac04df89a2a161cf082083a56eb0aa33dd Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Sun, 30 Jul 2017 22:37:10 -0700 Subject: [PATCH 0196/4095] Correct generator, refactor T2TModel.model_fn to return a dict of losses. PiperOrigin-RevId: 163653918 --- tensor2tensor/data_generators/text_encoder.py | 2 +- tensor2tensor/data_generators/wmt.py | 5 +- tensor2tensor/models/bluenet_test.py | 2 +- tensor2tensor/models/bytenet_test.py | 2 +- tensor2tensor/models/common_hparams.py | 4 +- tensor2tensor/models/gene_expression_test.py | 4 +- tensor2tensor/models/lstm_test.py | 4 +- tensor2tensor/models/multimodel_test.py | 2 +- tensor2tensor/models/neural_gpu_test.py | 2 +- tensor2tensor/models/slicenet_test.py | 2 +- tensor2tensor/models/transformer.py | 16 +++--- tensor2tensor/models/transformer_test.py | 2 +- tensor2tensor/models/xception_test.py | 2 +- tensor2tensor/utils/t2t_model.py | 21 +++---- tensor2tensor/utils/trainer_utils.py | 55 +++++++++++-------- 15 files changed, 66 insertions(+), 59 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 21215472d..cd6ca0eea 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -427,7 +427,7 @@ def bisect(min_val, max_val): token_counts, present_count, num_iterations) # If min_val == max_val, we can't do any better than this. - if subtokenizer.vocab_size == target_size or min_val == max_val: + if subtokenizer.vocab_size == target_size or min_val >= max_val: return subtokenizer if subtokenizer.vocab_size > target_size: diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 456f36321..4975971c6 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -271,7 +271,7 @@ def bi_vocabs_token_generator(source_path, # English-Czech datasets _ENCS_TRAIN_DATASETS = [ [ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long + "http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long ("training-parallel-nc-v11/news-commentary-v11.cs-en.en", "training-parallel-nc-v11/news-commentary-v11.cs-en.cs") ], @@ -336,8 +336,7 @@ def _compile_data(tmp_dir, datasets, filename): lang1_filepath = os.path.join(tmp_dir, lang1_filename) lang2_filepath = os.path.join(tmp_dir, lang2_filename) - if not os.path.exists(compressed_filepath): - generator_utils.maybe_download(tmp_dir, compressed_filename, url) + generator_utils.maybe_download(tmp_dir, compressed_filename, url) if not (os.path.exists(lang1_filepath) and os.path.exists(lang2_filepath)): # For .tar.gz and .tgz files, we read compressed. diff --git a/tensor2tensor/models/bluenet_test.py b/tensor2tensor/models/bluenet_test.py index d4ce85b1a..70b8defe9 100644 --- a/tensor2tensor/models/bluenet_test.py +++ b/tensor2tensor/models/bluenet_test.py @@ -46,7 +46,7 @@ def testBlueNet(self): } model = bluenet.BlueNet( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - sharded_logits, _, _ = model.model_fn(features) + sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/bytenet_test.py b/tensor2tensor/models/bytenet_test.py index 738b84251..536d348e7 100644 --- a/tensor2tensor/models/bytenet_test.py +++ b/tensor2tensor/models/bytenet_test.py @@ -45,7 +45,7 @@ def testByteNet(self): } model = bytenet.ByteNet( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - sharded_logits, _, _ = model.model_fn(features) + sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index e36b2e4e1..cf58b33e8 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -88,10 +88,10 @@ def basic_params1(): # modality, add an entry to this semicolon-separated string. Entries are # formatted "feature_name:modality_type:modality_name", e.g. # "inputs:image:small_image_modality;other_inputs:audio:identity". - input_modalities="", + input_modalities="default", # We don't use empty string in params. # To override the default target modality, specify # "modality_type:modality_name", e.g. "image:small_image_modality". - target_modality="") + target_modality="default") class RangedHParams(object): diff --git a/tensor2tensor/models/gene_expression_test.py b/tensor2tensor/models/gene_expression_test.py index bec5268fd..a43eda97a 100644 --- a/tensor2tensor/models/gene_expression_test.py +++ b/tensor2tensor/models/gene_expression_test.py @@ -55,8 +55,8 @@ def _testModel(self, hparams, model_cls): "targets": tf.constant(targets, dtype=tf.float32), } p_hparams, = hparams.problems - sharded_logits, _, _ = model_cls(hparams, tf.contrib.learn.ModeKeys.TRAIN, - p_hparams).model_fn(features) + sharded_logits, _ = model_cls(hparams, tf.contrib.learn.ModeKeys.TRAIN, + p_hparams).model_fn(features) logits = tf.concat(sharded_logits, 0) with self.test_session() as sess: diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index 1e542a666..31380afa5 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -46,7 +46,7 @@ def testLSTMSeq2Seq(self): } model = lstm.LSTMSeq2Seq( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - sharded_logits, _, _ = model.model_fn(features) + sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) @@ -70,7 +70,7 @@ def testLSTMSeq2SeqAttention(self): } model = lstm.LSTMSeq2SeqAttention( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - sharded_logits, _, _ = model.model_fn(features) + sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/multimodel_test.py b/tensor2tensor/models/multimodel_test.py index 03990594b..cf109beb4 100644 --- a/tensor2tensor/models/multimodel_test.py +++ b/tensor2tensor/models/multimodel_test.py @@ -45,7 +45,7 @@ def testMultiModel(self): } model = multimodel.MultiModel( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - sharded_logits, _, _ = model.model_fn(features) + sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/neural_gpu_test.py b/tensor2tensor/models/neural_gpu_test.py index 3d1cc0562..46c01f403 100644 --- a/tensor2tensor/models/neural_gpu_test.py +++ b/tensor2tensor/models/neural_gpu_test.py @@ -52,7 +52,7 @@ def testNeuralGPU(self): } model = neural_gpu.NeuralGPU( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - shadred_logits, _, _ = model.model_fn(features) + shadred_logits, _ = model.model_fn(features) logits = tf.concat(shadred_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/slicenet_test.py b/tensor2tensor/models/slicenet_test.py index 692799571..54b57a9f2 100644 --- a/tensor2tensor/models/slicenet_test.py +++ b/tensor2tensor/models/slicenet_test.py @@ -45,7 +45,7 @@ def testSliceNet(self): } model = slicenet.SliceNet( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - sharded_logits, _, _ = model.model_fn(features) + sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index c45e88577..1d4ee04ed 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -23,8 +23,6 @@ from __future__ import division from __future__ import print_function -import copy - # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin @@ -43,8 +41,7 @@ class Transformer(t2t_model.T2TModel): """Attention net. See file docstring.""" def model_fn_body(self, features): - # Remove dropout if not training - hparams = copy.copy(self._hparams) + hparams = self._hparams targets = features["targets"] inputs = features["inputs"] target_space = features["target_space_id"] @@ -541,13 +538,16 @@ def transformer_parameter_attention_b(): return hparams -@registry.register_ranged_hparams("transformer_big_single_gpu") -def transformer_range1(rhp): +@registry.register_ranged_hparams("transformer_base") +def transformer_base_range(rhp): """Small range of hyperparameters.""" - hparams = transformer_big_single_gpu() + hparams = transformer_base() common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp) - + # After starting from base, set intervals for some parameters. rhp.set_float("learning_rate", 0.3, 3.0, scale=rhp.LOG_SCALE) + rhp.set_discrete("learning_rate_warmup_steps", + [1000, 2000, 4000, 8000, 16000]) rhp.set_float("initializer_gain", 0.5, 2.0) + rhp.set_float("optimizer_adam_beta2", 0.85, 0.95) rhp.set_float("optimizer_adam_beta2", 0.97, 0.99) rhp.set_float("weight_decay", 0.0, 2.0) diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index a7f1fc9ae..8f4d26339 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -51,7 +51,7 @@ def _testTransformer(self, net): "target_space_id": tf.constant(1, dtype=tf.int32), } model = net(hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - shadred_logits, _, _ = model.model_fn(features) + shadred_logits, _ = model.model_fn(features) logits = tf.concat(shadred_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/models/xception_test.py b/tensor2tensor/models/xception_test.py index bf434aeac..776d1306a 100644 --- a/tensor2tensor/models/xception_test.py +++ b/tensor2tensor/models/xception_test.py @@ -45,7 +45,7 @@ def testXception(self): } model = xception.Xception( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - sharded_logits, _, _ = model.model_fn(features) + sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index f67cc9540..835a60259 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -104,13 +104,14 @@ def _create_modalities(self, problem_hparams, hparams): input_modality_overrides = {} for override_str in hparams.input_modalities.split(";"): - parts = override_str.split(":") - feature_name = parts[0] - modality_name = ":".join(parts[1:]) - input_modality_overrides[feature_name] = modality_name + if override_str != "default": + parts = override_str.split(":") + feature_name = parts[0] + modality_name = ":".join(parts[1:]) + input_modality_overrides[feature_name] = modality_name target_modality_name = None - if hparams.target_modality: + if hparams.target_modality and hparams.target_modality != "default": target_modality_name = hparams.target_modality input_modality = {} @@ -206,7 +207,7 @@ def symbols_to_logits_fn(ids): features["targets"] = ids self._coverage = None - sharded_logits, _, _ = self.model_fn( + sharded_logits, _ = self.model_fn( features, False, last_position_only=last_position_only) # now self._coverage is a coverage tensor for the first datashard. # it has shape [batch_size] and contains floats between 0 and @@ -330,7 +331,7 @@ def sample(self, features, last_position_only=False): Returns: samples: an integer `Tensor`. """ - sharded_logits, _, _ = self.model_fn( + sharded_logits, _ = self.model_fn( features, False, last_position_only=last_position_only) if self._hparams.sampling_method == "argmax": sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) @@ -362,7 +363,7 @@ def _shard_features(self, features): # pylint: disable=missing-docstring return sharded_features def model_fn(self, features, skip=False, last_position_only=False): - """Computes the entire model and produces sharded logits and training loss. + """Computes the entire model and produces sharded logits and losses. Args: features: A dictionary of feature name to tensor. @@ -372,7 +373,7 @@ def model_fn(self, features, skip=False, last_position_only=False): Returns: sharded_logits: a list of `Tensor`s, one per datashard. - training_loss: a floating point `Scalar`. + losses: a dictionary: {loss-name (string): floating point `Scalar`}. """ start_time = time.time() dp = self._data_parallelism @@ -447,7 +448,7 @@ def model_fn(self, features, skip=False, last_position_only=False): training_loss = None tf.logging.info("This model_fn took %.3f sec." % (time.time() - start_time)) - return sharded_logits, training_loss, extra_loss + return sharded_logits, {"training": training_loss, "extra": extra_loss} def model_fn_body_sharded(self, sharded_features): """Mixture-of-experts models will override this function. diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 91a638529..9900da459 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -228,6 +228,24 @@ def log_registry(): sys.exit(0) +def add_problem_hparams(hparams, problems): + """Add problem hparams for the problems.""" + hparams.problems = [] + hparams.problem_instances = [] + for problem_name in problems.split("-"): + try: + problem = registry.problem(problem_name) + p_hparams = problem.internal_hparams(hparams) + except ValueError: + problem = None + p_hparams = problem_hparams.problem_hparams(problem_name, hparams) + + hparams.problem_instances.append(problem) + hparams.problems.append(p_hparams) + + return hparams + + def create_hparams(params_id, data_dir): """Returns hyperparameters, including any flag value overrides. @@ -248,21 +266,7 @@ def create_hparams(params_id, data_dir): if FLAGS.hparams: hparams = hparams.parse(FLAGS.hparams) - # Add hparams for the problems - hparams.problems = [] - hparams.problem_instances = [] - for problem_name in FLAGS.problems.split("-"): - try: - problem = registry.problem(problem_name) - p_hparams = problem.internal_hparams(hparams) - except ValueError: - problem = None - p_hparams = problem_hparams.problem_hparams(problem_name, hparams) - - hparams.problem_instances.append(problem) - hparams.problems.append(p_hparams) - - return hparams + return add_problem_hparams(hparams, FLAGS.problems) def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): @@ -469,21 +473,24 @@ def nth_model(n): # On worker 0 also build graph for problems <= 1. # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1) - sharded_logits, training_loss, extra_loss = model_class.model_fn( + sharded_logits, losses_dict = model_class.model_fn( features, skip=(skipping_is_on and skip_this_one)) with tf.variable_scope("losses_avg", reuse=True): - loss_moving_avg = tf.get_variable("problem_%d/training_loss" % n) - o1 = loss_moving_avg.assign(loss_moving_avg * 0.9 + training_loss * 0.1) - loss_moving_avg = tf.get_variable("problem_%d/extra_loss" % n) - o2 = loss_moving_avg.assign(loss_moving_avg * 0.9 + extra_loss * 0.1) + total_loss, ops = 0.0, [] + for loss_key, loss_value in losses_dict.iteritems(): + loss_moving_avg = tf.get_variable("problem_%d/%s_loss" + % (n, loss_key)) + ops.append(loss_moving_avg.assign( + loss_moving_avg * 0.9 + loss_value * 0.1)) + total_loss += loss_value loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) - total_loss = training_loss + extra_loss - o3 = loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1) + ops.append(loss_moving_avg.assign( + loss_moving_avg * 0.9 + total_loss * 0.1)) with tf.variable_scope("train_stats"): # Count steps for this problem. problem_steps = tf.get_variable( "problem_%d_steps" % n, initializer=0, trainable=False) - o4 = problem_steps.assign_add(1) - with tf.control_dependencies([o1, o2, o3, o4]): # Make sure the ops run. + ops.append(problem_steps.assign_add(1)) + with tf.control_dependencies(ops): # Make sure the ops run. # Ensure the loss is a scalar here. total_loss = tf.reshape(total_loss, [], name="total_loss_control_id") return [total_loss] + sharded_logits # Need to flatten for cond later. From 401903019f896d4f8cb31f39d2f492e09cb5d0dd Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 31 Jul 2017 11:03:43 -0700 Subject: [PATCH 0197/4095] More corrections to WMT data-sets. PiperOrigin-RevId: 163717751 --- tensor2tensor/data_generators/wmt.py | 46 ++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 4975971c6..c9b43d507 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -193,9 +193,9 @@ def bi_vocabs_token_generator(source_path, _ENDE_TRAIN_DATASETS = [ [ - "http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long - ("training-parallel-nc-v11/news-commentary-v11.de-en.en", - "training-parallel-nc-v11/news-commentary-v11.de-en.de") + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long + ("training/news-commentary-v12.de-en.en", + "training/news-commentary-v12.de-en.de") ], [ "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", @@ -250,7 +250,7 @@ def bi_vocabs_token_generator(source_path, _ZHEN_TEST_DATASETS = [[ "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.zh", "dev/newsdev2017-zhen-ref.en") + ("dev/newsdev2017-zhen-src.zh.sgm", "dev/newsdev2017-zhen-ref.en.sgm") ]] # For Macedonian-English the SETimes corpus @@ -271,9 +271,9 @@ def bi_vocabs_token_generator(source_path, # English-Czech datasets _ENCS_TRAIN_DATASETS = [ [ - "http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long - ("training-parallel-nc-v11/news-commentary-v11.cs-en.en", - "training-parallel-nc-v11/news-commentary-v11.cs-en.cs") + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long + ("training/news-commentary-v12.cs-en.en", + "training/news-commentary-v12.cs-en.cs") ], [ "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", @@ -322,6 +322,23 @@ def ende_bpe_token_generator(data_dir, tmp_dir, train): EOS) +def _preprocess_sgm(line, is_sgm): + """Preprocessing to strip tags in SGM files.""" + if not is_sgm: + return line + # In SGM files, remove <srcset ...>, <p>, <doc ...> lines. + if line.startswith("<srcset") or line.startswith("</srcset"): + return "" + if line.startswith("<doc") or line.startswith("</doc"): + return "" + if line.startswith("<p>") or line.startswith("</p>"): + return "" + # Strip <seg> tags. + if line.startswith("<seg") and line.endswith("</seg>"): + i = line.index(">") + return line[i+1:-6] # Strip first <seg ...> and last </seg>. + + def _compile_data(tmp_dir, datasets, filename): """Concatenate all `datasets` and save to `filename`.""" filename = os.path.join(tmp_dir, filename) @@ -335,6 +352,8 @@ def _compile_data(tmp_dir, datasets, filename): lang1_filename, lang2_filename = dataset[1] lang1_filepath = os.path.join(tmp_dir, lang1_filename) lang2_filepath = os.path.join(tmp_dir, lang2_filename) + is_sgm = (lang1_filename.endswith("sgm") and + lang2_filename.endswith("sgm")) generator_utils.maybe_download(tmp_dir, compressed_filename, url) if not (os.path.exists(lang1_filepath) and @@ -355,8 +374,11 @@ def _compile_data(tmp_dir, datasets, filename): with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: line1, line2 = lang1_file.readline(), lang2_file.readline() while line1 or line2: - lang1_resfile.write(line1.strip() + "\n") - lang2_resfile.write(line2.strip() + "\n") + line1res = _preprocess_sgm(line1, is_sgm) + line2res = _preprocess_sgm(line2, is_sgm) + if line1res or line2res: + lang1_resfile.write(line1res.strip() + "\n") + lang2_resfile.write(line2res.strip() + "\n") line1, line2 = lang1_file.readline(), lang2_file.readline() return filename @@ -433,8 +455,8 @@ def train_generator(self, data_dir, tmp_dir, train): source_vocab_size = self.targeted_vocab_size target_vocab_size = self.targeted_vocab_size datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] + source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] source_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, "vocab.zh.%d" % source_vocab_size, source_vocab_size, source_datasets) @@ -573,7 +595,7 @@ class WMTEnCsTokens32k(WMTProblem): """Problem spec for WMT English-Czech translation.""" @property - def target_vocab_size(self): + def targeted_vocab_size(self): return 2**15 # 32768 @property From cd006ff6a7b5688e2c2a527d28b2d62afe5d97e9 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Mon, 31 Jul 2017 13:28:44 -0700 Subject: [PATCH 0198/4095] Deep-copy the model hparams between modes to eliminate side-effects PiperOrigin-RevId: 163739337 --- tensor2tensor/utils/t2t_model.py | 8 ----- tensor2tensor/utils/trainer_utils.py | 46 ++++++++++++++++++---------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 835a60259..95774dabc 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -28,7 +28,6 @@ from tensor2tensor.utils import beam_search from tensor2tensor.utils import expert_utils as eu -from tensor2tensor.utils import modality from tensor2tensor.utils import registry import tensorflow as tf @@ -116,11 +115,6 @@ def _create_modalities(self, problem_hparams, hparams): input_modality = {} for f, modality_spec in six.iteritems(problem_hparams.input_modality): - if isinstance(modality_spec, modality.Modality): - # This function has been previously run (e.g. for training and now is - # being called for eval) and the modalities have already been - # constructed. Return. - return if f in input_modality_overrides: _warn_changed_modality_type(input_modality_overrides[f], modality_spec[0], f) @@ -129,8 +123,6 @@ def _create_modalities(self, problem_hparams, hparams): problem_hparams.input_modality = input_modality target_modality_spec = problem_hparams.target_modality - if isinstance(target_modality_spec, modality.Modality): - return if target_modality_name: _warn_changed_modality_type(target_modality_name, target_modality_spec[0], "target") diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 9900da459..ef1d1dcb3 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function +import copy import math import operator import os @@ -415,11 +416,22 @@ def model_fn(features, targets, mode): Returns: A tuple consisting of the prediction, loss, and train_op. """ + # Deep-copy the model hparams between modes to eliminate + # side-effects caused by abuse of the linked problem_hparams + # objects which are used to share modality objects between + # problems. We do not want to share the modality objects between + # modes, since the modality objects may decide to do something + # mode-specific. A better fix would be to stop abusing the + # hparams in this way and instead use a separate dictionary to + # share the modality objects between problems. This dictionary + # could be created once per mode and passed to the constructor of + # t2t_model. + my_hp = copy.deepcopy(hparams) if mode == tf.contrib.learn.ModeKeys.INFER: if FLAGS.decode_interactive: - features = _interactive_input_tensor_to_features_dict(features, hparams) + features = _interactive_input_tensor_to_features_dict(features, my_hp) elif FLAGS.decode_from_file: - features = _decode_input_tensor_to_features_dict(features, hparams) + features = _decode_input_tensor_to_features_dict(features, my_hp) # A dictionary containing: # - problem_choice: A Tensor containing an integer indicating which problem # was selected for this run. @@ -451,9 +463,9 @@ def model_fn(features, targets, mode): def nth_model(n): """Build the model for the n-th problem, plus some added variables.""" model_class = registry.model(model)( - hparams, + my_hp, mode, - hparams.problems[n], + my_hp.problems[n], n, dp, _ps_devices(all_workers=True)) @@ -467,8 +479,8 @@ def nth_model(n): alpha=FLAGS.decode_alpha, decode_length=FLAGS.decode_extra_length) # In distributed mode, we build graph for problem=0 and problem=worker_id. - skipping_is_on = hparams.problem_choice == "distributed" and train - problem_worker_id = FLAGS.worker_id % len(hparams.problems) + skipping_is_on = my_hp.problem_choice == "distributed" and train + problem_worker_id = FLAGS.worker_id % len(my_hp.problems) skip_this_one = n != 0 and n % FLAGS.worker_replicas != problem_worker_id # On worker 0 also build graph for problems <= 1. # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. @@ -496,7 +508,7 @@ def nth_model(n): return [total_loss] + sharded_logits # Need to flatten for cond later. result_list = _cond_on_index(nth_model, features["problem_choice"], 0, - len(hparams.problems) - 1) + len(my_hp.problems) - 1) if mode == tf.contrib.learn.ModeKeys.INFER: # Beam search in sequence model returns both decodes withe key "outputs" @@ -532,11 +544,11 @@ def nth_model(n): # Some training statistics. with tf.name_scope("training_stats"): - learning_rate = hparams.learning_rate * learning_rate_decay() + learning_rate = my_hp.learning_rate * learning_rate_decay() learning_rate /= math.sqrt(float(FLAGS.worker_replicas)) tf.summary.scalar("learning_rate", learning_rate) global_step = tf.to_float(tf.contrib.framework.get_global_step()) - for n in xrange(len(hparams.problems)): + for n in xrange(len(my_hp.problems)): with tf.variable_scope("losses_avg", reuse=True): total_loss_var = tf.get_variable("problem_%d/total_loss" % n) training_loss_var = tf.get_variable("problem_%d/training_loss" % n) @@ -558,27 +570,27 @@ def nth_model(n): tf.logging.info("Weight %s\tshape %s\tsize %d", v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) total_size += v_size - if hparams.weight_decay > 0.0 and len(v.shape.as_list()) > 1: + if my_hp.weight_decay > 0.0 and len(v.shape.as_list()) > 1: # Add weight regularization if set and the weight is not a bias (dim>1). with tf.device(v._ref().device): # pylint: disable=protected-access v_loss = tf.nn.l2_loss(v) / v_size weight_decay_loss += v_loss is_body = len(v_name) > 5 and v_name[:5] == "body/" - if hparams.weight_noise > 0.0 and is_body: - # Add weight noise if set in hparams. + if my_hp.weight_noise > 0.0 and is_body: + # Add weight noise if set in my_hp. with tf.device(v._ref().device): # pylint: disable=protected-access scale = learning_rate * 0.001 - noise = tf.truncated_normal(v.shape) * hparams.weight_noise * scale + noise = tf.truncated_normal(v.shape) * my_hp.weight_noise * scale noise_op = v.assign_add(noise) with tf.control_dependencies([noise_op]): total_loss = tf.identity(total_loss) tf.logging.info("Total trainable variables size: %d", total_size) - if hparams.weight_decay > 0.0: - total_loss += weight_decay_loss * hparams.weight_decay + if my_hp.weight_decay > 0.0: + total_loss += weight_decay_loss * my_hp.weight_decay total_loss = tf.identity(total_loss, name="total_loss") # Define the train_op for the TRAIN mode. - opt = _ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) + opt = _ConditionalOptimizer(my_hp.optimizer, learning_rate, my_hp) tf.logging.info("Computing gradients for global model_fn.") opt_summaries = ["learning_rate", "loss"] if hparams.summarize_grads: @@ -588,7 +600,7 @@ def nth_model(n): loss=total_loss, global_step=tf.contrib.framework.get_global_step(), learning_rate=learning_rate, - clip_gradients=hparams.clip_grad_norm or None, + clip_gradients=my_hp.clip_grad_norm or None, gradient_noise_scale=hparams.grad_noise_scale or None, optimizer=opt, summaries=opt_summaries, From 474843d7d2f2c2a080e49bd163343698a4dbd69e Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 1 Aug 2017 08:45:13 -0700 Subject: [PATCH 0199/4095] Small typo correction, "handles" -> "handled". PiperOrigin-RevId: 163833578 --- tensor2tensor/data_generators/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/README.md b/tensor2tensor/data_generators/README.md index 310bc39df..0e6d64dd2 100644 --- a/tensor2tensor/data_generators/README.md +++ b/tensor2tensor/data_generators/README.md @@ -28,7 +28,7 @@ for an example. `Problem`s support data generation, training, and decoding. -Data generation is handles by `Problem.generate_data` which should produce 2 +Data generation is handled by `Problem.generate_data` which should produce 2 datasets, training and dev, which should be named according to `Problem.training_filepaths` and `Problem.dev_filepaths`. `Problem.generate_data` should also produce any other files that may be required From 9b1740227e3e9fe53ad69aab2530875875aed190 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 1 Aug 2017 10:15:33 -0700 Subject: [PATCH 0200/4095] Freeing character level WMTProblems from needing a targeted_vocab_size. PiperOrigin-RevId: 163846429 --- tensor2tensor/data_generators/problem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 9623791f5..339703676 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -386,12 +386,13 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): self.dev_filepaths(data_dir, 1, shuffled=False)) def feature_encoders(self, data_dir): - vocab_filename = os.path.join(data_dir, self.vocab_file) if self.is_character_level: encoder = text_encoder.ByteTextEncoder(), elif self.use_subword_tokenizer: + vocab_filename = os.path.join(data_dir, self.vocab_file) encoder = text_encoder.SubwordTextEncoder(vocab_filename) else: + vocab_filename = os.path.join(data_dir, self.vocab_file) encoder = text_encoder.TokenTextEncoder(vocab_filename) if self.has_inputs: return {"inputs": encoder, "targets": encoder} From 2ba6a56c9f3514bea851b8d1b3cec4131e1f113e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 1 Aug 2017 13:03:54 -0700 Subject: [PATCH 0201/4095] Get vocab size from encoder for Text2Text chr Problems PiperOrigin-RevId: 163873675 --- tensor2tensor/data_generators/problem.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 339703676..37d3ed4da 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -400,17 +400,12 @@ def feature_encoders(self, data_dir): def hparams(self, defaults, unused_model_hparams): p = defaults - if self.is_character_level: - source_vocab_size = 256 - target_vocab_size = 256 - else: - target_vocab_size = self._encoders["targets"].vocab_size - if self.has_inputs: - source_vocab_size = self._encoders["inputs"].vocab_size if self.has_inputs: + source_vocab_size = self._encoders["inputs"].vocab_size p.input_modality = {"inputs": (registry.Modalities.SYMBOL, source_vocab_size)} + target_vocab_size = self._encoders["targets"].vocab_size p.target_modality = (registry.Modalities.SYMBOL, target_vocab_size) if self.has_inputs: p.input_space_id = self.input_space_id From 1e8ed5a1267040882a361888bfd911f7fa835617 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 1 Aug 2017 14:03:55 -0700 Subject: [PATCH 0202/4095] Add option for proximity bias. Hypothesis: this should help with training of PiperOrigin-RevId: 163882942 --- tensor2tensor/models/common_attention.py | 14 +++++++++ tensor2tensor/models/transformer.py | 36 +++++++++++++++--------- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index 95e982790..2c7e8afc9 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -205,6 +205,20 @@ def attention_bias_ignore_padding(memory_padding): return tf.expand_dims(tf.expand_dims(ret, 1), 1) +def attention_bias_proximal(length): + """Bias for self-attention to encourage attention to close positions. + + Args: + length: an integer scalar. + + Returns: + a Tensor with shape [1, 1, length, length] + """ + r = tf.to_float(tf.range(length)) + diff = tf.expand_dims(r, 0) - tf.expand_dims(r, 1) + return tf.expand_dims(tf.expand_dims(-tf.log(1 + tf.abs(diff)), 0), 0) + + def split_last_dimension(x, n): """Reshape x so that the last dimension becomes two dimensions. diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 1d4ee04ed..c9d0a2db2 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -49,8 +49,10 @@ def model_fn_body(self, features): inputs = common_layers.flatten4d3d(inputs) targets = common_layers.flatten4d3d(targets) - (encoder_input, encoder_attention_bias, _) = (transformer_prepare_encoder( - inputs, target_space, hparams)) + (encoder_input, + encoder_self_attention_bias, + encoder_decoder_attention_bias) = ( + transformer_prepare_encoder(inputs, target_space, hparams)) (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder( targets, hparams) @@ -61,11 +63,11 @@ def residual_fn(x, y): encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) encoder_output = transformer_encoder(encoder_input, residual_fn, - encoder_attention_bias, hparams) + encoder_self_attention_bias, hparams) decoder_output = transformer_decoder( decoder_input, encoder_output, residual_fn, decoder_self_attention_bias, - encoder_attention_bias, hparams) + encoder_decoder_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) return decoder_output @@ -81,17 +83,20 @@ def transformer_prepare_encoder(inputs, target_space, hparams): Returns: encoder_input: a Tensor, bottom of encoder stack - encoder_self_attention_bias: a Tensor, containing large negative values - to implement masked attention and possibly baises for diagonal - alignments - encoder_padding: a Tensor + encoder_self_attention_bias: a bias tensor for use in encoder self-attention + encoder_decoder_attention_bias: a bias tensor for use in encoder-decoder + attention """ - # Flatten inputs. ishape_static = inputs.shape.as_list() encoder_input = inputs encoder_padding = common_attention.embedding_to_padding(encoder_input) - encoder_self_attention_bias = common_attention.attention_bias_ignore_padding( + ignore_padding = common_attention.attention_bias_ignore_padding( encoder_padding) + encoder_self_attention_bias = ignore_padding + encoder_decoder_attention_bias = ignore_padding + if hparams.proximity_bias: + encoder_self_attention_bias += common_attention.attention_bias_proximal( + tf.shape(inputs)[1]) # Append target_space_id embedding to inputs. emb_target_space = common_layers.embedding( target_space, 32, ishape_static[-1], name="target_space_embedding") @@ -99,7 +104,9 @@ def transformer_prepare_encoder(inputs, target_space, hparams): encoder_input += emb_target_space if hparams.pos == "timing": encoder_input = common_attention.add_timing_signal_1d(encoder_input) - return (encoder_input, encoder_self_attention_bias, encoder_padding) + return (encoder_input, + encoder_self_attention_bias, + encoder_decoder_attention_bias) def transformer_prepare_decoder(targets, hparams): @@ -111,11 +118,13 @@ def transformer_prepare_decoder(targets, hparams): Returns: decoder_input: a Tensor, bottom of decoder stack - decoder_self_attention_bias: a Tensor, containing large negative values - to implement masked attention and possibly baises for diagonal alignments + decoder_self_attention_bias: a bias tensor for use in encoder self-attention """ decoder_self_attention_bias = ( common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) + if hparams.proximity_bias: + decoder_self_attention_bias += common_attention.attention_bias_proximal( + tf.shape(targets)[1]) decoder_input = common_layers.shift_left_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) @@ -292,6 +301,7 @@ def transformer_base(): hparams.add_hparam("residual_dropout", 0.1) hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("nbr_decoder_problems", 1) + hparams.add_hparam("proximity_bias", int(False)) return hparams From 36b1c59ae9ce8ce563d8bf9d8486c82ab874d6a6 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 1 Aug 2017 14:21:19 -0700 Subject: [PATCH 0203/4095] Make tokenizer just log a warning on malformed vocab file lines and fix wrong flags in text_encoder_build_subword.py. PiperOrigin-RevId: 163885555 --- tensor2tensor/data_generators/test_data/vocab-2.txt | 1 + .../data_generators/text_encoder_build_subword.py | 4 ++-- tensor2tensor/data_generators/tokenizer.py | 7 ++++++- tensor2tensor/data_generators/tokenizer_test.py | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/data_generators/test_data/vocab-2.txt b/tensor2tensor/data_generators/test_data/vocab-2.txt index 7793af4f6..1ad6d20b9 100644 --- a/tensor2tensor/data_generators/test_data/vocab-2.txt +++ b/tensor2tensor/data_generators/test_data/vocab-2.txt @@ -1,3 +1,4 @@ kattywampus,11 +kaput balderdash,10 jiggery-pokery,14 diff --git a/tensor2tensor/data_generators/text_encoder_build_subword.py b/tensor2tensor/data_generators/text_encoder_build_subword.py index 47e82a176..0c366c896 100644 --- a/tensor2tensor/data_generators/text_encoder_build_subword.py +++ b/tensor2tensor/data_generators/text_encoder_build_subword.py @@ -24,7 +24,7 @@ python data_generators/text_encoder_build_subword.py \ --corpus_filepattern=$DATA_DIR/my_problem-train-* \ --corpus_max_lines=12345 \ - --output_fn=$DATA_DIR/my_problem.subword_text_encoder \ + --output_filename=$DATA_DIR/my_problem.subword_text_encoder \ --logtostderr """ @@ -75,7 +75,7 @@ def main(unused_argv): encoder = text_encoder.SubwordTextEncoder() encoder.build_from_token_counts(token_counts, FLAGS.min_count, FLAGS.num_iterations) - encoder.store_to_file(FLAGS.output_fn) + encoder.store_to_file(FLAGS.output_filename) if __name__ == '__main__': diff --git a/tensor2tensor/data_generators/tokenizer.py b/tensor2tensor/data_generators/tokenizer.py index 0e8daa75f..5cfd7c42e 100644 --- a/tensor2tensor/data_generators/tokenizer.py +++ b/tensor2tensor/data_generators/tokenizer.py @@ -185,7 +185,12 @@ def vocab_token_counts(text_filepattern, max_lines): a dictionary mapping token to count. """ ret = {} - for line in _read_filepattern(text_filepattern, max_lines=max_lines): + for i, line in enumerate( + _read_filepattern(text_filepattern, max_lines=max_lines)): + if "," not in line: + tf.logging.warning("Malformed vocab line #%d '%s'", i, line) + continue + token, count = line.rsplit(",", 1) ret[_native_to_unicode(token)] = int(count) diff --git a/tensor2tensor/data_generators/tokenizer_test.py b/tensor2tensor/data_generators/tokenizer_test.py index 0c299bd0b..065a32e91 100644 --- a/tensor2tensor/data_generators/tokenizer_test.py +++ b/tensor2tensor/data_generators/tokenizer_test.py @@ -130,7 +130,7 @@ def test_vocab_token_counts(self): def test_vocab_token_counts_with_max_lines(self): # vocab-1 has 2 lines, vocab-2 has 3 - token_counts = tokenizer.vocab_token_counts(self.vocab_path, 4) + token_counts = tokenizer.vocab_token_counts(self.vocab_path, 5) expected = { u"lollipop": 8, From 23f53cb3aeaf13320b58f2c67717b8a48e3977ef Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 1 Aug 2017 17:07:05 -0700 Subject: [PATCH 0204/4095] Simplify input_pipeline PiperOrigin-RevId: 163910099 --- tensor2tensor/utils/data_reader.py | 60 ++++++++++-------------------- 1 file changed, 20 insertions(+), 40 deletions(-) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index ba5139433..567f186d5 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -179,30 +179,6 @@ def preprocess(img): return examples -def problem_input_pipeline(problem, data_file_pattern, capacity, mode): - """Input pipeline for Problems.""" - data_fields, data_items_to_decoders = problem.example_reading_spec() - - # Create placeholders for input, rather than reading data from disk. - if data_file_pattern is None: - return feature_placeholders(data_fields) - - # Now the non-trivial case construction. - examples = examples_reader( - [data_file_pattern], - data_fields, - training=(mode == tf.contrib.learn.ModeKeys.TRAIN), - capacity=capacity, - data_items_to_decoders=data_items_to_decoders) - - examples = problem.preprocess_examples(examples, mode) - - # We do not want int64s as they are not supported on GPUs. - examples = cast_int64_to_int32(examples) - - return examples - - def cast_int64_to_int32(features): f = {} for k, v in six.iteritems(features): @@ -221,19 +197,10 @@ def feature_placeholders(data_fields): return feature_map -def input_pipeline(problem, data_file_pattern, capacity, mode): - """Input pipeline, returns a dictionary of tensors from queues.""" - - if problem is not None: - # problem is not None when the problem is specified with the Problem API, - # which handles Example decoding and preprocessing. - # Otherwise the problem is specified in problem_hparams and is dealt with - # below. - # As problems are ported to the Problem API, the special handling here will - # need to be moved to Problem.example_reading_spec and - # Problem.preprocessing. - return problem_input_pipeline(problem, data_file_pattern, capacity, mode) - +def default_example_reading_spec(data_file_pattern): + """Example reading spec for problem_hparams problems.""" + # This function is for problems that have yet to be ported to the new Problem + # API. Do not add here. data_items_to_decoders = None # Read from image TFRecords if the file has "image" in its name. if data_file_pattern and "image" in data_file_pattern: @@ -267,12 +234,21 @@ def input_pipeline(problem, data_file_pattern, capacity, mode): "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) } + return data_fields, data_items_to_decoders + + +def input_pipeline(problem, data_file_pattern, capacity, mode): + """Input pipeline, returns a dictionary of tensors from queues.""" + if problem is None: + data_fields, data_items_to_decoders = default_example_reading_spec( + data_file_pattern) + else: + data_fields, data_items_to_decoders = problem.example_reading_spec() - # Create placeholders for input, rather than reading data from disk. if data_file_pattern is None: + # Create placeholders for input, rather than reading data from disk. return feature_placeholders(data_fields) - # Now the non-trivial case construction. examples = examples_reader( [data_file_pattern], data_fields, @@ -280,10 +256,14 @@ def input_pipeline(problem, data_file_pattern, capacity, mode): capacity=capacity, data_items_to_decoders=data_items_to_decoders) - examples = preprocessing(examples, data_file_pattern, mode) + if problem is None: + examples = preprocessing(examples, data_file_pattern, mode) + else: + examples = problem.preprocess_examples(examples, mode) # We do not want int64s as they are not supported on GPUs. examples = cast_int64_to_int32(examples) + return examples From 768ef743e6198ad1a6226da816ff8f35bda4df7e Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 1 Aug 2017 17:20:53 -0700 Subject: [PATCH 0205/4095] Removed bogus comma from feature_encoders. PiperOrigin-RevId: 163911814 --- tensor2tensor/data_generators/problem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 37d3ed4da..577e051d3 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -387,7 +387,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): def feature_encoders(self, data_dir): if self.is_character_level: - encoder = text_encoder.ByteTextEncoder(), + encoder = text_encoder.ByteTextEncoder() elif self.use_subword_tokenizer: vocab_filename = os.path.join(data_dir, self.vocab_file) encoder = text_encoder.SubwordTextEncoder(vocab_filename) From 6b54dfbddec8f2408748fc72344a0d6ea0b63954 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Tue, 1 Aug 2017 17:57:53 -0700 Subject: [PATCH 0206/4095] Add functionality to download from google drive. Enable that option for celeba. PiperOrigin-RevId: 163915517 --- .../data_generators/generator_utils.py | 51 +++++++++++++++++++ .../data_generators/generator_utils_test.py | 14 +++++ tensor2tensor/data_generators/image.py | 14 ++--- 3 files changed, 72 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 5c7f9f2a1..b38531c1a 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -28,6 +28,7 @@ # Dependency imports +import requests import six from six.moves import xrange # pylint: disable=redefined-builtin import six.moves.urllib_request as urllib # Imports urllib on Python2, urllib.request on Python3 @@ -196,6 +197,56 @@ def maybe_download(directory, filename, url): return filepath +def maybe_download_from_drive(directory, filename, url): + """Download filename from google drive unless it's already in directory. + + Args: + directory: path to the directory that will be used. + filename: name of the file to download to (do nothing if it already exists). + url: URL to download from. + + Returns: + The path to the downloaded file. + """ + if not tf.gfile.Exists(directory): + tf.logging.info("Creating directory %s" % directory) + os.mkdir(directory) + filepath = os.path.join(directory, filename) + confirm_token = None + if tf.gfile.Exists(filepath): + tf.logging.info("Not downloading, file already found: %s" % filepath) + return filepath + + # Since the file is big, drive will scan it for virus and take it to a + # warning page. We find the confirm token on this page and append it to the + # URL to start the download process. + confirm_token = None + session = requests.Session() + response = session.get(url, stream=True) + for k, v in response.cookies.items(): + if k.startswith("download_warning"): + confirm_token = v + + if confirm_token: + url = url + "&confirm=" + confirm_token + tf.logging.info("Downloading %s to %s" % (url, filepath)) + + response = session.get(url, stream=True) + # Now begin the download. + chunk_size = 16 * 1024 + with open(filepath, "wb") as f: + for chunk in response.iter_content(chunk_size): + if chunk: + f.write(chunk) + + # Print newline to clear the carriage return from the download progress + print() + statinfo = os.stat(filepath) + tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, + statinfo.st_size)) + return filepath + + def gunzip_file(gz_path, new_path): """Unzips from gz_path into new_path. diff --git a/tensor2tensor/data_generators/generator_utils_test.py b/tensor2tensor/data_generators/generator_utils_test.py index fd6e15ca3..144507e6b 100644 --- a/tensor2tensor/data_generators/generator_utils_test.py +++ b/tensor2tensor/data_generators/generator_utils_test.py @@ -64,6 +64,20 @@ def testMaybeDownload(self): os.remove(tmp_file_path + ".http") os.remove(tmp_file_path) + def testMaybeDownloadFromDrive(self): + tmp_dir = self.get_temp_dir() + (_, tmp_file_path) = tempfile.mkstemp(dir=tmp_dir) + tmp_file_name = os.path.basename(tmp_file_path) + + # Download Google index to the temporary file.http. + res_path = generator_utils.maybe_download_from_drive( + tmp_dir, tmp_file_name + ".http", "http://drive.google.com") + self.assertEqual(res_path, tmp_file_path + ".http") + + # Clean up. + os.remove(tmp_file_path + ".http") + os.remove(tmp_file_path) + def testGunzipFile(self): tmp_dir = self.get_temp_dir() (_, tmp_file_path) = tempfile.mkstemp(dir=tmp_dir) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index fdad8d432..44e2fda15 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -380,18 +380,18 @@ def example_reading_spec(self): return super(ImageFSNS, self).example_reading_spec(self, label_key=label_key) -# Filename for CELEBA data. +# URL and filename for CELEBA data. _CELEBA_NAME = "img_align_celeba" +_CELEBA_URL = "https://drive.google.com/uc?export=download&id=0B7EVK8r0v71pZjFTYXZWM3FlRnM" def _get_celeba(directory): """Download and extract CELEBA to directory unless it is there.""" - path = os.path.join(directory, _CELEBA_NAME) + # path = os.path.join(directory, _CELEBA_NAME) + path = generator_utils.maybe_download_from_drive(directory, + _CELEBA_NAME, _CELEBA_URL) if not tf.gfile.Exists(path): - # We expect that this file has been downloaded from: - # https://drive.google.com/uc?export=download&id=0B7EVK8r0v71pZjFTYXZWM3FlRnM - # and placed in `directory`. - zipfile.ZipFile(path+".zip", "r").extractall(directory) + zipfile.ZipFile(path + ".zip", "r").extractall(directory) def celeba_generator(tmp_dir, how_many, start_from=0): @@ -408,7 +408,7 @@ def celeba_generator(tmp_dir, how_many, start_from=0): * image/format: the string "jpeg" representing image format, """ _get_celeba(tmp_dir) - image_files = tf.gfile.Glob(tmp_dir + "/*.jpg") + image_files = tf.gfile.Glob(os.path.join(tmp_dir, _CELEBA_NAME) + "/*.jpg") for filename in image_files[start_from:start_from+how_many]: with tf.gfile.Open(filename, "r") as f: encoded_image_data = f.read() From 465ae188ea9205f27c92c0bb532a2d8f1b336009 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 1 Aug 2017 18:01:10 -0700 Subject: [PATCH 0207/4095] Add an option to truncate long input and target sequences. PiperOrigin-RevId: 163915865 --- tensor2tensor/data_generators/gene_expression.py | 3 ++- tensor2tensor/data_generators/problem.py | 6 +++++- tensor2tensor/models/common_hparams.py | 14 +++++++++++++- tensor2tensor/utils/data_reader.py | 4 ++-- tensor2tensor/utils/trainer_utils.py | 5 +++-- 5 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/data_generators/gene_expression.py b/tensor2tensor/data_generators/gene_expression.py index 60e38a90f..82c15414a 100644 --- a/tensor2tensor/data_generators/gene_expression.py +++ b/tensor2tensor/data_generators/gene_expression.py @@ -163,8 +163,9 @@ def example_reading_spec(self): data_items_to_decoders = None return (data_fields, data_items_to_decoders) - def preprocess_examples(self, examples, mode): + def preprocess_examples(self, examples, mode, hparams): del mode + del hparams # Reshape targets examples["targets"] = tf.reshape(examples["targets"], diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 577e051d3..6f49a8d97 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -162,8 +162,12 @@ def example_reading_spec(self): data_items_to_decoders = None return (data_fields, data_items_to_decoders) - def preprocess_examples(self, examples, mode): + def preprocess_examples(self, examples, mode, hparams): del mode + if hparams.max_input_seq_length > 0: + examples["inputs"] = examples["inputs"][:hparams.max_input_seq_length] + if hparams.max_target_seq_length > 0: + examples["targets"] = examples["targets"][:hparams.max_target_seq_length] return examples def eval_metrics(self): diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/models/common_hparams.py index cf58b33e8..353586393 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/models/common_hparams.py @@ -91,7 +91,19 @@ def basic_params1(): input_modalities="default", # We don't use empty string in params. # To override the default target modality, specify # "modality_type:modality_name", e.g. "image:small_image_modality". - target_modality="default") + target_modality="default", + # The maximum length of "input" sequence. + # Sequences longer than this value will be truncated. 0 or negative values + # mean there is no maximum or truncation. + # You can change this behavior by overridding preprocess_examples() method + # in your problem class. + max_input_seq_length=0, + # The maximum length of "target" sequence. + # Sequences longer than this value will be truncated. 0 or negative values + # mean there is no maximum or truncation. + # You can change this behavior by overridding preprocess_examples() method + # in your problem class. + max_target_seq_length=0) class RangedHParams(object): diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 567f186d5..81dcb52a5 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -237,7 +237,7 @@ def default_example_reading_spec(data_file_pattern): return data_fields, data_items_to_decoders -def input_pipeline(problem, data_file_pattern, capacity, mode): +def input_pipeline(problem, data_file_pattern, capacity, mode, hparams): """Input pipeline, returns a dictionary of tensors from queues.""" if problem is None: data_fields, data_items_to_decoders = default_example_reading_spec( @@ -259,7 +259,7 @@ def input_pipeline(problem, data_file_pattern, capacity, mode): if problem is None: examples = preprocessing(examples, data_file_pattern, mode) else: - examples = problem.preprocess_examples(examples, mode) + examples = problem.preprocess_examples(examples, mode, hparams) # We do not want int64s as they are not supported on GPUs. examples = cast_int64_to_int32(examples) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index ef1d1dcb3..33053806d 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -1115,8 +1115,9 @@ def input_fn(): with tf.device("/cpu:0"): # Input reading on CPU capacity = p_hparams.max_expected_batch_size_per_shard capacity *= num_datashards - examples = data_reader.input_pipeline( - problem_instance, data_file_patterns[n], capacity, mode) + examples = data_reader.input_pipeline(problem_instance, + data_file_patterns[n], + capacity, mode, hparams) feature_map = data_reader.batch_examples( examples, data_reader.hparams_to_batching_scheme( From 75270907dbba3c7c224a08515d33167c29e26aed Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 1 Aug 2017 18:03:10 -0700 Subject: [PATCH 0208/4095] More WMT corrections, port image problems to Problem. PiperOrigin-RevId: 163916109 --- README.md | 4 +- tensor2tensor/bin/t2t-datagen | 31 -- tensor2tensor/data_generators/image.py | 416 +++++++++++++++--- tensor2tensor/data_generators/problem.py | 5 +- .../data_generators/problem_hparams.py | 91 ---- tensor2tensor/data_generators/wmt.py | 2 +- tensor2tensor/models/bluenet.py | 1 - tensor2tensor/models/common_attention.py | 102 +++-- tensor2tensor/models/common_attention_test.py | 28 ++ tensor2tensor/models/common_layers.py | 71 ++- tensor2tensor/models/lstm.py | 8 +- tensor2tensor/models/lstm_test.py | 4 +- tensor2tensor/models/modalities.py | 8 +- tensor2tensor/models/multimodel.py | 1 - tensor2tensor/models/multimodel_test.py | 9 +- tensor2tensor/models/slicenet.py | 1 - tensor2tensor/models/slicenet_test.py | 10 +- tensor2tensor/models/transformer.py | 8 +- .../models/transformer_alternative.py | 67 +-- tensor2tensor/models/xception.py | 1 - tensor2tensor/utils/data_reader.py | 34 +- tensor2tensor/utils/registry.py | 2 +- tensor2tensor/utils/registry_test.py | 5 +- tensor2tensor/utils/t2t_model.py | 14 +- tensor2tensor/utils/trainer_utils_test.py | 2 +- 25 files changed, 594 insertions(+), 331 deletions(-) diff --git a/README.md b/README.md index edd6460d0..bb0f6f534 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ t2t-datagen \ --problem=$PROBLEM # Train -# * If you run out of memory, add --hparams='batch_size=2048' or even 1024. +# * If you run out of memory, add --hparams='batch_size=1024'. t2t-trainer \ --data_dir=$DATA_DIR \ --problems=$PROBLEM \ @@ -166,7 +166,7 @@ python -c "from tensor2tensor.models.transformer import Transformer" with `Modality` objects, which are specified per-feature in the dataset/task specification. * Support for multi-GPU machines and synchronous (1 master, many workers) and - asynchrounous (independent workers synchronizing through a parameter server) + asynchronous (independent workers synchronizing through a parameter server) [distributed training](https://github.com/tensorflow/tensor2tensor/tree/master/docs/distributed_training.md). * Easily swap amongst datasets and models by command-line flag with the data generation script `t2t-datagen` and the training script `t2t-trainer`. diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 1f876c981..837d6d203 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -118,40 +118,9 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: wiki.generator(FLAGS.tmp_dir, True), 1000 ), - "image_mnist_tune": ( - lambda: image.mnist_generator(FLAGS.tmp_dir, True, 55000), - lambda: image.mnist_generator(FLAGS.tmp_dir, True, 5000, 55000)), - "image_mnist_test": ( - lambda: image.mnist_generator(FLAGS.tmp_dir, True, 60000), - lambda: image.mnist_generator(FLAGS.tmp_dir, False, 10000)), - "image_cifar10_tune": ( - lambda: image.cifar10_generator(FLAGS.tmp_dir, True, 48000), - lambda: image.cifar10_generator(FLAGS.tmp_dir, True, 2000, 48000)), - "image_cifar10_test": ( - lambda: image.cifar10_generator(FLAGS.tmp_dir, True, 50000), - lambda: image.cifar10_generator(FLAGS.tmp_dir, False, 10000)), - "image_mscoco_characters_test": ( - lambda: image.mscoco_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 80000), - lambda: image.mscoco_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 40000)), "image_celeba_tune": ( lambda: image.celeba_generator(FLAGS.tmp_dir, 162770), lambda: image.celeba_generator(FLAGS.tmp_dir, 19867, 162770)), - "image_mscoco_tokens_8k_test": ( - lambda: image.mscoco_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 80000, - vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13), - lambda: image.mscoco_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 40000, - vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13)), - "image_mscoco_tokens_32k_test": ( - lambda: image.mscoco_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 80000, - vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15), - lambda: image.mscoco_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 40000, - vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), "snli_32k": ( lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 44e2fda15..a2e328f00 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -36,11 +36,189 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder +from tensor2tensor.models import common_layers from tensor2tensor.utils import registry import tensorflow as tf +class ImageProblem(problem.Problem): + + def example_reading_spec(self, label_key=None): + if label_key is None: + label_key = "image/class/label" + + data_fields = { + "image/encoded": tf.FixedLenFeature((), tf.string), + "image/format": tf.FixedLenFeature((), tf.string), + label_key: tf.VarLenFeature(tf.int64) + } + data_items_to_decoders = { + "inputs": + tf.contrib.slim.tfexample_decoder.Image( + image_key="image/encoded", + format_key="image/format", + channels=3), + "targets": + tf.contrib.slim.tfexample_decoder.Tensor(label_key), + } + + return data_fields, data_items_to_decoders + + +# French street names dataset. + + +@registry.register_problem +class ImageFSNS(ImageProblem): + """Problem spec for French Street Name recognition.""" + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + list_url = ("https://raw.githubusercontent.com/tensorflow/models/master/" + "street/python/fsns_urls.txt") + fsns_urls = generator_utils.maybe_download( + tmp_dir, "fsns_urls.txt", list_url) + fsns_files = [f.strip() for f in open(fsns_urls, "r") + if f.startswith("http://")] + for url in fsns_files: + if "/train/train" in url: + generator_utils.maybe_download( + data_dir, "image_fsns-train" + url[-len("-00100-of-00512"):], url) + elif "/validation/validation" in url: + generator_utils.maybe_download( + data_dir, "image_fsns-dev" + url[-len("-00100-of-00512"):], url) + elif "charset" in url: + generator_utils.maybe_download( + data_dir, "charset_size134.txt", url) + + def feature_encoders(self, data_dir): + # This vocab file must be present within the data directory. + vocab_filename = os.path.join(data_dir, "charset_size134.txt") + return { + "inputs": text_encoder.TextEncoder(), + "targets": text_encoder.SubwordTextEncoder(vocab_filename) + } + + def hparams(self, defaults, model_hparams): + p = defaults + p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} + vocab_size = self._encoders["targets"].vocab_size + p.target_modality = (registry.Modalities.SYMBOL, vocab_size) + p.batch_size_multiplier = 256 + p.max_expected_batch_size_per_shard = 2 + p.input_space_id = problem.SpaceID.IMAGE + p.target_space_id = problem.SpaceID.EN_TOK + + def example_reading_spec(self): + label_key = "image/unpadded_label" + return super(ImageFSNS, self).example_reading_spec(self, + label_key=label_key) + + +class Image2ClassProblem(ImageProblem): + """Base class for image classification problems.""" + + @property + def is_small(self): + raise NotImplementedError() + + @property + def num_classes(self): + raise NotImplementedError() + + @property + def train_shards(self): + raise NotImplementedError() + + @property + def dev_shards(self): + return 1 + + def generator(self, data_dir, tmp_dir, is_training): + raise NotImplementedError() + + def hparams(self, defaults, model_hparams): + p = defaults + small_modality = "%s:small_image_modality" % registry.Modalities.IMAGE + modality = small_modality if self.is_small else registry.Modalities.IMAGE + p.input_modality = {"inputs": (modality, None)} + p.target_modality = (registry.Modalities.CLASS_LABEL, self.num_classes) + p.batch_size_multiplier = 4 if self.is_small else 256 + p.max_expected_batch_size_per_shard = 8 if self.is_small else 2 + p.loss_multiplier = 3.0 if self.is_small else 1.0 + if self._was_reversed: + p.loss_multiplier = 1.0 + p.input_space_id = problem.SpaceID.IMAGE + p.target_space_id = problem.SpaceID.IMAGE_LABEL + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + generator_utils.generate_dataset_and_shuffle( + self.generator(data_dir, tmp_dir, True), + self.training_filepaths(data_dir, self.train_shards, shuffled=False), + self.generator(data_dir, tmp_dir, False), + self.dev_filepaths(data_dir, self.dev_shards, shuffled=False)) + + +def imagenet_preprocess_examples(examples, mode): + """Preprocessing used for Imagenet and similar problems.""" + def preprocess(img): + img = tf.image.resize_images(img, [360, 360]) + img = common_layers.image_augmentation(tf.to_float(img) / 255.) + return tf.to_int64(img * 255.) + + def resize(img): + return tf.to_int64(tf.image.resize_images(img, [299, 299])) + + inputs = tf.cast(examples["inputs"], tf.int64) + if mode == tf.contrib.learn.ModeKeys.TRAIN: + examples["inputs"] = tf.cond( # Preprocess 90% of the time. + tf.less(tf.random_uniform([]), 0.9), + lambda img=inputs: preprocess(img), + lambda img=inputs: resize(img)) + else: + examples["inputs"] = resize(inputs) + return examples + + +@registry.register_problem +class ImageImagenet(Image2ClassProblem): + """Imagenet.""" + + @property + def is_small(self): + return False + + @property + def num_classes(self): + return 1000 + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + # TODO(lukaszkaiser): find a better way than printing this. + print("To generate the ImageNet dataset in the proper format, follow " + "instructions at https://github.com/tensorflow/models/blob/master" + "/inception/README.md#getting-started") + + def preprocess_examples(self, examples, mode): + return imagenet_preprocess_examples(examples, mode) + + +@registry.register_problem +class ImageImagenet32(Image2ClassProblem): + """Imagenet rescaled to 32x32.""" + + def dataset_filename(self): + return "image_imagenet" # Reuse Imagenet data. + + @property + def is_small(self): + return True # Modalities like for CIFAR. + + def preprocess_examples(self, examples, mode): + examples = imagenet_preprocess_examples(examples, mode) + examples["inputs"] = tf.to_int64(tf.image.resize_images( + examples["inputs"], [32, 32])) + + def image_generator(images, labels): """Generator for images that takes image and labels lists and creates pngs. @@ -158,6 +336,39 @@ def mnist_generator(tmp_dir, training, how_many, start_from=0): labels[start_from:start_from + how_many]) +@registry.register_problem +class ImageMnistTune(Image2ClassProblem): + """MNIST, tuning data.""" + + @property + def is_small(self): + return True + + @property + def num_classes(self): + return 10 + + @property + def train_shards(self): + return 10 + + def generator(self, data_dir, tmp_dir, is_training): + if is_training: + return mnist_generator(tmp_dir, True, 55000) + else: + return mnist_generator(tmp_dir, True, 5000, 55000) + + +@registry.register_problem +class ImageMnist(ImageMnistTune): + + def generator(self, data_dir, tmp_dir, is_training): + if is_training: + return mnist_generator(tmp_dir, True, 60000) + else: + return mnist_generator(tmp_dir, False, 10000) + + # URLs and filenames for CIFAR data. _CIFAR10_URL = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" _CIFAR10_PREFIX = "cifar-10-batches-py/" @@ -208,6 +419,39 @@ def cifar10_generator(tmp_dir, training, how_many, start_from=0): all_labels[start_from:start_from + how_many]) +@registry.register_problem +class ImageCifar10Tune(ImageMnistTune): + + def preprocess_examples(self, examples, mode): + if mode == tf.contrib.learn.ModeKeys.TRAIN: + examples["inputs"] = common_layers.cifar_image_augmentation( + examples["inputs"]) + return examples + + def generator(self, data_dir, tmp_dir, is_training): + if is_training: + return cifar10_generator(tmp_dir, True, 48000) + else: + return cifar10_generator(tmp_dir, True, 2000, 48000) + + +@registry.register_problem +class ImageCifar10(ImageCifar10Tune): + + def generator(self, data_dir, tmp_dir, is_training): + if is_training: + return cifar10_generator(tmp_dir, True, 50000) + else: + return cifar10_generator(tmp_dir, False, 10000) + + +@registry.register_problem +class ImageCifar10Plain(ImageCifar10): + + def preprocess_examples(self, examples, mode): + return examples + + # URLs and filenames for MSCOCO data. _MSCOCO_ROOT_URL = "http://msvocds.blob.core.windows.net/" _MSCOCO_URLS = [ @@ -308,77 +552,135 @@ def mscoco_generator(data_dir, } -class ImageProblem(problem.Problem): +class Image2TextProblem(ImageProblem): + """Base class for image-to-text problems.""" - def example_reading_spec(self, label_key=None): - if label_key is None: - label_key = "image/class/label" + @property + def is_character_level(self): + raise NotImplementedError() - data_fields = { - "image/encoded": tf.FixedLenFeature((), tf.string), - "image/format": tf.FixedLenFeature((), tf.string), - label_key: tf.VarLenFeature(tf.int64) - } - data_items_to_decoders = { - "inputs": - tf.contrib.slim.tfexample_decoder.Image( - image_key="image/encoded", - format_key="image/format", - channels=3), - "targets": - tf.contrib.slim.tfexample_decoder.Tensor(label_key), - } + @property + def targeted_vocab_size(self): + raise NotImplementedError() # Not needed if self.is_character_level. - return data_fields, data_items_to_decoders + @property + def target_space_id(self): + raise NotImplementedError() -# French street names dataset. + @property + def train_shards(self): + raise NotImplementedError() + @property + def dev_shards(self): + raise NotImplementedError() -@registry.register_problem -class ImageFSNS(ImageProblem): - """Problem spec for French Street Name recognition.""" + def generator(self, data_dir, tmp_dir, is_training): + raise NotImplementedError() - def generate_data(self, data_dir, tmp_dir, task_id=-1): - list_url = ("https://raw.githubusercontent.com/tensorflow/models/master/" - "street/python/fsns_urls.txt") - fsns_urls = generator_utils.maybe_download( - tmp_dir, "fsns_urls.txt", list_url) - fsns_files = [f.strip() for f in open(fsns_urls, "r") - if f.startswith("http://")] - for url in fsns_files: - if "/train/train" in url: - generator_utils.maybe_download( - data_dir, "image_fsns-train" + url[-len("-00100-of-00512"):], url) - elif "/validation/validation" in url: - generator_utils.maybe_download( - data_dir, "image_fsns-dev" + url[-len("-00100-of-00512"):], url) - elif "charset" in url: - generator_utils.maybe_download( - data_dir, "charset_size134.txt", url) + def feature_encoders(self, data_dir): + if self.is_character_level: + encoder = text_encoder.ByteTextEncoder() + else: + vocab_filename = os.path.join( + data_dir, "vocab.endefr.%d" % self.targeted_vocab_size) + encoder = text_encoder.SubwordTextEncoder(vocab_filename) + return {"targets": encoder} def hparams(self, defaults, model_hparams): p = defaults p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} - # This vocab file must be present within the data directory. - vocab_filename = os.path.join(model_hparams.data_dir, "charset_size134.txt") - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - p.target_modality = (registry.Modalities.SYMBOL, subtokenizer.vocab_size) - p.vocabulary = { - "inputs": text_encoder.TextEncoder(), - "targets": subtokenizer, - } + encoder = self._encoders["targets"] + p.target_modality = (registry.Modalities.SYMBOL, encoder.vocab_size) p.batch_size_multiplier = 256 p.max_expected_batch_size_per_shard = 2 - vocab_size = 144 - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, vocab_size) - p.input_space_id = problem.SpaceID.DIGIT_0 - p.target_space_id = problem.SpaceID.DIGIT_1 + p.loss_multiplier = 1.0 + p.input_space_id = problem.SpaceID.IMAGE + p.target_space_id = self.target_space_id + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + generator_utils.generate_dataset_and_shuffle( + self.generator(data_dir, tmp_dir, True), + self.training_filepaths(data_dir, self.train_shards, shuffled=False), + self.generator(data_dir, tmp_dir, False), + self.dev_filepaths(data_dir, self.dev_shards, shuffled=False)) + + +@registry.register_problem +class ImageMsCocoCharacters(Image2TextProblem): + """MSCOCO, character level.""" + + @property + def is_character_level(self): + return True + + @property + def target_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def train_shards(self): + return 100 + + @property + def dev_shards(self): + return 10 + + def preprocess_examples(self, examples, mode): + return imagenet_preprocess_examples(examples, mode) + + def generator(self, data_dir, tmp_dir, is_training): + if is_training: + return mscoco_generator(data_dir, tmp_dir, True, 80000) + else: + return mscoco_generator(data_dir, tmp_dir, False, 40000) + raise NotImplementedError() + + +@registry.register_problem +class ImageMsCocoTokens8k(ImageMsCocoCharacters): + """MSCOCO, 8k tokens vocab.""" + + @property + def is_character_level(self): + return False + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def train_shards(self): + return 100 + + @property + def dev_shards(self): + return 10 + + def generator(self, data_dir, tmp_dir, is_training): + vocab_filename = "vocab.endefr.%d" % self.targeted_vocab_size + if is_training: + return mscoco_generator( + data_dir, tmp_dir, True, 80000, + vocab_filename=vocab_filename, vocab_size=self.targeted_vocab_size) + else: + return mscoco_generator( + data_dir, tmp_dir, False, 40000, + vocab_filename=vocab_filename, vocab_size=self.targeted_vocab_size) + + +@registry.register_problem +class ImageMsCocoTokens32k(ImageMsCocoTokens8k): + """MSCOCO, 32k tokens vocab.""" + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 - def example_reading_spec(self): - label_key = "image/unpadded_label" - return super(ImageFSNS, self).example_reading_spec(self, - label_key=label_key) # URL and filename for CELEBA data. _CELEBA_NAME = "img_align_celeba" diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 6f49a8d97..3d30ec239 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -82,6 +82,8 @@ class SpaceID(object): DNA = 23 # Real numbers REAL = 24 + # Images + IMAGE = 25 class Problem(object): @@ -234,9 +236,6 @@ def internal_hparams(self, model_hparams): if self._was_reversed: _reverse_problem_hparams(hp) - # TODO(rsepassi): Move this into the cifar10 Problem - if "image_cifar10" in self.name: - hp.loss_multiplier = 1. if self._was_copy: _copy_problem_hparams(hp) return hp diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 3c829eeac..607078d2f 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -48,8 +48,6 @@ def problem_hparams(problem_name, model_hparams): p = _lookup_problem_hparams_fn(base_name)(model_hparams) if was_reversed: _reverse_problem_hparams(p) - if "image_cifar10" in base_name: - p.loss_multiplier = 1. if was_copy: _copy_problem_hparams(p) return p @@ -509,86 +507,6 @@ def ice_parsing_tokens(model_hparams, wrong_source_vocab_size): return p -def image_cifar10(unused_model_hparams): - """CIFAR-10.""" - p = default_problem_hparams() - p.input_modality = { - "inputs": ("%s:small_image_modality" % registry.Modalities.IMAGE, None) - } - p.target_modality = (registry.Modalities.CLASS_LABEL, 10) - p.batch_size_multiplier = 4 - p.max_expected_batch_size_per_shard = 8 - p.loss_multiplier = 3.0 - p.input_space_id = 1 - p.target_space_id = 1 - return p - - -def image_mnist(unused_model_hparams): - """MNIST.""" - p = default_problem_hparams() - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, 256)} - p.target_modality = (registry.Modalities.CLASS_LABEL, 10) - p.batch_size_multiplier = 4 - p.max_expected_batch_size_per_shard = 8 - p.loss_multiplier = 3.0 - p.input_space_id = 1 - p.target_space_id = 1 - return p - - -def image_imagenet(model_hparams): - """ImageNet.""" - p = default_problem_hparams() - p.input_modality = { - "inputs": (registry.Modalities.IMAGE, None), - } - target_modality = ("%s:class_label_2d" % registry.Modalities.CLASS_LABEL - if model_hparams.imagenet_use_2d else - registry.Modalities.CLASS_LABEL) - p.target_modality = (target_modality, 1000) - p.batch_size_multiplier = 256 - p.max_expected_batch_size_per_shard = 2 - p.loss_multiplier = 0.7 - p.input_space_id = 1 - p.target_space_id = 1 - return p - - -def image_mscoco_characters(unused_model_hparams): - """COCO image captioning with captions as characters.""" - p = default_problem_hparams() - p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} - p.target_modality = (registry.Modalities.SYMBOL, 256) - p.vocabulary = { - "inputs": text_encoder.TextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - p.batch_size_multiplier = 128 - p.max_expected_batch_size_per_shard = 2 - p.loss_multiplier = 2.0 - p.input_space_id = 1 - p.target_space_id = 2 - return p - - -def image_mscoco_tokens(model_hparams, vocab_count): - """COCO image captioning with captions as tokens.""" - p = default_problem_hparams() - p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} - # This vocab file must be present within the data directory. - vocab_filename = os.path.join(model_hparams.data_dir, - "vocab.endefr.%d" % vocab_count) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - p.target_modality = (registry.Modalities.SYMBOL, subtokenizer.vocab_size) - p.vocabulary = { - "inputs": text_encoder.TextEncoder(), - "targets": subtokenizer, - } - p.batch_size_multiplier = 256 - p.max_expected_batch_size_per_shard = 2 - - def img2img_imagenet(unused_model_hparams): """Image 2 Image for imagenet dataset.""" p = default_problem_hparams() @@ -633,15 +551,6 @@ def image_celeba(unused_model_hparams): "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens( # pylint: disable=g-long-lambda p, "wsj", 2**14, 2**9), "wmt_ende_bpe32k": wmt_ende_bpe32k, - "image_cifar10_tune": image_cifar10, - "image_cifar10_test": image_cifar10, - "image_mnist_tune": image_mnist, - "image_mnist_test": image_mnist, "image_celeba_tune": image_celeba, - "image_mscoco_characters_tune": image_mscoco_characters, - "image_mscoco_characters_test": image_mscoco_characters, - "image_mscoco_tokens_8k_test": lambda p: image_mscoco_tokens(p, 2**13), - "image_mscoco_tokens_32k_test": lambda p: image_mscoco_tokens(p, 2**15), - "image_imagenet": image_imagenet, "img2img_imagenet": img2img_imagenet, } diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index c9b43d507..0a47e9989 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -426,7 +426,7 @@ class WMTEnDeCharacters(WMTProblem): def is_character_level(self): return True - def train_generator(self, tmp_dir, train): + def train_generator(self, _, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS tag = "train" if train else "dev" diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index 3ac477e4b..87ad70e41 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -546,7 +546,6 @@ def bluenet_base(): hparams.optimizer_adam_epsilon = 1e-6 hparams.optimizer_adam_beta1 = 0.85 hparams.optimizer_adam_beta2 = 0.997 - hparams.add_hparam("imagenet_use_2d", True) hparams.add_hparam("anneal_until", 40000) hparams.add_hparam("batch_deviation_loss_factor", 5.0) return hparams diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/models/common_attention.py index 2c7e8afc9..b52fb8aea 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/models/common_attention.py @@ -358,23 +358,33 @@ def dot_product_attention(q, return tf.matmul(weights, v) -def masked_local_attention_1d( - q, k, v, block_length=128, name=None): - """Attention to the source position and a neigborhood to the left of it. +def masked_local_attention_1d(q, k, v, + block_length=128, look_right=True, + use_whole_block=False, name=None): + """Attention to the source position and a neigborhood around it. - The sequence is divided into blocks of length block_size. - Attention for a given query position can only see memory positions - less than or equal to the query position, in the corresponding block - and the previous block. + The sequence is divided into blocks of length block_size. Attention for a + given query position can only see memory positions within a certain number + of positions before and behind it. - If mask_right is True, then a target position cannot see greater source + + If look_right is True then each query will attend to block_length//2 + positions either side, otherwise it will attend to block_length previous positions. + If use_whole_block is True then no mask will be applied to the local blocks + meaning the full blocks are used (if look_right is True then the elements to + the right of the current position are still masked out). This allows to + attend to more elements without additional overhead, but means we have + inconsistent window positions and sizes. + Args: - q: a Tensor with shape [batch, heads, length, depth_k] - k: a Tensor with shape [batch, heads, length, depth_k] - v: a Tensor with shape [batch, heads, length, depth_v] + q: a Tensor with shape [batch, heads, length_q, depth_k] + k: a Tensor with shape [batch, heads, length_kv, depth_k] + v: a Tensor with shape [batch, heads, length_kv, depth_v] block_length: an integer + look_right: a bool + use_whole_block: a bool name: an optional string Returns: @@ -386,61 +396,71 @@ def masked_local_attention_1d( batch = tf.shape(q)[0] heads = tf.shape(q)[1] length = tf.shape(q)[2] - # If (length < 2 * block_length), then we use only one block. - block_length = tf.where(tf.less(length, block_length * 2), - length, block_length) depth_k = tf.shape(q)[3] depth_v = tf.shape(v)[3] original_length = length + + # If (length < block_length), then we use only one block. + block_length = tf.where(tf.less(length, block_length), + length, block_length) + # Pad to desired length. padding_size = tf.mod(-length, block_length) length += padding_size + num_blocks = tf.div(length, block_length) padding = [[0, 0], [0, 0], [0, padding_size], [0, 0]] q = tf.pad(q, padding) - k = tf.pad(k, padding) - v = tf.pad(v, padding) - num_blocks = tf.div(length, block_length) - - # compute attention for the first query block. - first_q = tf.slice(q, [0, 0, 0, 0], [-1, -1, block_length, -1]) - first_k = tf.slice(k, [0, 0, 0, 0], [-1, -1, block_length, -1]) - first_v = tf.slice(v, [0, 0, 0, 0], [-1, -1, block_length, -1]) - first_output = dot_product_attention( - first_q, first_k, first_v, attention_bias_lower_triangle(block_length), - name="fist_block") - # compute attention for all subsequent query blocks. + if not look_right: + # Add extra padding so we son't have to do an initial query block. + extra_padding = [[0, 0], [0, 0], [block_length, padding_size], [0, 0]] + else: + # We shift everything over by half a block so query is in center. + pad_right = block_length // 2 + pad_left = block_length - pad_right + extra_padding = [[0, 0], [0, 0], + [pad_left, padding_size+pad_right], [0, 0]] + k = tf.pad(k, extra_padding) + v = tf.pad(v, extra_padding) + + # Reshape into blocks. q = tf.reshape(q, [batch, heads, num_blocks, block_length, depth_k]) - k = tf.reshape(k, [batch, heads, num_blocks, block_length, depth_k]) - v = tf.reshape(v, [batch, heads, num_blocks, block_length, depth_v]) + k = tf.reshape(k, [batch, heads, num_blocks+1, block_length, depth_k]) + v = tf.reshape(v, [batch, heads, num_blocks+1, block_length, depth_v]) + # Get local blocks by slicing. def local(x): """Create a local version of the keys or values.""" prev_block = tf.slice( - x, [0, 0, 0, 0, 0], [-1, -1, num_blocks - 1, -1, -1]) + x, [0, 0, 0, 0, 0], [-1, -1, num_blocks, -1, -1]) cur_block = tf.slice( x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) return tf.concat([prev_block, cur_block], 3) local_k = local(k) local_v = local(v) - tail_q = tf.slice(q, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) - local_length = tf.shape(local_k)[3] - # [batch, heads, num_blocks - 1, block_length, local_length] - attention = tf.matmul(tail_q, local_k, transpose_b=True) - - # make sure source_pos <= target_pos - good_part = tf.matrix_band_part( - tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) - mask = (1.0 - good_part) * -1e9 - attention += tf.reshape(mask, [1, 1, 1, block_length, local_length]) + # [batch, heads, num_blocks, block_length, local_length] + attention = tf.matmul(q, local_k, transpose_b=True) attention = tf.nn.softmax(attention) + + # Get local mask + if not use_whole_block: + good_part = tf.matrix_band_part( + tf.ones([block_length, local_length]), 0, tf.to_int64(block_length)) + elif not look_right: + good_part = tf.matrix_band_part( + tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) + else: + good_part = tf.ones([block_length, local_length]) + + attention *= tf.reshape(good_part, [1, 1, 1, block_length, local_length]) + # TODO(noam): figure out how to show a summary for the remaining blocks. # The naive way currently causes errors due to empty tensors. - # output: [batch, heads, num_blocks-1, block_length, depth_v] output = tf.matmul(attention, local_v) output = tf.reshape(output, [batch, heads, -1, depth_v]) - output = tf.concat([first_output, output], axis=2) + + # Remove added padding output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) output.set_shape(v_shape) return output diff --git a/tensor2tensor/models/common_attention_test.py b/tensor2tensor/models/common_attention_test.py index 78be4b645..a09da74e1 100644 --- a/tensor2tensor/models/common_attention_test.py +++ b/tensor2tensor/models/common_attention_test.py @@ -41,6 +41,34 @@ def testDotProductAttention(self): res = session.run(a) self.assertEqual(res.shape, (5, 7, 12, 32)) + def testMaskedLocalAttention(self): + q = np.array([[[[1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0]]]]) + k = np.array([[[[1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0]]]]) + v = np.ones((1, 1, 8, 1)) + with self.test_session() as session: + q_ = tf.constant(q, dtype=tf.float32) + k_ = tf.constant(k, dtype=tf.float32) + v_ = tf.constant(v, dtype=tf.float32) + y = common_attention.masked_local_attention_1d( + q_, k_, v_, block_length=tf.constant(2)) + res = session.run(y) + + self.assertEqual(res.shape, (1, 1, 8, 1)) + def testLocalUnmaskedAttention(self): x = np.random.rand(5, 4, 25, 16) y = np.random.rand(5, 4, 25, 16) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/models/common_layers.py index e98531d88..5449a8bef 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/models/common_layers.py @@ -475,7 +475,7 @@ def residual_fn(x, residual_dropout, filters=None, epsilon=1e-16, - name="residual"): + name=None, reuse=None): """Returns a function for combining layer input and layer output. The returned function on x (layer input) and y (layer output) computes: @@ -489,16 +489,19 @@ def residual_fn(x, filters: integer, dimension for layer norm, optional epsilon: integer, value of layer norm epsilon name: string, name + reuse: bool, whether to reuse Returns: residual layer output with applied norm_fn. """ - norm_fn = get_norm(norm_type) - res = x + tf.nn.dropout(y, 1.0 - residual_dropout) - if norm_type == "layer": - return norm_fn(res, name=name, filters=filters, epsilon=epsilon) - else: - return norm_fn(res, name=name) + with tf.variable_scope(name, default_name="residual", + values=[x, y], reuse=reuse): + norm_fn = get_norm(norm_type) + res = x + tf.nn.dropout(y, 1.0 - residual_dropout) + if norm_type == "layer": + return norm_fn(res, filters=filters, epsilon=epsilon, name=norm_type) + else: + return norm_fn(res, name=norm_type) def conv_block_internal(conv_fn, @@ -1457,6 +1460,34 @@ def global_pool_1d(inputs, pooling_type="MAX", mask=None): return output +def running_global_pool_1d(inputs, pooling_type="MAX"): + """Same global pool, but only for the elements up to the current element. + + Useful for outputs where the state of future elements is not known. + Takes no mask as all elements up to the current element are assumed to exist. + Currently only supports maximum. Equivalent to using a lower triangle bias. + + Args: + inputs: A tensor of dimensions batch_size x sequence_length x input_dims + containing the sequences of input vectors. + pooling_type: Pooling type to use. Currently only supports 'MAX'. + + Returns: + output: A tensor of dimensions batch_size x sequence_length x input_dims + dimension containing the running 'totals'. + """ + del pooling_type + with tf.name_scope("running_global_pool", [inputs]): + scan_fct = tf.maximum + # Permute inputs so seq_length is first. + elems = tf.transpose(inputs, [1, 0, 2]) + # Perform scan. + cumulatives = tf.scan(scan_fct, elems, swap_memory=True) + # Permute output to get back to original order. + output = tf.transpose(cumulatives, [1, 0, 2]) + return output + + def linear_set_layer(layer_size, inputs, context=None, @@ -1486,7 +1517,8 @@ def linear_set_layer(layer_size, output: A tensor of dimensions batch_size x sequence_length x output_dims dimension containing the sequences of transformed vectors. """ - with tf.variable_scope(name, "linear_set_layer", [inputs]): + with tf.variable_scope(name, default_name="linear_set_layer", + values=[inputs]): # Apply 1D convolution to apply linear filter to each element # along the 2nd dimension. outputs = conv1d(inputs, layer_size, 1, activation=None, name="set_conv") @@ -1495,9 +1527,10 @@ def linear_set_layer(layer_size, if context is not None: # Unfortunately tf doesn't support broadcasting via concat, but we can # simply add the transformed context to get the same effect. - context = tf.expand_dims(context, axis=1) - cont_tfm = conv1d( - context, layer_size, 1, activation=None, name="cont_conv") + if len(context.get_shape().as_list()) == 2: + context = tf.expand_dims(context, axis=1) + cont_tfm = conv1d(context, layer_size, 1, + activation=None, name="cont_conv") outputs += cont_tfm if activation_fn is not None: @@ -1512,6 +1545,7 @@ def linear_set_layer(layer_size, def ravanbakhsh_set_layer(layer_size, inputs, mask=None, + sequential=False, activation_fn=tf.nn.tanh, dropout=0.0, name=None): @@ -1525,6 +1559,9 @@ def ravanbakhsh_set_layer(layer_size, containing the sequences of input vectors. mask: A tensor of dimensions batch_size x sequence_length containing a mask for the inputs with 1's for existing elements, and 0's elsewhere. + sequential: If true, will use a running global pool so each element will + only depend on those before it. Set true if this layer is being used in + an output sequence. activation_fn: The activation function to use. dropout: dropout. name: name. @@ -1533,12 +1570,16 @@ def ravanbakhsh_set_layer(layer_size, output: A tensor of dimensions batch_size x sequence_length x vector dimension containing the sequences of transformed vectors. """ + del dropout with tf.variable_scope(name, "ravanbakhsh_set_layer", [inputs]): - output = linear_set_layer( + if sequential: + return linear_set_layer( + layer_size, + inputs - running_global_pool_1d(inputs), + activation_fn=activation_fn, + name=name) + return linear_set_layer( layer_size, inputs - tf.expand_dims(global_pool_1d(inputs, mask=mask), axis=1), activation_fn=activation_fn, - dropout=dropout, name=name) - - return output diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index ae221bdff..195879d78 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -247,8 +247,8 @@ def lstm_seq2seq_internal_attention(inputs, targets, hparams, train): return tf.expand_dims(decoder_outputs, axis=2) -@registry.register_model("baseline_lstm_seq2seq") -class LSTMSeq2Seq(t2t_model.T2TModel): +@registry.register_model +class LSTMSeq2seq(t2t_model.T2TModel): def model_fn_body(self, features): train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN @@ -256,8 +256,8 @@ def model_fn_body(self, features): self._hparams, train) -@registry.register_model("baseline_lstm_seq2seq_attention") -class LSTMSeq2SeqAttention(t2t_model.T2TModel): +@registry.register_model +class LSTMSeq2seqAttention(t2t_model.T2TModel): def model_fn_body(self, features): train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index 31380afa5..6ac792f48 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -44,7 +44,7 @@ def testLSTMSeq2Seq(self): "inputs": tf.constant(x, dtype=tf.int32), "targets": tf.constant(y, dtype=tf.int32), } - model = lstm.LSTMSeq2Seq( + model = lstm.LSTMSeq2seq( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) @@ -68,7 +68,7 @@ def testLSTMSeq2SeqAttention(self): "inputs": x, "targets": tf.constant(y, dtype=tf.int32), } - model = lstm.LSTMSeq2SeqAttention( + model = lstm.LSTMSeq2seqAttention( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) diff --git a/tensor2tensor/models/modalities.py b/tensor2tensor/models/modalities.py index c57a97905..912c54f8c 100644 --- a/tensor2tensor/models/modalities.py +++ b/tensor2tensor/models/modalities.py @@ -359,7 +359,7 @@ def xnet_resblock(x, filters, res_relu, name): class ClassLabelModality(modality.Modality): """Used for label data.""" - def __init__(self, model_hparams, vocab_size, is2d=False): + def __init__(self, model_hparams, vocab_size, is2d=True): super(ClassLabelModality, self).__init__(model_hparams, vocab_size) self._is_2d = is2d self._kernel = (3, 3) if is2d else (5, 1) @@ -425,12 +425,12 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_all): @registry.register_class_label_modality("class_label_2d") -class ClassLabel2DModality(ClassLabelModality): +class ClassLabel1DModality(ClassLabelModality): """Used for label data.""" def __init__(self, model_hparams, vocab_size): - super(ClassLabel2DModality, self).__init__( - model_hparams=model_hparams, vocab_size=vocab_size, is2d=True) + super(ClassLabel1DModality, self).__init__( + model_hparams=model_hparams, vocab_size=vocab_size, is2d=False) @registry.register_generic_modality("default") diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index 089889ce6..6f60dbfbf 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -190,7 +190,6 @@ def multimodel_base(): hparams.add_hparam("moe_n2", 0) hparams.add_hparam("moe_layers", "2") hparams.add_hparam("moe_loss_coef", 1e-2) - hparams.add_hparam("imagenet_use_2d", int(True)) return hparams diff --git a/tensor2tensor/models/multimodel_test.py b/tensor2tensor/models/multimodel_test.py index cf109beb4..73a8436cc 100644 --- a/tensor2tensor/models/multimodel_test.py +++ b/tensor2tensor/models/multimodel_test.py @@ -23,8 +23,9 @@ import numpy as np -from tensor2tensor.data_generators import problem_hparams +from tensor2tensor.data_generators import image # pylint: disable=unused-import from tensor2tensor.models import multimodel +from tensor2tensor.utils import registry import tensorflow as tf @@ -32,10 +33,12 @@ class MultiModelTest(tf.test.TestCase): def testMultiModel(self): - x = np.random.random_integers(0, high=255, size=(3, 5, 4, 3)) + x = np.random.random_integers(0, high=255, size=(3, 5, 5, 3)) y = np.random.random_integers(0, high=9, size=(3, 5, 1, 1)) hparams = multimodel.multimodel_tiny() - p_hparams = problem_hparams.image_cifar10(hparams) + hparams.add_hparam("data_dir", "") + problem = registry.problem("image_cifar10") + p_hparams = problem.internal_hparams(hparams) hparams.problems = [p_hparams] with self.test_session() as session: features = { diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index 69e2338b6..f1534137c 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -316,7 +316,6 @@ def slicenet_params1(): hparams.add_hparam("moe_n1", 32) hparams.add_hparam("moe_n2", 0) hparams.add_hparam("moe_loss_coef", 1e-2) - hparams.add_hparam("imagenet_use_2d", int(True)) # attention-related flags hparams.add_hparam("attention_type", "simple") hparams.add_hparam("num_heads", 8) diff --git a/tensor2tensor/models/slicenet_test.py b/tensor2tensor/models/slicenet_test.py index 54b57a9f2..c357448e4 100644 --- a/tensor2tensor/models/slicenet_test.py +++ b/tensor2tensor/models/slicenet_test.py @@ -23,8 +23,10 @@ import numpy as np -from tensor2tensor.data_generators import problem_hparams +from tensor2tensor.data_generators import image # pylint: disable=unused-import +from tensor2tensor.models import modalities # pylint: disable=unused-import from tensor2tensor.models import slicenet +from tensor2tensor.utils import registry import tensorflow as tf @@ -32,10 +34,12 @@ class SliceNetTest(tf.test.TestCase): def testSliceNet(self): - x = np.random.random_integers(0, high=255, size=(3, 5, 4, 3)) + x = np.random.random_integers(0, high=255, size=(3, 5, 5, 3)) y = np.random.random_integers(0, high=9, size=(3, 5, 1, 1)) hparams = slicenet.slicenet_params1_tiny() - p_hparams = problem_hparams.image_cifar10(hparams) + hparams.add_hparam("data_dir", "") + problem = registry.problem("image_cifar10") + p_hparams = problem.internal_hparams(hparams) hparams.problems = [p_hparams] with self.test_session() as session: features = { diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index c9d0a2db2..2320a57f1 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -57,8 +57,11 @@ def model_fn_body(self, features): targets, hparams) def residual_fn(x, y): - return common_layers.layer_norm(x + tf.nn.dropout( - y, 1.0 - hparams.residual_dropout)) + return common_layers.residual_fn(x, y, + hparams.norm_type, + hparams.residual_dropout, + hparams.hidden_size, + epsilon=hparams.layer_norm_epsilon) encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) @@ -267,6 +270,7 @@ def transformer_ffn_layer(x, hparams): def transformer_base(): """Set of hyperparameters.""" hparams = common_hparams.basic_params1() + hparams.norm_type = "layer" hparams.hidden_size = 512 hparams.batch_size = 4096 hparams.max_length = 256 diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py index 62413c325..1f20bfb51 100644 --- a/tensor2tensor/models/transformer_alternative.py +++ b/tensor2tensor/models/transformer_alternative.py @@ -50,17 +50,12 @@ def model_fn_body(self, features): inputs = common_layers.flatten4d3d(inputs) targets = common_layers.flatten4d3d(targets) - (encoder_input, encoder_attention_bias, - _) = transformer.transformer_prepare_encoder(inputs, target_space, hparams) - (decoder_input, - decoder_self_attention_bias) = transformer.transformer_prepare_decoder( - targets, hparams) - - # We need masks of the form batch size x input sequences - # Biases seem to be of the form batch_size x 1 x input sequences x vec dim - # Squeeze out dim one, and get the first element of each vector. - encoder_mask = tf.squeeze(encoder_attention_bias, [1])[:, :, 0] - decoder_mask = tf.squeeze(decoder_self_attention_bias, [1])[:, :, 0] + (encoder_input, encoder_attention_bias, _) = ( + transformer.transformer_prepare_encoder(inputs, target_space, hparams)) + (decoder_input, _) = ( + transformer.transformer_prepare_decoder(targets, hparams)) + + encoder_mask = bias_to_mask(encoder_attention_bias) def residual_fn(x, y): return common_layers.layer_norm(x + tf.nn.dropout( @@ -68,11 +63,12 @@ def residual_fn(x, y): encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) + encoder_output = alt_transformer_encoder( encoder_input, residual_fn, encoder_mask, hparams) decoder_output = alt_transformer_decoder( - decoder_input, encoder_output, residual_fn, decoder_mask, + decoder_input, encoder_output, residual_fn, encoder_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) @@ -80,7 +76,7 @@ def residual_fn(x, y): return decoder_output -def composite_layer(inputs, mask, hparams): +def composite_layer(inputs, mask, hparams, for_output=False): """Composite layer.""" x = inputs @@ -92,26 +88,28 @@ def composite_layer(inputs, mask, hparams): hparams.hidden_size, x, mask=mask, - dropout=0.0) + sequential=for_output, + dropout=hparams.relu_dropout) # Transforms elements to get a context, and then uses this in a final layer. elif hparams.composite_layer_type == "reembedding": # Transform elements n times and then pool. for layer in xrange(hparams.layers_per_layer): - with tf.variable_scope(".%d" % layer): + with tf.variable_scope("sub_layer_%d" % layer): x = common_layers.linear_set_layer( hparams.hidden_size, x, - dropout=0.0) - context = common_layers.global_pool_1d(x, mask=mask) - + dropout=hparams.relu_dropout) + if for_output: + context = common_layers.running_global_pool_1d(x) + else: + context = common_layers.global_pool_1d(x, mask=mask) # Final layer. x = common_layers.linear_set_layer( hparams.hidden_size, x, context=context, - dropout=0.0) - + dropout=hparams.relu_dropout) return x @@ -122,29 +120,25 @@ def alt_transformer_encoder(encoder_input, name="encoder"): """Alternative encoder.""" x = encoder_input - with tf.variable_scope(name): + x = encoder_input for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): x = residual_fn(x, composite_layer(x, mask, hparams)) - return x def alt_transformer_decoder(decoder_input, encoder_output, residual_fn, - mask, encoder_decoder_attention_bias, hparams, name="decoder"): """Alternative decoder.""" - x = decoder_input - with tf.variable_scope(name): + x = decoder_input for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): - x_ = common_attention.multihead_attention( x, encoder_output, @@ -156,17 +150,30 @@ def alt_transformer_decoder(decoder_input, hparams.attention_dropout, name="encdec_attention") - x_ = residual_fn(x_, composite_layer(x_, mask, hparams)) + x_ = residual_fn(x_, composite_layer(x_, None, hparams, + for_output=True)) x = residual_fn(x, x_) - return x +def bias_to_mask(bias): + # We need masks of the form batch size x input sequences + # Biases are of the form batch_size x num_heads x input sequences x + # output sequences. Squeeze out dim one, and get the first element of + # each vector. + bias = tf.squeeze(bias, [1])[:, :, 0] + bias = - tf.clip_by_value(bias, -1.0, 1.0) + mask = 1 - bias + return mask + + @registry.register_hparams def transformer_alt(): """Set of hyperparameters.""" hparams = transformer.transformer_base() - hparams.batch_size = 64 + hparams.batch_size = 2048 + hparams.num_hidden_layers = 10 hparams.add_hparam("layers_per_layer", 4) - hparams.add_hparam("composite_layer_type", "reembedding") + # Composite layer: ravanbakhsh or reembedding. + hparams.add_hparam("composite_layer_type", "ravanbakhsh") return hparams diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index 61fa61235..f2e69da21 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -86,7 +86,6 @@ def xception_base(): hparams.optimizer_adam_epsilon = 1e-6 hparams.optimizer_adam_beta1 = 0.85 hparams.optimizer_adam_beta2 = 0.997 - hparams.add_hparam("imagenet_use_2d", True) return hparams diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 81dcb52a5..05aa9bf26 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -27,7 +27,6 @@ from six.moves import zip # pylint: disable=redefined-builtin from tensor2tensor.data_generators import problem_hparams -from tensor2tensor.models import common_layers from tensor2tensor.utils import registry import tensorflow as tf @@ -127,35 +126,15 @@ def decode_record(record): return decode_record(example_serialized) -def preprocessing(examples, data_file_pattern, mode): +def preprocessing(examples, data_file_pattern): """Preprocessing of examples.""" + # This function is for obsolete problems only, as we're porting them + # all to the Problem class and its preprocess_examples method. Don't add. if "image" in data_file_pattern: - # Small single-example pre-processing for images. def resize(img, size): return tf.to_int64(tf.image.resize_images(img, [size, size])) - def preprocess(img): - img = tf.image.resize_images(img, [360, 360]) - img = common_layers.image_augmentation(tf.to_float(img) / 255.) - return tf.to_int64(img * 255.) - - if ("image_imagenet" in data_file_pattern or - "image_mscoco" in data_file_pattern): - examples["inputs"] = tf.cast(examples["inputs"], tf.int64) - # For imagnet/coco, resize images to 299x299 as is standard. - inputs = examples["inputs"] - if mode == tf.contrib.learn.ModeKeys.TRAIN: - examples["inputs"] = tf.cond( # Preprocess 80% of the time. - tf.less(tf.random_uniform([]), 0.8), - lambda img=inputs: preprocess(img), - lambda img=inputs: resize(img, 299)) - else: - examples["inputs"] = tf.to_int64(resize(inputs, 299)) - elif ("image_cifar10" in data_file_pattern and - mode == tf.contrib.learn.ModeKeys.TRAIN): - examples["inputs"] = common_layers.cifar_image_augmentation( - examples["inputs"]) - elif "img2img" in data_file_pattern: + if "img2img" in data_file_pattern: inputs = examples["inputs"] examples["inputs"] = resize(inputs, 16) examples["targets"] = resize(inputs, 64) @@ -163,7 +142,6 @@ def preprocess(img): inputs = examples["inputs"] examples["inputs"] = resize(inputs, 8) examples["targets"] = resize(inputs, 32) - elif "audio" in data_file_pattern: # Reshape audio to proper shape sample_count = tf.to_int32(examples.pop("audio/sample_count")) @@ -205,8 +183,6 @@ def default_example_reading_spec(data_file_pattern): # Read from image TFRecords if the file has "image" in its name. if data_file_pattern and "image" in data_file_pattern: label_key = "image/class/label" - if "fsns" in data_file_pattern: - label_key = "image/unpadded_label" data_fields = { "image/encoded": tf.FixedLenFeature((), tf.string), "image/format": tf.FixedLenFeature((), tf.string), @@ -257,7 +233,7 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams): data_items_to_decoders=data_items_to_decoders) if problem is None: - examples = preprocessing(examples, data_file_pattern, mode) + examples = preprocessing(examples, data_file_pattern) else: examples = problem.preprocess_examples(examples, mode, hparams) diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 0baad2471..9d5e1e0a6 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -76,7 +76,7 @@ class Modalities(object): # Camel case to snake case utils _first_cap_re = re.compile("(.)([A-Z][a-z0-9]+)") -_all_cap_re = re.compile("([a-z])([A-Z])") +_all_cap_re = re.compile("([a-z0-9])([A-Z])") def _convert_camel_to_snake(name): diff --git a/tensor2tensor/utils/registry_test.py b/tensor2tensor/utils/registry_test.py index 3231809ea..62c24b054 100644 --- a/tensor2tensor/utils/registry_test.py +++ b/tensor2tensor/utils/registry_test.py @@ -94,8 +94,9 @@ def testSnakeCase(self): convert = registry._convert_camel_to_snake self.assertEqual("typical_camel_case", convert("TypicalCamelCase")) - self.assertEqual("numbers_fuse2gether", convert("NumbersFuse2Gether")) - self.assertEqual("lstm_seq2seq", convert("LSTMSeq2Seq")) + self.assertEqual("numbers_fuse2gether", convert("NumbersFuse2gether")) + self.assertEqual("numbers_fuse2_gether", convert("NumbersFuse2Gether")) + self.assertEqual("lstm_seq2_seq", convert("LSTMSeq2Seq")) self.assertEqual("starts_lower", convert("startsLower")) self.assertEqual("starts_lower_caps", convert("startsLowerCAPS")) self.assertEqual("caps_fuse_together", convert("CapsFUSETogether")) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 95774dabc..7cb484bc8 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -410,10 +410,13 @@ def model_fn(self, features, skip=False, last_position_only=False): # Construct the model body. with tf.variable_scope("body", reuse=self._problem_idx > 0): if skip: - body_outputs, extra_loss = transformed_features["targets"], 0.0 + body_outputs = transformed_features["targets"] + losses = {"extra": 0.0} else: - body_outputs, extra_loss = self.model_fn_body_sharded( + body_outputs, losses = self.model_fn_body_sharded( transformed_features) + if isinstance(losses, tf.Tensor): # If it's a single extra loss. + losses = {"extra": losses} with tf.variable_scope(target_modality.name, reuse=target_reuse): if not last_position_only: @@ -440,7 +443,8 @@ def model_fn(self, features, skip=False, last_position_only=False): training_loss = None tf.logging.info("This model_fn took %.3f sec." % (time.time() - start_time)) - return sharded_logits, {"training": training_loss, "extra": extra_loss} + losses["training"] = training_loss + return sharded_logits, losses def model_fn_body_sharded(self, sharded_features): """Mixture-of-experts models will override this function. @@ -465,10 +469,10 @@ def model_fn_body_sharded(self, sharded_features): _with_timing(self.model_fn_body, "model_fn_body"), datashard_to_features) if isinstance(output, tuple): - loss = tf.reduce_mean(output[1]) + loss = {"extra": tf.reduce_mean(output[1])} output = output[0] else: - loss = 0.0 + loss = {"extra": 0.0} return output, loss def model_fn_body(self, features): diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index ea88183c9..562279623 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -67,7 +67,7 @@ def setUpClass(cls): def testModelsImported(self): models = registry.list_models() - self.assertTrue("baseline_lstm_seq2seq" in models) + self.assertTrue("lstm_seq2seq" in models) def testHParamsImported(self): hparams = registry.list_hparams() From 41bca6896dd4c906ce67faeb19ca6422fbd3b6c3 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 1 Aug 2017 18:06:29 -0700 Subject: [PATCH 0209/4095] v1.1.4 PiperOrigin-RevId: 163916460 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ae028d847..6f509d03e 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.3', + version='1.1.4', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 0bdfcbb57fb0f22e44d3f852889a94716009fffc Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 1 Aug 2017 18:49:03 -0700 Subject: [PATCH 0210/4095] Use get_residual_fn to get the residual_fn in the transformer. PiperOrigin-RevId: 163919630 --- README.md | 2 +- .../generator.py} | 1 - tensor2tensor/models/transformer.py | 18 ++++++++++++------ tensor2tensor/{bin/t2t-trainer => trainer.py} | 1 - 4 files changed, 13 insertions(+), 9 deletions(-) rename tensor2tensor/{bin/t2t-datagen => data_generators/generator.py} (99%) rename tensor2tensor/{bin/t2t-trainer => trainer.py} (99%) diff --git a/README.md b/README.md index bb0f6f534..5bb1c31a3 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ python -c "from tensor2tensor.models.transformer import Transformer" **Datasets** are all standardized on `TFRecord` files with `tensorflow.Example` protocol buffers. All datasets are registered and generated with the [data -generator](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t-datagen) +generator](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/generator.py) and many common sequence datasets are already available for generation and use. ### Problems and Modalities diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/data_generators/generator.py similarity index 99% rename from tensor2tensor/bin/t2t-datagen rename to tensor2tensor/data_generators/generator.py index 837d6d203..bc79f2384 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/data_generators/generator.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 2320a57f1..a2b55febf 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -56,12 +56,7 @@ def model_fn_body(self, features): (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder( targets, hparams) - def residual_fn(x, y): - return common_layers.residual_fn(x, y, - hparams.norm_type, - hparams.residual_dropout, - hparams.hidden_size, - epsilon=hparams.layer_norm_epsilon) + residual_fn = get_residual_fn(hparams) encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) @@ -76,6 +71,17 @@ def residual_fn(x, y): return decoder_output +def get_residual_fn(hparams): + """Get residual_fn.""" + def residual_fn(x, y): + return common_layers.residual_fn(x, y, + hparams.norm_type, + hparams.residual_dropout, + hparams.hidden_size, + epsilon=hparams.layer_norm_epsilon) + return residual_fn + + def transformer_prepare_encoder(inputs, target_space, hparams): """Prepare one shard of the model for the encoder. diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/trainer.py similarity index 99% rename from tensor2tensor/bin/t2t-trainer rename to tensor2tensor/trainer.py index 13dd7d355..41c9cd33b 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/trainer.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # From 4390618e692f790871019aadc0371efcd76a89f4 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 2 Aug 2017 10:49:20 -0700 Subject: [PATCH 0211/4095] Add requests dependency PiperOrigin-RevId: 164005758 --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 6f509d03e..fd8e77a46 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ ], install_requires=[ 'numpy', + 'requests', 'sympy', 'six', ], From 9394d0e3f2ecc0f7fa14d59dec17b0da3cff9a21 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 2 Aug 2017 11:05:05 -0700 Subject: [PATCH 0212/4095] Use ModeKeys enum consistently in trainer_utils instead of string literals. PiperOrigin-RevId: 164008619 --- tensor2tensor/utils/trainer_utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 33053806d..5c0240e16 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -181,8 +181,8 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, eval_hooks.append(hook) return tf.contrib.learn.Experiment( estimator=estimator, - train_input_fn=input_fns["train"], - eval_input_fn=input_fns["eval"], + train_input_fn=input_fns[tf.contrib.learn.ModeKeys.TRAIN], + eval_input_fn=input_fns[tf.contrib.learn.ModeKeys.EVAL], eval_metrics=eval_metrics, train_steps=train_steps, eval_steps=eval_steps, @@ -220,7 +220,9 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): keep_checkpoint_max=FLAGS.keep_checkpoint_max)) # Store the hparams in the estimator as well estimator.hparams = hparams - return estimator, {"train": train_input_fn, "eval": eval_input_fn} + return estimator, { + tf.contrib.learn.ModeKeys.TRAIN: train_input_fn, + tf.contrib.learn.ModeKeys.EVAL: eval_input_fn} def log_registry(): From f6799b9515e0e214d2d4295f4e4cf94cf27cf333 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 2 Aug 2017 16:45:57 -0700 Subject: [PATCH 0213/4095] File/code moves PiperOrigin-RevId: 164058229 --- README.md | 2 +- .../generator.py => bin/t2t-datagen} | 26 +- tensor2tensor/{trainer.py => bin/t2t-trainer} | 1 + tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/image.py | 45 +- .../data_generators/problem_hparams.py | 88 +- .../{models => layers}/common_attention.py | 131 +- .../common_attention_test.py | 32 +- .../{models => layers}/common_hparams.py | 10 +- .../{models => layers}/common_layers.py | 15 +- .../{models => layers}/common_layers_test.py | 9 +- .../{models => layers}/modalities.py | 2 +- .../{models => layers}/modalities_test.py | 2 +- tensor2tensor/models/attention_lm.py | 10 +- tensor2tensor/models/attention_lm_moe.py | 43 +- tensor2tensor/models/bluenet.py | 128 +- tensor2tensor/models/bytenet.py | 13 +- tensor2tensor/models/gene_expression.py | 4 +- tensor2tensor/models/gene_expression_test.py | 2 +- tensor2tensor/models/long_answer.py | 53 +- tensor2tensor/models/lstm.py | 48 +- tensor2tensor/models/lstm_test.py | 6 +- tensor2tensor/models/models.py | 2 +- tensor2tensor/models/multimodel.py | 99 +- tensor2tensor/models/neural_gpu.py | 5 +- tensor2tensor/models/neural_gpu_test.py | 6 +- tensor2tensor/models/shake_shake.py | 4 +- tensor2tensor/models/slicenet.py | 19 +- tensor2tensor/models/slicenet_test.py | 6 +- tensor2tensor/models/transformer.py | 37 +- .../models/transformer_alternative.py | 39 +- tensor2tensor/models/xception.py | 4 +- tensor2tensor/utils/decoding.py | 371 ++++++ tensor2tensor/utils/devices.py | 147 +++ tensor2tensor/utils/input_fn_builder.py | 200 +++ tensor2tensor/utils/metrics.py | 2 +- tensor2tensor/utils/modality.py | 2 +- tensor2tensor/utils/model_builder.py | 451 +++++++ tensor2tensor/utils/registry.py | 6 +- tensor2tensor/utils/trainer_utils.py | 1085 +---------------- tensor2tensor/utils/trainer_utils_test.py | 4 +- 41 files changed, 1648 insertions(+), 1512 deletions(-) rename tensor2tensor/{data_generators/generator.py => bin/t2t-datagen} (93%) rename tensor2tensor/{trainer.py => bin/t2t-trainer} (99%) rename tensor2tensor/{models => layers}/common_attention.py (89%) rename tensor2tensor/{models => layers}/common_attention_test.py (77%) rename tensor2tensor/{models => layers}/common_hparams.py (97%) rename tensor2tensor/{models => layers}/common_layers.py (99%) rename tensor2tensor/{models => layers}/common_layers_test.py (98%) rename tensor2tensor/{models => layers}/modalities.py (99%) rename tensor2tensor/{models => layers}/modalities_test.py (98%) create mode 100644 tensor2tensor/utils/decoding.py create mode 100644 tensor2tensor/utils/devices.py create mode 100644 tensor2tensor/utils/input_fn_builder.py create mode 100644 tensor2tensor/utils/model_builder.py diff --git a/README.md b/README.md index 5bb1c31a3..bb0f6f534 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ python -c "from tensor2tensor.models.transformer import Transformer" **Datasets** are all standardized on `TFRecord` files with `tensorflow.Example` protocol buffers. All datasets are registered and generated with the [data -generator](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/generator.py) +generator](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t-datagen) and many common sequence datasets are already available for generation and use. ### Problems and Modalities diff --git a/tensor2tensor/data_generators/generator.py b/tensor2tensor/bin/t2t-datagen similarity index 93% rename from tensor2tensor/data_generators/generator.py rename to tensor2tensor/bin/t2t-datagen index bc79f2384..39453dbee 100644 --- a/tensor2tensor/data_generators/generator.py +++ b/tensor2tensor/bin/t2t-datagen @@ -1,3 +1,4 @@ +#!/usr/bin/env python # coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # @@ -15,14 +16,15 @@ """Produces the training and dev data for --problem into --data_dir. -generator.py produces sharded and shuffled TFRecord files of tensorflow.Example -protocol buffers for a variety of datasets registered in this file. +Produces sharded and shuffled TFRecord files of tensorflow.Example protocol +buffers for a variety of registered datasets. -All datasets are registered in _SUPPORTED_PROBLEM_GENERATORS. Each entry maps a -string name (selectable on the command-line with --problem) to a function that -takes 2 arguments - input_directory and mode (one of "train" or "dev") - and -yields for each training example a dictionary mapping string feature names to -lists of {string, int, float}. The generator will be run once for each mode. +All Problems are registered with @registry.register_problem or are in +_SUPPORTED_PROBLEM_GENERATORS in this file. Each entry maps a string name +(selectable on the command-line with --problem) to a function that takes 2 +arguments - input_directory and mode (one of "train" or "dev") - and yields for +each training example a dictionary mapping string feature names to lists of +{string, int, float}. The generator will be run once for each mode. """ from __future__ import absolute_import from __future__ import division @@ -228,8 +230,7 @@ def generate_data_for_problem(problem): num_shards = FLAGS.num_shards or 10 tf.logging.info("Generating training data for %s.", problem) train_output_files = generator_utils.train_data_filenames( - problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, - num_shards) + problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, num_shards) generator_utils.generate_files(training_gen(), train_output_files, FLAGS.max_cases) tf.logging.info("Generating development data for %s.", problem) @@ -249,9 +250,10 @@ def generate_data_for_registered_problem(problem_name): raise ValueError("--num_shards should not be set for registered Problem.") problem = registry.problem(problem_name) task_id = None if FLAGS.task_id < 0 else FLAGS.task_id - problem.generate_data(os.path.expanduser(FLAGS.data_dir), - os.path.expanduser(FLAGS.tmp_dir), - task_id=task_id) + problem.generate_data( + os.path.expanduser(FLAGS.data_dir), + os.path.expanduser(FLAGS.tmp_dir), + task_id=task_id) if __name__ == "__main__": diff --git a/tensor2tensor/trainer.py b/tensor2tensor/bin/t2t-trainer similarity index 99% rename from tensor2tensor/trainer.py rename to tensor2tensor/bin/t2t-trainer index 41c9cd33b..13dd7d355 100644 --- a/tensor2tensor/trainer.py +++ b/tensor2tensor/bin/t2t-trainer @@ -1,3 +1,4 @@ +#!/usr/bin/env python # coding=utf-8 # Copyright 2017 The Tensor2Tensor Authors. # diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 6830cf0bf..9be133a61 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -30,6 +30,7 @@ from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing + # Problem modules that require optional dependencies # pylint: disable=g-import-not-at-top try: diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index a2e328f00..d70d9339e 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -36,7 +36,7 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry import tensorflow as tf @@ -76,10 +76,11 @@ class ImageFSNS(ImageProblem): def generate_data(self, data_dir, tmp_dir, task_id=-1): list_url = ("https://raw.githubusercontent.com/tensorflow/models/master/" "street/python/fsns_urls.txt") - fsns_urls = generator_utils.maybe_download( - tmp_dir, "fsns_urls.txt", list_url) - fsns_files = [f.strip() for f in open(fsns_urls, "r") - if f.startswith("http://")] + fsns_urls = generator_utils.maybe_download(tmp_dir, "fsns_urls.txt", + list_url) + fsns_files = [ + f.strip() for f in open(fsns_urls, "r") if f.startswith("http://") + ] for url in fsns_files: if "/train/train" in url: generator_utils.maybe_download( @@ -88,8 +89,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): generator_utils.maybe_download( data_dir, "image_fsns-dev" + url[-len("-00100-of-00512"):], url) elif "charset" in url: - generator_utils.maybe_download( - data_dir, "charset_size134.txt", url) + generator_utils.maybe_download(data_dir, "charset_size134.txt", url) def feature_encoders(self, data_dir): # This vocab file must be present within the data directory. @@ -111,8 +111,8 @@ def hparams(self, defaults, model_hparams): def example_reading_spec(self): label_key = "image/unpadded_label" - return super(ImageFSNS, self).example_reading_spec(self, - label_key=label_key) + return super(ImageFSNS, self).example_reading_spec( + self, label_key=label_key) class Image2ClassProblem(ImageProblem): @@ -161,6 +161,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): def imagenet_preprocess_examples(examples, mode): """Preprocessing used for Imagenet and similar problems.""" + def preprocess(img): img = tf.image.resize_images(img, [360, 360]) img = common_layers.image_augmentation(tf.to_float(img) / 255.) @@ -215,8 +216,8 @@ def is_small(self): def preprocess_examples(self, examples, mode): examples = imagenet_preprocess_examples(examples, mode) - examples["inputs"] = tf.to_int64(tf.image.resize_images( - examples["inputs"], [32, 32])) + examples["inputs"] = tf.to_int64( + tf.image.resize_images(examples["inputs"], [32, 32])) def image_generator(images, labels): @@ -665,12 +666,20 @@ def generator(self, data_dir, tmp_dir, is_training): vocab_filename = "vocab.endefr.%d" % self.targeted_vocab_size if is_training: return mscoco_generator( - data_dir, tmp_dir, True, 80000, - vocab_filename=vocab_filename, vocab_size=self.targeted_vocab_size) + data_dir, + tmp_dir, + True, + 80000, + vocab_filename=vocab_filename, + vocab_size=self.targeted_vocab_size) else: return mscoco_generator( - data_dir, tmp_dir, False, 40000, - vocab_filename=vocab_filename, vocab_size=self.targeted_vocab_size) + data_dir, + tmp_dir, + False, + 40000, + vocab_filename=vocab_filename, + vocab_size=self.targeted_vocab_size) @registry.register_problem @@ -690,8 +699,8 @@ def targeted_vocab_size(self): def _get_celeba(directory): """Download and extract CELEBA to directory unless it is there.""" # path = os.path.join(directory, _CELEBA_NAME) - path = generator_utils.maybe_download_from_drive(directory, - _CELEBA_NAME, _CELEBA_URL) + path = generator_utils.maybe_download_from_drive(directory, _CELEBA_NAME, + _CELEBA_URL) if not tf.gfile.Exists(path): zipfile.ZipFile(path + ".zip", "r").extractall(directory) @@ -711,7 +720,7 @@ def celeba_generator(tmp_dir, how_many, start_from=0): """ _get_celeba(tmp_dir) image_files = tf.gfile.Glob(os.path.join(tmp_dir, _CELEBA_NAME) + "/*.jpg") - for filename in image_files[start_from:start_from+how_many]: + for filename in image_files[start_from:start_from + how_many]: with tf.gfile.Open(filename, "r") as f: encoded_image_data = f.read() yield { diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 607078d2f..d0577db52 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -25,7 +25,7 @@ # Dependency imports from tensor2tensor.data_generators import text_encoder -from tensor2tensor.models import modalities # pylint: disable=unused-import +from tensor2tensor.layers import modalities # pylint: disable=unused-import from tensor2tensor.utils import registry import tensorflow as tf @@ -202,8 +202,7 @@ def default_problem_hparams(): # the targets. For instance `problem_copy` will copy the inputs, but # `problem_rev_copy` will copy the targets. was_reversed=False, - was_copy=False, - ) + was_copy=False,) def test_problem_hparams(unused_model_hparams, input_vocab_size, @@ -327,9 +326,7 @@ def lm1b_32k(model_hparams): encoder = text_encoder.SubwordTextEncoder( os.path.join(model_hparams.data_dir, "lm1b_32k.subword_text_encoder")) p.target_modality = (registry.Modalities.SYMBOL, encoder.vocab_size) - p.vocabulary = { - "targets": encoder - } + p.vocabulary = {"targets": encoder} p.target_space_id = 3 return p @@ -343,9 +340,7 @@ def lm1b_characters(unused_model_hparams): p.input_modality = {} encoder = text_encoder.ByteTextEncoder() p.target_modality = (registry.Modalities.SYMBOL, encoder.vocab_size) - p.vocabulary = { - "targets": encoder - } + p.vocabulary = {"targets": encoder} p.target_space_id = 2 return p @@ -358,10 +353,7 @@ def wiki_32k(model_hparams): modality_spec = (registry.Modalities.SYMBOL, encoder.vocab_size) p.input_modality = {"inputs": modality_spec} p.target_modality = modality_spec - p.vocabulary = { - "inputs": encoder, - "targets": encoder - } + p.vocabulary = {"inputs": encoder, "targets": encoder} p.target_space_id = 3 return p @@ -430,9 +422,7 @@ def wmt_parsing_tokens(model_hparams, wrong_vocab_size): return p -def wsj_parsing_tokens(model_hparams, - prefix, - wrong_source_vocab_size, +def wsj_parsing_tokens(model_hparams, prefix, wrong_source_vocab_size, wrong_target_vocab_size): """English to parse tree translation benchmark. @@ -487,11 +477,9 @@ def ice_parsing_tokens(model_hparams, wrong_source_vocab_size): p = default_problem_hparams() # This vocab file must be present within the data directory. source_vocab_filename = os.path.join( - model_hparams.data_dir, - "ice_source.vocab.%d" % wrong_source_vocab_size) - target_vocab_filename = os.path.join( - model_hparams.data_dir, - "ice_target.vocab.256") + model_hparams.data_dir, "ice_source.vocab.%d" % wrong_source_vocab_size) + target_vocab_filename = os.path.join(model_hparams.data_dir, + "ice_target.vocab.256") source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) p.input_modality = { @@ -502,7 +490,7 @@ def ice_parsing_tokens(model_hparams, wrong_source_vocab_size): "inputs": source_subtokenizer, "targets": target_subtokenizer, } - p.input_space_id = 18 # Icelandic tokens + p.input_space_id = 18 # Icelandic tokens p.target_space_id = 19 # Icelandic parse tokens return p @@ -534,23 +522,41 @@ def image_celeba(unused_model_hparams): # Dictionary of named hyperparameter settings for various problems. # This is only accessed through the problem_hparams function below. PROBLEM_HPARAMS_MAP = { - "audio_timit_characters_tune": audio_timit_characters, - "audio_timit_characters_test": audio_timit_characters, - "audio_timit_tokens_8k_tune": lambda p: audio_timit_tokens(p, 2**13), - "audio_timit_tokens_8k_test": lambda p: audio_timit_tokens(p, 2**13), - "audio_wsj_characters_tune": audio_wsj_characters, - "audio_wsj_characters_test": audio_wsj_characters, - "audio_wsj_tokens_8k_tune": lambda p: audio_wsj_tokens(p, 2**13), - "audio_wsj_tokens_8k_test": lambda p: audio_wsj_tokens(p, 2**13), - "lm1b_characters": lm1b_characters, - "lm1b_32k": lm1b_32k, - "wiki_32k": wiki_32k, - "ice_parsing_characters": wmt_parsing_characters, - "ice_parsing_tokens": lambda p: ice_parsing_tokens(p, 2**13), - "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), - "wsj_parsing_tokens_16k": lambda p: wsj_parsing_tokens( # pylint: disable=g-long-lambda - p, "wsj", 2**14, 2**9), - "wmt_ende_bpe32k": wmt_ende_bpe32k, - "image_celeba_tune": image_celeba, - "img2img_imagenet": img2img_imagenet, + "audio_timit_characters_tune": + audio_timit_characters, + "audio_timit_characters_test": + audio_timit_characters, + "audio_timit_tokens_8k_tune": + lambda p: audio_timit_tokens(p, 2**13), + "audio_timit_tokens_8k_test": + lambda p: audio_timit_tokens(p, 2**13), + "audio_wsj_characters_tune": + audio_wsj_characters, + "audio_wsj_characters_test": + audio_wsj_characters, + "audio_wsj_tokens_8k_tune": + lambda p: audio_wsj_tokens(p, 2**13), + "audio_wsj_tokens_8k_test": + lambda p: audio_wsj_tokens(p, 2**13), + "lm1b_characters": + lm1b_characters, + "lm1b_32k": + lm1b_32k, + "wiki_32k": + wiki_32k, + "ice_parsing_characters": + wmt_parsing_characters, + "ice_parsing_tokens": + lambda p: ice_parsing_tokens(p, 2**13), + "wmt_parsing_tokens_8k": + lambda p: wmt_parsing_tokens(p, 2**13), + "wsj_parsing_tokens_16k": + lambda p: wsj_parsing_tokens( # pylint: disable=g-long-lambda + p, "wsj", 2**14, 2**9), + "wmt_ende_bpe32k": + wmt_ende_bpe32k, + "image_celeba_tune": + image_celeba, + "img2img_imagenet": + img2img_imagenet, } diff --git a/tensor2tensor/models/common_attention.py b/tensor2tensor/layers/common_attention.py similarity index 89% rename from tensor2tensor/models/common_attention.py rename to tensor2tensor/layers/common_attention.py index b52fb8aea..e343dba0a 100644 --- a/tensor2tensor/models/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -22,7 +22,7 @@ # Dependency imports -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_layers import tensorflow as tf @@ -157,9 +157,10 @@ def add_positional_embedding_nd(x, max_length, name): shape[i + 1] = max_length size[i + 1] = dynamic_shape[i + 1] var = (tf.get_variable( - name + "_%d" % i, shape, - initializer=tf.random_normal_initializer(0, depth ** -0.5)) - * (depth ** 0.5)) + name + "_%d" % i, + shape, + initializer=tf.random_normal_initializer(0, depth**-0.5)) * + (depth**0.5)) x += tf.slice(var, start, size) return x @@ -314,11 +315,13 @@ def attention_image_summary(attn, image_shapes=None): assert len(image_shapes) == 6 q_rows, q_cols, q_channnels, m_rows, m_cols, m_channels = list( image_shapes) - image = tf.reshape(image, [-1, q_rows, q_cols, q_channnels, - m_rows, m_cols, m_channels, 3]) + image = tf.reshape(image, [ + -1, q_rows, q_cols, q_channnels, m_rows, m_cols, m_channels, 3 + ]) image = tf.transpose(image, [0, 1, 4, 3, 2, 5, 6, 7]) - image = tf.reshape(image, [-1, q_rows * m_rows * q_channnels, - q_cols * m_cols * m_channels, 3]) + image = tf.reshape(image, [ + -1, q_rows * m_rows * q_channnels, q_cols * m_cols * m_channels, 3 + ]) tf.summary.image("attention", image, max_outputs=1) @@ -358,9 +361,13 @@ def dot_product_attention(q, return tf.matmul(weights, v) -def masked_local_attention_1d(q, k, v, - block_length=128, look_right=True, - use_whole_block=False, name=None): +def masked_local_attention_1d(q, + k, + v, + block_length=128, + look_right=True, + use_whole_block=False, + name=None): """Attention to the source position and a neigborhood around it. The sequence is divided into blocks of length block_size. Attention for a @@ -390,8 +397,8 @@ def masked_local_attention_1d(q, k, v, Returns: a Tensor of shape [batch, heads, length, depth_v] """ - with tf.variable_scope(name, default_name="local_attention_1d", - values=[q, k, v]): + with tf.variable_scope( + name, default_name="local_attention_1d", values=[q, k, v]): v_shape = v.get_shape() batch = tf.shape(q)[0] heads = tf.shape(q)[1] @@ -401,8 +408,7 @@ def masked_local_attention_1d(q, k, v, original_length = length # If (length < block_length), then we use only one block. - block_length = tf.where(tf.less(length, block_length), - length, block_length) + block_length = tf.where(tf.less(length, block_length), length, block_length) # Pad to desired length. padding_size = tf.mod(-length, block_length) length += padding_size @@ -417,24 +423,23 @@ def masked_local_attention_1d(q, k, v, # We shift everything over by half a block so query is in center. pad_right = block_length // 2 pad_left = block_length - pad_right - extra_padding = [[0, 0], [0, 0], - [pad_left, padding_size+pad_right], [0, 0]] + extra_padding = [[0, 0], [0, 0], [pad_left, padding_size + pad_right], + [0, 0]] k = tf.pad(k, extra_padding) v = tf.pad(v, extra_padding) # Reshape into blocks. q = tf.reshape(q, [batch, heads, num_blocks, block_length, depth_k]) - k = tf.reshape(k, [batch, heads, num_blocks+1, block_length, depth_k]) - v = tf.reshape(v, [batch, heads, num_blocks+1, block_length, depth_v]) + k = tf.reshape(k, [batch, heads, num_blocks + 1, block_length, depth_k]) + v = tf.reshape(v, [batch, heads, num_blocks + 1, block_length, depth_v]) # Get local blocks by slicing. def local(x): """Create a local version of the keys or values.""" - prev_block = tf.slice( - x, [0, 0, 0, 0, 0], [-1, -1, num_blocks, -1, -1]) - cur_block = tf.slice( - x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) + prev_block = tf.slice(x, [0, 0, 0, 0, 0], [-1, -1, num_blocks, -1, -1]) + cur_block = tf.slice(x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) return tf.concat([prev_block, cur_block], 3) + local_k = local(k) local_v = local(v) local_length = tf.shape(local_k)[3] @@ -466,7 +471,11 @@ def local(x): return output -def unmasked_local_attention_1d(q, k, v, block_length=128, filter_width=100, +def unmasked_local_attention_1d(q, + k, + v, + block_length=128, + filter_width=100, name=None): """strided block local self-attention. @@ -481,19 +490,22 @@ def unmasked_local_attention_1d(q, k, v, block_length=128, filter_width=100, Returns: a Tensor of shape [batch, heads, length, depth_v] """ - with tf.variable_scope(name, default_name="local_self_attention_1d", - values=[q, k, v]): + with tf.variable_scope( + name, default_name="local_self_attention_1d", values=[q, k, v]): v_shape = v.get_shape() depth_v = tf.shape(v)[3] batch_size = tf.shape(q)[0] num_heads = tf.shape(q)[1] original_length = tf.shape(q)[2] + # making sure q is a multiple of d def pad_to_multiple(x, pad_length): x_length = tf.shape(x)[2] return tf.pad(x, [[0, 0], [0, 0], [0, -x_length % pad_length], [0, 0]]) + def pad_l_and_r(x, pad_length): return tf.pad(x, [[0, 0], [0, 0], [pad_length, pad_length], [0, 0]]) + q = pad_to_multiple(q, block_length) k = pad_to_multiple(k, block_length) v = pad_to_multiple(v, block_length) @@ -501,16 +513,17 @@ def pad_l_and_r(x, pad_length): # Setting up q blocks new_q_shape = tf.shape(q) # Setting up q blocks - q = tf.reshape(q, [new_q_shape[0], new_q_shape[1], - new_q_shape[2]//block_length, - block_length, new_q_shape[3]]) + q = tf.reshape(q, [ + new_q_shape[0], new_q_shape[1], new_q_shape[2] // block_length, + block_length, new_q_shape[3] + ]) # Setting up k and v values k = pad_l_and_r(k, filter_width) v = pad_l_and_r(v, filter_width) length = tf.shape(k)[2] - full_filter_width = block_length + 2*filter_width + full_filter_width = block_length + 2 * filter_width # getting gather indices indices = tf.range(0, length, delta=1, name="index_range") # making indices [1, length, 1] to appy convs @@ -541,7 +554,7 @@ def pad_l_and_r(x, pad_length): logits = tf.matmul(q, k_new, transpose_b=True) - attention = tf.nn.softmax(logits+attention_bias) + attention = tf.nn.softmax(logits + attention_bias) output = tf.matmul(attention, v_new) output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) @@ -626,14 +639,13 @@ def multihead_attention(query_antecedent, key_depth_per_head = total_key_depth // num_heads q *= key_depth_per_head**-0.5 if attention_type == "dot_product": - x = dot_product_attention( - q, k, v, bias, dropout_rate, image_shapes) + x = dot_product_attention(q, k, v, bias, dropout_rate, image_shapes) elif attention_type == "local_mask_right": x = masked_local_attention_1d(q, k, v, block_length=block_length) else: assert attention_type == "local_unmasked" - x = unmasked_local_attention_1d(q, k, v, block_length=block_length, - filter_width=block_width) + x = unmasked_local_attention_1d( + q, k, v, block_length=block_length, filter_width=block_width) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") return x @@ -669,29 +681,22 @@ def ffn_self_attention_layer(x, A Tensor. """ - with tf.variable_scope(name, default_name="feedforward_self_attention", - values=[x]): + with tf.variable_scope( + name, default_name="feedforward_self_attention", values=[x]): x_shape = tf.shape(x) part_depth = filter_depth // num_parts if not share_kv: combined = common_layers.conv1d( - x, - filter_depth * 3, - 1, - name="qkv_transform") + x, filter_depth * 3, 1, name="qkv_transform") combined = tf.expand_dims(combined, axis=2) q, k, v = tf.split(combined, 3, axis=3) else: - q = tf.expand_dims(common_layers.conv1d( - x, - filter_depth, - 1, - name="q_transform"), axis=2) - kv_combined = tf.expand_dims(common_layers.conv1d( - tf.concat([x, x], axis=1), - filter_depth, - 1, - name="kv_transform"), axis=2) + q = tf.expand_dims( + common_layers.conv1d(x, filter_depth, 1, name="q_transform"), axis=2) + kv_combined = tf.expand_dims( + common_layers.conv1d( + tf.concat([x, x], axis=1), filter_depth, 1, name="kv_transform"), + axis=2) k, v = tf.split(kv_combined, [x_shape[1], x_shape[1]], axis=1) batch_q = tf.reshape(q, [-1, 1, num_parts, part_depth]) @@ -701,8 +706,7 @@ def ffn_self_attention_layer(x, batch_q *= part_depth**-0.5 # non-masked bias bias = None - x = dot_product_attention( - batch_q, batch_k, batch_v, bias, dropout_rate) + x = dot_product_attention(batch_q, batch_k, batch_v, bias, dropout_rate) x = tf.reshape(x, [x_shape[0], x_shape[1], filter_depth]) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") return x @@ -738,20 +742,21 @@ def parameter_attention(x, Returns: A Tensor. """ - with tf.variable_scope(name, default_name="parameter_attention", - values=[x]): + with tf.variable_scope(name, default_name="parameter_attention", values=[x]): head_size_k = total_key_depth // num_heads head_size_v = total_value_depth // num_heads var_shape_k = [num_heads, memory_rows, head_size_k] var_shape_v = [num_heads, memory_rows, head_size_v] k = tf.get_variable( - "k", var_shape_k, - initializer=tf.random_normal_initializer( - 0, output_depth ** -0.5)) * (num_heads ** 0.5) + "k", + var_shape_k, + initializer=tf.random_normal_initializer(0, output_depth**-0.5)) * ( + num_heads**0.5) v = tf.get_variable( - "v", var_shape_v, - initializer=tf.random_normal_initializer( - 0, output_depth ** -0.5)) * (output_depth ** 0.5) + "v", + var_shape_v, + initializer=tf.random_normal_initializer(0, output_depth**-0.5)) * ( + output_depth**0.5) batch_size = tf.shape(x)[0] length = tf.shape(x)[1] q = common_layers.conv1d(x, total_key_depth, 1, name="q_transform") @@ -759,8 +764,8 @@ def parameter_attention(x, # This is a cheaper form of attention dropout where we use to use # the same dropout decisions across batch elemets and query positions, # but different decisions across heads and memory positions. - v = tf.nn.dropout(v, 1.0 - dropout_rate, - noise_shape=[num_heads, memory_rows, 1]) + v = tf.nn.dropout( + v, 1.0 - dropout_rate, noise_shape=[num_heads, memory_rows, 1]) # query is [batch, length, hidden_size] # reshape and transpose it to [heads, batch * length, head_size] q = tf.reshape(q, [batch_size, length, num_heads, head_size_k]) diff --git a/tensor2tensor/models/common_attention_test.py b/tensor2tensor/layers/common_attention_test.py similarity index 77% rename from tensor2tensor/models/common_attention_test.py rename to tensor2tensor/layers/common_attention_test.py index a09da74e1..61855b876 100644 --- a/tensor2tensor/models/common_attention_test.py +++ b/tensor2tensor/layers/common_attention_test.py @@ -22,7 +22,7 @@ # Dependency imports import numpy as np -from tensor2tensor.models import common_attention +from tensor2tensor.layers import common_attention import tensorflow as tf @@ -42,22 +42,14 @@ def testDotProductAttention(self): self.assertEqual(res.shape, (5, 7, 12, 32)) def testMaskedLocalAttention(self): - q = np.array([[[[1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0]]]]) - k = np.array([[[[1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0], - [1.0, 0.0, 0.0, 0.0]]]]) + q = np.array([[[[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [ + 1.0, 0.0, 0.0, 0.0 + ], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0]]]]) + k = np.array([[[[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [ + 1.0, 0.0, 0.0, 0.0 + ], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0]]]]) v = np.ones((1, 1, 8, 1)) with self.test_session() as session: q_ = tf.constant(q, dtype=tf.float32) @@ -77,7 +69,8 @@ def testLocalUnmaskedAttention(self): tf.constant(x, dtype=tf.float32), tf.constant(y, dtype=tf.float32), tf.constant(y, dtype=tf.float32), - block_length=4, filter_width=3) + block_length=4, + filter_width=3) session.run(tf.global_variables_initializer()) res = session.run(a) self.assertEqual(res.shape, (5, 4, 25, 16)) @@ -90,7 +83,8 @@ def testLocalUnmaskedAttentionMatchingBlockLength(self): tf.constant(x, dtype=tf.float32), tf.constant(y, dtype=tf.float32), tf.constant(y, dtype=tf.float32), - block_length=5, filter_width=3) + block_length=5, + filter_width=3) session.run(tf.global_variables_initializer()) res = session.run(a) self.assertEqual(res.shape, (5, 4, 25, 16)) diff --git a/tensor2tensor/models/common_hparams.py b/tensor2tensor/layers/common_hparams.py similarity index 97% rename from tensor2tensor/models/common_hparams.py rename to tensor2tensor/layers/common_hparams.py index 353586393..6ecb06fb4 100644 --- a/tensor2tensor/models/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -220,10 +220,6 @@ def basic_range1(ranged_hparams): rhp.set_float("optimizer_adam_epsilon", 1e-7, 1e-2, scale=rhp.LOG_SCALE) rhp.set_float("optimizer_adam_beta1", 0.8, 0.9) rhp.set_float("optimizer_adam_beta2", 0.995, 0.999) - rhp.set_categorical("optimizer", - ["Adam", - "Adagrad", - "Momentum", - "RMSProp", - "SGD", - "YellowFin"]) + rhp.set_categorical("optimizer", [ + "Adam", "Adagrad", "Momentum", "RMSProp", "SGD", "YellowFin" + ]) diff --git a/tensor2tensor/models/common_layers.py b/tensor2tensor/layers/common_layers.py similarity index 99% rename from tensor2tensor/models/common_layers.py rename to tensor2tensor/layers/common_layers.py index 5449a8bef..8a58cd065 100644 --- a/tensor2tensor/models/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -475,7 +475,8 @@ def residual_fn(x, residual_dropout, filters=None, epsilon=1e-16, - name=None, reuse=None): + name=None, + reuse=None): """Returns a function for combining layer input and layer output. The returned function on x (layer input) and y (layer output) computes: @@ -494,8 +495,8 @@ def residual_fn(x, Returns: residual layer output with applied norm_fn. """ - with tf.variable_scope(name, default_name="residual", - values=[x, y], reuse=reuse): + with tf.variable_scope( + name, default_name="residual", values=[x, y], reuse=reuse): norm_fn = get_norm(norm_type) res = x + tf.nn.dropout(y, 1.0 - residual_dropout) if norm_type == "layer": @@ -1517,8 +1518,8 @@ def linear_set_layer(layer_size, output: A tensor of dimensions batch_size x sequence_length x output_dims dimension containing the sequences of transformed vectors. """ - with tf.variable_scope(name, default_name="linear_set_layer", - values=[inputs]): + with tf.variable_scope( + name, default_name="linear_set_layer", values=[inputs]): # Apply 1D convolution to apply linear filter to each element # along the 2nd dimension. outputs = conv1d(inputs, layer_size, 1, activation=None, name="set_conv") @@ -1529,8 +1530,8 @@ def linear_set_layer(layer_size, # simply add the transformed context to get the same effect. if len(context.get_shape().as_list()) == 2: context = tf.expand_dims(context, axis=1) - cont_tfm = conv1d(context, layer_size, 1, - activation=None, name="cont_conv") + cont_tfm = conv1d( + context, layer_size, 1, activation=None, name="cont_conv") outputs += cont_tfm if activation_fn is not None: diff --git a/tensor2tensor/models/common_layers_test.py b/tensor2tensor/layers/common_layers_test.py similarity index 98% rename from tensor2tensor/models/common_layers_test.py rename to tensor2tensor/layers/common_layers_test.py index 8e724587b..df3ccc68f 100644 --- a/tensor2tensor/models/common_layers_test.py +++ b/tensor2tensor/layers/common_layers_test.py @@ -22,7 +22,7 @@ # Dependency imports import numpy as np -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_layers import tensorflow as tf @@ -351,8 +351,7 @@ def testResidualFn(self): x2 = np.random.rand(5, 2, 1, 11) x3 = common_layers.residual_fn( tf.constant(x1, dtype=tf.float32), - tf.constant(x2, dtype=tf.float32), - norm_type, 0.1) + tf.constant(x2, dtype=tf.float32), norm_type, 0.1) session.run(tf.global_variables_initializer()) actual = session.run(x3) self.assertEqual(actual.shape, (5, 2, 1, 11)) @@ -365,7 +364,9 @@ def testResidualFnWithLayerNorm(self): x3 = common_layers.residual_fn( tf.constant(x1, dtype=tf.float32), tf.constant(x2, dtype=tf.float32), - norm_type, 0.1, epsilon=0.1) + norm_type, + 0.1, + epsilon=0.1) session.run(tf.global_variables_initializer()) actual = session.run(x3) self.assertEqual(actual.shape, (5, 2, 1, 11)) diff --git a/tensor2tensor/models/modalities.py b/tensor2tensor/layers/modalities.py similarity index 99% rename from tensor2tensor/models/modalities.py rename to tensor2tensor/layers/modalities.py index 912c54f8c..523c52fa8 100644 --- a/tensor2tensor/models/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -22,7 +22,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_layers from tensor2tensor.utils import expert_utils as eu from tensor2tensor.utils import modality from tensor2tensor.utils import registry diff --git a/tensor2tensor/models/modalities_test.py b/tensor2tensor/layers/modalities_test.py similarity index 98% rename from tensor2tensor/models/modalities_test.py rename to tensor2tensor/layers/modalities_test.py index 9130613b9..0ccd13777 100644 --- a/tensor2tensor/models/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -22,7 +22,7 @@ import numpy as np -from tensor2tensor.models import modalities +from tensor2tensor.layers import modalities from tensor2tensor.utils import expert_utils import tensorflow as tf diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 3b874555f..664bc9e21 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -29,9 +29,9 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_attention -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -56,8 +56,8 @@ def residual_fn(x, y): y, 1.0 - hparams.residual_dropout)) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) - decoder_output = attention_lm_decoder( - decoder_input, residual_fn, decoder_self_attention_bias, hparams) + decoder_output = attention_lm_decoder(decoder_input, residual_fn, + decoder_self_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) return decoder_output diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 4b37050bb..780478fec 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -29,9 +29,9 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_attention -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -61,32 +61,33 @@ def residual_fn(x, y): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("attention"): - y = dp(common_attention.multihead_attention, - x, - None, - decoder_self_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - name="decoder_self_attention") + y = dp( + common_attention.multihead_attention, + x, + None, + decoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + name="decoder_self_attention") x = dp(residual_fn, x, y) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers.split(","): y, loss = common_layers.moe_layer( dp, self._ps_devices, x, hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, - hparams.hidden_size, - hparams.moe_hidden_size, hparams.moe_n1, hparams.moe_n2, - hparams.moe_loss_coef) + hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, + hparams.moe_n2, hparams.moe_loss_coef) extra_loss += loss else: - y = dp(common_layers.conv_hidden_relu, - x, - hparams.filter_size, - hparams.hidden_size, - dropout=hparams.relu_dropout) + y = dp( + common_layers.conv_hidden_relu, + x, + hparams.filter_size, + hparams.hidden_size, + dropout=hparams.relu_dropout) x = dp(residual_fn, x, y) decoder_output = dp(tf.expand_dims, x, 2) return decoder_output, extra_loss diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index 87ad70e41..96cb60615 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -27,14 +27,13 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model import tensorflow as tf - # var: 1d tensor, raw weights for each choice # tempered_var: raw weights with temperature applied # inv_t: inverse of the temperature to use when normalizing `var` @@ -86,7 +85,7 @@ def create_selection_weights(name, assert len(shape) == 1 # TODO(rshin): Change this to select without replacement? selection = tf.multinomial(tf.expand_dims(var, axis=0), 4) - selection = tf.squeeze(selection, axis=0) # [k] selected classes. + selection = tf.squeeze(selection, axis=0) # [k] selected classes. to_run = tf.one_hot(selection, shape[0]) # [k x nmodules] one-hot. # [nmodules], 0=not run, 1=run. to_run = tf.minimum(tf.reduce_sum(to_run, axis=0), 1) @@ -101,16 +100,12 @@ def create_selection_weights(name, if names is not None: tf.get_collection_ref("selection_weight_names/" + var.name).extend( - names.flatten() - if isinstance(names, np.ndarray) else names) + names.flatten() if isinstance(names, np.ndarray) else names) tf.add_to_collection("selection_weight_names_tensor/" + var.name, tf.constant(names)) return SelectionWeights( - var=var, - tempered_var=tempered_var, - inv_t=inv_t, - normalized=weights) + var=var, tempered_var=tempered_var, inv_t=inv_t, normalized=weights) def kernel_premultiplier(max_kernel_size, kernel_sizes, input_channels, @@ -155,18 +150,13 @@ def kernel_premultiplier(max_kernel_size, kernel_sizes, input_channels, channel_weights.append(channel_weight) channel_weight = tf.add_n(channel_weights) - multiplier = (tf.reshape(kernel_weight, max_kernel_size + (1, 1)) * - tf.reshape(channel_weight, (1, 1, -1, 1))) + multiplier = (tf.reshape(kernel_weight, max_kernel_size + + (1, 1)) * tf.reshape(channel_weight, (1, 1, -1, 1))) return multiplier -def make_subseparable_kernel( - kernel_size, - input_channels, - filters, - separability, - kernel_initializer, - kernel_regularizer): +def make_subseparable_kernel(kernel_size, input_channels, filters, separability, + kernel_initializer, kernel_regularizer): """Make a kernel to do subseparable convolution wiht `tf.nn.conv2d`. Args: @@ -198,16 +188,14 @@ def make_subseparable_kernel( regularizer=kernel_regularizer) pointwise_kernel = tf.get_variable( - "pointwise_kernel", - (input_channels, filters), + "pointwise_kernel", (input_channels, filters), initializer=kernel_initializer, regularizer=kernel_regularizer) expanded_depthwise_kernel = tf.transpose( tf.scatter_nd( indices=tf.tile( - tf.expand_dims( - tf.range(0, input_channels), axis=1), [1, 2]), + tf.expand_dims(tf.range(0, input_channels), axis=1), [1, 2]), updates=tf.transpose(depthwise_kernel, (2, 0, 1)), shape=(input_channels, input_channels) + kernel_size), (2, 3, 0, 1)) @@ -230,21 +218,20 @@ def make_subseparable_kernel( raise NotImplementedError -def multi_subseparable_conv( - inputs, - filters, - kernel_sizes, - input_channels, - separabilities, - kernel_selection_weights=None, - channel_selection_weights=None, - separability_selection_weights=None, - kernel_selection_weights_params=None, - channel_selection_weights_params=None, - separability_selection_weights_params=None, - kernel_initializer=None, - kernel_regularizer=None, - scope=None): +def multi_subseparable_conv(inputs, + filters, + kernel_sizes, + input_channels, + separabilities, + kernel_selection_weights=None, + channel_selection_weights=None, + separability_selection_weights=None, + kernel_selection_weights_params=None, + channel_selection_weights_params=None, + separability_selection_weights_params=None, + kernel_initializer=None, + kernel_regularizer=None, + scope=None): """Simultaneously compute different kinds of convolutions on subsets of input. Args: @@ -299,44 +286,33 @@ def multi_subseparable_conv( kernel_selection_weights = create_selection_weights( "kernels", "softmax", (len(kernel_sizes),), - names=[ - "kernel_h{}_w{}".format(h, w) for h, w in kernel_sizes - ], + names=["kernel_h{}_w{}".format(h, w) for h, w in kernel_sizes], **kernel_selection_weights_params) if channel_selection_weights is None: channel_selection_weights = create_selection_weights( "channels", "softmax", (len(input_channels),), - names=[ - "channels_{}_{}".format(c1, c2) for c1, c2 in input_channels - ], + names=["channels_{}_{}".format(c1, c2) for c1, c2 in input_channels], **channel_selection_weights_params) if separability_selection_weights is None: separability_selection_weights = create_selection_weights( "separability", "softmax", (len(separabilities),), - names=[ - "separability_{}".format(s) for s in separabilities - ], + names=["separability_{}".format(s) for s in separabilities], **separability_selection_weights_params) kernels = [] for separability in separabilities: with tf.variable_scope("separablity_{}".format(separability)): - kernel = make_subseparable_kernel( - max_kernel_size, - max_num_channels, - filters, - separability, - kernel_initializer, - kernel_regularizer) + kernel = make_subseparable_kernel(max_kernel_size, max_num_channels, + filters, separability, + kernel_initializer, kernel_regularizer) premultiplier = kernel_premultiplier( max_kernel_size, kernel_sizes, input_channels, - kernel_selection_weights, - channel_selection_weights) + kernel_selection_weights, channel_selection_weights) kernels.append(kernel * premultiplier) @@ -358,18 +334,24 @@ def multi_subseparable_conv( def conv_module(kw, kh, sep, div): + def convfn(x, hparams): return common_layers.subseparable_conv( - x, hparams.hidden_size // div, (kw, kh), - padding="SAME", separability=sep, + x, + hparams.hidden_size // div, (kw, kh), + padding="SAME", + separability=sep, name="conv_%d%d_sep%d_div%d" % (kw, kh, sep, div)) + return convfn def multi_conv_module(kernel_sizes, seps): + def convfn(x, hparams): return multi_subseparable_conv(x, hparams.hidden_size, kernel_sizes, [(0, hparams.hidden_size)], seps) + return convfn @@ -438,15 +420,16 @@ def run_unary_modules_basic(modules, cur, hparams): def run_unary_modules_sample(modules, cur, hparams, k): """Run modules, sampling k.""" selection_weights = create_selection_weights( - "selection", - ("softmax_topk", k), + "selection", ("softmax_topk", k), shape=[len(modules)], inv_t=100.0 * common_layers.inverse_exp_decay( hparams.anneal_until, min_value=0.01)) - all_res = [tf.cond(tf.less(selection_weights.normalized[n], 1e-6), - lambda: tf.zeros_like(cur), - lambda i=n: modules[i](cur, hparams)) - for n in xrange(len(modules))] + all_res = [ + tf.cond( + tf.less(selection_weights.normalized[n], 1e-6), + lambda: tf.zeros_like(cur), + lambda i=n: modules[i](cur, hparams)) for n in xrange(len(modules)) + ] all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0) res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1]) return tf.reduce_sum(res, axis=0) @@ -461,8 +444,7 @@ def run_unary_modules(modules, cur, hparams): def batch_deviation(x): """Average deviation of the batch.""" x_mean = tf.reduce_mean(x, axis=[0], keep_dims=True) - x_variance = tf.reduce_mean( - tf.square(x - x_mean), axis=[0], keep_dims=True) + x_variance = tf.reduce_mean(tf.square(x - x_mean), axis=[0], keep_dims=True) return tf.reduce_mean(tf.sqrt(x_variance)) @@ -475,13 +457,15 @@ def model_fn_body(self, features): multi_conv = multi_conv_module( kernel_sizes=[(3, 3), (5, 5), (7, 7)], seps=[0, 1]) conv_modules = [multi_conv, identity_module] - activation_modules = [identity_module, - lambda x, _: tf.nn.relu(x), - lambda x, _: tf.nn.elu(x), - lambda x, _: tf.tanh(x)] + activation_modules = [ + identity_module, lambda x, _: tf.nn.relu(x), lambda x, _: tf.nn.elu(x), + lambda x, _: tf.tanh(x) + ] norm_modules = [identity_module, layernorm_module, noamnorm_module] - binary_modules = [first_binary_module, second_binary_module, - sum_binary_module, shakeshake_binary_module] + binary_modules = [ + first_binary_module, second_binary_module, sum_binary_module, + shakeshake_binary_module + ] inputs = features["inputs"] def run_unary(x, name): diff --git a/tensor2tensor/models/bytenet.py b/tensor2tensor/models/bytenet.py index 28862e594..d9c4e29a9 100644 --- a/tensor2tensor/models/bytenet.py +++ b/tensor2tensor/models/bytenet.py @@ -23,8 +23,8 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -63,8 +63,8 @@ def bytenet_internal(inputs, targets, hparams): # Pad inputs and targets to be the same length, divisible by 50. inputs, targets = common_layers.pad_to_same_length( inputs, targets, final_length_divisible_by=50) - final_encoder = residual_dilated_conv( - inputs, hparams.num_block_repeat, "SAME", "encoder", hparams) + final_encoder = residual_dilated_conv(inputs, hparams.num_block_repeat, + "SAME", "encoder", hparams) shifted_targets = common_layers.shift_left(targets) kernel = (hparams.kernel_height, hparams.kernel_width) @@ -73,9 +73,8 @@ def bytenet_internal(inputs, targets, hparams): hparams.hidden_size, [((1, 1), kernel)], padding="LEFT") - return residual_dilated_conv( - decoder_start, hparams.num_block_repeat, - "LEFT", "decoder", hparams) + return residual_dilated_conv(decoder_start, hparams.num_block_repeat, + "LEFT", "decoder", hparams) @registry.register_model diff --git a/tensor2tensor/models/gene_expression.py b/tensor2tensor/models/gene_expression.py index bdb93509b..af2d83158 100644 --- a/tensor2tensor/models/gene_expression.py +++ b/tensor2tensor/models/gene_expression.py @@ -22,8 +22,8 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model diff --git a/tensor2tensor/models/gene_expression_test.py b/tensor2tensor/models/gene_expression_test.py index a43eda97a..3b1dc6873 100644 --- a/tensor2tensor/models/gene_expression_test.py +++ b/tensor2tensor/models/gene_expression_test.py @@ -23,8 +23,8 @@ import numpy as np from tensor2tensor.data_generators import gene_expression as gene_data +from tensor2tensor.layers import modalities # pylint: disable=unused-import from tensor2tensor.models import gene_expression -from tensor2tensor.models import modalities # pylint: disable=unused-import import tensorflow as tf diff --git a/tensor2tensor/models/long_answer.py b/tensor2tensor/models/long_answer.py index be8024f63..a9fb45e4a 100644 --- a/tensor2tensor/models/long_answer.py +++ b/tensor2tensor/models/long_answer.py @@ -34,9 +34,9 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_attention -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -67,34 +67,35 @@ def residual_fn(x, y): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("attention"): - y = dp(common_attention.multihead_attention, - x, - None, - None, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - attention_type="local_mask_right", - block_length=hparams.block_length, - name="decoder_self_attention") + y = dp( + common_attention.multihead_attention, + x, + None, + None, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + attention_type="local_mask_right", + block_length=hparams.block_length, + name="decoder_self_attention") x = dp(residual_fn, x, y) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers.split(","): y, loss = common_layers.moe_layer( dp, self._ps_devices, x, hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, - hparams.hidden_size, - hparams.moe_hidden_size, hparams.moe_n1, hparams.moe_n2, - hparams.moe_loss_coef) + hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, + hparams.moe_n2, hparams.moe_loss_coef) extra_loss += loss else: - y = dp(common_layers.conv_hidden_relu, - x, - hparams.filter_size, - hparams.hidden_size, - dropout=hparams.relu_dropout) + y = dp( + common_layers.conv_hidden_relu, + x, + hparams.filter_size, + hparams.hidden_size, + dropout=hparams.relu_dropout) x = dp(residual_fn, x, y) x = dp(long_answer_output, x, inputs) return x, extra_loss @@ -113,7 +114,8 @@ def long_answer_prepare_decoder(inputs, targets, hparams): """ decoder_input = tf.concat([ length_embedding(targets, hparams), inputs, - common_layers.shift_left_3d(targets)], 1) + common_layers.shift_left_3d(targets) + ], 1) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) return decoder_input @@ -140,8 +142,7 @@ def length_embedding(targets, hparams): padded_target_length = tf.shape(targets)[1] if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN: lengths = padded_target_length * tf.to_int32( - tf.less(tf.random_uniform([batch]), - hparams.answer_length_prob_train)) + tf.less(tf.random_uniform([batch]), hparams.answer_length_prob_train)) elif hparams.mode == tf.contrib.learn.ModeKeys.EVAL: lengths = 0 else: diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 195879d78..d79b04494 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -23,25 +23,29 @@ # Dependency imports -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model import tensorflow as tf from tensorflow.python.util import nest - # Track Tuple of state and attention values -AttentionTuple = collections.namedtuple("AttentionTuple", - ("state", "attention")) +AttentionTuple = collections.namedtuple("AttentionTuple", ("state", + "attention")) class ExternalAttentionCellWrapper(tf.contrib.rnn.RNNCell): """Wrapper for external attention states for an encoder-decoder setup.""" - def __init__(self, cell, attn_states, attn_vec_size=None, - input_size=None, state_is_tuple=True, reuse=None): + def __init__(self, + cell, + attn_states, + attn_vec_size=None, + input_size=None, + state_is_tuple=True, + reuse=None): """Create a cell with attention. Args: @@ -137,8 +141,8 @@ def call(self, inputs, state): new_attns = self._attention(new_state_cat, attn_states, attn_length) with tf.variable_scope("attn_output_projection"): - output = tf.layers.dense(tf.concat([lstm_output, new_attns], axis=1), - self._attn_size) + output = tf.layers.dense( + tf.concat([lstm_output, new_attns], axis=1), self._attn_size) new_state = AttentionTuple(new_state, new_attns) @@ -151,18 +155,16 @@ def _attention(self, query, attn_states, attn_length): tanh = tf.tanh with tf.variable_scope("attention"): - k = tf.get_variable( - "attn_w", [1, 1, self._attn_size, self._attn_vec_size]) + k = tf.get_variable("attn_w", + [1, 1, self._attn_size, self._attn_vec_size]) v = tf.get_variable("attn_v", [self._attn_vec_size, 1]) - hidden = tf.reshape(attn_states, - [-1, attn_length, 1, self._attn_size]) + hidden = tf.reshape(attn_states, [-1, attn_length, 1, self._attn_size]) hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME") y = tf.layers.dense(query, self._attn_vec_size) y = tf.reshape(y, [-1, 1, 1, self._attn_vec_size]) s = reduce_sum(v * tanh(hidden_features + y), [2, 3]) a = softmax(s) - d = reduce_sum( - tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2]) + d = reduce_sum(tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2]) new_attns = tf.reshape(d, [-1, self._attn_size]) return new_attns @@ -186,8 +188,8 @@ def dropout_lstm_cell(): time_major=False) -def lstm_attention_decoder(inputs, hparams, train, name, - initial_state, attn_states): +def lstm_attention_decoder(inputs, hparams, train, name, initial_state, + attn_states): """Run LSTM cell with attention on inputs of shape [batch x time x size].""" def dropout_lstm_cell(): @@ -196,9 +198,10 @@ def dropout_lstm_cell(): input_keep_prob=1.0 - hparams.dropout * tf.to_float(train)) layers = [dropout_lstm_cell() for _ in range(hparams.num_hidden_layers)] - cell = ExternalAttentionCellWrapper(tf.nn.rnn_cell.MultiRNNCell(layers), - attn_states, - attn_vec_size=hparams.attn_vec_size) + cell = ExternalAttentionCellWrapper( + tf.nn.rnn_cell.MultiRNNCell(layers), + attn_states, + attn_vec_size=hparams.attn_vec_size) initial_state = cell.combine_state(initial_state) with tf.variable_scope(name): return tf.nn.dynamic_rnn( @@ -239,10 +242,7 @@ def lstm_seq2seq_internal_attention(inputs, targets, hparams, train): # LSTM decoder with attention shifted_targets = common_layers.shift_left(targets) decoder_outputs, _ = lstm_attention_decoder( - common_layers.flatten4d3d(shifted_targets), - hparams, - train, - "decoder", + common_layers.flatten4d3d(shifted_targets), hparams, train, "decoder", final_encoder_state, encoder_outputs) return tf.expand_dims(decoder_outputs, axis=2) diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index 6ac792f48..7da3d2380 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -24,7 +24,7 @@ import numpy as np from tensor2tensor.data_generators import problem_hparams -from tensor2tensor.models import common_hparams +from tensor2tensor.layers import common_hparams from tensor2tensor.models import lstm import tensorflow as tf @@ -44,8 +44,8 @@ def testLSTMSeq2Seq(self): "inputs": tf.constant(x, dtype=tf.int32), "targets": tf.constant(y, dtype=tf.int32), } - model = lstm.LSTMSeq2seq( - hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + model = lstm.LSTMSeq2seq(hparams, tf.contrib.learn.ModeKeys.TRAIN, + p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index 907a801cf..c2a904888 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -23,6 +23,7 @@ # pylint: disable=unused-import +from tensor2tensor.layers import modalities from tensor2tensor.models import attention_lm from tensor2tensor.models import attention_lm_moe from tensor2tensor.models import bluenet @@ -30,7 +31,6 @@ from tensor2tensor.models import gene_expression from tensor2tensor.models import long_answer from tensor2tensor.models import lstm -from tensor2tensor.models import modalities from tensor2tensor.models import multimodel from tensor2tensor.models import neural_gpu from tensor2tensor.models import shake_shake diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index 6f60dbfbf..290c78732 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -22,10 +22,10 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_attention -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers -from tensor2tensor.models import modalities +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers +from tensor2tensor.layers import modalities from tensor2tensor.models import slicenet from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -41,12 +41,22 @@ def conv_res_step(x, hparams, padding, mask): dilations_and_kernels2 = [((1, 1), k2), ((4, 4), k2)] with tf.variable_scope("conv_res_step"): y = common_layers.subseparable_conv_block( - x, hparams.filter_size, dilations_and_kernels1, - padding=padding, mask=mask, separabilities=0, name="residual1") + x, + hparams.filter_size, + dilations_and_kernels1, + padding=padding, + mask=mask, + separabilities=0, + name="residual1") y = tf.nn.dropout(y, 1.0 - hparams.dropout) return common_layers.subseparable_conv_block( - y, hparams.hidden_size, dilations_and_kernels2, - padding=padding, mask=mask, separabilities=0, name="residual2") + y, + hparams.hidden_size, + dilations_and_kernels2, + padding=padding, + mask=mask, + separabilities=0, + name="residual2") def residual_fn2(x, y, hparams): @@ -102,9 +112,9 @@ def flatten(inputs): expert_loss = 0.0 for i in xrange(hparams.num_hidden_layers): with tf.variable_scope("enc_layer_%d" % i): - inputs_encoded, moe_loss = conv_experts( - inputs_encoded, hparams, dp, self._ps_devices, "SAME", - inputs_mask, i) + inputs_encoded, moe_loss = conv_experts(inputs_encoded, hparams, dp, + self._ps_devices, "SAME", + inputs_mask, i) expert_loss += tf.reduce_mean(moe_loss) * hparams.moe_loss_coef # If we're just predicing a class, there is no use for a decoder, return. @@ -116,54 +126,57 @@ def flatten(inputs): inputs3d = dp(tf.squeeze, inputs, 2) inputs_encoded3d = dp(tf.squeeze, inputs_encoded, 2) encoder_padding = dp(common_attention.embedding_to_padding, inputs3d) - encoder_attention_bias = dp( - common_attention.attention_bias_ignore_padding, encoder_padding) + encoder_attention_bias = dp(common_attention.attention_bias_ignore_padding, + encoder_padding) targets = dp(common_layers.flatten4d3d, sharded_features["targets"]) target_space_emb = dp(slicenet.embed_target_space, sharded_features["target_space_id"], hparams.hidden_size) - (decoder_input, decoder_self_attention_bias) = dp( - prepare_decoder, targets, target_space_emb) + (decoder_input, decoder_self_attention_bias) = dp(prepare_decoder, targets, + target_space_emb) x = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.dropout) for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("dec_layer_%d" % layer): with tf.variable_scope("attention"): - y = dp(common_attention.multihead_attention, - x, - None, - decoder_self_attention_bias, - hparams.hidden_size, - hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - name="decoder_self_attention") - z = dp(common_attention.multihead_attention, - y, - inputs_encoded3d, - encoder_attention_bias, - hparams.hidden_size, - hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - name="encdec_attention") + y = dp( + common_attention.multihead_attention, + x, + None, + decoder_self_attention_bias, + hparams.hidden_size, + hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + name="decoder_self_attention") + z = dp( + common_attention.multihead_attention, + y, + inputs_encoded3d, + encoder_attention_bias, + hparams.hidden_size, + hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + name="encdec_attention") x = dp(residual_fn3, x, y, z, hparams) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers.split(","): y, moe_loss = common_layers.moe_layer( - dp, self._ps_devices, x, train, - hparams.hidden_size, hparams.filter_size, - hparams.moe_n1, hparams.moe_n2, hparams.moe_loss_coef) + dp, self._ps_devices, x, train, hparams.hidden_size, + hparams.filter_size, hparams.moe_n1, hparams.moe_n2, + hparams.moe_loss_coef) expert_loss += tf.reduce_mean(moe_loss) else: - y = dp(common_layers.conv_hidden_relu, - x, - hparams.filter_size, - hparams.hidden_size, - dropout=hparams.dropout) + y = dp( + common_layers.conv_hidden_relu, + x, + hparams.filter_size, + hparams.hidden_size, + dropout=hparams.dropout) x = dp(residual_fn2, x, y, hparams) x = dp(tf.expand_dims, x, 2) diff --git a/tensor2tensor/models/neural_gpu.py b/tensor2tensor/models/neural_gpu.py index fc9d75639..4037aa8d4 100644 --- a/tensor2tensor/models/neural_gpu.py +++ b/tensor2tensor/models/neural_gpu.py @@ -23,8 +23,8 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -34,6 +34,7 @@ def neural_gpu(inputs, hparams, name=None): """The core Neural GPU.""" with tf.variable_scope(name, "neural_gpu"): + def step(state, inp): # pylint: disable=missing-docstring x = tf.nn.dropout(state, 1.0 - hparams.dropout) for layer in xrange(hparams.num_hidden_layers): diff --git a/tensor2tensor/models/neural_gpu_test.py b/tensor2tensor/models/neural_gpu_test.py index 46c01f403..b7a1e98f7 100644 --- a/tensor2tensor/models/neural_gpu_test.py +++ b/tensor2tensor/models/neural_gpu_test.py @@ -24,7 +24,7 @@ import numpy as np from tensor2tensor.data_generators import problem_hparams -from tensor2tensor.models import common_hparams +from tensor2tensor.layers import common_hparams from tensor2tensor.models import neural_gpu import tensorflow as tf @@ -50,8 +50,8 @@ def testNeuralGPU(self): "inputs": tf.constant(inputs, dtype=tf.int32), "targets": tf.constant(targets, dtype=tf.int32) } - model = neural_gpu.NeuralGPU( - hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + model = neural_gpu.NeuralGPU(hparams, tf.contrib.learn.ModeKeys.TRAIN, + p_hparams) shadred_logits, _ = model.model_fn(features) logits = tf.concat(shadred_logits, 0) session.run(tf.global_variables_initializer()) diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py index 7fa40783a..aa91654a3 100644 --- a/tensor2tensor/models/shake_shake.py +++ b/tensor2tensor/models/shake_shake.py @@ -23,8 +23,8 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index f1534137c..8900e6d11 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -23,9 +23,9 @@ from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin -from tensor2tensor.models import common_attention -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -83,8 +83,7 @@ def attention(targets_shifted, inputs_encoded, norm_fn, hparams, bias=None): return norm_fn(targets_shifted + targets_with_attention, name="attn_norm") -def multi_conv_res(x, padding, name, layers, hparams, - mask=None, source=None): +def multi_conv_res(x, padding, name, layers, hparams, mask=None, source=None): """A stack of separable convolution blocks with residual connections.""" with tf.variable_scope(name): padding_bias = None @@ -200,7 +199,10 @@ def slicenet_middle(inputs_encoded, targets, target_space_emb, mask, hparams): else: inputs_padding_bias = (1.0 - mask) * -1e9 # Bias to not attend to padding. targets_with_attention = attention( - targets_shifted, inputs_encoded, norm_fn, hparams, + targets_shifted, + inputs_encoded, + norm_fn, + hparams, bias=inputs_padding_bias) # Positional targets: merge attention and raw. @@ -237,8 +239,8 @@ def slicenet_internal(inputs, targets, target_space, problem_idx, hparams): inputs = common_layers.add_timing_signal(inputs) # Add position info. target_space_emb = embed_target_space(target_space, hparams.hidden_size) extra_layers = int(hparams.num_hidden_layers * 1.5) - inputs_encoded = multi_conv_res(inputs, "SAME", "encoder", extra_layers, - hparams, mask=inputs_mask) + inputs_encoded = multi_conv_res( + inputs, "SAME", "encoder", extra_layers, hparams, mask=inputs_mask) target_modality_name = hparams.problems[problem_idx].target_modality.name if "class_label_modality" in target_modality_name: # If we're just predicing a class, there is no use for a decoder. @@ -266,6 +268,7 @@ def model_fn_body(self, features): features["target_space_id"], self._problem_idx, self._hparams) + _KERNEL_SCHEMES = { "3.3.3.3": [(3, 1), (3, 1), (3, 1), (3, 1)], "3.7.7.7": [(3, 1), (7, 1), (7, 1), (7, 1)], diff --git a/tensor2tensor/models/slicenet_test.py b/tensor2tensor/models/slicenet_test.py index c357448e4..388acde1b 100644 --- a/tensor2tensor/models/slicenet_test.py +++ b/tensor2tensor/models/slicenet_test.py @@ -24,7 +24,7 @@ import numpy as np from tensor2tensor.data_generators import image # pylint: disable=unused-import -from tensor2tensor.models import modalities # pylint: disable=unused-import +from tensor2tensor.layers import modalities # pylint: disable=unused-import from tensor2tensor.models import slicenet from tensor2tensor.utils import registry @@ -47,8 +47,8 @@ def testSliceNet(self): "targets": tf.constant(y, dtype=tf.int32), "target_space_id": tf.constant(1, dtype=tf.int32), } - model = slicenet.SliceNet( - hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + model = slicenet.SliceNet(hparams, tf.contrib.learn.ModeKeys.TRAIN, + p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index a2b55febf..1add44115 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -27,9 +27,9 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_attention -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -49,10 +49,9 @@ def model_fn_body(self, features): inputs = common_layers.flatten4d3d(inputs) targets = common_layers.flatten4d3d(targets) - (encoder_input, - encoder_self_attention_bias, - encoder_decoder_attention_bias) = ( - transformer_prepare_encoder(inputs, target_space, hparams)) + (encoder_input, encoder_self_attention_bias, + encoder_decoder_attention_bias) = (transformer_prepare_encoder( + inputs, target_space, hparams)) (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder( targets, hparams) @@ -73,12 +72,16 @@ def model_fn_body(self, features): def get_residual_fn(hparams): """Get residual_fn.""" + def residual_fn(x, y): - return common_layers.residual_fn(x, y, - hparams.norm_type, - hparams.residual_dropout, - hparams.hidden_size, - epsilon=hparams.layer_norm_epsilon) + return common_layers.residual_fn( + x, + y, + hparams.norm_type, + hparams.residual_dropout, + hparams.hidden_size, + epsilon=hparams.layer_norm_epsilon) + return residual_fn @@ -113,8 +116,7 @@ def transformer_prepare_encoder(inputs, target_space, hparams): encoder_input += emb_target_space if hparams.pos == "timing": encoder_input = common_attention.add_timing_signal_1d(encoder_input) - return (encoder_input, - encoder_self_attention_bias, + return (encoder_input, encoder_self_attention_bias, encoder_decoder_attention_bias) @@ -251,12 +253,9 @@ def transformer_ffn_layer(x, hparams): dropout=hparams.relu_dropout) elif hparams.ffn_layer == "parameter_attention": return common_attention.parameter_attention( - x, - hparams.parameter_attention_key_channels or hparams.hidden_size, + x, hparams.parameter_attention_key_channels or hparams.hidden_size, hparams.parameter_attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.filter_size, - hparams.num_heads, + hparams.hidden_size, hparams.filter_size, hparams.num_heads, hparams.attention_dropout) elif hparams.ffn_layer == "conv_hidden_relu_with_sepconv": return common_layers.conv_hidden_relu( diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py index 1f20bfb51..2604748be 100644 --- a/tensor2tensor/models/transformer_alternative.py +++ b/tensor2tensor/models/transformer_alternative.py @@ -20,7 +20,6 @@ Code is mostly copied from original Transformer source. """ - from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -29,8 +28,8 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_attention -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_layers from tensor2tensor.models import transformer from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -50,10 +49,11 @@ def model_fn_body(self, features): inputs = common_layers.flatten4d3d(inputs) targets = common_layers.flatten4d3d(targets) - (encoder_input, encoder_attention_bias, _) = ( - transformer.transformer_prepare_encoder(inputs, target_space, hparams)) - (decoder_input, _) = ( - transformer.transformer_prepare_decoder(targets, hparams)) + (encoder_input, + encoder_attention_bias, _) = (transformer.transformer_prepare_encoder( + inputs, target_space, hparams)) + (decoder_input, _) = (transformer.transformer_prepare_decoder( + targets, hparams)) encoder_mask = bias_to_mask(encoder_attention_bias) @@ -64,12 +64,12 @@ def residual_fn(x, y): encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) - encoder_output = alt_transformer_encoder( - encoder_input, residual_fn, encoder_mask, hparams) + encoder_output = alt_transformer_encoder(encoder_input, residual_fn, + encoder_mask, hparams) - decoder_output = alt_transformer_decoder( - decoder_input, encoder_output, residual_fn, - encoder_attention_bias, hparams) + decoder_output = alt_transformer_decoder(decoder_input, encoder_output, + residual_fn, + encoder_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) @@ -97,19 +97,14 @@ def composite_layer(inputs, mask, hparams, for_output=False): for layer in xrange(hparams.layers_per_layer): with tf.variable_scope("sub_layer_%d" % layer): x = common_layers.linear_set_layer( - hparams.hidden_size, - x, - dropout=hparams.relu_dropout) + hparams.hidden_size, x, dropout=hparams.relu_dropout) if for_output: context = common_layers.running_global_pool_1d(x) else: context = common_layers.global_pool_1d(x, mask=mask) # Final layer. x = common_layers.linear_set_layer( - hparams.hidden_size, - x, - context=context, - dropout=hparams.relu_dropout) + hparams.hidden_size, x, context=context, dropout=hparams.relu_dropout) return x @@ -150,8 +145,8 @@ def alt_transformer_decoder(decoder_input, hparams.attention_dropout, name="encdec_attention") - x_ = residual_fn(x_, composite_layer(x_, None, hparams, - for_output=True)) + x_ = residual_fn(x_, composite_layer( + x_, None, hparams, for_output=True)) x = residual_fn(x, x_) return x @@ -162,7 +157,7 @@ def bias_to_mask(bias): # output sequences. Squeeze out dim one, and get the first element of # each vector. bias = tf.squeeze(bias, [1])[:, :, 0] - bias = - tf.clip_by_value(bias, -1.0, 1.0) + bias = -tf.clip_by_value(bias, -1.0, 1.0) mask = 1 - bias return mask diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index f2e69da21..a61687f48 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -23,8 +23,8 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.models import common_hparams -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py new file mode 100644 index 000000000..12057d8e6 --- /dev/null +++ b/tensor2tensor/utils/decoding.py @@ -0,0 +1,371 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Decoding utilities.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import operator +import os + +# Dependency imports + +import numpy as np +import six + +from six.moves import input # pylint: disable=redefined-builtin + +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import data_reader +from tensor2tensor.utils import devices +from tensor2tensor.utils import input_fn_builder +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + + +def decode_from_dataset(estimator): + hparams = estimator.hparams + for i, problem in enumerate(FLAGS.problems.split("-")): + inputs_vocab = hparams.problems[i].vocabulary.get("inputs", None) + targets_vocab = hparams.problems[i].vocabulary["targets"] + tf.logging.info("Performing local inference.") + infer_problems_data = data_reader.get_data_filepatterns( + FLAGS.problems, hparams.data_dir, tf.contrib.learn.ModeKeys.INFER) + infer_input_fn = input_fn_builder.build_input_fn( + mode=tf.contrib.learn.ModeKeys.INFER, + hparams=hparams, + data_file_patterns=infer_problems_data, + num_datashards=devices.data_parallelism().n, + fixed_problem=i) + result_iter = estimator.predict(input_fn=infer_input_fn, as_iterable=False) + + def log_fn(inputs, + targets, + outputs, + problem, + j, + inputs_vocab=inputs_vocab, + targets_vocab=targets_vocab): + """Log inference results.""" + if "image" in problem and FLAGS.decode_save_images: + save_path = os.path.join(estimator.model_dir, + "%s_prediction_%d.jpg" % (problem, j)) + show_and_save_image(inputs / 255., save_path) + elif inputs_vocab: + decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) + tf.logging.info("Inference results INPUT: %s" % decoded_inputs) + + decoded_outputs = targets_vocab.decode(_save_until_eos(outputs.flatten())) + tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) + decoded_targets = targets_vocab.decode(_save_until_eos(targets.flatten())) + tf.logging.info("Inference results TARGET: %s" % decoded_targets) + + if FLAGS.decode_to_file: + output_filepath = FLAGS.decode_to_file + ".outputs." + problem + output_file = tf.gfile.Open(output_filepath, "a") + output_file.write(decoded_outputs + "\n") + target_filepath = FLAGS.decode_to_file + ".targets." + problem + target_file = tf.gfile.Open(target_filepath, "a") + target_file.write(decoded_targets + "\n") + + # The function predict() returns an iterable over the network's + # predictions from the test input. We use it to log inputs and decodes. + inputs_iter = result_iter["inputs"] + targets_iter = result_iter["targets"] + outputs_iter = result_iter["outputs"] + for j, result in enumerate(zip(inputs_iter, targets_iter, outputs_iter)): + inputs, targets, outputs = result + if FLAGS.decode_return_beams: + output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) + for k, beam in enumerate(output_beams): + tf.logging.info("BEAM %d:" % k) + log_fn(inputs, targets, beam, problem, j) + else: + log_fn(inputs, targets, outputs, problem, j) + + +def decode_from_file(estimator, filename): + """Compute predictions on entries in filename and write them out.""" + hparams = estimator.hparams + problem_id = FLAGS.decode_problem_id + inputs_vocab = hparams.problems[problem_id].vocabulary["inputs"] + targets_vocab = hparams.problems[problem_id].vocabulary["targets"] + tf.logging.info("Performing decoding from a file.") + sorted_inputs, sorted_keys = _get_sorted_inputs(filename) + num_decode_batches = (len(sorted_inputs) - 1) // FLAGS.decode_batch_size + 1 + input_fn = _decode_batch_input_fn(problem_id, num_decode_batches, + sorted_inputs, inputs_vocab) + + decodes = [] + for _ in range(num_decode_batches): + result_iter = estimator.predict( + input_fn=input_fn.next if six.PY2 else input_fn.__next__, + as_iterable=True) + for result in result_iter: + + def log_fn(inputs, outputs): + decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) + tf.logging.info("Inference results INPUT: %s" % decoded_inputs) + + decoded_outputs = targets_vocab.decode( + _save_until_eos(outputs.flatten())) + tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) + return decoded_outputs + + if FLAGS.decode_return_beams: + beam_decodes = [] + output_beams = np.split( + result["outputs"], FLAGS.decode_beam_size, axis=0) + for k, beam in enumerate(output_beams): + tf.logging.info("BEAM %d:" % k) + beam_decodes.append(log_fn(result["inputs"], beam)) + decodes.append("\t".join(beam_decodes)) + + else: + decodes.append(log_fn(result["inputs"], result["outputs"])) + + # Reversing the decoded inputs and outputs because they were reversed in + # _decode_batch_input_fn + sorted_inputs.reverse() + decodes.reverse() + # Dumping inputs and outputs to file filename.decodes in + # format result\tinput in the same order as original inputs + if FLAGS.decode_to_file: + output_filename = FLAGS.decode_to_file + else: + output_filename = filename + if FLAGS.decode_shards > 1: + base_filename = output_filename + ("%.2d" % FLAGS.worker_id) + else: + base_filename = output_filename + decode_filename = (base_filename + "." + FLAGS.model + "." + FLAGS.hparams_set + + ".beam" + str(FLAGS.decode_beam_size) + ".alpha" + + str(FLAGS.decode_alpha) + ".decodes") + tf.logging.info("Writing decodes into %s" % decode_filename) + outfile = tf.gfile.Open(decode_filename, "w") + for index in range(len(sorted_inputs)): + outfile.write("%s\n" % (decodes[sorted_keys[index]])) + + +def decode_interactively(estimator): + hparams = estimator.hparams + + infer_input_fn = _interactive_input_fn(hparams) + for problem_idx, example in infer_input_fn: + targets_vocab = hparams.problems[problem_idx].vocabulary["targets"] + result_iter = estimator.predict(input_fn=lambda e=example: e) + for result in result_iter: + if FLAGS.decode_return_beams: + beams = np.split(result["outputs"], FLAGS.decode_beam_size, axis=0) + scores = None + if "scores" in result: + scores = np.split(result["scores"], FLAGS.decode_beam_size, axis=0) + for k, beam in enumerate(beams): + tf.logging.info("BEAM %d:" % k) + beam_string = targets_vocab.decode(_save_until_eos(beam.flatten())) + if scores is not None: + tf.logging.info("%s\tScore:%f" % (beam_string, scores[k])) + else: + tf.logging.info(beam_string) + else: + if FLAGS.identity_output: + tf.logging.info(" ".join(map(str, result["outputs"].flatten()))) + else: + tf.logging.info( + targets_vocab.decode( + _save_until_eos(result["outputs"].flatten()))) + + +def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, + vocabulary): + tf.logging.info(" batch %d" % num_decode_batches) + # First reverse all the input sentences so that if you're going to get OOMs, + # you'll see it in the first batch + sorted_inputs.reverse() + for b in range(num_decode_batches): + tf.logging.info("Decoding batch %d" % b) + batch_length = 0 + batch_inputs = [] + for inputs in sorted_inputs[b * FLAGS.decode_batch_size:( + b + 1) * FLAGS.decode_batch_size]: + input_ids = vocabulary.encode(inputs) + if FLAGS.decode_max_input_size > 0: + # Subtract 1 for the EOS_ID. + input_ids = input_ids[:FLAGS.decode_max_input_size - 1] + input_ids.append(text_encoder.EOS_ID) + batch_inputs.append(input_ids) + if len(input_ids) > batch_length: + batch_length = len(input_ids) + final_batch_inputs = [] + for input_ids in batch_inputs: + assert len(input_ids) <= batch_length + x = input_ids + [0] * (batch_length - len(input_ids)) + final_batch_inputs.append(x) + yield { + "inputs": np.array(final_batch_inputs), + "problem_choice": np.array(problem_id) + } + + +def _interactive_input_fn(hparams): + """Generator that reads from the terminal and yields "interactive inputs". + + Due to temporary limitations in tf.learn, if we don't want to reload the + whole graph, then we are stuck encoding all of the input as one fixed-size + numpy array. + + We yield int64 arrays with shape [const_array_size]. The format is: + [num_samples, decode_length, len(input ids), <input ids>, <padding>] + + Args: + hparams: model hparams + Yields: + numpy arrays + + Raises: + Exception: when `input_type` is invalid. + """ + num_samples = 3 + decode_length = 100 + input_type = "text" + problem_id = 0 + p_hparams = hparams.problems[problem_id] + has_input = "inputs" in p_hparams.input_modality + vocabulary = p_hparams.vocabulary["inputs" if has_input else "targets"] + # This should be longer than the longest input. + const_array_size = 10000 + while True: + prompt = ("INTERACTIVE MODE num_samples=%d decode_length=%d \n" + " it=<input_type> ('text' or 'image' or 'label')\n" + " pr=<problem_num> (set the problem number)\n" + " in=<input_problem> (set the input problem number)\n" + " ou=<output_problem> (set the output problem number)\n" + " ns=<num_samples> (changes number of samples)\n" + " dl=<decode_length> (changes decode legnth)\n" + " <%s> (decode)\n" + " q (quit)\n" + ">" % (num_samples, decode_length, "source_string" + if has_input else "target_prefix")) + input_string = input(prompt) + if input_string == "q": + return + elif input_string[:3] == "pr=": + problem_id = int(input_string[3:]) + p_hparams = hparams.problems[problem_id] + has_input = "inputs" in p_hparams.input_modality + vocabulary = p_hparams.vocabulary["inputs" if has_input else "targets"] + elif input_string[:3] == "in=": + problem = int(input_string[3:]) + p_hparams.input_modality = hparams.problems[problem].input_modality + p_hparams.input_space_id = hparams.problems[problem].input_space_id + elif input_string[:3] == "ou=": + problem = int(input_string[3:]) + p_hparams.target_modality = hparams.problems[problem].target_modality + p_hparams.target_space_id = hparams.problems[problem].target_space_id + elif input_string[:3] == "ns=": + num_samples = int(input_string[3:]) + elif input_string[:3] == "dl=": + decode_length = int(input_string[3:]) + elif input_string[:3] == "it=": + input_type = input_string[3:] + else: + if input_type == "text": + input_ids = vocabulary.encode(input_string) + if has_input: + input_ids.append(text_encoder.EOS_ID) + x = [num_samples, decode_length, len(input_ids)] + input_ids + assert len(x) < const_array_size + x += [0] * (const_array_size - len(x)) + yield problem_id, { + "inputs": np.array(x), + "problem_choice": np.array(problem_id) + } + elif input_type == "image": + input_path = input_string + img = read_image(input_path) + yield problem_id, { + "inputs": img, + "problem_choice": np.array(problem_id) + } + elif input_type == "label": + input_ids = [int(input_string)] + x = [num_samples, decode_length, len(input_ids)] + input_ids + yield problem_id, { + "inputs": np.array(x), + "problem_choice": np.array(problem_id) + } + else: + raise Exception("Unsupported input type.") + + +def read_image(path): + try: + import matplotlib.image as im # pylint: disable=g-import-not-at-top + except ImportError as e: + tf.logging.warning( + "Reading an image requires matplotlib to be installed: %s", e) + raise NotImplementedError("Image reading not implemented.") + return im.imread(path) + + +def show_and_save_image(img, save_path): + try: + import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top + except ImportError as e: + tf.logging.warning("Showing and saving an image requires matplotlib to be " + "installed: %s", e) + raise NotImplementedError("Image display and save not implemented.") + plt.imshow(img) + plt.savefig(save_path) + + +def _get_sorted_inputs(filename): + """Returning inputs sorted according to length. + + Args: + filename: path to file with inputs, 1 per line. + + Returns: + a sorted list of inputs + + """ + tf.logging.info("Getting sorted inputs") + # read file and sort inputs according them according to input length. + if FLAGS.decode_shards > 1: + decode_filename = filename + ("%.2d" % FLAGS.worker_id) + else: + decode_filename = filename + inputs = [line.strip() for line in tf.gfile.Open(decode_filename)] + input_lens = [(i, len(line.strip().split())) for i, line in enumerate(inputs)] + sorted_input_lens = sorted(input_lens, key=operator.itemgetter(1)) + # We'll need the keys to rearrange the inputs back into their original order + sorted_keys = {} + sorted_inputs = [] + for i, (index, _) in enumerate(sorted_input_lens): + sorted_inputs.append(inputs[index]) + sorted_keys[index] = i + return sorted_inputs, sorted_keys + + +def _save_until_eos(hyp): + """Strips everything after the first <EOS> token, which is normally 1.""" + try: + index = list(hyp).index(text_encoder.EOS_ID) + return hyp[0:index] + except ValueError: + # No EOS_ID: return the array as-is. + return hyp diff --git a/tensor2tensor/utils/devices.py b/tensor2tensor/utils/devices.py new file mode 100644 index 000000000..4f76367e9 --- /dev/null +++ b/tensor2tensor/utils/devices.py @@ -0,0 +1,147 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Device placement and data parallelism.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +# pylint: disable=redefined-builtin +from six.moves import xrange +# pylint: enable=redefined-builtin + +from tensor2tensor.utils import expert_utils as eu +import tensorflow as tf + +# TODO(rsepassi): Rm dep on FLAGS here +FLAGS = tf.flags.FLAGS + + +def _ps_replicas(all_workers=False): + if all_workers: + return list(range(FLAGS.ps_replicas)) + # Worker K will be using replicas {0,...n-1} + K*n if we have n replicas. + num_replicas = FLAGS.ps_replicas // FLAGS.worker_replicas + return [d + FLAGS.worker_id * num_replicas for d in xrange(num_replicas)] + + +def _gpu_order(num_gpus): + if FLAGS.gpu_order: + ret = [int(s) for s in FLAGS.gpu_order.split(" ")] + if len(ret) == num_gpus: + return ret + return list(range(num_gpus)) + + +def _ps_gpus(all_workers=False): + ps_gpus = [] + for d in _ps_replicas(all_workers=all_workers): + ps_gpus.extend([(d, gpu) for gpu in _gpu_order(FLAGS.ps_gpu)]) + return ps_gpus + + +def ps_devices(all_workers=False): + """List of ps devices (where to put the experts). + + Args: + all_workers: whether the list is for all async workers or just this one. + + Returns: + a list of device names + """ + if FLAGS.ps_replicas > 0: + if FLAGS.ps_gpu > 0: + return [ + FLAGS.ps_job + "/task:%d/GPU:%d" % (d, gpu) + for (d, gpu) in _ps_gpus(all_workers=all_workers) + ] + else: + return [ + FLAGS.ps_job + "/task:%d" % d + for d in _ps_replicas(all_workers=all_workers) + ] + else: + if FLAGS.worker_gpu > 0: + return ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] + else: + return [""] + + +def data_parallelism(all_workers=False): + """Over which devices do we split each training batch. + + In old-fashioned async mode, we split the batch over all GPUs on the + current worker. + + In sync mode, we split the batch over all the parameter server GPUs. + + This function returns an expert_utils.Parallelism object, which can be used + to build the model. It is configured in a way that any variables created + by `tf.get_variable` will be assigned to the parameter servers and shared + between datashards. + + Args: + all_workers: whether the devices are all async workers or just this one. + + Returns: + a expert_utils.Parallelism. + """ + + def _replica_device_setter(worker_device): + if FLAGS.ps_replicas == 0: + return worker_device + return tf.train.replica_device_setter( + worker_device=worker_device, + ps_tasks=FLAGS.ps_replicas, + ps_device=FLAGS.ps_job + "/GPU:0" if FLAGS.ps_gpu > 0 else FLAGS.ps_job) + + if FLAGS.schedule == "local_run": + assert not FLAGS.sync + datashard_devices = ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] + if FLAGS.locally_shard_to_cpu: + datashard_devices += ["cpu:0"] + caching_devices = None + elif FLAGS.sync: + assert FLAGS.ps_replicas > 0 + datashard_devices = [ + _replica_device_setter(d) for d in ps_devices(all_workers=all_workers) + ] + if FLAGS.ps_gpu > 0 and FLAGS.ps_replicas > 1: + caching_devices = [ + FLAGS.ps_job + "/task:%d/cpu:0" % d + for (d, _) in _ps_gpus(all_workers=all_workers) + ] + else: + caching_devices = None + else: + # old fashioned async - compute on worker + if FLAGS.worker_gpu > 1: + datashard_devices = [ + _replica_device_setter(FLAGS.worker_job + "/GPU:%d" % d) + for d in _gpu_order(FLAGS.worker_gpu) + ] + caching_devices = [FLAGS.worker_job + "/GPU:0"] * FLAGS.worker_gpu + else: + datashard_devices = [_replica_device_setter(FLAGS.worker_job)] + caching_devices = None + tf.logging.info("datashard_devices: %s", datashard_devices) + tf.logging.info("caching_devices: %s", caching_devices) + return eu.Parallelism( + datashard_devices, + reuse=True, + caching_devices=caching_devices, + daisy_chain_variables=FLAGS.daisy_chain_variables) diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py new file mode 100644 index 000000000..1fac64c8b --- /dev/null +++ b/tensor2tensor/utils/input_fn_builder.py @@ -0,0 +1,200 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Input function building.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.utils import data_reader + +import tensorflow as tf + +# TODO(rsepassi): Rm dep on FLAGS here +FLAGS = tf.flags.FLAGS + + +def build_input_fn(mode, + hparams, + data_file_patterns=None, + num_datashards=None, + fixed_problem=None): + """Provides input to the graph, either from disk or via a placeholder. + + This function produces an input function that will feed data into + the network. There are two modes of operation: + + 1. If data_file_pattern and all subsequent arguments are None, then + it creates a placeholder for a serialized tf.Example proto. + 2. If data_file_pattern is defined, it will read the data from the + files at the given location. Use this mode for training, + evaluation, and testing prediction. + + Args: + mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. + hparams: HParams object. + data_file_patterns: The list of file patterns to use to read in data. Set to + `None` if you want to create a placeholder for the input data. The + `problems` flag is a list of problem names joined by the `-` character. + The flag's string is then split along the `-` and each problem gets its + own example queue. + num_datashards: An integer. + fixed_problem: An integer indicating the problem to fetch data for, or None + if the input is to be randomly selected. + + Returns: + A function that returns a dictionary of features and the target labels. + """ + + def input_fn(): + """Supplies input to our model. + + This function supplies input to our model, where this input is a + function of the mode. For example, we supply different data if + we're performing training versus evaluation. + + Returns: + A tuple consisting of 1) a dictionary of tensors whose keys are + the feature names, and 2) a tensor of target labels if the mode + is not INFER (and None, otherwise). + + Raises: + ValueError: if one of the parameters has an unsupported value. + """ + problem_count, batches = len(data_file_patterns), [] + with tf.name_scope("input_reader"): + for n in xrange(problem_count): + if fixed_problem is not None and n != fixed_problem: + continue + problem_instance = hparams.problem_instances[n] + p_hparams = hparams.problems[n] + with tf.name_scope("problem_%d" % n): + with tf.device("/cpu:0"): # Input reading on CPU + capacity = p_hparams.max_expected_batch_size_per_shard + capacity *= num_datashards + examples = data_reader.input_pipeline(problem_instance, + data_file_patterns[n], + capacity, mode, hparams) + feature_map = data_reader.batch_examples( + examples, + data_reader.hparams_to_batching_scheme( + hparams, + shard_multiplier=num_datashards, + drop_long_sequences=(mode == tf.contrib.learn.ModeKeys.TRAIN + or hparams.eval_drop_long_sequences), + length_multiplier=(p_hparams.batch_size_multiplier))) + + # Reverse inputs and targets features if the problem was reversed. + if problem_instance is not None: + problem_instance.maybe_reverse_features(feature_map) + problem_instance.maybe_copy_features(feature_map) + else: + if p_hparams.was_reversed: + inputs = feature_map["inputs"] + targets = feature_map["targets"] + feature_map["inputs"] = targets + feature_map["targets"] = inputs + # Use the inputs as the targets if the problem is a copy problem. + if p_hparams.was_copy: + feature_map["targets"] = feature_map["inputs"] + + # Ensure inputs and targets are proper rank. + while len(feature_map["inputs"].get_shape()) != 4: + feature_map["inputs"] = tf.expand_dims(feature_map["inputs"], axis=-1) + while len(feature_map["targets"].get_shape()) != 4: + feature_map["targets"] = tf.expand_dims( + feature_map["targets"], axis=-1) + + batches.append((feature_map["inputs"], feature_map["targets"], + tf.constant(n), tf.constant(p_hparams.input_space_id), + tf.constant(p_hparams.target_space_id))) + + # We choose which problem to process. + loss_moving_avgs = [] # Need loss moving averages for that. + for n in xrange(problem_count): + with tf.variable_scope("losses_avg"): + loss_moving_avgs.append( + tf.get_variable( + "problem_%d/total_loss" % n, initializer=100.0, + trainable=False)) + tf.get_variable( + "problem_%d/training_loss" % n, initializer=100.0, trainable=False) + tf.get_variable( + "problem_%d/extra_loss" % n, initializer=100.0, trainable=False) + if fixed_problem is None: + if (hparams.problem_choice == "uniform" or + mode != tf.contrib.learn.ModeKeys.TRAIN): + problem_choice = tf.random_uniform( + [], maxval=problem_count, dtype=tf.int32) + elif hparams.problem_choice == "adaptive": + loss_moving_avgs = tf.stack(loss_moving_avgs) + problem_choice = tf.multinomial( + tf.reshape(loss_moving_avgs, [1, -1]), 1) + problem_choice = tf.to_int32(tf.squeeze(problem_choice)) + elif hparams.problem_choice == "distributed": + assert FLAGS.worker_replicas >= problem_count + assert FLAGS.worker_replicas % problem_count == 0 + problem_choice = tf.to_int32(FLAGS.worker_id % problem_count) + else: + raise ValueError( + "Value of hparams.problem_choice is %s and must be " + "one of [uniform, adaptive, distributed]" % hparams.problem_choice) + + # Inputs and targets conditional on problem_choice. + rand_inputs, rand_target, choice, inp_id, tgt_id = cond_on_index( + lambda n: batches[n], problem_choice, 0, problem_count - 1) + else: + problem_choice = tf.constant(fixed_problem) + # Take the only constructed batch, which is the fixed_problem. + rand_inputs, rand_target, choice, inp_id, tgt_id = batches[0] + + # Set shapes so the ranks are clear. + rand_inputs.set_shape([None, None, None, None]) + rand_target.set_shape([None, None, None, None]) + choice.set_shape([]) + inp_id.set_shape([]) + tgt_id.set_shape([]) + # Forced shape obfuscation is necessary for inference. + if mode == tf.contrib.learn.ModeKeys.INFER: + rand_inputs._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access + rand_target._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access + + # Final feature map. + rand_feature_map = { + "inputs": rand_inputs, + "problem_choice": choice, + "input_space_id": inp_id, + "target_space_id": tgt_id + } + if mode == tf.contrib.learn.ModeKeys.INFER: + rand_feature_map["infer_targets"] = rand_target + rand_target = None + return rand_feature_map, rand_target + + return input_fn + + +def cond_on_index(fn, index_tensor, cur_idx, max_idx): + """Call fn(index_tensor) using tf.cond in [cur_id, max_idx].""" + if cur_idx == max_idx: + return fn(cur_idx) + return tf.cond( + tf.equal(index_tensor, cur_idx), lambda: fn(cur_idx), + lambda: cond_on_index(fn, index_tensor, cur_idx + 1, max_idx)) diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 4435707cd..db60e07c8 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -22,7 +22,7 @@ import six -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_layers from tensor2tensor.utils import bleu_hook import tensorflow as tf diff --git a/tensor2tensor/utils/modality.py b/tensor2tensor/utils/modality.py index 5c596e10f..4bcf21f4d 100644 --- a/tensor2tensor/utils/modality.py +++ b/tensor2tensor/utils/modality.py @@ -22,7 +22,7 @@ # Dependency imports -from tensor2tensor.models import common_layers +from tensor2tensor.layers import common_layers import tensorflow as tf diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py new file mode 100644 index 000000000..a12aa1122 --- /dev/null +++ b/tensor2tensor/utils/model_builder.py @@ -0,0 +1,451 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Model building.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import math + +# Dependency imports + +import numpy as np +import six +# pylint: disable=redefined-builtin +from six.moves import xrange +# pylint: enable=redefined-builtin + +from tensor2tensor.models import models # pylint: disable=unused-import +from tensor2tensor.utils import devices +from tensor2tensor.utils import input_fn_builder +from tensor2tensor.utils import registry +from tensor2tensor.utils import yellowfin + +import tensorflow as tf +from tensorflow.python.ops import init_ops + +# TODO(rsepassi): Rm dep on FLAGS here +FLAGS = tf.flags.FLAGS + +# Number of samples to draw for an image input (in such cases as captioning) +IMAGE_DECODE_LENGTH = 100 + + +def build_model_fn(model, hparams): + """Returns a function to build the model. + + Args: + model: The name of the model to use. + hparams: The hyperparameters. + + Returns: + A function to build the model's graph. This function is called by + the Estimator object to construct the graph. + """ + + def initializer(): + if hparams.initializer == "orthogonal": + return tf.orthogonal_initializer(gain=hparams.initializer_gain) + elif hparams.initializer == "uniform": + max_val = 0.1 * hparams.initializer_gain + return tf.random_uniform_initializer(-max_val, max_val) + elif hparams.initializer == "normal_unit_scaling": + return init_ops.variance_scaling_initializer( + hparams.initializer_gain, mode="fan_avg", distribution="normal") + elif hparams.initializer == "uniform_unit_scaling": + return init_ops.variance_scaling_initializer( + hparams.initializer_gain, mode="fan_avg", distribution="uniform") + else: + raise ValueError("Unrecognized initializer: %s" % hparams.initializer) + + def learning_rate_decay(): + """Inverse-decay learning rate until warmup_steps, then decay.""" + warmup_steps = tf.to_float( + hparams.learning_rate_warmup_steps * FLAGS.worker_replicas) + step = tf.to_float(tf.contrib.framework.get_global_step()) + if hparams.learning_rate_decay_scheme == "noam": + return 5000.0 * hparams.hidden_size**-0.5 * tf.minimum( + (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) + elif hparams.learning_rate_decay_scheme == "exp100k": + return 0.94**(step // 100000) + elif hparams.learning_rate_decay_scheme == "cosine": + cycle_steps = hparams.learning_rate_cosine_cycle_steps + return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) + + inv_base = tf.exp(tf.log(0.01) / warmup_steps) + inv_decay = inv_base**(warmup_steps - step) + if hparams.learning_rate_decay_scheme == "sqrt": + decay = _sqrt_decay(step - warmup_steps) + elif hparams.learning_rate_decay_scheme == "exp10k": + decay = _exp_decay_after(step - warmup_steps, 0.9995, + FLAGS.train_steps - warmup_steps - 10000) + elif hparams.learning_rate_decay_scheme == "exp50k": + decay = _exp_decay_after(step - warmup_steps, 0.99995, + FLAGS.train_steps - warmup_steps - 50000) + elif hparams.learning_rate_decay_scheme == "exp500k": + decay = _exp_decay_after(step - warmup_steps, 0.9999955, + FLAGS.train_steps - warmup_steps - 500000) + elif hparams.learning_rate_decay_scheme == "none": + decay = tf.constant(1.0) + else: + raise ValueError("Unrecognized learning rate decay scheme: %s" % + hparams.learning_rate_decay_scheme) + return tf.cond( + step < warmup_steps, + lambda: inv_decay, + lambda: decay, + name="learning_rate_decay_warump_cond") + + def model_fn(features, targets, mode): + """Creates the prediction, loss, and train ops. + + Args: + features: A dictionary of tensors keyed by the feature name. + targets: A tensor representing the labels (targets). + mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. + + Returns: + A tuple consisting of the prediction, loss, and train_op. + """ + # Deep-copy the model hparams between modes to eliminate + # side-effects caused by abuse of the linked problem_hparams + # objects which are used to share modality objects between + # problems. We do not want to share the modality objects between + # modes, since the modality objects may decide to do something + # mode-specific. A better fix would be to stop abusing the + # hparams in this way and instead use a separate dictionary to + # share the modality objects between problems. This dictionary + # could be created once per mode and passed to the constructor of + # t2t_model. + my_hp = copy.deepcopy(hparams) + if mode == tf.contrib.learn.ModeKeys.INFER: + if FLAGS.decode_interactive: + features = _interactive_input_tensor_to_features_dict(features, my_hp) + elif FLAGS.decode_from_file: + features = _decode_input_tensor_to_features_dict(features, my_hp) + # A dictionary containing: + # - problem_choice: A Tensor containing an integer indicating which problem + # was selected for this run. + # - predictions: A Tensor containing the model's output predictions. + run_info = dict() + run_info["problem_choice"] = features["problem_choice"] + + if targets is not None: + features["targets"] = targets + + dp = devices.data_parallelism() + + # Add input statistics for incoming features. + with tf.name_scope("input_stats"): + for (k, v) in six.iteritems(features): + if isinstance(v, tf.Tensor) and v.get_shape().ndims > 1: + tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // dp.n) + tf.summary.scalar("%s_length" % k, tf.shape(v)[1]) + nonpadding = tf.to_float(tf.not_equal(v, 0)) + tf.summary.scalar("%s_nonpadding_tokens" % k, + tf.reduce_sum(nonpadding)) + tf.summary.scalar("%s_nonpadding_fraction" % k, + tf.reduce_mean(nonpadding)) + + tf.get_variable_scope().set_initializer(initializer()) + train = mode == tf.contrib.learn.ModeKeys.TRAIN + + # Get multi-problem logits and loss based on features["problem_choice"]. + def nth_model(n): + """Build the model for the n-th problem, plus some added variables.""" + model_class = registry.model(model)( + my_hp, + mode, + my_hp.problems[n], + n, + dp, + devices.ps_devices(all_workers=True)) + if mode == tf.contrib.learn.ModeKeys.INFER: + return model_class.infer( + features, + beam_size=FLAGS.decode_beam_size, + top_beams=(FLAGS.decode_beam_size + if FLAGS.decode_return_beams else 1), + last_position_only=FLAGS.decode_use_last_position_only, + alpha=FLAGS.decode_alpha, + decode_length=FLAGS.decode_extra_length) + # In distributed mode, we build graph for problem=0 and problem=worker_id. + skipping_is_on = my_hp.problem_choice == "distributed" and train + problem_worker_id = FLAGS.worker_id % len(my_hp.problems) + skip_this_one = n != 0 and n % FLAGS.worker_replicas != problem_worker_id + # On worker 0 also build graph for problems <= 1. + # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. + skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1) + sharded_logits, losses_dict = model_class.model_fn( + features, skip=(skipping_is_on and skip_this_one)) + with tf.variable_scope("losses_avg", reuse=True): + total_loss, ops = 0.0, [] + for loss_key, loss_value in losses_dict.iteritems(): + loss_moving_avg = tf.get_variable("problem_%d/%s_loss" % (n, + loss_key)) + ops.append( + loss_moving_avg.assign(loss_moving_avg * 0.9 + loss_value * 0.1)) + total_loss += loss_value + loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) + ops.append( + loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1)) + with tf.variable_scope("train_stats"): # Count steps for this problem. + problem_steps = tf.get_variable( + "problem_%d_steps" % n, initializer=0, trainable=False) + ops.append(problem_steps.assign_add(1)) + with tf.control_dependencies(ops): # Make sure the ops run. + # Ensure the loss is a scalar here. + total_loss = tf.reshape(total_loss, [], name="total_loss_control_id") + return [total_loss] + sharded_logits # Need to flatten for cond later. + + result_list = input_fn_builder.cond_on_index(nth_model, + features["problem_choice"], 0, + len(my_hp.problems) - 1) + + if mode == tf.contrib.learn.ModeKeys.INFER: + # Beam search in sequence model returns both decodes withe key "outputs" + # and scores with they key "scores". If return list is a dict, we expect + # that it will have keys "outputs", a tensor of int32 and scores, a + # tensor of floats. This is useful if we want to return scores from + # estimator.predict + if not isinstance(result_list, dict): + ret = {"outputs": result_list}, None, None + else: + ret = { + "outputs": result_list["outputs"], + "scores": result_list["scores"] + }, None, None + if "inputs" in features: + ret[0]["inputs"] = features["inputs"] + if "infer_targets" in features: + ret[0]["targets"] = features["infer_targets"] + return ret + + sharded_logits, total_loss = result_list[1:], result_list[0] + if mode == tf.contrib.learn.ModeKeys.EVAL: + logits = tf.concat(sharded_logits, 0) + if FLAGS.eval_print: + logits = tf.Print( + logits, [features["inputs"], logits], "EVAL PRINT", summarize=10000) + # For evaluation, return the logits layer as our predictions. + run_info["predictions"] = logits + train_op = None + return run_info, total_loss, None + + assert mode == tf.contrib.learn.ModeKeys.TRAIN + + # Some training statistics. + with tf.name_scope("training_stats"): + learning_rate = my_hp.learning_rate * learning_rate_decay() + learning_rate /= math.sqrt(float(FLAGS.worker_replicas)) + tf.summary.scalar("learning_rate", learning_rate) + global_step = tf.to_float(tf.contrib.framework.get_global_step()) + for n in xrange(len(my_hp.problems)): + with tf.variable_scope("losses_avg", reuse=True): + total_loss_var = tf.get_variable("problem_%d/total_loss" % n) + training_loss_var = tf.get_variable("problem_%d/training_loss" % n) + extra_loss_var = tf.get_variable("problem_%d/extra_loss" % n) + tf.summary.scalar("loss_avg_%d/total_loss" % n, total_loss_var) + tf.summary.scalar("loss_avg_%d/training_loss" % n, training_loss_var) + tf.summary.scalar("loss_avg_%d/extra_loss" % n, extra_loss_var) + with tf.variable_scope("train_stats", reuse=True): + nth_steps = tf.get_variable("problem_%d_steps" % n, dtype=tf.int32) + tf.summary.scalar("problem_%d_frequency" % n, + tf.to_float(nth_steps) / (global_step + 1.0)) + + # Log trainable weights and add decay. + total_size, weight_decay_loss = 0, 0.0 + all_weights = {v.name: v for v in tf.trainable_variables()} + for v_name in sorted(list(all_weights)): + v = all_weights[v_name] + v_size = int(np.prod(np.array(v.shape.as_list()))) + tf.logging.info("Weight %s\tshape %s\tsize %d", + v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) + total_size += v_size + if my_hp.weight_decay > 0.0 and len(v.shape.as_list()) > 1: + # Add weight regularization if set and the weight is not a bias (dim>1). + with tf.device(v._ref().device): # pylint: disable=protected-access + v_loss = tf.nn.l2_loss(v) / v_size + weight_decay_loss += v_loss + is_body = len(v_name) > 5 and v_name[:5] == "body/" + if my_hp.weight_noise > 0.0 and is_body: + # Add weight noise if set in my_hp. + with tf.device(v._ref().device): # pylint: disable=protected-access + scale = learning_rate * 0.001 + noise = tf.truncated_normal(v.shape) * my_hp.weight_noise * scale + noise_op = v.assign_add(noise) + with tf.control_dependencies([noise_op]): + total_loss = tf.identity(total_loss) + tf.logging.info("Total trainable variables size: %d", total_size) + if my_hp.weight_decay > 0.0: + total_loss += weight_decay_loss * my_hp.weight_decay + total_loss = tf.identity(total_loss, name="total_loss") + + # Define the train_op for the TRAIN mode. + opt = _ConditionalOptimizer(my_hp.optimizer, learning_rate, my_hp) + tf.logging.info("Computing gradients for global model_fn.") + opt_summaries = ["learning_rate", "loss"] + if hparams.summarize_grads: + opt_summaries.extend(["gradients", "gradient_norm"]) + train_op = tf.contrib.layers.optimize_loss( + name="training", + loss=total_loss, + global_step=tf.contrib.framework.get_global_step(), + learning_rate=learning_rate, + clip_gradients=my_hp.clip_grad_norm or None, + gradient_noise_scale=hparams.grad_noise_scale or None, + optimizer=opt, + summaries=opt_summaries, + colocate_gradients_with_ops=True) + + # Remove summaries that will fail to run because they are in conditionals. + # TODO(cwhipkey): Test with this code removed, later in 2017. + summaries = tf.get_collection_ref(tf.GraphKeys.SUMMARIES) + for i in range(len(summaries) - 1, -1, -1): + if summaries[i].name.startswith("cond_"): + del summaries[i] + + tf.logging.info("Global model_fn finished.") + return run_info, total_loss, train_op + + return model_fn + + +class _ConditionalOptimizer(tf.train.Optimizer): + """Conditional optimizer.""" + + def __init__(self, optimizer_name, lr, hparams): + if optimizer_name == "Adam": + # We change the default epsilon for Adam and re-scale lr. + # Using LazyAdam as it's much faster for large vocabulary embeddings. + self._opt = tf.contrib.opt.LazyAdamOptimizer( + lr / 500.0, + beta1=hparams.optimizer_adam_beta1, + beta2=hparams.optimizer_adam_beta2, + epsilon=hparams.optimizer_adam_epsilon) + elif optimizer_name == "Momentum": + self._opt = tf.train.MomentumOptimizer( + lr, momentum=hparams.optimizer_momentum_momentum) + elif optimizer_name == "YellowFin": + tf.logging.info("Init YellowFin Optimizer.") + self._opt = yellowfin.YellowFinOptimizer( + learning_rate=lr, momentum=hparams.optimizer_momentum_momentum) + else: + self._opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer_name](lr) + + def compute_gradients(self, loss, var_list, colocate_gradients_with_ops): + return self._opt.compute_gradients( + loss, var_list, colocate_gradients_with_ops=colocate_gradients_with_ops) + + def apply_gradients(self, gradients, global_step=None, name=None): + return self._opt.apply_gradients( + gradients, global_step=global_step, name=name) + + +def _sqrt_decay(step): + """Decay like 1 / sqrt(step), multiplied by 500 to normalize.""" + return 500.0 / tf.sqrt(tf.maximum(step, 1.0)) + + +def _exp_decay_after(step, rate, from_which_step): + """Decay exponentially by rate (per step) starting at from_which_step.""" + return tf.cond( + step < from_which_step, + lambda: tf.constant(1.0), + lambda: rate**(step - from_which_step), + name="exponential_decay_step_cond") + + +def _interactive_input_tensor_to_features_dict(feature_map, hparams): + """Convert the interactive input format (see above) to a dictionary. + + Args: + feature_map: a dictionary with keys `problem_choice` and `input` containing + Tensors. + hparams: model hyperparameters + + Returns: + a features dictionary, as expected by the decoder. + """ + inputs = tf.constant(feature_map["inputs"]) + input_is_image = False if len(inputs.shape) < 3 else True + + def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring + p_hparams = hparams.problems[problem_choice] + if not input_is_image: + # Remove the batch dimension. + num_samples = x[0] + length = x[2] + x = tf.slice(x, [3], tf.to_int32([length])) + x = tf.reshape(x, [1, -1, 1, 1]) + # Transform into a batch of size num_samples to get that many random + # decodes. + x = tf.tile(x, tf.to_int32([num_samples, 1, 1, 1])) + else: + x = tf.image.resize_images(x, [299, 299]) + x = tf.reshape(x, [1, 299, 299, -1]) + x = tf.to_int32(x) + return (tf.constant(p_hparams.input_space_id), + tf.constant(p_hparams.target_space_id), x) + + input_space_id, target_space_id, x = input_fn_builder.cond_on_index( + input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) + + features = {} + features["problem_choice"] = tf.constant(feature_map["problem_choice"]) + features["input_space_id"] = input_space_id + features["target_space_id"] = target_space_id + features["decode_length"] = (IMAGE_DECODE_LENGTH + if input_is_image else inputs[1]) + features["inputs"] = x + return features + + +def _decode_input_tensor_to_features_dict(feature_map, hparams): + """Convert the interactive input format (see above) to a dictionary. + + Args: + feature_map: a dictionary with keys `problem_choice` and `input` containing + Tensors. + hparams: model hyperparameters + + Returns: + a features dictionary, as expected by the decoder. + """ + inputs = tf.constant(feature_map["inputs"]) + input_is_image = False + + def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring + p_hparams = hparams.problems[problem_choice] + # Add a third empty dimension dimension + x = tf.expand_dims(x, axis=[2]) + x = tf.to_int32(x) + return (tf.constant(p_hparams.input_space_id), + tf.constant(p_hparams.target_space_id), x) + + input_space_id, target_space_id, x = input_fn_builder.cond_on_index( + input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) + + features = {} + features["problem_choice"] = feature_map["problem_choice"] + features["input_space_id"] = input_space_id + features["target_space_id"] = target_space_id + features["decode_length"] = (IMAGE_DECODE_LENGTH + if input_is_image else tf.shape(x)[1] + 50) + features["inputs"] = x + return features diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 9d5e1e0a6..5402e5bde 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -24,7 +24,7 @@ class MyModel(T2TModel): ``` Access by snake-cased name: `registry.model("my_model")`. If you're using -`trainer.py`, you can pass on the command-line: `--model=my_model`. +`t2t_trainer.py`, you can pass on the command-line: `--model=my_model`. See all the models registered: `registry.list_models()`. @@ -32,13 +32,13 @@ class MyModel(T2TModel): * Register: `registry.register_hparams` * List: `registry.list_hparams` * Retrieve by name: `registry.hparams` - * Command-line flag in `trainer.py`: `--hparams_set=name` + * Command-line flag in `t2t_trainer.py`: `--hparams_set=name` For hyperparameter ranges: * Register: `registry.register_ranged_hparams` * List: `registry.list_ranged_hparams` * Retrieve by name: `registry.ranged_hparams` - * Command-line flag in `trainer.py`: `--hparams_range=name` + * Command-line flag in `t2t_trainer.py`: `--hparams_range=name` """ from __future__ import absolute_import from __future__ import division diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 5c0240e16..c5f3296ee 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -19,38 +19,24 @@ from __future__ import division from __future__ import print_function -import copy -import math -import operator -import os import sys # Dependency imports -import numpy as np -import six -# pylint: disable=redefined-builtin -from six.moves import input -from six.moves import xrange -# pylint: enable=redefined-builtin - from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import from tensor2tensor.data_generators import problem_hparams -from tensor2tensor.data_generators import text_encoder from tensor2tensor.models import models # pylint: disable=unused-import from tensor2tensor.utils import data_reader -from tensor2tensor.utils import expert_utils as eu +from tensor2tensor.utils import decoding +from tensor2tensor.utils import devices +from tensor2tensor.utils import input_fn_builder from tensor2tensor.utils import metrics +from tensor2tensor.utils import model_builder from tensor2tensor.utils import registry -from tensor2tensor.utils import yellowfin import tensorflow as tf from tensorflow.contrib.learn.python.learn import learn_runner from tensorflow.python import debug -from tensorflow.python.ops import init_ops - -# Number of samples to draw for an image input (in such cases as captioning) -IMAGE_DECODE_LENGTH = 100 flags = tf.flags FLAGS = flags.FLAGS @@ -134,16 +120,6 @@ flags.DEFINE_bool("identity_output", False, "To print the output as identity") -def _save_until_eos(hyp): - """Strips everything after the first <EOS> token, which is normally 1.""" - try: - index = list(hyp).index(text_encoder.EOS_ID) - return hyp[0:index] - except ValueError: - # No EOS_ID: return the array as-is. - return hyp - - def make_experiment_fn(data_dir, model_name, train_steps, eval_steps): """Returns experiment_fn for learn_runner. Wraps create_experiment.""" @@ -195,22 +171,22 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): """Constructs and returns Estimator and train/eval input functions.""" tf.logging.info("Creating experiment, storing model files in %s", output_dir) - num_datashards = data_parallelism().n - train_input_fn = get_input_fn( + num_datashards = devices.data_parallelism().n + train_input_fn = input_fn_builder.build_input_fn( mode=tf.contrib.learn.ModeKeys.TRAIN, hparams=hparams, data_file_patterns=get_data_filepatterns(data_dir, tf.contrib.learn.ModeKeys.TRAIN), num_datashards=num_datashards) - eval_input_fn = get_input_fn( + eval_input_fn = input_fn_builder.build_input_fn( mode=tf.contrib.learn.ModeKeys.EVAL, hparams=hparams, data_file_patterns=get_data_filepatterns(data_dir, tf.contrib.learn.ModeKeys.EVAL), num_datashards=num_datashards) estimator = tf.contrib.learn.Estimator( - model_fn=model_builder(model_name, hparams=hparams), + model_fn=model_builder.build_model_fn(model_name, hparams=hparams), model_dir=output_dir, config=tf.contrib.learn.RunConfig( master=FLAGS.master, @@ -222,7 +198,8 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): estimator.hparams = hparams return estimator, { tf.contrib.learn.ModeKeys.TRAIN: train_input_fn, - tf.contrib.learn.ModeKeys.EVAL: eval_input_fn} + tf.contrib.learn.ModeKeys.EVAL: eval_input_fn + } def log_registry(): @@ -297,7 +274,11 @@ def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): if schedule == "local_run": # Run the local demo. - run_locally(exp_fn(output_dir)) + exp = exp_fn(output_dir) + if exp.train_steps > 0 or exp.eval_steps > 0: + tf.logging.info("Performing local training and evaluation.") + exp.train_and_evaluate() + decode(exp.estimator) else: # Perform distributed training/evaluation. learn_runner.run( @@ -342,1040 +323,14 @@ def session_config(): return config -def model_builder(model, hparams): - """Returns a function to build the model. - - Args: - model: The name of the model to use. - hparams: The hyperparameters. - - Returns: - A function to build the model's graph. This function is called by - the Estimator object to construct the graph. - """ - - def initializer(): - if hparams.initializer == "orthogonal": - return tf.orthogonal_initializer(gain=hparams.initializer_gain) - elif hparams.initializer == "uniform": - max_val = 0.1 * hparams.initializer_gain - return tf.random_uniform_initializer(-max_val, max_val) - elif hparams.initializer == "normal_unit_scaling": - return init_ops.variance_scaling_initializer( - hparams.initializer_gain, mode="fan_avg", distribution="normal") - elif hparams.initializer == "uniform_unit_scaling": - return init_ops.variance_scaling_initializer( - hparams.initializer_gain, mode="fan_avg", distribution="uniform") - else: - raise ValueError("Unrecognized initializer: %s" % hparams.initializer) - - def learning_rate_decay(): - """Inverse-decay learning rate until warmup_steps, then decay.""" - warmup_steps = tf.to_float( - hparams.learning_rate_warmup_steps * FLAGS.worker_replicas) - step = tf.to_float(tf.contrib.framework.get_global_step()) - if hparams.learning_rate_decay_scheme == "noam": - return 5000.0 * hparams.hidden_size**-0.5 * tf.minimum( - (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) - elif hparams.learning_rate_decay_scheme == "exp100k": - return 0.94**(step // 100000) - elif hparams.learning_rate_decay_scheme == "cosine": - cycle_steps = hparams.learning_rate_cosine_cycle_steps - return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) - - inv_base = tf.exp(tf.log(0.01) / warmup_steps) - inv_decay = inv_base**(warmup_steps - step) - if hparams.learning_rate_decay_scheme == "sqrt": - decay = _sqrt_decay(step - warmup_steps) - elif hparams.learning_rate_decay_scheme == "exp10k": - decay = _exp_decay_after(step - warmup_steps, 0.9995, - FLAGS.train_steps - warmup_steps - 10000) - elif hparams.learning_rate_decay_scheme == "exp50k": - decay = _exp_decay_after(step - warmup_steps, 0.99995, - FLAGS.train_steps - warmup_steps - 50000) - elif hparams.learning_rate_decay_scheme == "exp500k": - decay = _exp_decay_after(step - warmup_steps, 0.9999955, - FLAGS.train_steps - warmup_steps - 500000) - elif hparams.learning_rate_decay_scheme == "none": - decay = tf.constant(1.0) - else: - raise ValueError("Unrecognized learning rate decay scheme: %s" % - hparams.learning_rate_decay_scheme) - return tf.cond( - step < warmup_steps, - lambda: inv_decay, - lambda: decay, - name="learning_rate_decay_warump_cond") - - def model_fn(features, targets, mode): - """Creates the prediction, loss, and train ops. - - Args: - features: A dictionary of tensors keyed by the feature name. - targets: A tensor representing the labels (targets). - mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. - - Returns: - A tuple consisting of the prediction, loss, and train_op. - """ - # Deep-copy the model hparams between modes to eliminate - # side-effects caused by abuse of the linked problem_hparams - # objects which are used to share modality objects between - # problems. We do not want to share the modality objects between - # modes, since the modality objects may decide to do something - # mode-specific. A better fix would be to stop abusing the - # hparams in this way and instead use a separate dictionary to - # share the modality objects between problems. This dictionary - # could be created once per mode and passed to the constructor of - # t2t_model. - my_hp = copy.deepcopy(hparams) - if mode == tf.contrib.learn.ModeKeys.INFER: - if FLAGS.decode_interactive: - features = _interactive_input_tensor_to_features_dict(features, my_hp) - elif FLAGS.decode_from_file: - features = _decode_input_tensor_to_features_dict(features, my_hp) - # A dictionary containing: - # - problem_choice: A Tensor containing an integer indicating which problem - # was selected for this run. - # - predictions: A Tensor containing the model's output predictions. - run_info = dict() - run_info["problem_choice"] = features["problem_choice"] - - if targets is not None: - features["targets"] = targets - - dp = data_parallelism() - - # Add input statistics for incoming features. - with tf.name_scope("input_stats"): - for (k, v) in six.iteritems(features): - if isinstance(v, tf.Tensor) and v.get_shape().ndims > 1: - tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // dp.n) - tf.summary.scalar("%s_length" % k, tf.shape(v)[1]) - nonpadding = tf.to_float(tf.not_equal(v, 0)) - tf.summary.scalar("%s_nonpadding_tokens" % k, - tf.reduce_sum(nonpadding)) - tf.summary.scalar("%s_nonpadding_fraction" % k, - tf.reduce_mean(nonpadding)) - - tf.get_variable_scope().set_initializer(initializer()) - train = mode == tf.contrib.learn.ModeKeys.TRAIN - - # Get multi-problem logits and loss based on features["problem_choice"]. - def nth_model(n): - """Build the model for the n-th problem, plus some added variables.""" - model_class = registry.model(model)( - my_hp, - mode, - my_hp.problems[n], - n, - dp, - _ps_devices(all_workers=True)) - if mode == tf.contrib.learn.ModeKeys.INFER: - return model_class.infer( - features, - beam_size=FLAGS.decode_beam_size, - top_beams=(FLAGS.decode_beam_size - if FLAGS.decode_return_beams else 1), - last_position_only=FLAGS.decode_use_last_position_only, - alpha=FLAGS.decode_alpha, - decode_length=FLAGS.decode_extra_length) - # In distributed mode, we build graph for problem=0 and problem=worker_id. - skipping_is_on = my_hp.problem_choice == "distributed" and train - problem_worker_id = FLAGS.worker_id % len(my_hp.problems) - skip_this_one = n != 0 and n % FLAGS.worker_replicas != problem_worker_id - # On worker 0 also build graph for problems <= 1. - # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. - skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1) - sharded_logits, losses_dict = model_class.model_fn( - features, skip=(skipping_is_on and skip_this_one)) - with tf.variable_scope("losses_avg", reuse=True): - total_loss, ops = 0.0, [] - for loss_key, loss_value in losses_dict.iteritems(): - loss_moving_avg = tf.get_variable("problem_%d/%s_loss" - % (n, loss_key)) - ops.append(loss_moving_avg.assign( - loss_moving_avg * 0.9 + loss_value * 0.1)) - total_loss += loss_value - loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) - ops.append(loss_moving_avg.assign( - loss_moving_avg * 0.9 + total_loss * 0.1)) - with tf.variable_scope("train_stats"): # Count steps for this problem. - problem_steps = tf.get_variable( - "problem_%d_steps" % n, initializer=0, trainable=False) - ops.append(problem_steps.assign_add(1)) - with tf.control_dependencies(ops): # Make sure the ops run. - # Ensure the loss is a scalar here. - total_loss = tf.reshape(total_loss, [], name="total_loss_control_id") - return [total_loss] + sharded_logits # Need to flatten for cond later. - - result_list = _cond_on_index(nth_model, features["problem_choice"], 0, - len(my_hp.problems) - 1) - - if mode == tf.contrib.learn.ModeKeys.INFER: - # Beam search in sequence model returns both decodes withe key "outputs" - # and scores with they key "scores". If return list is a dict, we expect - # that it will have keys "outputs", a tensor of int32 and scores, a - # tensor of floats. This is useful if we want to return scores from - # estimator.predict - if not isinstance(result_list, dict): - ret = {"outputs": result_list}, None, None - else: - ret = { - "outputs": result_list["outputs"], - "scores": result_list["scores"] - }, None, None - if "inputs" in features: - ret[0]["inputs"] = features["inputs"] - if "infer_targets" in features: - ret[0]["targets"] = features["infer_targets"] - return ret - - sharded_logits, total_loss = result_list[1:], result_list[0] - if mode == tf.contrib.learn.ModeKeys.EVAL: - logits = tf.concat(sharded_logits, 0) - if FLAGS.eval_print: - logits = tf.Print( - logits, [features["inputs"], logits], "EVAL PRINT", summarize=10000) - # For evaluation, return the logits layer as our predictions. - run_info["predictions"] = logits - train_op = None - return run_info, total_loss, None - - assert mode == tf.contrib.learn.ModeKeys.TRAIN - - # Some training statistics. - with tf.name_scope("training_stats"): - learning_rate = my_hp.learning_rate * learning_rate_decay() - learning_rate /= math.sqrt(float(FLAGS.worker_replicas)) - tf.summary.scalar("learning_rate", learning_rate) - global_step = tf.to_float(tf.contrib.framework.get_global_step()) - for n in xrange(len(my_hp.problems)): - with tf.variable_scope("losses_avg", reuse=True): - total_loss_var = tf.get_variable("problem_%d/total_loss" % n) - training_loss_var = tf.get_variable("problem_%d/training_loss" % n) - extra_loss_var = tf.get_variable("problem_%d/extra_loss" % n) - tf.summary.scalar("loss_avg_%d/total_loss" % n, total_loss_var) - tf.summary.scalar("loss_avg_%d/training_loss" % n, training_loss_var) - tf.summary.scalar("loss_avg_%d/extra_loss" % n, extra_loss_var) - with tf.variable_scope("train_stats", reuse=True): - nth_steps = tf.get_variable("problem_%d_steps" % n, dtype=tf.int32) - tf.summary.scalar("problem_%d_frequency" % n, - tf.to_float(nth_steps) / (global_step + 1.0)) - - # Log trainable weights and add decay. - total_size, weight_decay_loss = 0, 0.0 - all_weights = {v.name: v for v in tf.trainable_variables()} - for v_name in sorted(list(all_weights)): - v = all_weights[v_name] - v_size = int(np.prod(np.array(v.shape.as_list()))) - tf.logging.info("Weight %s\tshape %s\tsize %d", - v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) - total_size += v_size - if my_hp.weight_decay > 0.0 and len(v.shape.as_list()) > 1: - # Add weight regularization if set and the weight is not a bias (dim>1). - with tf.device(v._ref().device): # pylint: disable=protected-access - v_loss = tf.nn.l2_loss(v) / v_size - weight_decay_loss += v_loss - is_body = len(v_name) > 5 and v_name[:5] == "body/" - if my_hp.weight_noise > 0.0 and is_body: - # Add weight noise if set in my_hp. - with tf.device(v._ref().device): # pylint: disable=protected-access - scale = learning_rate * 0.001 - noise = tf.truncated_normal(v.shape) * my_hp.weight_noise * scale - noise_op = v.assign_add(noise) - with tf.control_dependencies([noise_op]): - total_loss = tf.identity(total_loss) - tf.logging.info("Total trainable variables size: %d", total_size) - if my_hp.weight_decay > 0.0: - total_loss += weight_decay_loss * my_hp.weight_decay - total_loss = tf.identity(total_loss, name="total_loss") - - # Define the train_op for the TRAIN mode. - opt = _ConditionalOptimizer(my_hp.optimizer, learning_rate, my_hp) - tf.logging.info("Computing gradients for global model_fn.") - opt_summaries = ["learning_rate", "loss"] - if hparams.summarize_grads: - opt_summaries.extend(["gradients", "gradient_norm"]) - train_op = tf.contrib.layers.optimize_loss( - name="training", - loss=total_loss, - global_step=tf.contrib.framework.get_global_step(), - learning_rate=learning_rate, - clip_gradients=my_hp.clip_grad_norm or None, - gradient_noise_scale=hparams.grad_noise_scale or None, - optimizer=opt, - summaries=opt_summaries, - colocate_gradients_with_ops=True) - - # Remove summaries that will fail to run because they are in conditionals. - # TODO(cwhipkey): Test with this code removed, later in 2017. - summaries = tf.get_collection_ref(tf.GraphKeys.SUMMARIES) - for i in range(len(summaries) - 1, -1, -1): - if summaries[i].name.startswith("cond_"): - del summaries[i] - - tf.logging.info("Global model_fn finished.") - return run_info, total_loss, train_op - - return model_fn - - -def run_locally(exp): - """Runs an Experiment locally - trains, evaluates, and decodes. - - Args: - exp: Experiment. - """ - if exp.train_steps > 0 or exp.eval_steps > 0: - tf.logging.info("Performing local training and evaluation.") - exp.train_and_evaluate() - decode(exp.estimator) +def get_data_filepatterns(data_dir, mode): + return data_reader.get_data_filepatterns(FLAGS.problems, data_dir, mode) def decode(estimator): if FLAGS.decode_interactive: - decode_interactively(estimator) + decoding.decode_interactively(estimator) elif FLAGS.decode_from_file is not None: - decode_from_file(estimator, FLAGS.decode_from_file) + decoding.decode_from_file(estimator, FLAGS.decode_from_file) elif FLAGS.decode_from_dataset: - decode_from_dataset(estimator) - - -def decode_from_dataset(estimator): - hparams = estimator.hparams - for i, problem in enumerate(FLAGS.problems.split("-")): - inputs_vocab = hparams.problems[i].vocabulary.get("inputs", None) - targets_vocab = hparams.problems[i].vocabulary["targets"] - tf.logging.info("Performing local inference.") - infer_problems_data = get_data_filepatterns(hparams.data_dir, - tf.contrib.learn.ModeKeys.INFER) - - infer_input_fn = get_input_fn( - mode=tf.contrib.learn.ModeKeys.INFER, - hparams=hparams, - data_file_patterns=infer_problems_data, - num_datashards=data_parallelism().n, - fixed_problem=i) - result_iter = estimator.predict(input_fn=infer_input_fn, as_iterable=False) - - def log_fn(inputs, - targets, - outputs, - problem, - j, - inputs_vocab=inputs_vocab, - targets_vocab=targets_vocab): - """Log inference results.""" - if "image" in problem and FLAGS.decode_save_images: - save_path = os.path.join(estimator.model_dir, - "%s_prediction_%d.jpg" % (problem, j)) - show_and_save_image(inputs / 255., save_path) - elif inputs_vocab: - decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) - tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - - decoded_outputs = targets_vocab.decode(_save_until_eos(outputs.flatten())) - tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) - decoded_targets = targets_vocab.decode(_save_until_eos(targets.flatten())) - tf.logging.info("Inference results TARGET: %s" % decoded_targets) - - if FLAGS.decode_to_file: - output_filepath = FLAGS.decode_to_file + ".outputs." + problem - output_file = tf.gfile.Open(output_filepath, "a") - output_file.write(decoded_outputs + "\n") - target_filepath = FLAGS.decode_to_file + ".targets." + problem - target_file = tf.gfile.Open(target_filepath, "a") - target_file.write(decoded_targets + "\n") - - # The function predict() returns an iterable over the network's - # predictions from the test input. We use it to log inputs and decodes. - inputs_iter = result_iter["inputs"] - targets_iter = result_iter["targets"] - outputs_iter = result_iter["outputs"] - for j, result in enumerate(zip(inputs_iter, targets_iter, outputs_iter)): - inputs, targets, outputs = result - if FLAGS.decode_return_beams: - output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - log_fn(inputs, targets, beam, problem, j) - else: - log_fn(inputs, targets, outputs, problem, j) - - -def decode_from_file(estimator, filename): - """Compute predictions on entries in filename and write them out.""" - hparams = estimator.hparams - problem_id = FLAGS.decode_problem_id - inputs_vocab = hparams.problems[problem_id].vocabulary["inputs"] - targets_vocab = hparams.problems[problem_id].vocabulary["targets"] - tf.logging.info("Performing decoding from a file.") - sorted_inputs, sorted_keys = _get_sorted_inputs(filename) - num_decode_batches = (len(sorted_inputs) - 1) // FLAGS.decode_batch_size + 1 - input_fn = _decode_batch_input_fn(problem_id, num_decode_batches, - sorted_inputs, inputs_vocab) - - decodes = [] - for _ in range(num_decode_batches): - result_iter = estimator.predict( - input_fn=input_fn.next if six.PY2 else input_fn.__next__, - as_iterable=True) - for result in result_iter: - - def log_fn(inputs, outputs): - decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) - tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - - decoded_outputs = targets_vocab.decode( - _save_until_eos(outputs.flatten())) - tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) - return decoded_outputs - - if FLAGS.decode_return_beams: - beam_decodes = [] - output_beams = np.split( - result["outputs"], FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - beam_decodes.append(log_fn(result["inputs"], beam)) - decodes.append("\t".join(beam_decodes)) - - else: - decodes.append(log_fn(result["inputs"], result["outputs"])) - - # Reversing the decoded inputs and outputs because they were reversed in - # _decode_batch_input_fn - sorted_inputs.reverse() - decodes.reverse() - # Dumping inputs and outputs to file filename.decodes in - # format result\tinput in the same order as original inputs - if FLAGS.decode_to_file: - output_filename = FLAGS.decode_to_file - else: - output_filename = filename - if FLAGS.decode_shards > 1: - base_filename = output_filename + ("%.2d" % FLAGS.worker_id) - else: - base_filename = output_filename - decode_filename = (base_filename + "." + FLAGS.model + "." + FLAGS.hparams_set - + ".beam" + str(FLAGS.decode_beam_size) + ".alpha" + - str(FLAGS.decode_alpha) + ".decodes") - tf.logging.info("Writing decodes into %s" % decode_filename) - outfile = tf.gfile.Open(decode_filename, "w") - for index in range(len(sorted_inputs)): - outfile.write("%s\n" % (decodes[sorted_keys[index]])) - - -def decode_interactively(estimator): - hparams = estimator.hparams - - infer_input_fn = _interactive_input_fn(hparams) - for problem_idx, example in infer_input_fn: - targets_vocab = hparams.problems[problem_idx].vocabulary["targets"] - result_iter = estimator.predict(input_fn=lambda e=example: e) - for result in result_iter: - if FLAGS.decode_return_beams: - beams = np.split(result["outputs"], FLAGS.decode_beam_size, axis=0) - scores = None - if "scores" in result: - scores = np.split(result["scores"], FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(beams): - tf.logging.info("BEAM %d:" % k) - beam_string = targets_vocab.decode(_save_until_eos(beam.flatten())) - if scores is not None: - tf.logging.info("%s\tScore:%f" % (beam_string, scores[k])) - else: - tf.logging.info(beam_string) - else: - if FLAGS.identity_output: - tf.logging.info(" ".join(map(str, result["outputs"].flatten()))) - else: - tf.logging.info(targets_vocab.decode(_save_until_eos( - result["outputs"].flatten()))) - - -def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, - vocabulary): - tf.logging.info(" batch %d" % num_decode_batches) - # First reverse all the input sentences so that if you're going to get OOMs, - # you'll see it in the first batch - sorted_inputs.reverse() - for b in range(num_decode_batches): - tf.logging.info("Decoding batch %d" % b) - batch_length = 0 - batch_inputs = [] - for inputs in sorted_inputs[b * FLAGS.decode_batch_size:( - b + 1) * FLAGS.decode_batch_size]: - input_ids = vocabulary.encode(inputs) - if FLAGS.decode_max_input_size > 0: - # Subtract 1 for the EOS_ID. - input_ids = input_ids[:FLAGS.decode_max_input_size - 1] - input_ids.append(text_encoder.EOS_ID) - batch_inputs.append(input_ids) - if len(input_ids) > batch_length: - batch_length = len(input_ids) - final_batch_inputs = [] - for input_ids in batch_inputs: - assert len(input_ids) <= batch_length - x = input_ids + [0] * (batch_length - len(input_ids)) - final_batch_inputs.append(x) - yield { - "inputs": np.array(final_batch_inputs), - "problem_choice": np.array(problem_id) - } - - -def get_data_filepatterns(data_dir, mode): - return data_reader.get_data_filepatterns(FLAGS.problems, data_dir, mode) - - -def _cond_on_index(fn, index_tensor, cur_idx, max_idx): - """Call fn(index_tensor) using tf.cond in [cur_id, max_idx].""" - if cur_idx == max_idx: - return fn(cur_idx) - return tf.cond( - tf.equal(index_tensor, cur_idx), lambda: fn(cur_idx), - lambda: _cond_on_index(fn, index_tensor, cur_idx + 1, max_idx)) - - -def _interactive_input_fn(hparams): - """Generator that reads from the terminal and yields "interactive inputs". - - Due to temporary limitations in tf.learn, if we don't want to reload the - whole graph, then we are stuck encoding all of the input as one fixed-size - numpy array. - - We yield int64 arrays with shape [const_array_size]. The format is: - [num_samples, decode_length, len(input ids), <input ids>, <padding>] - - Args: - hparams: model hparams - Yields: - numpy arrays - - Raises: - Exception: when `input_type` is invalid. - """ - num_samples = 3 - decode_length = 100 - input_type = "text" - problem_id = 0 - p_hparams = hparams.problems[problem_id] - has_input = "inputs" in p_hparams.input_modality - vocabulary = p_hparams.vocabulary["inputs" if has_input else "targets"] - # This should be longer than the longest input. - const_array_size = 10000 - while True: - prompt = ("INTERACTIVE MODE num_samples=%d decode_length=%d \n" - " it=<input_type> ('text' or 'image' or 'label')\n" - " pr=<problem_num> (set the problem number)\n" - " in=<input_problem> (set the input problem number)\n" - " ou=<output_problem> (set the output problem number)\n" - " ns=<num_samples> (changes number of samples)\n" - " dl=<decode_length> (changes decode legnth)\n" - " <%s> (decode)\n" - " q (quit)\n" - ">" % (num_samples, decode_length, "source_string" - if has_input else "target_prefix")) - input_string = input(prompt) - if input_string == "q": - return - elif input_string[:3] == "pr=": - problem_id = int(input_string[3:]) - p_hparams = hparams.problems[problem_id] - has_input = "inputs" in p_hparams.input_modality - vocabulary = p_hparams.vocabulary["inputs" if has_input else "targets"] - elif input_string[:3] == "in=": - problem = int(input_string[3:]) - p_hparams.input_modality = hparams.problems[problem].input_modality - p_hparams.input_space_id = hparams.problems[problem].input_space_id - elif input_string[:3] == "ou=": - problem = int(input_string[3:]) - p_hparams.target_modality = hparams.problems[problem].target_modality - p_hparams.target_space_id = hparams.problems[problem].target_space_id - elif input_string[:3] == "ns=": - num_samples = int(input_string[3:]) - elif input_string[:3] == "dl=": - decode_length = int(input_string[3:]) - elif input_string[:3] == "it=": - input_type = input_string[3:] - else: - if input_type == "text": - input_ids = vocabulary.encode(input_string) - if has_input: - input_ids.append(text_encoder.EOS_ID) - x = [num_samples, decode_length, len(input_ids)] + input_ids - assert len(x) < const_array_size - x += [0] * (const_array_size - len(x)) - yield problem_id, { - "inputs": np.array(x), - "problem_choice": np.array(problem_id) - } - elif input_type == "image": - input_path = input_string - img = read_image(input_path) - yield problem_id, { - "inputs": img, - "problem_choice": np.array(problem_id) - } - elif input_type == "label": - input_ids = [int(input_string)] - x = [num_samples, decode_length, len(input_ids)] + input_ids - yield problem_id, { - "inputs": np.array(x), - "problem_choice": np.array(problem_id) - } - else: - raise Exception("Unsupported input type.") - - -def read_image(path): - try: - import matplotlib.image as im # pylint: disable=g-import-not-at-top - except ImportError as e: - tf.logging.warning( - "Reading an image requires matplotlib to be installed: %s", e) - raise NotImplementedError("Image reading not implemented.") - return im.imread(path) - - -def show_and_save_image(img, save_path): - try: - import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top - except ImportError as e: - tf.logging.warning("Showing and saving an image requires matplotlib to be " - "installed: %s", e) - raise NotImplementedError("Image display and save not implemented.") - plt.imshow(img) - plt.savefig(save_path) - - -def _get_sorted_inputs(filename): - """Returning inputs sorted according to length. - - Args: - filename: path to file with inputs, 1 per line. - - Returns: - a sorted list of inputs - - """ - tf.logging.info("Getting sorted inputs") - # read file and sort inputs according them according to input length. - if FLAGS.decode_shards > 1: - decode_filename = filename + ("%.2d" % FLAGS.worker_id) - else: - decode_filename = filename - inputs = [line.strip() for line in tf.gfile.Open(decode_filename)] - input_lens = [(i, len(line.strip().split())) for i, line in enumerate(inputs)] - sorted_input_lens = sorted(input_lens, key=operator.itemgetter(1)) - # We'll need the keys to rearrange the inputs back into their original order - sorted_keys = {} - sorted_inputs = [] - for i, (index, _) in enumerate(sorted_input_lens): - sorted_inputs.append(inputs[index]) - sorted_keys[index] = i - return sorted_inputs, sorted_keys - - -def _interactive_input_tensor_to_features_dict(feature_map, hparams): - """Convert the interactive input format (see above) to a dictionary. - - Args: - feature_map: a dictionary with keys `problem_choice` and `input` containing - Tensors. - hparams: model hyperparameters - - Returns: - a features dictionary, as expected by the decoder. - """ - inputs = tf.constant(feature_map["inputs"]) - input_is_image = False if len(inputs.shape) < 3 else True - - def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring - p_hparams = hparams.problems[problem_choice] - if not input_is_image: - # Remove the batch dimension. - num_samples = x[0] - length = x[2] - x = tf.slice(x, [3], tf.to_int32([length])) - x = tf.reshape(x, [1, -1, 1, 1]) - # Transform into a batch of size num_samples to get that many random - # decodes. - x = tf.tile(x, tf.to_int32([num_samples, 1, 1, 1])) - else: - x = tf.image.resize_images(x, [299, 299]) - x = tf.reshape(x, [1, 299, 299, -1]) - x = tf.to_int32(x) - return (tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id), x) - - input_space_id, target_space_id, x = _cond_on_index( - input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) - - features = {} - features["problem_choice"] = tf.constant(feature_map["problem_choice"]) - features["input_space_id"] = input_space_id - features["target_space_id"] = target_space_id - features["decode_length"] = (IMAGE_DECODE_LENGTH - if input_is_image else inputs[1]) - features["inputs"] = x - return features - - -def _decode_input_tensor_to_features_dict(feature_map, hparams): - """Convert the interactive input format (see above) to a dictionary. - - Args: - feature_map: a dictionary with keys `problem_choice` and `input` containing - Tensors. - hparams: model hyperparameters - - Returns: - a features dictionary, as expected by the decoder. - """ - inputs = tf.constant(feature_map["inputs"]) - input_is_image = False - - def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring - p_hparams = hparams.problems[problem_choice] - # Add a third empty dimension dimension - x = tf.expand_dims(x, axis=[2]) - x = tf.to_int32(x) - return (tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id), x) - - input_space_id, target_space_id, x = _cond_on_index( - input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) - - features = {} - features["problem_choice"] = feature_map["problem_choice"] - features["input_space_id"] = input_space_id - features["target_space_id"] = target_space_id - features["decode_length"] = (IMAGE_DECODE_LENGTH - if input_is_image else tf.shape(x)[1] + 50) - features["inputs"] = x - return features - - -def get_input_fn(mode, - hparams, - data_file_patterns=None, - num_datashards=None, - fixed_problem=None): - """Provides input to the graph, either from disk or via a placeholder. - - This function produces an input function that will feed data into - the network. There are two modes of operation: - - 1. If data_file_pattern and all subsequent arguments are None, then - it creates a placeholder for a serialized tf.Example proto. - 2. If data_file_pattern is defined, it will read the data from the - files at the given location. Use this mode for training, - evaluation, and testing prediction. - - Args: - mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. - hparams: HParams object. - data_file_patterns: The list of file patterns to use to read in data. Set to - `None` if you want to create a placeholder for the input data. The - `problems` flag is a list of problem names joined by the `-` character. - The flag's string is then split along the `-` and each problem gets its - own example queue. - num_datashards: An integer. - fixed_problem: An integer indicating the problem to fetch data for, or None - if the input is to be randomly selected. - - Returns: - A function that returns a dictionary of features and the target labels. - """ - - def input_fn(): - """Supplies input to our model. - - This function supplies input to our model, where this input is a - function of the mode. For example, we supply different data if - we're performing training versus evaluation. - - Returns: - A tuple consisting of 1) a dictionary of tensors whose keys are - the feature names, and 2) a tensor of target labels if the mode - is not INFER (and None, otherwise). - - Raises: - ValueError: if one of the parameters has an unsupported value. - """ - problem_count, batches = len(data_file_patterns), [] - with tf.name_scope("input_reader"): - for n in xrange(problem_count): - if fixed_problem is not None and n != fixed_problem: - continue - problem_instance = hparams.problem_instances[n] - p_hparams = hparams.problems[n] - with tf.name_scope("problem_%d" % n): - with tf.device("/cpu:0"): # Input reading on CPU - capacity = p_hparams.max_expected_batch_size_per_shard - capacity *= num_datashards - examples = data_reader.input_pipeline(problem_instance, - data_file_patterns[n], - capacity, mode, hparams) - feature_map = data_reader.batch_examples( - examples, - data_reader.hparams_to_batching_scheme( - hparams, - shard_multiplier=num_datashards, - drop_long_sequences=(mode == tf.contrib.learn.ModeKeys.TRAIN - or hparams.eval_drop_long_sequences), - length_multiplier=(p_hparams.batch_size_multiplier))) - - # Reverse inputs and targets features if the problem was reversed. - if problem_instance is not None: - problem_instance.maybe_reverse_features(feature_map) - problem_instance.maybe_copy_features(feature_map) - else: - if p_hparams.was_reversed: - inputs = feature_map["inputs"] - targets = feature_map["targets"] - feature_map["inputs"] = targets - feature_map["targets"] = inputs - # Use the inputs as the targets if the problem is a copy problem. - if p_hparams.was_copy: - feature_map["targets"] = feature_map["inputs"] - - # Ensure inputs and targets are proper rank. - while len(feature_map["inputs"].get_shape()) != 4: - feature_map["inputs"] = tf.expand_dims(feature_map["inputs"], axis=-1) - while len(feature_map["targets"].get_shape()) != 4: - feature_map["targets"] = tf.expand_dims( - feature_map["targets"], axis=-1) - - batches.append( - (feature_map["inputs"], feature_map["targets"], tf.constant(n), - tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id))) - - # We choose which problem to process. - loss_moving_avgs = [] # Need loss moving averages for that. - for n in xrange(problem_count): - with tf.variable_scope("losses_avg"): - loss_moving_avgs.append( - tf.get_variable( - "problem_%d/total_loss" % n, initializer=100.0, - trainable=False)) - tf.get_variable( - "problem_%d/training_loss" % n, initializer=100.0, trainable=False) - tf.get_variable( - "problem_%d/extra_loss" % n, initializer=100.0, trainable=False) - if fixed_problem is None: - if (hparams.problem_choice == "uniform" or - mode != tf.contrib.learn.ModeKeys.TRAIN): - problem_choice = tf.random_uniform( - [], maxval=problem_count, dtype=tf.int32) - elif hparams.problem_choice == "adaptive": - loss_moving_avgs = tf.stack(loss_moving_avgs) - problem_choice = tf.multinomial( - tf.reshape(loss_moving_avgs, [1, -1]), 1) - problem_choice = tf.to_int32(tf.squeeze(problem_choice)) - elif hparams.problem_choice == "distributed": - assert FLAGS.worker_replicas >= problem_count - assert FLAGS.worker_replicas % problem_count == 0 - problem_choice = tf.to_int32(FLAGS.worker_id % problem_count) - else: - raise ValueError( - "Value of hparams.problem_choice is %s and must be " - "one of [uniform, adaptive, distributed]" % hparams.problem_choice) - - # Inputs and targets conditional on problem_choice. - rand_inputs, rand_target, choice, inp_id, tgt_id = _cond_on_index( - lambda n: batches[n], problem_choice, 0, problem_count - 1) - else: - problem_choice = tf.constant(fixed_problem) - # Take the only constructed batch, which is the fixed_problem. - rand_inputs, rand_target, choice, inp_id, tgt_id = batches[0] - - # Set shapes so the ranks are clear. - rand_inputs.set_shape([None, None, None, None]) - rand_target.set_shape([None, None, None, None]) - choice.set_shape([]) - inp_id.set_shape([]) - tgt_id.set_shape([]) - # Forced shape obfuscation is necessary for inference. - if mode == tf.contrib.learn.ModeKeys.INFER: - rand_inputs._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access - rand_target._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access - - # Final feature map. - rand_feature_map = { - "inputs": rand_inputs, - "problem_choice": choice, - "input_space_id": inp_id, - "target_space_id": tgt_id - } - if mode == tf.contrib.learn.ModeKeys.INFER: - rand_feature_map["infer_targets"] = rand_target - rand_target = None - return rand_feature_map, rand_target - - return input_fn - - -class _ConditionalOptimizer(tf.train.Optimizer): - """Conditional optimizer.""" - - def __init__(self, optimizer_name, lr, hparams): - if optimizer_name == "Adam": - # We change the default epsilon for Adam and re-scale lr. - # Using LazyAdam as it's much faster for large vocabulary embeddings. - self._opt = tf.contrib.opt.LazyAdamOptimizer( - lr / 500.0, - beta1=hparams.optimizer_adam_beta1, - beta2=hparams.optimizer_adam_beta2, - epsilon=hparams.optimizer_adam_epsilon) - elif optimizer_name == "Momentum": - self._opt = tf.train.MomentumOptimizer( - lr, momentum=hparams.optimizer_momentum_momentum) - elif optimizer_name == "YellowFin": - tf.logging.info("Init YellowFin Optimizer.") - self._opt = yellowfin.YellowFinOptimizer( - learning_rate=lr, momentum=hparams.optimizer_momentum_momentum) - else: - self._opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer_name](lr) - - def compute_gradients(self, loss, var_list, colocate_gradients_with_ops): - return self._opt.compute_gradients( - loss, var_list, colocate_gradients_with_ops=colocate_gradients_with_ops) - - def apply_gradients(self, gradients, global_step=None, name=None): - return self._opt.apply_gradients( - gradients, global_step=global_step, name=name) - - -def _sqrt_decay(step): - """Decay like 1 / sqrt(step), multiplied by 500 to normalize.""" - return 500.0 / tf.sqrt(tf.maximum(step, 1.0)) - - -def _exp_decay_after(step, rate, from_which_step): - """Decay exponentially by rate (per step) starting at from_which_step.""" - return tf.cond( - step < from_which_step, - lambda: tf.constant(1.0), - lambda: rate**(step - from_which_step), - name="exponential_decay_step_cond") - - -def _ps_replicas(all_workers=False): - if all_workers: - return list(range(FLAGS.ps_replicas)) - # Worker K will be using replicas {0,...n-1} + K*n if we have n replicas. - num_replicas = FLAGS.ps_replicas // FLAGS.worker_replicas - return [d + FLAGS.worker_id * num_replicas for d in xrange(num_replicas)] - - -def _gpu_order(num_gpus): - if FLAGS.gpu_order: - ret = [int(s) for s in FLAGS.gpu_order.split(" ")] - if len(ret) == num_gpus: - return ret - return list(range(num_gpus)) - - -def _ps_gpus(all_workers=False): - ps_gpus = [] - for d in _ps_replicas(all_workers=all_workers): - ps_gpus.extend([(d, gpu) for gpu in _gpu_order(FLAGS.ps_gpu)]) - return ps_gpus - - -def _ps_devices(all_workers=False): - """List of ps devices (where to put the experts). - - Args: - all_workers: whether the list is for all async workers or just this one. - - Returns: - a list of device names - """ - if FLAGS.ps_replicas > 0: - if FLAGS.ps_gpu > 0: - return [ - FLAGS.ps_job + "/task:%d/GPU:%d" % (d, gpu) - for (d, gpu) in _ps_gpus(all_workers=all_workers) - ] - else: - return [ - FLAGS.ps_job + "/task:%d" % d - for d in _ps_replicas(all_workers=all_workers) - ] - else: - if FLAGS.worker_gpu > 0: - return ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] - else: - return [""] - - -def data_parallelism(all_workers=False): - """Over which devices do we split each training batch. - - In old-fashioned async mode, we split the batch over all GPUs on the - current worker. - - In sync mode, we split the batch over all the parameter server GPUs. - - This function returns an expert_utils.Parallelism object, which can be used - to build the model. It is configured in a way that any variables created - by `tf.get_variable` will be assigned to the parameter servers and shared - between datashards. - - Args: - all_workers: whether the devices are all async workers or just this one. - - Returns: - a expert_utils.Parallelism. - """ - - def _replica_device_setter(worker_device): - if FLAGS.ps_replicas == 0: - return worker_device - return tf.train.replica_device_setter( - worker_device=worker_device, - ps_tasks=FLAGS.ps_replicas, - ps_device=FLAGS.ps_job + "/GPU:0" if FLAGS.ps_gpu > 0 else FLAGS.ps_job) - - if FLAGS.schedule == "local_run": - assert not FLAGS.sync - datashard_devices = ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] - if FLAGS.locally_shard_to_cpu: - datashard_devices += ["cpu:0"] - caching_devices = None - elif FLAGS.sync: - assert FLAGS.ps_replicas > 0 - datashard_devices = [ - _replica_device_setter(d) for d in _ps_devices(all_workers=all_workers) - ] - if FLAGS.ps_gpu > 0 and FLAGS.ps_replicas > 1: - caching_devices = [ - FLAGS.ps_job + "/task:%d/cpu:0" % d - for (d, _) in _ps_gpus(all_workers=all_workers) - ] - else: - caching_devices = None - else: - # old fashioned async - compute on worker - if FLAGS.worker_gpu > 1: - datashard_devices = [ - _replica_device_setter(FLAGS.worker_job + "/GPU:%d" % d) - for d in _gpu_order(FLAGS.worker_gpu) - ] - caching_devices = [FLAGS.worker_job + "/GPU:0"] * FLAGS.worker_gpu - else: - datashard_devices = [_replica_device_setter(FLAGS.worker_job)] - caching_devices = None - tf.logging.info("datashard_devices: %s", datashard_devices) - tf.logging.info("caching_devices: %s", caching_devices) - return eu.Parallelism( - datashard_devices, - reuse=True, - caching_devices=caching_devices, - daisy_chain_variables=FLAGS.daisy_chain_variables) + decoding.decode_from_dataset(estimator) diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 562279623..8a71afe68 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -25,7 +25,7 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.models import transformer from tensor2tensor.utils import registry -from tensor2tensor.utils import trainer_utils as utils # pylint: disable=unused-import +from tensor2tensor.utils import trainer_utils import tensorflow as tf @@ -76,7 +76,7 @@ def testHParamsImported(self): def testSingleStep(self): model_name = "transformer" FLAGS.hparams_set = "transformer_test" - exp = utils.create_experiment( + exp = trainer_utils.create_experiment( output_dir=tf.test.get_temp_dir(), data_dir=TrainerUtilsTest.data_dir, model_name=model_name, From fbe8c61a1aaea95b9b32fb56d49c1e790660ea09 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 2 Aug 2017 17:13:21 -0700 Subject: [PATCH 0214/4095] v1.1.5 PiperOrigin-RevId: 164061568 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fd8e77a46..38b2fcc48 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.4', + version='1.1.5', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From eee190b3b770d917931b3ccb3972109b27b48f6d Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 2 Aug 2017 17:25:40 -0700 Subject: [PATCH 0215/4095] Add layers init and update gitignore for nose --- .gitignore | 1 + tensor2tensor/layers/__init__.py | 0 2 files changed, 1 insertion(+) create mode 100644 tensor2tensor/layers/__init__.py diff --git a/.gitignore b/.gitignore index c9dd3db88..362753caa 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ _pycache__/ # Python egg metadata, regenerated from source files by setuptools. /*.egg-info +/*.egg # PyPI distribution artifacts. build/ diff --git a/tensor2tensor/layers/__init__.py b/tensor2tensor/layers/__init__.py new file mode 100644 index 000000000..e69de29bb From 14f80839a01677acb9259a3a93423bbb2eaecc7d Mon Sep 17 00:00:00 2001 From: Aidan Gomez <aidan.n.gomez@gmail.com> Date: Thu, 3 Aug 2017 14:48:26 -0400 Subject: [PATCH 0216/4095] Add algorithmic cipher --- .../data_generators/algorithmic_cipher.py | 213 ++++++++++++++++++ tensor2tensor/data_generators/all_problems.py | 1 + 2 files changed, 214 insertions(+) create mode 100644 tensor2tensor/data_generators/algorithmic_cipher.py diff --git a/tensor2tensor/data_generators/algorithmic_cipher.py b/tensor2tensor/data_generators/algorithmic_cipher.py new file mode 100644 index 000000000..df7e2dcc6 --- /dev/null +++ b/tensor2tensor/data_generators/algorithmic_cipher.py @@ -0,0 +1,213 @@ +from collections import deque +import numpy as np + +from tensor2tensor.data_generators import problem, algorithmic +from tensor2tensor.utils import registry + + +@registry.register_problem +class AlgorithmicShiftCipher5(algorithmic.AlgorithmicProblem): + + @property + def num_symbols(self): + return 5 + + @property + def distribution(self): + return [0.4, 0.3, 0.2, 0.08, 0.02] + + @property + def shift(self): + return 1 + + @property + def train_generator(self): + """Generator; takes 3 args: nbr_symbols, max_length, nbr_cases.""" + + def _gen(nbr_symbols, max_length, nbr_cases): + plain_vocab = range(nbr_symbols) + indices = generate_plaintext_random(plain_vocab, self.distribution, + nbr_cases, max_length) + codes = encipher_shift(indices, plain_vocab, self.shift) + + for plain, code in zip(indices, codes): + yield { + "X": plain, + "Y": code, + } + + return _gen + + @property + def train_length(self): + return 100 + + @property + def dev_length(self): + return self.train_length + + +@registry.register_problem +class AlgorithmicVigenereCipher5(algorithmic.AlgorithmicProblem): + + @property + def num_symbols(self): + return 5 + + @property + def distribution(self): + return [0.4, 0.3, 0.2, 0.08, 0.02] + + @property + def key(self): + return [1, 3] + + @property + def train_generator(self): + """Generator; takes 3 args: nbr_symbols, max_length, nbr_cases.""" + + def _gen(nbr_symbols, max_length, nbr_cases): + plain_vocab = range(nbr_symbols) + indices = generate_plaintext_random(plain_vocab, self.distribution, + nbr_cases, max_length) + codes = encipher_vigenere(indices, plain_vocab, self.key) + + for plain, code in zip(indices, codes): + yield { + "X": plain, + "Y": code, + } + + return _gen + + @property + def train_length(self): + return 200 + + @property + def dev_length(self): + return self.train_length + + +@registry.register_problem +class AlgorithmicShiftCipher200(AlgorithmicShiftCipher5): + + @property + def num_symbols(self): + return 200 + + @property + def distribution(self): + vals = range(self.num_symbols) + val_sum = sum(vals) + return [v / val_sum for v in vals] + + +@registry.register_problem +class AlgorithmicVigenereCipher200(AlgorithmicVigenereCipher5): + + @property + def num_symbols(self): + return 200 + + @property + def distribution(self): + vals = range(self.num_symbols) + val_sum = sum(vals) + return [v / val_sum for v in vals] + + @property + def key(self): + return [1, 3] + + +class Layer(): + """A single layer for shift""" + + def __init__(self, vocab, shift): + """Initialize shift layer + + Args: + vocab (list of String): the vocabulary + shift (Integer): the amount of shift apply to the alphabet. Positive number implies + shift to the right, negative number implies shift to the left. + """ + self.shift = shift + alphabet = vocab + shifted_alphabet = deque(alphabet) + shifted_alphabet.rotate(shift) + self.encrypt = dict(zip(alphabet, list(shifted_alphabet))) + self.decrypt = dict(zip(list(shifted_alphabet), alphabet)) + + def encrypt_character(self, character): + return self.encrypt[character] + + def decrypt_character(self, character): + return self.decrypt[character] + + +def generate_plaintext_random(plain_vocab, distribution, train_samples, + length): + """Generates samples of text from the provided vocabulary. + Returns: + train_indices (np.array of Integers): random integers generated for training. + shape = [num_samples, length] + test_indices (np.array of Integers): random integers generated for testing. + shape = [num_samples, length] + plain_vocab (list of Integers): unique vocabularies. + """ + if distribution is not None: + assert len(distribution) == len(plain_vocab) + + train_indices = np.random.choice( + range(len(plain_vocab)), (train_samples, length), p=distribution) + + return train_indices + + +def encipher_shift(plaintext, plain_vocab, shift): + """Encrypt plain text with a single shift layer + Args: + plaintext (list of list of Strings): a list of plain text to encrypt. + plain_vocab (list of Integer): unique vocabularies being used. + shift (Integer): number of shift, shift to the right if shift is positive. + Returns: + ciphertext (list of Strings): encrypted plain text. + """ + ciphertext = [] + cipher = Layer(plain_vocab, shift) + + for i, sentence in enumerate(plaintext): + cipher_sentence = [] + for j, character in enumerate(sentence): + encrypted_char = cipher.encrypt_character(character) + cipher_sentence.append(encrypted_char) + ciphertext.append(cipher_sentence) + + return ciphertext + + +def encipher_vigenere(plaintext, plain_vocab, key): + """Encrypt plain text with given key + Args: + plaintext (list of list of Strings): a list of plain text to encrypt. + plain_vocab (list of Integer): unique vocabularies being used. + key (list of Integer): key to encrypt cipher using Vigenere table. + Returns: + ciphertext (list of Strings): encrypted plain text. + """ + ciphertext = [] + # generate Vigenere table + layers = [] + for i in range(len(plain_vocab)): + layers.append(Layer(plain_vocab, i)) + + for i, sentence in enumerate(plaintext): + cipher_sentence = [] + for j, character in enumerate(sentence): + key_idx = key[j % len(key)] + encrypted_char = layers[key_idx].encrypt_character(character) + cipher_sentence.append(encrypted_char) + ciphertext.append(cipher_sentence) + + return ciphertext diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 6830cf0bf..b391485dd 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -21,6 +21,7 @@ # pylint: disable=unused-import from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import algorithmic_math +from tensor2tensor.data_generators import algorithmic_cipher from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b From 90e72afdcd194b01568b4000e71d381946664f23 Mon Sep 17 00:00:00 2001 From: Aidan Gomez <aidan.n.gomez@gmail.com> Date: Thu, 3 Aug 2017 15:47:23 -0400 Subject: [PATCH 0217/4095] Fix naming --- tensor2tensor/data_generators/all_problems.py | 2 +- .../data_generators/{algorithmic_cipher.py => cipher.py} | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) rename tensor2tensor/data_generators/{algorithmic_cipher.py => cipher.py} (95%) diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index b391485dd..450cb32a7 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -21,8 +21,8 @@ # pylint: disable=unused-import from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import algorithmic_math -from tensor2tensor.data_generators import algorithmic_cipher from tensor2tensor.data_generators import audio +from tensor2tensor.data_generators import cipher from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import ptb diff --git a/tensor2tensor/data_generators/algorithmic_cipher.py b/tensor2tensor/data_generators/cipher.py similarity index 95% rename from tensor2tensor/data_generators/algorithmic_cipher.py rename to tensor2tensor/data_generators/cipher.py index df7e2dcc6..546ba1739 100644 --- a/tensor2tensor/data_generators/algorithmic_cipher.py +++ b/tensor2tensor/data_generators/cipher.py @@ -6,7 +6,7 @@ @registry.register_problem -class AlgorithmicShiftCipher5(algorithmic.AlgorithmicProblem): +class CipherShift5(algorithmic.AlgorithmicProblem): @property def num_symbols(self): @@ -48,7 +48,7 @@ def dev_length(self): @registry.register_problem -class AlgorithmicVigenereCipher5(algorithmic.AlgorithmicProblem): +class CipherVigenere5(algorithmic.AlgorithmicProblem): @property def num_symbols(self): @@ -90,7 +90,7 @@ def dev_length(self): @registry.register_problem -class AlgorithmicShiftCipher200(AlgorithmicShiftCipher5): +class CipherShift200(CipherShift5): @property def num_symbols(self): @@ -104,7 +104,7 @@ def distribution(self): @registry.register_problem -class AlgorithmicVigenereCipher200(AlgorithmicVigenereCipher5): +class CipherVigenere200(CipherVigenere5): @property def num_symbols(self): From ff360cc5431c75cfad3258a37ad1adc9dda80208 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 2 Aug 2017 17:26:03 -0700 Subject: [PATCH 0218/4095] Add layers/__init__.py and update gitignore for nose PiperOrigin-RevId: 164063212 --- .gitignore | 1 - tensor2tensor/data_generators/all_problems.py | 1 - tensor2tensor/data_generators/cipher.py | 213 ------------------ tensor2tensor/layers/__init__.py | 15 ++ 4 files changed, 15 insertions(+), 215 deletions(-) delete mode 100644 tensor2tensor/data_generators/cipher.py diff --git a/.gitignore b/.gitignore index 362753caa..c9dd3db88 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,6 @@ _pycache__/ # Python egg metadata, regenerated from source files by setuptools. /*.egg-info -/*.egg # PyPI distribution artifacts. build/ diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 843bd0a66..9be133a61 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -22,7 +22,6 @@ from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import audio -from tensor2tensor.data_generators import cipher from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import ptb diff --git a/tensor2tensor/data_generators/cipher.py b/tensor2tensor/data_generators/cipher.py deleted file mode 100644 index 546ba1739..000000000 --- a/tensor2tensor/data_generators/cipher.py +++ /dev/null @@ -1,213 +0,0 @@ -from collections import deque -import numpy as np - -from tensor2tensor.data_generators import problem, algorithmic -from tensor2tensor.utils import registry - - -@registry.register_problem -class CipherShift5(algorithmic.AlgorithmicProblem): - - @property - def num_symbols(self): - return 5 - - @property - def distribution(self): - return [0.4, 0.3, 0.2, 0.08, 0.02] - - @property - def shift(self): - return 1 - - @property - def train_generator(self): - """Generator; takes 3 args: nbr_symbols, max_length, nbr_cases.""" - - def _gen(nbr_symbols, max_length, nbr_cases): - plain_vocab = range(nbr_symbols) - indices = generate_plaintext_random(plain_vocab, self.distribution, - nbr_cases, max_length) - codes = encipher_shift(indices, plain_vocab, self.shift) - - for plain, code in zip(indices, codes): - yield { - "X": plain, - "Y": code, - } - - return _gen - - @property - def train_length(self): - return 100 - - @property - def dev_length(self): - return self.train_length - - -@registry.register_problem -class CipherVigenere5(algorithmic.AlgorithmicProblem): - - @property - def num_symbols(self): - return 5 - - @property - def distribution(self): - return [0.4, 0.3, 0.2, 0.08, 0.02] - - @property - def key(self): - return [1, 3] - - @property - def train_generator(self): - """Generator; takes 3 args: nbr_symbols, max_length, nbr_cases.""" - - def _gen(nbr_symbols, max_length, nbr_cases): - plain_vocab = range(nbr_symbols) - indices = generate_plaintext_random(plain_vocab, self.distribution, - nbr_cases, max_length) - codes = encipher_vigenere(indices, plain_vocab, self.key) - - for plain, code in zip(indices, codes): - yield { - "X": plain, - "Y": code, - } - - return _gen - - @property - def train_length(self): - return 200 - - @property - def dev_length(self): - return self.train_length - - -@registry.register_problem -class CipherShift200(CipherShift5): - - @property - def num_symbols(self): - return 200 - - @property - def distribution(self): - vals = range(self.num_symbols) - val_sum = sum(vals) - return [v / val_sum for v in vals] - - -@registry.register_problem -class CipherVigenere200(CipherVigenere5): - - @property - def num_symbols(self): - return 200 - - @property - def distribution(self): - vals = range(self.num_symbols) - val_sum = sum(vals) - return [v / val_sum for v in vals] - - @property - def key(self): - return [1, 3] - - -class Layer(): - """A single layer for shift""" - - def __init__(self, vocab, shift): - """Initialize shift layer - - Args: - vocab (list of String): the vocabulary - shift (Integer): the amount of shift apply to the alphabet. Positive number implies - shift to the right, negative number implies shift to the left. - """ - self.shift = shift - alphabet = vocab - shifted_alphabet = deque(alphabet) - shifted_alphabet.rotate(shift) - self.encrypt = dict(zip(alphabet, list(shifted_alphabet))) - self.decrypt = dict(zip(list(shifted_alphabet), alphabet)) - - def encrypt_character(self, character): - return self.encrypt[character] - - def decrypt_character(self, character): - return self.decrypt[character] - - -def generate_plaintext_random(plain_vocab, distribution, train_samples, - length): - """Generates samples of text from the provided vocabulary. - Returns: - train_indices (np.array of Integers): random integers generated for training. - shape = [num_samples, length] - test_indices (np.array of Integers): random integers generated for testing. - shape = [num_samples, length] - plain_vocab (list of Integers): unique vocabularies. - """ - if distribution is not None: - assert len(distribution) == len(plain_vocab) - - train_indices = np.random.choice( - range(len(plain_vocab)), (train_samples, length), p=distribution) - - return train_indices - - -def encipher_shift(plaintext, plain_vocab, shift): - """Encrypt plain text with a single shift layer - Args: - plaintext (list of list of Strings): a list of plain text to encrypt. - plain_vocab (list of Integer): unique vocabularies being used. - shift (Integer): number of shift, shift to the right if shift is positive. - Returns: - ciphertext (list of Strings): encrypted plain text. - """ - ciphertext = [] - cipher = Layer(plain_vocab, shift) - - for i, sentence in enumerate(plaintext): - cipher_sentence = [] - for j, character in enumerate(sentence): - encrypted_char = cipher.encrypt_character(character) - cipher_sentence.append(encrypted_char) - ciphertext.append(cipher_sentence) - - return ciphertext - - -def encipher_vigenere(plaintext, plain_vocab, key): - """Encrypt plain text with given key - Args: - plaintext (list of list of Strings): a list of plain text to encrypt. - plain_vocab (list of Integer): unique vocabularies being used. - key (list of Integer): key to encrypt cipher using Vigenere table. - Returns: - ciphertext (list of Strings): encrypted plain text. - """ - ciphertext = [] - # generate Vigenere table - layers = [] - for i in range(len(plain_vocab)): - layers.append(Layer(plain_vocab, i)) - - for i, sentence in enumerate(plaintext): - cipher_sentence = [] - for j, character in enumerate(sentence): - key_idx = key[j % len(key)] - encrypted_char = layers[key_idx].encrypt_character(character) - cipher_sentence.append(encrypted_char) - ciphertext.append(cipher_sentence) - - return ciphertext diff --git a/tensor2tensor/layers/__init__.py b/tensor2tensor/layers/__init__.py index e69de29bb..3f714ce1f 100644 --- a/tensor2tensor/layers/__init__.py +++ b/tensor2tensor/layers/__init__.py @@ -0,0 +1,15 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + From f47930391a88974ff5253e7068377025f3b27ccb Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Wed, 2 Aug 2017 22:07:37 -0700 Subject: [PATCH 0219/4095] Fix image problem preprocess_examples signature PiperOrigin-RevId: 164081124 --- tensor2tensor/data_generators/image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index d70d9339e..f61f85b54 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -423,7 +423,7 @@ def cifar10_generator(tmp_dir, training, how_many, start_from=0): @registry.register_problem class ImageCifar10Tune(ImageMnistTune): - def preprocess_examples(self, examples, mode): + def preprocess_examples(self, examples, mode, hparams): if mode == tf.contrib.learn.ModeKeys.TRAIN: examples["inputs"] = common_layers.cifar_image_augmentation( examples["inputs"]) @@ -449,7 +449,7 @@ def generator(self, data_dir, tmp_dir, is_training): @registry.register_problem class ImageCifar10Plain(ImageCifar10): - def preprocess_examples(self, examples, mode): + def preprocess_examples(self, examples, mode, hparams): return examples From fec81d2f60892759b9d55f2a0cbab75d3a9ce8cb Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 3 Aug 2017 12:29:07 -0700 Subject: [PATCH 0220/4095] Rm FLAGS from input fn builder and fix placeholder logic PiperOrigin-RevId: 164164402 --- tensor2tensor/utils/input_fn_builder.py | 25 ++++++++++++++----------- tensor2tensor/utils/trainer_utils.py | 8 ++++++-- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index 1fac64c8b..79a765ca2 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -27,15 +27,14 @@ import tensorflow as tf -# TODO(rsepassi): Rm dep on FLAGS here -FLAGS = tf.flags.FLAGS - def build_input_fn(mode, hparams, data_file_patterns=None, num_datashards=None, - fixed_problem=None): + fixed_problem=None, + worker_replicas=None, + worker_id=None): """Provides input to the graph, either from disk or via a placeholder. This function produces an input function that will feed data into @@ -58,6 +57,10 @@ def build_input_fn(mode, num_datashards: An integer. fixed_problem: An integer indicating the problem to fetch data for, or None if the input is to be randomly selected. + worker_replicas: int, number of worker replicas. Used in multiproblem + setting with hparams.problem_choice == distributed. + worker_id: int, id of this worker replica. Used in multiproblem setting with + hparams.problem_choice == distributed. Returns: A function that returns a dictionary of features and the target labels. @@ -78,7 +81,7 @@ def input_fn(): Raises: ValueError: if one of the parameters has an unsupported value. """ - problem_count, batches = len(data_file_patterns), [] + problem_count, batches = len(hparams.problems), [] with tf.name_scope("input_reader"): for n in xrange(problem_count): if fixed_problem is not None and n != fixed_problem: @@ -89,9 +92,9 @@ def input_fn(): with tf.device("/cpu:0"): # Input reading on CPU capacity = p_hparams.max_expected_batch_size_per_shard capacity *= num_datashards - examples = data_reader.input_pipeline(problem_instance, - data_file_patterns[n], - capacity, mode, hparams) + examples = data_reader.input_pipeline( + problem_instance, data_file_patterns and data_file_patterns[n], + capacity, mode, hparams) feature_map = data_reader.batch_examples( examples, data_reader.hparams_to_batching_scheme( @@ -149,9 +152,9 @@ def input_fn(): tf.reshape(loss_moving_avgs, [1, -1]), 1) problem_choice = tf.to_int32(tf.squeeze(problem_choice)) elif hparams.problem_choice == "distributed": - assert FLAGS.worker_replicas >= problem_count - assert FLAGS.worker_replicas % problem_count == 0 - problem_choice = tf.to_int32(FLAGS.worker_id % problem_count) + assert worker_replicas >= problem_count + assert worker_replicas % problem_count == 0 + problem_choice = tf.to_int32(worker_id % problem_count) else: raise ValueError( "Value of hparams.problem_choice is %s and must be " diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index c5f3296ee..9e869c15c 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -177,14 +177,18 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): hparams=hparams, data_file_patterns=get_data_filepatterns(data_dir, tf.contrib.learn.ModeKeys.TRAIN), - num_datashards=num_datashards) + num_datashards=num_datashards, + worker_replicas=FLAGS.worker_replicas, + worker_id=FLAGS.worker_id) eval_input_fn = input_fn_builder.build_input_fn( mode=tf.contrib.learn.ModeKeys.EVAL, hparams=hparams, data_file_patterns=get_data_filepatterns(data_dir, tf.contrib.learn.ModeKeys.EVAL), - num_datashards=num_datashards) + num_datashards=num_datashards, + worker_replicas=FLAGS.worker_replicas, + worker_id=FLAGS.worker_id) estimator = tf.contrib.learn.Estimator( model_fn=model_builder.build_model_fn(model_name, hparams=hparams), model_dir=output_dir, From 21404237d7c12c9a650603c8ab6391cd1a5438b4 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 3 Aug 2017 13:59:04 -0700 Subject: [PATCH 0221/4095] Add PEPTIDE SpaceID and enable TokenTextEncoder to take a list of tokens PiperOrigin-RevId: 164177483 --- tensor2tensor/data_generators/problem.py | 2 + tensor2tensor/data_generators/text_encoder.py | 98 ++++++++++++------- 2 files changed, 64 insertions(+), 36 deletions(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 3d30ec239..72334b76d 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -84,6 +84,8 @@ class SpaceID(object): REAL = 24 # Images IMAGE = 25 + # Peptide + PEPTIDE = 26 class Problem(object): diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index cd6ca0eea..ad9c04c96 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -48,7 +48,6 @@ else: RESERVED_TOKENS_BYTES = [bytes(PAD, "ascii"), bytes(EOS, "ascii")] - # Regular expression for unescaping token strings. # '\u' is converted to '_' # '\\' is converted to '\' @@ -154,14 +153,21 @@ def vocab_size(self): class TokenTextEncoder(TextEncoder): - """Encoder based on a user-supplied vocabulary.""" + """Encoder based on a user-supplied vocabulary (file or list).""" - def __init__(self, vocab_filename, reverse=False, + def __init__(self, + vocab_filename, + reverse=False, + vocab_list=None, num_reserved_ids=NUM_RESERVED_TOKENS): - """Initialize from a file, one token per line.""" + """Initialize from a file or list, one token per line.""" super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse - self._load_vocab_from_file(vocab_filename) + if vocab_filename: + self._init_vocab_from_file(vocab_filename) + else: + assert vocab_list is not None + self._init_vocab_from_list(vocab_list) def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" @@ -179,22 +185,40 @@ def vocab_size(self): def _safe_id_to_token(self, idx): return self._id_to_token.get(idx, "ID_%d" % idx) - def _load_vocab_from_file(self, filename): + def _init_vocab_from_file(self, filename): """Load vocab from a file.""" - self._token_to_id = {} + + def token_gen(): + with tf.gfile.Open(filename) as f: + for line in f: + token = line.strip() + yield token + + self._init_vocab(token_gen()) + + def _init_vocab_from_list(self, vocab_list): + + def token_gen(): + for token in vocab_list: + yield token + + self._init_vocab(token_gen()) + + def _init_vocab(self, token_generator): + """Initialize vocabulary with tokens from token_generator.""" self._id_to_token = {} - for idx, tok in enumerate(RESERVED_TOKENS): - self._token_to_id[tok] = idx - self._id_to_token[idx] = tok + # Add reserved tokens + self._id_to_token.update(dict(list(enumerate(RESERVED_TOKENS)))) - token_start_idx = self._num_reserved_ids - with tf.gfile.Open(filename) as f: - for i, line in enumerate(f): - idx = token_start_idx + i - tok = line.strip() - self._token_to_id[tok] = idx - self._id_to_token[idx] = tok + token_id = len(RESERVED_TOKENS) + for token in token_generator: + self._id_to_token[token_id] = token + token_id += 1 + + # _token_to_id is the reverse of _id_to_token + self._token_to_id = dict([(v, k) + for k, v in six.iteritems(self._id_to_token)]) def _escape_token(token, alphabet): @@ -218,9 +242,7 @@ def _escape_token(token, alphabet): raise ValueError("Expected string type for token, got %s" % type(token)) token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") - ret = [ - c if c in alphabet and c != u"\n" else r"\%d;" % ord(c) - for c in token] + ret = [c if c in alphabet and c != u"\n" else r"\%d;" % ord(c) for c in token] return u"".join(ret) + "_" @@ -233,6 +255,7 @@ def _unescape_token(escaped_token): Returns: token: a unicode string """ + def match(m): if m.group(1) is None: return u"_" if m.group(0) == u"\\u" else u"\\" @@ -294,8 +317,8 @@ def encode(self, raw_text): Returns: a list of integers in the range [0, vocab_size) """ - return self._tokens_to_subtoken_ids(tokenizer.encode( - native_to_unicode(raw_text))) + return self._tokens_to_subtoken_ids( + tokenizer.encode(native_to_unicode(raw_text))) def decode(self, subtokens): """Converts a sequence of subtoken ids to a native string. @@ -305,8 +328,8 @@ def decode(self, subtokens): Returns: a native string """ - return unicode_to_native(tokenizer.decode( - self._subtoken_ids_to_tokens(subtokens))) + return unicode_to_native( + tokenizer.decode(self._subtoken_ids_to_tokens(subtokens))) @property def vocab_size(self): @@ -323,8 +346,9 @@ def _tokens_to_subtoken_ids(self, tokens): """ ret = [] for token in tokens: - ret.extend(self._escaped_token_to_subtoken_ids( - _escape_token(token, self._alphabet))) + ret.extend( + self._escaped_token_to_subtoken_ids( + _escape_token(token, self._alphabet))) return ret def _subtoken_ids_to_tokens(self, subtokens): @@ -386,7 +410,8 @@ def _escaped_token_to_subtoken_ids(self, escaped_token): """ return [ self._subtoken_string_to_id[subtoken] - for subtoken in self._escaped_token_to_subtoken_strings(escaped_token)] + for subtoken in self._escaped_token_to_subtoken_strings(escaped_token) + ] @classmethod def build_to_target_size(cls, @@ -414,17 +439,16 @@ def build_to_target_size(cls, ValueError: If `min_val` is greater than `max_val`. """ if min_val > max_val: - raise ValueError( - "Lower bound for the minimum token count " - "is greater than the upper bound.") + raise ValueError("Lower bound for the minimum token count " + "is greater than the upper bound.") def bisect(min_val, max_val): """Bisection to find the right size.""" present_count = (max_val + min_val) // 2 tf.logging.info("Trying min_count %d" % present_count) subtokenizer = cls() - subtokenizer.build_from_token_counts( - token_counts, present_count, num_iterations) + subtokenizer.build_from_token_counts(token_counts, present_count, + num_iterations) # If min_val == max_val, we can't do any better than this. if subtokenizer.vocab_size == target_size or min_val >= max_val: @@ -498,7 +522,7 @@ def build_from_token_counts(self, # Consider the candidates longest to shortest, so that if we accept # a longer subtoken string, we can decrement the counts of its prefixes. new_subtoken_strings = [] - for lsub in xrange(len(len_to_subtoken_strings)-1, 0, -1): + for lsub in xrange(len(len_to_subtoken_strings) - 1, 0, -1): subtoken_strings = len_to_subtoken_strings[lsub] for subtoken_string in subtoken_strings: count = subtoken_counts[subtoken_string] @@ -511,8 +535,8 @@ def build_from_token_counts(self, subtoken_counts[subtoken_string[:l]] -= count # Include the alphabet explicitly to guarantee all strings are encodable. - new_subtoken_strings.extend( - (subtoken_counts.get(a, 0), a) for a in self._alphabet) + new_subtoken_strings.extend((subtoken_counts.get(a, 0), a) + for a in self._alphabet) new_subtoken_strings.sort(reverse=True) # Reinitialize to the candidate vocabulary. @@ -535,7 +559,9 @@ def _init_subtokens_from_list(self, subtoken_strings, reserved=0): # check arbitrarily long strings. self._max_subtoken_len = max([len(s) for s in subtoken_strings]) self._subtoken_string_to_id = { - s: i+reserved for i, s in enumerate(subtoken_strings) if s} + s: i + reserved + for i, s in enumerate(subtoken_strings) if s + } def _init_alphabet_from_tokens(self, tokens): """Initialize alphabet from an iterable of token or subtoken strings.""" From 34a961f0d4f9fa38d8dddc9df1d3366b1d7703cf Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 3 Aug 2017 14:56:08 -0700 Subject: [PATCH 0222/4095] Add desc2code problem (From the OpenAI Description2Code dataset). PiperOrigin-RevId: 164187018 --- tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/desc2code.py | 246 ++++++++++++++++++ tensor2tensor/data_generators/problem.py | 2 + 3 files changed, 249 insertions(+) create mode 100644 tensor2tensor/data_generators/desc2code.py diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 9be133a61..af2030d89 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -22,6 +22,7 @@ from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import audio +from tensor2tensor.data_generators import desc2code from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import ptb diff --git a/tensor2tensor/data_generators/desc2code.py b/tensor2tensor/data_generators/desc2code.py new file mode 100644 index 000000000..52513e63c --- /dev/null +++ b/tensor2tensor/data_generators/desc2code.py @@ -0,0 +1,246 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for the Description2Code OpenAI data-set.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import random +import zipfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import registry + +import tensorflow as tf + + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +_DATASET_URL = "https://drive.google.com/uc?export=download&id=0Bz3fihKG133ceWNFQTQ5S0xhZUk" +_DATASET_FILENAME = "description2code_current.zip" +_DATASET_PB_PATH = "description2code_current/" + +_DESC_DIR_NAME = "description" +_CODE_PY_DIR_NAME = "solutions_python" + +_VOCAB_EN_FILENAME = "vocab_desc2code_tok_en" +_VOCAB_PY_FILENAME = "vocab_desc2code_tok_py" + +# Struct containing a coding problem (contains the paths to the descriptions +# and code files) +CodingPbInfo = collections.namedtuple("CodingPbInfo", "desc_file, code_files") + + +class Desc2CodeProblem(problem.Text2TextProblem): + """Base class for Description2Code problems.""" + + @property + def is_character_level(self): + return False + + @property + def num_shards(self): + return 100 + + @property + def use_subword_tokenizer(self): + return True + + +@registry.register_problem("desc2code_py") +class Desc2CodePyProblem(Desc2CodeProblem): + """Description2Code for python problem.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.PY_TOK + + @property + def vocab_input_filename(self): + return "{}.{}".format(_VOCAB_EN_FILENAME, self.targeted_vocab_size) + + @property + def vocab_target_filename(self): + return "{}.{}".format(_VOCAB_PY_FILENAME, self.targeted_vocab_size) + + def train_generator(self, data_dir, tmp_dir, train): + # Called twice: for train and test + + # Get the list of the training samples (coding challenge samples) + samples = list(generator_samples(tmp_dir)) + + # Split between train and dev + # Suffle to get problems from diverse sources (CodeChef and CodeForces) and + # dificulties in each set. + # Need to sort the samples first before shuffling (as walk() isn't + # deterministic) + samples.sort(key=lambda x: x.desc_file) # in-place + rng = random.Random(7531) # Local fixed seed + rng.shuffle(samples) # in-place + + # Train: 5019/5228 problems + # Dev: 209/5228 problems + len_samples = len(samples) + split = len_samples // 25 + samples = samples[split:] if train else samples[:split] + tf.logging.info("Number of samples for {}: {}/{}".format( + "train" if train else "dev", + len(samples), + len_samples + )) + + def generator_samples_content(get_source, get_target): + source, target = None, None + # Iterate over the coding samples + for sample in samples: + if get_source: + with tf.gfile.GFile(sample.desc_file, mode="r") as source_file: + source = source_file.read() + + if get_target: + # Each challenge can have multiple implementations (or none) + for code_file in sample.code_files: + with tf.gfile.GFile(code_file, mode="r") as target_file: + target = target_file.read() + yield source, target + elif sample.code_files: # Only take the source if a target exists + yield source, target + + def generator_source(): + for source, _ in generator_samples_content(True, False): + yield source.strip() + + def generator_target(): + for _, target in generator_samples_content(False, True): + yield target.strip() + + # Generate vocab for both source and target + + source_vocab = generator_utils.get_or_generate_vocab_inner( + data_dir=data_dir, + vocab_filename=self.vocab_input_filename, + vocab_size=self.targeted_vocab_size, + generator_fn=generator_source, + ) + + target_vocab = generator_utils.get_or_generate_vocab_inner( + data_dir=data_dir, + vocab_filename=self.vocab_target_filename, + vocab_size=self.targeted_vocab_size, + generator_fn=generator_target, + ) + + # Yield the training and testing samples + eos_list = [EOS] + for source, target in generator_samples_content(True, True): + source_ints = source_vocab.encode(source.strip()) + eos_list + target_ints = target_vocab.encode(target.strip()) + eos_list + yield { + "inputs": source_ints, + "targets": target_ints, + } + + def feature_encoders(self, data_dir): + source_vocab_filename = os.path.join(data_dir, self.vocab_input_filename) + target_vocab_filename = os.path.join(data_dir, self.vocab_target_filename) + source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_token, + "targets": target_token, + } + + +# Utils functions + + +def generator_samples(tmp_dir): + """Generator for the dataset samples. + + If not present, download and extract the dataset. + + Args: + tmp_dir: path to the directory where to download the dataset. + + Yields: + A CodingPbInfo object containing the next challenge informations. + """ + # Step1: Download dataset (eventually) + data_zip_path = generator_utils.maybe_download_from_drive( + directory=tmp_dir, + filename=_DATASET_FILENAME, + url=_DATASET_URL, + ) + tf.logging.info("Data downloaded in: {}".format(data_zip_path)) + + # Step2: Extract dataset + # We could deduce _DATASET_PB_PATH from the zip file (instead of + # hardcoded path) + data_rootdir = os.path.join(tmp_dir, _DATASET_PB_PATH) + if not tf.gfile.Exists(data_rootdir): + with zipfile.ZipFile(data_zip_path, "r") as corpus_zip: + corpus_zip.extractall(tmp_dir) + # We could remove the extracted __MACOSX folder + tf.logging.info("Data extracted in: {}".format(tmp_dir)) + else: + tf.logging.info("Data already extracted in: {}".format(tmp_dir)) + + # Step3: Extract the problems list on the extracted folder + def contains_samples(subdir, dirs, files): # pylint: disable=unused-argument + """Check that the folder contains a problem.""" + return ( + _DESC_DIR_NAME in dirs and + _CODE_PY_DIR_NAME in dirs + ) + + def next_sample(subdir, dirs, files): # pylint: disable=unused-argument + """Return the filenames of the problem.""" + # More could be extracted (like the expected inputs/outputs + # pairs, the problem difficulty, the names of the algorithmic techniques + # needed) + desc_file = os.path.join(subdir, _DESC_DIR_NAME, "description.txt") + code_rootdir = os.path.join(subdir, _CODE_PY_DIR_NAME) + code_files = [ + f for f in tf.gfile.Glob(os.path.join(code_rootdir, "*.txt")) + ] + return CodingPbInfo( + desc_file=desc_file, + code_files=code_files + ) + + # The dataset contains problem from two different sources (CodeChef + # and CodeForces). Due to the limited number of samples, all problems from + # both sources are merged + for w in tf.gfile.Walk(data_rootdir): + if contains_samples(*w): + yield next_sample(*w) + diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 72334b76d..fb7e53cb7 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -86,6 +86,8 @@ class SpaceID(object): IMAGE = 25 # Peptide PEPTIDE = 26 + # Python + PY_TOK = 27 class Problem(object): From 95ee9e5b2e979c22ed81bf78dd62f7a6cb42de84 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 3 Aug 2017 15:20:06 -0700 Subject: [PATCH 0223/4095] added transformer_moe - a transformer model with mixtures-of-experts. PiperOrigin-RevId: 164190826 --- tensor2tensor/models/models.py | 1 + tensor2tensor/models/transformer_moe.py | 216 ++++++++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100644 tensor2tensor/models/transformer_moe.py diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index c2a904888..963975780 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -37,5 +37,6 @@ from tensor2tensor.models import slicenet from tensor2tensor.models import transformer from tensor2tensor.models import transformer_alternative +from tensor2tensor.models import transformer_moe from tensor2tensor.models import xception # pylint: enable=unused-import diff --git a/tensor2tensor/models/transformer_moe.py b/tensor2tensor/models/transformer_moe.py new file mode 100644 index 000000000..8072f2cf8 --- /dev/null +++ b/tensor2tensor/models/transformer_moe.py @@ -0,0 +1,216 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""transformer (attention seq-seq model) with mixtures of experts. + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers +from tensor2tensor.models import transformer +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +@registry.register_model +class TransformerMoe(t2t_model.T2TModel): + """Attention net. See file docstring.""" + + def model_fn_body_sharded(self, sharded_features): + hparams = self._hparams + dp = self._data_parallelism + targets = sharded_features["targets"] + inputs = sharded_features["inputs"] + target_space = sharded_features["target_space_id"] + + inputs = dp(common_layers.flatten4d3d, inputs) + targets = dp(common_layers.flatten4d3d, targets) + + (encoder_input, encoder_self_attention_bias, + encoder_decoder_attention_bias) = dp( + transformer.transformer_prepare_encoder, + inputs, target_space, hparams) + (decoder_input, decoder_self_attention_bias) = dp( + transformer.transformer_prepare_decoder, targets, hparams) + residual_fn = transformer.get_residual_fn(hparams) + encoder_input = dp(tf.nn.dropout, encoder_input, + 1.0 - hparams.residual_dropout) + decoder_input = dp(tf.nn.dropout, decoder_input, + 1.0 - hparams.residual_dropout) + extra_loss = 0 + x = encoder_input + for layer in xrange(hparams.num_hidden_layers): + with tf.variable_scope("encoder_layer_%d" % layer): + with tf.variable_scope("encoder_self_attention"): + y = dp( + common_attention.multihead_attention, + x, + None, + encoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout) + x = dp(residual_fn, x, y) + with tf.variable_scope("ffn"): + if str(layer) in hparams.moe_layers_encoder.split(","): + y, loss = common_layers.moe_layer( + dp, self._ps_devices, x, + hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, + hparams.moe_n2, hparams.moe_loss_coef) + extra_loss += loss + else: + y = dp( + common_layers.conv_hidden_relu, + x, + hparams.filter_size, + hparams.hidden_size, + dropout=hparams.relu_dropout) + x = dp(residual_fn, x, y) + encoder_output = x + x = decoder_input + for layer in xrange(hparams.num_hidden_layers): + with tf.variable_scope("decoder_layer_%d" % layer): + with tf.variable_scope("decoder_self_attention"): + y = dp( + common_attention.multihead_attention, + x, + None, + decoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout) + x = dp(residual_fn, x, y) + with tf.variable_scope("encoder_decoder_attention"): + y = dp( + common_attention.multihead_attention, + x, + encoder_output, + encoder_decoder_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout) + x = dp(residual_fn, x, y) + with tf.variable_scope("ffn"): + if str(layer) in hparams.moe_layers_decoder.split(","): + y, loss = common_layers.moe_layer( + dp, self._ps_devices, x, + hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, + hparams.moe_n2, hparams.moe_loss_coef) + extra_loss += loss + else: + y = dp( + common_layers.conv_hidden_relu, + x, + hparams.filter_size, + hparams.hidden_size, + dropout=hparams.relu_dropout) + x = dp(residual_fn, x, y) + decoder_output = dp(tf.expand_dims, x, 2) + return decoder_output, extra_loss + + +@registry.register_hparams +def transformer_moe_base(): + """Set of hyperparameters.""" + hparams = common_hparams.basic_params1() + hparams.norm_type = "layer" + hparams.hidden_size = 512 + hparams.batch_size = 4096 + hparams.max_length = 2001 + hparams.max_input_seq_length = 2000 + hparams.max_target_seq_length = 2000 + hparams.dropout = 0.0 + hparams.clip_grad_norm = 0. # i.e. no gradient clipping + hparams.optimizer_adam_epsilon = 1e-9 + hparams.learning_rate_decay_scheme = "noam" + hparams.learning_rate = 0.1 + hparams.learning_rate_warmup_steps = 4000 + hparams.initializer_gain = 1.0 + hparams.num_hidden_layers = 5 + hparams.initializer = "uniform_unit_scaling" + hparams.weight_decay = 0.0 + hparams.optimizer_adam_beta1 = 0.9 + hparams.optimizer_adam_beta2 = 0.98 + hparams.num_sampled_classes = 0 + hparams.label_smoothing = 0.0 + hparams.shared_embedding_and_softmax_weights = int(True) + + hparams.add_hparam("filter_size", 2048) # Add new ones like this. + # attention-related flags + hparams.add_hparam("num_heads", 8) + hparams.add_hparam("attention_key_channels", 0) + hparams.add_hparam("attention_value_channels", 0) + hparams.add_hparam("ffn_layer", "conv_hidden_relu") + hparams.add_hparam("parameter_attention_key_channels", 0) + hparams.add_hparam("parameter_attention_value_channels", 0) + # All hyperparameters ending in "dropout" are automatically set to 0.0 + # when not in training mode. + hparams.add_hparam("attention_dropout", 0.0) + hparams.add_hparam("relu_dropout", 0.0) + hparams.add_hparam("residual_dropout", 0.1) + hparams.add_hparam("pos", "timing") # timing, none + hparams.add_hparam("nbr_decoder_problems", 1) + hparams.add_hparam("proximity_bias", int(False)) + # FLAGS RELATED TO MIXTURE-OF-EXPERTS + # comma-separated list of layer numbers. + # At each of these layers, we replace the ffn with a mixture of experts. + hparams.add_hparam("moe_layers_encoder", "2") + hparams.add_hparam("moe_layers_decoder", "2") + # If moe_n2 is None, then use a flat MoE with moe_n1 experts. + # If moe_n2 is an integer, then use a hierarchical MoE + # consisting of moe_n1 groups of moe_n2 experts each. + hparams.add_hparam("moe_n1", 32) + hparams.add_hparam("moe_n2", 0) + hparams.add_hparam("moe_hidden_size", 2048) + hparams.add_hparam("moe_loss_coef", 1e-2) + return hparams + + +@registry.register_hparams +def transformer_no_moe(): + """Without the mixture of experts (for comparison).""" + hparams = transformer_moe_base() + hparams.moe_layers_encoder = "" + hparams.moe_layers_decoder = "" + return hparams + + +@registry.register_hparams +def transformer_moe_1b(): + """1-billion parameter model - requires multi-gpu sync training.""" + hparams = transformer_moe_base() + hparams.moe_n1 = 128 + hparams.moe_layers_encoder = "1,3" + hparams.moe_layers_decoder = "1,3" + return hparams From 554973f1d4d8b93b466ec1b428a58e3359356519 Mon Sep 17 00:00:00 2001 From: Alexander Ku <alexku@google.com> Date: Thu, 3 Aug 2017 16:42:53 -0700 Subject: [PATCH 0224/4095] Adding a minimum viable DNA data encoder. PiperOrigin-RevId: 164201984 --- tensor2tensor/data_generators/dna_encoder.py | 124 ++++++++++++++++++ .../data_generators/dna_encoder_test.py | 52 ++++++++ .../data_generators/gene_expression.py | 68 +--------- .../data_generators/gene_expression_test.py | 5 +- 4 files changed, 183 insertions(+), 66 deletions(-) create mode 100644 tensor2tensor/data_generators/dna_encoder.py create mode 100644 tensor2tensor/data_generators/dna_encoder_test.py diff --git a/tensor2tensor/data_generators/dna_encoder.py b/tensor2tensor/data_generators/dna_encoder.py new file mode 100644 index 000000000..0f6a8d68f --- /dev/null +++ b/tensor2tensor/data_generators/dna_encoder.py @@ -0,0 +1,124 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Encoders for DNA data. + +* DNAEncoder: ACTG strings to ints and back +* DelimitedDNAEncoder: for delimited subsequences +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin +from tensor2tensor.data_generators import text_encoder + + +class DNAEncoder(text_encoder.TextEncoder): + """ACTG strings to ints and back. Optionally chunks bases into single ids. + + To use a different character set, subclass and set BASES to the char set. UNK + and PAD must not appear in the char set, but can also be reset. + + Uses 'N' as an unknown base. + """ + BASES = list("ACTG") + UNK = "N" + PAD = "0" + + def __init__(self, + chunk_size=1, + num_reserved_ids=text_encoder.NUM_RESERVED_TOKENS): + super(DNAEncoder, self).__init__(num_reserved_ids=num_reserved_ids) + # Build a vocabulary of chunks of size chunk_size + self._chunk_size = chunk_size + tokens = self._tokens() + tokens.sort() + ids = range(self._num_reserved_ids, len(tokens) + self._num_reserved_ids) + self._ids_to_tokens = dict(zip(ids, tokens)) + self._tokens_to_ids = dict(zip(tokens, ids)) + + def _tokens(self): + chunks = [] + for size in range(1, self._chunk_size + 1): + c = itertools.product(self.BASES + [self.UNK], repeat=size) + num_pad = self._chunk_size - size + padding = (self.PAD,) * num_pad + c = [el + padding for el in c] + chunks.extend(c) + return chunks + + @property + def vocab_size(self): + return len(self._ids_to_tokens) + self._num_reserved_ids + + def encode(self, s): + bases = list(s) + extra = len(bases) % self._chunk_size + if extra > 0: + pad = [self.PAD] * (self._chunk_size - extra) + bases.extend(pad) + assert (len(bases) % self._chunk_size) == 0 + num_chunks = len(bases) // self._chunk_size + ids = [] + for chunk_idx in xrange(num_chunks): + start_idx = chunk_idx * self._chunk_size + end_idx = start_idx + self._chunk_size + chunk = tuple(bases[start_idx:end_idx]) + if chunk not in self._tokens_to_ids: + raise ValueError("Unrecognized token %s" % chunk) + ids.append(self._tokens_to_ids[chunk]) + return ids + + def decode(self, ids): + bases = [] + for idx in ids: + if idx >= self._num_reserved_ids: + chunk = self._ids_to_tokens[idx] + if self.PAD in chunk: + chunk = chunk[:chunk.index(self.PAD)] + else: + chunk = [text_encoder.RESERVED_TOKENS[idx]] + bases.extend(chunk) + return "".join(bases) + + +class DelimitedDNAEncoder(DNAEncoder): + """DNAEncoder for delimiter separated subsequences. + + Uses ',' as default delimiter. + """ + + def __init__(self, delimiter=",", **kwargs): + self._delimiter = delimiter + super(DelimitedDNAEncoder, self).__init__(**kwargs) + + @property + def delimiter(self): + return self._delimiter + + def _tokens(self): + return super(DelimitedDNAEncoder, self)._tokens() + [self.delimiter] + + def encode(self, delimited_string): + ids = [] + for s in delimited_string.split(self.delimiter): + ids.extend(super(DelimitedDNAEncoder, self).encode(s)) + ids.append(self._tokens_to_ids[self.delimiter]) + return ids[:-1] diff --git a/tensor2tensor/data_generators/dna_encoder_test.py b/tensor2tensor/data_generators/dna_encoder_test.py new file mode 100644 index 000000000..a84f06442 --- /dev/null +++ b/tensor2tensor/data_generators/dna_encoder_test.py @@ -0,0 +1,52 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for tensor2tensor.data_generators.dna_encoder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.data_generators import dna_encoder +import tensorflow as tf + + +class DnaEncoderTest(tf.test.TestCase): + + def test_encode_decode(self): + original = 'TTCGCGGNNNAACCCAACGCCATCTATGTANNTTGAGTTGTTGAGTTAAA' + + # Encoding should be reversible for any reasonable chunk size. + for chunk_size in [1, 2, 4, 6, 8]: + encoder = dna_encoder.DNAEncoder(chunk_size=chunk_size) + encoded = encoder.encode(original) + decoded = encoder.decode(encoded) + self.assertEqual(original, decoded) + + def test_delimited_dna_encoder(self): + original = 'TTCGCGGNNN,AACCCAACGC,CATCTATGTA,NNTTGAGTTG,TTGAGTTAAA' + + # Encoding should be reversible for any reasonable chunk size. + for chunk_size in [1, 2, 4, 6, 8]: + encoder = dna_encoder.DelimitedDNAEncoder(chunk_size=chunk_size) + encoded = encoder.encode(original) + decoded = encoder.decode(encoded) + self.assertEqual(original, decoded) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensor2tensor/data_generators/gene_expression.py b/tensor2tensor/data_generators/gene_expression.py index 82c15414a..d314cec59 100644 --- a/tensor2tensor/data_generators/gene_expression.py +++ b/tensor2tensor/data_generators/gene_expression.py @@ -35,7 +35,6 @@ from __future__ import division from __future__ import print_function -import itertools import math import multiprocessing as mp import os @@ -47,6 +46,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin +from tensor2tensor.data_generators import dna_encoder from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder @@ -56,7 +56,6 @@ import tensorflow as tf MAX_CONCURRENT_PROCESSES = 10 -_bases = list("ACTG") class GeneExpressionProblem(problem.Problem): @@ -82,7 +81,7 @@ def chunk_size(self): def feature_encoders(self, data_dir): del data_dir return { - "inputs": DNAEncoder(chunk_size=self.chunk_size), + "inputs": dna_encoder.DNAEncoder(chunk_size=self.chunk_size), # TODO(rsepassi): RealEncoder? "targets": text_encoder.TextEncoder() } @@ -244,7 +243,7 @@ def dataset_generator(filepath, chunk_size=1, start_idx=None, end_idx=None): - encoder = DNAEncoder(chunk_size=chunk_size) + encoder = dna_encoder.DNAEncoder(chunk_size=chunk_size) with h5py.File(filepath, "r") as h5_file: # Get input keys from h5_file src_keys = [s % dataset for s in ["%s_in", "%s_na", "%s_out"]] @@ -278,7 +277,7 @@ def to_example_dict(encoder, inputs, mask, outputs): while idx != last_idx + 1: bases.append(encoder.UNK) last_idx += 1 - bases.append(_bases[base_id]) + bases.append(encoder.BASES[base_id]) last_idx = idx assert len(inputs) == len(bases) @@ -297,62 +296,3 @@ def to_example_dict(encoder, inputs, mask, outputs): ex_dict = dict( zip(example_keys, [input_ids, targets_mask, targets, targets_shape])) return ex_dict - - -class DNAEncoder(text_encoder.TextEncoder): - """ACTG strings to ints and back. Optionally chunks bases into single ids. - - Uses 'X' as an unknown base. - """ - UNK = "X" - PAD = "0" - - def __init__(self, - chunk_size=1, - num_reserved_ids=text_encoder.NUM_RESERVED_TOKENS): - super(DNAEncoder, self).__init__(num_reserved_ids=num_reserved_ids) - # Build a vocabulary of chunks of size chunk_size - self._chunk_size = chunk_size - chunks = [] - for size in range(1, chunk_size + 1): - c = itertools.product(_bases + [DNAEncoder.UNK], repeat=size) - num_pad = chunk_size - size - padding = (DNAEncoder.PAD,) * num_pad - c = [el + padding for el in c] - chunks.extend(c) - chunks.sort() - ids = range(self._num_reserved_ids, len(chunks) + self._num_reserved_ids) - self._ids_to_chunk = dict(zip(ids, chunks)) - self._chunks_to_ids = dict(zip(chunks, ids)) - - @property - def vocab_size(self): - return len(self._ids_to_chunk) + self._num_reserved_ids - - def encode(self, s): - bases = list(s) - pad = [DNAEncoder.PAD] * (len(bases) % self._chunk_size) - bases.extend(pad) - assert (len(bases) % self._chunk_size) == 0 - num_chunks = len(bases) // self._chunk_size - ids = [] - for chunk_idx in xrange(num_chunks): - start_idx = chunk_idx * self._chunk_size - end_idx = start_idx + self._chunk_size - chunk = tuple(bases[start_idx:end_idx]) - if chunk not in self._chunks_to_ids: - raise ValueError("Unrecognized chunk %s" % chunk) - ids.append(self._chunks_to_ids[chunk]) - return ids - - def decode(self, ids): - bases = [] - for idx in ids: - if idx >= self._num_reserved_ids: - chunk = self._ids_to_chunk[idx] - if DNAEncoder.PAD in chunk: - chunk = chunk[:chunk.index(DNAEncoder.PAD)] - else: - chunk = [text_encoder.RESERVED_TOKENS[idx]] - bases.extend(chunk) - return "".join(bases) diff --git a/tensor2tensor/data_generators/gene_expression_test.py b/tensor2tensor/data_generators/gene_expression_test.py index 2d7bbe832..797170070 100644 --- a/tensor2tensor/data_generators/gene_expression_test.py +++ b/tensor2tensor/data_generators/gene_expression_test.py @@ -22,6 +22,7 @@ import numpy as np +from tensor2tensor.data_generators import dna_encoder from tensor2tensor.data_generators import gene_expression import tensorflow as tf @@ -40,8 +41,8 @@ def _oneHotBases(self, bases): return np.array(one_hots) def testRecordToExample(self): - encoder = gene_expression.DNAEncoder(chunk_size=2) - raw_inputs = ["A", "C", "G", "X", "C", "T"] + encoder = dna_encoder.DNAEncoder(chunk_size=2) + raw_inputs = ["A", "C", "G", "N", "C", "T"] # Put in numpy arrays in the same format as in the h5 file inputs = self._oneHotBases(raw_inputs) From 7efdbeebe777dfcbf005e335c620db4f810ecd16 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 3 Aug 2017 17:26:41 -0700 Subject: [PATCH 0225/4095] Small transformer models (reasonable translations in 1h on 1080). PiperOrigin-RevId: 164207044 --- tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/cipher.py | 251 ++++++++++++++++++ tensor2tensor/layers/common_layers.py | 7 + tensor2tensor/models/transformer.py | 15 +- 4 files changed, 272 insertions(+), 2 deletions(-) create mode 100644 tensor2tensor/data_generators/cipher.py diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index af2030d89..ca6dccfda 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -22,6 +22,7 @@ from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import audio +from tensor2tensor.data_generators import cipher from tensor2tensor.data_generators import desc2code from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b diff --git a/tensor2tensor/data_generators/cipher.py b/tensor2tensor/data_generators/cipher.py new file mode 100644 index 000000000..3a743337a --- /dev/null +++ b/tensor2tensor/data_generators/cipher.py @@ -0,0 +1,251 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cipher data generators.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import deque + +# Dependency imports + +import numpy as np + +from tensor2tensor.data_generators import algorithmic +from tensor2tensor.utils import registry + + +@registry.register_problem +class CipherShift5(algorithmic.AlgorithmicProblem): + """Shift cipher.""" + + @property + def num_symbols(self): + return 5 + + @property + def distribution(self): + return [0.4, 0.3, 0.2, 0.08, 0.02] + + @property + def shift(self): + return 1 + + @property + def train_generator(self): + """Generator; takes 3 args: nbr_symbols, max_length, nbr_cases.""" + + def _gen(nbr_symbols, max_length, nbr_cases): + plain_vocab = range(nbr_symbols) + indices = generate_plaintext_random(plain_vocab, self.distribution, + nbr_cases, max_length) + codes = encipher_shift(indices, plain_vocab, self.shift) + + for plain, code in zip(indices, codes): + yield { + "X": plain, + "Y": code, + } + + return _gen + + @property + def train_length(self): + return 100 + + @property + def dev_length(self): + return self.train_length + + +@registry.register_problem +class CipherVigenere5(algorithmic.AlgorithmicProblem): + """Vinegre cipher.""" + + @property + def num_symbols(self): + return 5 + + @property + def distribution(self): + return [0.4, 0.3, 0.2, 0.08, 0.02] + + @property + def key(self): + return [1, 3] + + @property + def train_generator(self): + """Generator; takes 3 args: nbr_symbols, max_length, nbr_cases.""" + + def _gen(nbr_symbols, max_length, nbr_cases): + plain_vocab = range(nbr_symbols) + indices = generate_plaintext_random(plain_vocab, self.distribution, + nbr_cases, max_length) + codes = encipher_vigenere(indices, plain_vocab, self.key) + + for plain, code in zip(indices, codes): + yield { + "X": plain, + "Y": code, + } + + return _gen + + @property + def train_length(self): + return 200 + + @property + def dev_length(self): + return self.train_length + + +@registry.register_problem +class CipherShift200(CipherShift5): + """Shift cipher.""" + + @property + def num_symbols(self): + return 200 + + @property + def distribution(self): + vals = range(self.num_symbols) + val_sum = sum(vals) + return [v / val_sum for v in vals] + + +@registry.register_problem +class CipherVigenere200(CipherVigenere5): + """Vinegre cipher.""" + + @property + def num_symbols(self): + return 200 + + @property + def distribution(self): + vals = range(self.num_symbols) + val_sum = sum(vals) + return [v / val_sum for v in vals] + + @property + def key(self): + return [1, 3] + + +class Layer(object): + """A single layer for shift.""" + + def __init__(self, vocab, shift): + """Initialize shift layer. + + Args: + vocab: (list of String) the vocabulary + shift: (Integer) the amount of shift apply to the alphabet. + Positive number implies shift to the right, negative number + implies shift to the left. + """ + self.shift = shift + alphabet = vocab + shifted_alphabet = deque(alphabet) + shifted_alphabet.rotate(shift) + self.encrypt = dict(zip(alphabet, list(shifted_alphabet))) + self.decrypt = dict(zip(list(shifted_alphabet), alphabet)) + + def encrypt_character(self, character): + return self.encrypt[character] + + def decrypt_character(self, character): + return self.decrypt[character] + + +def generate_plaintext_random(plain_vocab, distribution, train_samples, + length): + """Generates samples of text from the provided vocabulary. + + Args: + plain_vocab: vocabulary. + distribution: distribution. + train_samples: samples for training. + length: length. + + Returns: + train_indices (np.array of Integers): random integers for training. + shape = [num_samples, length] + test_indices (np.array of Integers): random integers for testing. + shape = [num_samples, length] + plain_vocab (list of Integers): unique vocabularies. + """ + if distribution is not None: + assert len(distribution) == len(plain_vocab) + + train_indices = np.random.choice( + range(len(plain_vocab)), (train_samples, length), p=distribution) + + return train_indices + + +def encipher_shift(plaintext, plain_vocab, shift): + """Encrypt plain text with a single shift layer. + + Args: + plaintext (list of list of Strings): a list of plain text to encrypt. + plain_vocab (list of Integer): unique vocabularies being used. + shift (Integer): number of shift, shift to the right if shift is positive. + Returns: + ciphertext (list of Strings): encrypted plain text. + """ + ciphertext = [] + cipher = Layer(plain_vocab, shift) + + for _, sentence in enumerate(plaintext): + cipher_sentence = [] + for _, character in enumerate(sentence): + encrypted_char = cipher.encrypt_character(character) + cipher_sentence.append(encrypted_char) + ciphertext.append(cipher_sentence) + + return ciphertext + + +def encipher_vigenere(plaintext, plain_vocab, key): + """Encrypt plain text with given key. + + Args: + plaintext (list of list of Strings): a list of plain text to encrypt. + plain_vocab (list of Integer): unique vocabularies being used. + key (list of Integer): key to encrypt cipher using Vigenere table. + + Returns: + ciphertext (list of Strings): encrypted plain text. + """ + ciphertext = [] + # generate Vigenere table + layers = [] + for i in range(len(plain_vocab)): + layers.append(Layer(plain_vocab, i)) + + for i, sentence in enumerate(plaintext): + cipher_sentence = [] + for j, character in enumerate(sentence): + key_idx = key[j % len(key)] + encrypted_char = layers[key_idx].encrypt_character(character) + cipher_sentence.append(encrypted_char) + ciphertext.append(cipher_sentence) + + return ciphertext diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 8a58cd065..ea18322e4 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -59,6 +59,13 @@ def inverse_exp_decay(max_step, min_value=0.01): return inv_base**tf.maximum(float(max_step) - step, 0.0) +def inverse_lin_decay(max_step, min_value=0.01): + """Inverse-decay linearly from 0.01 to 1.0 reached at max_step.""" + step = tf.to_float(tf.contrib.framework.get_global_step()) + progress = tf.minimum(step / float(max_step), 1.0) + return progress * (1.0 - min_value) + min_value + + def shakeshake2_py(x, y, equal=False, individual=False): """The shake-shake sum of 2 tensors, python version.""" if equal: diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 1add44115..c9c87da07 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -386,8 +386,19 @@ def transformer_parsing_ice(): @registry.register_hparams def transformer_tiny(): hparams = transformer_base() - hparams.hidden_size = 64 - hparams.filter_size = 128 + hparams.num_hidden_layers = 2 + hparams.hidden_size = 128 + hparams.filter_size = 512 + hparams.num_heads = 4 + return hparams + + +@registry.register_hparams +def transformer_small(): + hparams = transformer_base() + hparams.num_hidden_layers = 2 + hparams.hidden_size = 256 + hparams.filter_size = 1024 hparams.num_heads = 4 return hparams From e8ae5894e40b8c18a37601762eefa51484bf4953 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 4 Aug 2017 13:52:42 -0700 Subject: [PATCH 0226/4095] Support for dictionary losses in model_fn_body to be consistent with model_fn_body_sharded. Also updated inline doc. PiperOrigin-RevId: 164305140 --- tensor2tensor/utils/t2t_model.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 7cb484bc8..3af4f10c1 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -469,7 +469,10 @@ def model_fn_body_sharded(self, sharded_features): _with_timing(self.model_fn_body, "model_fn_body"), datashard_to_features) if isinstance(output, tuple): - loss = {"extra": tf.reduce_mean(output[1])} + if isinstance(output[1], dict): + loss = output[1] + else: + loss = {"extra": tf.reduce_mean(output[1])} output = output[0] else: loss = {"extra": 0.0} @@ -483,10 +486,12 @@ def model_fn_body(self, features): Args: features: A dictionary of key to Tensor. Each Tensor has shape - `[batch_size, ?, ?, hidden_size]`. + [batch_size, ?, ?, hidden_size]. Returns: - a `Tensor` of logits with shape `[batch_size, O, P, body_output_size]`. + output: tensor of logits with shape [batch_size, O, P, body_output_size. + losses: either single loss as a scalar, a list, a tensor (to be averaged) + or a dictionary of losses. """ raise NotImplementedError("Abstract Method") From a0bd0177bf766c953041b7451398ab1791adb1e5 Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Fri, 4 Aug 2017 14:41:57 -0700 Subject: [PATCH 0227/4095] Reverted back to the previous masked_local_attention_1d because the current one was giving 0 losses indicating that it was peeking into the future. The way the attention bias was being added also seemed wrong. Renamed unmasked_local_attention_1d to local_attention_1d. The user can specify local_attention_1d if they want to look left and right of the query block. PiperOrigin-RevId: 164312109 --- tensor2tensor/layers/common_attention.py | 132 ++++++++---------- tensor2tensor/layers/common_attention_test.py | 4 +- 2 files changed, 57 insertions(+), 79 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index e343dba0a..a43afec47 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -361,122 +361,100 @@ def dot_product_attention(q, return tf.matmul(weights, v) -def masked_local_attention_1d(q, - k, - v, - block_length=128, - look_right=True, - use_whole_block=False, - name=None): - """Attention to the source position and a neigborhood around it. - - The sequence is divided into blocks of length block_size. Attention for a - given query position can only see memory positions within a certain number - of positions before and behind it. - - - If look_right is True then each query will attend to block_length//2 - positions either side, otherwise it will attend to block_length previous - positions. +def masked_local_attention_1d( + q, k, v, block_length=128, name=None): + """Attention to the source position and a neigborhood to the left of it. + + The sequence is divided into blocks of length block_size. + Attention for a given query position can only see memory positions + less than or equal to the query position, in the corresponding block + and the previous block. - If use_whole_block is True then no mask will be applied to the local blocks - meaning the full blocks are used (if look_right is True then the elements to - the right of the current position are still masked out). This allows to - attend to more elements without additional overhead, but means we have - inconsistent window positions and sizes. + If mask_right is True, then a target position cannot see greater source + positions. Args: - q: a Tensor with shape [batch, heads, length_q, depth_k] - k: a Tensor with shape [batch, heads, length_kv, depth_k] - v: a Tensor with shape [batch, heads, length_kv, depth_v] + q: a Tensor with shape [batch, heads, length, depth_k] + k: a Tensor with shape [batch, heads, length, depth_k] + v: a Tensor with shape [batch, heads, length, depth_v] block_length: an integer - look_right: a bool - use_whole_block: a bool name: an optional string Returns: a Tensor of shape [batch, heads, length, depth_v] """ - with tf.variable_scope( - name, default_name="local_attention_1d", values=[q, k, v]): + with tf.variable_scope(name, default_name="local_attention_1d", + values=[q, k, v]): v_shape = v.get_shape() batch = tf.shape(q)[0] heads = tf.shape(q)[1] length = tf.shape(q)[2] + # If (length < 2 * block_length), then we use only one block. + block_length = tf.where(tf.less(length, block_length * 2), + length, block_length) depth_k = tf.shape(q)[3] depth_v = tf.shape(v)[3] original_length = length - - # If (length < block_length), then we use only one block. - block_length = tf.where(tf.less(length, block_length), length, block_length) - # Pad to desired length. padding_size = tf.mod(-length, block_length) length += padding_size - num_blocks = tf.div(length, block_length) padding = [[0, 0], [0, 0], [0, padding_size], [0, 0]] q = tf.pad(q, padding) + k = tf.pad(k, padding) + v = tf.pad(v, padding) + num_blocks = tf.div(length, block_length) - if not look_right: - # Add extra padding so we son't have to do an initial query block. - extra_padding = [[0, 0], [0, 0], [block_length, padding_size], [0, 0]] - else: - # We shift everything over by half a block so query is in center. - pad_right = block_length // 2 - pad_left = block_length - pad_right - extra_padding = [[0, 0], [0, 0], [pad_left, padding_size + pad_right], - [0, 0]] - k = tf.pad(k, extra_padding) - v = tf.pad(v, extra_padding) - - # Reshape into blocks. + # compute attention for the first query block. + first_q = tf.slice(q, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_k = tf.slice(k, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_v = tf.slice(v, [0, 0, 0, 0], [-1, -1, block_length, -1]) + first_output = dot_product_attention( + first_q, first_k, first_v, attention_bias_lower_triangle(block_length), + name="fist_block") + + # compute attention for all subsequent query blocks. q = tf.reshape(q, [batch, heads, num_blocks, block_length, depth_k]) - k = tf.reshape(k, [batch, heads, num_blocks + 1, block_length, depth_k]) - v = tf.reshape(v, [batch, heads, num_blocks + 1, block_length, depth_v]) + k = tf.reshape(k, [batch, heads, num_blocks, block_length, depth_k]) + v = tf.reshape(v, [batch, heads, num_blocks, block_length, depth_v]) - # Get local blocks by slicing. def local(x): """Create a local version of the keys or values.""" - prev_block = tf.slice(x, [0, 0, 0, 0, 0], [-1, -1, num_blocks, -1, -1]) - cur_block = tf.slice(x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) + prev_block = tf.slice( + x, [0, 0, 0, 0, 0], [-1, -1, num_blocks - 1, -1, -1]) + cur_block = tf.slice( + x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) return tf.concat([prev_block, cur_block], 3) - local_k = local(k) local_v = local(v) - local_length = tf.shape(local_k)[3] + tail_q = tf.slice(q, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) - # [batch, heads, num_blocks, block_length, local_length] - attention = tf.matmul(q, local_k, transpose_b=True) - attention = tf.nn.softmax(attention) - - # Get local mask - if not use_whole_block: - good_part = tf.matrix_band_part( - tf.ones([block_length, local_length]), 0, tf.to_int64(block_length)) - elif not look_right: - good_part = tf.matrix_band_part( - tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) - else: - good_part = tf.ones([block_length, local_length]) + local_length = tf.shape(local_k)[3] - attention *= tf.reshape(good_part, [1, 1, 1, block_length, local_length]) + # [batch, heads, num_blocks - 1, block_length, local_length] + attention = tf.matmul(tail_q, local_k, transpose_b=True) + # make sure source_pos <= target_pos + good_part = tf.matrix_band_part( + tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) + mask = (1.0 - good_part) * -1e9 + attention += tf.reshape(mask, [1, 1, 1, block_length, local_length]) + attention = tf.nn.softmax(attention) # TODO(noam): figure out how to show a summary for the remaining blocks. # The naive way currently causes errors due to empty tensors. + # output: [batch, heads, num_blocks-1, block_length, depth_v] output = tf.matmul(attention, local_v) output = tf.reshape(output, [batch, heads, -1, depth_v]) - - # Remove added padding + output = tf.concat([first_output, output], axis=2) output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) output.set_shape(v_shape) return output -def unmasked_local_attention_1d(q, - k, - v, - block_length=128, - filter_width=100, - name=None): +def local_attention_1d(q, + k, + v, + block_length=128, + filter_width=100, + name=None): """strided block local self-attention. Args: @@ -644,7 +622,7 @@ def multihead_attention(query_antecedent, x = masked_local_attention_1d(q, k, v, block_length=block_length) else: assert attention_type == "local_unmasked" - x = unmasked_local_attention_1d( + x = local_attention_1d( q, k, v, block_length=block_length, filter_width=block_width) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") diff --git a/tensor2tensor/layers/common_attention_test.py b/tensor2tensor/layers/common_attention_test.py index 61855b876..e846c2002 100644 --- a/tensor2tensor/layers/common_attention_test.py +++ b/tensor2tensor/layers/common_attention_test.py @@ -65,7 +65,7 @@ def testLocalUnmaskedAttention(self): x = np.random.rand(5, 4, 25, 16) y = np.random.rand(5, 4, 25, 16) with self.test_session() as session: - a = common_attention.unmasked_local_attention_1d( + a = common_attention.local_attention_1d( tf.constant(x, dtype=tf.float32), tf.constant(y, dtype=tf.float32), tf.constant(y, dtype=tf.float32), @@ -79,7 +79,7 @@ def testLocalUnmaskedAttentionMatchingBlockLength(self): x = np.random.rand(5, 4, 25, 16) y = np.random.rand(5, 4, 25, 16) with self.test_session() as session: - a = common_attention.unmasked_local_attention_1d( + a = common_attention.local_attention_1d( tf.constant(x, dtype=tf.float32), tf.constant(y, dtype=tf.float32), tf.constant(y, dtype=tf.float32), From f25af0f7eebde41ec310e3dce5759a8969d5e214 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 4 Aug 2017 15:04:18 -0700 Subject: [PATCH 0228/4095] Share desc2code source vocab with translation, baseline to play with VAE. PiperOrigin-RevId: 164315503 --- tensor2tensor/data_generators/desc2code.py | 68 ++++---- tensor2tensor/models/models.py | 1 + tensor2tensor/models/transformer_vae.py | 185 +++++++++++++++++++++ 3 files changed, 218 insertions(+), 36 deletions(-) create mode 100644 tensor2tensor/models/transformer_vae.py diff --git a/tensor2tensor/data_generators/desc2code.py b/tensor2tensor/data_generators/desc2code.py index 52513e63c..98c93aacd 100644 --- a/tensor2tensor/data_generators/desc2code.py +++ b/tensor2tensor/data_generators/desc2code.py @@ -44,8 +44,8 @@ _DESC_DIR_NAME = "description" _CODE_PY_DIR_NAME = "solutions_python" -_VOCAB_EN_FILENAME = "vocab_desc2code_tok_en" -_VOCAB_PY_FILENAME = "vocab_desc2code_tok_py" +_VOCAB_EN_FILENAME = "vocab.endefr" +_VOCAB_PY_FILENAME = "vocab.py" # Struct containing a coding problem (contains the paths to the descriptions # and code files) @@ -61,21 +61,43 @@ def is_character_level(self): @property def num_shards(self): - return 100 + return 10 @property def use_subword_tokenizer(self): return True + @property + def input_vocab_size(self): + return 2**15 # 32k + + @property + def target_vocab_size(self): + return 2**12 # 4k + + @property + def vocab_input_filename(self): + return "{}.{}".format(_VOCAB_EN_FILENAME, self.input_vocab_size) + + @property + def vocab_target_filename(self): + return "{}.{}".format(_VOCAB_PY_FILENAME, self.target_vocab_size) + + def feature_encoders(self, data_dir): + source_vocab_filename = os.path.join(data_dir, self.vocab_input_filename) + target_vocab_filename = os.path.join(data_dir, self.vocab_target_filename) + source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_token, + "targets": target_token, + } + @registry.register_problem("desc2code_py") class Desc2CodePyProblem(Desc2CodeProblem): """Description2Code for python problem.""" - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - @property def input_space_id(self): return problem.SpaceID.EN_TOK @@ -84,14 +106,6 @@ def input_space_id(self): def target_space_id(self): return problem.SpaceID.PY_TOK - @property - def vocab_input_filename(self): - return "{}.{}".format(_VOCAB_EN_FILENAME, self.targeted_vocab_size) - - @property - def vocab_target_filename(self): - return "{}.{}".format(_VOCAB_PY_FILENAME, self.targeted_vocab_size) - def train_generator(self, data_dir, tmp_dir, train): # Called twice: for train and test @@ -135,27 +149,19 @@ def generator_samples_content(get_source, get_target): elif sample.code_files: # Only take the source if a target exists yield source, target - def generator_source(): - for source, _ in generator_samples_content(True, False): - yield source.strip() - def generator_target(): for _, target in generator_samples_content(False, True): yield target.strip() # Generate vocab for both source and target - source_vocab = generator_utils.get_or_generate_vocab_inner( - data_dir=data_dir, - vocab_filename=self.vocab_input_filename, - vocab_size=self.targeted_vocab_size, - generator_fn=generator_source, - ) + source_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_input_filename, self.input_vocab_size) target_vocab = generator_utils.get_or_generate_vocab_inner( data_dir=data_dir, vocab_filename=self.vocab_target_filename, - vocab_size=self.targeted_vocab_size, + vocab_size=self.target_vocab_size, generator_fn=generator_target, ) @@ -169,16 +175,6 @@ def generator_target(): "targets": target_ints, } - def feature_encoders(self, data_dir): - source_vocab_filename = os.path.join(data_dir, self.vocab_input_filename) - target_vocab_filename = os.path.join(data_dir, self.vocab_target_filename) - source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) - return { - "inputs": source_token, - "targets": target_token, - } - # Utils functions diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index 963975780..4b1355dba 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -38,5 +38,6 @@ from tensor2tensor.models import transformer from tensor2tensor.models import transformer_alternative from tensor2tensor.models import transformer_moe +from tensor2tensor.models import transformer_vae from tensor2tensor.models import xception # pylint: enable=unused-import diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py new file mode 100644 index 000000000..31de7bd5f --- /dev/null +++ b/tensor2tensor/models/transformer_vae.py @@ -0,0 +1,185 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""VAE Transformer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.layers import common_layers +from tensor2tensor.models import transformer +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +def decompress(source, hparams, name): + """Decompression function.""" + with tf.variable_scope(name): + shape = tf.shape(source) + thicker = common_layers.conv_block( + source, hparams.hidden_size * 2, [((1, 1), (1, 1))], + name="decompress_conv") + return tf.reshape(thicker, [shape[0], shape[1] * 2, 1, hparams.hidden_size]) + + +def vae(x, hparams, name): + with tf.variable_scope(name): + mu = tf.layers.dense(x, hparams.z_size, name="mu") + log_sigma = tf.layers.dense(x, hparams.z_size, name="log_sigma") + shape = tf.shape(x) + epsilon = tf.random_normal([shape[0], shape[1], 1, hparams.z_size]) + z = mu + tf.exp(log_sigma / 2) * epsilon + dense = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense") + kl = 0.5 * tf.reduce_mean( + tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1) + return dense, tf.reduce_mean(kl) + + +def compress_vae(inputs, hparams, name): + """Compress, then VAE.""" + with tf.variable_scope(name): + # Run compression by strided convs. + cur = tf.expand_dims(inputs, axis=2) + for i in xrange(hparams.num_compress_steps): + cur = common_layers.conv_block( + cur, hparams.hidden_size, [((1, 1), (2, 1))], + strides=(2, 1), name="compress_%d" % i) + + # Convolve and ReLu to get state. + cur = common_layers.conv_block( + cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") + + cur, kl_loss = vae(cur, hparams, name="vae") + return cur, kl_loss + + +def vae_transformer_internal(inputs, targets, target_space, hparams): + """VAE Transformer, main step used for training.""" + with tf.variable_scope("vae_transformer"): + is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN + # Prepare inputs, targets, and k. + inputs = common_layers.flatten4d3d(inputs) + targets = common_layers.flatten4d3d(targets) + k = 2**hparams.num_compress_steps + _, targets = common_layers.pad_to_same_length( + inputs, targets, final_length_divisible_by=k) + + # Transformer preparations and encoder. + (encoder_input, encoder_self_attention_bias, + encoder_decoder_attention_bias) = transformer.transformer_prepare_encoder( + inputs, target_space, hparams) + residual_fn = transformer.get_residual_fn(hparams) + encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) + encoder_output = transformer.transformer_encoder( + encoder_input, residual_fn, encoder_self_attention_bias, hparams) + + def get_decoder_autoregressive(): + """Decoder input for autoregressive computation.""" + (a, b) = transformer.transformer_prepare_decoder(targets, hparams) + return (a, b, tf.constant(0.0)) + + # 10% of the time we compress all-zeros, as will be at decoding start. + prob_targets = 0.9 if is_training else 1.0 + to_compress = tf.cond(tf.less(tf.random_uniform([]), prob_targets), + lambda: targets, lambda: tf.zeros_like(targets)) + z, kl_loss = compress_vae(to_compress, hparams, "vae") + # Decompress. + for i in xrange(hparams.num_compress_steps): + j = hparams.num_hidden_layers - i - 1 + z = decompress(z, hparams, "decompress_%d" % j) + + def get_decoder_from_vae(): + """Decoder input computed by VAE.""" + # Return decoder stuff. + (a, b) = transformer.transformer_prepare_decoder( + tf.squeeze(z, axis=2), hparams) + return (a, b, kl_loss) + + # Randomize decoder inputs.. + prob_do_vae = common_layers.inverse_exp_decay(40000) * 0.7 + step = tf.to_float(tf.contrib.framework.get_global_step()) + if not is_training: + prob_do_vae = tf.cond(tf.less(step, 40000.0), lambda: tf.constant(0.0), + lambda: tf.constant(1.0)) + (decoder_input, decoder_self_attention_bias, kl_loss2) = tf.cond( + tf.less(tf.random_uniform([]), prob_do_vae), + get_decoder_from_vae, get_decoder_autoregressive) + + # Transformer decoder. + decoder_output = transformer.transformer_decoder( + decoder_input, encoder_output, residual_fn, decoder_self_attention_bias, + encoder_decoder_attention_bias, hparams) + decoder_output = tf.expand_dims(decoder_output, 2) + + cond_self = tf.cond(tf.less(step, 30000.0), lambda: tf.constant(1.0), + lambda: tf.constant(0.0)) + prob_self = 0.4 if is_training else cond_self + (ret, kl_loss) = tf.cond(tf.less(tf.random_uniform([]), prob_self), + lambda: (z, kl_loss), + lambda: (decoder_output, kl_loss2)) + + kl_loss *= common_layers.inverse_exp_decay(50000) * 2.0 + return ret, kl_loss + + +@registry.register_model +class TransformerVAE(t2t_model.T2TModel): + + def model_fn_body(self, features): + return vae_transformer_internal( + features["inputs"], features["targets"], features["target_space_id"], + self._hparams) + + def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, + last_position_only=False, alpha=0.0): + """A inference method, see T2TModel.""" + if not features: + features = {} + inputs_old = None + if "inputs" in features and len(features["inputs"].shape) < 4: + inputs_old = features["inputs"] + features["inputs"] = tf.expand_dims(features["inputs"], 2) + + # Create an initial targets tensor. + if "partial_targets" in features: + initial_output = tf.convert_to_tensor(features["partial_targets"]) + else: + batch_size = tf.shape(features["inputs"])[0] + initial_output = tf.zeros((batch_size, 1, 1, 1), dtype=tf.int64) + + features["targets"] = initial_output + sharded_logits, _ = self.model_fn( + features, False, last_position_only=last_position_only) + sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) + samples = tf.concat(sharded_samples, 0) + if inputs_old is not None: # Restore to not confuse Estimator. + features["inputs"] = inputs_old + return samples + + +@registry.register_hparams +def transformer_vae_small(): + """Set of hyperparameters.""" + hparams = transformer.transformer_small() + hparams.add_hparam("z_size", 128) + hparams.add_hparam("num_compress_steps", 4) + return hparams From 932e5c2cbd46bc579917b7785b544012ba41ea2a Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 4 Aug 2017 15:50:54 -0700 Subject: [PATCH 0229/4095] v1.1.6 PiperOrigin-RevId: 164321289 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 38b2fcc48..c62b3409c 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.5', + version='1.1.6', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 51279cf5b2ee2fbb67ea49905fa2c43ce88e4056 Mon Sep 17 00:00:00 2001 From: Adam Bouhenguel <ajbouh@users.noreply.github.com> Date: Sun, 6 Aug 2017 07:44:57 -0700 Subject: [PATCH 0230/4095] Fix Python3 support in model_builder.py Use six.iteritems(...) instead of losses_dict.iteritems() --- tensor2tensor/utils/model_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index a12aa1122..48d5dd7a0 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -195,7 +195,7 @@ def nth_model(n): features, skip=(skipping_is_on and skip_this_one)) with tf.variable_scope("losses_avg", reuse=True): total_loss, ops = 0.0, [] - for loss_key, loss_value in losses_dict.iteritems(): + for loss_key, loss_value in six.iteritems(losses_dict): loss_moving_avg = tf.get_variable("problem_%d/%s_loss" % (n, loss_key)) ops.append( From d2af7cfe0b74a0e5ec3e0621ff7ad0e0776281df Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Wed, 9 Aug 2017 11:18:31 +0000 Subject: [PATCH 0231/4095] Added readline import in trainer_utils.py --- tensor2tensor/utils/trainer_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) mode change 100644 => 100755 tensor2tensor/utils/trainer_utils.py diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py old mode 100644 new mode 100755 index 260ec6a00..dc8238c51 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -841,6 +841,11 @@ def _interactive_input_fn(hparams): vocabulary = p_hparams.vocabulary["inputs" if has_input else "targets"] # This should be longer than the longest input. const_array_size = 10000 + # For ease of input, activate the readline module if available. + try: + import readline + except ImportError: + pass while True: prompt = ("INTERACTIVE MODE num_samples=%d decode_length=%d \n" " it=<input_type> ('text' or 'image')\n" @@ -848,7 +853,7 @@ def _interactive_input_fn(hparams): " in=<input_problem> (set the input problem number)\n" " ou=<output_problem> (set the output problem number)\n" " ns=<num_samples> (changes number of samples)\n" - " dl=<decode_length> (changes decode legnth)\n" + " dl=<decode_length> (changes decode length)\n" " <%s> (decode)\n" " q (quit)\n" ">" % (num_samples, decode_length, "source_string" From f2714e93b05753e364e34c1fe9fd69d6e405f5c1 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Wed, 9 Aug 2017 11:53:15 +0000 Subject: [PATCH 0232/4095] Sync with upstream --- tensor2tensor/utils/trainer_utils.py | 1113 ++------------------------ 1 file changed, 50 insertions(+), 1063 deletions(-) mode change 100755 => 100644 tensor2tensor/utils/trainer_utils.py diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py old mode 100755 new mode 100644 index dc8238c51..9e869c15c --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -19,37 +19,24 @@ from __future__ import division from __future__ import print_function -import math -import operator -import os import sys # Dependency imports -import numpy as np -import six -# pylint: disable=redefined-builtin -from six.moves import input -from six.moves import xrange -# pylint: enable=redefined-builtin - from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import from tensor2tensor.data_generators import problem_hparams -from tensor2tensor.data_generators import text_encoder from tensor2tensor.models import models # pylint: disable=unused-import from tensor2tensor.utils import data_reader -from tensor2tensor.utils import expert_utils as eu +from tensor2tensor.utils import decoding +from tensor2tensor.utils import devices +from tensor2tensor.utils import input_fn_builder from tensor2tensor.utils import metrics +from tensor2tensor.utils import model_builder from tensor2tensor.utils import registry -from tensor2tensor.utils import yellowfin import tensorflow as tf from tensorflow.contrib.learn.python.learn import learn_runner from tensorflow.python import debug -from tensorflow.python.ops import init_ops - -# Number of samples to draw for an image input (in such cases as captioning) -IMAGE_DECODE_LENGTH = 100 flags = tf.flags FLAGS = flags.FLAGS @@ -130,16 +117,7 @@ "<beam1>\t<beam2>..\t<input>") flags.DEFINE_integer("decode_max_input_size", -1, "Maximum number of ids in input. Or <= 0 for no max.") - - -def _save_until_eos(hyp): - """Strips everything after the first <EOS> token, which is normally 1.""" - try: - index = list(hyp).index(text_encoder.EOS_ID) - return hyp[0:index] - except ValueError: - # No EOS_ID: return the array as-is. - return hyp +flags.DEFINE_bool("identity_output", False, "To print the output as identity") def make_experiment_fn(data_dir, model_name, train_steps, eval_steps): @@ -179,8 +157,8 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, eval_hooks.append(hook) return tf.contrib.learn.Experiment( estimator=estimator, - train_input_fn=input_fns["train"], - eval_input_fn=input_fns["eval"], + train_input_fn=input_fns[tf.contrib.learn.ModeKeys.TRAIN], + eval_input_fn=input_fns[tf.contrib.learn.ModeKeys.EVAL], eval_metrics=eval_metrics, train_steps=train_steps, eval_steps=eval_steps, @@ -193,22 +171,26 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): """Constructs and returns Estimator and train/eval input functions.""" tf.logging.info("Creating experiment, storing model files in %s", output_dir) - num_datashards = data_parallelism().n - train_input_fn = get_input_fn( + num_datashards = devices.data_parallelism().n + train_input_fn = input_fn_builder.build_input_fn( mode=tf.contrib.learn.ModeKeys.TRAIN, hparams=hparams, data_file_patterns=get_data_filepatterns(data_dir, tf.contrib.learn.ModeKeys.TRAIN), - num_datashards=num_datashards) + num_datashards=num_datashards, + worker_replicas=FLAGS.worker_replicas, + worker_id=FLAGS.worker_id) - eval_input_fn = get_input_fn( + eval_input_fn = input_fn_builder.build_input_fn( mode=tf.contrib.learn.ModeKeys.EVAL, hparams=hparams, data_file_patterns=get_data_filepatterns(data_dir, tf.contrib.learn.ModeKeys.EVAL), - num_datashards=num_datashards) + num_datashards=num_datashards, + worker_replicas=FLAGS.worker_replicas, + worker_id=FLAGS.worker_id) estimator = tf.contrib.learn.Estimator( - model_fn=model_builder(model_name, hparams=hparams), + model_fn=model_builder.build_model_fn(model_name, hparams=hparams), model_dir=output_dir, config=tf.contrib.learn.RunConfig( master=FLAGS.master, @@ -218,7 +200,10 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): keep_checkpoint_max=FLAGS.keep_checkpoint_max)) # Store the hparams in the estimator as well estimator.hparams = hparams - return estimator, {"train": train_input_fn, "eval": eval_input_fn} + return estimator, { + tf.contrib.learn.ModeKeys.TRAIN: train_input_fn, + tf.contrib.learn.ModeKeys.EVAL: eval_input_fn + } def log_registry(): @@ -227,6 +212,24 @@ def log_registry(): sys.exit(0) +def add_problem_hparams(hparams, problems): + """Add problem hparams for the problems.""" + hparams.problems = [] + hparams.problem_instances = [] + for problem_name in problems.split("-"): + try: + problem = registry.problem(problem_name) + p_hparams = problem.internal_hparams(hparams) + except ValueError: + problem = None + p_hparams = problem_hparams.problem_hparams(problem_name, hparams) + + hparams.problem_instances.append(problem) + hparams.problems.append(p_hparams) + + return hparams + + def create_hparams(params_id, data_dir): """Returns hyperparameters, including any flag value overrides. @@ -247,21 +250,7 @@ def create_hparams(params_id, data_dir): if FLAGS.hparams: hparams = hparams.parse(FLAGS.hparams) - # Add hparams for the problems - hparams.problems = [] - hparams.problem_instances = [] - for problem_name in FLAGS.problems.split("-"): - try: - problem = registry.problem(problem_name) - p_hparams = problem.internal_hparams(hparams) - except ValueError: - problem = None - p_hparams = problem_hparams.problem_hparams(problem_name, hparams) - - hparams.problem_instances.append(problem) - hparams.problems.append(p_hparams) - - return hparams + return add_problem_hparams(hparams, FLAGS.problems) def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): @@ -289,7 +278,11 @@ def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): if schedule == "local_run": # Run the local demo. - run_locally(exp_fn(output_dir)) + exp = exp_fn(output_dir) + if exp.train_steps > 0 or exp.eval_steps > 0: + tf.logging.info("Performing local training and evaluation.") + exp.train_and_evaluate() + decode(exp.estimator) else: # Perform distributed training/evaluation. learn_runner.run( @@ -334,1020 +327,14 @@ def session_config(): return config -def model_builder(model, hparams): - """Returns a function to build the model. - - Args: - model: The name of the model to use. - hparams: The hyperparameters. - - Returns: - A function to build the model's graph. This function is called by - the Estimator object to construct the graph. - """ - - def initializer(): - if hparams.initializer == "orthogonal": - return tf.orthogonal_initializer(gain=hparams.initializer_gain) - elif hparams.initializer == "uniform": - max_val = 0.1 * hparams.initializer_gain - return tf.random_uniform_initializer(-max_val, max_val) - elif hparams.initializer == "normal_unit_scaling": - return init_ops.variance_scaling_initializer( - hparams.initializer_gain, mode="fan_avg", distribution="normal") - elif hparams.initializer == "uniform_unit_scaling": - return init_ops.variance_scaling_initializer( - hparams.initializer_gain, mode="fan_avg", distribution="uniform") - else: - raise ValueError("Unrecognized initializer: %s" % hparams.initializer) - - def learning_rate_decay(): - """Inverse-decay learning rate until warmup_steps, then decay.""" - warmup_steps = tf.to_float( - hparams.learning_rate_warmup_steps * FLAGS.worker_replicas) - step = tf.to_float(tf.contrib.framework.get_global_step()) - if hparams.learning_rate_decay_scheme == "noam": - return 5000.0 * hparams.hidden_size**-0.5 * tf.minimum( - (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) - elif hparams.learning_rate_decay_scheme == "exp100k": - return 0.94**(step // 100000) - elif hparams.learning_rate_decay_scheme == "cosine": - cycle_steps = hparams.learning_rate_cosine_cycle_steps - return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) - - inv_base = tf.exp(tf.log(0.01) / warmup_steps) - inv_decay = inv_base**(warmup_steps - step) - if hparams.learning_rate_decay_scheme == "sqrt": - decay = _sqrt_decay(step - warmup_steps) - elif hparams.learning_rate_decay_scheme == "exp10k": - decay = _exp_decay_after(step - warmup_steps, 0.9995, - FLAGS.train_steps - warmup_steps - 10000) - elif hparams.learning_rate_decay_scheme == "exp50k": - decay = _exp_decay_after(step - warmup_steps, 0.99995, - FLAGS.train_steps - warmup_steps - 50000) - elif hparams.learning_rate_decay_scheme == "exp500k": - decay = _exp_decay_after(step - warmup_steps, 0.9999955, - FLAGS.train_steps - warmup_steps - 500000) - elif hparams.learning_rate_decay_scheme == "none": - decay = tf.constant(1.0) - else: - raise ValueError("Unrecognized learning rate decay scheme: %s" % - hparams.learning_rate_decay_scheme) - return tf.cond( - step < warmup_steps, - lambda: inv_decay, - lambda: decay, - name="learning_rate_decay_warump_cond") - - def model_fn(features, targets, mode): - """Creates the prediction, loss, and train ops. - - Args: - features: A dictionary of tensors keyed by the feature name. - targets: A tensor representing the labels (targets). - mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. - - Returns: - A tuple consisting of the prediction, loss, and train_op. - """ - if mode == tf.contrib.learn.ModeKeys.INFER: - if FLAGS.decode_interactive: - features = _interactive_input_tensor_to_features_dict(features, hparams) - elif FLAGS.decode_from_file: - features = _decode_input_tensor_to_features_dict(features, hparams) - # A dictionary containing: - # - problem_choice: A Tensor containing an integer indicating which problem - # was selected for this run. - # - predictions: A Tensor containing the model's output predictions. - run_info = dict() - run_info["problem_choice"] = features["problem_choice"] - - if targets is not None: - features["targets"] = targets - - dp = data_parallelism() - - # Add input statistics for incoming features. - with tf.name_scope("input_stats"): - for (k, v) in six.iteritems(features): - if isinstance(v, tf.Tensor) and v.get_shape().ndims > 1: - tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // dp.n) - tf.summary.scalar("%s_length" % k, tf.shape(v)[1]) - nonpadding = tf.to_float(tf.not_equal(v, 0)) - tf.summary.scalar("%s_nonpadding_tokens" % k, - tf.reduce_sum(nonpadding)) - tf.summary.scalar("%s_nonpadding_fraction" % k, - tf.reduce_mean(nonpadding)) - - tf.get_variable_scope().set_initializer(initializer()) - train = mode == tf.contrib.learn.ModeKeys.TRAIN - - # Get multi-problem logits and loss based on features["problem_choice"]. - def nth_model(n): - """Build the model for the n-th problem, plus some added variables.""" - model_class = registry.model(model)( - hparams, - mode, - hparams.problems[n], - n, - dp, - _ps_devices(all_workers=True)) - if mode == tf.contrib.learn.ModeKeys.INFER: - return model_class.infer( - features, - beam_size=FLAGS.decode_beam_size, - top_beams=(FLAGS.decode_beam_size - if FLAGS.decode_return_beams else 1), - last_position_only=FLAGS.decode_use_last_position_only, - alpha=FLAGS.decode_alpha, - decode_length=FLAGS.decode_extra_length) - # In distributed mode, we build graph for problem=0 and problem=worker_id. - skipping_is_on = hparams.problem_choice == "distributed" and train - problem_worker_id = FLAGS.worker_id % len(hparams.problems) - skip_this_one = n != 0 and n % FLAGS.worker_replicas != problem_worker_id - # On worker 0 also build graph for problems <= 1. - # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. - skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1) - sharded_logits, training_loss, extra_loss = model_class.model_fn( - features, skip=(skipping_is_on and skip_this_one)) - with tf.variable_scope("losses_avg", reuse=True): - loss_moving_avg = tf.get_variable("problem_%d/training_loss" % n) - o1 = loss_moving_avg.assign(loss_moving_avg * 0.9 + training_loss * 0.1) - loss_moving_avg = tf.get_variable("problem_%d/extra_loss" % n) - o2 = loss_moving_avg.assign(loss_moving_avg * 0.9 + extra_loss * 0.1) - loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) - total_loss = training_loss + extra_loss - o3 = loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1) - with tf.variable_scope("train_stats"): # Count steps for this problem. - problem_steps = tf.get_variable( - "problem_%d_steps" % n, initializer=0, trainable=False) - o4 = problem_steps.assign_add(1) - with tf.control_dependencies([o1, o2, o3, o4]): # Make sure the ops run. - # Ensure the loss is a scalar here. - total_loss = tf.reshape(total_loss, [], name="total_loss_control_id") - return [total_loss] + sharded_logits # Need to flatten for cond later. - - result_list = _cond_on_index(nth_model, features["problem_choice"], 0, - len(hparams.problems) - 1) - - if mode == tf.contrib.learn.ModeKeys.INFER: - # Beam search in sequence model returns both decodes withe key "outputs" - # and scores with they key "scores". If return list is a dict, we expect - # that it will have keys "outputs", a tensor of int32 and scores, a - # tensor of floats. This is useful if we want to return scores from - # estimator.predict - if not isinstance(result_list, dict): - ret = {"outputs": result_list}, None, None - else: - ret = { - "outputs": result_list["outputs"], - "scores": result_list["scores"] - }, None, None - if "inputs" in features: - ret[0]["inputs"] = features["inputs"] - if "infer_targets" in features: - ret[0]["targets"] = features["infer_targets"] - return ret - - sharded_logits, total_loss = result_list[1:], result_list[0] - if mode == tf.contrib.learn.ModeKeys.EVAL: - logits = tf.concat(sharded_logits, 0) - if FLAGS.eval_print: - logits = tf.Print( - logits, [features["inputs"], logits], "EVAL PRINT", summarize=10000) - # For evaluation, return the logits layer as our predictions. - run_info["predictions"] = logits - train_op = None - return run_info, total_loss, None - - assert mode == tf.contrib.learn.ModeKeys.TRAIN - - # Some training statistics. - with tf.name_scope("training_stats"): - learning_rate = hparams.learning_rate * learning_rate_decay() - learning_rate /= math.sqrt(float(FLAGS.worker_replicas)) - tf.summary.scalar("learning_rate", learning_rate) - global_step = tf.to_float(tf.contrib.framework.get_global_step()) - for n in xrange(len(hparams.problems)): - with tf.variable_scope("losses_avg", reuse=True): - total_loss_var = tf.get_variable("problem_%d/total_loss" % n) - training_loss_var = tf.get_variable("problem_%d/training_loss" % n) - extra_loss_var = tf.get_variable("problem_%d/extra_loss" % n) - tf.summary.scalar("loss_avg_%d/total_loss" % n, total_loss_var) - tf.summary.scalar("loss_avg_%d/training_loss" % n, training_loss_var) - tf.summary.scalar("loss_avg_%d/extra_loss" % n, extra_loss_var) - with tf.variable_scope("train_stats", reuse=True): - nth_steps = tf.get_variable("problem_%d_steps" % n, dtype=tf.int32) - tf.summary.scalar("problem_%d_frequency" % n, - tf.to_float(nth_steps) / (global_step + 1.0)) - - # Log trainable weights and add decay. - total_size, weight_decay_loss = 0, 0.0 - all_weights = {v.name: v for v in tf.trainable_variables()} - for v_name in sorted(list(all_weights)): - v = all_weights[v_name] - v_size = int(np.prod(np.array(v.shape.as_list()))) - tf.logging.info("Weight %s\tshape %s\tsize %d", - v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) - total_size += v_size - if hparams.weight_decay > 0.0 and len(v.shape.as_list()) > 1: - # Add weight regularization if set and the weight is not a bias (dim>1). - with tf.device(v._ref().device): # pylint: disable=protected-access - v_loss = tf.nn.l2_loss(v) / v_size - weight_decay_loss += v_loss - is_body = len(v_name) > 5 and v_name[:5] == "body/" - if hparams.weight_noise > 0.0 and is_body: - # Add weight noise if set in hparams. - with tf.device(v._ref().device): # pylint: disable=protected-access - scale = learning_rate * 0.001 - noise = tf.truncated_normal(v.shape) * hparams.weight_noise * scale - noise_op = v.assign_add(noise) - with tf.control_dependencies([noise_op]): - total_loss = tf.identity(total_loss) - tf.logging.info("Total trainable variables size: %d", total_size) - if hparams.weight_decay > 0.0: - total_loss += weight_decay_loss * hparams.weight_decay - total_loss = tf.identity(total_loss, name="total_loss") - - # Define the train_op for the TRAIN mode. - opt = _ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) - tf.logging.info("Computing gradients for global model_fn.") - opt_summaries = ["learning_rate", "loss"] - if hparams.summarize_grads: - opt_summaries.extend(["gradients", "gradient_norm"]) - train_op = tf.contrib.layers.optimize_loss( - name="training", - loss=total_loss, - global_step=tf.contrib.framework.get_global_step(), - learning_rate=learning_rate, - clip_gradients=hparams.clip_grad_norm or None, - gradient_noise_scale=hparams.grad_noise_scale or None, - optimizer=opt, - summaries=opt_summaries, - colocate_gradients_with_ops=True) - - # Remove summaries that will fail to run because they are in conditionals. - # TODO(cwhipkey): Test with this code removed, later in 2017. - summaries = tf.get_collection_ref(tf.GraphKeys.SUMMARIES) - for i in range(len(summaries) - 1, -1, -1): - if summaries[i].name.startswith("cond_"): - del summaries[i] - - tf.logging.info("Global model_fn finished.") - return run_info, total_loss, train_op - - return model_fn - - -def run_locally(exp): - """Runs an Experiment locally - trains, evaluates, and decodes. - - Args: - exp: Experiment. - """ - if exp.train_steps > 0 or exp.eval_steps > 0: - tf.logging.info("Performing local training and evaluation.") - exp.train_and_evaluate() - decode(exp.estimator) +def get_data_filepatterns(data_dir, mode): + return data_reader.get_data_filepatterns(FLAGS.problems, data_dir, mode) def decode(estimator): if FLAGS.decode_interactive: - decode_interactively(estimator) + decoding.decode_interactively(estimator) elif FLAGS.decode_from_file is not None: - decode_from_file(estimator, FLAGS.decode_from_file) + decoding.decode_from_file(estimator, FLAGS.decode_from_file) elif FLAGS.decode_from_dataset: - decode_from_dataset(estimator) - - -def decode_from_dataset(estimator): - hparams = estimator.hparams - for i, problem in enumerate(FLAGS.problems.split("-")): - inputs_vocab = hparams.problems[i].vocabulary.get("inputs", None) - targets_vocab = hparams.problems[i].vocabulary["targets"] - tf.logging.info("Performing local inference.") - infer_problems_data = get_data_filepatterns(hparams.data_dir, - tf.contrib.learn.ModeKeys.INFER) - - infer_input_fn = get_input_fn( - mode=tf.contrib.learn.ModeKeys.INFER, - hparams=hparams, - data_file_patterns=infer_problems_data, - num_datashards=data_parallelism().n, - fixed_problem=i) - result_iter = estimator.predict(input_fn=infer_input_fn, as_iterable=False) - - def log_fn(inputs, - targets, - outputs, - problem, - j, - inputs_vocab=inputs_vocab, - targets_vocab=targets_vocab): - """Log inference results.""" - if "image" in problem and FLAGS.decode_save_images: - save_path = os.path.join(estimator.model_dir, - "%s_prediction_%d.jpg" % (problem, j)) - show_and_save_image(inputs / 255., save_path) - elif inputs_vocab: - decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) - tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - - decoded_outputs = targets_vocab.decode(_save_until_eos(outputs.flatten())) - tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) - decoded_targets = targets_vocab.decode(_save_until_eos(targets.flatten())) - tf.logging.info("Inference results TARGET: %s" % decoded_targets) - - if FLAGS.decode_to_file: - output_filepath = FLAGS.decode_to_file + ".outputs." + problem - output_file = tf.gfile.Open(output_filepath, "a") - output_file.write(decoded_outputs + "\n") - target_filepath = FLAGS.decode_to_file + ".targets." + problem - target_file = tf.gfile.Open(target_filepath, "a") - target_file.write(decoded_targets + "\n") - - # The function predict() returns an iterable over the network's - # predictions from the test input. We use it to log inputs and decodes. - inputs_iter = result_iter["inputs"] - targets_iter = result_iter["targets"] - outputs_iter = result_iter["outputs"] - for j, result in enumerate(zip(inputs_iter, targets_iter, outputs_iter)): - inputs, targets, outputs = result - if FLAGS.decode_return_beams: - output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - log_fn(inputs, targets, beam, problem, j) - else: - log_fn(inputs, targets, outputs, problem, j) - - -def decode_from_file(estimator, filename): - """Compute predictions on entries in filename and write them out.""" - hparams = estimator.hparams - problem_id = FLAGS.decode_problem_id - inputs_vocab = hparams.problems[problem_id].vocabulary["inputs"] - targets_vocab = hparams.problems[problem_id].vocabulary["targets"] - tf.logging.info("Performing decoding from a file.") - sorted_inputs, sorted_keys = _get_sorted_inputs(filename) - num_decode_batches = (len(sorted_inputs) - 1) // FLAGS.decode_batch_size + 1 - input_fn = _decode_batch_input_fn(problem_id, num_decode_batches, - sorted_inputs, inputs_vocab) - - decodes = [] - for _ in range(num_decode_batches): - result_iter = estimator.predict( - input_fn=input_fn.next if six.PY2 else input_fn.__next__, - as_iterable=True) - for result in result_iter: - - def log_fn(inputs, outputs): - decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) - tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - - decoded_outputs = targets_vocab.decode( - _save_until_eos(outputs.flatten())) - tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) - return decoded_outputs - - if FLAGS.decode_return_beams: - beam_decodes = [] - output_beams = np.split( - result["outputs"], FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - beam_decodes.append(log_fn(result["inputs"], beam)) - decodes.append("\t".join(beam_decodes)) - - else: - decodes.append(log_fn(result["inputs"], result["outputs"])) - - # Reversing the decoded inputs and outputs because they were reversed in - # _decode_batch_input_fn - sorted_inputs.reverse() - decodes.reverse() - # Dumping inputs and outputs to file filename.decodes in - # format result\tinput in the same order as original inputs - if FLAGS.decode_to_file: - output_filename = FLAGS.decode_to_file - else: - output_filename = filename - if FLAGS.decode_shards > 1: - base_filename = output_filename + ("%.2d" % FLAGS.worker_id) - else: - base_filename = output_filename - decode_filename = (base_filename + "." + FLAGS.model + "." + FLAGS.hparams_set - + ".beam" + str(FLAGS.decode_beam_size) + ".alpha" + - str(FLAGS.decode_alpha) + ".decodes") - tf.logging.info("Writing decodes into %s" % decode_filename) - outfile = tf.gfile.Open(decode_filename, "w") - for index in range(len(sorted_inputs)): - outfile.write("%s\n" % (decodes[sorted_keys[index]])) - - -def decode_interactively(estimator): - hparams = estimator.hparams - - infer_input_fn = _interactive_input_fn(hparams) - for problem_idx, example in infer_input_fn: - targets_vocab = hparams.problems[problem_idx].vocabulary["targets"] - result_iter = estimator.predict(input_fn=lambda e=example: e) - for result in result_iter: - if FLAGS.decode_return_beams: - beams = np.split(result["outputs"], FLAGS.decode_beam_size, axis=0) - scores = None - if "scores" in result: - scores = np.split(result["scores"], FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(beams): - tf.logging.info("BEAM %d:" % k) - beam_string = targets_vocab.decode(_save_until_eos(beam.flatten())) - if scores is not None: - tf.logging.info("%s\tScore:%f" % (beam_string, scores[k])) - else: - tf.logging.info(beam_string) - else: - tf.logging.info( - targets_vocab.decode(_save_until_eos(result["outputs"].flatten()))) - - -def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, - vocabulary): - tf.logging.info(" batch %d" % num_decode_batches) - # First reverse all the input sentences so that if you're going to get OOMs, - # you'll see it in the first batch - sorted_inputs.reverse() - for b in range(num_decode_batches): - tf.logging.info("Decoding batch %d" % b) - batch_length = 0 - batch_inputs = [] - for inputs in sorted_inputs[b * FLAGS.decode_batch_size:( - b + 1) * FLAGS.decode_batch_size]: - input_ids = vocabulary.encode(inputs) - if FLAGS.decode_max_input_size > 0: - # Subtract 1 for the EOS_ID. - input_ids = input_ids[:FLAGS.decode_max_input_size - 1] - input_ids.append(text_encoder.EOS_ID) - batch_inputs.append(input_ids) - if len(input_ids) > batch_length: - batch_length = len(input_ids) - final_batch_inputs = [] - for input_ids in batch_inputs: - assert len(input_ids) <= batch_length - x = input_ids + [0] * (batch_length - len(input_ids)) - final_batch_inputs.append(x) - yield { - "inputs": np.array(final_batch_inputs), - "problem_choice": np.array(problem_id) - } - - -def get_data_filepatterns(data_dir, mode): - return data_reader.get_data_filepatterns(FLAGS.problems, data_dir, mode) - - -def _cond_on_index(fn, index_tensor, cur_idx, max_idx): - """Call fn(index_tensor) using tf.cond in [cur_id, max_idx].""" - if cur_idx == max_idx: - return fn(cur_idx) - return tf.cond( - tf.equal(index_tensor, cur_idx), lambda: fn(cur_idx), - lambda: _cond_on_index(fn, index_tensor, cur_idx + 1, max_idx)) - - -def _interactive_input_fn(hparams): - """Generator that reads from the terminal and yields "interactive inputs". - - Due to temporary limitations in tf.learn, if we don't want to reload the - whole graph, then we are stuck encoding all of the input as one fixed-size - numpy array. - - We yield int64 arrays with shape [const_array_size]. The format is: - [num_samples, decode_length, len(input ids), <input ids>, <padding>] - - Args: - hparams: model hparams - Yields: - numpy arrays - - Raises: - Exception: when `input_type` is invalid. - """ - num_samples = 3 - decode_length = 100 - input_type = "text" - problem_id = 0 - p_hparams = hparams.problems[problem_id] - has_input = "inputs" in p_hparams.input_modality - vocabulary = p_hparams.vocabulary["inputs" if has_input else "targets"] - # This should be longer than the longest input. - const_array_size = 10000 - # For ease of input, activate the readline module if available. - try: - import readline - except ImportError: - pass - while True: - prompt = ("INTERACTIVE MODE num_samples=%d decode_length=%d \n" - " it=<input_type> ('text' or 'image')\n" - " pr=<problem_num> (set the problem number)\n" - " in=<input_problem> (set the input problem number)\n" - " ou=<output_problem> (set the output problem number)\n" - " ns=<num_samples> (changes number of samples)\n" - " dl=<decode_length> (changes decode length)\n" - " <%s> (decode)\n" - " q (quit)\n" - ">" % (num_samples, decode_length, "source_string" - if has_input else "target_prefix")) - input_string = input(prompt) - if input_string == "q": - return - elif input_string[:3] == "pr=": - problem_id = int(input_string[3:]) - p_hparams = hparams.problems[problem_id] - has_input = "inputs" in p_hparams.input_modality - vocabulary = p_hparams.vocabulary["inputs" if has_input else "targets"] - elif input_string[:3] == "in=": - problem = int(input_string[3:]) - p_hparams.input_modality = hparams.problems[problem].input_modality - p_hparams.input_space_id = hparams.problems[problem].input_space_id - elif input_string[:3] == "ou=": - problem = int(input_string[3:]) - p_hparams.target_modality = hparams.problems[problem].target_modality - p_hparams.target_space_id = hparams.problems[problem].target_space_id - elif input_string[:3] == "ns=": - num_samples = int(input_string[3:]) - elif input_string[:3] == "dl=": - decode_length = int(input_string[3:]) - elif input_string[:3] == "it=": - input_type = input_string[3:] - else: - if input_type == "text": - input_ids = vocabulary.encode(input_string) - if has_input: - input_ids.append(text_encoder.EOS_ID) - x = [num_samples, decode_length, len(input_ids)] + input_ids - assert len(x) < const_array_size - x += [0] * (const_array_size - len(x)) - yield problem_id, { - "inputs": np.array(x), - "problem_choice": np.array(problem_id) - } - elif input_type == "image": - input_path = input_string - img = read_image(input_path) - yield problem_id, { - "inputs": img, - "problem_choice": np.array(problem_id) - } - else: - raise Exception("Unsupported input type.") - - -def read_image(path): - try: - import matplotlib.image as im # pylint: disable=g-import-not-at-top - except ImportError as e: - tf.logging.warning( - "Reading an image requires matplotlib to be installed: %s", e) - raise NotImplementedError("Image reading not implemented.") - return im.imread(path) - - -def show_and_save_image(img, save_path): - try: - import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top - except ImportError as e: - tf.logging.warning("Showing and saving an image requires matplotlib to be " - "installed: %s", e) - raise NotImplementedError("Image display and save not implemented.") - plt.imshow(img) - plt.savefig(save_path) - - -def _get_sorted_inputs(filename): - """Returning inputs sorted according to length. - - Args: - filename: path to file with inputs, 1 per line. - - Returns: - a sorted list of inputs - - """ - tf.logging.info("Getting sorted inputs") - # read file and sort inputs according them according to input length. - if FLAGS.decode_shards > 1: - decode_filename = filename + ("%.2d" % FLAGS.worker_id) - else: - decode_filename = filename - inputs = [line.strip() for line in tf.gfile.Open(decode_filename)] - input_lens = [(i, len(line.strip().split())) for i, line in enumerate(inputs)] - sorted_input_lens = sorted(input_lens, key=operator.itemgetter(1)) - # We'll need the keys to rearrange the inputs back into their original order - sorted_keys = {} - sorted_inputs = [] - for i, (index, _) in enumerate(sorted_input_lens): - sorted_inputs.append(inputs[index]) - sorted_keys[index] = i - return sorted_inputs, sorted_keys - - -def _interactive_input_tensor_to_features_dict(feature_map, hparams): - """Convert the interactive input format (see above) to a dictionary. - - Args: - feature_map: a dictionary with keys `problem_choice` and `input` containing - Tensors. - hparams: model hyperparameters - - Returns: - a features dictionary, as expected by the decoder. - """ - inputs = tf.constant(feature_map["inputs"]) - input_is_image = False if len(inputs.shape) < 3 else True - - def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring - p_hparams = hparams.problems[problem_choice] - if not input_is_image: - # Remove the batch dimension. - num_samples = x[0] - length = x[2] - x = tf.slice(x, [3], tf.to_int32([length])) - x = tf.reshape(x, [1, -1, 1, 1]) - # Transform into a batch of size num_samples to get that many random - # decodes. - x = tf.tile(x, tf.to_int32([num_samples, 1, 1, 1])) - else: - x = tf.image.resize_images(x, [299, 299]) - x = tf.reshape(x, [1, 299, 299, -1]) - x = tf.to_int32(x) - return (tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id), x) - - input_space_id, target_space_id, x = _cond_on_index( - input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) - - features = {} - features["problem_choice"] = tf.constant(feature_map["problem_choice"]) - features["input_space_id"] = input_space_id - features["target_space_id"] = target_space_id - features["decode_length"] = (IMAGE_DECODE_LENGTH - if input_is_image else inputs[1]) - features["inputs"] = x - return features - - -def _decode_input_tensor_to_features_dict(feature_map, hparams): - """Convert the interactive input format (see above) to a dictionary. - - Args: - feature_map: a dictionary with keys `problem_choice` and `input` containing - Tensors. - hparams: model hyperparameters - - Returns: - a features dictionary, as expected by the decoder. - """ - inputs = tf.constant(feature_map["inputs"]) - input_is_image = False - - def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring - p_hparams = hparams.problems[problem_choice] - # Add a third empty dimension dimension - x = tf.expand_dims(x, axis=[2]) - x = tf.to_int32(x) - return (tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id), x) - - input_space_id, target_space_id, x = _cond_on_index( - input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) - - features = {} - features["problem_choice"] = feature_map["problem_choice"] - features["input_space_id"] = input_space_id - features["target_space_id"] = target_space_id - features["decode_length"] = (IMAGE_DECODE_LENGTH - if input_is_image else tf.shape(x)[1] + 50) - features["inputs"] = x - return features - - -def get_input_fn(mode, - hparams, - data_file_patterns=None, - num_datashards=None, - fixed_problem=None): - """Provides input to the graph, either from disk or via a placeholder. - - This function produces an input function that will feed data into - the network. There are two modes of operation: - - 1. If data_file_pattern and all subsequent arguments are None, then - it creates a placeholder for a serialized tf.Example proto. - 2. If data_file_pattern is defined, it will read the data from the - files at the given location. Use this mode for training, - evaluation, and testing prediction. - - Args: - mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. - hparams: HParams object. - data_file_patterns: The list of file patterns to use to read in data. Set to - `None` if you want to create a placeholder for the input data. The - `problems` flag is a list of problem names joined by the `-` character. - The flag's string is then split along the `-` and each problem gets its - own example queue. - num_datashards: An integer. - fixed_problem: An integer indicating the problem to fetch data for, or None - if the input is to be randomly selected. - - Returns: - A function that returns a dictionary of features and the target labels. - """ - - def input_fn(): - """Supplies input to our model. - - This function supplies input to our model, where this input is a - function of the mode. For example, we supply different data if - we're performing training versus evaluation. - - Returns: - A tuple consisting of 1) a dictionary of tensors whose keys are - the feature names, and 2) a tensor of target labels if the mode - is not INFER (and None, otherwise). - - Raises: - ValueError: if one of the parameters has an unsupported value. - """ - problem_count, batches = len(data_file_patterns), [] - with tf.name_scope("input_reader"): - for n in xrange(problem_count): - if fixed_problem is not None and n != fixed_problem: - continue - problem_instance = hparams.problem_instances[n] - p_hparams = hparams.problems[n] - with tf.name_scope("problem_%d" % n): - with tf.device("/cpu:0"): # Input reading on CPU - capacity = p_hparams.max_expected_batch_size_per_shard - capacity *= num_datashards - examples = data_reader.input_pipeline( - problem_instance, data_file_patterns[n], capacity, mode) - feature_map = data_reader.batch_examples( - examples, - data_reader.hparams_to_batching_scheme( - hparams, - shard_multiplier=num_datashards, - drop_long_sequences=(mode == tf.contrib.learn.ModeKeys.TRAIN - or hparams.eval_drop_long_sequences), - length_multiplier=(p_hparams.batch_size_multiplier))) - - # Reverse inputs and targets features if the problem was reversed. - if problem_instance is not None: - problem_instance.maybe_reverse_features(feature_map) - problem_instance.maybe_copy_features(feature_map) - else: - if p_hparams.was_reversed: - inputs = feature_map["inputs"] - targets = feature_map["targets"] - feature_map["inputs"] = targets - feature_map["targets"] = inputs - # Use the inputs as the targets if the problem is a copy problem. - if p_hparams.was_copy: - feature_map["targets"] = feature_map["inputs"] - - # Ensure inputs and targets are proper rank. - while len(feature_map["inputs"].get_shape()) != 4: - feature_map["inputs"] = tf.expand_dims(feature_map["inputs"], axis=-1) - while len(feature_map["targets"].get_shape()) != 4: - feature_map["targets"] = tf.expand_dims( - feature_map["targets"], axis=-1) - - batches.append( - (feature_map["inputs"], feature_map["targets"], tf.constant(n), - tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id))) - - # We choose which problem to process. - loss_moving_avgs = [] # Need loss moving averages for that. - for n in xrange(problem_count): - with tf.variable_scope("losses_avg"): - loss_moving_avgs.append( - tf.get_variable( - "problem_%d/total_loss" % n, initializer=100.0, - trainable=False)) - tf.get_variable( - "problem_%d/training_loss" % n, initializer=100.0, trainable=False) - tf.get_variable( - "problem_%d/extra_loss" % n, initializer=100.0, trainable=False) - if fixed_problem is None: - if (hparams.problem_choice == "uniform" or - mode != tf.contrib.learn.ModeKeys.TRAIN): - problem_choice = tf.random_uniform( - [], maxval=problem_count, dtype=tf.int32) - elif hparams.problem_choice == "adaptive": - loss_moving_avgs = tf.stack(loss_moving_avgs) - problem_choice = tf.multinomial( - tf.reshape(loss_moving_avgs, [1, -1]), 1) - problem_choice = tf.to_int32(tf.squeeze(problem_choice)) - elif hparams.problem_choice == "distributed": - assert FLAGS.worker_replicas >= problem_count - assert FLAGS.worker_replicas % problem_count == 0 - problem_choice = tf.to_int32(FLAGS.worker_id % problem_count) - else: - raise ValueError( - "Value of hparams.problem_choice is %s and must be " - "one of [uniform, adaptive, distributed]" % hparams.problem_choice) - - # Inputs and targets conditional on problem_choice. - rand_inputs, rand_target, choice, inp_id, tgt_id = _cond_on_index( - lambda n: batches[n], problem_choice, 0, problem_count - 1) - else: - problem_choice = tf.constant(fixed_problem) - # Take the only constructed batch, which is the fixed_problem. - rand_inputs, rand_target, choice, inp_id, tgt_id = batches[0] - - # Set shapes so the ranks are clear. - rand_inputs.set_shape([None, None, None, None]) - rand_target.set_shape([None, None, None, None]) - choice.set_shape([]) - inp_id.set_shape([]) - tgt_id.set_shape([]) - # Forced shape obfuscation is necessary for inference. - if mode == tf.contrib.learn.ModeKeys.INFER: - rand_inputs._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access - rand_target._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access - - # Final feature map. - rand_feature_map = { - "inputs": rand_inputs, - "problem_choice": choice, - "input_space_id": inp_id, - "target_space_id": tgt_id - } - if mode == tf.contrib.learn.ModeKeys.INFER: - rand_feature_map["infer_targets"] = rand_target - rand_target = None - return rand_feature_map, rand_target - - return input_fn - - -class _ConditionalOptimizer(tf.train.Optimizer): - """Conditional optimizer.""" - - def __init__(self, optimizer_name, lr, hparams): - if optimizer_name == "Adam": - # We change the default epsilon for Adam and re-scale lr. - # Using LazyAdam as it's much faster for large vocabulary embeddings. - self._opt = tf.contrib.opt.LazyAdamOptimizer( - lr / 500.0, - beta1=hparams.optimizer_adam_beta1, - beta2=hparams.optimizer_adam_beta2, - epsilon=hparams.optimizer_adam_epsilon) - elif optimizer_name == "Momentum": - self._opt = tf.train.MomentumOptimizer( - lr, momentum=hparams.optimizer_momentum_momentum) - elif optimizer_name == "YellowFin": - tf.logging.info("Init YellowFin Optimizer.") - self._opt = yellowfin.YellowFinOptimizer( - learning_rate=lr, momentum=hparams.optimizer_momentum_momentum) - else: - self._opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer_name](lr) - - def compute_gradients(self, loss, var_list, colocate_gradients_with_ops): - return self._opt.compute_gradients( - loss, var_list, colocate_gradients_with_ops=colocate_gradients_with_ops) - - def apply_gradients(self, gradients, global_step=None, name=None): - return self._opt.apply_gradients( - gradients, global_step=global_step, name=name) - - -def _sqrt_decay(step): - """Decay like 1 / sqrt(step), multiplied by 500 to normalize.""" - return 500.0 / tf.sqrt(tf.maximum(step, 1.0)) - - -def _exp_decay_after(step, rate, from_which_step): - """Decay exponentially by rate (per step) starting at from_which_step.""" - return tf.cond( - step < from_which_step, - lambda: tf.constant(1.0), - lambda: rate**(step - from_which_step), - name="exponential_decay_step_cond") - - -def _ps_replicas(all_workers=False): - if all_workers: - return list(range(FLAGS.ps_replicas)) - # Worker K will be using replicas {0,...n-1} + K*n if we have n replicas. - num_replicas = FLAGS.ps_replicas // FLAGS.worker_replicas - return [d + FLAGS.worker_id * num_replicas for d in xrange(num_replicas)] - - -def _gpu_order(num_gpus): - if FLAGS.gpu_order: - ret = [int(s) for s in FLAGS.gpu_order.split(" ")] - if len(ret) == num_gpus: - return ret - return list(range(num_gpus)) - - -def _ps_gpus(all_workers=False): - ps_gpus = [] - for d in _ps_replicas(all_workers=all_workers): - ps_gpus.extend([(d, gpu) for gpu in _gpu_order(FLAGS.ps_gpu)]) - return ps_gpus - - -def _ps_devices(all_workers=False): - """List of ps devices (where to put the experts). - - Args: - all_workers: whether the list is for all async workers or just this one. - - Returns: - a list of device names - """ - if FLAGS.ps_replicas > 0: - if FLAGS.ps_gpu > 0: - return [ - FLAGS.ps_job + "/task:%d/GPU:%d" % (d, gpu) - for (d, gpu) in _ps_gpus(all_workers=all_workers) - ] - else: - return [ - FLAGS.ps_job + "/task:%d" % d - for d in _ps_replicas(all_workers=all_workers) - ] - else: - if FLAGS.worker_gpu > 0: - return ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] - else: - return [""] - - -def data_parallelism(all_workers=False): - """Over which devices do we split each training batch. - - In old-fashioned async mode, we split the batch over all GPUs on the - current worker. - - In sync mode, we split the batch over all the parameter server GPUs. - - This function returns an expert_utils.Parallelism object, which can be used - to build the model. It is configured in a way that any variables created - by `tf.get_variable` will be assigned to the parameter servers and shared - between datashards. - - Args: - all_workers: whether the devices are all async workers or just this one. - - Returns: - a expert_utils.Parallelism. - """ - - def _replica_device_setter(worker_device): - if FLAGS.ps_replicas == 0: - return worker_device - return tf.train.replica_device_setter( - worker_device=worker_device, - ps_tasks=FLAGS.ps_replicas, - ps_device=FLAGS.ps_job + "/GPU:0" if FLAGS.ps_gpu > 0 else FLAGS.ps_job) - - if FLAGS.schedule == "local_run": - assert not FLAGS.sync - datashard_devices = ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] - if FLAGS.locally_shard_to_cpu: - datashard_devices += ["cpu:0"] - caching_devices = None - elif FLAGS.sync: - assert FLAGS.ps_replicas > 0 - datashard_devices = [ - _replica_device_setter(d) for d in _ps_devices(all_workers=all_workers) - ] - if FLAGS.ps_gpu > 0 and FLAGS.ps_replicas > 1: - caching_devices = [ - FLAGS.ps_job + "/task:%d/cpu:0" % d - for (d, _) in _ps_gpus(all_workers=all_workers) - ] - else: - caching_devices = None - else: - # old fashioned async - compute on worker - if FLAGS.worker_gpu > 1: - datashard_devices = [ - _replica_device_setter(FLAGS.worker_job + "/GPU:%d" % d) - for d in _gpu_order(FLAGS.worker_gpu) - ] - caching_devices = [FLAGS.worker_job + "/GPU:0"] * FLAGS.worker_gpu - else: - datashard_devices = [_replica_device_setter(FLAGS.worker_job)] - caching_devices = None - tf.logging.info("datashard_devices: %s", datashard_devices) - tf.logging.info("caching_devices: %s", caching_devices) - return eu.Parallelism( - datashard_devices, - reuse=True, - caching_devices=caching_devices, - daisy_chain_variables=FLAGS.daisy_chain_variables) + decoding.decode_from_dataset(estimator) From accf019e8676afbecc14048fe2151ae85a645be1 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Wed, 9 Aug 2017 12:01:20 +0000 Subject: [PATCH 0233/4095] Import readline in decoding.py --- tensor2tensor/utils/decoding.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) mode change 100644 => 100755 tensor2tensor/utils/decoding.py diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py old mode 100644 new mode 100755 index 12057d8e6..cf981a1e3 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -248,6 +248,11 @@ def _interactive_input_fn(hparams): vocabulary = p_hparams.vocabulary["inputs" if has_input else "targets"] # This should be longer than the longest input. const_array_size = 10000 + # Import readline if available for command line editing and recall + try: + import readline + except ImportError: + pass while True: prompt = ("INTERACTIVE MODE num_samples=%d decode_length=%d \n" " it=<input_type> ('text' or 'image' or 'label')\n" @@ -255,7 +260,7 @@ def _interactive_input_fn(hparams): " in=<input_problem> (set the input problem number)\n" " ou=<output_problem> (set the output problem number)\n" " ns=<num_samples> (changes number of samples)\n" - " dl=<decode_length> (changes decode legnth)\n" + " dl=<decode_length> (changes decode length)\n" " <%s> (decode)\n" " q (quit)\n" ">" % (num_samples, decode_length, "source_string" From 8b622a4c177c9555955e9f24e47ec21d8134a6be Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 4 Aug 2017 16:27:46 -0700 Subject: [PATCH 0234/4095] Add rouge metrics (rouge-2 and rouge-l f1 scores) to wiki_sum problems. PiperOrigin-RevId: 164325588 --- tensor2tensor/data_generators/problem.py | 3 +- tensor2tensor/utils/metrics.py | 5 + tensor2tensor/utils/model_builder.py | 2 +- tensor2tensor/utils/rouge.py | 249 +++++++++++++++++++++++ tensor2tensor/utils/rouge_test.py | 120 +++++++++++ 5 files changed, 377 insertions(+), 2 deletions(-) create mode 100644 tensor2tensor/utils/rouge.py create mode 100644 tensor2tensor/utils/rouge_test.py diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index fb7e53cb7..2bbc88192 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -424,5 +424,6 @@ def eval_metrics(self): return [ metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5, metrics.Metrics.ACC_PER_SEQ, metrics.Metrics.NEG_LOG_PERPLEXITY, - metrics.Metrics.APPROX_BLEU + metrics.Metrics.APPROX_BLEU, metrics.Metrics.ROUGE_2_F, + metrics.Metrics.ROUGE_L_F ] diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index db60e07c8..ea2187427 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -24,6 +24,7 @@ from tensor2tensor.layers import common_layers from tensor2tensor.utils import bleu_hook +from tensor2tensor.utils import rouge import tensorflow as tf @@ -37,6 +38,8 @@ class Metrics(object): NEG_LOG_PERPLEXITY = "neg_log_perplexity" APPROX_BLEU = "approx_bleu_score" RMSE = "rmse" + ROUGE_2_F = "rouge_2_fscore" + ROUGE_L_F = "rouge_L_fscore" def padded_rmse(predictions, labels, weights_fn=common_layers.weights_nonzero): @@ -188,4 +191,6 @@ def problem_metric_fn(predictions, labels, weights): Metrics.NEG_LOG_PERPLEXITY: padded_neg_log_perplexity, Metrics.APPROX_BLEU: bleu_hook.bleu_score, Metrics.RMSE: padded_rmse, + Metrics.ROUGE_2_F: rouge.rouge_2_fscore, + Metrics.ROUGE_L_F: rouge.rouge_l_fscore, } diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 48d5dd7a0..a12aa1122 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -195,7 +195,7 @@ def nth_model(n): features, skip=(skipping_is_on and skip_this_one)) with tf.variable_scope("losses_avg", reuse=True): total_loss, ops = 0.0, [] - for loss_key, loss_value in six.iteritems(losses_dict): + for loss_key, loss_value in losses_dict.iteritems(): loss_moving_avg = tf.get_variable("problem_%d/%s_loss" % (n, loss_key)) ops.append( diff --git a/tensor2tensor/utils/rouge.py b/tensor2tensor/utils/rouge.py new file mode 100644 index 000000000..29c84729f --- /dev/null +++ b/tensor2tensor/utils/rouge.py @@ -0,0 +1,249 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""ROUGe metric implementation. + +This is a modified and slightly extended verison of +https://github.com/miso-belica/sumy/blob/dev/sumy/evaluation/rouge.py. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np + +import tensorflow as tf + + +def _len_lcs(x, y): + """Returns the length of the Longest Common Subsequence between two seqs. + + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: sequence of words + y: sequence of words + + Returns + integer: Length of LCS between x and y + """ + table = _lcs(x, y) + n, m = len(x), len(y) + return table[n, m] + + +def _lcs(x, y): + """Computes the length of the LCS between two seqs. + + The implementation below uses a DP programming algorithm and runs + in O(nm) time where n = len(x) and m = len(y). + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: collection of words + y: collection of words + + Returns: + Table of dictionary of coord and len lcs + """ + n, m = len(x), len(y) + table = dict() + for i in range(n + 1): + for j in range(m + 1): + if i == 0 or j == 0: + table[i, j] = 0 + elif x[i - 1] == y[j - 1]: + table[i, j] = table[i - 1, j - 1] + 1 + else: + table[i, j] = max(table[i - 1, j], table[i, j - 1]) + return table + + +def _f_lcs(llcs, m, n): + """Computes the LCS-based F-measure score. + + Source: http://research.microsoft.com/en-us/um/people/cyl/download/papers/ + rouge-working-note-v1.3.1.pdf + + Args: + llcs: Length of LCS + m: number of words in reference summary + n: number of words in candidate summary + + Returns: + Float. LCS-based F-measure score + """ + r_lcs = llcs / m + p_lcs = llcs / n + beta = p_lcs / (r_lcs + 1e-12) + num = (1 + (beta**2)) * r_lcs * p_lcs + denom = r_lcs + ((beta**2) * p_lcs) + f_lcs = num / (denom + 1e-12) + return f_lcs + + +def rouge_l_sentence_level(eval_sentences, ref_sentences): + """Computes ROUGE-L (sentence level) of two collections of sentences. + + Source: http://research.microsoft.com/en-us/um/people/cyl/download/papers/ + rouge-working-note-v1.3.1.pdf + + Calculated according to: + R_lcs = LCS(X,Y)/m + P_lcs = LCS(X,Y)/n + F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs) + + where: + X = reference summary + Y = Candidate summary + m = length of reference summary + n = length of candidate summary + + Args: + eval_sentences: The sentences that have been picked by the summarizer + ref_sentences: The sentences from the referene set + + Returns: + A float: F_lcs + """ + + f1_scores = [] + for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences): + m = len(ref_sentence) + n = len(eval_sentence) + lcs = _len_lcs(eval_sentence, ref_sentence) + f1_scores.append(_f_lcs(lcs, m, n)) + return np.mean(f1_scores, dtype=np.float32) + + +def rouge_l_fscore(predictions, labels, **unused_kwargs): + """ROUGE scores computation between labels and predictions. + + This is an approximate ROUGE scoring method since we do not glue word pieces + or decode the ids and tokenize the output. + + Args: + predictions: tensor, model predicitons + labels: tensor, gold output. + + Returns: + rouge_l_fscore: approx rouge-l f1 score. + """ + outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) + # Convert the outputs and labels to a [batch_size, input_length] tensor. + outputs = tf.squeeze(outputs, axis=[-1, -2]) + labels = tf.squeeze(labels, axis=[-1, -2]) + rouge_l_f_score = tf.py_func(rouge_l_sentence_level, (labels, outputs), + tf.float32) + return rouge_l_f_score, tf.constant(1.0) + + +def _get_ngrams(n, text): + """Calcualtes n-grams. + + Args: + n: which n-grams to calculate + text: An array of tokens + + Returns: + A set of n-grams + """ + ngram_set = set() + text_length = len(text) + max_index_ngram_start = text_length - n + for i in range(max_index_ngram_start + 1): + ngram_set.add(tuple(text[i:i + n])) + return ngram_set + + +def rouge_n(eval_sentences, ref_sentences, n=2): + """Computes ROUGE-N f1 score of two text collections of sentences. + + Sourece: http://research.microsoft.com/en-us/um/people/cyl/download/ + papers/rouge-working-note-v1.3.1.pdf + + Args: + eval_sentences: The sentences that have been picked by the summarizer + ref_sentences: The sentences from the reference set + n: Size of ngram. Defaults to 2. + + Returns: + f1 score for ROUGE-N + """ + + f1_scores = [] + for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences): + eval_ngrams = _get_ngrams(n, eval_sentence) + ref_ngrams = _get_ngrams(n, ref_sentence) + ref_count = len(ref_ngrams) + eval_count = len(eval_ngrams) + + # Gets the overlapping ngrams between evaluated and reference + overlapping_ngrams = eval_ngrams.intersection(ref_ngrams) + overlapping_count = len(overlapping_ngrams) + + # Handle edge case. This isn't mathematically correct, but it's good enough + if eval_count == 0: + precision = 0.0 + else: + precision = overlapping_count / eval_count + + if ref_count == 0: + recall = 0.0 + else: + recall = overlapping_count / ref_count + + f1_scores.append(2.0 * ((precision * recall) / (precision + recall + 1e-8))) + + # return overlapping_count / reference_count + return np.mean(f1_scores, dtype=np.float32) + + +def rouge_2_fscore(predictions, labels, **unused_kwargs): + """ROUGE-2 F1 score computation between labels and predictions. + + This is an approximate ROUGE scoring method since we do not glue word pieces + or decode the ids and tokenize the output. + + Args: + predictions: tensor, model predicitons + labels: tensor, gold output. + + Returns: + rouge2_fscore: approx rouge-2 f1 score. + """ + + outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) + # Convert the outputs and labels to a [batch_size, input_length] tensor. + outputs = tf.squeeze(outputs, axis=[-1, -2]) + labels = tf.squeeze(labels, axis=[-1, -2]) + rouge_2_f_score = tf.py_func(rouge_n, (labels, outputs), tf.float32) + return rouge_2_f_score, tf.constant(1.0) diff --git a/tensor2tensor/utils/rouge_test.py b/tensor2tensor/utils/rouge_test.py new file mode 100644 index 000000000..2a8c260e2 --- /dev/null +++ b/tensor2tensor/utils/rouge_test.py @@ -0,0 +1,120 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Rouge metric.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +import numpy as np +from tensor2tensor.utils import rouge + +import tensorflow as tf + + +class TestRouge2Metric(tf.test.TestCase): + """Tests the rouge-2 metric.""" + + def testRouge2Identical(self): + hypotheses = np.array([[1, 2, 3, 4, 5, 1, 6, 7, 0], + [1, 2, 3, 4, 5, 1, 6, 8, 7]]) + references = np.array([[1, 2, 3, 4, 5, 1, 6, 7, 0], + [1, 2, 3, 4, 5, 1, 6, 8, 7]]) + self.assertAllClose(rouge.rouge_n(hypotheses, references), 1.0, atol=1e-03) + + def testRouge2Disjoint(self): + hypotheses = np.array([[1, 2, 3, 4, 5, 1, 6, 7, 0], + [1, 2, 3, 4, 5, 1, 6, 8, 7]]) + references = np.array([[8, 9, 10, 11, 12, 13, 14, 15, 16, 17], + [9, 10, 11, 12, 13, 14, 15, 16, 17, 0]]) + self.assertEqual(rouge.rouge_n(hypotheses, references), 0.0) + + def testRouge2PartialOverlap(self): + hypotheses = np.array([[1, 2, 3, 4, 5, 1, 6, 7, 0], + [1, 2, 3, 4, 5, 1, 6, 8, 7]]) + references = np.array([[1, 9, 2, 3, 4, 5, 1, 10, 6, 7], + [1, 9, 2, 3, 4, 5, 1, 10, 6, 7]]) + self.assertAllClose(rouge.rouge_n(hypotheses, references), 0.53, atol=1e-03) + + +class TestRougeLMetric(tf.test.TestCase): + """Tests the rouge-l metric.""" + + def testRougeLIdentical(self): + hypotheses = np.array([[1, 2, 3, 4, 5, 1, 6, 7, 0], + [1, 2, 3, 4, 5, 1, 6, 8, 7]]) + references = np.array([[1, 2, 3, 4, 5, 1, 6, 7, 0], + [1, 2, 3, 4, 5, 1, 6, 8, 7]]) + self.assertAllClose( + rouge.rouge_l_sentence_level(hypotheses, references), 1.0, atol=1e-03) + + def testRougeLDisjoint(self): + hypotheses = np.array([[1, 2, 3, 4, 5, 1, 6, 7, 0], + [1, 2, 3, 4, 5, 1, 6, 8, 7]]) + references = np.array([[8, 9, 10, 11, 12, 13, 14, 15, 16, 17], + [9, 10, 11, 12, 13, 14, 15, 16, 17, 0]]) + self.assertEqual(rouge.rouge_l_sentence_level(hypotheses, references), 0.0) + + def testRougeLPartialOverlap(self): + hypotheses = np.array([[1, 2, 3, 4, 5, 1, 6, 7, 0], + [1, 2, 3, 4, 5, 1, 6, 8, 7]]) + references = np.array([[1, 9, 2, 3, 4, 5, 1, 10, 6, 7], + [1, 9, 2, 3, 4, 5, 1, 10, 6, 7]]) + self.assertAllClose( + rouge.rouge_l_sentence_level(hypotheses, references), 0.837, atol=1e-03) + + +class TestRougeMetricsE2E(tf.test.TestCase): + """Tests the rouge metrics end-to-end.""" + + def testRouge2MetricE2E(self): + vocab_size = 4 + batch_size = 12 + seq_length = 12 + predictions = tf.one_hot( + np.random.randint(vocab_size, size=(batch_size, seq_length, 1, 1)), + depth=4, + dtype=tf.float32) + targets = np.random.randint(4, size=(12, 12, 1, 1)) + with self.test_session() as session: + scores, _ = rouge.rouge_2_fscore(predictions, + tf.constant(targets, dtype=tf.int32)) + a = tf.reduce_mean(scores) + session.run(tf.global_variables_initializer()) + session.run(a) + + def testRougeLMetricE2E(self): + vocab_size = 4 + batch_size = 12 + seq_length = 12 + predictions = tf.one_hot( + np.random.randint(vocab_size, size=(batch_size, seq_length, 1, 1)), + depth=4, + dtype=tf.float32) + targets = np.random.randint(4, size=(12, 12, 1, 1)) + with self.test_session() as session: + scores, _ = rouge.rouge_l_fscore( + predictions, + tf.constant(targets, dtype=tf.int32)) + a = tf.reduce_mean(scores) + session.run(tf.global_variables_initializer()) + session.run(a) + + +if __name__ == "__main__": + tf.test.main() From 13ac3b45994e28a00284a28943ace21d57a14dc6 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 7 Aug 2017 11:39:43 -0700 Subject: [PATCH 0235/4095] Filter C++/java files from the des2code dataset and replace tabs by spaces PiperOrigin-RevId: 164487331 --- tensor2tensor/data_generators/desc2code.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/data_generators/desc2code.py b/tensor2tensor/data_generators/desc2code.py index 98c93aacd..b6f0d38a2 100644 --- a/tensor2tensor/data_generators/desc2code.py +++ b/tensor2tensor/data_generators/desc2code.py @@ -43,6 +43,7 @@ _DESC_DIR_NAME = "description" _CODE_PY_DIR_NAME = "solutions_python" +_CODE_PY_FILTER_PATERNS = ["#include", "# include", "import java."] _VOCAB_EN_FILENAME = "vocab.endefr" _VOCAB_PY_FILENAME = "vocab.py" @@ -145,6 +146,7 @@ def generator_samples_content(get_source, get_target): for code_file in sample.code_files: with tf.gfile.GFile(code_file, mode="r") as target_file: target = target_file.read() + target = target.replace("\t", " ") yield source, target elif sample.code_files: # Only take the source if a target exists yield source, target @@ -224,10 +226,18 @@ def next_sample(subdir, dirs, files): # pylint: disable=unused-argument # pairs, the problem difficulty, the names of the algorithmic techniques # needed) desc_file = os.path.join(subdir, _DESC_DIR_NAME, "description.txt") - code_rootdir = os.path.join(subdir, _CODE_PY_DIR_NAME) - code_files = [ - f for f in tf.gfile.Glob(os.path.join(code_rootdir, "*.txt")) - ] + code_files = [] + # As the dataset is noisy, the program deduce the language from the file + # content. + code_pattern = os.path.join(subdir, _CODE_PY_DIR_NAME, "*.txt") + for f in tf.gfile.Glob(code_pattern): + with tf.gfile.GFile(f, mode="r") as target_file: + # Hack to filter C++/Java files. In theory some python comments could + # make the file be concidered as C++ but in practice the chance of + # getting a false negative is low. + content = target_file.read() + if not any(p in content for p in _CODE_PY_FILTER_PATERNS): + code_files.append(f) return CodingPbInfo( desc_file=desc_file, code_files=code_files @@ -239,4 +249,3 @@ def next_sample(subdir, dirs, files): # pylint: disable=unused-argument for w in tf.gfile.Walk(data_rootdir): if contains_samples(*w): yield next_sample(*w) - From 032dab0cdf5d74932ad451fef24435625183ddc0 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 7 Aug 2017 16:38:37 -0700 Subject: [PATCH 0236/4095] Add Desc2Cpp problem PiperOrigin-RevId: 164528773 --- tensor2tensor/data_generators/desc2code.py | 108 ++++++++++++++---- .../data_generators/desc2code_test.py | 64 +++++++++++ tensor2tensor/data_generators/problem.py | 2 + 3 files changed, 151 insertions(+), 23 deletions(-) create mode 100644 tensor2tensor/data_generators/desc2code_test.py diff --git a/tensor2tensor/data_generators/desc2code.py b/tensor2tensor/data_generators/desc2code.py index b6f0d38a2..6cef6db63 100644 --- a/tensor2tensor/data_generators/desc2code.py +++ b/tensor2tensor/data_generators/desc2code.py @@ -22,6 +22,7 @@ import collections import os import random +import re import zipfile # Dependency imports @@ -42,11 +43,32 @@ _DATASET_PB_PATH = "description2code_current/" _DESC_DIR_NAME = "description" -_CODE_PY_DIR_NAME = "solutions_python" -_CODE_PY_FILTER_PATERNS = ["#include", "# include", "import java."] _VOCAB_EN_FILENAME = "vocab.endefr" -_VOCAB_PY_FILENAME = "vocab.py" + +_RE_CPP_INLINE_COMMENT = re.compile("//.*?\n") # Compiled once + + +# Constant defined for a language problem +CodingPbConstants = collections.namedtuple("CodingPbConstants", [ + "code_dir_name", + "vocab_filename", + "filter_patterns", + "target_space", +]) + +PB_PY = CodingPbConstants( + code_dir_name="solutions_python", + vocab_filename="vocab.py", + filter_patterns=["#include", "# include", "import java."], + target_space=problem.SpaceID.PY_TOK, +) +PB_CPP = CodingPbConstants( + code_dir_name="solutions_c++", + vocab_filename="vocab.cpp", + filter_patterns=["import java."], + target_space=problem.SpaceID.CPP_TOK, +) # Struct containing a coding problem (contains the paths to the descriptions # and code files) @@ -68,6 +90,14 @@ def num_shards(self): def use_subword_tokenizer(self): return True + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return self.pb_constants.target_space + @property def input_vocab_size(self): return 2**15 # 32k @@ -82,7 +112,21 @@ def vocab_input_filename(self): @property def vocab_target_filename(self): - return "{}.{}".format(_VOCAB_PY_FILENAME, self.target_vocab_size) + return "{}.{}".format( + self.pb_constants.vocab_filename, self.target_vocab_size) + + def preprocess_target(self, target): + """Apply some preprocessing to the target. + + For instance, remove space/tabs. + + Args: + target (str): code source content + + Returns: + the pre-processed string content + """ + return target def feature_encoders(self, data_dir): source_vocab_filename = os.path.join(data_dir, self.vocab_input_filename) @@ -94,24 +138,11 @@ def feature_encoders(self, data_dir): "targets": target_token, } - -@registry.register_problem("desc2code_py") -class Desc2CodePyProblem(Desc2CodeProblem): - """Description2Code for python problem.""" - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.PY_TOK - def train_generator(self, data_dir, tmp_dir, train): # Called twice: for train and test # Get the list of the training samples (coding challenge samples) - samples = list(generator_samples(tmp_dir)) + samples = list(generator_samples(tmp_dir, self.pb_constants)) # Split between train and dev # Suffle to get problems from diverse sources (CodeChef and CodeForces) and @@ -146,7 +177,7 @@ def generator_samples_content(get_source, get_target): for code_file in sample.code_files: with tf.gfile.GFile(code_file, mode="r") as target_file: target = target_file.read() - target = target.replace("\t", " ") + target = self.preprocess_target(target) yield source, target elif sample.code_files: # Only take the source if a target exists yield source, target @@ -178,16 +209,47 @@ def generator_target(): } +@registry.register_problem("desc2code_py") +class Desc2CodePyProblem(Desc2CodeProblem): + """Description2Code for python problem.""" + + @property + def pb_constants(self): + return PB_PY + + def preprocess_target(self, target): + """Simple tab to space replacement.""" + return target.replace("\t", " ") + + +@registry.register_problem("desc2code_cpp") +class Desc2CodeCppProblem(Desc2CodeProblem): + """Description2Code for C++ problem.""" + + @property + def pb_constants(self): + return PB_CPP + + def preprocess_target(self, target): + """Pre-process Cpp files.""" + target = re.sub(_RE_CPP_INLINE_COMMENT, " ", target) # Remove comments + # The regex rule is quite simple, So will fail if a // is inside a string, + # and don't remove /* */ comments + target = " ".join(target.split()) # Normalize all spaces + return target + + # Utils functions -def generator_samples(tmp_dir): +def generator_samples(tmp_dir, pb_cst): """Generator for the dataset samples. If not present, download and extract the dataset. Args: tmp_dir: path to the directory where to download the dataset. + pb_cst: CodingPbConstants object defining paths Yields: A CodingPbInfo object containing the next challenge informations. @@ -217,7 +279,7 @@ def contains_samples(subdir, dirs, files): # pylint: disable=unused-argument """Check that the folder contains a problem.""" return ( _DESC_DIR_NAME in dirs and - _CODE_PY_DIR_NAME in dirs + pb_cst.code_dir_name in dirs ) def next_sample(subdir, dirs, files): # pylint: disable=unused-argument @@ -229,14 +291,14 @@ def next_sample(subdir, dirs, files): # pylint: disable=unused-argument code_files = [] # As the dataset is noisy, the program deduce the language from the file # content. - code_pattern = os.path.join(subdir, _CODE_PY_DIR_NAME, "*.txt") + code_pattern = os.path.join(subdir, pb_cst.code_dir_name, "*.txt") for f in tf.gfile.Glob(code_pattern): with tf.gfile.GFile(f, mode="r") as target_file: # Hack to filter C++/Java files. In theory some python comments could # make the file be concidered as C++ but in practice the chance of # getting a false negative is low. content = target_file.read() - if not any(p in content for p in _CODE_PY_FILTER_PATERNS): + if not any(p in content for p in pb_cst.filter_patterns): code_files.append(f) return CodingPbInfo( desc_file=desc_file, diff --git a/tensor2tensor/data_generators/desc2code_test.py b/tensor2tensor/data_generators/desc2code_test.py new file mode 100644 index 000000000..0d10c7d6f --- /dev/null +++ b/tensor2tensor/data_generators/desc2code_test.py @@ -0,0 +1,64 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for google3.third_party.py.tensor2tensor.data_generators.desc2code.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports +from tensor2tensor.data_generators import desc2code +from google3.testing.pybase import googletest + + +CODE_CPP_IN = """ + #include <iostream> + +void main() { // This comment will be removed + // This too. + // + /* Not this one */ +\t +\t + int a \t\n = 3;// +// +} + +""" + +CODE_CPP_OUT = "#include <iostream> void main() { /* Not this one */ int a = 3; }" # pylint: disable=line-too-loong + + +class Desc2codeTest(googletest.TestCase): + + def testCppPreprocess(self): + """Check that the file correctly preprocess the code source.""" + cpp_pb = desc2code.Desc2CodeCppProblem() + + self.assertEqual( # Add space beween two lines + cpp_pb.preprocess_target("firstline//comm1\nsecondline//comm2\n"), + "firstline secondline" + ) + # Checking for boths comments and spaces + self.assertEqual(cpp_pb.preprocess_target(CODE_CPP_IN), CODE_CPP_OUT) + self.assertEqual( + cpp_pb.preprocess_target(" not removed //abcd "), + "not removed //abcd" + ) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 2bbc88192..66cd7df4f 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -88,6 +88,8 @@ class SpaceID(object): PEPTIDE = 26 # Python PY_TOK = 27 + # C++ + CPP_TOK = 28 class Problem(object): From 4517651aea3891584b65224093e7ce182752fecd Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 7 Aug 2017 19:09:39 -0700 Subject: [PATCH 0237/4095] Play more with VAE, small corrections in README and ByteNet and shared weights hparams. PiperOrigin-RevId: 164543518 --- tensor2tensor/bin/t2t-trainer | 17 ++- tensor2tensor/data_generators/cipher.py | 14 +-- tensor2tensor/models/bytenet.py | 6 +- tensor2tensor/models/transformer_vae.py | 154 ++++++++++++++++-------- tensor2tensor/utils/model_builder.py | 6 +- tensor2tensor/utils/t2t_model.py | 12 +- 6 files changed, 136 insertions(+), 73 deletions(-) diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 13dd7d355..6e0be3f23 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -30,6 +30,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + # Dependency imports from tensor2tensor.utils import registry @@ -57,22 +59,25 @@ def main(_): usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) trainer_utils.log_registry() trainer_utils.validate_flags() - tf.gfile.MakeDirs(FLAGS.output_dir) + output_dir = os.path.expanduser(FLAGS.output_dir) + tmp_dir = os.path.expanduser(FLAGS.tmp_dir) + data_dir = os.path.expanduser(FLAGS.data_dir) + tf.gfile.MakeDir(output_dir) # Generate data if requested. if FLAGS.generate_data: - tf.gfile.MakeDirs(FLAGS.data_dir) - tf.gfile.MakeDirs(FLAGS.tmp_dir) + tf.gfile.MakeDirs(data_dir) + tf.gfile.MakeDirs(tmp_dir) for problem_name in FLAGS.problems.split("-"): tf.logging.info("Generating data for %s" % problem_name) problem = registry.problem(problem_name) - problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir) + problem.generate_data(data_dir, tmp_dir) # Run the trainer. trainer_utils.run( - data_dir=FLAGS.data_dir, + data_dir=data_dir, model=FLAGS.model, - output_dir=FLAGS.output_dir, + output_dir=output_dir, train_steps=FLAGS.train_steps, eval_steps=FLAGS.eval_steps, schedule=FLAGS.schedule) diff --git a/tensor2tensor/data_generators/cipher.py b/tensor2tensor/data_generators/cipher.py index 3a743337a..41dcbd80e 100644 --- a/tensor2tensor/data_generators/cipher.py +++ b/tensor2tensor/data_generators/cipher.py @@ -56,8 +56,8 @@ def _gen(nbr_symbols, max_length, nbr_cases): for plain, code in zip(indices, codes): yield { - "X": plain, - "Y": code, + "inputs": plain, + "targets": code, } return _gen @@ -99,8 +99,8 @@ def _gen(nbr_symbols, max_length, nbr_cases): for plain, code in zip(indices, codes): yield { - "X": plain, - "Y": code, + "inputs": plain, + "targets": code, } return _gen @@ -148,7 +148,7 @@ def key(self): return [1, 3] -class Layer(object): +class ShiftEncryptionLayer(object): """A single layer for shift.""" def __init__(self, vocab, shift): @@ -211,7 +211,7 @@ def encipher_shift(plaintext, plain_vocab, shift): ciphertext (list of Strings): encrypted plain text. """ ciphertext = [] - cipher = Layer(plain_vocab, shift) + cipher = ShiftEncryptionLayer(plain_vocab, shift) for _, sentence in enumerate(plaintext): cipher_sentence = [] @@ -238,7 +238,7 @@ def encipher_vigenere(plaintext, plain_vocab, key): # generate Vigenere table layers = [] for i in range(len(plain_vocab)): - layers.append(Layer(plain_vocab, i)) + layers.append(ShiftEncryptionLayer(plain_vocab, i)) for i, sentence in enumerate(plaintext): cipher_sentence = [] diff --git a/tensor2tensor/models/bytenet.py b/tensor2tensor/models/bytenet.py index d9c4e29a9..e4537ef3f 100644 --- a/tensor2tensor/models/bytenet.py +++ b/tensor2tensor/models/bytenet.py @@ -40,13 +40,13 @@ def residual_dilated_conv(x, repeat, padding, name, hparams): for i in xrange(repeat): with tf.variable_scope("repeat_%d" % i): y = common_layers.conv_block( - x, + common_layers.layer_norm(x, hparams.hidden_size, name="lnorm"), hparams.hidden_size, dilations_and_kernels, padding=padding, name="residual_conv") - x = common_layers.layer_norm(x + y, hparams.hidden_size, name="lnorm") - x = tf.nn.dropout(x, hparams.dropout) + y = tf.nn.dropout(y, 1.0 - hparams.dropout) + x += y return x diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 31de7bd5f..f3d400045 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -31,13 +31,31 @@ import tensorflow as tf -def decompress(source, hparams, name): +def residual_conv(x, repeat, hparams, name): + """A stack of convolution blocks with residual connections.""" + with tf.variable_scope(name): + k = (3, 1) + dilations_and_kernels = [((1, 1), k) for _ in xrange(3)] + for i in xrange(repeat): + with tf.variable_scope("repeat_%d" % i): + y = common_layers.conv_block( + common_layers.layer_norm(x, hparams.hidden_size, name="lnorm"), + hparams.hidden_size, + dilations_and_kernels, + padding="SAME", + name="residual_conv") + y = tf.nn.dropout(y, 1.0 - hparams.dropout) + x += y + return x + + +def decompress(source, hparams, first_relu, name): """Decompression function.""" with tf.variable_scope(name): shape = tf.shape(source) thicker = common_layers.conv_block( source, hparams.hidden_size * 2, [((1, 1), (1, 1))], - name="decompress_conv") + first_relu=first_relu, name="decompress_conv") return tf.reshape(thicker, [shape[0], shape[1] * 2, 1, hparams.hidden_size]) @@ -60,6 +78,7 @@ def compress_vae(inputs, hparams, name): # Run compression by strided convs. cur = tf.expand_dims(inputs, axis=2) for i in xrange(hparams.num_compress_steps): + cur = residual_conv(cur, 1, hparams, "compress_rc_%d" % i) cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (2, 1))], strides=(2, 1), name="compress_%d" % i) @@ -72,73 +91,78 @@ def compress_vae(inputs, hparams, name): return cur, kl_loss +def encode(x, x_space, hparams, name): + """Transformer preparations and encoder.""" + with tf.variable_scope(name): + (encoder_input, encoder_self_attention_bias, + _) = transformer.transformer_prepare_encoder(x, x_space, hparams) + residual_fn = transformer.get_residual_fn(hparams) + encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) + return transformer.transformer_encoder( + encoder_input, residual_fn, encoder_self_attention_bias, hparams) + + +def dropmask(targets, targets_dropout_max, is_training): + if not is_training: + return targets + targets_drop_prob = tf.random_uniform([]) * targets_dropout_max + drop_mask = tf.random_uniform(tf.shape(targets)[:-1]) + drop_mask = tf.to_float(tf.less(drop_mask, targets_drop_prob)) + keep_mask = tf.expand_dims(1.0 - drop_mask, axis=2) + return targets * keep_mask + + def vae_transformer_internal(inputs, targets, target_space, hparams): """VAE Transformer, main step used for training.""" with tf.variable_scope("vae_transformer"): is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN # Prepare inputs, targets, and k. inputs = common_layers.flatten4d3d(inputs) + input_len = tf.shape(inputs)[1] # Double input size to cover targets. + inputs = tf.pad(inputs, [[0, 0], [0, input_len], [0, 0]]) + inputs.set_shape([None, None, hparams.hidden_size]) targets = common_layers.flatten4d3d(targets) k = 2**hparams.num_compress_steps - _, targets = common_layers.pad_to_same_length( + inputs, targets = common_layers.pad_to_same_length( inputs, targets, final_length_divisible_by=k) + inputs = encode(inputs, target_space, hparams, "input_enc") + + # Dropout targets or swap for zeros 5% of the time. + max_prestep = 90000 + prob_targets = 0.95 if is_training else 1.0 + targets_dropout_max = common_layers.inverse_lin_decay(max_prestep) - 0.01 + targets = dropmask(targets, targets_dropout_max, is_training) + targets = tf.cond(tf.less(tf.random_uniform([]), prob_targets), + lambda: targets, lambda: tf.zeros_like(targets)) + + # Join targets with inputs, run encoder. + # to_encode = common_layers.conv_block( + # tf.expand_dims(tf.concat([targets, inputs], axis=2), axis=2), + # hparams.hidden_size, [((1, 1), (1, 1))], + # first_relu=False, name="join_targets") + # to_compress = encode(tf.squeeze(to_encode, axis=2), + # target_space, hparams, "enc") + + # Compress and vae. + z, kl_loss = compress_vae(targets, hparams, "vae") - # Transformer preparations and encoder. - (encoder_input, encoder_self_attention_bias, - encoder_decoder_attention_bias) = transformer.transformer_prepare_encoder( - inputs, target_space, hparams) - residual_fn = transformer.get_residual_fn(hparams) - encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) - encoder_output = transformer.transformer_encoder( - encoder_input, residual_fn, encoder_self_attention_bias, hparams) - - def get_decoder_autoregressive(): - """Decoder input for autoregressive computation.""" - (a, b) = transformer.transformer_prepare_decoder(targets, hparams) - return (a, b, tf.constant(0.0)) - - # 10% of the time we compress all-zeros, as will be at decoding start. - prob_targets = 0.9 if is_training else 1.0 - to_compress = tf.cond(tf.less(tf.random_uniform([]), prob_targets), - lambda: targets, lambda: tf.zeros_like(targets)) - z, kl_loss = compress_vae(to_compress, hparams, "vae") # Decompress. for i in xrange(hparams.num_compress_steps): j = hparams.num_hidden_layers - i - 1 - z = decompress(z, hparams, "decompress_%d" % j) + z = residual_conv(z, 1, hparams, "dec_rc_%d" % j) + z = decompress(z, hparams, i > 0, "decompress_%d" % j) - def get_decoder_from_vae(): - """Decoder input computed by VAE.""" - # Return decoder stuff. - (a, b) = transformer.transformer_prepare_decoder( - tf.squeeze(z, axis=2), hparams) - return (a, b, kl_loss) + # Join z with inputs, run decoder. + to_decode = common_layers.conv_block( + tf.concat([z, tf.expand_dims(inputs, axis=2)], axis=3), + hparams.hidden_size, [((1, 1), (1, 1))], name="join_z") + ret = encode(tf.squeeze(to_decode, axis=2), target_space, hparams, "dec") + # to_decode = residual_conv(to_decode, 2, hparams, "dec_conv") + # ret = tf.squeeze(to_decode, axis=2) # Randomize decoder inputs.. - prob_do_vae = common_layers.inverse_exp_decay(40000) * 0.7 - step = tf.to_float(tf.contrib.framework.get_global_step()) - if not is_training: - prob_do_vae = tf.cond(tf.less(step, 40000.0), lambda: tf.constant(0.0), - lambda: tf.constant(1.0)) - (decoder_input, decoder_self_attention_bias, kl_loss2) = tf.cond( - tf.less(tf.random_uniform([]), prob_do_vae), - get_decoder_from_vae, get_decoder_autoregressive) - - # Transformer decoder. - decoder_output = transformer.transformer_decoder( - decoder_input, encoder_output, residual_fn, decoder_self_attention_bias, - encoder_decoder_attention_bias, hparams) - decoder_output = tf.expand_dims(decoder_output, 2) - - cond_self = tf.cond(tf.less(step, 30000.0), lambda: tf.constant(1.0), - lambda: tf.constant(0.0)) - prob_self = 0.4 if is_training else cond_self - (ret, kl_loss) = tf.cond(tf.less(tf.random_uniform([]), prob_self), - lambda: (z, kl_loss), - lambda: (decoder_output, kl_loss2)) - - kl_loss *= common_layers.inverse_exp_decay(50000) * 2.0 - return ret, kl_loss + kl_loss *= common_layers.inverse_exp_decay(max_prestep) * 3.0 + return tf.expand_dims(ret, axis=2), kl_loss @registry.register_model @@ -171,6 +195,15 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, features, False, last_position_only=last_position_only) sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) samples = tf.concat(sharded_samples, 0) + + # 2nd step. + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + features["targets"] = samples + sharded_logits, _ = self.model_fn( + features, False, last_position_only=last_position_only) + sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) + samples = tf.concat(sharded_samples, 0) + if inputs_old is not None: # Restore to not confuse Estimator. features["inputs"] = inputs_old return samples @@ -180,6 +213,21 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, def transformer_vae_small(): """Set of hyperparameters.""" hparams = transformer.transformer_small() + hparams.batch_size = 2048 hparams.add_hparam("z_size", 128) hparams.add_hparam("num_compress_steps", 4) return hparams + + +@registry.register_hparams +def transformer_vae_base(): + """Set of hyperparameters.""" + hparams = transformer_vae_small() + hparams.hidden_size = 512 + hparams.filter_size = 2048 + hparams.attention_dropout = 0.1 + hparams.relu_dropout = 0.1 + hparams.dropout = 0.1 + hparams.num_hidden_layers = 4 + hparams.z_size = 256 + return hparams diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index a12aa1122..01768e263 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -195,9 +195,9 @@ def nth_model(n): features, skip=(skipping_is_on and skip_this_one)) with tf.variable_scope("losses_avg", reuse=True): total_loss, ops = 0.0, [] - for loss_key, loss_value in losses_dict.iteritems(): - loss_moving_avg = tf.get_variable("problem_%d/%s_loss" % (n, - loss_key)) + for loss_key, loss_value in six.iteritems(losses_dict): + loss_moving_avg = tf.get_variable( + "problem_%d/%s_loss" % (n, loss_key)) ops.append( loss_moving_avg.assign(loss_moving_avg * 0.9 + loss_value * 0.1)) total_loss += loss_value diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 3af4f10c1..a63f5cc7f 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -85,11 +85,21 @@ def __init__(self, ps_devices = [""] hparams = copy.copy(hparams) hparams.add_hparam("mode", mode) - # when not in training mode, set all forms of dropout to zero. + # When not in training mode, set all forms of dropout to zero. if mode != tf.contrib.learn.ModeKeys.TRAIN: for key in hparams.values(): if key[-len("dropout"):] == "dropout": setattr(hparams, key, 0.0) + # If vocabularies differ, unset shared_embedding_and_softmax_weights. + if hparams.shared_embedding_and_softmax_weights: + same_vocab_sizes = True + for problem in hparams.problems: + if "inputs" in problem.input_modality: + if problem.input_modality["inputs"] != problem.target_modality: + same_vocab_sizes = False + if not same_vocab_sizes: + tf.logging.info("Unsetting shared_embedding_and_softmax_weights.") + hparams.shared_embedding_and_softmax_weights = 0 self._hparams = hparams self._data_parallelism = data_parallelism self._num_datashards = data_parallelism.n From ce59768a415c4ceeeb47872d28ad90a10026ac18 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 8 Aug 2017 09:17:50 -0700 Subject: [PATCH 0238/4095] Add reversible residual block (from RevNet) PiperOrigin-RevId: 164601350 --- tensor2tensor/layers/rev_block.py | 195 +++++++++++++++++++++++++ tensor2tensor/layers/rev_block_test.py | 92 ++++++++++++ 2 files changed, 287 insertions(+) create mode 100644 tensor2tensor/layers/rev_block.py create mode 100644 tensor2tensor/layers/rev_block_test.py diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py new file mode 100644 index 000000000..1e1a7b848 --- /dev/null +++ b/tensor2tensor/layers/rev_block.py @@ -0,0 +1,195 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Reversible Residual Block. + +From +[The Reversible Residual Network: Backpropagation Without Storing +Activations](https://arxiv.org/abs/1707.04585). +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re + +# Dependency imports + +import tensorflow as tf +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function + +LAYER_RE = re.compile(".*revlayer_([0-9]*)/([fg])/.*") + + +def _rev_layer_forward(xs, f, g): + """Forward for 1 reversible layer.""" + x1, x2 = xs + with tf.variable_scope("f"): + y1 = x1 + f(x2) + with tf.variable_scope("g"): + y2 = x2 + g(y1) + return (y1, y2) + + +def _rev_layer_backward(ys, grad_ys, f, g, f_vars, g_vars): + """Backprop for 1 layer.""" + y1, y2 = ys + grad_y1, grad_y2 = grad_ys + + # Reconstruct intermediates and inputs (x1, x2) + # stop_gradients required on y1 and x2 to prevent infinite recursion into this + # grad function on the calls to tf.gradients. + y1_stop = tf.stop_gradient(y1) + with tf.variable_scope("g"): + gy1 = g(y1_stop) + + x2 = y2 - gy1 + x2_stop = tf.stop_gradient(x2) + with tf.variable_scope("f"): + fx2 = f(x2_stop) + + x1 = y1 - fx2 + + # Compute gradients wrt to inputs + # dL/dy2 * dG(y1)/y1 + grad_gy1_y2 = tf.gradients(gy1, y1_stop, grad_y2)[0] + grad_x1 = grad_y1 + grad_gy1_y2 + grad_x2 = (tf.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 + tf.gradients( + fx2, x2_stop, grad_gy1_y2)[0]) + + # Compute gradients wrt to vars in f and g + grad_g_vars = tf.gradients(gy1, g_vars, grad_y2) + grad_f_y1 = tf.gradients(fx2, f_vars, grad_y1) + grad_f_y2 = tf.gradients(fx2, f_vars, grad_gy1_y2) + grad_f_vars = [tf.add_n(grads) for grads in zip(grad_f_y1, grad_f_y2)] + + return (x1, x2), (grad_x1, grad_x2), grad_f_vars, grad_g_vars + + +def _rev_block_forward(x, f, g, num_layers=1, layer_scopes=None, name=None): + """Forward for a series of reversible layers.""" + x1, x2 = tf.split(x, 2, axis=len(x.get_shape()) - 1) + out = (x1, x2) + with tf.variable_scope(name, default_name="revblock"): + for i in xrange(num_layers): + with tf.variable_scope("revlayer_%d" % i) as layer_vs: + if layer_scopes is not None: + layer_scopes.append(layer_vs) + out = _rev_layer_forward(out, f, g) + + y1, y2 = out + y = tf.concat([y1, y2], axis=-1) + return y + + +def rev_block(x, f, g, num_layers=1, is_training=True): + """A block of reversible residual layers. + + A reversible residual layer is defined as: + + ``` + x1, x2 = tf.split(x, 2, axis=-1) + y1 = x1 + f(x2) + y2 = x2 + g(y1) + y = tf.concat([y1, y2], axis=-1) + ``` + + Args: + x: a float Tensor, input, will be split evenly across the last dim. + f: a function, (Tensor) -> (Tensor). Should not change the shape of the + Tensor. May create variables. Should NOT close over any Tensor values. + g: a function, (Tensor) -> (Tensor). Should not change the shape of the + Tensor. May create variables. Should NOT close over any Tensor values. + num_layers: int, number of reversible residual layers. Each layer will + apply f and g according to the equations above, with new variables in each + layer. + is_training: bool, whether to actually use the efficient backprop codepath. + + Returns: + y: a float Tensor, output. + """ + layer_scopes = [] + + def rev_block_grad(op, grad_y): + """Custom gradient fn for a block of reversible residual layers.""" + y = op.outputs[0] + ys = tf.split(y, 2, axis=len(y.get_shape()) - 1) + grad_ys = tf.split(grad_y, 2, axis=len(y.get_shape()) - 1) + + # Find all variables from f and from g + # Keep track of their positions in all_vars + all_vars = op.inputs[1:] + f_vars = [[] for _ in range(num_layers)] + g_vars = [[] for _ in range(num_layers)] + f_vars_idxs = [[] for _ in range(num_layers)] + g_vars_idxs = [[] for _ in range(num_layers)] + + for i, v in enumerate(all_vars): + ref = v.op.inputs[0] + assert ref.dtype == dtypes.float32_ref + regex = LAYER_RE.match(v.name) + layer_no = int(regex.group(1)) + fn_name = regex.group(2) + if fn_name == "f": + f_vars[layer_no].append(ref) + f_vars_idxs[layer_no].append(i) + else: + assert fn_name == "g" + g_vars[layer_no].append(ref) + g_vars_idxs[layer_no].append(i) + + f_grads = [] + g_grads = [] + + # Reverse state containers to go backward + layer_scopes.reverse() + f_vars.reverse() + g_vars.reverse() + + for i in xrange(num_layers): + with tf.variable_scope(layer_scopes[i], reuse=True): + ys, grad_ys, grad_f_vars, grad_g_vars = _rev_layer_backward( + ys, grad_ys, f, g, f_vars[i], g_vars[i]) + f_grads.append(grad_f_vars) + g_grads.append(grad_g_vars) + + # Gradients were collected in reverse layer order + f_grads.reverse() + g_grads.reverse() + + # Reorder the gradients so they match the original order of all_vars + var_grads = [None] * len(all_vars) + for idxs, grads in zip(f_vars_idxs, f_grads) + zip(g_vars_idxs, g_grads): + for i, grad in zip(idxs, grads): + var_grads[i] = grad + + grad_x = tf.concat(grad_ys, axis=-1) + all_grads = [grad_x] + var_grads + return all_grads + + @function.Defun( + tf.float32, + python_grad_func=rev_block_grad, + shape_func=lambda _: [x.get_shape()]) + def rev_block_defun(inp): + inp.set_shape(x.get_shape()) + return _rev_block_forward( + inp, f, g, num_layers=num_layers, layer_scopes=layer_scopes) + + if is_training: + return rev_block_defun(x) + else: + return _rev_block_forward(x, f, g, num_layers=num_layers) diff --git a/tensor2tensor/layers/rev_block_test.py b/tensor2tensor/layers/rev_block_test.py new file mode 100644 index 000000000..bc4bcc6a4 --- /dev/null +++ b/tensor2tensor/layers/rev_block_test.py @@ -0,0 +1,92 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for RevBlock.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.layers import rev_block + +import tensorflow as tf + + +class RevBlockTest(tf.test.TestCase): + + def testSmoke(self): + channels = 8 + num_layers = 4 + batch_size = 16 + use_defun = True + tf.set_random_seed(1234) + + def f(x): + return tf.layers.dense(x, channels // 2, use_bias=True) + + def g(x): + return tf.layers.dense(x, channels // 2, use_bias=True) + + x = tf.random_uniform([batch_size, channels], dtype=tf.float32) + y = rev_block.rev_block( + x, f, g, num_layers=num_layers, is_training=use_defun) + loss = tf.reduce_mean(y + 10.) + grads = tf.gradients(loss, [x] + tf.global_variables()) + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + _ = sess.run(grads) + + def testRevBlock(self): + channels = 8 + num_layers = 4 + batch_size = 16 + tf.set_random_seed(1234) + + def f(x): + return tf.layers.dense(x, channels // 2, use_bias=True) + + def g(x): + return tf.layers.dense(x, channels // 2, use_bias=True) + + x = tf.random_uniform([batch_size, channels], dtype=tf.float32) + + with tf.variable_scope("defun") as vs: + y_defun = rev_block.rev_block(x, f, g, num_layers=num_layers) + fg_vars = vs.trainable_variables() + + num_vars = len(tf.global_variables()) + with tf.variable_scope(vs, reuse=True): + y = rev_block.rev_block(x, f, g, num_layers=num_layers, is_training=False) + # Ensure no new vars were created - full reuse + assert len(tf.global_variables()) == num_vars + + loss_defun = tf.reduce_mean(y_defun + 10.) + loss = tf.reduce_mean(y + 10.) + + grads_defun = tf.gradients(loss_defun, [x] + fg_vars) + grads = tf.gradients(loss, [x] + fg_vars) + + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + y_val, yd_val, gd_val, g_val = sess.run([y, y_defun, grads_defun, grads]) + self.assertAllClose(y_val, yd_val) + for g1, g2 in zip(gd_val, g_val): + self.assertAllClose(g1, g2) + + +if __name__ == "__main__": + tf.test.main() From 9c54d864b73bc12103883f86ad3dead9aa3d4ce7 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 8 Aug 2017 11:14:05 -0700 Subject: [PATCH 0239/4095] Real modalities PiperOrigin-RevId: 164617848 --- .../data_generators/gene_expression.py | 15 ++------ tensor2tensor/layers/common_layers.py | 18 +++++++-- tensor2tensor/layers/modalities.py | 38 +++++++++++++++++-- tensor2tensor/models/gene_expression.py | 8 ++-- tensor2tensor/utils/metrics.py | 38 +++++++++++++++++-- tensor2tensor/utils/registry.py | 14 +++++++ tensor2tensor/utils/trainer_utils.py | 5 ++- 7 files changed, 110 insertions(+), 26 deletions(-) diff --git a/tensor2tensor/data_generators/gene_expression.py b/tensor2tensor/data_generators/gene_expression.py index d314cec59..ea3aa4cc4 100644 --- a/tensor2tensor/data_generators/gene_expression.py +++ b/tensor2tensor/data_generators/gene_expression.py @@ -146,17 +146,14 @@ def hparams(self, defaults, model_hparams): p = defaults vocab_size = self._encoders["inputs"].vocab_size p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} - p.target_modality = ("%s:real" % registry.Modalities.GENERIC, + p.target_modality = ("%s:log_poisson_loss" % registry.Modalities.REAL, self.num_output_predictions) p.input_space_id = problem.SpaceID.DNA p.target_space_id = problem.SpaceID.REAL def example_reading_spec(self): - # TODO(rsepassi): propagate and apply targets_mask to output RealModality - # and to eval metrics (weights_fn?). data_fields = { "inputs": tf.VarLenFeature(tf.int64), - "targets_mask": tf.VarLenFeature(tf.float32), "targets": tf.VarLenFeature(tf.float32), } data_items_to_decoders = None @@ -168,18 +165,12 @@ def preprocess_examples(self, examples, mode, hparams): # Reshape targets examples["targets"] = tf.reshape(examples["targets"], - [-1, self.num_output_predictions]) - examples["targets_mask"] = tf.reshape(examples["targets_mask"], [-1, 1]) - - # Set masked targets to 0 (i.e. pad) so that loss and metrics ignore them. - # Add epsilon because some unmasked labels are actually 0. - examples["targets"] += 1e-6 - examples["targets"] *= examples["targets_mask"] + [-1, 1, self.num_output_predictions]) return examples def eval_metrics(self): - return [metrics.Metrics.RMSE] + return [metrics.Metrics.LOG_POISSON, metrics.Metrics.R2] @registry.register_problem("gene_expression_cage10") diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index ea18322e4..8054b27df 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -359,13 +359,23 @@ def conv2d_kernel(kernel_size_arg, name_suffix): return conv2d_kernel(kernel_size, "single") -def conv(inputs, filters, kernel_size, **kwargs): - return conv_internal(tf.layers.conv2d, inputs, filters, kernel_size, **kwargs) +def conv(inputs, filters, kernel_size, dilation_rate=1, **kwargs): + return conv_internal( + tf.layers.conv2d, + inputs, + filters, + kernel_size, + dilation_rate=dilation_rate, + **kwargs) -def conv1d(inputs, filters, kernel_size, **kwargs): +def conv1d(inputs, filters, kernel_size, dilation_rate=1, **kwargs): return tf.squeeze( - conv(tf.expand_dims(inputs, 2), filters, (kernel_size, 1), **kwargs), 2) + conv( + tf.expand_dims(inputs, 2), + filters, (kernel_size, 1), + dilation_rate=(dilation_rate, 1), + **kwargs), 2) def separable_conv(inputs, filters, kernel_size, **kwargs): diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 523c52fa8..5071a49ad 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -438,6 +438,7 @@ def __init__(self, model_hparams, vocab_size): @registry.register_image_modality("identity") @registry.register_symbol_modality("identity") @registry.register_class_label_modality("identity") +@registry.register_real_modality("identity") class IdentityModality(modality.Modality): """Does nothing.""" @@ -452,9 +453,12 @@ def top(self, body_output, _): return body_output -@registry.register_generic_modality("real") class RealModality(modality.Modality): - """Modality for real (i.e. float) vectors.""" + """Base class for real (i.e. float) vectors. + + * Bottom is a linear projection layer to hparams.hidden_size. + * Top is a linear projection layer to vocab_size. + """ def bottom(self, x): with tf.variable_scope("real"): @@ -464,7 +468,16 @@ def top(self, body_output, _): with tf.variable_scope("real"): return tf.layers.dense(body_output, self._vocab_size) - def loss(self, top_out, targets, weights_fn=common_layers.weights_nonzero): + def loss(self, top_out, targets, weights_fn=common_layers.weights_all): + raise NotImplementedError() + + +@registry.register_real_modality("default") +@registry.register_real_modality("l2_loss") +class RealL2LossModality(RealModality): + """Modality for real (i.e. float) vectors with L2 (Gaussian) loss.""" + + def loss(self, top_out, targets, weights_fn=common_layers.weights_all): predictions = top_out with tf.name_scope("l2"): weights = weights_fn(targets) @@ -472,6 +485,25 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_nonzero): return tf.reduce_sum(l2 * weights), tf.reduce_sum(weights) +@registry.register_real_modality("log_poisson_loss") +class RealLogPoissonLossModality(RealL2LossModality): + """Modality for real (i.e. float) vectors with log Poisson regression loss. + + * Top is a linear projection to vocab size followed by a log transform. + """ + + def top(self, body_output, _): + with tf.variable_scope("real"): + return tf.log(tf.layers.dense(body_output, self._vocab_size)) + + def loss(self, top_out, targets, weights_fn=common_layers.weights_all): + predictions = top_out + with tf.name_scope("log_possion"): + weights = weights_fn(targets) + lp_loss = tf.nn.log_poisson_loss(targets, predictions) + return tf.reduce_sum(lp_loss * weights), tf.reduce_sum(weights) + + @registry.register_image_modality("identity_no_pad") class IdentityModalityNoPad(modality.Modality): """Does nothing except making sure that there is no padding in cross-ent.""" diff --git a/tensor2tensor/models/gene_expression.py b/tensor2tensor/models/gene_expression.py index af2d83158..ad15926ac 100644 --- a/tensor2tensor/models/gene_expression.py +++ b/tensor2tensor/models/gene_expression.py @@ -121,12 +121,14 @@ def fc_layer(x, num_out, dropout_rate, name="fc"): def gene_expression_conv_base(): """Hparams for GeneExpressionConv model.""" hparams = common_hparams.basic_params1() + hparams.max_length = 10000000 + hparams.batch_size = 1024 + hparams.dropout = 0.1 hparams.add_hparam("num_conv_layers", 4) hparams.add_hparam("num_dconv_layers", 7) hparams.add_hparam("pooling_windows", [2, 4, 4, 4]) - # TODO(rsepassi): Correct the values of these hyperparameters - hparams.hidden_size = 128 - hparams.kernel_width = 128 + hparams.hidden_size = 256 + hparams.kernel_width = 20 hparams.add_hparam("stride", 1) return hparams diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index ea2187427..b4d7360ca 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -38,11 +38,13 @@ class Metrics(object): NEG_LOG_PERPLEXITY = "neg_log_perplexity" APPROX_BLEU = "approx_bleu_score" RMSE = "rmse" + LOG_POISSON = "log_poisson" + R2 = "r_squared" ROUGE_2_F = "rouge_2_fscore" ROUGE_L_F = "rouge_L_fscore" -def padded_rmse(predictions, labels, weights_fn=common_layers.weights_nonzero): +def padded_rmse(predictions, labels, weights_fn=common_layers.weights_all): predictions, labels = common_layers.pad_with_zeros(predictions, labels) targets = labels weights = weights_fn(targets) @@ -50,6 +52,33 @@ def padded_rmse(predictions, labels, weights_fn=common_layers.weights_nonzero): return tf.reduce_sum(error * weights), tf.reduce_sum(weights) +def padded_log_poisson(predictions, + labels, + weights_fn=common_layers.weights_all): + # Expects predictions to already be transformed into log space + predictions, labels = common_layers.pad_with_zeros(predictions, labels) + targets = labels + weights = weights_fn(targets) + + lp_loss = tf.nn.log_poisson_loss(targets, predictions, compute_full_loss=True) + return tf.reduce_sum(lp_loss * weights), tf.reduce_sum(weights) + + +def padded_variance_explained(predictions, + labels, + weights_fn=common_layers.weights_all): + # aka R^2 + predictions, labels = common_layers.pad_with_zeros(predictions, labels) + targets = labels + weights = weights_fn(targets) + + y_bar = tf.reduce_mean(weights * targets) + tot_ss = tf.reduce_sum(weights * tf.pow(targets - y_bar, 2)) + res_ss = tf.reduce_sum(weights * tf.pow(targets - predictions, 2)) + r2 = 1. - res_ss / tot_ss + return r2, tf.reduce_sum(weights) + + def padded_accuracy_topk(predictions, labels, k, @@ -165,8 +194,9 @@ def problem_metric_fn(predictions, labels, weights): (problem_name, metrics, METRICS_FNS.keys())) class_output = "image" in problem_name and "coco" not in problem_name - weights_fn = (common_layers.weights_all - if class_output else common_layers.weights_nonzero) + real_output = "gene_expression" in problem_name + weights_fn = (common_layers.weights_all if class_output or real_output else + common_layers.weights_nonzero) for metric in metrics: metric_fn = METRICS_FNS[metric] @@ -191,6 +221,8 @@ def problem_metric_fn(predictions, labels, weights): Metrics.NEG_LOG_PERPLEXITY: padded_neg_log_perplexity, Metrics.APPROX_BLEU: bleu_hook.bleu_score, Metrics.RMSE: padded_rmse, + Metrics.LOG_POISSON: padded_log_poisson, + Metrics.R2: padded_variance_explained, Metrics.ROUGE_2_F: rouge.rouge_2_fscore, Metrics.ROUGE_L_F: rouge.rouge_l_fscore, } diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 5402e5bde..fea647b2b 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -64,6 +64,7 @@ class Modalities(object): AUDIO = "audio" CLASS_LABEL = "class_label" GENERIC = "generic" + REAL = "real" _MODALITIES = { @@ -72,6 +73,7 @@ class Modalities(object): Modalities.AUDIO: {}, Modalities.CLASS_LABEL: {}, Modalities.GENERIC: {}, + Modalities.REAL: {}, } # Camel case to snake case utils @@ -277,6 +279,11 @@ def class_label_modality(name=None): Modalities.CLASS_LABEL.capitalize()) +def real_modality(name=None): + return _internal_get_modality(name, _MODALITIES[Modalities.REAL], + Modalities.REAL.capitalize()) + + def _internal_register_modality(name, mod_collection, collection_str): """Register a modality into mod_collection.""" @@ -309,6 +316,12 @@ def register_generic_modality(name=None): Modalities.GENERIC.capitalize()) +def register_real_modality(name=None): + """Register a real modality. name defaults to class name snake-cased.""" + return _internal_register_modality(name, _MODALITIES[Modalities.REAL], + Modalities.REAL.capitalize()) + + def register_audio_modality(name=None): """Register an audio modality. name defaults to class name snake-cased.""" return _internal_register_modality(name, _MODALITIES[Modalities.AUDIO], @@ -366,6 +379,7 @@ def create_modality(modality_spec, model_hparams): Modalities.IMAGE: image_modality, Modalities.CLASS_LABEL: class_label_modality, Modalities.GENERIC: generic_modality, + Modalities.REAL: real_modality, } modality_full_name, vocab_size = modality_spec diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 9e869c15c..ebf58ee97 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -219,10 +219,13 @@ def add_problem_hparams(hparams, problems): for problem_name in problems.split("-"): try: problem = registry.problem(problem_name) - p_hparams = problem.internal_hparams(hparams) except ValueError: problem = None + + if problem is None: p_hparams = problem_hparams.problem_hparams(problem_name, hparams) + else: + p_hparams = problem.internal_hparams(hparams) hparams.problem_instances.append(problem) hparams.problems.append(p_hparams) From f5c9b17e617ea9179b7d84d36b1e8162cb369f25 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 8 Aug 2017 12:42:38 -0700 Subject: [PATCH 0240/4095] Added options for configuring different types of processing on layer input and layer output (normalization, dropout, residuals). These settings are configured by common_hparams, and should work across many models. Normalization on layer input instead of after the residual seems to help in learning deep networks. This change breaks current model checkpoints. PiperOrigin-RevId: 164630450 --- tensor2tensor/layers/common_hparams.py | 17 +- tensor2tensor/layers/common_layers.py | 146 ++++++++++++----- tensor2tensor/layers/common_layers_test.py | 55 ++----- tensor2tensor/models/attention_lm.py | 54 +++---- tensor2tensor/models/attention_lm_moe.py | 27 ++-- tensor2tensor/models/slicenet.py | 10 +- tensor2tensor/models/transformer.py | 176 ++++++++++++--------- tensor2tensor/models/transformer_moe.py | 39 +++-- tensor2tensor/models/transformer_vae.py | 3 +- 9 files changed, 307 insertions(+), 220 deletions(-) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 6ecb06fb4..498b5eb37 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -69,8 +69,23 @@ def basic_params1(): sampling_method="argmax", # "argmax" or "random" problem_choice="adaptive", # "uniform", "adaptive", "distributed" multiply_embedding_mode="sqrt_depth", + # Sequences of operations to perform on layer input and layer output. + # Used by common_layers.layer_preprocess, common_layers.layer_postprocess + # Each character repsesnts an operation: + # d: apply dropout + # n: apply normalization (see norm_type and norm_epsilon) + # a: add layer input (residual connection - only during postprocess) + # TODO(noam): The current settings ("", "dan") are the published version + # of the transformer. ("n", "da") seems better for harder-to-learn + # models, so it should probably be the default. + layer_preprocess_sequence="", + layer_postprocess_sequence="dan", + # dropout rate to use during layer_preprocess and layer_postprocess + layer_prepostprocess_dropout=0.1, + # What type of normalization to use norm_type="none", # "batch", layer", "noam", "none". - layer_norm_epsilon=1e-6, + # epsilon parameter to normalization function + norm_epsilon=1e-6, symbol_modality_num_shards=16, # setting the max length in a minibatch. 0 means default behavior, # max_length = hparams.batch_size * length_multiplier diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 8054b27df..31bc0bced 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -462,64 +462,136 @@ def layer_norm(x, filters=None, epsilon=1e-6, name=None, reuse=None): return result -def noam_norm(x, name=None): +def noam_norm(x, epsilon=1.0, name=None): """One version of layer normalization.""" with tf.name_scope(name, default_name="noam_norm", values=[x]): shape = x.get_shape() ndims = len(shape) - return (tf.nn.l2_normalize(x, ndims - 1, epsilon=1.0) * + return (tf.nn.l2_normalize(x, ndims - 1, epsilon=epsilon) * tf.sqrt(tf.to_float(shape[-1]))) -def get_norm(norm_type): - """Get the normalizer function.""" +def apply_norm(x, norm_type, depth, epsilon): + """Apply Normalization.""" if norm_type == "layer": - return lambda x, name, filters=None, epsilon=1e-6: layer_norm( # pylint: disable=g-long-lambda - x, filters=filters, epsilon=epsilon, name=name) + return layer_norm(x, filters=depth, epsilon=epsilon) if norm_type == "batch": - return tf.layers.batch_normalization + return tf.layers.batch_normalization(x, epsilon=epsilon) if norm_type == "noam": - return noam_norm + return noam_norm(x, epsilon) if norm_type == "none": - return lambda x, name: x + return x raise ValueError("Parameter normalizer_fn must be one of: 'layer', 'batch'," "'noam', 'none'.") -def residual_fn(x, - y, - norm_type, - residual_dropout, - filters=None, - epsilon=1e-16, - name=None, - reuse=None): - """Returns a function for combining layer input and layer output. +def layer_prepostprocess(previous_value, + x, + sequence, + dropout_rate, + norm_type, + depth, + epsilon, + name): + """Apply a sequence of functions to the input or output of a layer. + + The sequence is specified as a string which may contain the following + characters: + a: add previous_value + n: apply normalization + d: apply dropout - The returned function on x (layer input) and y (layer output) computes: - norm_function(x + dropout(y)) + For example, if sequence=="dna", then the output is + previous_value + normalize(dropout(x)) Args: - x: tensor, input layer - y: tensor, output layer - norm_type: string, type of normalizer function - residual_dropout: integer, dropout value for residual connection - filters: integer, dimension for layer norm, optional - epsilon: integer, value of layer norm epsilon - name: string, name - reuse: bool, whether to reuse + previous_value: A Tensor, to be added as a residual connection ('a') + x: A Tensor to be transformed. + sequence: a string. + dropout_rate: a float + norm_type: a string (see apply_norm()) + depth: an integer (size of last dimension of x). + epsilon: a float (parameter for normalization) + name: a string Returns: - residual layer output with applied norm_fn. + a Tensor """ - with tf.variable_scope( - name, default_name="residual", values=[x, y], reuse=reuse): - norm_fn = get_norm(norm_type) - res = x + tf.nn.dropout(y, 1.0 - residual_dropout) - if norm_type == "layer": - return norm_fn(res, filters=filters, epsilon=epsilon, name=norm_type) - else: - return norm_fn(res, name=norm_type) + with tf.variable_scope(name): + for c in sequence: + if c == "a": + x += previous_value + elif c == "n": + x = apply_norm(x, norm_type, depth, epsilon) + else: + assert c == "d", ("Unknown sequence step %s" % c) + x = tf.nn.dropout(x, 1.0 - dropout_rate) + return x + + +def layer_preprocess(layer_input, hparams): + """Apply layer preprocessing. + + See layer_prepostprocess() for details. + + A hyperparemeters object is passed for convenience. The hyperparameters + that may be used are: + + layer_preprocess_sequence + layer_prepostprocess_dropout + norm_type + hidden_size + norm_epsilon + + Args: + layer_input: a Tensor + hparams: a hyperparameters object. + + Returns: + a Tensor + """ + assert "a" not in hparams.layer_preprocess_sequence, ( + "No residual connections allowed in hparams.layer_preprocess_sequence") + return layer_prepostprocess( + None, layer_input, + sequence=hparams.layer_preprocess_sequence, + dropout_rate=hparams.layer_prepostprocess_dropout, + norm_type=hparams.norm_type, + depth=hparams.hidden_size, + epsilon=hparams.norm_epsilon, + name="layer_prepostprocess") + + +def layer_postprocess(layer_input, layer_output, hparams): + """Apply layer postprocessing. + + See layer_prepostprocess() for details. + + A hyperparemeters object is passed for convenience. The hyperparameters + that may be used are: + + layer_postprocess_sequence + layer_prepostprocess_dropout + norm_type + hidden_size + norm_epsilon + + Args: + layer_input: a Tensor + layer_output: a Tensor + hparams: a hyperparameters object. + + Returns: + a Tensor + """ + return layer_prepostprocess( + layer_input, layer_output, + sequence=hparams.layer_postprocess_sequence, + dropout_rate=hparams.layer_prepostprocess_dropout, + norm_type=hparams.norm_type, + depth=hparams.hidden_size, + epsilon=hparams.norm_epsilon, + name="layer_postprocess") def conv_block_internal(conv_fn, diff --git a/tensor2tensor/layers/common_layers_test.py b/tensor2tensor/layers/common_layers_test.py index df3ccc68f..3cf3f3374 100644 --- a/tensor2tensor/layers/common_layers_test.py +++ b/tensor2tensor/layers/common_layers_test.py @@ -303,74 +303,43 @@ def testDeconvStride2MultiStep(self): actual = session.run(a) self.assertEqual(actual.shape, (5, 32, 1, 16)) - def testGetNormLayerFn(self): - norm_type = "layer" + def testApplyNormLayer(self): with self.test_session() as session: - a = common_layers.get_norm(norm_type) x1 = np.random.rand(5, 2, 1, 11) - x2 = a(tf.constant(x1, dtype=tf.float32), name="layer", filters=11) + x2 = common_layers.apply_norm( + tf.constant(x1, dtype=tf.float32), "layer", depth=11, epsilon=1e-6) session.run(tf.global_variables_initializer()) actual = session.run(x2) self.assertEqual(actual.shape, (5, 2, 1, 11)) - def testGetNormNoamFn(self): - norm_type = "noam" + def testApplyNormNoam(self): with self.test_session() as session: - a = common_layers.get_norm(norm_type) x1 = np.random.rand(5, 2, 1, 11) - x2 = a(tf.constant(x1, dtype=tf.float32), name="noam") + x2 = common_layers.apply_norm( + tf.constant(x1, dtype=tf.float32), "noam", depth=11, epsilon=1e-6) session.run(tf.global_variables_initializer()) actual = session.run(x2) self.assertEqual(actual.shape, (5, 2, 1, 11)) - def testGetNormBatchFn(self): - norm_type = "batch" + def testApplyNormBatch(self): with self.test_session() as session: - a = common_layers.get_norm(norm_type) x1 = np.random.rand(5, 2, 1, 11) - x2 = a(tf.constant(x1, dtype=tf.float32), name="batch") + x2 = common_layers.apply_norm( + tf.constant(x1, dtype=tf.float32), "batch", depth=11, epsilon=1e-6) session.run(tf.global_variables_initializer()) actual = session.run(x2) self.assertEqual(actual.shape, (5, 2, 1, 11)) - def testGetNormNoneFn(self): - norm_type = "none" + def testApplyNormNone(self): with self.test_session() as session: - a = common_layers.get_norm(norm_type) x1 = np.random.rand(5, 2, 1, 11) - x2 = a(tf.constant(x1, dtype=tf.float32), name="none") + x2 = common_layers.apply_norm( + tf.constant(x1, dtype=tf.float32), "none", depth=11, epsilon=1e-6) session.run(tf.global_variables_initializer()) actual = session.run(x2) self.assertEqual(actual.shape, (5, 2, 1, 11)) self.assertAllClose(actual, x1, atol=1e-03) - def testResidualFn(self): - norm_type = "batch" - with self.test_session() as session: - x1 = np.random.rand(5, 2, 1, 11) - x2 = np.random.rand(5, 2, 1, 11) - x3 = common_layers.residual_fn( - tf.constant(x1, dtype=tf.float32), - tf.constant(x2, dtype=tf.float32), norm_type, 0.1) - session.run(tf.global_variables_initializer()) - actual = session.run(x3) - self.assertEqual(actual.shape, (5, 2, 1, 11)) - - def testResidualFnWithLayerNorm(self): - norm_type = "layer" - with self.test_session() as session: - x1 = np.random.rand(5, 2, 1, 11) - x2 = np.random.rand(5, 2, 1, 11) - x3 = common_layers.residual_fn( - tf.constant(x1, dtype=tf.float32), - tf.constant(x2, dtype=tf.float32), - norm_type, - 0.1, - epsilon=0.1) - session.run(tf.global_variables_initializer()) - actual = session.run(x3) - self.assertEqual(actual.shape, (5, 2, 1, 11)) - def testGlobalPool1d(self): x1 = np.random.rand(5, 4, 11) no_mask = np.ones((5, 4)) diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 664bc9e21..495f25634 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -51,13 +51,10 @@ def model_fn_body(self, features): (decoder_input, decoder_self_attention_bias) = attention_lm_prepare_decoder( targets, hparams) - def residual_fn(x, y): - return common_layers.layer_norm(x + tf.nn.dropout( - y, 1.0 - hparams.residual_dropout)) - - decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) - decoder_output = attention_lm_decoder(decoder_input, residual_fn, - decoder_self_attention_bias, hparams) + decoder_input = tf.nn.dropout( + decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) + decoder_output = attention_lm_decoder( + decoder_input, decoder_self_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) return decoder_output @@ -84,7 +81,6 @@ def attention_lm_prepare_decoder(targets, hparams): def attention_lm_decoder(decoder_input, - residual_fn, decoder_self_attention_bias, hparams, name="decoder"): @@ -92,7 +88,6 @@ def attention_lm_decoder(decoder_input, Args: decoder_input: a Tensor - residual_fn: a function from (layer_input, layer_output) -> combined_output decoder_self_attention_bias: bias Tensor for self-attention (see common_attention.attention_bias()) hparams: hyperparameters for model @@ -105,25 +100,25 @@ def attention_lm_decoder(decoder_input, with tf.variable_scope(name): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): - x = residual_fn( - x, - common_attention.multihead_attention( - x, - None, - decoder_self_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - name="decoder_self_attention")) - x = residual_fn(x, - common_layers.conv_hidden_relu( - x, - hparams.filter_size, - hparams.hidden_size, - dropout=hparams.relu_dropout)) - return x + with tf.variable_scope("self_attention"): + y = common_attention.multihead_attention( + common_layers.layer_preprocess(x, hparams), + None, + decoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout) + x = common_layers.layer_postprocess(x, y, hparams) + with tf.variable_scope("ffn"): + y = common_layers.conv_hidden_relu( + common_layers.layer_preprocess(x, hparams), + hparams.filter_size, + hparams.hidden_size, + dropout=hparams.relu_dropout) + x = common_layers.layer_postprocess(x, y, hparams) + return common_layers.layer_preprocess(x, hparams) @registry.register_hparams @@ -158,7 +153,6 @@ def attention_lm_base(): # when not in training mode. hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("relu_dropout", 0.0) - hparams.add_hparam("residual_dropout", 0.1) hparams.add_hparam("pos", "timing") # timing, none return hparams @@ -178,5 +172,5 @@ def attention_lm_small(): hparams.num_hidden_layers = 4 hparams.hidden_size = 512 hparams.filter_size = 2048 - hparams.residual_dropout = 0.5 + hparams.layer_prepostprocess_dropout = 0.5 return hparams diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 780478fec..1869eef66 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -49,21 +49,24 @@ def model_fn_body_sharded(self, sharded_features): targets = sharded_features["targets"] targets = dp(tf.squeeze, targets, 2) + def preprocess(x): + return dp(common_layers.layer_preprocess, x, hparams) + + def postprocess(x, y): + return dp(common_layers.layer_postprocess, x, y, hparams) + (decoder_input, decoder_self_attention_bias) = dp( attention_lm_moe_prepare_decoder, targets, hparams) - def residual_fn(x, y): - return common_layers.layer_norm(x + tf.nn.dropout( - y, 1.0 - hparams.residual_dropout)) - - x = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.residual_dropout) + x = dp(tf.nn.dropout, decoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) extra_loss = 0.0 for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("attention"): y = dp( common_attention.multihead_attention, - x, + preprocess(x), None, decoder_self_attention_bias, hparams.attention_key_channels or hparams.hidden_size, @@ -72,11 +75,11 @@ def residual_fn(x, y): hparams.num_heads, hparams.attention_dropout, name="decoder_self_attention") - x = dp(residual_fn, x, y) + x = postprocess(x, y) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers.split(","): y, loss = common_layers.moe_layer( - dp, self._ps_devices, x, + dp, self._ps_devices, preprocess(x), hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, hparams.moe_n2, hparams.moe_loss_coef) @@ -84,11 +87,12 @@ def residual_fn(x, y): else: y = dp( common_layers.conv_hidden_relu, - x, + preprocess(x), hparams.filter_size, hparams.hidden_size, dropout=hparams.relu_dropout) - x = dp(residual_fn, x, y) + x = postprocess(x, y) + x = preprocess(x) decoder_output = dp(tf.expand_dims, x, 2) return decoder_output, extra_loss @@ -163,7 +167,6 @@ def attention_lm_moe_base(): # when not in training mode. hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("relu_dropout", 0.0) - hparams.add_hparam("residual_dropout", 0.1) hparams.add_hparam("pos", "timing") # timing, none return hparams @@ -232,5 +235,5 @@ def attention_lm_moe_large(): hparams.filter_size = 4096 hparams.moe_hidden_size = 4096 hparams.moe_n1 = 128 - hparams.residual_dropout = 0.2 + hparams.layer_prepostprocess_dropout = 0.2 return hparams diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index 8900e6d11..1079659b5 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -111,7 +111,10 @@ def multi_conv_res(x, padding, name, layers, hparams, mask=None, source=None): hparams.separability - i for i in reversed(range(len(dilations_and_kernels2))) ] - norm_fn = common_layers.get_norm(hparams.norm_type) + def norm_fn(x, name): + with tf.variable_scope(name, default_name="norm"): + return common_layers.apply_norm( + x, hparams.norm_type, hparams.hidden_size, hparams.norm_epsilon) for layer in xrange(layers): with tf.variable_scope("layer_%d" % layer): y = common_layers.subseparable_conv_block( @@ -171,7 +174,10 @@ def similarity_cost(inputs_encoded, targets_encoded): def slicenet_middle(inputs_encoded, targets, target_space_emb, mask, hparams): """Middle part of slicenet, connecting encoder and decoder.""" - norm_fn = common_layers.get_norm(hparams.norm_type) + def norm_fn(x, name): + with tf.variable_scope(name, default_name="norm"): + return common_layers.apply_norm( + x, hparams.norm_type, hparams.hidden_size, hparams.norm_epsilon) # Flatten targets and embed target_space_id. targets_flat = tf.expand_dims(common_layers.flatten4d3d(targets), axis=2) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index c9c87da07..caf8ab198 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -55,36 +55,22 @@ def model_fn_body(self, features): (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder( targets, hparams) - residual_fn = get_residual_fn(hparams) - - encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) - decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) - encoder_output = transformer_encoder(encoder_input, residual_fn, - encoder_self_attention_bias, hparams) + encoder_input = tf.nn.dropout( + encoder_input, 1.0 - hparams.layer_prepostprocess_dropout) + decoder_input = tf.nn.dropout( + decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) + encoder_output = transformer_encoder( + encoder_input, encoder_self_attention_bias, hparams) decoder_output = transformer_decoder( - decoder_input, encoder_output, residual_fn, decoder_self_attention_bias, + decoder_input, encoder_output, + decoder_self_attention_bias, encoder_decoder_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) return decoder_output -def get_residual_fn(hparams): - """Get residual_fn.""" - - def residual_fn(x, y): - return common_layers.residual_fn( - x, - y, - hparams.norm_type, - hparams.residual_dropout, - hparams.hidden_size, - epsilon=hparams.layer_norm_epsilon) - - return residual_fn - - def transformer_prepare_encoder(inputs, target_space, hparams): """Prepare one shard of the model for the encoder. @@ -143,7 +129,6 @@ def transformer_prepare_decoder(targets, hparams): def transformer_encoder(encoder_input, - residual_fn, encoder_self_attention_bias, hparams, name="encoder"): @@ -151,7 +136,6 @@ def transformer_encoder(encoder_input, Args: encoder_input: a Tensor - residual_fn: a function from (layer_input, layer_output) -> combined_output encoder_self_attention_bias: bias Tensor for self-attention (see common_attention.attention_bias()) hparams: hyperparameters for model @@ -164,25 +148,29 @@ def transformer_encoder(encoder_input, with tf.variable_scope(name): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): - x = residual_fn( - x, - common_attention.multihead_attention( - x, - None, - encoder_self_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - name="encoder_self_attention")) - x = residual_fn(x, transformer_ffn_layer(x, hparams)) - return x + with tf.variable_scope("self_attention"): + y = common_attention.multihead_attention( + common_layers.layer_preprocess(x, hparams), + None, + encoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout) + x = common_layers.layer_postprocess(x, y, hparams) + with tf.variable_scope("ffn"): + y = transformer_ffn_layer( + common_layers.layer_preprocess(x, hparams), hparams) + x = common_layers.layer_postprocess(x, y, hparams) + # if normalization is done in layer_preprocess, then it shuold also be done + # on the output, since the output can grow very large, being the sum of + # a whole stack of unnormalized layer outputs. + return common_layers.layer_preprocess(x, hparams) def transformer_decoder(decoder_input, encoder_output, - residual_fn, decoder_self_attention_bias, encoder_decoder_attention_bias, hparams, @@ -192,7 +180,6 @@ def transformer_decoder(decoder_input, Args: decoder_input: a Tensor encoder_output: a Tensor - residual_fn: a function from (layer_input, layer_output) -> combined_output decoder_self_attention_bias: bias Tensor for self-attention (see common_attention.attention_bias()) encoder_decoder_attention_bias: bias Tensor for encoder-decoder attention @@ -207,32 +194,36 @@ def transformer_decoder(decoder_input, with tf.variable_scope(name): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): - x = residual_fn( - x, - common_attention.multihead_attention( - x, - None, - decoder_self_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - name="decoder_self_attention")) - x = residual_fn( - x, - common_attention.multihead_attention( - x, - encoder_output, - encoder_decoder_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - name="encdec_attention")) - x = residual_fn(x, transformer_ffn_layer(x, hparams)) - return x + with tf.variable_scope("self_attention"): + y = common_attention.multihead_attention( + common_layers.layer_preprocess(x, hparams), + None, + decoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout) + x = common_layers.layer_postprocess(x, y, hparams) + with tf.variable_scope("encdec_attention"): + y = common_attention.multihead_attention( + common_layers.layer_preprocess(x, hparams), + encoder_output, + encoder_decoder_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout) + x = common_layers.layer_postprocess(x, y, hparams) + with tf.variable_scope("ffn"): + y = transformer_ffn_layer( + common_layers.layer_preprocess(x, hparams), hparams) + x = common_layers.layer_postprocess(x, y, hparams) + # if normalization is done in layer_preprocess, then it shuold also be done + # on the output, since the output can grow very large, being the sum of + # a whole stack of unnormalized layer outputs. + return common_layers.layer_preprocess(x, hparams) def transformer_ffn_layer(x, hparams): @@ -307,13 +298,39 @@ def transformer_base(): # when not in training mode. hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("relu_dropout", 0.0) - hparams.add_hparam("residual_dropout", 0.1) hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("nbr_decoder_problems", 1) hparams.add_hparam("proximity_bias", int(False)) return hparams +@registry.register_hparams +def transformer_n_da(): + """Normalize on layer input, instead of after residual connection. + + This version seems to cure failure-to-learn bugs - for example, with very + deep networks or hard-to-learn mappings. + + Probably this should become the default. + + Returns: + a hyperparameters. + """ + hparams = transformer_base() + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + # This version seems to benefit from a higher learning rate. + hparams.learning_rate = 0.4 + return hparams + + +@registry.register_hparams +def transformer_n_da_l10(): + hparams = transformer_n_da() + hparams.num_hidden_layers = 10 + return hparams + + @registry.register_hparams def transformer_big(): """HParams for transfomer big model on WMT.""" @@ -322,7 +339,7 @@ def transformer_big(): hparams.filter_size = 4096 hparams.num_heads = 16 hparams.batching_mantissa_bits = 2 - hparams.residual_dropout = 0.3 + hparams.layer_prepostprocess_dropout = 0.3 return hparams @@ -330,7 +347,7 @@ def transformer_big(): def transformer_big_single_gpu(): """HParams for transformer big model for single gpu.""" hparams = transformer_big() - hparams.residual_dropout = 0.1 + hparams.layer_prepostprocess_dropout = 0.1 hparams.learning_rate_warmup_steps = 16000 hparams.optimizer_adam_beta2 = 0.998 hparams.batching_mantissa_bits = 3 @@ -352,7 +369,7 @@ def transformer_parsing_base(): """Hparams for parsing on wsj only.""" hparams = transformer_base() hparams.attention_dropout = 0.2 - hparams.residual_dropout = 0.2 + hparams.layer_prepostprocess_dropout = 0.2 hparams.max_length = 512 hparams.learning_rate_warmup_steps = 16000 hparams.hidden_size = 1024 @@ -368,7 +385,7 @@ def transformer_parsing_big(): hparams.max_length = 512 hparams.shared_source_target_embedding = int(False) hparams.learning_rate_warmup_steps = 4000 - hparams.residual_dropout = 0.1 + hparams.layer_prepostprocess_dropout = 0.1 hparams.batch_size = 2048 hparams.learning_rate = 0.05 return hparams @@ -424,6 +441,13 @@ def transformer_l8(): return hparams +@registry.register_hparams +def transformer_l10(): + hparams = transformer_base() + hparams.num_hidden_layers = 10 + return hparams + + @registry.register_hparams def transformer_h1(): hparams = transformer_base() @@ -483,14 +507,14 @@ def transformer_ff4096(): @registry.register_hparams def transformer_dr0(): hparams = transformer_base() - hparams.residual_dropout = 0.0 + hparams.layer_prepostprocess_dropout = 0.0 return hparams @registry.register_hparams def transformer_dr2(): hparams = transformer_base() - hparams.residual_dropout = 0.2 + hparams.layer_prepostprocess_dropout = 0.2 return hparams @@ -528,7 +552,7 @@ def transformer_big_dr1(): hparams.hidden_size = 1024 hparams.filter_size = 4096 hparams.num_heads = 16 - hparams.residual_dropout = 0.1 + hparams.layer_prepostprocess_dropout = 0.1 hparams.batching_mantissa_bits = 2 return hparams @@ -538,14 +562,14 @@ def transformer_big_enfr(): hparams = transformer_big_dr1() hparams.shared_embedding_and_softmax_weights = int(False) hparams.filter_size = 8192 - hparams.residual_dropout = 0.1 + hparams.layer_prepostprocess_dropout = 0.1 return hparams @registry.register_hparams def transformer_big_dr2(): hparams = transformer_big_dr1() - hparams.residual_dropout = 0.2 + hparams.layer_prepostprocess_dropout = 0.2 return hparams diff --git a/tensor2tensor/models/transformer_moe.py b/tensor2tensor/models/transformer_moe.py index 8072f2cf8..6f01667d8 100644 --- a/tensor2tensor/models/transformer_moe.py +++ b/tensor2tensor/models/transformer_moe.py @@ -49,17 +49,22 @@ def model_fn_body_sharded(self, sharded_features): inputs = dp(common_layers.flatten4d3d, inputs) targets = dp(common_layers.flatten4d3d, targets) + def preprocess(x): + return dp(common_layers.layer_preprocess, x, hparams) + + def postprocess(x, y): + return dp(common_layers.layer_postprocess, x, y, hparams) + (encoder_input, encoder_self_attention_bias, encoder_decoder_attention_bias) = dp( transformer.transformer_prepare_encoder, inputs, target_space, hparams) (decoder_input, decoder_self_attention_bias) = dp( transformer.transformer_prepare_decoder, targets, hparams) - residual_fn = transformer.get_residual_fn(hparams) encoder_input = dp(tf.nn.dropout, encoder_input, - 1.0 - hparams.residual_dropout) + 1.0 - hparams.layer_prepostprocess_dropout) decoder_input = dp(tf.nn.dropout, decoder_input, - 1.0 - hparams.residual_dropout) + 1.0 - hparams.layer_prepostprocess_dropout) extra_loss = 0 x = encoder_input for layer in xrange(hparams.num_hidden_layers): @@ -67,7 +72,7 @@ def model_fn_body_sharded(self, sharded_features): with tf.variable_scope("encoder_self_attention"): y = dp( common_attention.multihead_attention, - x, + preprocess(x), None, encoder_self_attention_bias, hparams.attention_key_channels or hparams.hidden_size, @@ -75,11 +80,11 @@ def model_fn_body_sharded(self, sharded_features): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) - x = dp(residual_fn, x, y) + x = postprocess(x, y) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers_encoder.split(","): y, loss = common_layers.moe_layer( - dp, self._ps_devices, x, + dp, self._ps_devices, preprocess(x), hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, hparams.moe_n2, hparams.moe_loss_coef) @@ -87,19 +92,19 @@ def model_fn_body_sharded(self, sharded_features): else: y = dp( common_layers.conv_hidden_relu, - x, + preprocess(x), hparams.filter_size, hparams.hidden_size, dropout=hparams.relu_dropout) - x = dp(residual_fn, x, y) - encoder_output = x + x = postprocess(x, y) + encoder_output = preprocess(x) x = decoder_input for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("decoder_layer_%d" % layer): with tf.variable_scope("decoder_self_attention"): y = dp( common_attention.multihead_attention, - x, + preprocess(x), None, decoder_self_attention_bias, hparams.attention_key_channels or hparams.hidden_size, @@ -107,11 +112,11 @@ def model_fn_body_sharded(self, sharded_features): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) - x = dp(residual_fn, x, y) + x = postprocess(x, y) with tf.variable_scope("encoder_decoder_attention"): y = dp( common_attention.multihead_attention, - x, + preprocess(x), encoder_output, encoder_decoder_attention_bias, hparams.attention_key_channels or hparams.hidden_size, @@ -119,11 +124,11 @@ def model_fn_body_sharded(self, sharded_features): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) - x = dp(residual_fn, x, y) + x = postprocess(x, y) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers_decoder.split(","): y, loss = common_layers.moe_layer( - dp, self._ps_devices, x, + dp, self._ps_devices, preprocess(x), hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, hparams.moe_n2, hparams.moe_loss_coef) @@ -131,11 +136,12 @@ def model_fn_body_sharded(self, sharded_features): else: y = dp( common_layers.conv_hidden_relu, - x, + preprocess(x), hparams.filter_size, hparams.hidden_size, dropout=hparams.relu_dropout) - x = dp(residual_fn, x, y) + x = postprocess(x, y) + x = preprocess(x) decoder_output = dp(tf.expand_dims, x, 2) return decoder_output, extra_loss @@ -178,7 +184,6 @@ def transformer_moe_base(): # when not in training mode. hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("relu_dropout", 0.0) - hparams.add_hparam("residual_dropout", 0.1) hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("nbr_decoder_problems", 1) hparams.add_hparam("proximity_bias", int(False)) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index f3d400045..47fcacd51 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -96,10 +96,9 @@ def encode(x, x_space, hparams, name): with tf.variable_scope(name): (encoder_input, encoder_self_attention_bias, _) = transformer.transformer_prepare_encoder(x, x_space, hparams) - residual_fn = transformer.get_residual_fn(hparams) encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) return transformer.transformer_encoder( - encoder_input, residual_fn, encoder_self_attention_bias, hparams) + encoder_input, encoder_self_attention_bias, hparams) def dropmask(targets, targets_dropout_max, is_training): From 342e3090a47b0f625e2bba9d4f40ca995a8067c0 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 8 Aug 2017 14:18:05 -0700 Subject: [PATCH 0241/4095] Use softplus instead of log for RealLogPoissonLossModality PiperOrigin-RevId: 164643866 --- .../data_generators/gene_expression.py | 20 +++++++++++----- tensor2tensor/layers/modalities.py | 5 ++-- tensor2tensor/models/gene_expression.py | 23 ++++++++++++++----- tensor2tensor/models/gene_expression_test.py | 2 +- 4 files changed, 35 insertions(+), 15 deletions(-) diff --git a/tensor2tensor/data_generators/gene_expression.py b/tensor2tensor/data_generators/gene_expression.py index ea3aa4cc4..341a20c71 100644 --- a/tensor2tensor/data_generators/gene_expression.py +++ b/tensor2tensor/data_generators/gene_expression.py @@ -109,10 +109,10 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): # Collect created shard processes to start and join processes = [] - datasets = [ - (self.training_filepaths, self.num_shards, "train", num_train_examples), - (self.dev_filepaths, 10, "valid", num_dev_examples), - (self.test_filepaths, 10, "test", num_test_examples)] + datasets = [(self.training_filepaths, self.num_shards, "train", + num_train_examples), (self.dev_filepaths, 10, "valid", + num_dev_examples), + (self.test_filepaths, 10, "test", num_test_examples)] for fname_fn, nshards, key_prefix, num_examples in datasets: outfiles = fname_fn(data_dir, nshards, shuffled=False) all_filepaths.extend(outfiles) @@ -163,9 +163,12 @@ def preprocess_examples(self, examples, mode, hparams): del mode del hparams - # Reshape targets + # Reshape targets to contain num_output_predictions per output timestep examples["targets"] = tf.reshape(examples["targets"], [-1, 1, self.num_output_predictions]) + # Slice off EOS - not needed, and messes up the GeneExpressionConv model + # which expects the input length to be a multiple of the target length. + examples["inputs"] = examples["inputs"][:-1] return examples @@ -251,7 +254,12 @@ def dataset_generator(filepath, if i % 100 == 0: print("Generating example %d for %s" % (i, dataset)) inputs, mask, outputs = inp_data[i], mask_data[i], out_data[i] - yield to_example_dict(encoder, inputs, mask, outputs) + ex_dict = to_example_dict(encoder, inputs, mask, outputs) + # Original data has one output for every 128 input bases. Ensure that the + # ratio has been maintained given the chunk size and removing EOS. + assert (len(ex_dict["inputs"]) - 1) == (( + 128 // chunk_size) * ex_dict["targets_shape"][0]) + yield ex_dict def to_example_dict(encoder, inputs, mask, outputs): diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 5071a49ad..e44729041 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -489,12 +489,13 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_all): class RealLogPoissonLossModality(RealL2LossModality): """Modality for real (i.e. float) vectors with log Poisson regression loss. - * Top is a linear projection to vocab size followed by a log transform. + * Top is a linear projection to vocab size followed by a softplus + transform (log(exp(features) + 1)). """ def top(self, body_output, _): with tf.variable_scope("real"): - return tf.log(tf.layers.dense(body_output, self._vocab_size)) + return tf.nn.softplus(tf.layers.dense(body_output, self._vocab_size)) def loss(self, top_out, targets, weights_fn=common_layers.weights_all): predictions = top_out diff --git a/tensor2tensor/models/gene_expression.py b/tensor2tensor/models/gene_expression.py index ad15926ac..27aa631c6 100644 --- a/tensor2tensor/models/gene_expression.py +++ b/tensor2tensor/models/gene_expression.py @@ -38,6 +38,14 @@ class GeneExpressionConv(t2t_model.T2TModel): http://www.biorxiv.org/content/early/2017/07/10/161851 Uses layer_norm instead of batch_norm. + + Model expects that if targets are of length m, inputs are of length 32*m. The + original data expected that inputs would be of length 128*m, but the data has + been preprocessed to chunk every 4 bases into 1 ID (see + data_generators/gene_expression.py). + + The magnitude of the length reduction is controlled by the pooling sizes + (hparams.pooling_windows) at each conv layer (hparams.num_conv_layers). """ def model_fn_body(self, features): @@ -50,6 +58,7 @@ def model_fn_body(self, features): out = common_layers.flatten4d3d(out) # Conv layers + assert hp.num_conv_layers == len(hp.pooling_windows) for i in xrange(hp.num_conv_layers): out = conv_layer( out, @@ -58,7 +67,7 @@ def model_fn_body(self, features): hp.stride, hp.pooling_windows[i], hp.dropout, - 1, + dilation_rate=1, name="conv_%d" % (i + 1)) # Dense dilated conv layers @@ -68,10 +77,10 @@ def model_fn_body(self, features): out, hp.hidden_size, hp.kernel_width, - 1, - 0, - hp.dropout, - dilation_rate, + stride=1, + pooling_window=0, + dropout_rate=hp.dropout, + dilation_rate=dilation_rate, name="dconv_%d" % (i + 1)) out = tf.concat([out, dconv_out], axis=2) @@ -126,7 +135,9 @@ def gene_expression_conv_base(): hparams.dropout = 0.1 hparams.add_hparam("num_conv_layers", 4) hparams.add_hparam("num_dconv_layers", 7) - hparams.add_hparam("pooling_windows", [2, 4, 4, 4]) + # The product of these pooling windows should match + # input_length/target_length. + hparams.add_hparam("pooling_windows", [2, 2, 2, 4]) hparams.hidden_size = 256 hparams.kernel_width = 20 diff --git a/tensor2tensor/models/gene_expression_test.py b/tensor2tensor/models/gene_expression_test.py index 3b1dc6873..e2307f49f 100644 --- a/tensor2tensor/models/gene_expression_test.py +++ b/tensor2tensor/models/gene_expression_test.py @@ -42,7 +42,7 @@ def _testModel(self, hparams, model_cls): batch_size = 3 target_length = 6 target_out = 10 # GeneExpressionProblem.num_output_predictions - input_length = target_length * 128 + input_length = target_length * 128 // 4 # chunk_size=4 input_vocab_size = 5 inputs = np.random.random_integers( From daee057e764f008230cc9b88a61165d62a17891a Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 8 Aug 2017 16:37:04 -0700 Subject: [PATCH 0242/4095] Fix a bug in t2t_model for the case where the model returns a float PiperOrigin-RevId: 164662524 --- tensor2tensor/utils/t2t_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index a63f5cc7f..a33b0e0cd 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -425,7 +425,7 @@ def model_fn(self, features, skip=False, last_position_only=False): else: body_outputs, losses = self.model_fn_body_sharded( transformed_features) - if isinstance(losses, tf.Tensor): # If it's a single extra loss. + if not isinstance(losses, dict): # If it's a single extra loss. losses = {"extra": losses} with tf.variable_scope(target_modality.name, reuse=target_reuse): From 4bd21c2a7b4a7e7a4d260b5b630e769017e2ee4f Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 8 Aug 2017 23:36:26 -0700 Subject: [PATCH 0243/4095] introduce a model hparam - prepend_inputs_to_targets, which transforms PiperOrigin-RevId: 164690710 --- .../data_generators/concatenate_examples.py | 180 ------------------ tensor2tensor/data_generators/problem.py | 18 +- tensor2tensor/layers/common_hparams.py | 8 +- tensor2tensor/layers/common_layers.py | 16 ++ tensor2tensor/models/attention_lm.py | 17 +- tensor2tensor/utils/data_reader.py | 2 + tensor2tensor/utils/metrics.py | 12 +- tensor2tensor/utils/trainer_utils.py | 2 +- 8 files changed, 63 insertions(+), 192 deletions(-) delete mode 100644 tensor2tensor/data_generators/concatenate_examples.py diff --git a/tensor2tensor/data_generators/concatenate_examples.py b/tensor2tensor/data_generators/concatenate_examples.py deleted file mode 100644 index 9d7678fc4..000000000 --- a/tensor2tensor/data_generators/concatenate_examples.py +++ /dev/null @@ -1,180 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Convert seq-seq examples to "concatenated" examples. - -The concatenated example has no "inputs". -Instead the source is at the beginning of the target. - -We can now use a simple language model. - -Example: -seq-seq mode: -{ - "inputs": subtokenizer.encode("I love you.") + [1] - "targets": subtokenizer.encode("Je t'aime.") + [1] -} --> -concatenated mode: -{ - "inputs": [0] - "targets": (subtokenizer.encode("source English I love you.") + [1] - + subtokenizer.encode("target French Je t'aime.") + [1]) -} - -We add a dummy feature "inputs"=[0] for compatibility with seq-to-seq models. - -If FLAGS.combine_to_length is nonzero, then we combine multiple examples into -examples of a constant length, possibly with some padding at the end. - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import random - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import text_encoder -import tensorflow as tf - -tf.flags.DEFINE_string("vocab_file", "", "SubwordTextEncoder vocabulary file") - -tf.flags.DEFINE_boolean( - "random_reverse", False, - "If true, write half of the example with source/target reversed") - -tf.flags.DEFINE_boolean( - "count_everything", False, - "If true, assign positive weights to designators, source and target. " - "If false, assign positive weights only to target.") - -tf.flags.DEFINE_string("source_domain_string", "English", "") -tf.flags.DEFINE_string("target_domain_string", "French", "") - -tf.flags.DEFINE_integer( - "combine_to_length", 0, - "If positive, concatenate examples to form examples with target length " - " equal to this value. Targets are padded with subtoken id=0.") - -tf.flags.DEFINE_string("in_file", "", "input filename") - -tf.flags.DEFINE_string( - "out_prefix", "/usr/local/google/tmp/concat", - "The output filename is equal to out_prefix plus " - "the last 15 characters of in_file. (e.g. -00001-of-00100)") - -FLAGS = tf.flags.FLAGS - - -def _make_example(ids, weights, raw_num_bytes): - if FLAGS.combine_to_length > 0: - ids += [0] * (FLAGS.combine_to_length - len(ids)) - return generator_utils.to_example({ - "targets": ids, - "target_weights": weights, - "inputs": [0], - "raw_num_bytes": [raw_num_bytes] - }).SerializeToString() - - -def main(_): - """Convert a file to examples.""" - subtokenizer = text_encoder.SubwordTextEncoder(FLAGS.vocab_file) - total_bytes = 0 - total_subtokens = 0 - total_examples = 0 - dropped_examples = 0 - - combined_subtokens = [] - combined_num_bytes = 0 - combined_weights = [] - - source_specifier = subtokenizer.encode("source " + FLAGS.source_domain_string) - target_specifier = subtokenizer.encode("target " + FLAGS.target_domain_string) - if FLAGS.random_reverse: - r_source_specifier = subtokenizer.encode("source " + - FLAGS.target_domain_string) - r_target_specifier = subtokenizer.encode("target " + - FLAGS.source_domain_string) - - reader = tf.python_io.tf_record_iterator(FLAGS.in_file) - - out_file = FLAGS.out_prefix + FLAGS.in_file[-15:] - writer = tf.python_io.TFRecordWriter(out_file) - - for record in reader: - total_examples += 1 - if total_examples % 1000 == 0: - tf.logging.info("total_examples: %d", total_examples) - x = tf.train.Example() - x.ParseFromString(record) - inputs = [i for i in x.features.feature["inputs"].int64_list.value] - targets = [i for i in x.features.feature["targets"].int64_list.value] - should_reverse = FLAGS.random_reverse and random.random() < 0.5 - source_bytes = len(subtokenizer.decode(inputs[:-1])) + 1 - target_bytes = len(subtokenizer.decode(targets[:-1])) + 1 - if not should_reverse: - subtokens = source_specifier + inputs + target_specifier + targets - weights = ([0.0] * - (len(source_specifier) + len(inputs) + len(target_specifier)) + - [1.0] * len(targets)) - num_bytes = target_bytes - else: - subtokens = r_source_specifier + targets + r_target_specifier + inputs - weights = ( - [0.0] * - (len(r_source_specifier) + len(targets) + len(r_target_specifier)) + - [1.0] * len(inputs)) - num_bytes = source_bytes - if FLAGS.count_everything: - weights = [1.0] * len(subtokens) - num_bytes = source_bytes + target_bytes - total_bytes += num_bytes - total_subtokens += sum(weights) - if FLAGS.combine_to_length: - if combined_subtokens and (len(combined_subtokens) + len(subtokens) > - FLAGS.combine_to_length): - writer.write( - _make_example(combined_subtokens, combined_weights, - combined_num_bytes)) - combined_subtokens = [] - combined_weights = [] - combined_num_bytes = 0 - if len(subtokens) <= FLAGS.combine_to_length: - combined_subtokens.extend(subtokens) - combined_weights.extend(weights) - combined_num_bytes += num_bytes - else: - dropped_examples += 1 - else: - writer.write(_make_example(subtokens, weights, num_bytes)) - if combined_subtokens: - writer.write( - _make_example(combined_subtokens, combined_weights, combined_num_bytes)) - writer.close() - - tf.logging.info("total bytes: %d", total_bytes) - tf.logging.info("total subtokens: %d", total_subtokens) - tf.logging.info("bytes per subtoken: %f", total_bytes / total_subtokens) - tf.logging.info("total documents: %d", total_examples) - tf.logging.info("dropped documents: %d", dropped_examples) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 66cd7df4f..07fafb492 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -92,6 +92,18 @@ class SpaceID(object): CPP_TOK = 28 +def preprocess_examples_common(examples, hparams): + """Preprocessing steps common to all models.""" + if hparams.max_input_seq_length > 0: + examples["inputs"] = examples["inputs"][:hparams.max_input_seq_length] + if hparams.max_target_seq_length > 0: + examples["targets"] = examples["targets"][:hparams.max_target_seq_length] + if hparams.prepend_inputs_to_targets: + examples["targets"] = tf.concat( + [examples["inputs"], [0], examples["targets"]], 0) + return examples + + class Problem(object): """Problem base class. Specifies a T2T problem. @@ -172,11 +184,7 @@ def example_reading_spec(self): def preprocess_examples(self, examples, mode, hparams): del mode - if hparams.max_input_seq_length > 0: - examples["inputs"] = examples["inputs"][:hparams.max_input_seq_length] - if hparams.max_target_seq_length > 0: - examples["targets"] = examples["targets"][:hparams.max_target_seq_length] - return examples + return preprocess_examples_common(examples, hparams) def eval_metrics(self): return [ diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 498b5eb37..10b5e7e59 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -118,7 +118,13 @@ def basic_params1(): # mean there is no maximum or truncation. # You can change this behavior by overridding preprocess_examples() method # in your problem class. - max_target_seq_length=0) + max_target_seq_length=0, + # Treat a seq-to-seq problem as a language model by prepending the + # inputs to the targets. During training, the loss is on both the + # inputs and the targets. During eval, metrics are computed only on the + # target portion. + prepend_inputs_to_targets=int(False), + ) class RangedHParams(object): diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 31bc0bced..a85430c1c 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -1426,6 +1426,22 @@ def weights_nonzero(labels): return tf.to_float(tf.not_equal(labels, 0)) +def weights_second_part(labels): + """Weights function for 'prepend_inputs_to_targets'. + + Weight 1.0 is assigned to all nonzero labels past the first zero. + + Args: + labels: A Tensor of int32s. + + Returns: + A Tensor of floats. + """ + past_first_zero = tf.cumsum(tf.to_float(tf.equal(labels, 0))) + nonzero = tf.to_float(labels) + return tf.to_float(tf.not_equal(past_first_zero * nonzero, 0)) + + def weights_all(labels): """Assign weight 1.0 to all labels.""" return tf.ones_like(labels, dtype=tf.float32) diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 495f25634..19f1915e8 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -118,7 +118,7 @@ def attention_lm_decoder(decoder_input, hparams.hidden_size, dropout=hparams.relu_dropout) x = common_layers.layer_postprocess(x, y, hparams) - return common_layers.layer_preprocess(x, hparams) + return common_layers.layer_preprocess(x, hparams) @registry.register_hparams @@ -140,7 +140,6 @@ def attention_lm_base(): hparams.weight_decay = 0.0 hparams.optimizer_adam_beta1 = 0.9 hparams.optimizer_adam_beta2 = 0.98 - hparams.num_sampled_classes = 0 hparams.label_smoothing = 0.0 hparams.shared_embedding_and_softmax_weights = int(False) @@ -174,3 +173,17 @@ def attention_lm_small(): hparams.filter_size = 2048 hparams.layer_prepostprocess_dropout = 0.5 return hparams + + +@registry.register_hparams +def attention_lm_translation(): + """Version to use for seq2seq.""" + hparams = attention_lm_base() + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + hparams.learning_rate = 0.1 + hparams.prepend_inputs_to_targets = int(True) + hparams.max_length = 512 + hparams.label_smoothing = 0.1 + hparams.shared_embedding_and_softmax_weights = int(True) + return hparams diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 05aa9bf26..5c7041014 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -27,6 +27,7 @@ from six.moves import zip # pylint: disable=redefined-builtin from tensor2tensor.data_generators import problem_hparams +from tensor2tensor.data_generators.problem import preprocess_examples_common from tensor2tensor.utils import registry import tensorflow as tf @@ -233,6 +234,7 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams): data_items_to_decoders=data_items_to_decoders) if problem is None: + examples = preprocess_examples_common(examples, hparams) examples = preprocessing(examples, data_file_pattern) else: examples = problem.preprocess_examples(examples, mode, hparams) diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index b4d7360ca..fd82adc30 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -144,11 +144,12 @@ def padded_accuracy(predictions, return tf.to_float(tf.equal(outputs, padded_labels)), weights -def create_evaluation_metrics(problems): +def create_evaluation_metrics(problems, model_hparams): """Creates the evaluation metrics for the model. Args: problems: List of tuples (problem name, problem instance). + model_hparams: a set of hparams. Returns: A dictionary with keys that are strings naming the evaluation @@ -195,8 +196,13 @@ def problem_metric_fn(predictions, labels, weights): class_output = "image" in problem_name and "coco" not in problem_name real_output = "gene_expression" in problem_name - weights_fn = (common_layers.weights_all if class_output or real_output else - common_layers.weights_nonzero) + if model_hparams.prepend_inputs_to_targets: + assert not class_output + weights_fn = common_layers.weights_second_part + elif class_output or real_output: + weights_fn = common_layers.weights_all + else: + weights_fn = common_layers.weights_nonzero for metric in metrics: metric_fn = METRICS_FNS[metric] diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index ebf58ee97..703bc5b2f 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -144,7 +144,7 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, data_dir=data_dir, model_name=model_name) eval_metrics = metrics.create_evaluation_metrics( - zip(FLAGS.problems.split("-"), hparams.problem_instances)) + zip(FLAGS.problems.split("-"), hparams.problem_instances), hparams) if (hasattr(FLAGS, "autotune") and FLAGS.autotune and FLAGS.objective not in eval_metrics): raise ValueError("Tuning objective %s not among evaluation metrics %s" % From 331c6e783f4fda28e0092c8f8f9afc8d906a387c Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Wed, 9 Aug 2017 00:20:51 -0700 Subject: [PATCH 0244/4095] Allow multiple losses to play with GANs. PiperOrigin-RevId: 164693578 --- tensor2tensor/bin/t2t-trainer | 2 +- tensor2tensor/models/cycle_gan.py | 204 ++++++++++++++++++++++++ tensor2tensor/models/models.py | 1 + tensor2tensor/models/transformer_vae.py | 45 +++--- tensor2tensor/utils/input_fn_builder.py | 4 - tensor2tensor/utils/model_builder.py | 26 ++- tensor2tensor/utils/t2t_model.py | 10 +- 7 files changed, 258 insertions(+), 34 deletions(-) create mode 100644 tensor2tensor/models/cycle_gan.py diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 6e0be3f23..7c7b48932 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -62,7 +62,7 @@ def main(_): output_dir = os.path.expanduser(FLAGS.output_dir) tmp_dir = os.path.expanduser(FLAGS.tmp_dir) data_dir = os.path.expanduser(FLAGS.data_dir) - tf.gfile.MakeDir(output_dir) + tf.gfile.MakeDirs(output_dir) # Generate data if requested. if FLAGS.generate_data: diff --git a/tensor2tensor/models/cycle_gan.py b/tensor2tensor/models/cycle_gan.py new file mode 100644 index 000000000..5fcf96266 --- /dev/null +++ b/tensor2tensor/models/cycle_gan.py @@ -0,0 +1,204 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cycle GAN.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.layers import common_layers +from tensor2tensor.models import transformer_vae +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +def reconstruct_loss(x, gt, hparams, reuse=None): + pred = tf.layers.dense(x, hparams.vocab_size, name="softmax", reuse=reuse) + xent, w = common_layers.padded_cross_entropy(pred, gt, 0.0) + return xent / w + + +def discriminator(x, compress, hparams, name, reuse=None): + with tf.variable_scope(name, reuse=reuse): + x = tf.stop_gradient(2 * x) - x # Reverse gradient. + if compress: + x = transformer_vae.compress(x, hparams, "compress") + else: + x = transformer_vae.residual_conv(x, 1, hparams, "compress_rc") + y = tf.reduce_mean(x, axis=1) + return tf.tanh(tf.layers.dense(y, 1, name="reduce")) + + +def discriminate_loss(x, y, compress, hparams, name): + with tf.variable_scope(name): + d1 = discriminator(x, compress, hparams, "discriminator") + d2 = discriminator(y, compress, hparams, "discriminator", reuse=True) + dloss = tf.reduce_mean(tf.abs(d1 - d2)) + return - dloss + + +def split_on_batch(x): + batch_size = tf.shape(x)[0] + i = batch_size // 2 + return x[:i, :, :, :], x[i:2*i, :, :, :] + + +def cycle_gan_internal(inputs, targets, _, hparams): + """Cycle GAN, main step used for training.""" + with tf.variable_scope("cycle_gan"): + # Embed inputs and targets. + inputs_orig, targets_orig = tf.to_int32(inputs), tf.to_int32(targets) + inputs = common_layers.embedding( + inputs_orig, hparams.vocab_size, hparams.hidden_size, "embed") + targets = common_layers.embedding( + targets_orig, hparams.vocab_size, hparams.hidden_size, + "embed", reuse=True) + + # Split the batch into input-input and target-target parts. + inputs1, _ = split_on_batch(inputs) + _, targets2 = split_on_batch(targets) + + # Define F and G, called inp2tgt and tgt2inp here. + def inp2tgt(x, reuse=False): + return transformer_vae.residual_conv(x, 1, hparams, "inp2tgt", reuse) + def tgt2inp(x, reuse=False): + return transformer_vae.residual_conv(x, 1, hparams, "tgt2inp", reuse) + + # Input-input part. + inp1_tgt = inp2tgt(inputs1) + inp1_back = tgt2inp(inp1_tgt) + + # Target-target part. + tgt2_inp = tgt2inp(targets2, reuse=True) + tgt2_back = inp2tgt(tgt2_inp, reuse=True) + + # Reconstruction losses. + inp1_orig, _ = split_on_batch(inputs_orig) + _, tgt2_orig = split_on_batch(targets_orig) + inp1_loss = reconstruct_loss( + inp1_back, tf.squeeze(inp1_orig, axis=3), hparams) + tgt2_loss = reconstruct_loss( + tgt2_back, tf.squeeze(tgt2_orig, axis=3), hparams, reuse=True) + + # Discriminator losses. + dloss1 = discriminate_loss(inputs1, tgt2_inp, True, hparams, "inp_disc") + dloss2 = discriminate_loss(targets2, inp1_tgt, True, hparams, "tgt_disc") + + # Reconstruct targets from inputs. + tgt = inp2tgt(inputs, reuse=True) + tgt = tf.layers.dense(tgt, hparams.vocab_size, name="softmax", reuse=True) + + # We use the reconstruction only for tracking progress, no gradients here! + tgt = tf.stop_gradient(tf.expand_dims(tgt, axis=2)) + + losses = {"input_input": hparams.cycle_loss_multiplier * inp1_loss, + "target_target": hparams.cycle_loss_multiplier * tgt2_loss, + "input_disc": dloss1, + "target_disc": dloss2} + return tgt, losses + + +@registry.register_model +class CycleGAN(t2t_model.T2TModel): + + def model_fn_body(self, features): + return cycle_gan_internal( + features["inputs"], features["targets"], features["target_space_id"], + self._hparams) + + +def cycle_vae_gan_internal(inputs, targets, _, hparams): + """Cycle GAN, main step used for training.""" + with tf.variable_scope("cycle_vae_gan"): + # Embed inputs and targets. + inputs_orig, targets_orig = tf.to_int32(inputs), tf.to_int32(targets) + k = 2**hparams.num_compress_steps + inputs_orig, targets_orig = common_layers.pad_to_same_length( + inputs_orig, targets_orig, final_length_divisible_by=k) + inputs = common_layers.embedding( + inputs_orig, hparams.vocab_size, hparams.hidden_size, "embed") + targets = common_layers.embedding( + targets_orig, hparams.vocab_size, hparams.hidden_size, + "embed", reuse=True) + + # Split the batch into input-input and target-target parts. + inputs1, _ = split_on_batch(inputs) + _, targets2 = split_on_batch(targets) + + # Input-input part. + inp1_back, kl_loss1, inp1_mu, inp1_log_sigma = transformer_vae.vae_compress( + inputs1, hparams, "inp2hyp", "hyp2inp") + inp1_hyp = tf.concat([inp1_mu, inp1_log_sigma], axis=3) + + # Target-target part. + tgt2_back, kl_loss2, tgt2_mu, tgt2_log_sigma = transformer_vae.vae_compress( + targets2, hparams, "tgt2hyp", "hyp2tgt") + tgt2_hyp = tf.concat([tgt2_mu, tgt2_log_sigma], axis=3) + + # Reconstruction losses. + inp1_orig, _ = split_on_batch(inputs_orig) + _, tgt2_orig = split_on_batch(targets_orig) + inp1_loss = reconstruct_loss( + inp1_back, tf.squeeze(inp1_orig, axis=3), hparams) + tgt2_loss = reconstruct_loss( + tgt2_back, tf.squeeze(tgt2_orig, axis=3), hparams, reuse=True) + + # Discriminator loss. + dloss = discriminate_loss(inp1_hyp, tgt2_hyp, False, hparams, "dloss") + + # Reconstruct targets from inputs. + tgt, _, _, _ = transformer_vae.vae_compress( + inputs, hparams, "inp2hyp", "hyp2tgt", reuse=True) + tgt = tf.layers.dense(tgt, hparams.vocab_size, name="softmax", reuse=True) + # We use the reconstruction only for tracking progress, no gradients here! + tgt = tf.stop_gradient(tf.expand_dims(tgt, axis=2)) + + kl_rev_decay = common_layers.inverse_exp_decay(hparams.kl_warmup_steps) + losses = {"input_input": hparams.cycle_loss_multiplier * inp1_loss, + "target_target": hparams.cycle_loss_multiplier * tgt2_loss, + "input_kl": kl_loss1 * kl_rev_decay, + "target_kl": kl_loss2 * kl_rev_decay, + "discriminator": dloss} + return tgt, losses + + +@registry.register_model +class CycleVaeGAN(t2t_model.T2TModel): + + def model_fn_body(self, features): + return cycle_vae_gan_internal( + features["inputs"], features["targets"], features["target_space_id"], + self._hparams) + + +@registry.register_hparams +def cycle_gan_small(): + """Set of hyperparameters.""" + hparams = transformer_vae.transformer_vae_small() + hparams.batch_size = 2048 + hparams.input_modalities = "inputs:symbol:identity" + hparams.target_modality = "symbol:identity" + hparams.weight_decay = 3.0 + hparams.learning_rate = 0.005 + hparams.kl_warmup_steps = 5000 + hparams.learning_rate_warmup_steps = 3000 + hparams.add_hparam("vocab_size", 32) # Vocabulary size, need to set here. + hparams.add_hparam("cycle_loss_multiplier", 2.0) + return hparams diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index 4b1355dba..cba779fc9 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -28,6 +28,7 @@ from tensor2tensor.models import attention_lm_moe from tensor2tensor.models import bluenet from tensor2tensor.models import bytenet +from tensor2tensor.models import cycle_gan from tensor2tensor.models import gene_expression from tensor2tensor.models import long_answer from tensor2tensor.models import lstm diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 47fcacd51..404d17783 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -31,9 +31,9 @@ import tensorflow as tf -def residual_conv(x, repeat, hparams, name): +def residual_conv(x, repeat, hparams, name, reuse=None): """A stack of convolution blocks with residual connections.""" - with tf.variable_scope(name): + with tf.variable_scope(name, reuse=reuse): k = (3, 1) dilations_and_kernels = [((1, 1), k) for _ in xrange(3)] for i in xrange(repeat): @@ -49,7 +49,7 @@ def residual_conv(x, repeat, hparams, name): return x -def decompress(source, hparams, first_relu, name): +def decompress_step(source, hparams, first_relu, name): """Decompression function.""" with tf.variable_scope(name): shape = tf.shape(source) @@ -66,29 +66,42 @@ def vae(x, hparams, name): shape = tf.shape(x) epsilon = tf.random_normal([shape[0], shape[1], 1, hparams.z_size]) z = mu + tf.exp(log_sigma / 2) * epsilon - dense = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense") kl = 0.5 * tf.reduce_mean( tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1) - return dense, tf.reduce_mean(kl) + return z, tf.reduce_mean(kl), mu, log_sigma -def compress_vae(inputs, hparams, name): - """Compress, then VAE.""" +def compress(inputs, hparams, name): + """Compress.""" with tf.variable_scope(name): # Run compression by strided convs. - cur = tf.expand_dims(inputs, axis=2) + cur = inputs for i in xrange(hparams.num_compress_steps): cur = residual_conv(cur, 1, hparams, "compress_rc_%d" % i) cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (2, 1))], strides=(2, 1), name="compress_%d" % i) + return cur + +def vae_compress(inputs, hparams, compress_name, decompress_name, reuse=None): + """Compress, then VAE.""" + with tf.variable_scope(compress_name, reuse=reuse): + cur = compress(inputs, hparams, "compress") # Convolve and ReLu to get state. cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") + z, kl_loss, mu, log_sigma = vae(cur, hparams, name="vae") + + with tf.variable_scope(decompress_name, reuse=reuse): + # Decompress. + z = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense") - cur, kl_loss = vae(cur, hparams, name="vae") - return cur, kl_loss + for i in xrange(hparams.num_compress_steps): + j = hparams.num_compress_steps - i - 1 + z = residual_conv(z, 1, hparams, "decompress_rc_%d" % j) + z = decompress_step(z, hparams, i > 0, "decompress__step_%d" % j) + return z, kl_loss, mu, log_sigma def encode(x, x_space, hparams, name): @@ -127,7 +140,7 @@ def vae_transformer_internal(inputs, targets, target_space, hparams): inputs = encode(inputs, target_space, hparams, "input_enc") # Dropout targets or swap for zeros 5% of the time. - max_prestep = 90000 + max_prestep = hparams.kl_warmup_steps prob_targets = 0.95 if is_training else 1.0 targets_dropout_max = common_layers.inverse_lin_decay(max_prestep) - 0.01 targets = dropmask(targets, targets_dropout_max, is_training) @@ -143,13 +156,8 @@ def vae_transformer_internal(inputs, targets, target_space, hparams): # target_space, hparams, "enc") # Compress and vae. - z, kl_loss = compress_vae(targets, hparams, "vae") - - # Decompress. - for i in xrange(hparams.num_compress_steps): - j = hparams.num_hidden_layers - i - 1 - z = residual_conv(z, 1, hparams, "dec_rc_%d" % j) - z = decompress(z, hparams, i > 0, "decompress_%d" % j) + z, kl_loss, _, _ = vae_compress(tf.expand_dims(targets, axis=2), hparams, + "vae_compress", "vae_decompress") # Join z with inputs, run decoder. to_decode = common_layers.conv_block( @@ -215,6 +223,7 @@ def transformer_vae_small(): hparams.batch_size = 2048 hparams.add_hparam("z_size", 128) hparams.add_hparam("num_compress_steps", 4) + hparams.add_hparam("kl_warmup_steps", 50000) return hparams diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index 79a765ca2..d1b68aa02 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -137,10 +137,6 @@ def input_fn(): tf.get_variable( "problem_%d/total_loss" % n, initializer=100.0, trainable=False)) - tf.get_variable( - "problem_%d/training_loss" % n, initializer=100.0, trainable=False) - tf.get_variable( - "problem_%d/extra_loss" % n, initializer=100.0, trainable=False) if fixed_problem is None: if (hparams.problem_choice == "uniform" or mode != tf.contrib.learn.ModeKeys.TRAIN): diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 01768e263..34d062d45 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -166,6 +166,7 @@ def model_fn(features, targets, mode): train = mode == tf.contrib.learn.ModeKeys.TRAIN # Get multi-problem logits and loss based on features["problem_choice"]. + loss_variable_names = [] def nth_model(n): """Build the model for the n-th problem, plus some added variables.""" model_class = registry.model(model)( @@ -193,15 +194,19 @@ def nth_model(n): skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1) sharded_logits, losses_dict = model_class.model_fn( features, skip=(skipping_is_on and skip_this_one)) - with tf.variable_scope("losses_avg", reuse=True): + with tf.variable_scope("losses_avg"): total_loss, ops = 0.0, [] for loss_key, loss_value in six.iteritems(losses_dict): + loss_name = "problem_%d/%s_loss" % (n, loss_key) loss_moving_avg = tf.get_variable( - "problem_%d/%s_loss" % (n, loss_key)) + loss_name, initializer=100.0, trainable=False) + loss_variable_names.append(loss_name) ops.append( loss_moving_avg.assign(loss_moving_avg * 0.9 + loss_value * 0.1)) total_loss += loss_value - loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + # Total loss was already constructed on input. + loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) ops.append( loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1)) with tf.variable_scope("train_stats"): # Count steps for this problem. @@ -256,13 +261,18 @@ def nth_model(n): tf.summary.scalar("learning_rate", learning_rate) global_step = tf.to_float(tf.contrib.framework.get_global_step()) for n in xrange(len(my_hp.problems)): + names_and_vars = [] with tf.variable_scope("losses_avg", reuse=True): total_loss_var = tf.get_variable("problem_%d/total_loss" % n) - training_loss_var = tf.get_variable("problem_%d/training_loss" % n) - extra_loss_var = tf.get_variable("problem_%d/extra_loss" % n) - tf.summary.scalar("loss_avg_%d/total_loss" % n, total_loss_var) - tf.summary.scalar("loss_avg_%d/training_loss" % n, training_loss_var) - tf.summary.scalar("loss_avg_%d/extra_loss" % n, extra_loss_var) + names_and_vars.append(("total_loss", total_loss_var)) + with tf.variable_scope("losses_avg", reuse=True): + for loss_name in loss_variable_names: + if loss_name.startswith("problem_%d/" % n): + loss_var = tf.get_variable(loss_name) + loss_suffix = loss_name[loss_name.index("/") + 1:] + names_and_vars.append((loss_suffix, loss_var)) + for (loss_name, loss_var) in names_and_vars: + tf.summary.scalar("loss_avg_%d/%s" % (n, loss_name), loss_var) with tf.variable_scope("train_stats", reuse=True): nth_steps = tf.get_variable("problem_%d_steps" % n, dtype=tf.int32) tf.summary.scalar("problem_%d_frequency" % n, diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index a33b0e0cd..76e2164b1 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -479,10 +479,14 @@ def model_fn_body_sharded(self, sharded_features): _with_timing(self.model_fn_body, "model_fn_body"), datashard_to_features) if isinstance(output, tuple): - if isinstance(output[1], dict): - loss = output[1] + losses_sharded = output[1] + if isinstance(losses_sharded[0], dict): + loss = {} + for k in losses_sharded[0].keys(): + k_loss_sharded = [losses[k] for losses in losses_sharded] + loss[k] = tf.reduce_mean(k_loss_sharded) else: - loss = {"extra": tf.reduce_mean(output[1])} + loss = {"extra": tf.reduce_mean(losses_sharded)} output = output[0] else: loss = {"extra": 0.0} From d12cb9d641ce4d5d56a092e779e0c442924c741b Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 9 Aug 2017 11:53:17 -0700 Subject: [PATCH 0245/4095] v1.1.7 PiperOrigin-RevId: 164753007 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c62b3409c..5beeb1b3e 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.6', + version='1.1.7', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 0eeb116aa0d6a42d421c4b20dc5e4b0f12f28c7c Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Wed, 9 Aug 2017 12:11:24 -0700 Subject: [PATCH 0246/4095] Evaluate auto-regressively in t2t. Currently, we use the actual output during eval. To use the predicted output in the previous step, extend the infer code to run eval auto-regressively. PiperOrigin-RevId: 164755091 --- tensor2tensor/utils/model_builder.py | 8 ++- tensor2tensor/utils/t2t_model.py | 77 ++++++++++++++++++++++------ tensor2tensor/utils/trainer_utils.py | 3 ++ 3 files changed, 71 insertions(+), 17 deletions(-) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 34d062d45..da33e1e40 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -192,8 +192,12 @@ def nth_model(n): # On worker 0 also build graph for problems <= 1. # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1) - sharded_logits, losses_dict = model_class.model_fn( - features, skip=(skipping_is_on and skip_this_one)) + if (FLAGS.eval_run_autoregressive and + mode == tf.contrib.learn.ModeKeys.EVAL): + sharded_logits, losses_dict = model_class.eval_autoregressive(features) + else: + sharded_logits, losses_dict = model_class.model_fn( + features, skip=(skipping_is_on and skip_this_one)) with tf.variable_scope("losses_avg"): total_loss, ops = 0.0, [] for loss_key, loss_value in six.iteritems(losses_dict): diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 76e2164b1..8fcf2482d 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -144,6 +144,30 @@ def _create_modalities(self, problem_hparams, hparams): def has_input(self): return self._problem_hparams.input_modality + def eval_autoregressive(self, + features=None, + decode_length=50, + last_position_only=False): + """Autoregressive eval. + + Quadratic time in decode_length. + + Args: + features: an map of string to `Tensor` + decode_length: an integer. How many additional timesteps to decode. + last_position_only: a boolean, speed-up by computing last position only. + + Returns: + sharded_logits: a list of `Tensor`s. Assumes one datashard. + losses: a dictionary: {loss-name (string): floating point `Scalar`}. + Contains a single key "training". + """ + _, logits, losses = self._greedy_infer( + features, + decode_length=decode_length, + last_position_only=last_position_only) + return [logits], losses + def infer(self, features=None, decode_length=50, @@ -179,11 +203,13 @@ def infer(self, beam_size = 1 # No use to run beam-search for a single class. if beam_size == 1: tf.logging.info("Greedy Decoding") - return self._greedy_infer(features, decode_length, last_position_only) + samples, _, _ = self._greedy_infer(features, decode_length, + last_position_only) else: tf.logging.info("Beam Decoding with beam size %d" % beam_size) - return self._beam_decode(features, decode_length, beam_size, top_beams, - last_position_only, alpha) + samples = self._beam_decode(features, decode_length, beam_size, top_beams, + last_position_only, alpha) + return samples def _beam_decode(self, features, decode_length, beam_size, top_beams, last_position_only, alpha): @@ -268,6 +294,8 @@ def _greedy_infer(self, features, decode_length, last_position_only): Returns: samples: an integer `Tensor`. + logits: `Tensor` of shape [batch_size, time, 1, 1, vocab_size]. + losses: a dictionary: {loss-name (string): floating point `Scalar`} """ if not features: features = {} @@ -278,14 +306,15 @@ def _greedy_infer(self, features, decode_length, last_position_only): if not self.has_input: features["partial_targets"] = tf.to_int64(features["inputs"]) - def infer_step(recent_output, _): + def infer_step(recent_output, recent_logits, unused_loss): """Inference step.""" recent_output.set_shape([None, None, None, 1]) padded = tf.pad(recent_output, [[0, 0], [0, 1], [0, 0], [0, 0]]) features["targets"] = padded # This is inefficient in that it generates samples at all timesteps, # not just the last one, except if last_position_only is set (dangerous). - samples = self.sample(features, last_position_only=last_position_only) + samples, logits, losses = self.sample( + features, last_position_only=last_position_only) # Concatenate the already-generated recent_output with last timestep # of the newly-generated samples. if last_position_only: @@ -295,7 +324,11 @@ def infer_step(recent_output, _): cur_sample = tf.to_int64(tf.expand_dims(cur_sample, axis=1)) samples = tf.concat([recent_output, cur_sample], axis=1) samples.set_shape([None, None, None, 1]) - return samples + + # Assuming we have one shard for logits. + logits = tf.concat([recent_logits, logits[0][:, -1:]], 1) + loss = sum(losses.values()) + return samples, logits, loss # Create an initial output tensor. This will be passed # to the infer_step, which adds one timestep at every iteration. @@ -308,20 +341,32 @@ def infer_step(recent_output, _): # input shape, so we confuse it about the input shape. initial_output = tf.slice(initial_output, [0, 0, 0, 0], tf.shape(initial_output)) - if _is_class_modality( - self._hparams.problems[self._problem_idx].target_modality): + target_modality = self._hparams.problems[self._problem_idx].target_modality + if _is_class_modality(target_modality): decode_length = 1 else: decode_length = tf.shape(features["inputs"])[1] + decode_length - result = tf.foldl( - infer_step, - tf.range(decode_length), - initializer=initial_output, + # Initial values of result, logits and loss. + result = initial_output + # tensor of shape [batch_size, time, 1, 1, vocab_size] + logits = tf.zeros((batch_size, 0, 1, 1, target_modality.top_dimensionality)) + logits.set_shape([None, None, None, None, None]) + loss = 0.0 + + result, logits, loss = tf.while_loop( + lambda result, logits, loss: tf.shape(result)[1] < decode_length, + infer_step, [result, logits, loss], + shape_invariants=[ + tf.TensorShape([None, None, None, None]), + tf.TensorShape([None, None, None, None, None]), + tf.TensorShape([]), + ], back_prop=False, parallel_iterations=1) if inputs_old is not None: # Restore to not confuse Estimator. features["inputs"] = inputs_old - return result + losses = {"training": loss} + return result, logits, losses def sample(self, features, last_position_only=False): """Run the model and extract samples. @@ -332,8 +377,10 @@ def sample(self, features, last_position_only=False): Returns: samples: an integer `Tensor`. + logits: a list of `Tensor`s, one per datashard. + losses: a dictionary: {loss-name (string): floating point `Scalar`}. """ - sharded_logits, _ = self.model_fn( + sharded_logits, losses = self.model_fn( features, False, last_position_only=last_position_only) if self._hparams.sampling_method == "argmax": sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) @@ -349,7 +396,7 @@ def _multinomial_squeeze(logits): sharded_samples = self._data_parallelism(_multinomial_squeeze, sharded_logits) - return tf.concat(sharded_samples, 0) + return tf.concat(sharded_samples, 0), sharded_logits, losses def _shard_features(self, features): # pylint: disable=missing-docstring sharded_features = dict() diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 703bc5b2f..e72938867 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -63,6 +63,9 @@ "The number of steps to run training for.") flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") flags.DEFINE_bool("eval_print", False, "Print eval logits and predictions.") +flags.DEFINE_bool("eval_run_autoregressive", False, + "Run eval autoregressively where we condition on previous" + "generated output instead of the actual target.") flags.DEFINE_integer("keep_checkpoint_max", 20, "How many recent checkpoints to keep.") flags.DEFINE_bool("experimental_optimize_placement", False, From a9826deb47ea061b597c128935a2a79ec7a67193 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Wed, 9 Aug 2017 13:15:53 -0700 Subject: [PATCH 0247/4095] Extend decode_from_dataset to run decode iteratively for specified number of samples rather than one PiperOrigin-RevId: 164761976 --- tensor2tensor/utils/decoding.py | 41 ++++++++++++++++++---------- tensor2tensor/utils/trainer_utils.py | 3 ++ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 12057d8e6..5e8f4d482 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -45,13 +45,13 @@ def decode_from_dataset(estimator): tf.logging.info("Performing local inference.") infer_problems_data = data_reader.get_data_filepatterns( FLAGS.problems, hparams.data_dir, tf.contrib.learn.ModeKeys.INFER) + infer_input_fn = input_fn_builder.build_input_fn( mode=tf.contrib.learn.ModeKeys.INFER, hparams=hparams, data_file_patterns=infer_problems_data, num_datashards=devices.data_parallelism().n, fixed_problem=i) - result_iter = estimator.predict(input_fn=infer_input_fn, as_iterable=False) def log_fn(inputs, targets, @@ -66,14 +66,21 @@ def log_fn(inputs, "%s_prediction_%d.jpg" % (problem, j)) show_and_save_image(inputs / 255., save_path) elif inputs_vocab: - decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) + decoded_inputs = inputs_vocab.decode( + _save_until_eos(inputs.flatten())) tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - decoded_outputs = targets_vocab.decode(_save_until_eos(outputs.flatten())) + if FLAGS.identity_output: + decoded_outputs = " ".join(map(str, outputs.flatten())) + decoded_targets = " ".join(map(str, targets.flatten())) + else: + decoded_outputs = targets_vocab.decode( + _save_until_eos(outputs.flatten())) + decoded_targets = targets_vocab.decode( + _save_until_eos(targets.flatten())) + tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) - decoded_targets = targets_vocab.decode(_save_until_eos(targets.flatten())) tf.logging.info("Inference results TARGET: %s" % decoded_targets) - if FLAGS.decode_to_file: output_filepath = FLAGS.decode_to_file + ".outputs." + problem output_file = tf.gfile.Open(output_filepath, "a") @@ -81,21 +88,25 @@ def log_fn(inputs, target_filepath = FLAGS.decode_to_file + ".targets." + problem target_file = tf.gfile.Open(target_filepath, "a") target_file.write(decoded_targets + "\n") - - # The function predict() returns an iterable over the network's - # predictions from the test input. We use it to log inputs and decodes. - inputs_iter = result_iter["inputs"] - targets_iter = result_iter["targets"] - outputs_iter = result_iter["outputs"] - for j, result in enumerate(zip(inputs_iter, targets_iter, outputs_iter)): - inputs, targets, outputs = result + result_iter = estimator.predict(input_fn=infer_input_fn, as_iterable=True) + count = 0 + for result in result_iter: + # predictions from the test input. We use it to log inputs and decodes. + inputs = result["inputs"] + targets = result["targets"] + outputs = result["outputs"] if FLAGS.decode_return_beams: output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) for k, beam in enumerate(output_beams): tf.logging.info("BEAM %d:" % k) - log_fn(inputs, targets, beam, problem, j) + log_fn(inputs, targets, beam, problem, count) else: - log_fn(inputs, targets, outputs, problem, j) + log_fn(inputs, targets, outputs, problem, count) + + count += 1 + if FLAGS.decode_num_samples != -1 and count >= FLAGS.decode_num_samples: + break + tf.logging.info("Completed inference on %d samples." % count) def decode_from_file(estimator, filename): diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index e72938867..22fd727f9 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -121,6 +121,9 @@ flags.DEFINE_integer("decode_max_input_size", -1, "Maximum number of ids in input. Or <= 0 for no max.") flags.DEFINE_bool("identity_output", False, "To print the output as identity") +flags.DEFINE_integer("decode_num_samples", -1, + "Number of samples to decode. Currently used in" + "decode_from_dataset. Use -1 for all.") def make_experiment_fn(data_dir, model_name, train_steps, eval_steps): From 4a36fb88638effd2262522f9eab93d02b339be95 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Wed, 9 Aug 2017 14:15:46 -0700 Subject: [PATCH 0248/4095] Fix for issue #215 on github, update transformer_vae. PiperOrigin-RevId: 164771762 --- tensor2tensor/models/transformer_vae.py | 6 +++--- tensor2tensor/utils/devices.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 404d17783..74f1e4c8f 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -109,7 +109,7 @@ def encode(x, x_space, hparams, name): with tf.variable_scope(name): (encoder_input, encoder_self_attention_bias, _) = transformer.transformer_prepare_encoder(x, x_space, hparams) - encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) + encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout) return transformer.transformer_encoder( encoder_input, encoder_self_attention_bias, hparams) @@ -143,7 +143,7 @@ def vae_transformer_internal(inputs, targets, target_space, hparams): max_prestep = hparams.kl_warmup_steps prob_targets = 0.95 if is_training else 1.0 targets_dropout_max = common_layers.inverse_lin_decay(max_prestep) - 0.01 - targets = dropmask(targets, targets_dropout_max, is_training) + targets = dropmask(targets, targets_dropout_max * 0.7, is_training) targets = tf.cond(tf.less(tf.random_uniform([]), prob_targets), lambda: targets, lambda: tf.zeros_like(targets)) @@ -168,7 +168,7 @@ def vae_transformer_internal(inputs, targets, target_space, hparams): # ret = tf.squeeze(to_decode, axis=2) # Randomize decoder inputs.. - kl_loss *= common_layers.inverse_exp_decay(max_prestep) * 3.0 + kl_loss *= common_layers.inverse_exp_decay(max_prestep) * 10.0 return tf.expand_dims(ret, axis=2), kl_loss diff --git a/tensor2tensor/utils/devices.py b/tensor2tensor/utils/devices.py index 4f76367e9..d04b73563 100644 --- a/tensor2tensor/utils/devices.py +++ b/tensor2tensor/utils/devices.py @@ -112,7 +112,7 @@ def _replica_device_setter(worker_device): if FLAGS.schedule == "local_run": assert not FLAGS.sync datashard_devices = ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] - if FLAGS.locally_shard_to_cpu: + if FLAGS.locally_shard_to_cpu or FLAGS.worker_gpu < 1: datashard_devices += ["cpu:0"] caching_devices = None elif FLAGS.sync: From ae4919238bc1837f6c613ef8951a7c78322f5dda Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 9 Aug 2017 14:45:45 -0700 Subject: [PATCH 0249/4095] fix some open-source imports PiperOrigin-RevId: 164776330 --- tensor2tensor/data_generators/desc2code_test.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/data_generators/desc2code_test.py b/tensor2tensor/data_generators/desc2code_test.py index 0d10c7d6f..24b7568d0 100644 --- a/tensor2tensor/data_generators/desc2code_test.py +++ b/tensor2tensor/data_generators/desc2code_test.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for google3.third_party.py.tensor2tensor.data_generators.desc2code.""" +"""Tests for desc2code.""" from __future__ import absolute_import from __future__ import division @@ -21,8 +21,8 @@ # Dependency imports from tensor2tensor.data_generators import desc2code -from google3.testing.pybase import googletest +import tensorflow as tf CODE_CPP_IN = """ #include <iostream> @@ -39,10 +39,11 @@ """ -CODE_CPP_OUT = "#include <iostream> void main() { /* Not this one */ int a = 3; }" # pylint: disable=line-too-loong +CODE_CPP_OUT = ("#include <iostream> void main() { /* Not this one */ int a = " + "3; }") -class Desc2codeTest(googletest.TestCase): +class Desc2codeTest(tf.test.TestCase): def testCppPreprocess(self): """Check that the file correctly preprocess the code source.""" @@ -50,15 +51,13 @@ def testCppPreprocess(self): self.assertEqual( # Add space beween two lines cpp_pb.preprocess_target("firstline//comm1\nsecondline//comm2\n"), - "firstline secondline" - ) + "firstline secondline") # Checking for boths comments and spaces self.assertEqual(cpp_pb.preprocess_target(CODE_CPP_IN), CODE_CPP_OUT) self.assertEqual( cpp_pb.preprocess_target(" not removed //abcd "), - "not removed //abcd" - ) + "not removed //abcd") if __name__ == "__main__": - googletest.main() + tf.test.main() From af4f1e03b24cccfd56a2eb49ed50caf29f6cd361 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 9 Aug 2017 14:54:45 -0700 Subject: [PATCH 0250/4095] Simplify calls to embedding_to_padding, we always end up converting the padding mask to a float tensor. PiperOrigin-RevId: 164777753 --- tensor2tensor/layers/common_attention.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index a43afec47..2b1bd124f 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -166,17 +166,17 @@ def add_positional_embedding_nd(x, max_length, name): def embedding_to_padding(emb): - """Input embeddings -> is_padding. + """Calculates the padding mask based on which embeddings are all zero. We have hacked symbol_modality to return all-zero embeddings for padding. Args: emb: a Tensor with shape [..., depth]. Returns: - a boolean Tensor with shape [...]. + a float Tensor with shape [...]. """ emb_sum = tf.reduce_sum(tf.abs(emb), axis=-1) - return tf.equal(emb_sum, 0.0) + return tf.to_float(tf.equal(emb_sum, 0.0)) def attention_bias_lower_triangle(length): @@ -197,13 +197,13 @@ def attention_bias_ignore_padding(memory_padding): """Create an bias tensor to be added to attention logits. Args: - memory_padding: a boolean `Tensor` with shape [batch, memory_length]. + memory_padding: a float `Tensor` with shape [batch, memory_length]. Returns: a `Tensor` with shape [batch, 1, 1, memory_length]. """ - ret = tf.to_float(memory_padding) * -1e9 - return tf.expand_dims(tf.expand_dims(ret, 1), 1) + ret = memory_padding * -1e9 + return tf.expand_dims(tf.expand_dims(ret, axis=1), axis=1) def attention_bias_proximal(length): @@ -523,8 +523,7 @@ def pad_l_and_r(x, pad_length): # [batch, heads, blocks, block_length, dim] k_new = tf.transpose(k_new, [2, 3, 0, 1, 4]) - attention_bias = tf.expand_dims( - tf.to_float(embedding_to_padding(k_new)) * -1e9, axis=-2) + attention_bias = tf.expand_dims(embedding_to_padding(k_new) * -1e9, axis=-2) v_t = tf.transpose(v, [2, 0, 1, 3]) v_new = tf.gather(v_t, gather_indices) From b3de49a72743d212fef786e80ee01044eb89be98 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Thu, 10 Aug 2017 17:59:50 +0000 Subject: [PATCH 0251/4095] Larger source vocab; adapt to new upstream version --- tensor2tensor/ice_parsing/ice_parsing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/ice_parsing/ice_parsing.py b/tensor2tensor/ice_parsing/ice_parsing.py index d8dd41cf7..df9748589 100755 --- a/tensor2tensor/ice_parsing/ice_parsing.py +++ b/tensor2tensor/ice_parsing/ice_parsing.py @@ -66,7 +66,7 @@ class IceParsingTokens(problem.Problem): @property def source_vocab_size(self): - return 2**13 # 8192 + return 2**14 # 16384 @property def target_vocab_size(self): @@ -84,18 +84,18 @@ def feature_encoders(self, data_dir): "targets": target_subtokenizer, } - def generate_data(self, data_dir, tmp_dir, num_shards=100): + def generate_data(self, data_dir, tmp_dir, task_id=-1): generator_utils.generate_dataset_and_shuffle( tabbed_parsing_token_generator(data_dir, tmp_dir, True, "ice", self.source_vocab_size, self.target_vocab_size), - self.training_filepaths(data_dir, num_shards, shuffled=False), + self.training_filepaths(data_dir, 1, shuffled=False), tabbed_parsing_token_generator(data_dir, tmp_dir, False, "ice", self.source_vocab_size, self.target_vocab_size), self.dev_filepaths(data_dir, 1, shuffled=False)) - def hparams(self, defaults, unused_model_hparams): + def hparams(self, defaults, model_hparams): p = defaults source_vocab_size = self._encoders["inputs"].vocab_size p.input_modality = {"inputs": (registry.Modalities.SYMBOL, source_vocab_size)} From b4de995cec2430acf61c367fbb61a00f61fc5097 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Fri, 11 Aug 2017 00:17:55 +0000 Subject: [PATCH 0252/4095] Moved ice_parsing to data_generators; updated to 1.1.7 --- tensor2tensor/data_generators/all_problems.py | 1 + .../ice_parsing.py | 44 +++++++------------ tensor2tensor/data_generators/wmt.py | 30 ------------- tensor2tensor/ice_parsing/__init__.py | 2 - tensor2tensor/models/transformer.py | 9 ++++ 5 files changed, 27 insertions(+), 59 deletions(-) mode change 100644 => 100755 tensor2tensor/data_generators/all_problems.py rename tensor2tensor/{ice_parsing => data_generators}/ice_parsing.py (82%) delete mode 100644 tensor2tensor/ice_parsing/__init__.py diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py old mode 100644 new mode 100755 index ca6dccfda..10a4764f5 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -31,6 +31,7 @@ from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.data_generators import ice_parsing # Problem modules that require optional dependencies diff --git a/tensor2tensor/ice_parsing/ice_parsing.py b/tensor2tensor/data_generators/ice_parsing.py similarity index 82% rename from tensor2tensor/ice_parsing/ice_parsing.py rename to tensor2tensor/data_generators/ice_parsing.py index df9748589..f6e6bdca4 100755 --- a/tensor2tensor/ice_parsing/ice_parsing.py +++ b/tensor2tensor/data_generators/ice_parsing.py @@ -28,7 +28,6 @@ from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators.wmt import tabbed_generator from tensor2tensor.utils import registry -from tensor2tensor.models import transformer import tensorflow as tf @@ -69,9 +68,21 @@ def source_vocab_size(self): return 2**14 # 16384 @property - def target_vocab_size(self): + def targeted_vocab_size(self): return 2**8 # 256 + @property + def input_space_id(self): + return problem.SpaceID.ICE_TOK + + @property + def target_space_id(self): + return problem.SpaceID.ICE_PARSE_TOK + + @property + def num_shards(self): + return 10 + def feature_encoders(self, data_dir): source_vocab_filename = os.path.join( data_dir, "ice_source.tokens.vocab.%d" % self.source_vocab_size) @@ -89,7 +100,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): tabbed_parsing_token_generator(data_dir, tmp_dir, True, "ice", self.source_vocab_size, self.target_vocab_size), - self.training_filepaths(data_dir, 1, shuffled=False), + self.training_filepaths(data_dir, self.num_shards, shuffled=False), tabbed_parsing_token_generator(data_dir, tmp_dir, False, "ice", self.source_vocab_size, self.target_vocab_size), @@ -99,29 +110,8 @@ def hparams(self, defaults, model_hparams): p = defaults source_vocab_size = self._encoders["inputs"].vocab_size p.input_modality = {"inputs": (registry.Modalities.SYMBOL, source_vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, self.target_vocab_size) - p.input_space_id = problem.SpaceID.ICE_TOK - p.target_space_id = problem.SpaceID.ICE_PARSE_TOK + p.target_modality = (registry.Modalities.SYMBOL, self.targeted_vocab_size) + p.input_space_id = self.input_space_id + p.target_space_id = self.target_space_id p.loss_multiplier = 2.5 # Rough estimate of avg number of tokens per word - -@registry.register_hparams -def transformer_parsing_ice(): - """Hparams for parsing Icelandic text.""" - hparams = transformer.transformer_base_single_gpu() - hparams.batch_size = 4096 - hparams.shared_embedding_and_softmax_weights = int(False) - return hparams - - -@registry.register_hparams -def transformer_parsing_ice_big(): - """Hparams for parsing Icelandic text, bigger model.""" - hparams = transformer_parsing_ice() - hparams.batch_size = 2048 # 4096 gives Out-of-memory on 8 GB 1080 GTX GPU - hparams.attention_dropout = 0.05 - hparams.residual_dropout = 0.05 - hparams.max_length = 512 - hparams.hidden_size = 1024 - return hparams - diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index f673dee82..f66e366d1 100755 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -187,36 +187,6 @@ def bi_vocabs_token_generator(source_path, source, target = source_file.readline(), target_file.readline() -def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): - r"""Generator for sequence-to-sequence tasks using tabbed files. - - Tokens are derived from text files where each line contains both - a source and a target string. The two strings are separated by a tab - character ('\t'). It yields dictionaries of "inputs" and "targets" where - inputs are characters from the source lines converted to integers, and - targets are characters from the target lines, also converted to integers. - - Args: - source_path: path to the file with source and target sentences. - source_vocab: a SunwordTextEncoder to encode the source string. - target_vocab: a SunwordTextEncoder to encode the target string. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from characters in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - for line in source_file: - if line and "\t" in line: - parts = line.split("\t", maxsplit=1) - source, target = parts[0].strip(), parts[1].strip() - source_ints = source_vocab.encode(source) + eos_list - target_ints = target_vocab.encode(target) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - - # Data-set URLs. diff --git a/tensor2tensor/ice_parsing/__init__.py b/tensor2tensor/ice_parsing/__init__.py deleted file mode 100644 index 36f468dcb..000000000 --- a/tensor2tensor/ice_parsing/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ - -from .ice_parsing import IceParsingTokens, transformer_parsing_ice, transformer_parsing_ice_big diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index c6fb74958..f1b2d761f 100755 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -391,6 +391,15 @@ def transformer_parsing_big(): return hparams +@registry.register_hparams +def transformer_parsing_ice(): + """Hparams for parsing and tagging Icelandic text.""" + hparams = transformer.transformer_base_single_gpu() + hparams.batch_size = 4096 + hparams.shared_embedding_and_softmax_weights = int(False) + return hparams + + @registry.register_hparams def transformer_tiny(): hparams = transformer_base() From b9e216b4c76ca973773a6bd4a04372a4dc4cffe3 Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Fri, 11 Aug 2017 00:22:58 +0000 Subject: [PATCH 0253/4095] Adaptation to 1.1.7 --- tensor2tensor/bin/t2t-datagen | 5 ----- tensor2tensor/data_generators/problem_hparams.py | 2 -- tensor2tensor/data_generators/wmt.py | 3 +-- tensor2tensor/models/transformer.py | 2 +- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 8ea0d9bc6..97bbd1241 100755 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -82,11 +82,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), - "ice_parsing_characters": ( - lambda: wmt.tabbed_parsing_character_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True), - lambda: wmt.tabbed_parsing_character_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False)), "wmt_parsing_tokens_8k": ( lambda: wmt.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13), diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 2f417a992..b0ed44f5b 100755 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -511,8 +511,6 @@ def image_celeba(unused_model_hparams): lm1b_32k, "wiki_32k": wiki_32k, - "ice_parsing_characters": - wmt_parsing_characters, "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), "wsj_parsing_tokens_16k": diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index f66e366d1..35d1b5fca 100755 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -65,6 +65,7 @@ def use_subword_tokenizer(self): # Generic generators used later for multiple problems. + def character_generator(source_path, target_path, character_vocab, eos=None): """Generator for sequence-to-sequence tasks that just uses characters. @@ -654,5 +655,3 @@ def parsing_token_generator(data_dir, tmp_dir, train, vocab_size): tree_filepath = os.path.join(tmp_dir, filename) return wsj_parsing.token_generator(tree_filepath, symbolizer_vocab, symbolizer_vocab, EOS) - - diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index f1b2d761f..fa7ecdf81 100755 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -394,7 +394,7 @@ def transformer_parsing_big(): @registry.register_hparams def transformer_parsing_ice(): """Hparams for parsing and tagging Icelandic text.""" - hparams = transformer.transformer_base_single_gpu() + hparams = transformer_base_single_gpu() hparams.batch_size = 4096 hparams.shared_embedding_and_softmax_weights = int(False) return hparams From ab9b00465add968ee1a09bd749cd35f53a9659cd Mon Sep 17 00:00:00 2001 From: vthorsteinsson <vt@extrada.com> Date: Fri, 11 Aug 2017 11:43:59 +0000 Subject: [PATCH 0254/4095] Bugfix in ice_parsing.py --- tensor2tensor/data_generators/ice_parsing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/data_generators/ice_parsing.py b/tensor2tensor/data_generators/ice_parsing.py index f6e6bdca4..7a90fec45 100755 --- a/tensor2tensor/data_generators/ice_parsing.py +++ b/tensor2tensor/data_generators/ice_parsing.py @@ -87,7 +87,7 @@ def feature_encoders(self, data_dir): source_vocab_filename = os.path.join( data_dir, "ice_source.tokens.vocab.%d" % self.source_vocab_size) target_vocab_filename = os.path.join( - data_dir, "ice_target.tokens.vocab.%d" % self.target_vocab_size) + data_dir, "ice_target.tokens.vocab.%d" % self.targeted_vocab_size) source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) return { @@ -99,11 +99,11 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): generator_utils.generate_dataset_and_shuffle( tabbed_parsing_token_generator(data_dir, tmp_dir, True, "ice", self.source_vocab_size, - self.target_vocab_size), + self.targeted_vocab_size), self.training_filepaths(data_dir, self.num_shards, shuffled=False), tabbed_parsing_token_generator(data_dir, tmp_dir, False, "ice", self.source_vocab_size, - self.target_vocab_size), + self.targeted_vocab_size), self.dev_filepaths(data_dir, 1, shuffled=False)) def hparams(self, defaults, model_hparams): From d30ec6bd36760dbb02cabb4b434dd1fc89edbd03 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 9 Aug 2017 15:51:00 -0700 Subject: [PATCH 0255/4095] Add TransformerEncoder and TransformerDecoder models PiperOrigin-RevId: 164785525 --- tensor2tensor/bin/t2t-datagen | 10 ++ tensor2tensor/bin/t2t-trainer | 0 tensor2tensor/data_generators/all_problems.py | 1 - .../data_generators/generator_utils.py | 0 tensor2tensor/data_generators/ice_parsing.py | 117 ------------------ .../data_generators/problem_hparams.py | 37 ++++++ tensor2tensor/data_generators/wmt.py | 22 ++++ tensor2tensor/models/transformer.py | 102 ++++++++++----- tensor2tensor/utils/decoding.py | 7 +- tensor2tensor/utils/registry.py | 6 +- 10 files changed, 143 insertions(+), 159 deletions(-) mode change 100755 => 100644 tensor2tensor/bin/t2t-datagen mode change 100755 => 100644 tensor2tensor/bin/t2t-trainer mode change 100755 => 100644 tensor2tensor/data_generators/all_problems.py mode change 100755 => 100644 tensor2tensor/data_generators/generator_utils.py delete mode 100755 tensor2tensor/data_generators/ice_parsing.py mode change 100755 => 100644 tensor2tensor/data_generators/problem_hparams.py mode change 100755 => 100644 tensor2tensor/data_generators/wmt.py mode change 100755 => 100644 tensor2tensor/models/transformer.py mode change 100755 => 100644 tensor2tensor/utils/decoding.py mode change 100755 => 100644 tensor2tensor/utils/registry.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100755 new mode 100644 index 97bbd1241..39453dbee --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -82,6 +82,16 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), + "ice_parsing_tokens": ( + lambda: wmt.tabbed_parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, "ice", 2**13, 2**8), + lambda: wmt.tabbed_parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, "ice", 2**13, 2**8)), + "ice_parsing_characters": ( + lambda: wmt.tabbed_parsing_character_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True), + lambda: wmt.tabbed_parsing_character_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False)), "wmt_parsing_tokens_8k": ( lambda: wmt.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13), diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100755 new mode 100644 diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py old mode 100755 new mode 100644 index 10a4764f5..ca6dccfda --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -31,7 +31,6 @@ from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.data_generators import ice_parsing # Problem modules that require optional dependencies diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py old mode 100755 new mode 100644 diff --git a/tensor2tensor/data_generators/ice_parsing.py b/tensor2tensor/data_generators/ice_parsing.py deleted file mode 100755 index 7a90fec45..000000000 --- a/tensor2tensor/data_generators/ice_parsing.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This module implements the ice_parsing_* problems, which -# parse plain text into flattened parse trees and POS tags. -# The training data is stored in files named `parsing_train.pairs` -# and `parsing_dev.pairs`. These files are UTF-8 text files where -# each line contains an input sentence and a target parse tree, -# separated by a tab character. - -import os - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import problem -from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators.wmt import tabbed_generator -from tensor2tensor.utils import registry - -import tensorflow as tf - - -# End-of-sentence marker. -EOS = text_encoder.EOS_ID - - -def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix, - source_vocab_size, target_vocab_size): - """Generate source and target data from a single file.""" - filename = "parsing_{0}.pairs".format("train" if train else "dev") - source_vocab = generator_utils.get_or_generate_tabbed_vocab( - data_dir, tmp_dir, filename, 0, - prefix + "_source.tokens.vocab.%d" % source_vocab_size, source_vocab_size) - target_vocab = generator_utils.get_or_generate_tabbed_vocab( - data_dir, tmp_dir, filename, 1, - prefix + "_target.tokens.vocab.%d" % target_vocab_size, target_vocab_size) - pair_filepath = os.path.join(tmp_dir, filename) - return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) - - -def tabbed_parsing_character_generator(tmp_dir, train): - """Generate source and target data from a single file.""" - character_vocab = text_encoder.ByteTextEncoder() - filename = "parsing_{0}.pairs".format("train" if train else "dev") - pair_filepath = os.path.join(tmp_dir, filename) - return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) - - -@registry.register_problem("ice_parsing_tokens") -class IceParsingTokens(problem.Problem): - """Problem spec for parsing tokenized Icelandic text to - constituency trees, also tokenized but to a smaller vocabulary.""" - - @property - def source_vocab_size(self): - return 2**14 # 16384 - - @property - def targeted_vocab_size(self): - return 2**8 # 256 - - @property - def input_space_id(self): - return problem.SpaceID.ICE_TOK - - @property - def target_space_id(self): - return problem.SpaceID.ICE_PARSE_TOK - - @property - def num_shards(self): - return 10 - - def feature_encoders(self, data_dir): - source_vocab_filename = os.path.join( - data_dir, "ice_source.tokens.vocab.%d" % self.source_vocab_size) - target_vocab_filename = os.path.join( - data_dir, "ice_target.tokens.vocab.%d" % self.targeted_vocab_size) - source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) - return { - "inputs": source_subtokenizer, - "targets": target_subtokenizer, - } - - def generate_data(self, data_dir, tmp_dir, task_id=-1): - generator_utils.generate_dataset_and_shuffle( - tabbed_parsing_token_generator(data_dir, tmp_dir, True, "ice", - self.source_vocab_size, - self.targeted_vocab_size), - self.training_filepaths(data_dir, self.num_shards, shuffled=False), - tabbed_parsing_token_generator(data_dir, tmp_dir, False, "ice", - self.source_vocab_size, - self.targeted_vocab_size), - self.dev_filepaths(data_dir, 1, shuffled=False)) - - def hparams(self, defaults, model_hparams): - p = defaults - source_vocab_size = self._encoders["inputs"].vocab_size - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, source_vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, self.targeted_vocab_size) - p.input_space_id = self.input_space_id - p.target_space_id = self.target_space_id - p.loss_multiplier = 2.5 # Rough estimate of avg number of tokens per word - diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py old mode 100755 new mode 100644 index b0ed44f5b..d0577db52 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -462,6 +462,39 @@ def wsj_parsing_tokens(model_hparams, prefix, wrong_source_vocab_size, return p +def ice_parsing_tokens(model_hparams, wrong_source_vocab_size): + """Icelandic to parse tree translation benchmark. + + Args: + model_hparams: a tf.contrib.training.HParams + wrong_source_vocab_size: a number used in the filename indicating the + approximate vocabulary size. This is not to be confused with the actual + vocabulary size. + + Returns: + A tf.contrib.training.HParams object. + """ + p = default_problem_hparams() + # This vocab file must be present within the data directory. + source_vocab_filename = os.path.join( + model_hparams.data_dir, "ice_source.vocab.%d" % wrong_source_vocab_size) + target_vocab_filename = os.path.join(model_hparams.data_dir, + "ice_target.vocab.256") + source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) + p.input_modality = { + "inputs": (registry.Modalities.SYMBOL, source_subtokenizer.vocab_size) + } + p.target_modality = (registry.Modalities.SYMBOL, 256) + p.vocabulary = { + "inputs": source_subtokenizer, + "targets": target_subtokenizer, + } + p.input_space_id = 18 # Icelandic tokens + p.target_space_id = 19 # Icelandic parse tokens + return p + + def img2img_imagenet(unused_model_hparams): """Image 2 Image for imagenet dataset.""" p = default_problem_hparams() @@ -511,6 +544,10 @@ def image_celeba(unused_model_hparams): lm1b_32k, "wiki_32k": wiki_32k, + "ice_parsing_characters": + wmt_parsing_characters, + "ice_parsing_tokens": + lambda p: ice_parsing_tokens(p, 2**13), "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), "wsj_parsing_tokens_16k": diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py old mode 100755 new mode 100644 index 35d1b5fca..0a47e9989 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -648,6 +648,28 @@ def target_space_id(self): return problem.SpaceID.CS_CHR +def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix, + source_vocab_size, target_vocab_size): + """Generate source and target data from a single file.""" + source_vocab = generator_utils.get_or_generate_tabbed_vocab( + data_dir, tmp_dir, "parsing_train.pairs", 0, + prefix + "_source.vocab.%d" % source_vocab_size, source_vocab_size) + target_vocab = generator_utils.get_or_generate_tabbed_vocab( + data_dir, tmp_dir, "parsing_train.pairs", 1, + prefix + "_target.vocab.%d" % target_vocab_size, target_vocab_size) + filename = "parsing_%s" % ("train" if train else "dev") + pair_filepath = os.path.join(tmp_dir, filename + ".pairs") + return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) + + +def tabbed_parsing_character_generator(tmp_dir, train): + """Generate source and target data from a single file.""" + character_vocab = text_encoder.ByteTextEncoder() + filename = "parsing_%s" % ("train" if train else "dev") + pair_filepath = os.path.join(tmp_dir, filename + ".pairs") + return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) + + def parsing_token_generator(data_dir, tmp_dir, train, vocab_size): symbolizer_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py old mode 100755 new mode 100644 index fa7ecdf81..37c1206bd --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -55,22 +55,66 @@ def model_fn_body(self, features): (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder( targets, hparams) - encoder_input = tf.nn.dropout( - encoder_input, 1.0 - hparams.layer_prepostprocess_dropout) - decoder_input = tf.nn.dropout( - decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) - encoder_output = transformer_encoder( - encoder_input, encoder_self_attention_bias, hparams) + encoder_input = tf.nn.dropout(encoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + decoder_input = tf.nn.dropout(decoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + encoder_output = transformer_encoder(encoder_input, + encoder_self_attention_bias, hparams) decoder_output = transformer_decoder( - decoder_input, encoder_output, - decoder_self_attention_bias, + decoder_input, encoder_output, decoder_self_attention_bias, encoder_decoder_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) return decoder_output +@registry.register_model +class TransformerEncoder(t2t_model.T2TModel): + """Transformer, encoder only.""" + + def model_fn_body(self, features): + hparams = self._hparams + inputs = features["inputs"] + target_space = features["target_space_id"] + + inputs = common_layers.flatten4d3d(inputs) + + (encoder_input, encoder_self_attention_bias, + _) = (transformer_prepare_encoder(inputs, target_space, hparams)) + + encoder_input = tf.nn.dropout(encoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + encoder_output = transformer_encoder(encoder_input, + encoder_self_attention_bias, hparams) + + return encoder_output + + +@registry.register_model +class TransformerDecoder(t2t_model.T2TModel): + """Transformer, decoder only.""" + + def model_fn_body(self, features): + hparams = self._hparams + targets = features["targets"] + + targets = common_layers.flatten4d3d(targets) + + (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder( + targets, hparams) + + decoder_input = tf.nn.dropout(decoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + + decoder_output = transformer_decoder( + decoder_input, None, decoder_self_attention_bias, None, hparams) + decoder_output = tf.expand_dims(decoder_output, 2) + + return decoder_output + + def transformer_prepare_encoder(inputs, target_space, hparams): """Prepare one shard of the model for the encoder. @@ -150,14 +194,11 @@ def transformer_encoder(encoder_input, with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( - common_layers.layer_preprocess(x, hparams), - None, - encoder_self_attention_bias, + common_layers.layer_preprocess( + x, hparams), None, encoder_self_attention_bias, hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout) + hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): y = transformer_ffn_layer( @@ -196,26 +237,23 @@ def transformer_decoder(decoder_input, with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( - common_layers.layer_preprocess(x, hparams), - None, - decoder_self_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout) - x = common_layers.layer_postprocess(x, y, hparams) - with tf.variable_scope("encdec_attention"): - y = common_attention.multihead_attention( - common_layers.layer_preprocess(x, hparams), - encoder_output, - encoder_decoder_attention_bias, + common_layers.layer_preprocess( + x, hparams), None, decoder_self_attention_bias, hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout) + hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) x = common_layers.layer_postprocess(x, y, hparams) + if encoder_output is not None: + assert encoder_decoder_attention_bias is not None + with tf.variable_scope("encdec_attention"): + y = common_attention.multihead_attention( + common_layers.layer_preprocess( + x, hparams), encoder_output, encoder_decoder_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, hparams.num_heads, + hparams.attention_dropout) + x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): y = transformer_ffn_layer( common_layers.layer_preprocess(x, hparams), hparams) @@ -393,7 +431,7 @@ def transformer_parsing_big(): @registry.register_hparams def transformer_parsing_ice(): - """Hparams for parsing and tagging Icelandic text.""" + """Hparams for parsing Icelandic text.""" hparams = transformer_base_single_gpu() hparams.batch_size = 4096 hparams.shared_embedding_and_softmax_weights = int(False) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py old mode 100755 new mode 100644 index fc9eb566f..5e8f4d482 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -259,11 +259,6 @@ def _interactive_input_fn(hparams): vocabulary = p_hparams.vocabulary["inputs" if has_input else "targets"] # This should be longer than the longest input. const_array_size = 10000 - # Import readline if available for command line editing and recall - try: - import readline - except ImportError: - pass while True: prompt = ("INTERACTIVE MODE num_samples=%d decode_length=%d \n" " it=<input_type> ('text' or 'image' or 'label')\n" @@ -271,7 +266,7 @@ def _interactive_input_fn(hparams): " in=<input_problem> (set the input problem number)\n" " ou=<output_problem> (set the output problem number)\n" " ns=<num_samples> (changes number of samples)\n" - " dl=<decode_length> (changes decode length)\n" + " dl=<decode_length> (changes decode legnth)\n" " <%s> (decode)\n" " q (quit)\n" ">" % (num_samples, decode_length, "source_string" diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py old mode 100755 new mode 100644 index d79eef484..fea647b2b --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -225,10 +225,10 @@ def parse_problem_name(problem_name): was_copy: A boolean. """ # Recursively strip tags until we reach a base name. - if problem_name.endswith("_rev"): + if len(problem_name) > 4 and problem_name[-4:] == "_rev": base, _, was_copy = parse_problem_name(problem_name[:-4]) return base, True, was_copy - elif problem_name.endswith("_copy"): + elif len(problem_name) > 5 and problem_name[-5:] == "_copy": base, was_reversed, _ = parse_problem_name(problem_name[:-5]) return base, was_reversed, True else: @@ -352,7 +352,7 @@ def list_modalities(): def parse_modality_name(name): - name_parts = name.split(":", maxsplit=1) + name_parts = name.split(":") if len(name_parts) < 2: name_parts.append("default") modality_type, modality_name = name_parts From 12c59a7d3fa452af0de7b792126d32c35d60d37f Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 10 Aug 2017 10:20:53 -0700 Subject: [PATCH 0256/4095] Massively simplify expert_utils. Breaks checkpoints for models that use experts. Fixed bug in Parallelism, where caching devices were always used, even when none. Fixed bug in attention_lm, attention_lm_moe by setting the default norm_type to "layer" instead of "none". PiperOrigin-RevId: 164869403 --- tensor2tensor/layers/common_hparams.py | 7 +- tensor2tensor/layers/common_layers.py | 67 +- tensor2tensor/layers/modalities.py | 2 +- tensor2tensor/models/attention_lm_moe.py | 48 +- tensor2tensor/models/long_answer.py | 276 ------ tensor2tensor/models/models.py | 1 - tensor2tensor/models/multimodel.py | 38 +- tensor2tensor/models/slicenet.py | 4 - tensor2tensor/models/transformer_moe.py | 37 +- tensor2tensor/utils/expert_utils.py | 1010 +++++----------------- 10 files changed, 313 insertions(+), 1177 deletions(-) delete mode 100644 tensor2tensor/models/long_answer.py diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 10b5e7e59..0ed62685f 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -69,6 +69,11 @@ def basic_params1(): sampling_method="argmax", # "argmax" or "random" problem_choice="adaptive", # "uniform", "adaptive", "distributed" multiply_embedding_mode="sqrt_depth", + # Parameters related to mixtures of experts. + moe_hidden_sizes="2048", # hidden layer sizes (comma-separated) + moe_num_experts=64, # number of experts per layer + moe_k=2, # how many experts to use for each batch element + moe_loss_coef=1e-2, # Sequences of operations to perform on layer input and layer output. # Used by common_layers.layer_preprocess, common_layers.layer_postprocess # Each character repsesnts an operation: @@ -83,7 +88,7 @@ def basic_params1(): # dropout rate to use during layer_preprocess and layer_postprocess layer_prepostprocess_dropout=0.1, # What type of normalization to use - norm_type="none", # "batch", layer", "noam", "none". + norm_type="layer", # "batch", layer", "noam", "none". # epsilon parameter to normalization function norm_epsilon=1e-6, symbol_modality_num_shards=16, diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index a85430c1c..e9b195195 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -193,7 +193,7 @@ def embedding(x, vocab_size, dense_size, name=None, reuse=None, multiplier=1.0): # On the backwards pass, we want to convert the gradient from # an indexed-slices to a regular tensor before sending it back to the # parameter server. This avoids excess computation on the parameter server. - embedding_var = eu.ConvertGradientToTensor(embedding_var) + embedding_var = eu.convert_gradient_to_tensor(embedding_var) emb_x = tf.gather(embedding_var, x) if multiplier != 1.0: emb_x *= multiplier @@ -823,71 +823,6 @@ def decompress_seqcnn(x, return tf.layers.dense(outputs, targets_vocab_size) -def moe_layer(data_parallelism, - ps_devices, - xs, - train, - model_hidden_size, - expert_hidden_size, - n1, - n2, - loss_coef, - autoscale=True, - name=None): - """A mixture of experts layer. - - Args: - data_parallelism: a expert_utils.Parallelism object. - ps_devices: a list of strings - xs: a list of input tensors. - train: a boolean scalar. - model_hidden_size: an integer (input/output size for this layer) - expert_hidden_size: an integer (size of each expert's hidden layer) - n1: an integer - number of experts (or # of groups for hierarchical MoE) - n2: optional integer - size of each group of experts for hierarchical MoE - loss_coef: a scalar - multiplier on load-balancing losses - autoscale: a boolean - name: a string - - Returns: - ys: a list of tensors: - extra_training_loss: a scalar - """ - dp = data_parallelism - with tf.variable_scope(name, default_name="moe"): - # Set up the hyperparameters for the gating networks. - primary_gating_hp = eu.NoisyTopKGatingParams() - primary_gating_hp.num_experts = n1 - if n2: - # hierarchical MoE containing moe_n1 groups of moe_n2 experts. - assert n2 > 1 - secondary_gating_hp = eu.NoisyTopKGatingParams() - secondary_gating_hp.num_experts = n2 - else: - # flat mixture of moe_n1 experts. - secondary_gating_hp = None - # Set up the hyperparameters for the expert networks. - # Each expert contains a hidden RELU layer of size filter_size - expert_hp = eu.FeedForwardExpertParams() - expert_hp.autoscale = autoscale - expert_hp.hidden_layer_sizes = [expert_hidden_size] - # Create the mixture of experts. - moe = eu.DistributedMixtureOfExperts(primary_gating_hp, secondary_gating_hp, - expert_hp, model_hidden_size, - model_hidden_size, ps_devices, "moe") - # MoE expects input tensors to be 2d. - # Flatten out spatial dimensions. - xs_2d = dp(tf.reshape, xs, [[-1, model_hidden_size]] * dp.n) - # Call the MoE - moe_out_2d, importance, load, _, _ = moe.Eval( - dp.devices, xs_2d, train, identifiers=None) - # Reshape the output to the original shape. - moe_out = dp(tf.reshape, moe_out_2d, dp(tf.shape, xs)) - # These losses encourage equal load on the different experts. - loss = loss_coef * (eu.CVSquared(importance) + eu.CVSquared(load)) - return moe_out, loss - - def simple_attention(target, source, bias=None): """A simple attention function. diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index e44729041..acaacbf99 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -70,7 +70,7 @@ def _get_weights(self): ret = shards[0] else: ret = tf.concat(shards, 0) - ret = eu.ConvertGradientToTensor(ret) + ret = eu.convert_gradient_to_tensor(ret) return ret def bottom_simple(self, x, name, reuse): diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 1869eef66..268e93f7b 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -32,6 +32,7 @@ from tensor2tensor.layers import common_attention from tensor2tensor.layers import common_hparams from tensor2tensor.layers import common_layers +from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -61,6 +62,7 @@ def postprocess(x, y): x = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) extra_loss = 0.0 + moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("attention"): @@ -78,11 +80,18 @@ def postprocess(x, y): x = postprocess(x, y) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers.split(","): - y, loss = common_layers.moe_layer( - dp, self._ps_devices, preprocess(x), + y, loss = expert_utils.distributed_moe( + dp, + self._ps_devices, + preprocess(x), hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, - hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, - hparams.moe_n2, hparams.moe_loss_coef) + input_size=hparams.hidden_size, + expert_fn=expert_utils.ffn_expert_fn( + hparams.hidden_size, moe_hidden_sizes, + hparams.hidden_size), + num_experts=hparams.moe_num_experts, + k=hparams.moe_k, + loss_coef=hparams.moe_loss_coef) extra_loss += loss else: y = dp( @@ -149,16 +158,7 @@ def attention_lm_moe_base(): hparams.label_smoothing = 0.0 hparams.shared_embedding_and_softmax_weights = int(False) hparams.add_hparam("filter_size", 2048) # Add new ones like this. - # comma-separated list of layer numbers. - # At each of these layers, we replace the ffn with a mixture of experts. - hparams.add_hparam("moe_layers", "2") - # If moe_n2 is None, then use a flat MoE with moe_n1 experts. - # If moe_n2 is an integer, then use a hierarchical MoE - # consisting of moe_n1 groups of moe_n2 experts each. - hparams.add_hparam("moe_n1", 32) - hparams.add_hparam("moe_n2", 0) - hparams.add_hparam("moe_hidden_size", 2048) - hparams.add_hparam("moe_loss_coef", 1e-2) + hparams.moe_num_experts = 32 # attention-related flags hparams.add_hparam("num_heads", 8) hparams.add_hparam("attention_key_channels", 0) @@ -168,6 +168,7 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("relu_dropout", 0.0) hparams.add_hparam("pos", "timing") # timing, none + hparams.add_hparam("moe_layers", "2") # comma separated list of layer numbers return hparams @@ -188,9 +189,20 @@ def attention_lm_moe_small(): hparams.num_hidden_layers = 4 hparams.hidden_size = 512 hparams.filter_size = 2048 - hparams.moe_n1 = 128 + hparams.moe_num_experts = 128 hparams.moe_layers = "2" - hparams.moe_hidden_size = 2048 + return hparams + + +@registry.register_hparams +def attention_lm_moe_tiny(): + """Cheap model for debugging. + + Returns: + an hparams object. + """ + hparams = attention_lm_moe_small() + hparams.moe_num_experts = 32 return hparams @@ -233,7 +245,7 @@ def attention_lm_moe_large(): hparams.hidden_size = 1024 hparams.num_heads = 16 hparams.filter_size = 4096 - hparams.moe_hidden_size = 4096 - hparams.moe_n1 = 128 + hparams.moe_hidden_sizes = "4096" + hparams.moe_num_experts = 128 hparams.layer_prepostprocess_dropout = 0.2 return hparams diff --git a/tensor2tensor/models/long_answer.py b/tensor2tensor/models/long_answer.py deleted file mode 100644 index a9fb45e4a..000000000 --- a/tensor2tensor/models/long_answer.py +++ /dev/null @@ -1,276 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Model to generate long answers to short questions. - -E.g. wiki_32k title->article dataset. - -Variant on attention_lm_moe.py - - prepend the inputs to the targets. - - use masked local attention to avoid quadratic space and time blowup for - long sequences. - -This model is still highly experimental and under rapid iteration. - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -from six.moves import xrange # pylint: disable=redefined-builtin - -from tensor2tensor.layers import common_attention -from tensor2tensor.layers import common_hparams -from tensor2tensor.layers import common_layers -from tensor2tensor.utils import registry -from tensor2tensor.utils import t2t_model - -import tensorflow as tf - - -@registry.register_model -class LongAnswer(t2t_model.T2TModel): - """Attention net. See file docstring.""" - - def model_fn_body_sharded(self, sharded_features): - # Remove dropout if not training - hparams = self._hparams - dp = self._data_parallelism - targets = sharded_features["targets"] - targets = dp(tf.squeeze, targets, 2) - inputs = sharded_features["inputs"] - inputs = dp(tf.squeeze, inputs, 2) - - decoder_input = dp(long_answer_prepare_decoder, inputs, targets, hparams) - - def residual_fn(x, y): - return common_layers.layer_norm(x + tf.nn.dropout( - y, 1.0 - hparams.residual_dropout)) - - x = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.residual_dropout) - extra_loss = 0.0 - for layer in xrange(hparams.num_hidden_layers): - with tf.variable_scope("layer_%d" % layer): - with tf.variable_scope("attention"): - y = dp( - common_attention.multihead_attention, - x, - None, - None, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - attention_type="local_mask_right", - block_length=hparams.block_length, - name="decoder_self_attention") - x = dp(residual_fn, x, y) - with tf.variable_scope("ffn"): - if str(layer) in hparams.moe_layers.split(","): - y, loss = common_layers.moe_layer( - dp, self._ps_devices, x, - hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, - hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, - hparams.moe_n2, hparams.moe_loss_coef) - extra_loss += loss - else: - y = dp( - common_layers.conv_hidden_relu, - x, - hparams.filter_size, - hparams.hidden_size, - dropout=hparams.relu_dropout) - x = dp(residual_fn, x, y) - x = dp(long_answer_output, x, inputs) - return x, extra_loss - - -def long_answer_prepare_decoder(inputs, targets, hparams): - """Prepare one shard of the model for the decoder. - - Args: - inputs: a Tensor. - targets: a Tensor. - hparams: run hyperparameters - - Returns: - decoder_input: a Tensor, bottom of decoder stack - """ - decoder_input = tf.concat([ - length_embedding(targets, hparams), inputs, - common_layers.shift_left_3d(targets) - ], 1) - if hparams.pos == "timing": - decoder_input = common_attention.add_timing_signal_1d(decoder_input) - return decoder_input - - -def length_embedding(targets, hparams): - """An embedding indicating approximate target length. - - This is a bit of a hack, where we want to be able to request a particular - target length during inference. - During training, we sometimes provide a target length. - During eval, we never provide a target length. - - Args: - targets: a Tensor. - hparams: run hyperparameters - - Returns: - a Tensor with shape [batch, 1, hparams.hidden_size] - """ - # encode the approx target length in case we want to specify it - # during inference. - batch = tf.shape(targets)[0] - padded_target_length = tf.shape(targets)[1] - if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN: - lengths = padded_target_length * tf.to_int32( - tf.less(tf.random_uniform([batch]), hparams.answer_length_prob_train)) - elif hparams.mode == tf.contrib.learn.ModeKeys.EVAL: - lengths = 0 - else: - assert hparams.mode == tf.contrib.learn.ModeKeys.INFER - lengths = hparams.answer_length_infer - lengths = tf.to_int32(tf.log(tf.to_float(lengths + 1))) - lengths = tf.zeros([batch], dtype=tf.int32) + lengths - ret = tf.gather( - tf.get_variable("answer_length", [100, hparams.hidden_size]), lengths) - return tf.expand_dims(ret, 1) - - -def long_answer_output(x, inputs): - """Strip initial part corresponding to the inputs and the length embedding.""" - x = tf.slice(x, [0, tf.shape(inputs)[1] + 1, 0], [-1, -1, -1]) - x = tf.expand_dims(x, 2) - return x - - -@registry.register_hparams -def long_answer_base(): - """Set of hyperparameters. - - Returns: - a hparams object - """ - hparams = common_hparams.basic_params1() - hparams.hidden_size = 1024 - hparams.batch_size = 8192 - hparams.max_length = 8192 - hparams.dropout = 0.0 - hparams.batching_mantissa_bits = 3 - hparams.clip_grad_norm = 0. # i.e. no gradient clipping - hparams.optimizer_adam_epsilon = 1e-9 - hparams.learning_rate_decay_scheme = "noam" - hparams.learning_rate = 0.1 - hparams.learning_rate_warmup_steps = 1000 - hparams.initializer_gain = 1.0 - hparams.num_hidden_layers = 4 - hparams.initializer = "uniform_unit_scaling" - hparams.weight_decay = 0.0 - hparams.optimizer_adam_beta1 = 0.9 - hparams.optimizer_adam_beta2 = 0.98 - hparams.num_sampled_classes = 0 - hparams.label_smoothing = 0.0 - hparams.shared_embedding_and_softmax_weights = int(True) - hparams.sampling_method = "random" - hparams.add_hparam("filter_size", 2048) # Add new ones like this. - # comma-separated list of layer numbers. - # At each of these layers, we replace the ffn with a mixture of experts. - hparams.add_hparam("moe_layers", "2") - # If moe_n2 is None, then use a flat MoE with moe_n1 experts. - # If moe_n2 is an integer, then use a hierarchical MoE - # consisting of moe_n1 groups of moe_n2 experts each. - hparams.add_hparam("moe_n1", 64) - hparams.add_hparam("moe_n2", 0) - hparams.add_hparam("moe_hidden_size", 2048) - hparams.add_hparam("moe_loss_coef", 1e-2) - # attention-related flags - hparams.add_hparam("num_heads", 8) - hparams.add_hparam("attention_key_channels", 0) - hparams.add_hparam("attention_value_channels", 0) - # All hyperparameters ending in "dropout" are automatically set to 0.0 - # when not in training mode. - hparams.add_hparam("attention_dropout", 0.0) - hparams.add_hparam("relu_dropout", 0.0) - hparams.add_hparam("residual_dropout", 0.0) - hparams.add_hparam("pos", "timing") # timing, none - hparams.add_hparam("block_length", 512) - hparams.add_hparam("answer_length_prob_train", 0.5) - hparams.add_hparam("answer_length_infer", 1000) - # We cannot handle long sequence at this point, so drop them, during eval. - # This affects evaluation metrics. - # TODO(noam): find a different workaround - hparams.eval_drop_long_sequences = int(True) - return hparams - - -@registry.register_hparams -def long_answer_tiny(): - """Cheap model for validation. - - Returns: - an hparams object. - """ - hparams = long_answer_base() - hparams.num_hidden_layers = 3 - hparams.hidden_size = 512 - hparams.filter_size = 1024 - hparams.moe_layers = "2" - hparams.moe_hidden_size = 1024 - hparams.block_length = 128 - hparams.moe_n1 = 8 - hparams.batch_size = 2048 - hparams.max_length = 2048 - return hparams - - -@registry.register_hparams -def long_answer_small(): - """Cheap model for single-gpu training. - - Returns: - an hparams object. - """ - hparams = long_answer_base() - hparams.num_hidden_layers = 4 - hparams.hidden_size = 512 - hparams.filter_size = 2048 - hparams.moe_n1 = 128 - hparams.moe_layers = "2" - hparams.moe_hidden_size = 2048 - return hparams - - -@registry.register_hparams -def long_answer_large(): - """Large model for distributed training. - - Returns: - an hparams object. - """ - hparams = long_answer_base() - hparams.num_hidden_layers = 5 - hparams.moe_layers = "3" - hparams.hidden_size = 1024 - hparams.filter_size = 4096 - hparams.moe_hidden_size = 4096 - hparams.moe_n1 = 128 - hparams.block_length = 1024 - return hparams diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index cba779fc9..d4514408d 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -30,7 +30,6 @@ from tensor2tensor.models import bytenet from tensor2tensor.models import cycle_gan from tensor2tensor.models import gene_expression -from tensor2tensor.models import long_answer from tensor2tensor.models import lstm from tensor2tensor.models import multimodel from tensor2tensor.models import neural_gpu diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index 290c78732..c8d515c8d 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -27,6 +27,7 @@ from tensor2tensor.layers import common_layers from tensor2tensor.layers import modalities from tensor2tensor.models import slicenet +from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -76,9 +77,19 @@ def conv_experts(xs, hparams, dp, ps, padding, mask, layer_id): train = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, conv_out = dp(conv_res_step, xs, hparams, padding, mask) loss = 0.0 - moe_out, loss = common_layers.moe_layer( - dp, ps, xs, train, hparams.hidden_size, hparams.filter_size, - hparams.moe_n1, hparams.moe_n2, 1.0) + moe_hidden_sizes = [hparams.filter_size] + expert_fn = expert_utils.ffn_expert_fn( + hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) + moe_out, loss = expert_utils.distributed_moe( + dp, + ps, + xs, + train, + input_size=hparams.hidden_size, + expert_fn=expert_fn, + num_experts=hparams.moe_num_experts, + k=hparams.moe_k, + loss_coef=1.0) return dp(residual_fn3, xs, moe_out, conv_out, hparams), loss @@ -136,6 +147,9 @@ def flatten(inputs): (decoder_input, decoder_self_attention_bias) = dp(prepare_decoder, targets, target_space_emb) + moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] + expert_fn = expert_utils.ffn_expert_fn( + hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) x = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.dropout) for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("dec_layer_%d" % layer): @@ -165,10 +179,16 @@ def flatten(inputs): x = dp(residual_fn3, x, y, z, hparams) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers.split(","): - y, moe_loss = common_layers.moe_layer( - dp, self._ps_devices, x, train, hparams.hidden_size, - hparams.filter_size, hparams.moe_n1, hparams.moe_n2, - hparams.moe_loss_coef) + y, moe_loss = expert_utils.distributed_moe( + dp, + self._ps_devices, + x, + train, + input_size=hparams.hidden_size, + expert_fn=expert_fn, + num_experts=hparams.moe_num_experts, + k=hparams.moe_k, + loss_coef=hparams.moe_loss_coef) expert_loss += tf.reduce_mean(moe_loss) else: y = dp( @@ -199,10 +219,8 @@ def multimodel_base(): hparams.add_hparam("large_kernel_size", 15) hparams.add_hparam("attention_dropout", 0.1) hparams.add_hparam("num_heads", 8) - hparams.add_hparam("moe_n1", 30) - hparams.add_hparam("moe_n2", 0) hparams.add_hparam("moe_layers", "2") - hparams.add_hparam("moe_loss_coef", 1e-2) + hparams.moe_num_experts = 30 return hparams diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index 1079659b5..6b07dc640 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -322,9 +322,6 @@ def slicenet_params1(): # A kernel scheme, one of _KERNEL_SCHEMES; overrides large_kernel_size. hparams.add_hparam("kernel_scheme", "3.7.15.31") hparams.add_hparam("audio_compression", 8) - hparams.add_hparam("moe_n1", 32) - hparams.add_hparam("moe_n2", 0) - hparams.add_hparam("moe_loss_coef", 1e-2) # attention-related flags hparams.add_hparam("attention_type", "simple") hparams.add_hparam("num_heads", 8) @@ -358,7 +355,6 @@ def slicenet_params1_tiny(): hparams.separability = 0 hparams.hidden_size = 128 hparams.num_hidden_layers = 2 - hparams.moe_n1 = 2 hparams.batch_size = 512 hparams.learning_rate_warmup_steps = 200 return hparams diff --git a/tensor2tensor/models/transformer_moe.py b/tensor2tensor/models/transformer_moe.py index 6f01667d8..669b1842b 100644 --- a/tensor2tensor/models/transformer_moe.py +++ b/tensor2tensor/models/transformer_moe.py @@ -29,6 +29,7 @@ from tensor2tensor.layers import common_hparams from tensor2tensor.layers import common_layers from tensor2tensor.models import transformer +from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -66,6 +67,9 @@ def postprocess(x, y): decoder_input = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) extra_loss = 0 + moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] + expert_fn = expert_utils.ffn_expert_fn( + hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) x = encoder_input for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("encoder_layer_%d" % layer): @@ -83,11 +87,16 @@ def postprocess(x, y): x = postprocess(x, y) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers_encoder.split(","): - y, loss = common_layers.moe_layer( - dp, self._ps_devices, preprocess(x), + y, loss = expert_utils.distributed_moe( + dp, + self._ps_devices, + preprocess(x), hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, - hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, - hparams.moe_n2, hparams.moe_loss_coef) + input_size=hparams.hidden_size, + expert_fn=expert_fn, + num_experts=hparams.moe_num_experts, + k=hparams.moe_k, + loss_coef=hparams.moe_loss_coef) extra_loss += loss else: y = dp( @@ -127,11 +136,16 @@ def postprocess(x, y): x = postprocess(x, y) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers_decoder.split(","): - y, loss = common_layers.moe_layer( - dp, self._ps_devices, preprocess(x), + y, loss = expert_utils.distributed_moe( + dp, + self._ps_devices, + preprocess(x), hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, - hparams.hidden_size, hparams.moe_hidden_size, hparams.moe_n1, - hparams.moe_n2, hparams.moe_loss_coef) + input_size=hparams.hidden_size, + expert_fn=expert_fn, + num_experts=hparams.moe_num_experts, + k=hparams.moe_k, + loss_coef=hparams.moe_loss_coef) extra_loss += loss else: y = dp( @@ -192,13 +206,6 @@ def transformer_moe_base(): # At each of these layers, we replace the ffn with a mixture of experts. hparams.add_hparam("moe_layers_encoder", "2") hparams.add_hparam("moe_layers_decoder", "2") - # If moe_n2 is None, then use a flat MoE with moe_n1 experts. - # If moe_n2 is an integer, then use a hierarchical MoE - # consisting of moe_n1 groups of moe_n2 experts each. - hparams.add_hparam("moe_n1", 32) - hparams.add_hparam("moe_n2", 0) - hparams.add_hparam("moe_hidden_size", 2048) - hparams.add_hparam("moe_loss_coef", 1e-2) return hparams diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index e21f2453a..ac58ef3cd 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -15,8 +15,8 @@ """Utilities for creating Sparsely-Gated Mixture-of-Experts Layers. -See the most recent draft of our ICLR paper: -https://openreview.net/pdf?id=B1ckMDqlg +See "Outrageously Large Neural Networks" +https://arxiv.org/abs/1701.06538 """ from __future__ import absolute_import @@ -35,122 +35,10 @@ from tensorflow.python.framework import function -def NoisyTopKGatingParams(): - """Hyperparams defining NoisyTopK Gating Network. - - Returns: - a tf.contrib.training.HParams object - """ - return tf.contrib.training.HParams( - gating_class=NoisyTopKGating, - num_experts=16, # The number of experts - k=2, # 'The number of experts to use per example - input_size=None, # size of input to MoE. Set by MoE class - dtype=tf.float32, # floating point data type - initializer=tf.zeros_initializer(), # initializer for weight matrices - noisy_gating=True, # Add tunable noise (necessary for load-balancing) - noise_epsilon=1e-2, # Added to noise stddev for numerical stability - ) - - -def FeedForwardExpertParams(): - """Hyperparameters defining feed-forward expert networks. - - Returns: - a tf.contrib.training.HParams object - """ - return tf.contrib.training.HParams( - # The class that implements the expert network - expert_class=FeedForwardExpert, - input_size=None, # Size of input to MoE. Set by MoE class. - # List of hidden layer sizes, or None for no hidden layers. - # The length of this list determines the number of hidden layers - hidden_layer_sizes=None, - output_size=None, # Size of output from MoE. Set by MoE class. - dtype=tf.float32, # Floating point data type) - # Activation function applied at each hidden layer) - hidden_activation=tf.nn.relu, - initializer=None, # Optional initializer for weight matrices.) - # If autoscale=True, At each hidden/output layer, multiply by - # rsqrt(prev_layer_size / input_size). This scaling happens - # before application of hidden_activation) - autoscale=True,) - - -def _SetInputOutputSizes(hp, input_size, output_size): - """Fill in the input_size and output_size hyperparameters. - - This is used by LocalMixtureOfExperts and DistributedMixtureOfExperts to - fill in the input_size and output_size on the gating parameters and expert - parameters so that the user does not have to set them in multiple places. - - Args: - hp: a hyperparameters - input_size: an integer - output_size: an integer - """ - if hp.input_size is None: - hp.input_size = input_size - else: - assert hp.input_size == input_size - if output_size is not None: - if hp.output_size is None: - hp.output_size = output_size - else: - assert hp.output_size == output_size - - -class FeedForwardExpert(object): - """An object representing a feed forward network (used as an expert). - """ - - def __init__(self, hp, name): - """Creates a FeedForwardExpert. - - Args: - hp: hyperparameters. Call FeedForwardExpertParams() to create these. - name: a string. - """ - self._hp = hp - hidden_layer_sizes = hp.hidden_layer_sizes or [] - num_layers = 1 + len(hidden_layer_sizes) - layer_sizes = [hp.input_size] + hidden_layer_sizes + [hp.output_size] - self._layer_sizes = layer_sizes - self._w = [] - for layer in range(num_layers): - shape = layer_sizes[layer:layer + 2] - self._w.append( - tf.get_variable('%s_layer_%d' % (name, layer), shape, hp.dtype, - hp.initializer)) - - def Eval(self, x): - """Evaluate the FeedForwardExpert on the given input. - - Args: - x: a `Tensor` of shape `[batch_size, hp.input_size]` - - Returns: - a `Tensor` of shape `[batch_size, hp.output_size]` - """ - hp = self._hp - num_layers = len(self._w) - for i in xrange(num_layers): - x = tf.matmul(x, self._w[i]) - if hp.autoscale and self._layer_sizes[i] != hp.input_size: - x *= (self._layer_sizes[i] / hp.input_size)**-0.5 - if i + 1 < num_layers and hp.hidden_activation: - x = hp.hidden_activation(x) - return x - - @property - def vars(self): - return self._w - - @function.Defun( python_grad_func=lambda x, dy: tf.convert_to_tensor(dy), shape_func=lambda op: [op.inputs[0].get_shape()]) -def ConvertGradientToTensor(x): +def convert_gradient_to_tensor(x): """Identity operation whose gradient is converted to a `Tensor`. Currently, the gradient to `tf.concat` is particularly expensive to @@ -159,7 +47,7 @@ def ConvertGradientToTensor(x): the output of the `tf.concat` is eventually passed to `tf.gather`. It is sometimes faster to convert the gradient to a `Tensor`, so as to get the cheaper gradient for `tf.concat`. To do this, replace - `tf.concat(x)` with `ConvertGradientToTensor(tf.concat(x))`. + `tf.concat(x)` with `convert_gradient_to_tensor(tf.concat(x))`. Args: x: A `Tensor`. @@ -196,7 +84,7 @@ def __init__(self, """Create a Parallelism. Args: - device_names_or_functions: A list of of length n, containing device names + device_names_or_functions: A list of length n, containing device names or device functions (see `tf.device`) reuse: True or None. Whether to reuse variables created in the first replica in the subsequent replicas. @@ -212,7 +100,7 @@ def __init__(self, self._devices = device_names_or_functions self._n = len(device_names_or_functions) self._reuse = reuse - self._caching_devices = self._MaybeRepeat(caching_devices) + self._caching_devices = self._maybe_repeat(caching_devices) self._daisy_chain_variables = daisy_chain_variables def __call__(self, fn, *args, **kwargs): @@ -231,24 +119,25 @@ def __call__(self, fn, *args, **kwargs): """ # Construct lists or args and kwargs for each function. if args: - my_args = TransposeListOfLists([self._MaybeRepeat(arg) for arg in args]) + my_args = transpose_list_of_lists( + [self._maybe_repeat(arg) for arg in args]) else: my_args = [[] for _ in xrange(self.n)] my_kwargs = [{} for _ in xrange(self.n)] for k, v in six.iteritems(kwargs): - vals = self._MaybeRepeat(v) + vals = self._maybe_repeat(v) for i in xrange(self.n): my_kwargs[i][k] = vals[i] # Construct lists of functions. - fns = self._MaybeRepeat(fn) + fns = self._maybe_repeat(fn) # Now make the parallel call. outputs = [] cache = {} for i in xrange(self.n): - def DaisyChainGetter(getter, name, *args, **kwargs): + def daisy_chain_getter(getter, name, *args, **kwargs): """Get a variable and cache in a daisy chain.""" device_var_key = (self._devices[i], name) if device_var_key in cache: @@ -268,7 +157,7 @@ def DaisyChainGetter(getter, name, *args, **kwargs): # Variable scope will not reset caching_device on reused variables, # so we make a custom getter that uses identity to cache the variable. # pylint: disable=cell-var-from-loop - def CachingGetter(getter, name, *args, **kwargs): + def caching_getter(getter, name, *args, **kwargs): v = getter(name, *args, **kwargs) key = (self._caching_devices[i], name) if key in cache: @@ -279,15 +168,15 @@ def CachingGetter(getter, name, *args, **kwargs): return ret if self._daisy_chain_variables: - custom_getter = DaisyChainGetter - elif self._caching_devices: - custom_getter = CachingGetter + custom_getter = daisy_chain_getter + elif self._caching_devices[i]: + custom_getter = caching_getter else: custom_getter = None # pylint: enable=cell-var-from-loop - with tf.name_scope('parallel_%d' % i): + with tf.name_scope("parallel_%d" % i): with tf.variable_scope( - tf.get_variable_scope(), + tf.get_variable_scope() if self._reuse else "parallel_%d" % i, reuse=True if i > 0 and self._reuse else None, caching_device=self._caching_devices[i], custom_getter=custom_getter): @@ -306,7 +195,7 @@ def n(self): def devices(self): return self._devices - def _MaybeRepeat(self, x): + def _maybe_repeat(self, x): """Utility function for processing arguments that are singletons or lists. Args: @@ -322,25 +211,7 @@ def _MaybeRepeat(self, x): return [x] * self.n -def Parallel(device_names_or_functions, fn, *args): - """Deprecated interface. - - Use `Parallelism(device_names_or_functions)(fn, *args)` instead. - - Args: - device_names_or_functions: A list of length n. - fn: a function or a list of n functions. - *args: additional args. Each arg should either be not a list, or a list - of length n. - - Returns: - either a single list of length n (if fn does not return a tuple), or a - tuple of lists of length n (if fn returns a tuple). - """ - return Parallelism(device_names_or_functions)(fn, *args) - - -def _RowwiseUnsortedSegmentSum(values, indices, n): +def _rowwise_unsorted_segment_sum(values, indices, n): """UnsortedSegmentSum on each row. Args: @@ -357,7 +228,7 @@ def _RowwiseUnsortedSegmentSum(values, indices, n): return tf.reshape(ret_flat, [batch, n]) -def _NormalDistributionCDF(x, stddev): +def _normal_distribution_cdf(x, stddev): """Evaluates the CDF of the normal distribution. Normal distribution with mean 0 and standard deviation stddev, @@ -376,7 +247,8 @@ def _NormalDistributionCDF(x, stddev): return 0.5 * (1.0 + tf.erf(x / (math.sqrt(2) * stddev + 1e-20))) -def _ProbInTopK(clean_values, noisy_values, noise_stddev, noisy_top_values, k): +def _prob_in_top_k( + clean_values, noisy_values, noise_stddev, noisy_top_values, k): """Helper function to NoisyTopKGating. Computes the probability that value is in top k, given different random noise. @@ -393,7 +265,7 @@ def _ProbInTopK(clean_values, noisy_values, noise_stddev, noisy_top_values, k): normally distributed noise with standard deviation noise_stddev. noise_stddev: a `Tensor` of shape [batch, n], or None noisy_top_values: a `Tensor` of shape [batch, m]. - 'values' Output of tf.top_k(noisy_top_values, m). m >= k+1 + "values" Output of tf.top_k(noisy_top_values, m). m >= k+1 k: an integer. Returns: @@ -415,15 +287,15 @@ def _ProbInTopK(clean_values, noisy_values, noise_stddev, noisy_top_values, k): threshold_if_out = tf.expand_dims( tf.gather(top_values_flat, threshold_positions_if_out), 1) # is each value currently in the top k. - prob_if_in = _NormalDistributionCDF(clean_values - threshold_if_in, - noise_stddev) - prob_if_out = _NormalDistributionCDF(clean_values - threshold_if_out, - noise_stddev) + prob_if_in = _normal_distribution_cdf(clean_values - threshold_if_in, + noise_stddev) + prob_if_out = _normal_distribution_cdf(clean_values - threshold_if_out, + noise_stddev) prob = tf.where(is_in, prob_if_in, prob_if_out) return prob -def CVSquared(x): +def cv_squared(x): """The squared coefficient of variation of a sample. Useful as a loss to encourage a positive distribution to be more uniform. @@ -443,33 +315,7 @@ def CVSquared(x): return variance / (tf.square(mean) + epsilon) -def MaxOverload(load): - """The load of the hardest-hit device relative to average. - - This is useful for monitoring the performance of MoEs. - - The load of an expert is the number of examples assigned to that expert. - The load of a device is the sum of the loads of all experts on that device. - - The input to this function is generally the 'load' output of - DistributedMixtureOfExperts.Eval(), which is either a 1d or 2d `Tensor` of - per-expert loads. In either case, the fist dimension corresponds to devices. - - This function sums over all dimensions other than dimension zero, then - computes the ratio of the maxmium value to the mean value. - - Args: - load: a 1d or 2d `Tensor`. - - Returns: - a `Scalar`. - """ - per_device_load = tf.reduce_sum(tf.reshape(load, [tf.shape(load)[0], -1]), 1) - return (tf.reduce_max(per_device_load) / - (tf.reduce_mean(per_device_load) + 1e-10)) - - -def _GatesToLoad(gates): +def _gates_to_load(gates): """Compute the true load per expert, given the gates. The load is the number of examples for which the corresponding gate is >0. @@ -482,11 +328,16 @@ def _GatesToLoad(gates): return tf.reduce_sum(tf.to_float(gates > 0), 0) -def _MyTopK(x, k): +def _my_top_k(x, k): """GPU-compatible version of top-k that works for very small constant k. Calls argmax repeatedly. + tf.nn.top_k is implemented for GPU, but the gradient, sparse_to_dense, + seems not to be, so if we use tf.nn.top_k, then both the top_k and its + gradient go on cpu. Once this is not an issue, this function becomes + obselete and should be replaced by tf.nn.top_k. + Args: x: a 2d Tensor. k: a small integer. @@ -509,374 +360,72 @@ def _MyTopK(x, k): return tf.stack(values, axis=1), tf.to_int32(tf.stack(indices, axis=1)) -class NoisyTopKGating(object): - """Noisy top-k gating network. +def noisy_top_k_gating(x, + input_size, + num_experts, + train, + k=2, + initializer=tf.zeros_initializer(), + noisy_gating=True, + noise_epsilon=1e-2, + name=None): + """Noisy top-k gating. See paper: https://arxiv.org/abs/1701.06538. - """ - - def __init__(self, hp, name): - """Create a NoisyTopKGating network. - - Args: - hp: a hyperparameters created by NoisyTopKGatingParams() - name: a string - """ - self._vars = [] - self._hp = hp - self._w_gate = tf.get_variable('%s_gate' % name, - [hp.input_size, - hp.num_experts], hp.dtype, hp.initializer) - self._vars.append(self._w_gate) - if hp.noisy_gating: - self._w_noise = tf.get_variable('%s_noise' % name, - [hp.input_size, hp.num_experts], hp.dtype, - hp.initializer) - self._vars.append(self._w_noise) - - def Eval(self, x, train=True, summaries=False): - """Compute noisy top-k gating. - - Args: - x: a `Tensor` of shape `[batch_size, input_size]`. - train: a boolean `Scalar`. Setting this to false turns off noise. - summaries: a boolean. Whether to add summaries. - Returns: - gates: a `Tensor` of shape `[batch_size, n]` - load: a `Tensor` of shape `[n]`. - If we are using noise, this is a smooth approximation of the load, - and you can define a loss in terms of it to help with load-balancing. - """ - with tf.variable_scope('NoisyTopKGating'): - hp = self._hp - clean_logits = tf.matmul(x, self._w_gate) - if hp.noisy_gating: - raw_noise_stddev = tf.matmul(x, self._w_noise) - noise_stddev = ((tf.nn.softplus(raw_noise_stddev) + hp.noise_epsilon) * - (tf.to_float(train))) - noisy_logits = clean_logits + ( - tf.random_normal(tf.shape(clean_logits)) * noise_stddev) - logits = noisy_logits - if summaries: - tf.summary.histogram('noisy_logits', noisy_logits) - tf.summary.histogram('noise_stddev', noise_stddev) - else: - logits = clean_logits - top_logits, top_indices = _MyTopK(logits, min(hp.k + 1, hp.num_experts)) - top_k_logits = tf.slice(top_logits, [0, 0], [-1, hp.k]) - top_k_indices = tf.slice(top_indices, [0, 0], [-1, hp.k]) - top_k_gates = tf.nn.softmax(top_k_logits) - # This will be a `Tensor` of shape `[batch_size, n]`, with zeros in the - # positions corresponding to all but the top k experts per example. - gates = _RowwiseUnsortedSegmentSum(top_k_gates, top_k_indices, - hp.num_experts) - if hp.noisy_gating and hp.k < hp.num_experts: - load = tf.reduce_sum( - _ProbInTopK(clean_logits, noisy_logits, noise_stddev, top_logits, - hp.k), 0) - else: - load = _GatesToLoad(gates) - if summaries: - tf.summary.histogram('importance', tf.reduce_sum(gates, 0)) - tf.summary.histogram('load', load) - return gates, load - - @property - def vars(self): - return self._vars - - -class LocalMixtureOfExperts(object): - """A MoE on a single device. - """ - - def __init__(self, gating_hp, expert_hp, input_size, output_size, name): - """Create a LocalMixtureOfExperts. - - Args: - gating_hp: hyperparameters for the gating network. - e.g. NoisyTopKGatingParams() - expert_hp: hyperparameters for the expert networks. - e.g. FeedForwardExpertParams() - input_size: an integer. - output_size: an integer. - name: a string. - """ - self._name = name - _SetInputOutputSizes(gating_hp, input_size, None) - _SetInputOutputSizes(expert_hp, input_size, output_size) - self._gating_hp = gating_hp - self._gating = gating_hp.gating_class(gating_hp, name + '_gating') - self._expert_hp = expert_hp - self._experts = [ - expert_hp.expert_class(expert_hp, name + '_%d' % i) - for i in xrange(gating_hp.num_experts) - ] - - def Eval(self, - x, - train=True, - per_example_multiplier=None, - summaries=False, - identifiers=None): - """Evaluate mixture of experts. - - We provide a convenient debugging tool for determining the set of examples - that we passed to each expert. The caller may provide a `Tensor` of - "identifiers", of any type whose first dimension matches the number of - input examples. The function will then return a list - "expert_to_identifiers", with one `Tensor` for each expert containing the - identifiers for all examples assigned to that expert. A parallel list of - `Tensor`s, "expert_to_gates", is also returned, containing the - corresponding gate values. - - Args: - x: a `Tensor` of shape `[batch_size, input_size]` - train: a boolean Scalar. Are we in training mode? - per_example_multiplier: an optional `Tensor` of shape `[batch_size]` which - gets multiplied into the gate values. If this LocalMixtureOfExperts - represents one secondary MoE in a hierarchical MoE, then we pass in - in the gate values from the primary gating function here. This causes - the computed values (`y`, `importance` and `expert_to_gates`) to also - reflect the primary gate values. - summaries: an boolean. Enable summaries. - identifiers: an optional `Tensor` whose first dimension is equal to - batch_size. - - Returns: - y: a `Tensor` of shape `[batch_size, output_size]`. Output of the MoE. - importance: a `Tensor` of shape `[n]`. Batchwise sum of gates. - load: a `Tensor` of shape `[n]`. Smooth estimator of the number of - examples passed to each expert. This is useful for load-balancing, - as any gradient on this `Tensor` will back-propagate to the gating - network. - expert_to_identifiers: if `identifiers` was passed in, a list of - length `num_experts`. Each element is a `Tensor` whose shape matches - that of `identifiers` in all but the first dimension. Contains the - slices of `identifiers` corresponding to the batch elements that were - dispatched to that expert. - expert_to_gates: A list of length `num_experts`. Each element contains - a 1-dimensional tensor - """ - gating_hp = self._gating_hp - gates, load = self._gating.Eval(x, train, summaries) - if per_example_multiplier is not None: - gates *= tf.expand_dims(per_example_multiplier, 1) - dispatcher = SparseDispatcher(gating_hp.num_experts, gates) - expert_input = dispatcher.Dispatch(x) - expert_output = [ - self._experts[i].Eval(expert_input[i]) - for i in xrange(gating_hp.num_experts) - ] - y = dispatcher.Combine(expert_output) - if identifiers is not None: - expert_to_identifiers = dispatcher.Dispatch(identifiers) - else: - expert_to_identifiers = None - return (y, tf.reduce_sum(gates, 0), load, expert_to_identifiers, - dispatcher.ExpertToGates()) - - @property - def vars(self): - ret = [] - for x in self._experts: - ret.extend(x.vars) - ret.extend(self._gating.vars) - return ret - - -class DistributedMixtureOfExperts(object): - """Distributed (optionally Hierarchical) Mixture of Experts. - - This class implements the scheme described in our paper. - See link at the top of this file. - - The model is trained synchronously using one large TF graph using - multiple devices. - The conventional (non-MoE) layers use data-parallelism, with each device - processing a subset of the training batch. We call these datashards. + Args: + x: input Tensor with shape [batch_size, input_size] + input_size: an integer + num_experts: an integer + train: a boolean - we only add noise at training time. + k: an integer - number of experts per example + initializer: an initializer + noisy_gating: a boolean + noise_epsilon: a float + name: an optional string - The MoE layer (this object) uses model parallelism. Each expert is assigned - to a particular device, which hosts the expert parameters and performs the - expert computation for all examples assigned to that expert. In the case - of a hierarchical MoE, each second-level MoE is assigned to a device. + Returns: + gates: a Tensor with shape [batch_size, num_experts] + load: a Tensor with shape [num_experts] """ - - def __init__(self, primary_gating_hp, secondary_gating_hp, expert_hp, - input_size, output_size, expert_devices, name): - """Create a DistributedMixtureOfExperts. - - If `secondary_gating_hp` is `None`, then this is a flat MoE with - `primary_gating_hp.num_experts` experts. Otherwise, this is a hierarchical - MoE with `primary_gating_hp.num_experts` groups of - `secondary_gating_hp.num_experts` experts. - - The assignemnt of experts (or groups of experts) to devices is by - round-robin. So to make equal use of all the devices, one should set - `primary_gating_hp.num_experts` to the number of devices or a multiple - thereof. - - Args: - primary_gating_hp: hyperparameters for the primary gating network. - e.g. NoisyTopKGatingParams(). - secondary_gating_hp: hyperparameters for the secondary gating network. - e.g. NoisyTopKGatingParams(). None indicates a flat MoE. - expert_hp: hyperparameters for the expert networks. - e.g. FeedForwardExpertParams() - input_size: an integer. - output_size: an integer. - expert_devices: a list of device strings. The devices to be used for - the experts. - name: a string. - """ - self._name = name - # fill in the missing values in the hyperparameters - _SetInputOutputSizes(primary_gating_hp, input_size, None) - _SetInputOutputSizes(expert_hp, input_size, output_size) - self._is_hierarchical = secondary_gating_hp is not None - self._primary_gating_hp = primary_gating_hp - self._primary_gating = primary_gating_hp.gating_class( - primary_gating_hp, name + '_primary_gating') - n1 = self._primary_gating_hp.num_experts - # round robin assignment of experts to devices. - expert_devices = [ - expert_devices[i % len(expert_devices)] for i in xrange(n1) - ] - self._expert_devices = expert_devices - self._all_vars = [] - self._all_vars.extend(self._primary_gating.vars) - if self._is_hierarchical: - # hierarchical MoE - self._secondary_moe = [] - for i in xrange(n1): - with tf.device(expert_devices[i]): - secondary_moe = LocalMixtureOfExperts(secondary_gating_hp, expert_hp, - input_size, output_size, - '%s_secondary_%d' % (name, i)) - self._secondary_moe.append(secondary_moe) - self._all_vars.extend(secondary_moe.vars) + with tf.variable_scope(name, default_name="noisy_top_k_gating"): + w_gate = tf.get_variable( + "w_gate", [input_size, num_experts], tf.float32, initializer) + if noisy_gating: + w_noise = tf.get_variable("w_noise", + [input_size, num_experts], tf.float32, + initializer) + clean_logits = tf.matmul(x, w_gate) + if noisy_gating: + raw_noise_stddev = tf.matmul(x, w_noise) + noise_stddev = ((tf.nn.softplus(raw_noise_stddev) + noise_epsilon) * + (tf.to_float(train))) + noisy_logits = clean_logits + ( + tf.random_normal(tf.shape(clean_logits)) * noise_stddev) + logits = noisy_logits + if not tf.get_variable_scope().reuse: + tf.summary.histogram("noisy_logits", noisy_logits) + tf.summary.histogram("noise_stddev", noise_stddev) else: - # flat MoE - self._experts = [] - for i in xrange(n1): - with tf.device(expert_devices[i]): - expert = expert_hp.expert_class(expert_hp, name + '_%d' % i) - self._experts.append(expert) - self._all_vars.extend(expert.vars) - - def Eval(self, - datashard_devices, - xs, - train=True, - summaries=False, - identifiers=None, - shadow_xs=None): - """Evaluate MoE on given inputs. - - This class is designed for the case where the rest of the model is using - data parallelism. We receive an array of input `Tensor`s, one per - datashard, and we produce a list of output Tensors, one per datashard. - - We provide a convenient debugging tool for determining the set of examples - that we passed to each expert. The caller may provide a `Tensor` of - "identifiers", of any type whose first dimension matches the number of - input examples. The function will then return a list - "expert_to_identifiers", with one `Tensor` for each expert containing the - identifiers for all examples assigned to that expert. A parallel list of - `Tensor`s, "expert_to_gates", is also returned, containing the - corresponding gate values. - - Args: - datashard_devices: a `list` of device strings of length `num_datashards`. - Which devices to use for the output tensors. - xs: A `list` of `Tensor`s of length `num_datashards`. Each has shape - `[batch_size[d], input_size]. - train: a boolean `Scalar`. When train=`True`, noise is added to the - gating function. - summaries: a boolean. Whether to write summaries. - identifiers: an optional list of tensors. - Each tensor has shape [<batch_size[datashard]>, extra_dims] - shadow_xs: Optional `list` of `Tensor`s of length `num_datashards`. Each - has shape `[batch_size[d], input_size]. Shadow_xs is useful if you want - to dispatch a transformed version of xs to the experts, but you want - untransformed xs for the gating network. - - Returns: - ys: the output (a list of one tensor per datashard). Each has shape - `[batch_size[d], output_size]. - importance: a `Tensor` of shape `[n]` for a flat MoE or `[n1, n2]` for a - hierarchical MoE. Batchwise sum of gates. - load: a `Tensor` of shape `[n]` for a flat MoE or `[n1, n2]` for a - hierarchical MoE. Smooth estimator of the number of - examples passed to each expert. This is useful for load-balancing, - as any gradient on this `Tensor` will back-propagate to the gating - network. - expert_to_identifiers: if `identifiers` was passed in, a list of - length `num_experts`. Each element is a `Tensor` whose shape matches - that of `identifiers` in all but the first dimension. Contains the - slices of `identifiers` corresponding to the batch elements that were - dispatched to that expert. - expert_to_gates: a list of one tensor per expert. - Each tensor has shape [<num_examples[expert]>] - - """ - n1 = self._primary_gating_hp.num_experts - epsilon = 1e-10 - assert len(datashard_devices) == len(xs) - num_datashards = len(xs) - expert_devices = self._expert_devices - has_identifiers = identifiers is not None - # pylint: disable=unbalanced-tuple-unpacking - primary_gates, primary_smooth_load = Parallel( - datashard_devices, self._primary_gating.Eval, xs, train, - [summaries] + [False] * (num_datashards - 1)) - primary_importance = tf.add_n( - Parallel(datashard_devices, tf.reduce_sum, primary_gates, 0)) - primary_smooth_load = tf.add_n(primary_smooth_load) - primary_true_load = tf.add_n( - Parallel(datashard_devices, _GatesToLoad, primary_gates)) - primary_dispatcher = DistributedSparseDispatcher( - datashard_devices, expert_devices, primary_gates) - - if shadow_xs is None: - secondary_input = primary_dispatcher.Dispatch(xs) + logits = clean_logits + top_logits, top_indices = _my_top_k(logits, min(k + 1, num_experts)) + top_k_logits = tf.slice(top_logits, [0, 0], [-1, k]) + top_k_indices = tf.slice(top_indices, [0, 0], [-1, k]) + top_k_gates = tf.nn.softmax(top_k_logits) + # This will be a `Tensor` of shape `[batch_size, n]`, with zeros in the + # positions corresponding to all but the top k experts per example. + gates = _rowwise_unsorted_segment_sum(top_k_gates, top_k_indices, + num_experts) + if noisy_gating and k < num_experts: + load = tf.reduce_sum( + _prob_in_top_k(clean_logits, noisy_logits, noise_stddev, top_logits, + k), 0) else: - secondary_input = primary_dispatcher.Dispatch(shadow_xs) - - primary_expert_to_identifiers = (primary_dispatcher.Dispatch(identifiers) - if has_identifiers else None) - primary_expert_to_gates = primary_dispatcher.ExpertToGates() - if not self._is_hierarchical: - # one-level distributed mixture of experts - secondary_output = Parallel(expert_devices, lambda a, b: a.Eval(b), - self._experts, secondary_input) - ys = primary_dispatcher.Combine(secondary_output) - return (ys, primary_importance, primary_smooth_load, - primary_expert_to_identifiers, primary_expert_to_gates) - # two-level hierarchical MoE - (secondary_output, secondary_importance, secondary_load, - secondary_expert_to_identifiers, secondary_expert_to_gates) = (Parallel( - expert_devices, [m.Eval for m in self._secondary_moe], secondary_input, - train, primary_expert_to_gates, [summaries] + [False] * (n1 - 1), - primary_expert_to_identifiers)) - # pylint: enable=unbalanced-tuple-unpacking - ys = primary_dispatcher.Combine(secondary_output, multiply_by_gates=False) - importance = tf.stack(secondary_importance) - load = tf.stack(secondary_load) * tf.expand_dims(primary_smooth_load / ( - primary_true_load + epsilon), 1) - expert_to_identifiers = [] - if identifiers is not None: - for el in secondary_expert_to_identifiers: - expert_to_identifiers.extend(el) - expert_to_gates = [] - for el in secondary_expert_to_gates: - expert_to_gates.extend(el) - return (ys, importance, load, expert_to_identifiers, expert_to_gates) - - @property - def vars(self): - return self._all_vars + load = _gates_to_load(gates) + if not tf.get_variable_scope().reuse: + tf.summary.histogram("importance", tf.reduce_sum(gates, 0)) + tf.summary.histogram("load", load) + return gates, load class SparseDispatcher(object): @@ -889,9 +438,9 @@ class SparseDispatcher(object): experts: a list of length `num_experts` containing sub-networks. dispatcher = SparseDispatcher(num_experts, gates) - expert_inputs = dispatcher.Dispatch(inputs) + expert_inputs = dispatcher.dispatch(inputs) expert_outputs = [experts[i](expert_inputs[i]) for i in range(num_experts)] - outputs = dispatcher.Combine(expert_outputs) + outputs = dispatcher.combine(expert_outputs) The preceding code sets the output for a particular example b to: output[b] = Sum_i(gates[b, i] * experts[i](inputs[b])) @@ -920,14 +469,14 @@ def __init__(self, num_experts, gates): tf.reshape(self._gates, [-1]), self._batch_index * num_experts + self._expert_index) - def Dispatch(self, inp): + def dispatch(self, inp): """Create one input Tensor for each expert. The `Tensor` for a expert `i` contains the slices of `inp` corresponding to the batch elements `b` where `gates[b, i] > 0`. Args: - inp: a `Tensor` of shape '[batch_size, <extra_input_dims>]` + inp: a `Tensor` of shape "[batch_size, <extra_input_dims>]` Returns: a list of `num_experts` `Tensor`s with shapes `[expert_batch_size_i, <extra_input_dims>]`. @@ -935,7 +484,7 @@ def Dispatch(self, inp): inp = tf.gather(inp, self._batch_index) return tf.split(inp, self._part_sizes_tensor, 0) - def Combine(self, expert_out, multiply_by_gates=True): + def combine(self, expert_out, multiply_by_gates=True): """Sum together the expert output, weighted by the gates. The slice corresponding to a particular batch element `b` is computed @@ -951,15 +500,15 @@ def Combine(self, expert_out, multiply_by_gates=True): Returns: a `Tensor` with shape `[batch_size, <extra_output_dims>]`. """ - # see comments on ConvertGradientToTensor - stitched = ConvertGradientToTensor(tf.concat(expert_out, 0)) + # see comments on convert_gradient_to_tensor + stitched = convert_gradient_to_tensor(tf.concat(expert_out, 0)) if multiply_by_gates: stitched *= tf.expand_dims(self._nonzero_gates, 1) combined = tf.unsorted_segment_sum(stitched, self._batch_index, tf.shape(self._gates)[0]) return combined - def ExpertToGates(self): + def expert_to_gates(self): """Gate values corresponding to the examples in the per-expert `Tensor`s. Returns: @@ -985,28 +534,25 @@ class DistributedSparseDispatcher(object): `Tensor`s are created on those devices. There is no single-device bottleneck. """ - def __init__(self, datashard_devices, expert_devices, gates): + def __init__(self, data_parallelism, expert_parallelism, gates): """Create a DistributedSparseDispatcher. Args: - datashard_devices: a list of num_datashards device strings. - expert_devices: a list of num_experts device strings. - gates: a list of num_datashards `Tensor`s of shapes + data_parallelism: a Parallelism object. + expert_parallelism: a Parallelism object. + gates: a list of datashard_parallelism.n `Tensor`s of shapes `[batch_size[d], num_experts]`. Returns: a DistributedSparseDispatcher """ self._gates = gates - self._num_experts = len(expert_devices) - assert len(gates) == len(datashard_devices) - self._num_datashards = len(gates) - self._datashard_devices = datashard_devices - self._expert_devices = expert_devices - self._dispatchers = Parallel(self._datashard_devices, SparseDispatcher, - self._num_experts, gates) - - def Dispatch(self, inp): + self._dp = data_parallelism + self._ep = expert_parallelism + assert len(gates) == self._dp.n + self._dispatchers = self._dp(SparseDispatcher, self._ep.n, gates) + + def dispatch(self, inp): """Create one input Tensor for each expert. Args: @@ -1016,16 +562,14 @@ def Dispatch(self, inp): a list of `num_experts` `Tensor`s with shapes `[num_examples[i], <extra_input_dims>]`. """ - dispatched = Parallel(self._datashard_devices, lambda a, b: a.Dispatch(b), - self._dispatchers, inp) - ret = Parallel(self._expert_devices, tf.concat, - TransposeListOfLists(dispatched), 0) + dispatched = self._dp(lambda a, b: a.dispatch(b), self._dispatchers, inp) + ret = self._ep(tf.concat, transpose_list_of_lists(dispatched), 0) if ret[0].dtype == tf.float32: - # see comments on ConvertGradientToTensor - ret = Parallel(self._expert_devices, ConvertGradientToTensor, ret) + # see comments on convert_gradient_to_tensor + ret = self._ep(convert_gradient_to_tensor, ret) return ret - def Combine(self, expert_out, multiply_by_gates=True): + def combine(self, expert_out, multiply_by_gates=True): """Sum together the expert output, multiplied by the corresponding gates. Args: @@ -1038,40 +582,31 @@ def Combine(self, expert_out, multiply_by_gates=True): `[batch_size[d], <extra_output_dims>]`. """ expert_part_sizes = tf.unstack( - tf.stack([ - self._dispatchers[d].part_sizes - for d in xrange(self._num_datashards) - ]), - num=self._num_experts, + tf.stack([d.part_sizes for d in self._dispatchers]), + num=self._ep.n, axis=1) # list of lists of shape [num_experts][num_datashards] - expert_output_parts = Parallel(self._expert_devices, tf.split, expert_out, - expert_part_sizes) - expert_output_parts_t = TransposeListOfLists(expert_output_parts) - ret = [] - for d in xrange(self._num_datashards): - with tf.device(self._datashard_devices[d]): - ret.append(self._dispatchers[d].Combine( - # see comments on ConvertGradientToTensor - ConvertGradientToTensor(tf.concat(expert_output_parts_t[d], 0)), - multiply_by_gates=multiply_by_gates)) - return ret - - def ExpertToGates(self): + expert_output_parts = self._ep(tf.split, expert_out, expert_part_sizes) + expert_output_parts_t = transpose_list_of_lists(expert_output_parts) + def my_combine(dispatcher, parts): + return dispatcher.combine( + convert_gradient_to_tensor(tf.concat(parts, 0)), + multiply_by_gates=multiply_by_gates) + return self._dp(my_combine, self._dispatchers, expert_output_parts_t) + + def expert_to_gates(self): """Gate values corresponding to the examples in the per-expert `Tensor`s. Returns: a list of `num_experts` one-dimensional `Tensor`s of type `tf.float32`. """ - return Parallel(self._expert_devices, tf.concat, - TransposeListOfLists( - Parallel(self._datashard_devices, [ - self._dispatchers[d].ExpertToGates - for d in xrange(self._num_datashards) - ])), 0) + return self._ep( + tf.concat, + transpose_list_of_lists( + self._dp(lambda d: d.expert_to_gates(), self._dispatchers)), 0) -def TransposeListOfLists(lol): +def transpose_list_of_lists(lol): """Transpose a list of equally-sized python lists. Args: @@ -1079,205 +614,110 @@ def TransposeListOfLists(lol): Returns: a list of lists """ - assert lol, 'cannot pass the empty list' + assert lol, "cannot pass the empty list" return [list(x) for x in zip(*lol)] -class DistributedSingleDispatcher(object): - """Dispatches to experts according to gates. - - Each example goes to one expert. - - Unlike SparseDispatcher, the gates are one-dimensional `Tensor`s of integer - expert ids. There are no weights. - """ +def ffn_expert_fn(input_size, + hidden_sizes, + output_size, + hidden_activation=tf.nn.relu): + """Returns a function that creates a feed-forward network. - def __init__(self, data_parallelism, model_parallelism, gates): - """Constructs a Dispatcher. - - Args: - data_parallelism: a Parallelism object. - model_parallelism: a Parallelism object. - gates: a list of 1d integer `Tensor`s, one per datashard. - Says which expert to use for each batch element. - - Returns: - a DistributedSingleDispatcher - """ - gates = data_parallelism(tf.to_int32, gates) - self._gates = gates - self._data_parallelism = data_parallelism - self._model_parallelism = model_parallelism - - # Compute the sizes number of examples going from each datashard to each - # expert. - def _PartSizes(gates): - return tf.unsorted_segment_sum( - tf.ones_like(gates), gates, model_parallelism.n) - - part_sizes_by_datashard = data_parallelism(_PartSizes, gates) - self._part_sizes_by_expert = tf.unstack( - tf.stack(part_sizes_by_datashard), num=model_parallelism.n, axis=1) - - # These indices will be used to combine the output on the datashards. - def _StitchIndices(gates): - return tf.dynamic_partition( - tf.range(tf.size(gates)), gates, model_parallelism.n) - - self._stitch_indices = data_parallelism(_StitchIndices, gates) - - def Dispatch(self, d_tensors): - """Reshuffles input `Tensor`s to produce output `Tensor`s. - - The dimensions of all input and output `Tensor`s match, except for - dimension 0. In dimension 0, the input `Tensor`s match the corresponding - `gates` `Tensor`s which were passed to the constructor. - - Args: - d_tensors: a list of `Tensor`s, one per datashard. - - Returns: - a list of `Tensor`s, one per expert. - - """ - parts = self._data_parallelism(tf.dynamic_partition, d_tensors, self._gates, - self._model_parallelism.n) - parts_by_expert = TransposeListOfLists(parts) - x_tensors = self._model_parallelism(tf.concat, parts_by_expert, 0) - return x_tensors - - def Combine(self, x_tensors): - """Reshuffles per-expert `Tensor`s to produce per-datashard `Tensor`s. - - Dispatch must have been called at least once first. - - The dimensions of all input and output `Tensor`s match, except for - dimension 0. In dimension 0, the input `Tensor`s match the corresponding - outputs of `Dispatch`, and the output `Tensor`s match the corresponding - `gates` `Tensor`s which were passed to the constructor. - - Args: - x_tensors: a list of `Tensor`s, one per expert. - - Returns: - a list of `Tensor`s, one per datashard. - """ - parts = self._model_parallelism(tf.split, x_tensors, - self._part_sizes_by_expert) - d_tensors = self._data_parallelism(tf.dynamic_stitch, self._stitch_indices, - TransposeListOfLists(parts)) - return d_tensors - - -def ParallelEmbeddingLookup(params, ids, data_parallelism): - """Mod-sharded embedding lookup with multiple datashards. - - TODO(noam): does this work when vocab_size is not a multiple of `num_shards`? + Use this function to create the expert_fn argument to distributed_moe. Args: - params: A list of `num_shards` `Tensors`, each with shapes - `[vocab_size / num_params, depth]`. - ids: A list of `num_datashards` one-dimensional ineger `Tensors`, - with shapes `[batch_size[i]]` - data_parallelism: A Parallelism object. + input_size: an integer + hidden_sizes: a list of integers + output_size: an integer + hidden_activation: a unary function. Returns: - a list of `num_datashards` `Tensors`, each with shape - `[batch_size[i], depth]`. + a unary function """ - param_devices = [x.device for x in params] - model_parallelism = Parallelism(param_devices) - num_shards = len(param_devices) - # pylint: disable=unbalanced-tuple-unpacking - ids, unique_idx = data_parallelism(tf.unique, ids) - # pylint: enable=unbalanced-tuple-unpacking - gates = data_parallelism(tf.mod, ids, num_shards) - ids_div = data_parallelism(tf.div, ids, num_shards) - dispatcher = DistributedSingleDispatcher(data_parallelism, model_parallelism, - gates) - x_ids_div = dispatcher.Dispatch(ids_div) - params = model_parallelism(ConvertGradientToTensor, params) - x_emb = model_parallelism(tf.gather, params, x_ids_div) - r_emb = dispatcher.Combine(x_emb) - r_emb = data_parallelism(tf.gather, r_emb, unique_idx) - return r_emb - - -def SampledSoftmaxLoss(features, sampler, num_classes, target_classes, - target_params, sampled_classes, sampled_params): - """Loss for training softmax classifiers on large label vocabulary. - - This function assumes that we have already chosen the sampled classes and - fetched the parameters for the target classes and the sampled classes. + def my_fn(x): + layer_sizes = [input_size] + hidden_sizes + [output_size] + for i in xrange(1 + len(hidden_sizes)): + w = tf.get_variable("w_%d" % i, layer_sizes[i:i+2], tf.float32) + x = tf.matmul(x, w) + if i < len(hidden_sizes): + x = hidden_activation(x) + if layer_sizes[i] != input_size: + x *= (layer_sizes[i] / float(input_size))**-0.5 + return x + return my_fn - Args: - features: a Tensor with shape [batch_size, hidden_size] - sampler: a candidate sampler object - num_classes: an integer - target_classes: an integer Tensor with shape [batch_size] - target_params: a Tensor with shape [batch_size, hidden_size] - The parameters corresponding to the target classes. - sampled_classes: an integer tensor with shape [num_sampled_classes] - sampled_params: a Tensor with shape [num_sampled_classes, hidden_size] - The parameters corresponding to the sampled classes. - Returns: - a Tensor with shape [batch_size] - """ - sampled_logits = (tf.matmul(features, sampled_params, transpose_b=True) - - sampler.log_expected_count(sampled_classes)) - target_logits = (tf.reduce_sum(target_params * features, 1) - - sampler.log_expected_count(target_classes)) - sampled_log_denominator = tf.reduce_logsumexp( - sampled_logits, [1], name='SampledLogDenominator') - sampled_classes_mask = tf.unsorted_segment_sum( - tf.fill(tf.shape(sampled_classes), float('-inf')), sampled_classes, - num_classes) - target_log_denominator = ( - target_logits + tf.gather(sampled_classes_mask, target_classes)) - combined_log_denominator = tf.reduce_logsumexp( - tf.stack([sampled_log_denominator, target_log_denominator]), [0]) - loss = combined_log_denominator - target_logits - return loss - - -def ParallelSampledSoftmaxLoss(params, - features, - target_classes, - sampler, - num_classes, - data_parallelism, - target_weights=None): - """Computes sampled softmax loss across many datashards. - - This is used during training to efficiently train a softmax classifier layer. +def reshape_like(a, b): + """Reshapes a to match the shape of b in all but the last dimension.""" + ret = tf.reshape(a, tf.concat([tf.shape(b)[:-1], tf.shape(a)[-1:]], 0)) + ret.set_shape(b.get_shape().as_list()[:-1] + a.get_shape().as_list()[-1:]) + return ret + + +def distributed_moe(data_parallelism, + expert_devices, + xs, + train, + input_size, + expert_fn, + num_experts, + k=2, + loss_coef=1e-2, + name=None): + """Call a distributed mixture of experts. Args: - params: A list of num_param_shards Tensors, each with shape - [num_classes / num_param_shards, num_features]. - The parameters are assumed to be mod-sharded by class. - features: a list of num_datashards Tensors, each with shape - [batch_size_i, num_features] - target_classes: A list of num_datashards integer Tensors each with shape - [batch_size_i] - sampler: a candidate sampler object - num_classes: an Integer - data_parallelism: a Parallelism object - target_weights: an optional list of num_datashards Tensors each with - shape [batch_size_i] + data_parallelism: a expert_utils.Parallelism object. + expert_devices: a list of strings. We round-robin the experts across these + devices. + xs: a list of input tensors, each with shape [... , input_size] + train: a boolean scalar. + input_size: an integer (input size for this layer) + expert_fn: a unary function for each expert to run + It should take a Tensor with shape [batch_size, input_size] + and return a Tensor with shape [batch_size, output_size] + num_experts: an integer - number of experts + k: an integer - how many experts to use for each batch element + loss_coef: a scalar - multiplier on load-balancing losses + name: a string + Returns: - a Scalar. + ys: a list of tensors. Each Tensor has the same shape as the corresponding + Tensor in xs, except for the last dimension, which is output_size. + extra_training_loss: a scalar. This should be added into the overall + training loss of the model. The backpropagation of this loss + encourages all experts to be approximately equally used across a batch. """ - sampled_classes = data_parallelism(sampler.sample) - sampled_params = ParallelEmbeddingLookup(params, sampled_classes, - data_parallelism) - target_params = ParallelEmbeddingLookup(params, target_classes, - data_parallelism) - ret = data_parallelism(SampledSoftmaxLoss, features, sampler, num_classes, - target_classes, target_params, sampled_classes, - sampled_params) - if target_weights is not None: - ret = data_parallelism(tf.multiply, ret, target_weights) - ret = data_parallelism(tf.reduce_sum, ret) - ret = tf.add_n(ret) - return ret + dp = data_parallelism + # create a parallelism object for running the experts. + # We use the default of reuse=False. Otherwise, the experts would all + # use the same variables. + ep = Parallelism( + [expert_devices[i % len(expert_devices)] for i in xrange(num_experts)]) + # Experts expect 2d input tensors, so flatten the batch dimension and all + # spatial dimensions together. + xs_flat = dp(tf.reshape, xs, [[-1, input_size]] * dp.n) + with tf.variable_scope(name, default_name="moe"): + # The gates indicate which batch elements go to which tensors. + # load is a measure of approximately how many examples go to each expert + gates, load = dp(noisy_top_k_gating, + xs_flat, + input_size, + num_experts, + train, + k, + initializer=tf.zeros_initializer(), + noisy_gating=True, + noise_epsilon=1e-2) + # This magic object helps us shuffle data between datashards and experts. + dispatcher = DistributedSparseDispatcher(dp, ep, gates) + expert_in = dispatcher.dispatch(xs_flat) + expert_out = ep(expert_fn, expert_in) + ys_flat = dispatcher.combine(expert_out) + ys = dp(reshape_like, ys_flat, xs) + # compute some load-balancing losses. + load = tf.add_n(load) + importance = tf.add_n(dp(tf.reduce_sum, gates, 0)) + loss = loss_coef * (cv_squared(importance) + cv_squared(load)) + return ys, loss From 35416daf4af61361113b51218c4960f25f38bfb7 Mon Sep 17 00:00:00 2001 From: Alexander Ku <alexku@google.com> Date: Thu, 10 Aug 2017 10:23:50 -0700 Subject: [PATCH 0257/4095] adding function for local_attention_2d PiperOrigin-RevId: 164869818 --- tensor2tensor/layers/common_attention.py | 105 ++++++++++++++++++ tensor2tensor/layers/common_attention_test.py | 46 ++++++-- 2 files changed, 142 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 2b1bd124f..4f1273163 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -541,6 +541,111 @@ def pad_l_and_r(x, pad_length): return output +def local_attention_2d(q, + k, + v, + block_length=128, + filter_flange=100, + name=None): + """strided block local self-attention. + + Args: + q: a Tensor with shape [batch, heads, h, w, depth_k] + k: a Tensor with shape [batch, heads, h, w, depth_k] + v: a Tensor with shape [batch, heads, h, w, depth_v] + block_length: an integer indicating the side length of each square block. + filter_flange: an integer indicating how much to look around each block. + name: an optional string + + Returns: + a Tensor of shape [batch, heads, h, w, depth_v] + """ + with tf.variable_scope( + name, default_name="local_self_attention_2d", values=[q, k, v]): + v_shape = tf.shape(v) + depth_v = tf.shape(v)[4] + batch_size = tf.shape(q)[0] + num_heads = tf.shape(q)[1] + original_length = tf.shape(q)[2] * tf.shape(q)[3] + + def reshape_range(tensor, i, j, shape): + """Reshapes a tensor between dimensions i and j.""" + target_shape = tf.concat( + [tf.shape(tensor)[:i], shape, tf.shape(tensor)[j:]], + axis=0) + return tf.reshape(tensor, target_shape) + + def pad_to_multiple(x, d): + """Making sure x is a multiple of d.""" + height_padding = -tf.shape(x)[1] % d + width_padding = -tf.shape(x)[2] % d + paddings = [[0, 0], [0, 0], [0, height_padding], + [0, width_padding], [0, 0]] + return tf.pad(x, paddings) + + def gather_indices(x, block_length, stride): + """Getting gather indices.""" + # making an identity matrix kernel + kernel = tf.eye(block_length ** 2) + kernel = reshape_range(kernel, 0, 1, [block_length, block_length, 1]) + # making indices [1, h, w, 1] to appy convs + indices = tf.range(0, tf.shape(x)[2] * tf.shape(x)[3], delta=1) + indices = tf.reshape(indices, [1, tf.shape(x)[2], tf.shape(x)[3], 1]) + indices = tf.nn.conv2d( + tf.cast(indices, tf.float32), + kernel, + strides=[1, stride, stride, 1], + padding="VALID") + # making indices [num_blocks, dim] to gather + num_blocks = tf.reduce_prod(tf.shape(indices)[:2]) + indices = tf.reshape(indices, [num_blocks, -1]) + return tf.cast(indices, tf.int32) + + def gather_blocks(x, indices): + """Gathers flattened blocks from x.""" + x_shape = tf.shape(x) + x = reshape_range(x, 2, 4, [tf.reduce_prod(x_shape[2:4])]) + # [length, batch, heads, dim] + x_t = tf.transpose(x, [2, 0, 1, 3]) + x_new = tf.gather(x_t, indices) + # returns [batch, heads, num_blocks, block_length ** 2, dim] + return tf.transpose(x_new, [2, 3, 0, 1, 4]) + + q = pad_to_multiple(q, block_length) + k = pad_to_multiple(k, block_length) + v = pad_to_multiple(v, block_length) + + # Setting up k and v values + paddings = [[0, 0], [0, 0], [filter_flange, filter_flange], + [filter_flange, filter_flange], [0, 0]] + k = tf.pad(k, paddings) + v = tf.pad(v, paddings) + + # Setting up q blocks + q_indices = gather_indices(q, block_length, block_length) + q_new = gather_blocks(q, q_indices) + + # Setting up k and v blocks + full_filter_width = block_length + 2 * filter_flange + k_and_v_indices = gather_indices(k, full_filter_width, block_length) + k_new = gather_blocks(k, k_and_v_indices) + v_new = gather_blocks(v, k_and_v_indices) + + attention_bias = tf.expand_dims( + tf.to_float(embedding_to_padding(k_new)) * -1e9, axis=-2) + + logits = tf.matmul(q_new, k_new, transpose_b=True) + + attention = tf.nn.softmax(logits + attention_bias) + output = tf.matmul(attention, v_new) + + output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) + # Remove the padding if introduced + output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) + # [batch, heads, h, w, depth_v] + return tf.reshape(output, v_shape) + + def multihead_attention(query_antecedent, memory_antecedent, bias, diff --git a/tensor2tensor/layers/common_attention_test.py b/tensor2tensor/layers/common_attention_test.py index e846c2002..e49999fbb 100644 --- a/tensor2tensor/layers/common_attention_test.py +++ b/tensor2tensor/layers/common_attention_test.py @@ -41,14 +41,14 @@ def testDotProductAttention(self): res = session.run(a) self.assertEqual(res.shape, (5, 7, 12, 32)) - def testMaskedLocalAttention(self): - q = np.array([[[[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [ - 1.0, 0.0, 0.0, 0.0 - ], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], + def testMaskedLocalAttention1D(self): + q = np.array([[[[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0]]]]) - k = np.array([[[[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [ - 1.0, 0.0, 0.0, 0.0 - ], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], + k = np.array([[[[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0]]]]) v = np.ones((1, 1, 8, 1)) with self.test_session() as session: @@ -61,7 +61,7 @@ def testMaskedLocalAttention(self): self.assertEqual(res.shape, (1, 1, 8, 1)) - def testLocalUnmaskedAttention(self): + def testLocalUnmaskedAttention1D(self): x = np.random.rand(5, 4, 25, 16) y = np.random.rand(5, 4, 25, 16) with self.test_session() as session: @@ -75,7 +75,7 @@ def testLocalUnmaskedAttention(self): res = session.run(a) self.assertEqual(res.shape, (5, 4, 25, 16)) - def testLocalUnmaskedAttentionMatchingBlockLength(self): + def testLocalUnmaskedAttention1DMatchingBlockLength(self): x = np.random.rand(5, 4, 25, 16) y = np.random.rand(5, 4, 25, 16) with self.test_session() as session: @@ -89,6 +89,34 @@ def testLocalUnmaskedAttentionMatchingBlockLength(self): res = session.run(a) self.assertEqual(res.shape, (5, 4, 25, 16)) + def testLocalUnmaskedAttention2D(self): + x = np.random.rand(5, 4, 25, 25, 16) + y = np.random.rand(5, 4, 25, 25, 16) + with self.test_session() as session: + a = common_attention.local_attention_2d( + tf.constant(x, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + block_length=4, + filter_flange=3) + session.run(tf.global_variables_initializer()) + res = session.run(a) + self.assertEqual(res.shape, (5, 4, 25, 25, 16)) + + def testLocalUnmaskedAttention2DMatchingBlockLength(self): + x = np.random.rand(5, 4, 25, 25, 16) + y = np.random.rand(5, 4, 25, 25, 16) + with self.test_session() as session: + a = common_attention.local_attention_2d( + tf.constant(x, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + block_length=5, + filter_flange=3) + session.run(tf.global_variables_initializer()) + res = session.run(a) + self.assertEqual(res.shape, (5, 4, 25, 25, 16)) + if __name__ == "__main__": tf.test.main() From 94eca0c50e8c32d30d262fc249c03e3019ac03f7 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 11 Aug 2017 13:14:39 -0700 Subject: [PATCH 0258/4095] Rename train_generator to just generator and port wiki_32k to Problem. Also cleaning and speeding up vocab generation, algorithmic problems, wmt_zhen and BPE download. PiperOrigin-RevId: 165015579 --- setup.py | 1 + tensor2tensor/bin/t2t-datagen | 61 +--- tensor2tensor/data_generators/algorithmic.py | 326 ++++++++---------- .../data_generators/algorithmic_test.py | 12 +- tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/cipher.py | 48 +-- tensor2tensor/data_generators/desc2code.py | 2 +- .../data_generators/generator_utils.py | 1 + tensor2tensor/data_generators/ice_parsing.py | 120 +++++++ tensor2tensor/data_generators/problem.py | 30 +- .../data_generators/problem_hparams.py | 52 --- tensor2tensor/data_generators/ptb.py | 2 +- tensor2tensor/data_generators/text_encoder.py | 6 +- tensor2tensor/data_generators/wiki.py | 102 +++--- tensor2tensor/data_generators/wmt.py | 75 ++-- tensor2tensor/models/transformer.py | 2 +- tensor2tensor/utils/decoding.py | 5 + tensor2tensor/utils/registry.py | 4 +- tensor2tensor/utils/trainer_utils_test.py | 5 +- 19 files changed, 418 insertions(+), 437 deletions(-) create mode 100644 tensor2tensor/data_generators/ice_parsing.py diff --git a/setup.py b/setup.py index 5beeb1b3e..4ada714b6 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ 'tensor2tensor/bin/t2t-make-tf-configs', ], install_requires=[ + 'bz2file', 'numpy', 'requests', 'sympy', diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 39453dbee..30784fa60 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -45,7 +45,6 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import snli -from tensor2tensor.data_generators import wiki from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing from tensor2tensor.utils import registry @@ -82,16 +81,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), - "ice_parsing_tokens": ( - lambda: wmt.tabbed_parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, "ice", 2**13, 2**8), - lambda: wmt.tabbed_parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, "ice", 2**13, 2**8)), - "ice_parsing_characters": ( - lambda: wmt.tabbed_parsing_character_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True), - lambda: wmt.tabbed_parsing_character_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False)), "wmt_parsing_tokens_8k": ( lambda: wmt.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13), @@ -115,10 +104,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: lm1b.generator(FLAGS.tmp_dir, True, characters=True), lambda: lm1b.generator(FLAGS.tmp_dir, False, characters=True) ), - "wiki_32k": ( - lambda: wiki.generator(FLAGS.tmp_dir, True), - 1000 - ), "image_celeba_tune": ( lambda: image.celeba_generator(FLAGS.tmp_dir, 162770), lambda: image.celeba_generator(FLAGS.tmp_dir, 19867, 162770)), @@ -180,17 +165,14 @@ def main(_): # Remove parsing if paths are not given. if not FLAGS.parsing_path: problems = [p for p in problems if "parsing" not in p] - # Remove en-de BPE if paths are not given. - if not FLAGS.ende_bpe_path: - problems = [p for p in problems if "ende_bpe" not in p] if not problems: problems_str = "\n * ".join( sorted(list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())) error_msg = ("You must specify one of the supported problems to " "generate data for:\n * " + problems_str + "\n") - error_msg += ("TIMIT, ende_bpe and parsing need data_sets specified with " - "--timit_paths, --ende_bpe_path and --parsing_path.") + error_msg += ("TIMIT and parsing need data_sets specified with " + "--timit_paths and --parsing_path.") raise ValueError(error_msg) if not FLAGS.data_dir: @@ -213,34 +195,17 @@ def generate_data_for_problem(problem): """Generate data for a problem in _SUPPORTED_PROBLEM_GENERATORS.""" training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem] - if isinstance(dev_gen, int): - # The dev set and test sets are generated as extra shards using the - # training generator. The integer specifies the number of training - # shards. FLAGS.num_shards is ignored. - num_training_shards = dev_gen - tf.logging.info("Generating data for %s.", problem) - all_output_files = generator_utils.combined_data_filenames( - problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, - num_training_shards) - generator_utils.generate_files(training_gen(), all_output_files, - FLAGS.max_cases) - else: - # usual case - train data and dev data are generated using separate - # generators. - num_shards = FLAGS.num_shards or 10 - tf.logging.info("Generating training data for %s.", problem) - train_output_files = generator_utils.train_data_filenames( - problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, num_shards) - generator_utils.generate_files(training_gen(), train_output_files, - FLAGS.max_cases) - tf.logging.info("Generating development data for %s.", problem) - dev_shards = 10 if "coco" in problem else 1 - dev_output_files = generator_utils.dev_data_filenames( - problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, dev_shards) - generator_utils.generate_files(dev_gen(), dev_output_files) - all_output_files = train_output_files + dev_output_files - - tf.logging.info("Shuffling data...") + num_shards = FLAGS.num_shards or 10 + tf.logging.info("Generating training data for %s.", problem) + train_output_files = generator_utils.train_data_filenames( + problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, num_shards) + generator_utils.generate_files(training_gen(), train_output_files, + FLAGS.max_cases) + tf.logging.info("Generating development data for %s.", problem) + dev_output_files = generator_utils.dev_data_filenames( + problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, 1) + generator_utils.generate_files(dev_gen(), dev_output_files) + all_output_files = train_output_files + dev_output_files generator_utils.shuffle_dataset(all_output_files) diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index c115a1ebe..c44ce65d8 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -37,15 +37,10 @@ class AlgorithmicProblem(problem.Problem): def num_symbols(self): raise NotImplementedError() - @property - def train_generator(self): - """Generator; takes 3 args: nbr_symbols, max_length, nbr_cases.""" + def generator(self, nbr_symbols, max_length, nbr_cases): + """Generates the data.""" raise NotImplementedError() - @property - def dev_generator(self): - return self.train_generator - @property def train_length(self): return 40 @@ -67,25 +62,19 @@ def num_shards(self): return 10 def generate_data(self, data_dir, _, task_id=-1): - def generator_eos(generator): + def generator_eos(nbr_symbols, max_length, nbr_cases): """Shift by NUM_RESERVED_IDS and append EOS token.""" - for case in generator: + for case in self.generator(nbr_symbols, max_length, nbr_cases): new_case = {} for feature in case: new_case[feature] = [i + text_encoder.NUM_RESERVED_TOKENS for i in case[feature]] + [text_encoder.EOS_ID] yield new_case - train_generator_eos = lambda: generator_eos( # pylint: disable=g-long-lambda - self.train_generator(self.num_symbols, - self.train_length, self.train_size)) - dev_generator_eos = lambda: generator_eos( # pylint: disable=g-long-lambda - self.dev_generator(self.num_symbols, self.dev_length, self.dev_size)) - utils.generate_dataset_and_shuffle( - train_generator_eos(), + generator_eos(self.num_symbols, self.train_length, self.train_size), self.training_filepaths(data_dir, self.num_shards, shuffled=True), - dev_generator_eos(), + generator_eos(self.num_symbols, self.dev_length, self.dev_size), self.dev_filepaths(data_dir, 1, shuffled=True), shuffle=False) @@ -98,28 +87,6 @@ def hparams(self, defaults, unused_model_hparams): p.target_space_id = problem.SpaceID.DIGIT_1 -def identity_generator(nbr_symbols, max_length, nbr_cases): - """Generator for the identity (copy) task on sequences of symbols. - - The length of the sequence is drawn uniformly at random from [1, max_length] - and then symbols are drawn uniformly at random from [0, nbr_symbols) until - nbr_cases sequences have been produced. - - Args: - nbr_symbols: number of symbols to use in each sequence. - max_length: integer, maximum length of sequences to generate. - nbr_cases: the number of cases to generate. - - Yields: - A dictionary {"inputs": input-list, "targets": target-list} where - input-list and target-list are the same. - """ - for _ in xrange(nbr_cases): - l = np.random.randint(max_length) + 1 - inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)] - yield {"inputs": inputs, "targets": inputs} - - @registry.register_problem class AlgorithmicIdentityBinary40(AlgorithmicProblem): """Problem spec for algorithmic binary identity task.""" @@ -128,9 +95,26 @@ class AlgorithmicIdentityBinary40(AlgorithmicProblem): def num_symbols(self): return 2 - @property - def train_generator(self): - return identity_generator + def generator(self, nbr_symbols, max_length, nbr_cases): + """Generator for the identity (copy) task on sequences of symbols. + + The length of the sequence is drawn uniformly at random from [1, max_length] + and then symbols are drawn uniformly at random from [0, nbr_symbols) until + nbr_cases sequences have been produced. + + Args: + nbr_symbols: number of symbols to use in each sequence. + max_length: integer, maximum length of sequences to generate. + nbr_cases: the number of cases to generate. + + Yields: + A dictionary {"inputs": input-list, "targets": target-list} where + input-list and target-list are the same. + """ + for _ in xrange(nbr_cases): + l = np.random.randint(max_length) + 1 + inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)] + yield {"inputs": inputs, "targets": inputs} @registry.register_problem @@ -142,32 +126,6 @@ def num_symbols(self): return 10 -def shift_generator(nbr_symbols, shift, max_length, nbr_cases): - """Generator for the shift task on sequences of symbols. - - The length of the sequence is drawn uniformly at random from [1, max_length] - and then symbols are drawn uniformly at random from [0, nbr_symbols - shift] - until nbr_cases sequences have been produced (output[i] = input[i] + shift). - - Args: - nbr_symbols: number of symbols to use in each sequence (input + output). - shift: by how much to shift the input. - max_length: integer, maximum length of sequences to generate. - nbr_cases: the number of cases to generate. - - Yields: - A dictionary {"inputs": input-list, "targets": target-list} where - target-list[i] = input-list[i] + shift. - """ - for _ in xrange(nbr_cases): - l = np.random.randint(max_length) + 1 - inputs = [np.random.randint(nbr_symbols - shift) for _ in xrange(l)] - yield { - "inputs": inputs, - "targets": [i + shift for i in inputs] - } - - @registry.register_problem class AlgorithmicShiftDecimal40(AlgorithmicProblem): """Problem spec for algorithmic decimal shift task.""" @@ -176,40 +134,36 @@ class AlgorithmicShiftDecimal40(AlgorithmicProblem): def num_symbols(self): return 20 - @property - def train_generator(self): - return lambda nbr_sym, l, size: shift_generator(nbr_sym, 10, l, size) + def generator(self, nbr_symbols, max_length, nbr_cases): + """Generator for the shift task on sequences of symbols. + + The length of the sequence is drawn uniformly at random from [1, max_length] + and then symbols are drawn uniformly at random from [0, nbr_symbols - shift] + until nbr_cases sequences have been produced (output[i] = input[i] + shift). + + Args: + nbr_symbols: number of symbols to use in each sequence (input + output). + max_length: integer, maximum length of sequences to generate. + nbr_cases: the number of cases to generate. + + Yields: + A dictionary {"inputs": input-list, "targets": target-list} where + target-list[i] = input-list[i] + shift. + """ + shift = 10 + for _ in xrange(nbr_cases): + l = np.random.randint(max_length) + 1 + inputs = [np.random.randint(nbr_symbols - shift) for _ in xrange(l)] + yield { + "inputs": inputs, + "targets": [i + shift for i in inputs] + } @property def dev_length(self): return 80 -def reverse_generator(nbr_symbols, max_length, nbr_cases): - """Generator for the reversing task on sequences of symbols. - - The length of the sequence is drawn uniformly at random from [1, max_length] - and then symbols are drawn uniformly at random from [0, nbr_symbols) until - nbr_cases sequences have been produced. - - Args: - nbr_symbols: number of symbols to use in each sequence. - max_length: integer, maximum length of sequences to generate. - nbr_cases: the number of cases to generate. - - Yields: - A dictionary {"inputs": input-list, "targets": target-list} where - target-list is input-list reversed. - """ - for _ in xrange(nbr_cases): - l = np.random.randint(max_length) + 1 - inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)] - yield { - "inputs": inputs, - "targets": list(reversed(inputs)) - } - - @registry.register_problem class AlgorithmicReverseBinary40(AlgorithmicProblem): """Problem spec for algorithmic binary reversing task.""" @@ -218,9 +172,29 @@ class AlgorithmicReverseBinary40(AlgorithmicProblem): def num_symbols(self): return 2 - @property - def train_generator(self): - return reverse_generator + def generator(self, nbr_symbols, max_length, nbr_cases): + """Generator for the reversing task on sequences of symbols. + + The length of the sequence is drawn uniformly at random from [1, max_length] + and then symbols are drawn uniformly at random from [0, nbr_symbols) until + nbr_cases sequences have been produced. + + Args: + nbr_symbols: number of symbols to use in each sequence. + max_length: integer, maximum length of sequences to generate. + nbr_cases: the number of cases to generate. + + Yields: + A dictionary {"inputs": input-list, "targets": target-list} where + target-list is input-list reversed. + """ + for _ in xrange(nbr_cases): + l = np.random.randint(max_length) + 1 + inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)] + yield { + "inputs": inputs, + "targets": list(reversed(inputs)) + } @registry.register_problem @@ -305,17 +279,16 @@ def reverse_generator_nlplike(nbr_symbols, @registry.register_problem -class AlgorithmicReverseNlplike8K(AlgorithmicProblem): +class AlgorithmicReverseNlplike8k(AlgorithmicProblem): """Problem spec for algorithmic nlp-like reversing task.""" @property def num_symbols(self): return 8000 - @property - def train_generator(self): - return lambda nbr_sym, length, size: reverse_generator_nlplike( # pylint: disable=g-long-lambda - nbr_sym, length, size, 10, 1.300) + def generator(self, nbr_symbols, max_length, nbr_cases): + return reverse_generator_nlplike( + nbr_symbols, max_length, nbr_cases, 10, 1.300) @property def train_length(self): @@ -327,17 +300,16 @@ def dev_length(self): @registry.register_problem -class AlgorithmicReverseNlplike32K(AlgorithmicReverseNlplike8K): - """Problem spec for algorithmic nlp-like reversing task, 32K vocab.""" +class AlgorithmicReverseNlplike32k(AlgorithmicReverseNlplike8k): + """Problem spec for algorithmic nlp-like reversing task, 32k vocab.""" @property def num_symbols(self): return 32000 - @property - def train_generator(self): - return lambda nbr_sym, length, size: reverse_generator_nlplike( # pylint: disable=g-long-lambda - nbr_sym, length, size, 10, 1.050) + def generator(self, nbr_symbols, max_length, nbr_cases): + return reverse_generator_nlplike( + nbr_symbols, max_length, nbr_cases, 10, 1.050) def lower_endian_to_number(l, base): @@ -360,38 +332,6 @@ def random_number_lower_endian(length, base): return prefix + [np.random.randint(base - 1) + 1] # Last digit is not 0. -def addition_generator(base, max_length, nbr_cases): - """Generator for the addition task. - - The length of each number is drawn uniformly at random from [1, max_length/2] - and then digits are drawn uniformly at random. The numbers are added and - separated by [base] in the input. Stops at nbr_cases. - - Args: - base: in which base are the numbers. - max_length: integer, maximum length of sequences to generate. - nbr_cases: the number of cases to generate. - - Yields: - A dictionary {"inputs": input-list, "targets": target-list} where - input-list are the 2 numbers and target-list is the result of adding them. - - Raises: - ValueError: if max_length is lower than 3. - """ - if max_length < 3: - raise ValueError("Maximum length must be at least 3.") - for _ in xrange(nbr_cases): - l1 = np.random.randint(max_length // 2) + 1 - l2 = np.random.randint(max_length - l1 - 1) + 1 - n1 = random_number_lower_endian(l1, base) - n2 = random_number_lower_endian(l2, base) - result = lower_endian_to_number(n1, base) + lower_endian_to_number(n2, base) - inputs = n1 + [base] + n2 - targets = number_to_lower_endian(result, base) - yield {"inputs": inputs, "targets": targets} - - @registry.register_problem class AlgorithmicAdditionBinary40(AlgorithmicProblem): """Problem spec for algorithmic binary addition task.""" @@ -400,9 +340,37 @@ class AlgorithmicAdditionBinary40(AlgorithmicProblem): def num_symbols(self): return 2 - @property - def train_generator(self): - return addition_generator + def generator(self, base, max_length, nbr_cases): + """Generator for the addition task. + + The length of each number is drawn uniformly at random in [1, max_length/2] + and then digits are drawn uniformly at random. The numbers are added and + separated by [base] in the input. Stops at nbr_cases. + + Args: + base: in which base are the numbers. + max_length: integer, maximum length of sequences to generate. + nbr_cases: the number of cases to generate. + + Yields: + A dictionary {"inputs": input-list, "targets": target-list} where + input-list are the 2 numbers and target-list is the result of adding them. + + Raises: + ValueError: if max_length is lower than 3. + """ + if max_length < 3: + raise ValueError("Maximum length must be at least 3.") + for _ in xrange(nbr_cases): + l1 = np.random.randint(max_length // 2) + 1 + l2 = np.random.randint(max_length - l1 - 1) + 1 + n1 = random_number_lower_endian(l1, base) + n2 = random_number_lower_endian(l2, base) + result = lower_endian_to_number(n1, base) + lower_endian_to_number( + n2, base) + inputs = n1 + [base] + n2 + targets = number_to_lower_endian(result, base) + yield {"inputs": inputs, "targets": targets} @registry.register_problem @@ -414,39 +382,6 @@ def num_symbols(self): return 10 -def multiplication_generator(base, max_length, nbr_cases): - """Generator for the multiplication task. - - The length of each number is drawn uniformly at random from [1, max_length/2] - and then digits are drawn uniformly at random. The numbers are multiplied - and separated by [base] in the input. Stops at nbr_cases. - - Args: - base: in which base are the numbers. - max_length: integer, maximum length of sequences to generate. - nbr_cases: the number of cases to generate. - - Yields: - A dictionary {"inputs": input-list, "targets": target-list} where - input-list are the 2 numbers and target-list is the result of multiplying - them. - - Raises: - ValueError: if max_length is lower than 3. - """ - if max_length < 3: - raise ValueError("Maximum length must be at least 3.") - for _ in xrange(nbr_cases): - l1 = np.random.randint(max_length // 2) + 1 - l2 = np.random.randint(max_length - l1 - 1) + 1 - n1 = random_number_lower_endian(l1, base) - n2 = random_number_lower_endian(l2, base) - result = lower_endian_to_number(n1, base) * lower_endian_to_number(n2, base) - inputs = n1 + [base] + n2 - targets = number_to_lower_endian(result, base) - yield {"inputs": inputs, "targets": targets} - - @registry.register_problem class AlgorithmicMultiplicationBinary40(AlgorithmicProblem): """Problem spec for algorithmic binary multiplication task.""" @@ -455,9 +390,38 @@ class AlgorithmicMultiplicationBinary40(AlgorithmicProblem): def num_symbols(self): return 2 - @property - def train_generator(self): - return multiplication_generator + def generator(self, base, max_length, nbr_cases): + """Generator for the multiplication task. + + The length of each number is drawn uniformly at random in [1, max_length/2] + and then digits are drawn uniformly at random. The numbers are multiplied + and separated by [base] in the input. Stops at nbr_cases. + + Args: + base: in which base are the numbers. + max_length: integer, maximum length of sequences to generate. + nbr_cases: the number of cases to generate. + + Yields: + A dictionary {"inputs": input-list, "targets": target-list} where + input-list are the 2 numbers and target-list is the result of multiplying + them. + + Raises: + ValueError: if max_length is lower than 3. + """ + if max_length < 3: + raise ValueError("Maximum length must be at least 3.") + for _ in xrange(nbr_cases): + l1 = np.random.randint(max_length // 2) + 1 + l2 = np.random.randint(max_length - l1 - 1) + 1 + n1 = random_number_lower_endian(l1, base) + n2 = random_number_lower_endian(l2, base) + result = lower_endian_to_number(n1, base) * lower_endian_to_number( + n2, base) + inputs = n1 + [base] + n2 + targets = number_to_lower_endian(result, base) + yield {"inputs": inputs, "targets": targets} @registry.register_problem diff --git a/tensor2tensor/data_generators/algorithmic_test.py b/tensor2tensor/data_generators/algorithmic_test.py index 57faaa80b..4ac6d3123 100644 --- a/tensor2tensor/data_generators/algorithmic_test.py +++ b/tensor2tensor/data_generators/algorithmic_test.py @@ -29,15 +29,17 @@ class AlgorithmicTest(tf.test.TestCase): def testIdentityGenerator(self): + identity_problem = algorithmic.AlgorithmicIdentityBinary40() counter = 0 - for d in algorithmic.identity_generator(3, 8, 10): + for d in identity_problem.generator(3, 8, 10): counter += 1 self.assertEqual(d["inputs"], d["targets"]) self.assertEqual(counter, 10) def testReverseGenerator(self): + reversing_problem = algorithmic.AlgorithmicReverseBinary40() counter = 0 - for d in algorithmic.reverse_generator(3, 8, 10): + for d in reversing_problem.generator(3, 8, 10): counter += 1 self.assertEqual(list(reversed(d["inputs"])), d["targets"]) self.assertEqual(counter, 10) @@ -76,8 +78,9 @@ def testNumberToLowerEndian(self): self.assertEqual(algorithmic.number_to_lower_endian(2137, 10), [7, 3, 1, 2]) def testAdditionGenerator(self): + addition_problem = algorithmic.AlgorithmicAdditionBinary40() counter = 0 - for d in algorithmic.addition_generator(4, 8, 10): + for d in addition_problem.generator(4, 8, 10): counter += 1 self.assertEqual(d["inputs"].count(4), 1) self.assertEqual(d["inputs"].count(5), 0) @@ -86,8 +89,9 @@ def testAdditionGenerator(self): self.assertEqual(counter, 10) def testMultiplicationGenerator(self): + multiplication_problem = algorithmic.AlgorithmicMultiplicationBinary40() counter = 0 - for d in algorithmic.multiplication_generator(4, 8, 10): + for d in multiplication_problem.generator(4, 8, 10): counter += 1 self.assertEqual(d["inputs"].count(4), 1) self.assertEqual(d["inputs"].count(5), 0) diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index ca6dccfda..0078eb3f9 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -24,6 +24,7 @@ from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import cipher from tensor2tensor.data_generators import desc2code +from tensor2tensor.data_generators import ice_parsing from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import ptb diff --git a/tensor2tensor/data_generators/cipher.py b/tensor2tensor/data_generators/cipher.py index 41dcbd80e..a11776b84 100644 --- a/tensor2tensor/data_generators/cipher.py +++ b/tensor2tensor/data_generators/cipher.py @@ -44,23 +44,13 @@ def distribution(self): def shift(self): return 1 - @property - def train_generator(self): - """Generator; takes 3 args: nbr_symbols, max_length, nbr_cases.""" - - def _gen(nbr_symbols, max_length, nbr_cases): - plain_vocab = range(nbr_symbols) - indices = generate_plaintext_random(plain_vocab, self.distribution, - nbr_cases, max_length) - codes = encipher_shift(indices, plain_vocab, self.shift) - - for plain, code in zip(indices, codes): - yield { - "inputs": plain, - "targets": code, - } - - return _gen + def generator(self, nbr_symbols, max_length, nbr_cases): + plain_vocab = range(nbr_symbols) + indices = generate_plaintext_random( + plain_vocab, self.distribution, nbr_cases, max_length) + codes = encipher_shift(indices, plain_vocab, self.shift) + for plain, code in zip(indices, codes): + yield {"inputs": plain, "targets": code} @property def train_length(self): @@ -87,23 +77,13 @@ def distribution(self): def key(self): return [1, 3] - @property - def train_generator(self): - """Generator; takes 3 args: nbr_symbols, max_length, nbr_cases.""" - - def _gen(nbr_symbols, max_length, nbr_cases): - plain_vocab = range(nbr_symbols) - indices = generate_plaintext_random(plain_vocab, self.distribution, - nbr_cases, max_length) - codes = encipher_vigenere(indices, plain_vocab, self.key) - - for plain, code in zip(indices, codes): - yield { - "inputs": plain, - "targets": code, - } - - return _gen + def generator(self, nbr_symbols, max_length, nbr_cases): + plain_vocab = range(nbr_symbols) + indices = generate_plaintext_random(plain_vocab, self.distribution, + nbr_cases, max_length) + codes = encipher_vigenere(indices, plain_vocab, self.key) + for plain, code in zip(indices, codes): + yield {"inputs": plain, "targets": code} @property def train_length(self): diff --git a/tensor2tensor/data_generators/desc2code.py b/tensor2tensor/data_generators/desc2code.py index 6cef6db63..438c116c8 100644 --- a/tensor2tensor/data_generators/desc2code.py +++ b/tensor2tensor/data_generators/desc2code.py @@ -138,7 +138,7 @@ def feature_encoders(self, data_dir): "targets": target_token, } - def train_generator(self, data_dir, tmp_dir, train): + def generator(self, data_dir, tmp_dir, train): # Called twice: for train and test # Get the list of the training samples (coding challenge samples) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index b38531c1a..eadca9bd6 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -308,6 +308,7 @@ def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, vocab = text_encoder.SubwordTextEncoder(vocab_filepath) return vocab + tf.logging.info("Generating vocab file: %s", vocab_filepath) token_counts = defaultdict(int) for item in generator_fn(): for tok in tokenizer.encode(text_encoder.native_to_unicode(item)): diff --git a/tensor2tensor/data_generators/ice_parsing.py b/tensor2tensor/data_generators/ice_parsing.py new file mode 100644 index 000000000..591b205da --- /dev/null +++ b/tensor2tensor/data_generators/ice_parsing.py @@ -0,0 +1,120 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module implements the ice_parsing_* problems.""" + +# These parse plain text into flattened parse trees and POS tags. +# The training data is stored in files named `parsing_train.pairs` +# and `parsing_dev.pairs`. These files are UTF-8 text files where +# each line contains an input sentence and a target parse tree, +# separated by a tab character. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators.wmt import tabbed_generator +from tensor2tensor.utils import registry + + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + + +def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix, + source_vocab_size, target_vocab_size): + """Generate source and target data from a single file.""" + filename = "parsing_{0}.pairs".format("train" if train else "dev") + source_vocab = generator_utils.get_or_generate_tabbed_vocab( + data_dir, tmp_dir, filename, 0, + prefix + "_source.tokens.vocab.%d" % source_vocab_size, source_vocab_size) + target_vocab = generator_utils.get_or_generate_tabbed_vocab( + data_dir, tmp_dir, filename, 1, + prefix + "_target.tokens.vocab.%d" % target_vocab_size, target_vocab_size) + pair_filepath = os.path.join(tmp_dir, filename) + return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) + + +def tabbed_parsing_character_generator(tmp_dir, train): + """Generate source and target data from a single file.""" + character_vocab = text_encoder.ByteTextEncoder() + filename = "parsing_{0}.pairs".format("train" if train else "dev") + pair_filepath = os.path.join(tmp_dir, filename) + return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) + + +@registry.register_problem("ice_parsing_tokens") +class IceParsingTokens(problem.Problem): + """Problem spec for parsing tokenized Icelandic text to constituency trees.""" + + @property + def source_vocab_size(self): + return 2**14 # 16384 + + @property + def targeted_vocab_size(self): + return 2**8 # 256 + + @property + def input_space_id(self): + return problem.SpaceID.ICE_TOK + + @property + def target_space_id(self): + return problem.SpaceID.ICE_PARSE_TOK + + @property + def num_shards(self): + return 10 + + def feature_encoders(self, data_dir): + source_vocab_filename = os.path.join( + data_dir, "ice_source.tokens.vocab.%d" % self.source_vocab_size) + target_vocab_filename = os.path.join( + data_dir, "ice_target.tokens.vocab.%d" % self.targeted_vocab_size) + source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_subtokenizer, + "targets": target_subtokenizer, + } + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + generator_utils.generate_dataset_and_shuffle( + tabbed_parsing_token_generator(data_dir, tmp_dir, True, "ice", + self.source_vocab_size, + self.targeted_vocab_size), + self.training_filepaths(data_dir, self.num_shards, shuffled=False), + tabbed_parsing_token_generator(data_dir, tmp_dir, False, "ice", + self.source_vocab_size, + self.targeted_vocab_size), + self.dev_filepaths(data_dir, 1, shuffled=False)) + + def hparams(self, defaults, model_hparams): + p = defaults + source_vocab_size = self._encoders["inputs"].vocab_size + p.input_modality = {"inputs": (registry.Modalities.SYMBOL, + source_vocab_size)} + p.target_modality = (registry.Modalities.SYMBOL, self.targeted_vocab_size) + p.input_space_id = self.input_space_id + p.target_space_id = self.target_space_id + p.loss_multiplier = 2.5 # Rough estimate of avg number of tokens per word diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 07fafb492..7a84aac93 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -359,13 +359,14 @@ def is_character_level(self): def targeted_vocab_size(self): raise NotImplementedError() # Not needed if self.is_character_level. - def train_generator(self, data_dir, tmp_dir, is_training): - """Generator of the training data.""" + def generator(self, data_dir, tmp_dir, is_training): + """Generator for the training and evaluation data.""" raise NotImplementedError() - def dev_generator(self, data_dir, tmp_dir): - """Generator of the development data.""" - return self.train_generator(data_dir, tmp_dir, False) + @property + def use_train_shards_for_dev(self): + """If true, we only generate training data and hold out shards for dev.""" + return False @property def input_space_id(self): @@ -379,6 +380,10 @@ def target_space_id(self): def num_shards(self): raise NotImplementedError() + @property + def num_dev_shards(self): + return 1 + @property def vocab_name(self): raise NotImplementedError() @@ -396,11 +401,20 @@ def has_inputs(self): return True # Set to False for language models. def generate_data(self, data_dir, tmp_dir, task_id=-1): + train_paths = self.training_filepaths( + data_dir, self.num_shards, shuffled=False) + dev_paths = self.dev_filepaths( + data_dir, self.num_dev_shards, shuffled=False) + if self.use_train_shards_for_dev: + all_paths = train_paths + dev_paths + generator_utils.generate_files( + self.generator(data_dir, tmp_dir, True), all_paths) + generator_utils.shuffle_dataset(all_paths) generator_utils.generate_dataset_and_shuffle( - self.train_generator(data_dir, tmp_dir, True), + self.generator(data_dir, tmp_dir, True), self.training_filepaths(data_dir, self.num_shards, shuffled=False), - self.dev_generator(data_dir, tmp_dir), - self.dev_filepaths(data_dir, 1, shuffled=False)) + self.generator(data_dir, tmp_dir, False), + self.dev_filepaths(data_dir, self.num_dev_shards, shuffled=False)) def feature_encoders(self, data_dir): if self.is_character_level: diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index d0577db52..b33438d6d 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -345,19 +345,6 @@ def lm1b_characters(unused_model_hparams): return p -def wiki_32k(model_hparams): - """Wikipedia title to article. 32k subtoken vocabulary.""" - p = default_problem_hparams() - encoder = text_encoder.SubwordTextEncoder( - os.path.join(model_hparams.data_dir, "wiki_32k.subword_text_encoder")) - modality_spec = (registry.Modalities.SYMBOL, encoder.vocab_size) - p.input_modality = {"inputs": modality_spec} - p.target_modality = modality_spec - p.vocabulary = {"inputs": encoder, "targets": encoder} - p.target_space_id = 3 - return p - - def wmt_ende_bpe32k(model_hparams): """English to German translation benchmark.""" p = default_problem_hparams() @@ -462,39 +449,6 @@ def wsj_parsing_tokens(model_hparams, prefix, wrong_source_vocab_size, return p -def ice_parsing_tokens(model_hparams, wrong_source_vocab_size): - """Icelandic to parse tree translation benchmark. - - Args: - model_hparams: a tf.contrib.training.HParams - wrong_source_vocab_size: a number used in the filename indicating the - approximate vocabulary size. This is not to be confused with the actual - vocabulary size. - - Returns: - A tf.contrib.training.HParams object. - """ - p = default_problem_hparams() - # This vocab file must be present within the data directory. - source_vocab_filename = os.path.join( - model_hparams.data_dir, "ice_source.vocab.%d" % wrong_source_vocab_size) - target_vocab_filename = os.path.join(model_hparams.data_dir, - "ice_target.vocab.256") - source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.SYMBOL, source_subtokenizer.vocab_size) - } - p.target_modality = (registry.Modalities.SYMBOL, 256) - p.vocabulary = { - "inputs": source_subtokenizer, - "targets": target_subtokenizer, - } - p.input_space_id = 18 # Icelandic tokens - p.target_space_id = 19 # Icelandic parse tokens - return p - - def img2img_imagenet(unused_model_hparams): """Image 2 Image for imagenet dataset.""" p = default_problem_hparams() @@ -542,12 +496,6 @@ def image_celeba(unused_model_hparams): lm1b_characters, "lm1b_32k": lm1b_32k, - "wiki_32k": - wiki_32k, - "ice_parsing_characters": - wmt_parsing_characters, - "ice_parsing_tokens": - lambda p: ice_parsing_tokens(p, 2**13), "wmt_parsing_tokens_8k": lambda p: wmt_parsing_tokens(p, 2**13), "wsj_parsing_tokens_16k": diff --git a/tensor2tensor/data_generators/ptb.py b/tensor2tensor/data_generators/ptb.py index 18aedd640..b9014bcd6 100644 --- a/tensor2tensor/data_generators/ptb.py +++ b/tensor2tensor/data_generators/ptb.py @@ -105,7 +105,7 @@ def use_subword_tokenizer(self): def targeted_vocab_size(self): return 10000 - def train_generator(self, data_dir, tmp_dir, train): + def generator(self, data_dir, tmp_dir, train): filename = os.path.basename(PTB_URL) compressed_filepath = generator_utils.maybe_download( tmp_dir, filename, PTB_URL) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index ad9c04c96..b628a538f 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -441,6 +441,8 @@ def build_to_target_size(cls, if min_val > max_val: raise ValueError("Lower bound for the minimum token count " "is greater than the upper bound.") + if target_size < 1: + raise ValueError("Target size must be positive.") def bisect(min_val, max_val): """Bisection to find the right size.""" @@ -450,8 +452,10 @@ def bisect(min_val, max_val): subtokenizer.build_from_token_counts(token_counts, present_count, num_iterations) + # Being within 1% of the target size is ok. + is_ok = abs(subtokenizer.vocab_size - target_size) * 100 < target_size # If min_val == max_val, we can't do any better than this. - if subtokenizer.vocab_size == target_size or min_val >= max_val: + if is_ok or min_val >= max_val or present_count < 2: return subtokenizer if subtokenizer.vocab_size > target_size: diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 49147962a..1e427dbe8 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -19,23 +19,21 @@ from __future__ import division from __future__ import print_function -import bz2 -from collections import defaultdict import os # Dependency imports +import bz2file + import six from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import tokenizer - -import tensorflow as tf +from tensor2tensor.utils import registry -# End-of-sentence marker (should correspond to the position of EOS in the -# RESERVED_TOKENS list in text_encoder.py) -EOS = 1 +# End-of-sentence marker. +EOS = text_encoder.EOS_ID def _maybe_download_corpus(tmp_dir): @@ -60,7 +58,7 @@ def page_generator(tmp_dir, max_docs=None): doc = u"" count = 0 corpus_filepath = _maybe_download_corpus(tmp_dir) - for line in bz2.BZ2File(corpus_filepath, "r"): + for line in bz2file.BZ2File(corpus_filepath, "r", buffering=1000000): line = unicode(line, "utf-8") if six.PY2 else line.decode("utf-8") if not doc and line != u" <page>\n": continue @@ -82,48 +80,52 @@ def _page_title(page): return page[start_pos:end_pos] -def _get_or_build_subword_text_encoder(tmp_dir): - """Builds a SubwordTextEncoder based on the corpus. +@registry.register_problem +class Wiki32k(problem.Text2TextProblem): + """A class for generating PTB data.""" - Args: - tmp_dir: a string + @property + def is_character_level(self): + return False - Returns: - a SubwordTextEncoder. - """ - filename = os.path.join(tmp_dir, "wiki_32k.subword_text_encoder") - if tf.gfile.Exists(filename): - return text_encoder.SubwordTextEncoder(filename) - token_counts = defaultdict(int) - for page in page_generator(tmp_dir, max_docs=1000): - tokens = tokenizer.encode(page) - tokens = set(tokens) - for tok in tokens: - token_counts[tok] += 1 - new_token_counts = defaultdict(int) - for token, count in six.iteritems(token_counts): - if count >= 3: - new_token_counts[token] = count - ret = text_encoder.SubwordTextEncoder() - ret.build_from_token_counts(new_token_counts, min_count=10) - ret.store_to_file(filename) - return ret - - -def generator(tmp_dir, train): - """Generator for lm1b sentences. + @property + def has_inputs(self): + return True - Args: - tmp_dir: a string. - train: a boolean. + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK - Yields: - A dictionary {"inputs": [<subword ids>], "targets": [<subword ids>]} - """ - assert train - encoder = _get_or_build_subword_text_encoder(tmp_dir) - for page in page_generator(tmp_dir): - title = _page_title(page) - encoded = encoder.encode(page) + [EOS] - encoded_title = encoder.encode(title) + [EOS] - yield {"inputs": encoded_title, "targets": encoded} + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def num_shards(self): + return 1000 + + @property + def vocab_name(self): + return "vocab.wiki" + + @property + def use_subword_tokenizer(self): + return True + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + @property + def use_train_shards_for_dev(self): + return True + + def generator(self, data_dir, tmp_dir, _): + encoder = generator_utils.get_or_generate_vocab_inner( + data_dir, self.vocab_file, self.targeted_vocab_size, + lambda: page_generator(tmp_dir, max_docs=10000)) + for page in page_generator(tmp_dir): + title = _page_title(page) + encoded = encoder.encode(page) + [EOS] + encoded_title = encoder.encode(title) + [EOS] + yield {"inputs": encoded_title, "targets": encoded} diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 0a47e9989..52990eb5f 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -32,10 +32,6 @@ import tensorflow as tf -tf.flags.DEFINE_string("ende_bpe_path", "", "Path to BPE files in tmp_dir." - "Download from https://drive.google.com/open?" - "id=0B_bZck-ksdkpM25jRUN2X2UxMm8") - FLAGS = tf.flags.FLAGS @@ -295,15 +291,15 @@ def bi_vocabs_token_generator(source_path, # Generators. -def _get_wmt_ende_dataset(directory, filename): +def _get_wmt_ende_bpe_dataset(directory, filename): """Extract the WMT en-de corpus `filename` to directory unless it's there.""" train_path = os.path.join(directory, filename) if not (tf.gfile.Exists(train_path + ".de") and tf.gfile.Exists(train_path + ".en")): - # We expect that this file has been downloaded from: - # https://drive.google.com/open?id=0B_bZck-ksdkpM25jRUN2X2UxMm8 and placed - # in `directory`. - corpus_file = os.path.join(directory, FLAGS.ende_bpe_path) + url = ("https://drive.google.com/uc?export=download&id=" + "0B_bZck-ksdkpM25jRUN2X2UxMm8") + corpus_file = generator_utils.maybe_download_from_drive( + directory, "wmt16_en_de.tar.gz", url) with tarfile.open(corpus_file, "r:gz") as corpus_tar: corpus_tar.extractall(directory) return train_path @@ -313,7 +309,7 @@ def ende_bpe_token_generator(data_dir, tmp_dir, train): """Instance of token generator for the WMT en->de task, training set.""" dataset_path = ("train.tok.clean.bpe.32000" if train else "newstest2013.tok.bpe.32000") - train_path = _get_wmt_ende_dataset(tmp_dir, dataset_path) + train_path = _get_wmt_ende_bpe_dataset(tmp_dir, dataset_path) token_tmp_path = os.path.join(tmp_dir, "vocab.bpe.32000") token_path = os.path.join(data_dir, "vocab.bpe.32000") tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) @@ -334,6 +330,7 @@ def _preprocess_sgm(line, is_sgm): if line.startswith("<p>") or line.startswith("</p>"): return "" # Strip <seg> tags. + line = line.strip() if line.startswith("<seg") and line.endswith("</seg>"): i = line.index(">") return line[i+1:-6] # Strip first <seg ...> and last </seg>. @@ -392,7 +389,7 @@ class WMTEnDeTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - def train_generator(self, data_dir, tmp_dir, train): + def generator(self, data_dir, tmp_dir, train): symbolizer_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS @@ -426,7 +423,7 @@ class WMTEnDeCharacters(WMTProblem): def is_character_level(self): return True - def train_generator(self, _, tmp_dir, train): + def generator(self, _, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS tag = "train" if train else "dev" @@ -451,18 +448,22 @@ class WMTZhEnTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - def train_generator(self, data_dir, tmp_dir, train): + @property + def num_shards(self): + return 10 # This is a small dataset. + + def generator(self, data_dir, tmp_dir, train): source_vocab_size = self.targeted_vocab_size target_vocab_size = self.targeted_vocab_size datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] source_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.zh.%d" % source_vocab_size, source_vocab_size, - source_datasets) + data_dir, tmp_dir, "vocab.zhen-zh.%d" % source_vocab_size, + source_vocab_size, source_datasets) target_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.en.%d" % target_vocab_size, target_vocab_size, - target_datasets) + data_dir, tmp_dir, "vocab.zhen-en.%d" % target_vocab_size, + target_vocab_size, target_datasets) tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) return bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", @@ -490,14 +491,6 @@ def feature_encoders(self, data_dir): } -@registry.register_problem("wmt_zhen_tokens_32k") -class WMTZhEnTokens32k(WMTZhEnTokens8k): - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - @registry.register_problem("wmt_enfr_tokens_8k") class WMTEnFrTokens8k(WMTProblem): """Problem spec for WMT En-Fr translation.""" @@ -506,7 +499,7 @@ class WMTEnFrTokens8k(WMTProblem): def targeted_vocab_size(self): return 2**13 # 8192 - def train_generator(self, data_dir, tmp_dir, train): + def generator(self, data_dir, tmp_dir, train): symbolizer_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS @@ -540,7 +533,7 @@ class WMTEnFrCharacters(WMTProblem): def is_character_level(self): return True - def train_generator(self, data_dir, tmp_dir, train): + def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS tag = "train" if train else "dev" @@ -569,7 +562,7 @@ def targeted_vocab_size(self): def vocab_name(self): return "vocab.mken" - def train_generator(self, data_dir, tmp_dir, train): + def generator(self, data_dir, tmp_dir, train): datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS source_datasets = [[item[0], [item[1][0]]] for item in datasets] target_datasets = [[item[0], [item[1][1]]] for item in datasets] @@ -602,7 +595,7 @@ def targeted_vocab_size(self): def vocab_name(self): return "vocab.encs" - def train_generator(self, data_dir, tmp_dir, train): + def generator(self, data_dir, tmp_dir, train): datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS source_datasets = [[item[0], [item[1][0]]] for item in datasets] target_datasets = [[item[0], [item[1][1]]] for item in datasets] @@ -631,7 +624,7 @@ class WMTEnCsCharacters(WMTProblem): def is_character_level(self): return True - def train_generator(self, data_dir, tmp_dir, train): + def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS tag = "train" if train else "dev" @@ -648,28 +641,6 @@ def target_space_id(self): return problem.SpaceID.CS_CHR -def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix, - source_vocab_size, target_vocab_size): - """Generate source and target data from a single file.""" - source_vocab = generator_utils.get_or_generate_tabbed_vocab( - data_dir, tmp_dir, "parsing_train.pairs", 0, - prefix + "_source.vocab.%d" % source_vocab_size, source_vocab_size) - target_vocab = generator_utils.get_or_generate_tabbed_vocab( - data_dir, tmp_dir, "parsing_train.pairs", 1, - prefix + "_target.vocab.%d" % target_vocab_size, target_vocab_size) - filename = "parsing_%s" % ("train" if train else "dev") - pair_filepath = os.path.join(tmp_dir, filename + ".pairs") - return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) - - -def tabbed_parsing_character_generator(tmp_dir, train): - """Generate source and target data from a single file.""" - character_vocab = text_encoder.ByteTextEncoder() - filename = "parsing_%s" % ("train" if train else "dev") - pair_filepath = os.path.join(tmp_dir, filename + ".pairs") - return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) - - def parsing_token_generator(data_dir, tmp_dir, train, vocab_size): symbolizer_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 37c1206bd..06f49b231 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -431,7 +431,7 @@ def transformer_parsing_big(): @registry.register_hparams def transformer_parsing_ice(): - """Hparams for parsing Icelandic text.""" + """Hparams for parsing and tagging Icelandic text.""" hparams = transformer_base_single_gpu() hparams.batch_size = 4096 hparams.shared_embedding_and_softmax_weights = int(False) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 5e8f4d482..da33cf90e 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -259,6 +259,11 @@ def _interactive_input_fn(hparams): vocabulary = p_hparams.vocabulary["inputs" if has_input else "targets"] # This should be longer than the longest input. const_array_size = 10000 + # Import readline if available for command line editing and recall. + try: + import readline # pylint: disable=g-import-not-at-top,unused-variable + except ImportError: + pass while True: prompt = ("INTERACTIVE MODE num_samples=%d decode_length=%d \n" " it=<input_type> ('text' or 'image' or 'label')\n" diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index fea647b2b..6ce650ac3 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -225,10 +225,10 @@ def parse_problem_name(problem_name): was_copy: A boolean. """ # Recursively strip tags until we reach a base name. - if len(problem_name) > 4 and problem_name[-4:] == "_rev": + if problem_name.endswith("_rev"): base, _, was_copy = parse_problem_name(problem_name[:-4]) return base, True, was_copy - elif len(problem_name) > 5 and problem_name[-5:] == "_copy": + elif problem_name.endswith("_copy"): base, was_reversed, _ = parse_problem_name(problem_name[:-5]) return base, was_reversed, True else: diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 8a71afe68..61156f227 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -36,11 +36,12 @@ class TinyAlgo(algorithmic.AlgorithmicIdentityBinary40): def generate_data(self, data_dir, _): + identity_problem = algorithmic.AlgorithmicIdentityBinary40() generator_utils.generate_files( - algorithmic.identity_generator(self.num_symbols, 40, 100000), + identity_problem.generator(self.num_symbols, 40, 100000), self.training_filepaths(data_dir, 1, shuffled=True), 100) generator_utils.generate_files( - algorithmic.identity_generator(self.num_symbols, 400, 10000), + identity_problem.generator(self.num_symbols, 400, 10000), self.dev_filepaths(data_dir, 1, shuffled=True), 100) From d1f9bb26d3ebaaa65d1b26069ad6253b628aefd4 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 11 Aug 2017 14:01:02 -0700 Subject: [PATCH 0259/4095] Fix memory usage of rev_block PiperOrigin-RevId: 165021509 --- tensor2tensor/layers/rev_block.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 1e1a7b848..d6fb95cf3 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -41,7 +41,7 @@ def _rev_layer_forward(xs, f, g): y1 = x1 + f(x2) with tf.variable_scope("g"): y2 = x2 + g(y1) - return (y1, y2) + return tf.tuple([y1, y2]) def _rev_layer_backward(ys, grad_ys, f, g, f_vars, g_vars): @@ -65,17 +65,26 @@ def _rev_layer_backward(ys, grad_ys, f, g, f_vars, g_vars): # Compute gradients wrt to inputs # dL/dy2 * dG(y1)/y1 - grad_gy1_y2 = tf.gradients(gy1, y1_stop, grad_y2)[0] + grad_gy1_y2 = tf.gradients(gy1, y1_stop, grad_y2, gate_gradients=True)[0] grad_x1 = grad_y1 + grad_gy1_y2 - grad_x2 = (tf.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 + tf.gradients( - fx2, x2_stop, grad_gy1_y2)[0]) + grad_x2 = ( + tf.gradients(fx2, x2_stop, grad_y1, gate_gradients=True)[0] + grad_y2 + + tf.gradients(fx2, x2_stop, grad_gy1_y2, gate_gradients=True)[0]) # Compute gradients wrt to vars in f and g - grad_g_vars = tf.gradients(gy1, g_vars, grad_y2) - grad_f_y1 = tf.gradients(fx2, f_vars, grad_y1) - grad_f_y2 = tf.gradients(fx2, f_vars, grad_gy1_y2) + grad_g_vars = tf.gradients(gy1, g_vars, grad_y2, gate_gradients=True) + grad_f_y1 = tf.gradients(fx2, f_vars, grad_y1, gate_gradients=True) + grad_f_y2 = tf.gradients(fx2, f_vars, grad_gy1_y2, gate_gradients=True) grad_f_vars = [tf.add_n(grads) for grads in zip(grad_f_y1, grad_f_y2)] + # Put returns in a tuple to ensure a constant memory budget (i.e. don't want + # the subsequent layer to start computing and consuming memory based on a + # subset of these values). + outs = tf.tuple([x1, x2, grad_x1, grad_x2] + grad_f_vars + grad_g_vars) + x1, x2, grad_x1, grad_x2 = outs[:4] + grad_f_vars = outs[4:4 + len(grad_f_vars)] + grad_g_vars = outs[4 + len(grad_f_vars):] + return (x1, x2), (grad_x1, grad_x2), grad_f_vars, grad_g_vars From b31b3ae341407139ea0c52e8e813896db866f56e Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 11 Aug 2017 15:10:45 -0700 Subject: [PATCH 0260/4095] Play more with VAE, small corrections elsewhere. PiperOrigin-RevId: 165031077 --- tensor2tensor/layers/modalities.py | 5 +- tensor2tensor/models/cycle_gan.py | 14 +-- tensor2tensor/models/shake_shake.py | 2 - tensor2tensor/models/transformer.py | 1 - tensor2tensor/models/transformer_vae.py | 129 ++++++++++++++++-------- 5 files changed, 98 insertions(+), 53 deletions(-) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index acaacbf99..84f9adbe7 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -406,10 +406,11 @@ def top(self, body_output, _): # Assume input is a square with self._body_input_depth channels. if self._is_2d: length_float = tf.to_float(tf.shape(x)[1]) + length_float *= tf.to_float(tf.shape(x)[2]) spatial_dim_float = tf.sqrt(length_float) spatial_dim = tf.to_int32(spatial_dim_float) - x = tf.reshape(x, - [-1, spatial_dim, spatial_dim, self._body_input_depth]) + x_depth = int(x.get_shape()[3]) + x = tf.reshape(x, [-1, spatial_dim, spatial_dim, x_depth]) x = common_layers.conv_block_downsample(x, self._kernel, self._strides, self._padding) x = tf.nn.relu(x) diff --git a/tensor2tensor/models/cycle_gan.py b/tensor2tensor/models/cycle_gan.py index 5fcf96266..c17becbbe 100644 --- a/tensor2tensor/models/cycle_gan.py +++ b/tensor2tensor/models/cycle_gan.py @@ -39,7 +39,7 @@ def discriminator(x, compress, hparams, name, reuse=None): with tf.variable_scope(name, reuse=reuse): x = tf.stop_gradient(2 * x) - x # Reverse gradient. if compress: - x = transformer_vae.compress(x, hparams, "compress") + x = transformer_vae.compress(x, None, hparams, "compress") else: x = transformer_vae.residual_conv(x, 1, hparams, "compress_rc") y = tf.reduce_mean(x, axis=1) @@ -144,12 +144,12 @@ def cycle_vae_gan_internal(inputs, targets, _, hparams): # Input-input part. inp1_back, kl_loss1, inp1_mu, inp1_log_sigma = transformer_vae.vae_compress( - inputs1, hparams, "inp2hyp", "hyp2inp") + inputs1, None, hparams, "inp2hyp", "hyp2inp") inp1_hyp = tf.concat([inp1_mu, inp1_log_sigma], axis=3) # Target-target part. tgt2_back, kl_loss2, tgt2_mu, tgt2_log_sigma = transformer_vae.vae_compress( - targets2, hparams, "tgt2hyp", "hyp2tgt") + targets2, None, hparams, "tgt2hyp", "hyp2tgt") tgt2_hyp = tf.concat([tgt2_mu, tgt2_log_sigma], axis=3) # Reconstruction losses. @@ -165,7 +165,7 @@ def cycle_vae_gan_internal(inputs, targets, _, hparams): # Reconstruct targets from inputs. tgt, _, _, _ = transformer_vae.vae_compress( - inputs, hparams, "inp2hyp", "hyp2tgt", reuse=True) + inputs, None, hparams, "inp2hyp", "hyp2tgt", reuse=True) tgt = tf.layers.dense(tgt, hparams.vocab_size, name="softmax", reuse=True) # We use the reconstruction only for tracking progress, no gradients here! tgt = tf.stop_gradient(tf.expand_dims(tgt, axis=2)) @@ -173,8 +173,8 @@ def cycle_vae_gan_internal(inputs, targets, _, hparams): kl_rev_decay = common_layers.inverse_exp_decay(hparams.kl_warmup_steps) losses = {"input_input": hparams.cycle_loss_multiplier * inp1_loss, "target_target": hparams.cycle_loss_multiplier * tgt2_loss, - "input_kl": kl_loss1 * kl_rev_decay, - "target_kl": kl_loss2 * kl_rev_decay, + "input_kl": kl_loss1 * kl_rev_decay * 15.0, + "target_kl": kl_loss2 * kl_rev_decay * 15.0, "discriminator": dloss} return tgt, losses @@ -196,7 +196,7 @@ def cycle_gan_small(): hparams.input_modalities = "inputs:symbol:identity" hparams.target_modality = "symbol:identity" hparams.weight_decay = 3.0 - hparams.learning_rate = 0.005 + hparams.learning_rate = 0.05 hparams.kl_warmup_steps = 5000 hparams.learning_rate_warmup_steps = 3000 hparams.add_hparam("vocab_size", 32) # Vocabulary size, need to set here. diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py index aa91654a3..a7b379e11 100644 --- a/tensor2tensor/models/shake_shake.py +++ b/tensor2tensor/models/shake_shake.py @@ -100,8 +100,6 @@ class ShakeShake(t2t_model.T2TModel): def model_fn_body(self, features): hparams = self._hparams - print(hparams.learning_rate) - inputs = features["inputs"] assert (hparams.num_hidden_layers - 2) % 6 == 0 blocks_per_stage = (hparams.num_hidden_layers - 2) // 6 diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 06f49b231..0eed2dbdb 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -244,7 +244,6 @@ def transformer_decoder(decoder_input, hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) x = common_layers.layer_postprocess(x, y, hparams) if encoder_output is not None: - assert encoder_decoder_attention_bias is not None with tf.variable_scope("encdec_attention"): y = common_attention.multihead_attention( common_layers.layer_preprocess( diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 74f1e4c8f..ffd791a04 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -23,6 +23,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin +from tensor2tensor.layers import common_attention from tensor2tensor.layers import common_layers from tensor2tensor.models import transformer from tensor2tensor.utils import registry @@ -49,13 +50,43 @@ def residual_conv(x, repeat, hparams, name, reuse=None): return x -def decompress_step(source, hparams, first_relu, name): +def attend(x, source, hparams, name): + with tf.variable_scope(name): + x = tf.squeeze(x, axis=2) + if len(source.get_shape()) > 3: + source = tf.squeeze(source, axis=2) + source = common_attention.add_timing_signal_1d(source) + y = common_attention.multihead_attention( + common_layers.layer_preprocess(x, hparams), source, None, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, hparams.num_heads, + hparams.attention_dropout) + res = common_layers.layer_postprocess(x, y, hparams) + return tf.expand_dims(res, axis=2) + + +def interleave(x, y, axis=1): + x = tf.expand_dims(x, axis=axis+1) + y = tf.expand_dims(y, axis=axis+1) + return tf.concat([x, y], axis=axis+1) + + +def decompress_step(source, c, hparams, first_relu, name): """Decompression function.""" with tf.variable_scope(name): shape = tf.shape(source) - thicker = common_layers.conv_block( - source, hparams.hidden_size * 2, [((1, 1), (1, 1))], - first_relu=first_relu, name="decompress_conv") + if c is not None: + source = attend(source, c, hparams, "decompress_attend") + first = common_layers.conv_block( + source, + hparams.hidden_size, [((1, 1), (3, 1)), ((1, 1), (3, 1))], + first_relu=first_relu, padding="SAME", name="decompress_conv1") + second = common_layers.conv_block( + tf.concat([source, first], axis=3), + hparams.hidden_size, [((1, 1), (3, 1)), ((1, 1), (3, 1))], + first_relu=first_relu, padding="SAME", name="decompress_conv2") + thicker = interleave(first, second) return tf.reshape(thicker, [shape[0], shape[1] * 2, 1, hparams.hidden_size]) @@ -71,12 +102,14 @@ def vae(x, hparams, name): return z, tf.reduce_mean(kl), mu, log_sigma -def compress(inputs, hparams, name): +def compress(x, c, hparams, name): """Compress.""" with tf.variable_scope(name): # Run compression by strided convs. - cur = inputs + cur = x for i in xrange(hparams.num_compress_steps): + if c is not None: + cur = attend(cur, c, hparams, "compress_attend_%d" % i) cur = residual_conv(cur, 1, hparams, "compress_rc_%d" % i) cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (2, 1))], @@ -84,10 +117,10 @@ def compress(inputs, hparams, name): return cur -def vae_compress(inputs, hparams, compress_name, decompress_name, reuse=None): +def vae_compress(x, c, hparams, compress_name, decompress_name, reuse=None): """Compress, then VAE.""" with tf.variable_scope(compress_name, reuse=reuse): - cur = compress(inputs, hparams, "compress") + cur = compress(x, c, hparams, "compress") # Convolve and ReLu to get state. cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") @@ -100,7 +133,7 @@ def vae_compress(inputs, hparams, compress_name, decompress_name, reuse=None): for i in xrange(hparams.num_compress_steps): j = hparams.num_compress_steps - i - 1 z = residual_conv(z, 1, hparams, "decompress_rc_%d" % j) - z = decompress_step(z, hparams, i > 0, "decompress__step_%d" % j) + z = decompress_step(z, c, hparams, i > 0, "decompress__step_%d" % j) return z, kl_loss, mu, log_sigma @@ -124,6 +157,13 @@ def dropmask(targets, targets_dropout_max, is_training): return targets * keep_mask +def ffn(x, hparams, name): + with tf.variable_scope(name): + y = transformer.transformer_ffn_layer( + common_layers.layer_preprocess(x, hparams), hparams) + return common_layers.layer_postprocess(x, y, hparams) + + def vae_transformer_internal(inputs, targets, target_space, hparams): """VAE Transformer, main step used for training.""" with tf.variable_scope("vae_transformer"): @@ -140,36 +180,40 @@ def vae_transformer_internal(inputs, targets, target_space, hparams): inputs = encode(inputs, target_space, hparams, "input_enc") # Dropout targets or swap for zeros 5% of the time. + targets_nodrop = targets max_prestep = hparams.kl_warmup_steps prob_targets = 0.95 if is_training else 1.0 targets_dropout_max = common_layers.inverse_lin_decay(max_prestep) - 0.01 targets = dropmask(targets, targets_dropout_max * 0.7, is_training) targets = tf.cond(tf.less(tf.random_uniform([]), prob_targets), lambda: targets, lambda: tf.zeros_like(targets)) - - # Join targets with inputs, run encoder. - # to_encode = common_layers.conv_block( - # tf.expand_dims(tf.concat([targets, inputs], axis=2), axis=2), - # hparams.hidden_size, [((1, 1), (1, 1))], - # first_relu=False, name="join_targets") - # to_compress = encode(tf.squeeze(to_encode, axis=2), - # target_space, hparams, "enc") + targets = targets_nodrop # Compress and vae. - z, kl_loss, _, _ = vae_compress(tf.expand_dims(targets, axis=2), hparams, - "vae_compress", "vae_decompress") + z = tf.get_variable("z", [hparams.hidden_size]) + z = tf.reshape(z, [1, 1, 1, -1]) + z = tf.tile(z, [tf.shape(inputs)[0], 1, 1, 1]) + + z = attend(z, inputs, hparams, "z_attendsi") + z = ffn(z, hparams, "zff2") + z = attend(z, targets, hparams, "z_attendst2") + z = ffn(z, hparams, "zff3") + z, kl_loss, _, _ = vae(z, hparams, name="vae") + z = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense") + + # z, kl_loss, _, _ = vae_compress( + # tf.expand_dims(targets, axis=2), tf.expand_dims(inputs, axis=2), + # hparams, "vae_compress", "vae_decompress") - # Join z with inputs, run decoder. - to_decode = common_layers.conv_block( - tf.concat([z, tf.expand_dims(inputs, axis=2)], axis=3), - hparams.hidden_size, [((1, 1), (1, 1))], name="join_z") - ret = encode(tf.squeeze(to_decode, axis=2), target_space, hparams, "dec") - # to_decode = residual_conv(to_decode, 2, hparams, "dec_conv") - # ret = tf.squeeze(to_decode, axis=2) + decoder_in = tf.squeeze(z, axis=2) + tf.zeros_like(targets) + (decoder_input, decoder_self_attention_bias) = ( + transformer.transformer_prepare_decoder(decoder_in, hparams)) + ret = transformer.transformer_decoder( + decoder_input, inputs, decoder_self_attention_bias, None, hparams) - # Randomize decoder inputs.. - kl_loss *= common_layers.inverse_exp_decay(max_prestep) * 10.0 - return tf.expand_dims(ret, axis=2), kl_loss + kl_loss *= common_layers.inverse_exp_decay(int(max_prestep * 1.5)) * 5.0 + losses = {"kl": kl_loss} + return tf.expand_dims(ret, axis=2), losses @registry.register_model @@ -203,13 +247,15 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) samples = tf.concat(sharded_samples, 0) - # 2nd step. - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - features["targets"] = samples - sharded_logits, _ = self.model_fn( - features, False, last_position_only=last_position_only) - sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) - samples = tf.concat(sharded_samples, 0) + # More steps. + how_many_more_steps = 20 + for _ in xrange(how_many_more_steps): + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + features["targets"] = samples + sharded_logits, _ = self.model_fn( + features, False, last_position_only=last_position_only) + sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) + samples = tf.concat(sharded_samples, 0) if inputs_old is not None: # Restore to not confuse Estimator. features["inputs"] = inputs_old @@ -221,9 +267,10 @@ def transformer_vae_small(): """Set of hyperparameters.""" hparams = transformer.transformer_small() hparams.batch_size = 2048 + hparams.learning_rate_warmup_steps = 16000 hparams.add_hparam("z_size", 128) hparams.add_hparam("num_compress_steps", 4) - hparams.add_hparam("kl_warmup_steps", 50000) + hparams.add_hparam("kl_warmup_steps", 60000) return hparams @@ -233,9 +280,9 @@ def transformer_vae_base(): hparams = transformer_vae_small() hparams.hidden_size = 512 hparams.filter_size = 2048 - hparams.attention_dropout = 0.1 - hparams.relu_dropout = 0.1 - hparams.dropout = 0.1 - hparams.num_hidden_layers = 4 + hparams.attention_dropout = 0.0 + hparams.relu_dropout = 0.0 + hparams.dropout = 0.0 + hparams.num_hidden_layers = 3 hparams.z_size = 256 return hparams From 8abc5d29b4b22a93c4aaa9ea17aa3b3302d1da86 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 11 Aug 2017 16:16:35 -0700 Subject: [PATCH 0261/4095] v1.1.8 PiperOrigin-RevId: 165038950 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4ada714b6..ff1503990 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.7', + version='1.1.8', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 8043cf18866148d008397691169002e3d267dffa Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Mon, 14 Aug 2017 11:31:53 -0700 Subject: [PATCH 0262/4095] Add top_dimensionality to IdentityModalityNoPad PiperOrigin-RevId: 165205518 --- tensor2tensor/layers/modalities.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 84f9adbe7..01728ba24 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -510,6 +510,10 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_all): class IdentityModalityNoPad(modality.Modality): """Does nothing except making sure that there is no padding in cross-ent.""" + @property + def top_dimensionality(self): + return 256 + @property def targets_dimensionality(self): return self._vocab_size From 35c54355a187e129be01d979c8ca7c040b0f762e Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Mon, 14 Aug 2017 13:39:55 -0700 Subject: [PATCH 0263/4095] Clean up and fix bugs in "prepend_inputs_to_targets" mode. Add an additional option for full self-attention on the inputs portion. PiperOrigin-RevId: 165222328 --- tensor2tensor/data_generators/problem.py | 2 +- tensor2tensor/layers/common_attention.py | 33 ++++++++++++++++++++++++ tensor2tensor/layers/common_hparams.py | 23 +++++++++++++---- tensor2tensor/layers/common_layers.py | 7 ++--- tensor2tensor/models/attention_lm.py | 31 +++++++++++++++++++--- tensor2tensor/models/attention_lm_moe.py | 26 +++++++++++++++++-- tensor2tensor/utils/metrics.py | 7 +++-- 7 files changed, 112 insertions(+), 17 deletions(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 7a84aac93..ebec3f8ae 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -98,7 +98,7 @@ def preprocess_examples_common(examples, hparams): examples["inputs"] = examples["inputs"][:hparams.max_input_seq_length] if hparams.max_target_seq_length > 0: examples["targets"] = examples["targets"][:hparams.max_target_seq_length] - if hparams.prepend_inputs_to_targets: + if hparams.prepend_mode != "none": examples["targets"] = tf.concat( [examples["inputs"], [0], examples["targets"]], 0) return examples diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 4f1273163..6b54633f7 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -206,6 +206,39 @@ def attention_bias_ignore_padding(memory_padding): return tf.expand_dims(tf.expand_dims(ret, axis=1), axis=1) +def attention_bias_prepend_inputs_full_attention(padding): + """Create a bias tensor for prepend_mode="prepend_inputs_full_attention". + + See prepend_inputs in common_hparams.py. + + Produces a bias tensor to be used in self-attention. + + This bias tensor allows for full connectivity in the "inputs" part of + the sequence and masked connectivity in the targets part. + + Args: + padding: a float `Tensor` with shape [batch, length] with + ones in positions corresponding to padding. In each row, a single + padding position separates the input part from the target part. + + Returns: + a `Tensor` with shape [batch, 1, length, length]. + """ + # Everything past the first padding position is part of the target. + # This Tensor has zeros for the source portion and separator, + # and ones for the target portion. + in_target = tf.cumsum(padding, axis=1, exclusive=True) + # The position within the target, or 0 if part of the source. + target_pos = tf.cumsum(in_target, axis=1) + # A position with a lesser target_pos cannot see a position with greater + # target_pos. + illegal_connections = tf.greater(tf.expand_dims(target_pos, 1), + tf.expand_dims(target_pos, 2)) + bias = tf.to_float(illegal_connections) * -1e9 + bias = tf.expand_dims(bias, 1) + return bias + + def attention_bias_proximal(length): """Bias for self-attention to encourage attention to close positions. diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 0ed62685f..6bb4d3e9d 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -124,11 +124,24 @@ def basic_params1(): # You can change this behavior by overridding preprocess_examples() method # in your problem class. max_target_seq_length=0, - # Treat a seq-to-seq problem as a language model by prepending the - # inputs to the targets. During training, the loss is on both the - # inputs and the targets. During eval, metrics are computed only on the - # target portion. - prepend_inputs_to_targets=int(False), + # This flag allows us to optionally treat a seq-to-seq problem + # as a language model. Legal values are: + # + # "none" - Do not prepend the inputs to the targets. + # "prepend_inputs_masked_attention" + # replace "targets" in preprocessing with + # tf.concat([inputs, [0], targets], axis=1) + # i.e. we prepend the inputs to the targets with a single + # padding token in between. Use masked self-attention on the + # entire resulting sequence. During training, we compute losses on + # the combined sequence. During eval, we compute the metrics + # on only the targets portion. + # "prepend_inputs_full_attention" + # similar to the previous option except that each + # position in the inputs portion can see the + # entire inputs portion. This removes the challenge of + # autoregressively predicting the inputs portion. + prepend_mode="none", ) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index e9b195195..8621ddcb1 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -1361,10 +1361,11 @@ def weights_nonzero(labels): return tf.to_float(tf.not_equal(labels, 0)) -def weights_second_part(labels): - """Weights function for 'prepend_inputs_to_targets'. +def weights_prepend_inputs_to_targets(labels): + """Assign weight 1.0 to only the "targets" portion of the labels. Weight 1.0 is assigned to all nonzero labels past the first zero. + See prepend_mode in common_hparams.py Args: labels: A Tensor of int32s. @@ -1372,7 +1373,7 @@ def weights_second_part(labels): Returns: A Tensor of floats. """ - past_first_zero = tf.cumsum(tf.to_float(tf.equal(labels, 0))) + past_first_zero = tf.cumsum(tf.to_float(tf.equal(labels, 0)), axis=1) nonzero = tf.to_float(labels) return tf.to_float(tf.not_equal(past_first_zero * nonzero, 0)) diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 19f1915e8..3302f45be 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -72,8 +72,13 @@ def attention_lm_prepare_decoder(targets, hparams): decoder_self_attention_bias: a Tensor, containing large negative values to implement masked attention and possibly baises for diagonal alignments """ - decoder_self_attention_bias = ( - common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) + if hparams.prepend_mode == "prepend_inputs_full_attention": + decoder_self_attention_bias = ( + common_attention.attention_bias_prepended( + common_attention.embedding_to_padding(targets))) + else: + decoder_self_attention_bias = ( + common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) decoder_input = common_layers.shift_left_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) @@ -153,6 +158,7 @@ def attention_lm_base(): hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("relu_dropout", 0.0) hparams.add_hparam("pos", "timing") # timing, none + hparams.add_hparam("encoder_full_attention", int(False)) return hparams @@ -181,9 +187,26 @@ def attention_lm_translation(): hparams = attention_lm_base() hparams.layer_preprocess_sequence = "n" hparams.layer_postprocess_sequence = "da" - hparams.learning_rate = 0.1 - hparams.prepend_inputs_to_targets = int(True) + hparams.learning_rate = 0.4 + hparams.prepend_mode = "prepend_inputs_masked_attention" hparams.max_length = 512 hparams.label_smoothing = 0.1 hparams.shared_embedding_and_softmax_weights = int(True) return hparams + + +@registry.register_hparams +def attention_lm_translation_l12(): + """Version to use for seq2seq.""" + hparams = attention_lm_translation() + hparams.batch_size = 4096 + hparams.num_hidden_layers = 12 + return hparams + + +@registry.register_hparams +def attention_lm_translation_full_attention(): + """Version to use for seq2seq.""" + hparams = attention_lm_translation() + hparams.prepend_mode = "prepend_inputs_full_attention" + return hparams diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 268e93f7b..c24f9179b 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -118,8 +118,13 @@ def attention_lm_moe_prepare_decoder(targets, hparams): decoder_self_attention_bias: a Tensor, containing large negative values to implement masked attention and possibly baises for diagonal alignments """ - decoder_self_attention_bias = ( - common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) + if hparams.prepend_mode == "prepend_inputs_full_attention": + decoder_self_attention_bias = ( + common_attention.attention_bias_prepended( + common_attention.embedding_to_padding(targets))) + else: + decoder_self_attention_bias = ( + common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) decoder_input = common_layers.shift_left_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) @@ -249,3 +254,20 @@ def attention_lm_moe_large(): hparams.moe_num_experts = 128 hparams.layer_prepostprocess_dropout = 0.2 return hparams + + +@registry.register_hparams +def attention_lm_moe_translation(): + """Version to use for seq2seq.""" + hparams = attention_lm_moe_base() + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + hparams.learning_rate = 0.4 + hparams.prepend_mode = "prepend_inputs_masked_attention" + hparams.max_length = 512 + hparams.label_smoothing = 0.1 + hparams.layer_prepostprocess_dropout = 0.2 + hparams.num_hidden_layers = 6 + hparams.moe_layers = "0,1,2,3,4,5" + hparams.shared_embedding_and_softmax_weights = int(True) + return hparams diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index fd82adc30..e5cb88ddf 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -196,9 +196,12 @@ def problem_metric_fn(predictions, labels, weights): class_output = "image" in problem_name and "coco" not in problem_name real_output = "gene_expression" in problem_name - if model_hparams.prepend_inputs_to_targets: + if model_hparams.prepend_mode != "none": + assert ( + model_hparams.prepend_mode == "prepend_inputs_masked_attention" or + model_hparams.prepend_mode == "prepend_inputs_full_attention") assert not class_output - weights_fn = common_layers.weights_second_part + weights_fn = common_layers.weights_prepend_inputs_to_targets elif class_output or real_output: weights_fn = common_layers.weights_all else: From ccd1fb8145d7a9b6138a1f0019ce16fa71495936 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 14 Aug 2017 15:11:55 -0700 Subject: [PATCH 0264/4095] Add support for rev_block side inputs and add RevTransformer model PiperOrigin-RevId: 165235910 --- tensor2tensor/layers/rev_block.py | 254 +++++++++++++------ tensor2tensor/layers/rev_block_test.py | 62 ++++- tensor2tensor/models/models.py | 1 + tensor2tensor/models/rev_transformer.py | 224 ++++++++++++++++ tensor2tensor/models/rev_transformer_test.py | 77 ++++++ 5 files changed, 542 insertions(+), 76 deletions(-) create mode 100644 tensor2tensor/models/rev_transformer.py create mode 100644 tensor2tensor/models/rev_transformer_test.py diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index d6fb95cf3..ab424c0e2 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -34,122 +34,197 @@ LAYER_RE = re.compile(".*revlayer_([0-9]*)/([fg])/.*") -def _rev_layer_forward(xs, f, g): +def _acc_grads(*lists_of_grads): + """Accumulates lists of gradients.""" + acc_grads = [] + for grads in zip(*lists_of_grads): + grads = [g for g in grads if g is not None] + if grads: + acc_grads.append(tf.add_n(grads)) + else: + acc_grads.append(None) + return acc_grads + + +def _rev_layer_forward(xs, f, g, f_side_input, g_side_input, + gate_outputs=False): """Forward for 1 reversible layer.""" x1, x2 = xs with tf.variable_scope("f"): - y1 = x1 + f(x2) + y1 = x1 + (f(x2, f_side_input) if f_side_input else f(x2)) with tf.variable_scope("g"): - y2 = x2 + g(y1) - return tf.tuple([y1, y2]) + y2 = x2 + (g(y1, g_side_input) if g_side_input else g(y1)) + if gate_outputs: + return tf.tuple([y1, y2]) + else: + return (y1, y2) -def _rev_layer_backward(ys, grad_ys, f, g, f_vars, g_vars): +def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars, + g_side_input): """Backprop for 1 layer.""" y1, y2 = ys grad_y1, grad_y2 = grad_ys # Reconstruct intermediates and inputs (x1, x2) - # stop_gradients required on y1 and x2 to prevent infinite recursion into this + # stop_gradients required on fn inputs to prevent infinite recursion into this # grad function on the calls to tf.gradients. y1_stop = tf.stop_gradient(y1) + g_side_input = [tf.stop_gradient(t) for t in g_side_input] with tf.variable_scope("g"): - gy1 = g(y1_stop) + gy1 = g(y1_stop, g_side_input) if g_side_input else g(y1_stop) x2 = y2 - gy1 x2_stop = tf.stop_gradient(x2) + f_side_input = [tf.stop_gradient(t) for t in f_side_input] with tf.variable_scope("f"): - fx2 = f(x2_stop) + fx2 = f(x2_stop, f_side_input) if f_side_input else f(x2_stop) x1 = y1 - fx2 # Compute gradients wrt to inputs # dL/dy2 * dG(y1)/y1 - grad_gy1_y2 = tf.gradients(gy1, y1_stop, grad_y2, gate_gradients=True)[0] + grad_gy1_y2 = tf.gradients(gy1, y1_stop, grad_y2)[0] grad_x1 = grad_y1 + grad_gy1_y2 - grad_x2 = ( - tf.gradients(fx2, x2_stop, grad_y1, gate_gradients=True)[0] + grad_y2 + - tf.gradients(fx2, x2_stop, grad_gy1_y2, gate_gradients=True)[0]) + grad_x2 = (tf.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 + tf.gradients( + fx2, x2_stop, grad_gy1_y2)[0]) + + # Compute gradients wrt to vars and side inputs in f and g + grads1 = tf.gradients(gy1, g_vars + g_side_input, grad_y2) + grad_g_vars, grad_g_side = grads1[:len(g_vars)], grads1[len(g_vars):] + grads2 = tf.gradients(fx2, f_vars + f_side_input, grad_y1) + grad_f_y1, grad_f_side1 = grads2[:len(f_vars)], grads2[len(f_vars):] + grads3 = tf.gradients(fx2, f_vars + f_side_input, grad_gy1_y2) + grad_f_y2, grad_f_side2 = grads3[:len(f_vars)], grads3[len(f_vars):] + grad_f_vars = _acc_grads(grad_f_y1, grad_f_y2) - # Compute gradients wrt to vars in f and g - grad_g_vars = tf.gradients(gy1, g_vars, grad_y2, gate_gradients=True) - grad_f_y1 = tf.gradients(fx2, f_vars, grad_y1, gate_gradients=True) - grad_f_y2 = tf.gradients(fx2, f_vars, grad_gy1_y2, gate_gradients=True) - grad_f_vars = [tf.add_n(grads) for grads in zip(grad_f_y1, grad_f_y2)] + grad_f_side = _acc_grads(grad_f_side1, grad_f_side2) # Put returns in a tuple to ensure a constant memory budget (i.e. don't want # the subsequent layer to start computing and consuming memory based on a # subset of these values). - outs = tf.tuple([x1, x2, grad_x1, grad_x2] + grad_f_vars + grad_g_vars) + outs = tf.tuple([x1, x2, grad_x1, grad_x2] + grad_f_vars + grad_g_vars + + grad_f_side + grad_g_side) x1, x2, grad_x1, grad_x2 = outs[:4] - grad_f_vars = outs[4:4 + len(grad_f_vars)] - grad_g_vars = outs[4 + len(grad_f_vars):] - - return (x1, x2), (grad_x1, grad_x2), grad_f_vars, grad_g_vars - - -def _rev_block_forward(x, f, g, num_layers=1, layer_scopes=None, name=None): + grad_f_vars_end = 4 + len(grad_f_vars) + grad_g_vars_end = grad_f_vars_end + len(grad_g_vars) + grad_f_side_end = grad_g_vars_end + len(grad_f_side) + + grad_f_vars = outs[4:grad_f_vars_end] + grad_g_vars = outs[grad_f_vars_end:grad_g_vars_end] + grad_f_side = outs[grad_g_vars_end:grad_f_side_end] + grad_g_side = outs[grad_f_side_end:] + + return ((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side), + (grad_g_vars, grad_g_side)) + + +def _rev_block_forward(x1, + x2, + f, + g, + num_layers=1, + f_side_input=None, + g_side_input=None, + layer_scopes=None, + gate_outputs=False, + name=None): """Forward for a series of reversible layers.""" - x1, x2 = tf.split(x, 2, axis=len(x.get_shape()) - 1) out = (x1, x2) with tf.variable_scope(name, default_name="revblock"): for i in xrange(num_layers): with tf.variable_scope("revlayer_%d" % i) as layer_vs: if layer_scopes is not None: layer_scopes.append(layer_vs) - out = _rev_layer_forward(out, f, g) + out = _rev_layer_forward( + out, f, g, f_side_input, g_side_input, gate_outputs=gate_outputs) y1, y2 = out - y = tf.concat([y1, y2], axis=-1) - return y + return y1, y2 -def rev_block(x, f, g, num_layers=1, is_training=True): +def rev_block(x1, + x2, + f, + g, + num_layers=1, + f_side_input=None, + g_side_input=None, + is_training=True): """A block of reversible residual layers. A reversible residual layer is defined as: ``` - x1, x2 = tf.split(x, 2, axis=-1) y1 = x1 + f(x2) y2 = x2 + g(y1) - y = tf.concat([y1, y2], axis=-1) ``` Args: - x: a float Tensor, input, will be split evenly across the last dim. + x1: a float Tensor. + x2: a float Tensor. f: a function, (Tensor) -> (Tensor). Should not change the shape of the - Tensor. May create variables. Should NOT close over any Tensor values. + Tensor. Expected to create variables. See f_side_input if there are side + inputs. g: a function, (Tensor) -> (Tensor). Should not change the shape of the - Tensor. May create variables. Should NOT close over any Tensor values. + Tensor. Expected to create variables. See g_side_input if there are side + inputs. num_layers: int, number of reversible residual layers. Each layer will apply f and g according to the equations above, with new variables in each layer. + f_side_input: list of Tensors, side input to f. If not None, signature of f + should be (Tensor, list<Tensor>) -> (Tensor). + g_side_input: list of Tensors, side input to g. If not None, signature of g + should be (Tensor, list<Tensor>) -> (Tensor). is_training: bool, whether to actually use the efficient backprop codepath. Returns: - y: a float Tensor, output. + y1, y2: tuple of float Tensors. """ + if f_side_input is None: + f_side_input = [] + if g_side_input is None: + g_side_input = [] + layer_scopes = [] - def rev_block_grad(op, grad_y): + def rev_block_grad(op, *grad_ys): """Custom gradient fn for a block of reversible residual layers.""" - y = op.outputs[0] - ys = tf.split(y, 2, axis=len(y.get_shape()) - 1) - grad_ys = tf.split(grad_y, 2, axis=len(y.get_shape()) - 1) + ys = (op.outputs[0], op.outputs[1]) - # Find all variables from f and from g - # Keep track of their positions in all_vars - all_vars = op.inputs[1:] + # The Defun will have as inputs the main inputs (x1, x2), the variables + # created inside f and g, and the side inputs to f and g. The order of the + # grads returned from this function must match the order of the inputs. + # The code here partitions the hoisted inputs into f variables, f side + # inputs, g variables, and g side inputs and keeps track of their positions + # in hoisted_inputs. + + hoisted_inputs = op.inputs[2:] f_vars = [[] for _ in range(num_layers)] g_vars = [[] for _ in range(num_layers)] f_vars_idxs = [[] for _ in range(num_layers)] g_vars_idxs = [[] for _ in range(num_layers)] - - for i, v in enumerate(all_vars): - ref = v.op.inputs[0] + f_side_idxs = [None] * len(f_side_input) + g_side_idxs = [None] * len(g_side_input) + + for t in f_side_input + g_side_input: + assert t in hoisted_inputs + + for i, t in enumerate(hoisted_inputs): + # Side inputs + if t in f_side_input: + f_side_idxs[f_side_input.index(t)] = i + continue + if t in g_side_input: + g_side_idxs[g_side_input.index(t)] = i + continue + + # Variables + ref = t.op.inputs[0] assert ref.dtype == dtypes.float32_ref - regex = LAYER_RE.match(v.name) + + # Use the name to identify the layer number and function (f or g) + regex = LAYER_RE.match(t.name) layer_no = int(regex.group(1)) fn_name = regex.group(2) if fn_name == "f": @@ -160,45 +235,80 @@ def rev_block_grad(op, grad_y): g_vars[layer_no].append(ref) g_vars_idxs[layer_no].append(i) - f_grads = [] - g_grads = [] + f_var_grads = [] + g_var_grads = [] + f_side_grads = [] + g_side_grads = [] - # Reverse state containers to go backward + # Reverse variable containers to go backward layer_scopes.reverse() f_vars.reverse() g_vars.reverse() for i in xrange(num_layers): with tf.variable_scope(layer_scopes[i], reuse=True): - ys, grad_ys, grad_f_vars, grad_g_vars = _rev_layer_backward( - ys, grad_ys, f, g, f_vars[i], g_vars[i]) - f_grads.append(grad_f_vars) - g_grads.append(grad_g_vars) - - # Gradients were collected in reverse layer order - f_grads.reverse() - g_grads.reverse() - - # Reorder the gradients so they match the original order of all_vars - var_grads = [None] * len(all_vars) - for idxs, grads in zip(f_vars_idxs, f_grads) + zip(g_vars_idxs, g_grads): + ys, grad_ys, f_ret, g_ret = (_rev_layer_backward( + ys, grad_ys, f, g, f_vars[i], f_side_input, g_vars[i], + g_side_input)) + + grad_f_vars, grad_f_side = f_ret + grad_g_vars, grad_g_side = g_ret + f_var_grads.append(grad_f_vars) + g_var_grads.append(grad_g_vars) + f_side_grads.append(grad_f_side) + g_side_grads.append(grad_g_side) + + # Accumulate layer gradients for f_side_input and g_side_input + acc_f_side_grads = _acc_grads(*f_side_grads) + acc_g_side_grads = _acc_grads(*g_side_grads) + + # Use the stored idxs to put gradients in the same order as hoisted_inputs. + hoisted_inputs_grads = [None] * len(hoisted_inputs) + + # Variable gradients were collected in reverse layer order. Reverse to match + # idxs. + f_var_grads.reverse() + g_var_grads.reverse() + for idxs, grads in zip(f_vars_idxs, f_var_grads) + zip( + g_vars_idxs, g_var_grads): for i, grad in zip(idxs, grads): - var_grads[i] = grad + hoisted_inputs_grads[i] = grad - grad_x = tf.concat(grad_ys, axis=-1) - all_grads = [grad_x] + var_grads - return all_grads + for i, grad in zip(f_side_idxs, acc_f_side_grads): + hoisted_inputs_grads[i] = grad + for i, grad in zip(g_side_idxs, acc_g_side_grads): + hoisted_inputs_grads[i] = grad + + grad_x1, grad_x2 = grad_ys + return [grad_x1, grad_x2] + hoisted_inputs_grads @function.Defun( + tf.float32, tf.float32, python_grad_func=rev_block_grad, - shape_func=lambda _: [x.get_shape()]) - def rev_block_defun(inp): - inp.set_shape(x.get_shape()) + shape_func=lambda _: [x1.get_shape(), x2.get_shape()]) + def rev_block_defun(inp1, inp2): + inp1.set_shape(x1.get_shape()) + inp2.set_shape(x2.get_shape()) return _rev_block_forward( - inp, f, g, num_layers=num_layers, layer_scopes=layer_scopes) + inp1, + inp2, + f, + g, + num_layers=num_layers, + f_side_input=f_side_input, + g_side_input=g_side_input, + layer_scopes=layer_scopes, + gate_outputs=True) if is_training: - return rev_block_defun(x) + return rev_block_defun(x1, x2) else: - return _rev_block_forward(x, f, g, num_layers=num_layers) + return _rev_block_forward( + x1, + x2, + f, + g, + num_layers=num_layers, + f_side_input=f_side_input, + g_side_input=g_side_input) diff --git a/tensor2tensor/layers/rev_block_test.py b/tensor2tensor/layers/rev_block_test.py index bc4bcc6a4..8dfa049af 100644 --- a/tensor2tensor/layers/rev_block_test.py +++ b/tensor2tensor/layers/rev_block_test.py @@ -42,8 +42,10 @@ def g(x): return tf.layers.dense(x, channels // 2, use_bias=True) x = tf.random_uniform([batch_size, channels], dtype=tf.float32) - y = rev_block.rev_block( - x, f, g, num_layers=num_layers, is_training=use_defun) + x1, x2 = tf.split(x, 2, axis=1) + y1, y2 = rev_block.rev_block( + x1, x2, f, g, num_layers=num_layers, is_training=use_defun) + y = tf.concat([y1, y2], axis=1) loss = tf.reduce_mean(y + 10.) grads = tf.gradients(loss, [x] + tf.global_variables()) with self.test_session() as sess: @@ -63,14 +65,19 @@ def g(x): return tf.layers.dense(x, channels // 2, use_bias=True) x = tf.random_uniform([batch_size, channels], dtype=tf.float32) + x1, x2 = tf.split(x, 2, axis=1) with tf.variable_scope("defun") as vs: - y_defun = rev_block.rev_block(x, f, g, num_layers=num_layers) + y1_defun, y2_defun = rev_block.rev_block( + x1, x2, f, g, num_layers=num_layers) + y_defun = tf.concat([y1_defun, y2_defun], axis=1) fg_vars = vs.trainable_variables() num_vars = len(tf.global_variables()) with tf.variable_scope(vs, reuse=True): - y = rev_block.rev_block(x, f, g, num_layers=num_layers, is_training=False) + y1, y2 = rev_block.rev_block( + x1, x2, f, g, num_layers=num_layers, is_training=False) + y = tf.concat([y1, y2], axis=1) # Ensure no new vars were created - full reuse assert len(tf.global_variables()) == num_vars @@ -87,6 +94,53 @@ def g(x): for g1, g2 in zip(gd_val, g_val): self.assertAllClose(g1, g2) + def testSideInput(self): + channels = 4 + num_layers = 3 + batch_size = 3 + tf.set_random_seed(1234) + + f_side_input = tf.random_uniform([batch_size, channels // 2]) + + def f(x, side_input): + return tf.layers.dense(x, channels // 2, use_bias=True) + side_input[0] + + def g(x): + return tf.layers.dense(x, channels // 2, use_bias=True) + + x = tf.random_uniform([batch_size, channels], dtype=tf.float32) + x1, x2 = tf.split(x, 2, axis=1) + with tf.variable_scope("defun") as vs: + y1, y2 = rev_block.rev_block( + x1, x2, f, g, num_layers=num_layers, f_side_input=[f_side_input]) + fg_vars = vs.trainable_variables() + + y = tf.concat([y1, y2], axis=1) + loss = tf.reduce_mean(y + 10.) + + with tf.variable_scope(vs, reuse=True): + y1, y2 = rev_block.rev_block( + x1, + x2, + f, + g, + num_layers=num_layers, + f_side_input=[f_side_input], + is_training=False) + y_p = tf.concat([y1, y2], axis=1) + loss_p = tf.reduce_mean(y_p + 10.) + + grads = tf.gradients(loss, [x, f_side_input] + fg_vars) + grads_p = tf.gradients(loss_p, [x, f_side_input] + fg_vars) + self.assertTrue(grads[1] is not None) # f_side_input has a gradient + + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + y_val, y_p_val, g_val, g_p_val = sess.run([y, y_p, grads, grads_p]) + self.assertAllClose(y_val, y_p_val) + for g1, g2 in zip(g_val, g_p_val): + self.assertAllClose(g1, g2) + if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index d4514408d..af609e22c 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -33,6 +33,7 @@ from tensor2tensor.models import lstm from tensor2tensor.models import multimodel from tensor2tensor.models import neural_gpu +from tensor2tensor.models import rev_transformer from tensor2tensor.models import shake_shake from tensor2tensor.models import slicenet from tensor2tensor.models import transformer diff --git a/tensor2tensor/models/rev_transformer.py b/tensor2tensor/models/rev_transformer.py new file mode 100644 index 000000000..ce7dec261 --- /dev/null +++ b/tensor2tensor/models/rev_transformer.py @@ -0,0 +1,224 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Reversible Residual Transformer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_layers +from tensor2tensor.layers import rev_block +from tensor2tensor.models import transformer +from tensor2tensor.utils import registry + +import tensorflow as tf + + +@registry.register_model +class RevTransformer(transformer.Transformer): + """Reversible Residual Transformer. + + Layers are reversible and are recomputed on the backward pass. + + y1 = x1 + f(x2) + y2 = x2 + g(y1) + + f: Attention + g: Feed-forward + """ + + def model_fn_body(self, features): + hparams = self._hparams + targets = features["targets"] + inputs = features["inputs"] + target_space = features["target_space_id"] + + inputs = common_layers.flatten4d3d(inputs) + targets = common_layers.flatten4d3d(targets) + + (encoder_input, encoder_self_attention_bias, + encoder_decoder_attention_bias) = (transformer.transformer_prepare_encoder( + inputs, target_space, hparams)) + (decoder_input, + decoder_self_attention_bias) = transformer.transformer_prepare_decoder( + targets, hparams) + + encoder_input = tf.nn.dropout(encoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + decoder_input = tf.nn.dropout(decoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + encoder_output = rev_transformer_encoder( + encoder_input, encoder_self_attention_bias, hparams) + + decoder_output = rev_transformer_decoder( + decoder_input, encoder_output, decoder_self_attention_bias, + encoder_decoder_attention_bias, hparams) + decoder_output = tf.expand_dims(decoder_output, 2) + + return decoder_output + + +def rev_transformer_encoder(encoder_input, + encoder_self_attention_bias, + hparams, + name="encoder"): + """A stack of transformer layers. + + Args: + encoder_input: a Tensor + encoder_self_attention_bias: bias Tensor for self-attention + (see common_attention.attention_bias()) + hparams: hyperparameters for model + name: a string + + Returns: + y: a Tensors + """ + + def f(x, side_input): + """f(x) for reversible layer, self-attention layer.""" + encoder_self_attention_bias = side_input[0] + + old_hid_size = hparams.hidden_size + hparams.hidden_size = old_hid_size // 2 + + with tf.variable_scope("self_attention"): + y = common_attention.multihead_attention( + common_layers.layer_preprocess( + x, hparams), None, encoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) + y = common_layers.layer_postprocess(x, y, hparams) + hparams.hidden_size = old_hid_size + return y + + def g(x): + """g(x) for reversible layer, feed-forward layer.""" + old_hid_size = hparams.hidden_size + hparams.hidden_size = old_hid_size // 2 + + with tf.variable_scope("ffn"): + y = transformer.transformer_ffn_layer( + common_layers.layer_preprocess(x, hparams), hparams) + y = common_layers.layer_postprocess(x, y, hparams) + hparams.hidden_size = old_hid_size + return y + + x1, x2 = tf.split(encoder_input, 2, axis=-1) + + with tf.variable_scope(name): + y1, y2 = rev_block.rev_block( + x1, + x2, + f, + g, + num_layers=hparams.num_hidden_layers, + f_side_input=[encoder_self_attention_bias], + is_training=hparams.mode == tf.contrib.learn.ModeKeys.TRAIN) + y = tf.concat([y1, y2], axis=-1) + + return common_layers.layer_preprocess(y, hparams) + + +def rev_transformer_decoder(decoder_input, + encoder_output, + decoder_self_attention_bias, + encoder_decoder_attention_bias, + hparams, + name="decoder"): + """A stack of transformer layers. + + Args: + decoder_input: a Tensor + encoder_output: a Tensor + decoder_self_attention_bias: bias Tensor for self-attention + (see common_attention.attention_bias()) + encoder_decoder_attention_bias: bias Tensor for encoder-decoder attention + (see common_attention.attention_bias()) + hparams: hyperparameters for model + name: a string + + Returns: + y: a Tensors + """ + + def f(x, side_input): + """f(x) for reversible layer, self-attention and enc-dec attention.""" + decoder_self_attention_bias = side_input[0] + encoder_decoder_attention_bias = side_input[1] + encoder_output = side_input[2] + + old_hid_size = hparams.hidden_size + hparams.hidden_size = old_hid_size // 2 + + with tf.variable_scope("self_attention"): + y = common_attention.multihead_attention( + common_layers.layer_preprocess( + x, hparams), None, decoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) + y = common_layers.layer_postprocess(x, y, hparams) + if encoder_output is not None: + with tf.variable_scope("encdec_attention"): + y = common_attention.multihead_attention( + common_layers.layer_preprocess( + x, hparams), encoder_output, encoder_decoder_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) + y = common_layers.layer_postprocess(x, y, hparams) + hparams.hidden_size = old_hid_size + return y + + def g(x): + """g(x) for reversible layer, feed-forward layer.""" + old_hid_size = hparams.hidden_size + hparams.hidden_size = old_hid_size // 2 + with tf.variable_scope("ffn"): + y = transformer.transformer_ffn_layer( + common_layers.layer_preprocess(x, hparams), hparams) + y = common_layers.layer_postprocess(x, y, hparams) + hparams.hidden_size = old_hid_size + return y + + x1, x2 = tf.split(decoder_input, 2, axis=-1) + + with tf.variable_scope(name): + y1, y2 = rev_block.rev_block( + x1, + x2, + f, + g, + num_layers=hparams.num_hidden_layers, + f_side_input=[ + decoder_self_attention_bias, encoder_decoder_attention_bias, + encoder_output + ], + is_training=hparams.mode == tf.contrib.learn.ModeKeys.TRAIN) + y = tf.concat([y1, y2], axis=-1) + return common_layers.layer_preprocess(y, hparams) + + +@registry.register_hparams +def rev_transformer_base(): + """Base hparams for RevTransformer.""" + hparams = transformer.transformer_base() + return hparams diff --git a/tensor2tensor/models/rev_transformer_test.py b/tensor2tensor/models/rev_transformer_test.py new file mode 100644 index 000000000..da9e15f72 --- /dev/null +++ b/tensor2tensor/models/rev_transformer_test.py @@ -0,0 +1,77 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for RevTransformer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +import numpy as np + +from tensor2tensor.data_generators import problem_hparams +from tensor2tensor.models import rev_transformer + +import tensorflow as tf + + +def rev_transformer_test(): + hparams = rev_transformer.rev_transformer_base() + hparams.num_hidden_layers = 2 + hparams.hidden_size = 128 + hparams.filter_size = 512 + hparams.num_heads = 2 + return hparams + + +class RevTransformerTest(tf.test.TestCase): + + def testTransformer(self): + batch_size = 3 + input_length = 5 + target_length = 7 + vocab_size = 9 + hparams = rev_transformer_test() + p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, + vocab_size) + hparams.problems = [p_hparams] + inputs = -1 + np.random.random_integers( + vocab_size, size=(batch_size, input_length, 1, 1)) + targets = -1 + np.random.random_integers( + vocab_size, size=(batch_size, target_length, 1, 1)) + features = { + "inputs": tf.constant(inputs, dtype=tf.int32), + "targets": tf.constant(targets, dtype=tf.int32), + "target_space_id": tf.constant(1, dtype=tf.int32), + } + model = rev_transformer.RevTransformer( + hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + sharded_logits, _ = model.model_fn(features) + logits = tf.concat(sharded_logits, 0) + grads = tf.gradients( + tf.reduce_mean(logits), [features["inputs"]] + tf.global_variables()) + grads = [g for g in grads if g is not None] + + with self.test_session() as session: + session.run(tf.global_variables_initializer()) + logits_val, _ = session.run([logits, grads]) + self.assertEqual(logits_val.shape, (batch_size, target_length, 1, 1, + vocab_size)) + + +if __name__ == "__main__": + tf.test.main() From 8c72e0f9df8b9807b614518f9f19cfa9ad63c108 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 14 Aug 2017 16:04:35 -0700 Subject: [PATCH 0265/4095] Rename problems to put them in a reasonable hierarchy by prefix as there are more and more of them. PiperOrigin-RevId: 165243284 --- tensor2tensor/bin/t2t-datagen | 22 +++++--- tensor2tensor/data_generators/cipher.py | 8 +-- tensor2tensor/data_generators/desc2code.py | 8 +-- .../data_generators/desc2code_test.py | 2 +- .../data_generators/gene_expression.py | 12 ++-- tensor2tensor/data_generators/ice_parsing.py | 4 +- tensor2tensor/data_generators/problem.py | 11 ++-- tensor2tensor/data_generators/ptb.py | 6 +- tensor2tensor/data_generators/wiki.py | 15 ++++- tensor2tensor/data_generators/wmt.py | 56 ++++++++++--------- tensor2tensor/models/gene_expression_test.py | 2 +- tensor2tensor/utils/registry.py | 44 ++++++++------- 12 files changed, 108 insertions(+), 82 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 30784fa60..19de46fbf 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -66,6 +66,8 @@ flags.DEFINE_integer("num_shards", 0, "How many shards to use. Ignored for " "registered Problems.") flags.DEFINE_integer("max_cases", 0, "Maximum number of cases to generate (unbounded if 0).") +flags.DEFINE_bool("only_list", False, + "If true, we only list the problems that will be generated.") flags.DEFINE_integer("random_seed", 429459, "Random seed to use.") flags.DEFINE_integer("task_id", -1, "For distributed data generation.") flags.DEFINE_string("t2t_usr_dir", "", @@ -81,33 +83,33 @@ _SUPPORTED_PROBLEM_GENERATORS = { "algorithmic_algebra_inverse": ( lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), - "wmt_parsing_tokens_8k": ( + "parsing_english_ptb8k": ( lambda: wmt.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13), lambda: wmt.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13)), - "wsj_parsing_tokens_16k": ( + "parsing_english_ptb16k": ( lambda: wsj_parsing.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9), lambda: wsj_parsing.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 2**14, 2**9)), - "wmt_ende_bpe32k": ( + "translate_ende_wmt_bpe32k": ( lambda: wmt.ende_bpe_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, True), lambda: wmt.ende_bpe_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, False)), - "lm1b_32k": ( + "languagemodel_1b32k": ( lambda: lm1b.generator(FLAGS.tmp_dir, True), lambda: lm1b.generator(FLAGS.tmp_dir, False) ), - "lm1b_characters": ( + "languagemodel_1b_characters": ( lambda: lm1b.generator(FLAGS.tmp_dir, True, characters=True), lambda: lm1b.generator(FLAGS.tmp_dir, False, characters=True) ), "image_celeba_tune": ( lambda: image.celeba_generator(FLAGS.tmp_dir, 162770), lambda: image.celeba_generator(FLAGS.tmp_dir, 19867, 162770)), - "snli_32k": ( + "inference_snli32k": ( lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), ), @@ -181,7 +183,11 @@ def main(_): "Data will be written to default data_dir=%s.", FLAGS.data_dir) - tf.logging.info("Generating problems:\n * %s\n" % "\n * ".join(problems)) + tf.logging.info("Generating problems:\n%s" + % registry.display_list_by_prefix(problems, + starting_spaces=4)) + if FLAGS.only_list: + return for problem in problems: set_random_seed() @@ -210,7 +216,7 @@ def generate_data_for_problem(problem): def generate_data_for_registered_problem(problem_name): - tf.logging.info("Generating training data for %s.", problem_name) + tf.logging.info("Generating data for %s.", problem_name) if FLAGS.num_shards: raise ValueError("--num_shards should not be set for registered Problem.") problem = registry.problem(problem_name) diff --git a/tensor2tensor/data_generators/cipher.py b/tensor2tensor/data_generators/cipher.py index a11776b84..977174880 100644 --- a/tensor2tensor/data_generators/cipher.py +++ b/tensor2tensor/data_generators/cipher.py @@ -29,7 +29,7 @@ @registry.register_problem -class CipherShift5(algorithmic.AlgorithmicProblem): +class AlgorithmicCipherShift5(algorithmic.AlgorithmicProblem): """Shift cipher.""" @property @@ -62,7 +62,7 @@ def dev_length(self): @registry.register_problem -class CipherVigenere5(algorithmic.AlgorithmicProblem): +class AlgorithmicCipherVigenere5(algorithmic.AlgorithmicProblem): """Vinegre cipher.""" @property @@ -95,7 +95,7 @@ def dev_length(self): @registry.register_problem -class CipherShift200(CipherShift5): +class AlgorithmicCipherShift200(AlgorithmicCipherShift5): """Shift cipher.""" @property @@ -110,7 +110,7 @@ def distribution(self): @registry.register_problem -class CipherVigenere200(CipherVigenere5): +class AlgorithmicCipherVigenere200(AlgorithmicCipherVigenere5): """Vinegre cipher.""" @property diff --git a/tensor2tensor/data_generators/desc2code.py b/tensor2tensor/data_generators/desc2code.py index 438c116c8..1e26b000c 100644 --- a/tensor2tensor/data_generators/desc2code.py +++ b/tensor2tensor/data_generators/desc2code.py @@ -209,8 +209,8 @@ def generator_target(): } -@registry.register_problem("desc2code_py") -class Desc2CodePyProblem(Desc2CodeProblem): +@registry.register_problem +class ProgrammingDesc2codePy(Desc2CodeProblem): """Description2Code for python problem.""" @property @@ -222,8 +222,8 @@ def preprocess_target(self, target): return target.replace("\t", " ") -@registry.register_problem("desc2code_cpp") -class Desc2CodeCppProblem(Desc2CodeProblem): +@registry.register_problem +class ProgrammingDesc2codeCpp(Desc2CodeProblem): """Description2Code for C++ problem.""" @property diff --git a/tensor2tensor/data_generators/desc2code_test.py b/tensor2tensor/data_generators/desc2code_test.py index 24b7568d0..79992296b 100644 --- a/tensor2tensor/data_generators/desc2code_test.py +++ b/tensor2tensor/data_generators/desc2code_test.py @@ -47,7 +47,7 @@ class Desc2codeTest(tf.test.TestCase): def testCppPreprocess(self): """Check that the file correctly preprocess the code source.""" - cpp_pb = desc2code.Desc2CodeCppProblem() + cpp_pb = desc2code.ProgrammingDesc2codeCpp() self.assertEqual( # Add space beween two lines cpp_pb.preprocess_target("firstline//comm1\nsecondline//comm2\n"), diff --git a/tensor2tensor/data_generators/gene_expression.py b/tensor2tensor/data_generators/gene_expression.py index 341a20c71..0607aad15 100644 --- a/tensor2tensor/data_generators/gene_expression.py +++ b/tensor2tensor/data_generators/gene_expression.py @@ -176,8 +176,8 @@ def eval_metrics(self): return [metrics.Metrics.LOG_POISSON, metrics.Metrics.R2] -@registry.register_problem("gene_expression_cage10") -class GeneExpressionCAGE10(GeneExpressionProblem): +@registry.register_problem +class GenomicsExpressionCage10(GeneExpressionProblem): @property def download_url(self): @@ -188,8 +188,8 @@ def h5_file(self): return "cage10.h5" -@registry.register_problem("gene_expression_gm12878") -class GeneExpressionGM12878(GeneExpressionProblem): +@registry.register_problem +class GenomicsExpressionGm12878(GeneExpressionProblem): @property def download_url(self): @@ -200,8 +200,8 @@ def h5_file(self): return "gm12878.h5" -@registry.register_problem("gene_expression_l262k") -class GeneExpressionL262k(GeneExpressionProblem): +@registry.register_problem +class GenomicsExpressionL262k(GeneExpressionProblem): @property def h5_file(self): diff --git a/tensor2tensor/data_generators/ice_parsing.py b/tensor2tensor/data_generators/ice_parsing.py index 591b205da..4fb0424bb 100644 --- a/tensor2tensor/data_generators/ice_parsing.py +++ b/tensor2tensor/data_generators/ice_parsing.py @@ -62,8 +62,8 @@ def tabbed_parsing_character_generator(tmp_dir, train): return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) -@registry.register_problem("ice_parsing_tokens") -class IceParsingTokens(problem.Problem): +@registry.register_problem +class ParsingIcelandic16k(problem.Problem): """Problem spec for parsing tokenized Icelandic text to constituency trees.""" @property diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index ebec3f8ae..60b1e842b 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -410,11 +410,12 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): generator_utils.generate_files( self.generator(data_dir, tmp_dir, True), all_paths) generator_utils.shuffle_dataset(all_paths) - generator_utils.generate_dataset_and_shuffle( - self.generator(data_dir, tmp_dir, True), - self.training_filepaths(data_dir, self.num_shards, shuffled=False), - self.generator(data_dir, tmp_dir, False), - self.dev_filepaths(data_dir, self.num_dev_shards, shuffled=False)) + else: + generator_utils.generate_dataset_and_shuffle( + self.generator(data_dir, tmp_dir, True), + self.training_filepaths(data_dir, self.num_shards, shuffled=False), + self.generator(data_dir, tmp_dir, False), + self.dev_filepaths(data_dir, self.num_dev_shards, shuffled=False)) def feature_encoders(self, data_dir): if self.is_character_level: diff --git a/tensor2tensor/data_generators/ptb.py b/tensor2tensor/data_generators/ptb.py index b9014bcd6..893c2b77c 100644 --- a/tensor2tensor/data_generators/ptb.py +++ b/tensor2tensor/data_generators/ptb.py @@ -157,8 +157,8 @@ def _generator(self, filename, encoder): yield {"inputs": [0], "targets": tok} -@registry.register_problem("lm_ptb_10k") -class LmPtb10k(PTBProblem): +@registry.register_problem +class LanguagemodelPtb10k(PTBProblem): """A class for generating PTB data, 10k vocab.""" @property @@ -167,7 +167,7 @@ def is_character_level(self): @registry.register_problem -class LmPtbCharacters(PTBProblem): +class LanguagemodelPtbCharacters(PTBProblem): """A class for generating PTB data, character-level.""" @property diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 1e427dbe8..fb73fc725 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -81,8 +81,8 @@ def _page_title(page): @registry.register_problem -class Wiki32k(problem.Text2TextProblem): - """A class for generating PTB data.""" +class LanguagemodelWikiFull32k(problem.Text2TextProblem): + """A language model on full English Wikipedia.""" @property def is_character_level(self): @@ -128,4 +128,13 @@ def generator(self, data_dir, tmp_dir, _): title = _page_title(page) encoded = encoder.encode(page) + [EOS] encoded_title = encoder.encode(title) + [EOS] - yield {"inputs": encoded_title, "targets": encoded} + yield {"inputs": encoded, "targets": encoded_title} + + +@registry.register_problem +class LanguagemodelWikiFull8k(problem.Text2TextProblem): + """A language model on full English Wikipedia.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 52990eb5f..93fc27ac5 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Data generators for WMT data-sets.""" +"""Data generators for translation data-sets.""" from __future__ import absolute_import from __future__ import division @@ -39,8 +39,8 @@ EOS = text_encoder.EOS_ID -class WMTProblem(problem.Text2TextProblem): - """Base class for WMT problems.""" +class TranslateProblem(problem.Text2TextProblem): + """Base class for translation problems.""" @property def is_character_level(self): @@ -381,8 +381,8 @@ def _compile_data(tmp_dir, datasets, filename): return filename -@registry.register_problem("wmt_ende_tokens_8k") -class WMTEnDeTokens8k(WMTProblem): +@registry.register_problem +class TranslateEndeWmt8k(TranslateProblem): """Problem spec for WMT En-De translation.""" @property @@ -407,16 +407,16 @@ def target_space_id(self): return problem.SpaceID.DE_TOK -@registry.register_problem("wmt_ende_tokens_32k") -class WMTEnDeTokens32k(WMTEnDeTokens8k): +@registry.register_problem +class TranslateEndeWmt32k(TranslateEndeWmt8k): @property def targeted_vocab_size(self): return 2**15 # 32768 -@registry.register_problem("wmt_ende_characters") -class WMTEnDeCharacters(WMTProblem): +@registry.register_problem +class TranslateEndeWmtCharacters(TranslateProblem): """Problem spec for WMT En-De translation.""" @property @@ -440,8 +440,8 @@ def target_space_id(self): return problem.SpaceID.DE_CHR -@registry.register_problem("wmt_zhen_tokens_8k") -class WMTZhEnTokens8k(WMTProblem): +@registry.register_problem +class TranslateEnzhWmt8k(TranslateProblem): """Problem spec for WMT Zh-En translation.""" @property @@ -466,7 +466,10 @@ def generator(self, data_dir, tmp_dir, train): target_vocab_size, target_datasets) tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) - return bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", + # We generate English->X data by convention, to train reverse translation + # just add the "_rev" suffix to the problem name, e.g., like this. + # --problems=translate_enzh_wmt8k_rev + return bi_vocabs_token_generator(data_path + ".lang2", data_path + ".lang1", source_vocab, target_vocab, EOS) @property @@ -491,8 +494,8 @@ def feature_encoders(self, data_dir): } -@registry.register_problem("wmt_enfr_tokens_8k") -class WMTEnFrTokens8k(WMTProblem): +@registry.register_problem +class TranslateEnfrWmt8k(TranslateProblem): """Problem spec for WMT En-Fr translation.""" @property @@ -517,16 +520,16 @@ def target_space_id(self): return problem.SpaceID.FR_TOK -@registry.register_problem("wmt_enfr_tokens_32k") -class WMTEnFrTokens32k(WMTEnFrTokens8k): +@registry.register_problem +class TranslateEnfrWmt32k(TranslateEnfrWmt8k): @property def targeted_vocab_size(self): return 2**15 # 32768 -@registry.register_problem("wmt_enfr_characters") -class WMTEnFrCharacters(WMTProblem): +@registry.register_problem +class TranslateEnfrWmtCharacters(TranslateProblem): """Problem spec for WMT En-Fr translation.""" @property @@ -550,8 +553,8 @@ def target_space_id(self): return problem.SpaceID.FR_CHR -@registry.register_problem("setimes_mken_tokens_32k") -class SETimesMkEnTokens32k(WMTProblem): +@registry.register_problem +class TranslateEnmkSetimes32k(TranslateProblem): """Problem spec for SETimes Mk-En translation.""" @property @@ -571,7 +574,10 @@ def generator(self, data_dir, tmp_dir, train): source_datasets + target_datasets) tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", + # We generate English->X data by convention, to train reverse translation + # just add the "_rev" suffix to the problem name, e.g., like this. + # --problems=translate_enmk_setimes32k_rev + return token_generator(data_path + ".lang2", data_path + ".lang1", symbolizer_vocab, EOS) @property @@ -583,8 +589,8 @@ def target_space_id(self): return problem.SpaceID.EN_TOK -@registry.register_problem("wmt_encs_tokens_32k") -class WMTEnCsTokens32k(WMTProblem): +@registry.register_problem +class TranslateEncsWmt32k(TranslateProblem): """Problem spec for WMT English-Czech translation.""" @property @@ -616,8 +622,8 @@ def target_space_id(self): return problem.SpaceID.CS_TOK -@registry.register_problem("wmt_encs_characters") -class WMTEnCsCharacters(WMTProblem): +@registry.register_problem +class TranslateEncsWmtCharacters(TranslateProblem): """Problem spec for WMT En-Cs character-based translation.""" @property diff --git a/tensor2tensor/models/gene_expression_test.py b/tensor2tensor/models/gene_expression_test.py index e2307f49f..cc4cd1200 100644 --- a/tensor2tensor/models/gene_expression_test.py +++ b/tensor2tensor/models/gene_expression_test.py @@ -70,7 +70,7 @@ def testGeneExpressionModels(self): gene_expression_conv_test())] for model_cls, hparams in models_hparams: hparams.add_hparam("data_dir", None) - p_hparams = gene_data.GeneExpressionCAGE10().internal_hparams(hparams) + p_hparams = gene_data.GenomicsExpressionCage10().internal_hparams(hparams) hparams.problems = [p_hparams] self._testModel(hparams, model_cls) diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 6ce650ac3..33a2fd963 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -44,13 +44,13 @@ class MyModel(T2TModel): from __future__ import division from __future__ import print_function -import collections import inspect import re # Dependency imports import six +from six.moves import xrange # pylint: disable=redefined-builtin _MODELS = {} _HPARAMS = {} @@ -391,17 +391,16 @@ def create_modality(modality_spec, model_hparams): return retrieval_fns[modality_type](modality_name)(model_hparams, vocab_size) -def _hparams_help_string(): - hparams_names = list_hparams() - prefixes = zip([name.split("_")[0] for name in hparams_names], hparams_names) - names_by_prefix = collections.defaultdict(list) - for (prefix, full_name) in prefixes: - names_by_prefix[prefix].append(full_name) - return "\n".join( - sorted([ - " * %s: %s" % (prefix, sorted(names)) - for prefix, names in six.iteritems(names_by_prefix) - ])) +def display_list_by_prefix(names_list, starting_spaces=0): + cur_prefix, result_lines = None, [] + space = "".join([" " for _ in xrange(starting_spaces)]) + for name in sorted(names_list): + prefix, _ = name.split("_", 1) + if cur_prefix != prefix: + result_lines.append(space + prefix + ":") + cur_prefix = prefix + result_lines.append(space + " * " + name) + return "\n".join(result_lines) def help_string(): @@ -410,24 +409,29 @@ def help_string(): Registry contents: ------------------ - Models: %s + Models: +%s - HParams (by model): + HParams: %s - RangedHParams: %s + RangedHParams: +%s - Modalities: %s + Modalities: +%s - Problems: %s + Problems: +%s """ - m, rhp, mod, probs = [ - sorted(entries) + m, hp, rhp, mod, probs = [ + display_list_by_prefix(entries, starting_spaces=4) for entries in [ list_models(), + list_hparams(), list_ranged_hparams(), list_modalities(), list_problems() ] ] - return help_str % (m, _hparams_help_string(), rhp, mod, probs) + return help_str % (m, hp, rhp, mod, probs) From fd8a2229e6e0f17db3b6901c9fd02e7982727f2a Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 14 Aug 2017 20:17:45 -0700 Subject: [PATCH 0266/4095] Typo and VAE improvements. PiperOrigin-RevId: 165265220 --- tensor2tensor/models/transformer_vae.py | 104 +++++++++++++----------- tensor2tensor/utils/decoding.py | 2 +- 2 files changed, 56 insertions(+), 50 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index ffd791a04..7d1575a0b 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -78,18 +78,20 @@ def decompress_step(source, c, hparams, first_relu, name): shape = tf.shape(source) if c is not None: source = attend(source, c, hparams, "decompress_attend") - first = common_layers.conv_block( - source, - hparams.hidden_size, [((1, 1), (3, 1)), ((1, 1), (3, 1))], - first_relu=first_relu, padding="SAME", name="decompress_conv1") - second = common_layers.conv_block( - tf.concat([source, first], axis=3), - hparams.hidden_size, [((1, 1), (3, 1)), ((1, 1), (3, 1))], - first_relu=first_relu, padding="SAME", name="decompress_conv2") - thicker = interleave(first, second) + thicker = common_layers.conv_block( + source, hparams.hidden_size * 2, [((1, 1), (1, 1))], + first_relu=first_relu, name="decompress_conv") return tf.reshape(thicker, [shape[0], shape[1] * 2, 1, hparams.hidden_size]) +def dvae(x, hparams, name): + with tf.variable_scope(name): + m = tf.layers.dense(x, hparams.v_size, name="mask") + m = tf.nn.softmax(m) + kl = - tf.reduce_max(m, axis=-1) + return m, tf.reduce_mean(kl) + + def vae(x, hparams, name): with tf.variable_scope(name): mu = tf.layers.dense(x, hparams.z_size, name="mu") @@ -120,21 +122,39 @@ def compress(x, c, hparams, name): def vae_compress(x, c, hparams, compress_name, decompress_name, reuse=None): """Compress, then VAE.""" with tf.variable_scope(compress_name, reuse=reuse): - cur = compress(x, c, hparams, "compress") + cur = compress(x, None, hparams, "compress") # Convolve and ReLu to get state. cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") - z, kl_loss, mu, log_sigma = vae(cur, hparams, name="vae") + # z, kl_loss, mu, log_sigma = vae(cur, hparams, name="vae") + z, kl_loss = dvae(cur, hparams, name="dvae") + mu, log_sigma = None, None + + # Compress context. + with tf.variable_scope(compress_name, reuse=reuse): + compress_c = compress(c, None, hparams, "compress_context") + c_z = tf.layers.dense(compress_c, hparams.v_size, name="mask_context") + reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( + labels=z, logits=c_z) with tf.variable_scope(decompress_name, reuse=reuse): # Decompress. z = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense") + # Leak at the beginning to help train. + alpha_p = common_layers.inverse_lin_decay(30000) + 0.001 + alpha = tf.random_uniform(tf.shape(cur)) + alpha = tf.to_float(tf.less(alpha, alpha_p)) + z = alpha * z + (1.0 - alpha) * cur + + # TODO(lukaszkaiser): If not training, use the predicted z. + # is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN + for i in xrange(hparams.num_compress_steps): j = hparams.num_compress_steps - i - 1 z = residual_conv(z, 1, hparams, "decompress_rc_%d" % j) - z = decompress_step(z, c, hparams, i > 0, "decompress__step_%d" % j) - return z, kl_loss, mu, log_sigma + z = decompress_step(z, c, hparams, i > 0, "decompress_step_%d" % j) + return z, kl_loss + 0.001 * reconstruct_loss, mu, log_sigma def encode(x, x_space, hparams, name): @@ -167,7 +187,6 @@ def ffn(x, hparams, name): def vae_transformer_internal(inputs, targets, target_space, hparams): """VAE Transformer, main step used for training.""" with tf.variable_scope("vae_transformer"): - is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN # Prepare inputs, targets, and k. inputs = common_layers.flatten4d3d(inputs) input_len = tf.shape(inputs)[1] # Double input size to cover targets. @@ -179,39 +198,25 @@ def vae_transformer_internal(inputs, targets, target_space, hparams): inputs, targets, final_length_divisible_by=k) inputs = encode(inputs, target_space, hparams, "input_enc") - # Dropout targets or swap for zeros 5% of the time. - targets_nodrop = targets - max_prestep = hparams.kl_warmup_steps - prob_targets = 0.95 if is_training else 1.0 - targets_dropout_max = common_layers.inverse_lin_decay(max_prestep) - 0.01 - targets = dropmask(targets, targets_dropout_max * 0.7, is_training) - targets = tf.cond(tf.less(tf.random_uniform([]), prob_targets), - lambda: targets, lambda: tf.zeros_like(targets)) - targets = targets_nodrop - # Compress and vae. - z = tf.get_variable("z", [hparams.hidden_size]) - z = tf.reshape(z, [1, 1, 1, -1]) - z = tf.tile(z, [tf.shape(inputs)[0], 1, 1, 1]) - - z = attend(z, inputs, hparams, "z_attendsi") - z = ffn(z, hparams, "zff2") - z = attend(z, targets, hparams, "z_attendst2") - z = ffn(z, hparams, "zff3") - z, kl_loss, _, _ = vae(z, hparams, name="vae") - z = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense") - - # z, kl_loss, _, _ = vae_compress( - # tf.expand_dims(targets, axis=2), tf.expand_dims(inputs, axis=2), - # hparams, "vae_compress", "vae_decompress") - - decoder_in = tf.squeeze(z, axis=2) + tf.zeros_like(targets) - (decoder_input, decoder_self_attention_bias) = ( - transformer.transformer_prepare_decoder(decoder_in, hparams)) - ret = transformer.transformer_decoder( - decoder_input, inputs, decoder_self_attention_bias, None, hparams) - - kl_loss *= common_layers.inverse_exp_decay(int(max_prestep * 1.5)) * 5.0 + z, kl_loss, _, _ = vae_compress(tf.expand_dims(targets, axis=2), + tf.expand_dims(inputs, axis=2), + hparams, "vae_compress", "vae_decompress") + + # Join z with inputs, run decoder. + to_decode = common_layers.conv_block( + tf.concat([z, tf.expand_dims(inputs, axis=2)], axis=3), + hparams.hidden_size, [((1, 1), (1, 1))], name="join_z") + ret = encode(tf.squeeze(to_decode, axis=2), target_space, hparams, "dec") + + # For experiments with one-sided decoder: + # decoder_in = tf.squeeze(to_decode, axis=2) + # (decoder_input, decoder_self_attention_bias) = ( + # transformer.transformer_prepare_decoder(decoder_in, hparams)) + # ret = transformer.transformer_decoder( + # decoder_input, inputs, decoder_self_attention_bias, None, hparams) + + kl_loss *= common_layers.inverse_exp_decay(hparams.kl_warmup_steps) * 3.0 losses = {"kl": kl_loss} return tf.expand_dims(ret, axis=2), losses @@ -267,10 +272,11 @@ def transformer_vae_small(): """Set of hyperparameters.""" hparams = transformer.transformer_small() hparams.batch_size = 2048 - hparams.learning_rate_warmup_steps = 16000 + hparams.learning_rate_warmup_steps = 4000 hparams.add_hparam("z_size", 128) + hparams.add_hparam("v_size", 1024*8) hparams.add_hparam("num_compress_steps", 4) - hparams.add_hparam("kl_warmup_steps", 60000) + hparams.add_hparam("kl_warmup_steps", 50000) return hparams @@ -283,6 +289,6 @@ def transformer_vae_base(): hparams.attention_dropout = 0.0 hparams.relu_dropout = 0.0 hparams.dropout = 0.0 - hparams.num_hidden_layers = 3 + hparams.num_hidden_layers = 4 hparams.z_size = 256 return hparams diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index da33cf90e..4ba8dc71a 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -271,7 +271,7 @@ def _interactive_input_fn(hparams): " in=<input_problem> (set the input problem number)\n" " ou=<output_problem> (set the output problem number)\n" " ns=<num_samples> (changes number of samples)\n" - " dl=<decode_length> (changes decode legnth)\n" + " dl=<decode_length> (changes decode length)\n" " <%s> (decode)\n" " q (quit)\n" ">" % (num_samples, decode_length, "source_string" From 62c0e9fb2c219c17ed014750b0f968381d20c419 Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Mon, 14 Aug 2017 23:00:56 -0700 Subject: [PATCH 0267/4095] Sparse expert attention layer (Joint programming with @noam and @epot). A sparse set of positions are sent to an expert where they self-attend to each other. It allows to both dramatically increase the number of parameters (attention heads) and computation by reducing the number of positions that self-attend. Currently Implements encoder and decoder self-attention. Seems to work on a small model tested on lm1b. PiperOrigin-RevId: 165273120 --- tensor2tensor/layers/common_attention.py | 105 ++++++++++++++++++++- tensor2tensor/models/attention_lm_moe.py | 69 +++++++++++--- tensor2tensor/utils/beam_search.py | 2 +- tensor2tensor/utils/expert_utils.py | 112 +++++++++++++++++++++-- 4 files changed, 268 insertions(+), 20 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 6b54633f7..73ac898f9 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -17,12 +17,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from functools import partial import math # Dependency imports from tensor2tensor.layers import common_layers +from tensor2tensor.utils import expert_utils import tensorflow as tf @@ -894,5 +896,106 @@ def parameter_attention(x, y = tf.reshape(y, [batch_size, length, total_value_depth]) y.set_shape([None, None, total_value_depth]) y = common_layers.conv1d(y, output_depth, 1, name="output_transform") - return y + + +def coordinate_tensor(shape, axis): + """Return a tensor with given shape containing coordinte along given axis. + + Args: + shape: a Tensor representing the shape of the output Tensor + axis: an integer + + Returns: + A tensor with shape shape and type tf.int32, where each elements its + coordinate along the given axis. + """ + + r = tf.range(shape[axis]) + r_shape = tf.one_hot( + axis, tf.size(shape), on_value=-1, off_value=1, dtype=tf.int32) + return tf.zeros(shape, dtype=tf.int32) + tf.reshape(r, r_shape) + + +def self_attention_expert(x, batch_coordinate, mask_right=True): + """Implementing attention that runs inside each expert. + + Args: + x: A tensor of shape[batch, depth]. Contains representations from + different positions, which are lexicographically ordered. + batch_coordinate: A tensor of shape [batch, 1] containing the batch + coordinate of each element in x. This is needed to make sure that + positions from different sequences don't attend to each other. + mask_right: A bool. If true, we will not attend to positions on the right, + just as decoder self attention. + + Returns: + out: A tensor of shape [batch, depth]. + example use: + expert_utils.local_moe( + ... + expert_fn=functools.partial(self_attention_expert, mask_right=) + ) + """ + depth = x.get_shape().as_list()[-1] + length = tf.shape(batch_coordinate)[0] + batch_coordinate = tf.squeeze(batch_coordinate, 1) + bias = tf.to_float( + tf.not_equal(tf.expand_dims(batch_coordinate, 1), + tf.expand_dims(batch_coordinate, 0))) * -1e9 + if mask_right: + bias += tf.reshape( + attention_bias_lower_triangle(length), [length, length]) + # bias has shape [length, length] + bias = tf.reshape(bias, [1, 1, length, length]) + x = tf.reshape(x, [1, length, depth]) + out = multihead_attention(x, + None, + bias, + total_key_depth=depth, + total_value_depth=depth, + output_depth=depth, + num_heads=1, + dropout_rate=0.0) + out = tf.squeeze(out, 0) + return out + +# functools.partial(self_attention_expert, mask_right=, depth=) + + +def local_expert_attention(x, k, loss_coef, attention_num_experts, train=True, + mask_right=True): + """Attention using a mixture of experts. + + Positions sent to the same expert can attend to each other. + The mixture of experts is "local" in that it is replicated on each + datashard. + + Args: + x: a Tensor with shape [batch, length, depth] + k: The number of experts to dispatch each example to + loss_coef: a scalar. A multiplier for the expert loss + attention_num_experts: The number of experts to use + train: a boolean for the current mode + mask_right: A boolean. If true, we will mask out positions to the right + for self-attention. + + Returns: + y: a Tensor with shape [batch, length, depth] + loss: a Scalar + """ + with tf.variable_scope("local_expert_attention"): + additional_dispatch_params = { + "batch_coordinate": tf.expand_dims( + coordinate_tensor(tf.shape(x)[:-1], axis=0), axis=-1) + } + return expert_utils.local_moe( + x, + train, + partial(self_attention_expert, mask_right=mask_right), + attention_num_experts, + k=k, + loss_coef=loss_coef, + pass_x=True, + pass_gates=False, + additional_dispatch_params=additional_dispatch_params) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index c24f9179b..9c55eadd6 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -39,6 +39,19 @@ import tensorflow as tf +class AttentionMoeType(object): + NONE = "none" + LOCAL = "local" + GLOBAL = "global" + + @staticmethod + def get_choices(): + return [ + AttentionMoeType.NONE, + AttentionMoeType.LOCAL, + ] + + @registry.register_model class AttentionLmMoe(t2t_model.T2TModel): """Attention net. See file docstring.""" @@ -66,17 +79,33 @@ def postprocess(x, y): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("attention"): - y = dp( - common_attention.multihead_attention, - preprocess(x), - None, - decoder_self_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - name="decoder_self_attention") + x = preprocess(x) + if hparams.attention_moe_type == AttentionMoeType.NONE: + y = dp( + common_attention.multihead_attention, + x, + None, + decoder_self_attention_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + name="decoder_self_attention") + elif hparams.attention_moe_type == AttentionMoeType.LOCAL: + y, loss = dp( + common_attention.local_expert_attention, + x, + k=2, + loss_coef=1e-2, + attention_num_experts=hparams.attention_num_experts, + train=hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + mask_right=True) + # TODO(avaswani, epot, noam): Do we need to divide by num shards ? + extra_loss += tf.add_n(loss)/dp.n + else: + raise ValueError("Only {} supported for now.".format( + AttentionMoeType.get_choices())) x = postprocess(x, y) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers.split(","): @@ -174,6 +203,9 @@ def attention_lm_moe_base(): hparams.add_hparam("relu_dropout", 0.0) hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("moe_layers", "2") # comma separated list of layer numbers + # moe params. local attention moe. + hparams.add_hparam("attention_moe_type", AttentionMoeType.NONE) + hparams.add_hparam("attention_num_experts", 16) return hparams @@ -211,6 +243,21 @@ def attention_lm_moe_tiny(): return hparams +@registry.register_hparams +def attention_lm_attention_moe_tiny(): + """Cheap model for debugging. + + Returns: + an hparams object. + """ + hparams = attention_lm_moe_small() + hparams.moe_layers = "" + hparams.attention_num_experts = 16 + hparams.filter_size = 512 + hparams.attention_moe_type = AttentionMoeType.LOCAL + return hparams + + @registry.register_hparams def attention_lm_no_moe_small(): """Without the mixture of experts (for comparison). diff --git a/tensor2tensor/utils/beam_search.py b/tensor2tensor/utils/beam_search.py index dd8275204..be6c28559 100644 --- a/tensor2tensor/utils/beam_search.py +++ b/tensor2tensor/utils/beam_search.py @@ -256,7 +256,7 @@ def grow_topk(i, alive_seq, alive_log_probs): topk_scores, topk_ids = tf.nn.top_k(flat_curr_scores, k=beam_size * 2) - # Recovering the log probs becuase we will need to send them back + # Recovering the log probs because we will need to send them back topk_log_probs = topk_scores * length_penalty # Work out what beam the top probs are in. diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index ac58ef3cd..6f26f20fa 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -34,6 +34,8 @@ from tensorflow.python.framework import function +DEFAULT_DEV_STRING = "existing_device" + @function.Defun( python_grad_func=lambda x, dy: tf.convert_to_tensor(dy), @@ -180,7 +182,14 @@ def caching_getter(getter, name, *args, **kwargs): reuse=True if i > 0 and self._reuse else None, caching_device=self._caching_devices[i], custom_getter=custom_getter): - with tf.device(self._devices[i]): + # TODO(noam, epot, avaswani) + # Allows for passing no device in case you want to default to the + # existing device. This is needed when we put all experts on a single + # device, for example in local_moe. + if self._devices[i] != DEFAULT_DEV_STRING: + with tf.device(self._devices[i]): + outputs.append(fns[i](*my_args[i], **my_kwargs[i])) + else: outputs.append(fns[i](*my_args[i], **my_kwargs[i])) if isinstance(outputs[0], tuple): outputs = list(zip(*outputs)) @@ -361,7 +370,6 @@ def _my_top_k(x, k): def noisy_top_k_gating(x, - input_size, num_experts, train, k=2, @@ -375,7 +383,6 @@ def noisy_top_k_gating(x, Args: x: input Tensor with shape [batch_size, input_size] - input_size: an integer num_experts: an integer train: a boolean - we only add noise at training time. k: an integer - number of experts per example @@ -389,6 +396,7 @@ def noisy_top_k_gating(x, load: a Tensor with shape [num_experts] """ with tf.variable_scope(name, default_name="noisy_top_k_gating"): + input_size = x.get_shape().as_list()[-1] w_gate = tf.get_variable( "w_gate", [input_size, num_experts], tf.float32, initializer) if noisy_gating: @@ -431,6 +439,25 @@ def noisy_top_k_gating(x, class SparseDispatcher(object): """Helper for implementing a mixture of experts. + The purpose of this class is to create input minibatches for the + experts and to combine the results of the experts to form a unified + output tensor. + + There are two functions: + dispatch - take an input Tensor and create input Tensors for each expert. + combine - take output Tensors from each expert and form a combined output + Tensor. Outputs from different experts for the same batch element are + summed together, weighted by the provided "gates". + + The class is initialized with a "gates" Tensor, which specifies which + batch elements go to which experts, and the weights to use when combining + the outputs. Batch element b is sent to expert e iff gates[b, e] != 0. + + The inputs and outputs are all two-dimensional [batch, depth]. + Caller is responsible for collapsing additional dimensions prior to + calling this class and reshaping the output to the original shape. + See reshape_like(). + Example use: gates: a float32 `Tensor` with shape `[batch_size, num_experts]` @@ -526,8 +553,8 @@ class DistributedSparseDispatcher(object): """A distributed version of SparseDispatcher. Instead of one batch of input examples, we simultaneously process - num_datashards batches of input examples. The per-expert `Tensor`s contain - a combination of examples from the different datashards. + a list of num_datashards batches of input examples. The per-expert + `Tensor`s contain a combination of examples from the different datashards. Each datashard is associated with a particular device and each expert is associated with a particular device. All per-datashard and per-expert @@ -655,6 +682,13 @@ def reshape_like(a, b): return ret +def flatten_all_but_last(a): + """Flatten all dimensions of a except the last.""" + ret = tf.reshape(a, [-1, tf.shape(a)[-1]]) + ret.set_shape([None] + a.get_shape().as_list()[-1:]) + return ret + + def distributed_moe(data_parallelism, expert_devices, xs, @@ -676,7 +710,8 @@ def distributed_moe(data_parallelism, input_size: an integer (input size for this layer) expert_fn: a unary function for each expert to run It should take a Tensor with shape [batch_size, input_size] - and return a Tensor with shape [batch_size, output_size] + and return a Tensor with shape [batch_size, output_size]. + e.g. ffn_expert_fn(...) num_experts: an integer - number of experts k: an integer - how many experts to use for each batch element loss_coef: a scalar - multiplier on load-balancing losses @@ -703,7 +738,6 @@ def distributed_moe(data_parallelism, # load is a measure of approximately how many examples go to each expert gates, load = dp(noisy_top_k_gating, xs_flat, - input_size, num_experts, train, k, @@ -721,3 +755,67 @@ def distributed_moe(data_parallelism, importance = tf.add_n(dp(tf.reduce_sum, gates, 0)) loss = loss_coef * (cv_squared(importance) + cv_squared(load)) return ys, loss + + +def local_moe(x, + train, + expert_fn, + num_experts, + k=2, + loss_coef=1e-2, + pass_x=True, + pass_gates=False, + additional_dispatch_params=None, + name=None): + """Call a local mixture of experts. + + Args: + x: a tensors with shape [... , input_size] + train: a boolean scalar. + expert_fn: a function. + num_experts: an integer - number of experts + k: an integer - how many experts to use for each batch element + loss_coef: a scalar - multiplier on load-balancing losses + pass_x: a boolean. If true, x will also be dispatched to the experts. + pass_gates: a boolean. If true, gates will be passed to experts. Might be + necessary when dealing with sparse encoder-encoder decoder attention + additional_dispatch_params: The extra tensors that need to be sent to each + expert. Examples include batch batch coordinates (see + common_attention.local_expert_attention) + name: a string + + Returns: + y: a tensor. Has the same shape as x, except for the last dimension, + which is output_size. + extra_training_loss: a scalar. This should be added into the overall + training loss of the model. The backpropagation of this loss + encourages all experts to be approximately equally used across a batch. + """ + with tf.variable_scope(name, default_name="local_moe"): + x_flat = flatten_all_but_last(x) + # The gates indicate which batch elements go to which tensors. + # load is a measure of approximately how many examples go to each expert + gates, load = noisy_top_k_gating( + x_flat, + num_experts, + train, + k, + initializer=tf.zeros_initializer(), + noisy_gating=True, + noise_epsilon=1e-2) + # This magic object helps us shuffle data between datashards and experts. + dispatcher = SparseDispatcher(num_experts, gates) + expert_kwargs = {} + if pass_x: + expert_kwargs["x"] = dispatcher.dispatch(x_flat) + if pass_gates: + expert_kwargs["gates"] = dispatcher.expert_to_gates() + for k, v in six.iteritems(additional_dispatch_params or {}): + expert_kwargs[k] = dispatcher.dispatch(flatten_all_but_last(v)) + ep = Parallelism([DEFAULT_DEV_STRING] * num_experts) + expert_outputs = ep(expert_fn, **expert_kwargs) + y_flat = dispatcher.combine(expert_outputs) + y = reshape_like(y_flat, x) + importance = tf.reduce_sum(gates, 0) + loss = loss_coef * (cv_squared(importance) + cv_squared(load)) + return y, loss From 713ff7c0083149dd84156a9e6381219413d06ec5 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 15 Aug 2017 11:10:52 -0700 Subject: [PATCH 0268/4095] Corrections for problems missed during recent renaming. PiperOrigin-RevId: 165331538 --- tensor2tensor/data_generators/problem_hparams.py | 10 +++++----- tensor2tensor/data_generators/wiki.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index b33438d6d..4a6053613 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -492,16 +492,16 @@ def image_celeba(unused_model_hparams): lambda p: audio_wsj_tokens(p, 2**13), "audio_wsj_tokens_8k_test": lambda p: audio_wsj_tokens(p, 2**13), - "lm1b_characters": + "languagemodel_1b_characters": lm1b_characters, - "lm1b_32k": + "languagemodel_1b32k": lm1b_32k, - "wmt_parsing_tokens_8k": + "parsing_english_ptb8k": lambda p: wmt_parsing_tokens(p, 2**13), - "wsj_parsing_tokens_16k": + "parsing_english_ptb16k": lambda p: wsj_parsing_tokens( # pylint: disable=g-long-lambda p, "wsj", 2**14, 2**9), - "wmt_ende_bpe32k": + "translate_ende_wmt_bpe32k": wmt_ende_bpe32k, "image_celeba_tune": image_celeba, diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index fb73fc725..3cdbac5db 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -128,7 +128,7 @@ def generator(self, data_dir, tmp_dir, _): title = _page_title(page) encoded = encoder.encode(page) + [EOS] encoded_title = encoder.encode(title) + [EOS] - yield {"inputs": encoded, "targets": encoded_title} + yield {"inputs": encoded_title, "targets": encoded} @registry.register_problem From d2c9115a937d9c37d43b30ad52e83d05d860e3ab Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 15 Aug 2017 12:07:45 -0700 Subject: [PATCH 0269/4095] Fix registry help string PiperOrigin-RevId: 165340233 --- tensor2tensor/utils/registry.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 33a2fd963..f5d83cbf1 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -50,7 +50,6 @@ class MyModel(T2TModel): # Dependency imports import six -from six.moves import xrange # pylint: disable=redefined-builtin _MODELS = {} _HPARAMS = {} @@ -392,10 +391,12 @@ def create_modality(modality_spec, model_hparams): def display_list_by_prefix(names_list, starting_spaces=0): + """Creates a help string for names_list grouped by prefix.""" cur_prefix, result_lines = None, [] - space = "".join([" " for _ in xrange(starting_spaces)]) + space = " " * starting_spaces for name in sorted(names_list): - prefix, _ = name.split("_", 1) + split = name.split("_", 1) + prefix = split[0] if cur_prefix != prefix: result_lines.append(space + prefix + ":") cur_prefix = prefix From b07158bf2cbe15370ea93cf6efaa93988da8ef32 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Tue, 15 Aug 2017 12:11:45 -0700 Subject: [PATCH 0270/4095] Crop CelebA images, change resizing method PiperOrigin-RevId: 165340753 --- tensor2tensor/utils/data_reader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 5c7041014..03e7720b6 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -133,7 +133,8 @@ def preprocessing(examples, data_file_pattern): # all to the Problem class and its preprocess_examples method. Don't add. if "image" in data_file_pattern: def resize(img, size): - return tf.to_int64(tf.image.resize_images(img, [size, size])) + return tf.to_int64(tf.image.resize_images( + img, [size, size], tf.image.ResizeMethod.AREA)) if "img2img" in data_file_pattern: inputs = examples["inputs"] @@ -141,6 +142,9 @@ def resize(img, size): examples["targets"] = resize(inputs, 64) elif "image_celeba" in data_file_pattern: inputs = examples["inputs"] + # Remove boundaries in CelebA images. Remove 40 pixels each side + # vertically and 20 pixels each side horizontally. + inputs = tf.image.crop_to_bounding_box(inputs, 40, 20, 218-80, 178-40) examples["inputs"] = resize(inputs, 8) examples["targets"] = resize(inputs, 32) elif "audio" in data_file_pattern: From 8ff454910f935db2a636612bbe56a40f3f9fda4b Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Tue, 15 Aug 2017 15:55:33 -0700 Subject: [PATCH 0271/4095] Fixing bugs in ImageImagenet32 generation and changing the preprocessing if _rev is used. Improves perplexity significantly. PiperOrigin-RevId: 165371918 --- tensor2tensor/data_generators/image.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index f61f85b54..d9a6be6ff 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -214,10 +214,21 @@ def dataset_filename(self): def is_small(self): return True # Modalities like for CIFAR. - def preprocess_examples(self, examples, mode): - examples = imagenet_preprocess_examples(examples, mode) - examples["inputs"] = tf.to_int64( - tf.image.resize_images(examples["inputs"], [32, 32])) + @property + def num_classes(self): + return 1000 + + def preprocess_examples(self, examples, mode, hparams): + # Just resize with area. + if self._was_reversed: + examples["inputs"] = tf.to_int64( + tf.image.resize_images(examples["inputs"], [32, 32], + tf.image.ResizeMethod.AREA)) + else: + examples = imagenet_preprocess_examples(examples, mode) + examples["inputs"] = tf.to_int64( + tf.image.resize_images(examples["inputs"], [32, 32])) + return examples def image_generator(images, labels): From c4526edde718a401ce876d20707db686c50e851a Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 15 Aug 2017 16:04:52 -0700 Subject: [PATCH 0272/4095] Update batch size in GeneExpressionConv model PiperOrigin-RevId: 165373172 --- tensor2tensor/models/gene_expression.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/models/gene_expression.py b/tensor2tensor/models/gene_expression.py index 27aa631c6..9d676632e 100644 --- a/tensor2tensor/models/gene_expression.py +++ b/tensor2tensor/models/gene_expression.py @@ -130,8 +130,14 @@ def fc_layer(x, num_out, dropout_rate, name="fc"): def gene_expression_conv_base(): """Hparams for GeneExpressionConv model.""" hparams = common_hparams.basic_params1() - hparams.max_length = 10000000 - hparams.batch_size = 1024 + + batch_size = 10 + output_length = 2048 + inputs_per_output = 128 + chunk_size = 4 + input_length = output_length * inputs_per_output // chunk_size + hparams.batch_size = input_length * batch_size + hparams.dropout = 0.1 hparams.add_hparam("num_conv_layers", 4) hparams.add_hparam("num_dconv_layers", 7) From 8f99d47a5f2edce34b12ecf4928767de175018ba Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Tue, 15 Aug 2017 23:25:53 -0700 Subject: [PATCH 0273/4095] The current attention computes compatibilities between single query, key, and value positions. This CL extends it to computing them between windows of queries, keys, and values. It's like a combination of convolution and attention. Does not change defaults. PiperOrigin-RevId: 165405437 --- tensor2tensor/layers/common_attention.py | 98 +++++++++++++++++++----- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 73ac898f9..2c3e4b71f 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -681,6 +681,70 @@ def gather_blocks(x, indices): return tf.reshape(output, v_shape) +def compute_qkv(query_antecedent, memory_antecedent, total_key_depth, + total_value_depth, q_filter_width=1, kv_filter_width=1, + q_padding="VALID", kv_padding="VALID"): + """Computes query, key and value. + + Args: + query_antecedent: a Tensor with shape [batch, length_q, channels] + memory_antecedent: a Tensor with shape [batch, length_m, channels] + total_key_depth: an integer + total_value_depth: and integer + q_filter_width: An integer specifying how wide you want the query to be. + kv_filter_width: An integer specifying how wide you want the keys and values + to be. + q_padding: One of "VALID", "SAME" or "LEFT". Default is VALID: No padding. + kv_padding: One of "VALID", "SAME" or "LEFT". Default is VALID: No padding. + + Returns: + q, k, v : [batch, length, depth] tensors + """ + if memory_antecedent is None and q_filter_width == kv_filter_width == 1: + # self attention with single position q, k, and v + combined = common_layers.conv1d( + query_antecedent, + total_key_depth * 2 + total_value_depth, + 1, + name="qkv_transform") + q, k, v = tf.split( + combined, [total_key_depth, total_key_depth, total_value_depth], + axis=2) + return q, k, v + + if memory_antecedent is None: + # self attention + q = common_layers.conv1d( + query_antecedent, + total_key_depth, + q_filter_width, + padding=q_padding, + name="q_transform") + kv_combined = common_layers.conv1d( + query_antecedent, + total_key_depth + total_value_depth, + kv_filter_width, + padding=kv_padding, + name="kv_transform") + k, v = tf.split(kv_combined, [total_key_depth, total_value_depth], + axis=2) + return q, k, v + + # encoder-decoder attention + q = common_layers.conv1d( + query_antecedent, total_key_depth, q_filter_width, padding=q_padding, + name="q_transform") + combined = common_layers.conv1d( + memory_antecedent, + total_key_depth + total_value_depth, + 1, + padding=kv_padding, + name="kv_transform") + k, v = tf.split(combined, [total_key_depth, total_value_depth], axis=2) + + return q, k, v + + def multihead_attention(query_antecedent, memory_antecedent, bias, @@ -693,6 +757,10 @@ def multihead_attention(query_antecedent, attention_type="dot_product", block_length=128, block_width=128, + q_filter_width=1, + kv_filter_width=1, + q_padding="VALID", + kv_padding="VALID", name=None): """Multihead scaled-dot-product attention with input/output transformations. @@ -711,6 +779,12 @@ def multihead_attention(query_antecedent, "local_unmasked" block_length: an integer - relevant for "local_mask_right" block_width: an integer - relevant for "local_unmasked" + q_filter_width: An integer specifying how wide you want the query to be. + kv_filter_width: An integer specifying how wide you want the keys and values + to be. + q_padding: One of "VALID", "SAME" or "LEFT". Default is VALID: No padding. + kv_padding: One of "VALID", "SAME" or "LEFT". Default is VALID: No padding. + name: an optional string Returns: @@ -726,30 +800,14 @@ def multihead_attention(query_antecedent, if total_value_depth % num_heads != 0: raise ValueError("Value depth (%d) must be divisible by the number of " "attention heads (%d)." % (total_value_depth, num_heads)) - with tf.variable_scope( name, default_name="multihead_attention", values=[query_antecedent, memory_antecedent]): - if memory_antecedent is None: - # self attention - combined = common_layers.conv1d( - query_antecedent, - total_key_depth * 2 + total_value_depth, - 1, - name="qkv_transform") - q, k, v = tf.split( - combined, [total_key_depth, total_key_depth, total_value_depth], - axis=2) - else: - q = common_layers.conv1d( - query_antecedent, total_key_depth, 1, name="q_transform") - combined = common_layers.conv1d( - memory_antecedent, - total_key_depth + total_value_depth, - 1, - name="kv_transform") - k, v = tf.split(combined, [total_key_depth, total_value_depth], axis=2) + q, k, v = compute_qkv(query_antecedent, memory_antecedent, total_key_depth, + total_value_depth, q_filter_width, kv_filter_width, + q_padding, kv_padding) + q = split_heads(q, num_heads) k = split_heads(k, num_heads) v = split_heads(v, num_heads) From 3e295e79f38a0254d8bfa62fa7afe8df98095d3f Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Wed, 16 Aug 2017 17:32:58 -0700 Subject: [PATCH 0274/4095] Play with VAE more, bump version. PiperOrigin-RevId: 165523404 --- setup.py | 2 +- tensor2tensor/models/transformer_vae.py | 69 ++++++++++++++++++++----- 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/setup.py b/setup.py index ff1503990..dd80dfd48 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.8', + version='1.1.9', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 7d1575a0b..6a3f3afdf 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -26,6 +26,7 @@ from tensor2tensor.layers import common_attention from tensor2tensor.layers import common_layers from tensor2tensor.models import transformer +from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -84,12 +85,37 @@ def decompress_step(source, c, hparams, first_relu, name): return tf.reshape(thicker, [shape[0], shape[1] * 2, 1, hparams.hidden_size]) -def dvae(x, hparams, name): +def top_k_softmax(x, k): + """Calculate softmax(x), select top-k and rescale to sum to 1.""" + x = tf.nn.softmax(x) + top_x, _ = tf.nn.top_k(x, k=k+1) + min_top = tf.reduce_min(top_x, axis=-1, keep_dims=True) + x = tf.nn.relu((x - min_top) + 1e-12) + x /= tf.reduce_sum(x, axis=-1, keep_dims=True) + return x, tf.reduce_max(top_x, axis=-1) + + +def top_k_experts(x, k, hparams): + x_shape = tf.shape(x) + x_flat = tf.reshape(x, [-1, x.get_shape().as_list()[-1]]) + is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN + gates, load = expert_utils.noisy_top_k_gating( + x_flat, hparams.v_size, is_training, k) + gates_shape = [x_shape[0], x_shape[1], x_shape[2], hparams.v_size] + gates = tf.reshape(gates, gates_shape) + load_loss = expert_utils.cv_squared(load) + return gates, load_loss + + +def dvae(x, k, hparams, name): with tf.variable_scope(name): m = tf.layers.dense(x, hparams.v_size, name="mask") - m = tf.nn.softmax(m) - kl = - tf.reduce_max(m, axis=-1) - return m, tf.reduce_mean(kl) + if k is None: + m = tf.nn.softmax(m) + kl = - tf.reduce_max(m, axis=-1) + else: + m, kl = top_k_softmax(m, k) + return m, 1.0 - tf.reduce_mean(kl) def vae(x, hparams, name): @@ -119,17 +145,35 @@ def compress(x, c, hparams, name): return cur +def mix(x1, x2, steps, min_prob=0.0, max_prob=1.0, mode="lin"): + if mode == "lin": + alpha_p = common_layers.inverse_lin_decay(steps) + 0.001 + else: + alpha_p = common_layers.inverse_exp_decay(steps) + 0.001 + alpha_p = alpha_p * (max_prob - min_prob) + min_prob + alpha = tf.random_uniform(tf.shape(x1)) + alpha = tf.to_float(tf.less(alpha, alpha_p)) + return alpha * x1 + (1.0 - alpha) * x2 + + def vae_compress(x, c, hparams, compress_name, decompress_name, reuse=None): """Compress, then VAE.""" + mix_k = 8 with tf.variable_scope(compress_name, reuse=reuse): cur = compress(x, None, hparams, "compress") # Convolve and ReLu to get state. cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") # z, kl_loss, mu, log_sigma = vae(cur, hparams, name="vae") - z, kl_loss = dvae(cur, hparams, name="dvae") + z, kl_loss = dvae(cur, None, hparams, name="dvae") + z1, kl_loss1 = top_k_experts(cur, mix_k, hparams) mu, log_sigma = None, None + # Mix expert-selection and flat selection. + alpha_p = common_layers.inverse_lin_decay(60000) + 0.001 + z = alpha_p * z1 + (1 - alpha_p) * z + kl_loss += kl_loss1 + # Compress context. with tf.variable_scope(compress_name, reuse=reuse): compress_c = compress(c, None, hparams, "compress_context") @@ -137,24 +181,23 @@ def vae_compress(x, c, hparams, compress_name, decompress_name, reuse=None): reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( labels=z, logits=c_z) + # If not training, use the predicted z instead of the autoregressive one. + # if hparams.mode != tf.contrib.learn.ModeKeys.TRAIN: + # z = mix(c_z, z, 50000, max_prob=0.3, mode="exp") + # z, _ = top_k_softmax(c_z, mix_k) + with tf.variable_scope(decompress_name, reuse=reuse): # Decompress. z = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense") # Leak at the beginning to help train. - alpha_p = common_layers.inverse_lin_decay(30000) + 0.001 - alpha = tf.random_uniform(tf.shape(cur)) - alpha = tf.to_float(tf.less(alpha, alpha_p)) - z = alpha * z + (1.0 - alpha) * cur - - # TODO(lukaszkaiser): If not training, use the predicted z. - # is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN + z = mix(z, cur, 30000) for i in xrange(hparams.num_compress_steps): j = hparams.num_compress_steps - i - 1 z = residual_conv(z, 1, hparams, "decompress_rc_%d" % j) z = decompress_step(z, c, hparams, i > 0, "decompress_step_%d" % j) - return z, kl_loss + 0.001 * reconstruct_loss, mu, log_sigma + return z, kl_loss + 0.0001 * reconstruct_loss, mu, log_sigma def encode(x, x_space, hparams, name): From f5d5405ac705609d8371eea2bff0eb854470cb1f Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 16 Aug 2017 17:53:01 -0700 Subject: [PATCH 0275/4095] Update implementation of rev_block to use new fn_with_custom_grad (which limits usage of Defun) PiperOrigin-RevId: 165525242 --- tensor2tensor/layers/rev_block.py | 237 +++++++++++++++++------- tensor2tensor/layers/rev_block_test.py | 203 ++++++++++++-------- tensor2tensor/models/rev_transformer.py | 24 ++- 3 files changed, 320 insertions(+), 144 deletions(-) diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index ab424c0e2..4dd1cde03 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -23,6 +23,7 @@ from __future__ import division from __future__ import print_function +import random import re # Dependency imports @@ -137,12 +138,112 @@ def _rev_block_forward(x1, if layer_scopes is not None: layer_scopes.append(layer_vs) out = _rev_layer_forward( - out, f, g, f_side_input, g_side_input, gate_outputs=gate_outputs) + out, + f[i], + g[i], + f_side_input, + g_side_input, + gate_outputs=gate_outputs) y1, y2 = out return y1, y2 +def _underlying_variable(t): + """Find the underlying variable ref, ignoring Identity ops.""" + while t.op.type == "Identity": + t = t.op.inputs[0] + if t.dtype == dtypes.float32_ref and "Variable" in t.op.type: + return t + else: + return None + + +def fn_with_custom_grad(grad_fn): + """Decorator to create a subgraph with a custom gradient function. + + The subgraph created by the decorated function is NOT put in a Defun and so + does not suffer from the limitations of the Defun (all subgraph ops on the + same device, no summaries). + + Args: + grad_fn: function with signature + (inputs, variables, outputs, output_grads) -> (grad_inputs, grad_vars), + all of which are lists of Tensors. + + Returns: + Decorator for function such that the gradient is defined by grad_fn. + """ + + def dec(fn): + + def wrapped(*args): + return _fn_with_custom_grad(fn, args, grad_fn) + + return wrapped + + return dec + + +def _fn_with_custom_grad(fn, inputs, grad_fn): + """Create a subgraph with a custom gradient. + + Args: + fn: function that takes inputs as arguments and produces 1 or more Tensors. + inputs: list<Tensor>, will be passed as fn(*inputs). + grad_fn: function with signature + (inputs, vars, outputs, output_grads) -> (grad_inputs, grad_vars), + all of which are lists of Tensors. + + Returns: + fn(*inputs) + """ + with tf.variable_scope(None, default_name="fn_with_custom_grad") as vs: + inputs = list(inputs) + outputs = fn(*inputs) + train_vars = list(vs.trainable_variables()) + + if grad_fn is None: + return outputs + else: + if not (isinstance(outputs, tuple) or isinstance(outputs, list)): + outputs = [outputs] + outputs = list(outputs) + + in_types = [t.dtype for t in inputs] + out_types = [t.dtype for t in outputs] + var_types = [t.dtype for t in train_vars] + + def custom_grad_fn(op, *dys): + """Custom grad fn applying grad_fn for identity Defun.""" + dys = list(dys) + fn_inputs = op.inputs[:len(inputs)] + fn_vars = op.inputs[len(inputs):len(inputs) + len(train_vars)] + fn_outputs = op.inputs[len(inputs) + len(train_vars):] + assert len(fn_outputs) == len(outputs) + assert len(fn_outputs) == len(dys) + + grad_inputs, grad_vars = grad_fn(fn_inputs, fn_vars, fn_outputs, dys) + grad_outputs = [None] * len(fn_outputs) + return tuple(grad_inputs + grad_vars + grad_outputs) + + # The Defun takes as input the original inputs, the trainable variables + # created in fn, and the outputs. In the forward it passes through the + # outputs. In the backwards, it produces gradients for the original inputs + # and the trainable variables. + @function.Defun( + *(in_types + var_types + out_types), + func_name="identity_custom_grad%d" % random.randint(1, 10**9), + python_grad_func=custom_grad_fn, + shape_func=lambda _: [t.get_shape() for t in outputs]) + def identity(*args): + outs = args[len(inputs) + len(train_vars):] + return tuple([tf.identity(t) for t in outs]) + + id_out = identity(*(inputs + train_vars + outputs)) + return id_out + + def rev_block(x1, x2, f, @@ -156,19 +257,29 @@ def rev_block(x1, A reversible residual layer is defined as: ``` - y1 = x1 + f(x2) - y2 = x2 + g(y1) + y1 = x1 + f(x2, f_side_input) + y2 = x2 + g(y1, g_side_input) ``` + A reversible residual block, defined here, is a series of reversible residual + layers. + + Limitations: + * f and g must not close over any Tensors; all side inputs to f and g should + be passed in with f_side_input and g_side_input which will be forwarded to + f and g. + * f and g must not change the dimensionality of their inputs in order for the + addition in the equations above to work. + Args: x1: a float Tensor. x2: a float Tensor. - f: a function, (Tensor) -> (Tensor). Should not change the shape of the - Tensor. Expected to create variables. See f_side_input if there are side - inputs. - g: a function, (Tensor) -> (Tensor). Should not change the shape of the - Tensor. Expected to create variables. See g_side_input if there are side - inputs. + f: a function, (Tensor) -> (Tensor) (or list of such of length num_layers). + Should not change the shape of the Tensor. Expected to create variables. + See f_side_input if there are side inputs. + g: a function, (Tensor) -> (Tensor) (or list of such of length num_layers). + Should not change the shape of the Tensor. Expected to create variables. + See g_side_input if there are side inputs. num_layers: int, number of reversible residual layers. Each layer will apply f and g according to the equations above, with new variables in each layer. @@ -185,46 +296,43 @@ def rev_block(x1, f_side_input = [] if g_side_input is None: g_side_input = [] + if isinstance(f, list): + assert len(f) == num_layers + else: + f = [f] * num_layers + if isinstance(g, list): + assert len(g) == num_layers + else: + g = [g] * num_layers + # Filled by the forward function below layer_scopes = [] - def rev_block_grad(op, *grad_ys): + def custom_grad_fn(inputs, variables, ys, grad_ys): """Custom gradient fn for a block of reversible residual layers.""" - ys = (op.outputs[0], op.outputs[1]) - - # The Defun will have as inputs the main inputs (x1, x2), the variables - # created inside f and g, and the side inputs to f and g. The order of the - # grads returned from this function must match the order of the inputs. - # The code here partitions the hoisted inputs into f variables, f side - # inputs, g variables, and g side inputs and keeps track of their positions - # in hoisted_inputs. - - hoisted_inputs = op.inputs[2:] - f_vars = [[] for _ in range(num_layers)] - g_vars = [[] for _ in range(num_layers)] - f_vars_idxs = [[] for _ in range(num_layers)] - g_vars_idxs = [[] for _ in range(num_layers)] + side_inputs = inputs[2:] f_side_idxs = [None] * len(f_side_input) g_side_idxs = [None] * len(g_side_input) + assert len(side_inputs) == len(f_side_input) + len(g_side_input) - for t in f_side_input + g_side_input: - assert t in hoisted_inputs - - for i, t in enumerate(hoisted_inputs): - # Side inputs + for i, t in enumerate(side_inputs): if t in f_side_input: f_side_idxs[f_side_input.index(t)] = i - continue - if t in g_side_input: + elif t in g_side_input: g_side_idxs[g_side_input.index(t)] = i - continue + else: + assert False - # Variables - ref = t.op.inputs[0] - assert ref.dtype == dtypes.float32_ref + f_vars = [[] for _ in range(num_layers)] + g_vars = [[] for _ in range(num_layers)] + f_vars_idxs = [[] for _ in range(num_layers)] + g_vars_idxs = [[] for _ in range(num_layers)] + + for i, t in enumerate(variables): + ref = _underlying_variable(t) # Use the name to identify the layer number and function (f or g) - regex = LAYER_RE.match(t.name) + regex = LAYER_RE.match(ref.name) layer_no = int(regex.group(1)) fn_name = regex.group(2) if fn_name == "f": @@ -244,12 +352,15 @@ def rev_block_grad(op, *grad_ys): layer_scopes.reverse() f_vars.reverse() g_vars.reverse() + f.reverse() + g.reverse() for i in xrange(num_layers): with tf.variable_scope(layer_scopes[i], reuse=True): - ys, grad_ys, f_ret, g_ret = (_rev_layer_backward( - ys, grad_ys, f, g, f_vars[i], f_side_input, g_vars[i], - g_side_input)) + + ys, grad_ys, f_ret, g_ret = _rev_layer_backward(ys, grad_ys, f[i], g[i], + f_vars[i], f_side_input, + g_vars[i], g_side_input) grad_f_vars, grad_f_side = f_ret grad_g_vars, grad_g_side = g_ret @@ -262,8 +373,9 @@ def rev_block_grad(op, *grad_ys): acc_f_side_grads = _acc_grads(*f_side_grads) acc_g_side_grads = _acc_grads(*g_side_grads) - # Use the stored idxs to put gradients in the same order as hoisted_inputs. - hoisted_inputs_grads = [None] * len(hoisted_inputs) + # Use the stored idxs to put gradients in the passed-in order. + side_input_grads = [None] * len(side_inputs) + variable_grads = [None] * len(variables) # Variable gradients were collected in reverse layer order. Reverse to match # idxs. @@ -272,43 +384,30 @@ def rev_block_grad(op, *grad_ys): for idxs, grads in zip(f_vars_idxs, f_var_grads) + zip( g_vars_idxs, g_var_grads): for i, grad in zip(idxs, grads): - hoisted_inputs_grads[i] = grad + variable_grads[i] = grad for i, grad in zip(f_side_idxs, acc_f_side_grads): - hoisted_inputs_grads[i] = grad + side_input_grads[i] = grad for i, grad in zip(g_side_idxs, acc_g_side_grads): - hoisted_inputs_grads[i] = grad + side_input_grads[i] = grad grad_x1, grad_x2 = grad_ys - return [grad_x1, grad_x2] + hoisted_inputs_grads - - @function.Defun( - tf.float32, - tf.float32, - python_grad_func=rev_block_grad, - shape_func=lambda _: [x1.get_shape(), x2.get_shape()]) - def rev_block_defun(inp1, inp2): - inp1.set_shape(x1.get_shape()) - inp2.set_shape(x2.get_shape()) - return _rev_block_forward( - inp1, - inp2, - f, - g, - num_layers=num_layers, - f_side_input=f_side_input, - g_side_input=g_side_input, - layer_scopes=layer_scopes, - gate_outputs=True) + return [grad_x1, grad_x2] + side_input_grads, variable_grads - if is_training: - return rev_block_defun(x1, x2) - else: + # Need a forward function with positional arguments + @fn_with_custom_grad(custom_grad_fn if is_training else None) + def forward(x1, x2, *side_inputs): + f_side = side_inputs[:len(f_side_input)] + g_side = side_inputs[len(f_side_input):] return _rev_block_forward( x1, x2, f, g, num_layers=num_layers, - f_side_input=f_side_input, - g_side_input=g_side_input) + f_side_input=f_side, + g_side_input=g_side, + layer_scopes=layer_scopes, + gate_outputs=is_training) + + return forward(x1, x2, *(f_side_input + g_side_input)) diff --git a/tensor2tensor/layers/rev_block_test.py b/tensor2tensor/layers/rev_block_test.py index 8dfa049af..dd4a62993 100644 --- a/tensor2tensor/layers/rev_block_test.py +++ b/tensor2tensor/layers/rev_block_test.py @@ -27,118 +27,175 @@ class RevBlockTest(tf.test.TestCase): - - def testSmoke(self): - channels = 8 - num_layers = 4 - batch_size = 16 - use_defun = True + CHANNELS = 8 + NUM_LAYERS = 4 + BATCH_SIZE = 16 + + def _testRevBlock(self, + x=None, + f=None, + g=None, + f_side_input=None, + g_side_input=None): tf.set_random_seed(1234) - def f(x): - return tf.layers.dense(x, channels // 2, use_bias=True) + if f is None: - def g(x): - return tf.layers.dense(x, channels // 2, use_bias=True) + def f(x): # pylint: disable=function-redefined + return tf.layers.dense(x, self.CHANNELS // 2, use_bias=True) - x = tf.random_uniform([batch_size, channels], dtype=tf.float32) - x1, x2 = tf.split(x, 2, axis=1) - y1, y2 = rev_block.rev_block( - x1, x2, f, g, num_layers=num_layers, is_training=use_defun) - y = tf.concat([y1, y2], axis=1) - loss = tf.reduce_mean(y + 10.) - grads = tf.gradients(loss, [x] + tf.global_variables()) - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - _ = sess.run(grads) + if g is None: - def testRevBlock(self): - channels = 8 - num_layers = 4 - batch_size = 16 - tf.set_random_seed(1234) + def g(x): # pylint: disable=function-redefined + return tf.layers.dense(x, self.CHANNELS // 2, use_bias=True) - def f(x): - return tf.layers.dense(x, channels // 2, use_bias=True) + if f_side_input is None: + f_side_input = [] - def g(x): - return tf.layers.dense(x, channels // 2, use_bias=True) + if g_side_input is None: + g_side_input = [] - x = tf.random_uniform([batch_size, channels], dtype=tf.float32) + x = tf.random_uniform([self.BATCH_SIZE, self.CHANNELS], dtype=tf.float32) x1, x2 = tf.split(x, 2, axis=1) - with tf.variable_scope("defun") as vs: - y1_defun, y2_defun = rev_block.rev_block( - x1, x2, f, g, num_layers=num_layers) - y_defun = tf.concat([y1_defun, y2_defun], axis=1) + with tf.variable_scope("rev_test") as vs: + y1_rev, y2_rev = rev_block.rev_block( + x1, + x2, + f, + g, + f_side_input=f_side_input, + g_side_input=g_side_input, + num_layers=self.NUM_LAYERS) + y_rev = tf.concat([y1_rev, y2_rev], axis=1) fg_vars = vs.trainable_variables() num_vars = len(tf.global_variables()) with tf.variable_scope(vs, reuse=True): y1, y2 = rev_block.rev_block( - x1, x2, f, g, num_layers=num_layers, is_training=False) + x1, + x2, + f, + g, + f_side_input=f_side_input, + g_side_input=g_side_input, + num_layers=self.NUM_LAYERS, + is_training=False) y = tf.concat([y1, y2], axis=1) # Ensure no new vars were created - full reuse assert len(tf.global_variables()) == num_vars - loss_defun = tf.reduce_mean(y_defun + 10.) + loss_rev = tf.reduce_mean(y_rev + 10.) loss = tf.reduce_mean(y + 10.) - grads_defun = tf.gradients(loss_defun, [x] + fg_vars) - grads = tf.gradients(loss, [x] + fg_vars) + wrt = [x] + f_side_input + g_side_input + fg_vars + grads_rev = tf.gradients(loss_rev, wrt) + grads = tf.gradients(loss, wrt) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) - y_val, yd_val, gd_val, g_val = sess.run([y, y_defun, grads_defun, grads]) + y_val, yd_val, gd_val, g_val = sess.run([y, y_rev, grads_rev, grads]) self.assertAllClose(y_val, yd_val) for g1, g2 in zip(gd_val, g_val): self.assertAllClose(g1, g2) - def testSideInput(self): - channels = 4 - num_layers = 3 - batch_size = 3 - tf.set_random_seed(1234) + def testRevBlock(self): + self._testRevBlock() - f_side_input = tf.random_uniform([batch_size, channels // 2]) + def testSideInput(self): + f_side_input = tf.random_uniform([self.BATCH_SIZE, self.CHANNELS // 2]) def f(x, side_input): - return tf.layers.dense(x, channels // 2, use_bias=True) + side_input[0] + return tf.layers.dense( + x, self.CHANNELS // 2, use_bias=True) + side_input[0] - def g(x): - return tf.layers.dense(x, channels // 2, use_bias=True) + self._testRevBlock(f=f, f_side_input=[f_side_input]) - x = tf.random_uniform([batch_size, channels], dtype=tf.float32) - x1, x2 = tf.split(x, 2, axis=1) - with tf.variable_scope("defun") as vs: - y1, y2 = rev_block.rev_block( - x1, x2, f, g, num_layers=num_layers, f_side_input=[f_side_input]) - fg_vars = vs.trainable_variables() + def testMultipleFns(self): - y = tf.concat([y1, y2], axis=1) - loss = tf.reduce_mean(y + 10.) + def f1(x): + return tf.layers.dense(x, self.CHANNELS // 2) - with tf.variable_scope(vs, reuse=True): - y1, y2 = rev_block.rev_block( - x1, - x2, - f, - g, - num_layers=num_layers, - f_side_input=[f_side_input], - is_training=False) - y_p = tf.concat([y1, y2], axis=1) - loss_p = tf.reduce_mean(y_p + 10.) + def f2(x): + return tf.layers.dense(x, self.CHANNELS // 2, activation=tf.nn.relu) + + self._testRevBlock(f=[f1, f2, f1, f2]) + + +class FnWithCustomGradTest(tf.test.TestCase): + + def testCorrectness(self): - grads = tf.gradients(loss, [x, f_side_input] + fg_vars) - grads_p = tf.gradients(loss_p, [x, f_side_input] + fg_vars) - self.assertTrue(grads[1] is not None) # f_side_input has a gradient + w = tf.random_uniform([6, 10]) + + def fn(a, b, c): + return tf.layers.dense( + a, + 10, + use_bias=False, + kernel_initializer=lambda shape, dtype, partition_info: w + ) + tf.matmul(b, c) + + def grad_fn(inputs, variables, outputs, grad_outputs): + outputs = outputs[0] + grad_outputs = grad_outputs[0] + grad_inputs = tf.gradients(outputs, inputs, grad_ys=grad_outputs) + grad_vars = tf.gradients(outputs, variables, grad_ys=grad_outputs) + return grad_inputs, grad_vars + + custom_fn = rev_block.fn_with_custom_grad(grad_fn)(fn) + + a = tf.random_uniform([11, 6]) + b = tf.random_uniform([11, 7]) + c = tf.random_uniform([7, 10]) + + out = fn(a, b, c) + custom_out = custom_fn(a, b, c) + self.assertEqual(out.get_shape().as_list(), + custom_out.get_shape().as_list()) + + loss = tf.reduce_mean(out) + custom_loss = tf.reduce_mean(custom_out) + + grads = tf.gradients(loss, [a, b, c] + [tf.trainable_variables()[0]]) + custom_grads = tf.gradients(custom_loss, + [a, b, c] + [tf.trainable_variables()[1]]) + + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + out_val, custom_out_val, grads_val, custom_grads_val = sess.run( + [out, custom_out, grads, custom_grads]) + self.assertAllClose(out_val, custom_out_val) + for g1, g2 in zip(grads_val, custom_grads_val): + self.assertAllClose(g1, g2) + def testCustomGrad(self): + + def fn(a, b, c): + return tf.layers.dense(a, 10, use_bias=False) + tf.matmul(b, c) + + def grad_fn(inputs, variables, unused_outputs, unused_grad_outputs): + grad_inputs = [tf.ones_like(t) * (i + 1.) for i, t in enumerate(inputs)] + grad_vars = [ + tf.ones_like(t) * (i + len(inputs) + 1.) + for i, t in enumerate(variables) + ] + return grad_inputs, grad_vars + + a = tf.random_uniform([11, 6]) + b = tf.random_uniform([11, 7]) + c = tf.random_uniform([7, 10]) + w = tf.random_uniform([6, 10]) + out = rev_block.fn_with_custom_grad(grad_fn)(fn)(a, b, c) + loss = tf.reduce_mean(out) + grads = tf.gradients(loss, [a, b, c, tf.trainable_variables()[0]]) + expected_grads = [ + tf.ones_like(t) * (i + 1.) for i, t in enumerate([a, b, c, w]) + ] with self.test_session() as sess: sess.run(tf.global_variables_initializer()) - y_val, y_p_val, g_val, g_p_val = sess.run([y, y_p, grads, grads_p]) - self.assertAllClose(y_val, y_p_val) - for g1, g2 in zip(g_val, g_p_val): + g_val, eg_val = sess.run([grads, expected_grads]) + for g1, g2 in zip(g_val, eg_val): self.assertAllClose(g1, g2) diff --git a/tensor2tensor/models/rev_transformer.py b/tensor2tensor/models/rev_transformer.py index ce7dec261..d1392a1ee 100644 --- a/tensor2tensor/models/rev_transformer.py +++ b/tensor2tensor/models/rev_transformer.py @@ -214,11 +214,31 @@ def g(x): ], is_training=hparams.mode == tf.contrib.learn.ModeKeys.TRAIN) y = tf.concat([y1, y2], axis=-1) - return common_layers.layer_preprocess(y, hparams) + return common_layers.layer_preprocess(y, hparams) @registry.register_hparams def rev_transformer_base(): """Base hparams for RevTransformer.""" - hparams = transformer.transformer_base() + hparams = transformer.transformer_big() + + # Use settings from transformer_n_da + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + hparams.learning_rate = 0.4 + + return hparams + + +@registry.register_hparams +def rev_transformer_big(): + """Base hparams for RevTransformer.""" + hparams = rev_transformer_base() + + # The RevTransformer uses significantly less memory than the Transformer. + # Increase batch size and model size. + hparams.batch_size *= 2 + hparams.hidden_size *= 2 + hparams.num_heads *= 2 + hparams.num_hidden_layers += 1 return hparams From fed8cd70e6cc8c01c0a0a7e58478da866f3cd623 Mon Sep 17 00:00:00 2001 From: Stefan Schweter <stefan@schweter.it> Date: Thu, 17 Aug 2017 23:08:44 +0200 Subject: [PATCH 0276/4095] doc: update problem name for wmt task --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bb0f6f534..0c781d97f 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ trains an English-German translation model, and lets you use it interactively: pip install tensor2tensor && t2t-trainer \ --generate_data \ --data_dir=~/t2t_data \ - --problems=wmt_ende_tokens_32k \ + --problems=translate_ende_wmt32k \ --model=transformer \ --hparams_set=transformer_base_single_gpu \ --output_dir=~/t2t_train/base \ @@ -72,7 +72,7 @@ pip install tensor2tensor # You can easily swap between them (and add new ones). t2t-trainer --registry_help -PROBLEM=wmt_ende_tokens_32k +PROBLEM=translate_ende_wmt32k MODEL=transformer HPARAMS=transformer_base_single_gpu @@ -277,7 +277,7 @@ registrations. To add a new dataset, subclass [`Problem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py) and register it with `@registry.register_problem`. See -[`WMTEnDeTokens8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +[`TranslateEndeWmt8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) for an example. Also see the [data generators From 3b14e90467ba1af71e633260df5f3cdead0454de Mon Sep 17 00:00:00 2001 From: ReDeiPirati <alessio.gozzoli@gmail.com> Date: Sat, 19 Aug 2017 18:18:11 +0200 Subject: [PATCH 0277/4095] Update YF with last PR and replaced tf.Variable, it works only with TensorFlow 1.3.0 --- tensor2tensor/utils/yellowfin.py | 279 +++++++++++++++++++++++-------- 1 file changed, 205 insertions(+), 74 deletions(-) diff --git a/tensor2tensor/utils/yellowfin.py b/tensor2tensor/utils/yellowfin.py index aeb14e76e..e9b34a3f0 100644 --- a/tensor2tensor/utils/yellowfin.py +++ b/tensor2tensor/utils/yellowfin.py @@ -20,19 +20,18 @@ from __future__ import print_function # Dependency imports - import numpy as np import tensorflow as tf from tensorflow.python.framework import ops # Values for gate_gradients. -GATE_NONE = 0 -GATE_OP = 1 -GATE_GRAPH = 2 +GATE_NONE = tf.train.Optimizer.GATE_NONE +GATE_OP = tf.train.Optimizer.GATE_OP +GATE_GRAPH = tf.train.Optimizer.GATE_GRAPH -class YellowFinOptimizer(tf.train.Optimizer): +class YellowFinOptimizer(object): """Optimizer that implements the YellowFin algorithm. See [Zhang et. al., 2017](https://arxiv.org/abs/1706.03471) for details. @@ -45,20 +44,37 @@ def __init__(self, beta=0.999, curvature_window_width=20, zero_debias=True, - delta_mu=0.0): + delta_mu=0.0, + sparsity_debias=True, + use_locking=False, + name="YellowFin", + use_nesterov=False): """Construct a new YellowFin optimizer. + Implemented as a wrapper around tf.train.MomentumOptimizer Args: learning_rate: A Tensor or a floating point value. The learning rate. + Set to 1.0 in the paper. momentum: A Tensor or a floating point value. The momentum. + Set to 0.0 in the paper. clip_thresh: A Tensor or a floating point value. The cliping threshold for - tf.clip_by_global_norm. If None, no clipping will be carried out. + `tf.clip_by_global_norm`. If None, no clipping will be carried out. beta: A float value or a constant float tensor. The smoothing parameter for estimations. curvature_window_width: A int value or a constant int tensor. The curvature window width. zero_debias: A boolean, zero debias moving-averages. delta_mu: For extensions. Not necessary in the basic use. + sparsity_debias: A boolean. Gradient norm and curvature are + biased to larger values when calculated with sparse gradient. + This is useful when the model is very sparse, e.g. LSTM with + word embedding. For non-sparse CNN, turning it off could + slightly accelerate the speed. + use_locking: If True, use locks for update operations. + name: Optional name prefix for the operations created when + applying gradients. Defaults to "YellowFin". + use_nesterov: If True, the underlying MomentumOptimizer uses Nesterov + Momentum. Set to False in the default YellowFin algorithm. Note: clip_thresh is the threshold value on ||lr * gradient||, @@ -81,27 +97,44 @@ def __init__(self, self._mu = momentum # Set lr and mu tensor. - self._lr_var = tf.Variable(learning_rate, - dtype=tf.float32, - name="YF_lr", - trainable=False) - self._mu_var = tf.Variable(momentum, - dtype=tf.float32, - name="YF_mu", - trainable=False) + # self._lr_var = tf.Variable(learning_rate, + # dtype=tf.float32, + # name="YF_lr", + # trainable=False) + self._lr_var = tf.get_variable("YF_lr", + dtype=tf.float32, + trainable=False, + initializer=learning_rate) + + # self._mu_var = tf.Variable(momentum, + # dtype=tf.float32, + # name="YF_mu", + # trainable=False) + self._mu_var = tf.get_variable("YF_mu", + dtype=tf.float32, + trainable=False, + initializer=tf.constant(momentum)) # Tuning factor for learning rates step or decaying scheme. - self.lr_factor = tf.Variable(1.0, - dtype=tf.float32, - name="YF_lr_factor", - trainable=False) + # self.lr_factor = tf.Variable(1.0, + # dtype=tf.float32, + # name="YF_lr_factor", + # trainable=False) + self.lr_factor = tf.get_variable("YF_lr_factor", + dtype=tf.float32, + trainable=False, + initializer=tf.constant(1.0)) # Gradient Clipping Threshold. if clip_thresh is not None: - self._clip_thresh_var = tf.Variable(clip_thresh, - dtype=tf.float32, - name="YF_clip_thresh", - trainable=False) + # self._clip_thresh_var = tf.Variable(clip_thresh, + # dtype=tf.float32, + # name="YF_clip_thresh", + # trainable=False) + self._clip_thresh_var = tf.get_variable("YF_clip_thresh", + dtype=tf.float32, + trainable=False, + initializer=tf.constant(clip_thresh)) else: self._clip_thresh_var = None @@ -111,17 +144,22 @@ def __init__(self, # Init momentum optimizer. self._momentum_optimizer = tf.train.MomentumOptimizer( - self._lr_m, self._mu_m) + self._lr_m, self._mu_m, use_locking, name, use_nesterov) # Moving average for statistics. self._beta = beta self._moving_averager = None # Step counting. - self._step = tf.Variable(0, - dtype=tf.int32, - name="YF_step", - trainable=False) + # self._step = tf.Variable(0, + # dtype=tf.int32, + # name="YF_step", + # trainable=False) + self._step = tf.get_variable("YF_step", + dtype=tf.int32, + trainable=False, + initializer=tf.constant(0)) + # YF_step + 1 op. self._increment_step_op = None @@ -130,6 +168,7 @@ def __init__(self, # Moving-averages. self._zero_debias = zero_debias + self._sparsity_debias = sparsity_debias # For curvature range. self.curvature_window_width = curvature_window_width @@ -170,27 +209,33 @@ def __init__(self, # and (zero_devias) moving-averages. self._moving_averager = None + def _curvature_range(self): """Curvature range. Returns: h_max_t, h_min_t ops """ - self._curv_win = tf.Variable(np.zeros([self.curvature_window_width,]), - dtype=tf.float32, - name="curv_win", - trainable=False) - + # self._curv_win = tf.Variable(np.zeros([self.curvature_window_width,]), + # dtype=tf.float32, + # name="curv_win", + # trainable=False) + self._curv_win = tf.get_variable("curv_win", + dtype=tf.float32, + trainable=False, + shape=[self.curvature_window_width, ], + initializer=tf.zeros_initializer) + # We use log smoothing for curvature range self._curv_win = tf.scatter_update(self._curv_win, self._step % self.curvature_window_width, - self._grad_norm_squared) + tf.log(self._grad_norm_squared)) # Note here the iterations start from iteration 0 valid_window = tf.slice(self._curv_win, tf.constant([0,]), tf.expand_dims( tf.minimum( tf.constant(self.curvature_window_width), - self._step + 1), axis=0)) + self._step + 1), dim=0)) self._h_min_t = tf.reduce_min(valid_window) self._h_max_t = tf.reduce_max(valid_window) @@ -198,11 +243,17 @@ def _curvature_range(self): with tf.control_dependencies([self._h_min_t, self._h_max_t]): avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t]) with tf.control_dependencies([avg_op]): - self._h_min = tf.identity(self._moving_averager.average(self._h_min_t)) - self._h_max = tf.identity(self._moving_averager.average(self._h_max_t)) + self._h_min = tf.exp( + tf.identity(self._moving_averager.average(self._h_min_t))) + self._h_max = tf.exp( + tf.identity(self._moving_averager.average(self._h_max_t))) + if self._sparsity_debias: + self._h_min = self._h_min * self._sparsity_avg + self._h_max = self._h_max * self._sparsity_avg curv_range_ops.append(avg_op) return curv_range_ops # h_max_t, h_min_t + def _grad_variance(self): """Estimate of gradient Variance. @@ -212,7 +263,7 @@ def _grad_variance(self): grad_var_ops = [] tensor_to_avg = [] for t, g in zip(self._vars, self._grad): - if isinstance(g, tf.IndexedSlices): + if isinstance(g, ops.IndexedSlices): tensor_to_avg.append( tf.reshape(tf.unsorted_segment_sum(g.values, g.indices, @@ -226,12 +277,17 @@ def _grad_variance(self): self._grad_avg = [self._moving_averager.average(val) for val in tensor_to_avg] self._grad_avg_squared = [tf.square(val) for val in self._grad_avg] - self._grad_avg_squared = tf.add_n([tf.reduce_sum(val) - for val in self._grad_avg_squared]) + # Compute Variance - self._grad_var = self._grad_norm_squared_avg - self._grad_avg_squared + self._grad_var = tf.maximum( + tf.constant(1e-6, dtype=self._grad_norm_squared_avg.dtype), + self._grad_norm_squared_avg + - tf.add_n([tf.reduce_sum(val) for val in self._grad_avg_squared])) + if self._sparsity_debias: + self._grad_var *= self._sparsity_avg return grad_var_ops # C_t + def _dist_to_opt(self): """Distance to optimum. @@ -239,7 +295,7 @@ def _dist_to_opt(self): D_t ops """ dist_to_opt_ops = [] - # Running average of the norm of gradeint + # Running average of the norm of gradient self._grad_norm = tf.sqrt(self._grad_norm_squared) avg_op = self._moving_averager.apply([self._grad_norm,]) dist_to_opt_ops.append(avg_op) @@ -254,8 +310,28 @@ def _dist_to_opt(self): with tf.control_dependencies([avg_op]): self._dist_to_opt_avg = tf.identity( self._moving_averager.average(self._d_t)) + if self._sparsity_debias: + self._dist_to_opt_avg /= tf.sqrt(self._sparsity_avg) return dist_to_opt_ops # D_t + + def _grad_sparsity(self): + # If the sparse minibatch gradient has 10 percent of its entries + # non-zero, its sparsity is 0.1. + # The norm of dense gradient averaged from full dataset + # are roughly estimated norm of minibatch + # sparse gradient norm * sqrt(sparsity) + # An extension maybe only correct the sparse blob. + non_zero_cnt = tf.add_n([tf.count_nonzero(g) for g in self._grad]) + all_entry_cnt = tf.add_n([tf.size(g) for g in self._grad]) + self._sparsity = tf.cast(non_zero_cnt, self._grad[0].dtype) \ + / tf.cast(all_entry_cnt, self._grad[0].dtype) + avg_op = self._moving_averager.apply([self._sparsity, ]) + with tf.control_dependencies([avg_op]): + self._sparsity_avg = self._moving_averager.average(self._sparsity) + return avg_op + + def _prepare_variables(self): """Prepare Variables for YellowFin. @@ -264,7 +340,7 @@ def _prepare_variables(self): """ self._moving_averager = tf.train.ExponentialMovingAverage( decay=self._beta, zero_debias=self._zero_debias) - assert self._grad + # assert self._grad is not None and len(self._grad) > 0 # List for the returned Operations prepare_variables_op = [] @@ -282,6 +358,10 @@ def _prepare_variables(self): self._grad_norm_squared = [tf.reduce_sum(g_sq) for g_sq in self._grad_squared] + if self._sparsity_debias: + avg_op_sparsity = self._grad_sparsity() + prepare_variables_op.append(avg_op_sparsity) + # The following running average on squared norm of gradient # is shared by grad_var and dist_to_opt avg_op = self._moving_averager.apply(self._grad_norm_squared) @@ -295,6 +375,38 @@ def _prepare_variables(self): prepare_variables_op.append(avg_op) return tf.group(*prepare_variables_op) + + def _get_cubic_root(self): + # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2 + # where x = sqrt(mu). + # We substitute x, which is sqrt(mu), with x = y + 1. + # It gives y^3 + py = q + # where p = (D^2 h_min^2)/(2*C) and q = -p. + # We use the Vieta's substution to compute the root. + # There is only one real solution y (which is in [0, 1] ). + # http://mathworld.wolfram.com/VietasSubstitution.html + assert_array = \ + [tf.Assert( + tf.logical_not(tf.is_nan(self._dist_to_opt_avg)), [self._dist_to_opt_avg, ]), + tf.Assert( + tf.logical_not(tf.is_nan(self._h_min)), [self._h_min,]), + tf.Assert( + tf.logical_not(tf.is_nan(self._grad_var)), [self._grad_var,]), + tf.Assert( + tf.logical_not(tf.is_inf(self._dist_to_opt_avg)), [self._dist_to_opt_avg, ]), + tf.Assert( + tf.logical_not(tf.is_inf(self._h_min)), [self._h_min,]), + tf.Assert( + tf.logical_not(tf.is_inf(self._grad_var)), [self._grad_var,])] + with tf.control_dependencies(assert_array): + p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var + w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0 + w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0/3.0) + y = w - p / 3.0 / w + x = y + 1 + return x + + def _get_lr_tensor(self): """Get lr minimzing the surrogate. @@ -304,40 +416,20 @@ def _get_lr_tensor(self): lr = (1.0 - tf.sqrt(self._mu))**2 / self._h_min return lr + def _get_mu_tensor(self): """Get the min mu which minimize the surrogate. Returns: The mu_t. """ - const_fact = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var - coef = tf.Variable([-1.0, 3.0, 0.0, 1.0], - dtype=tf.float32, - name="cubic_solver_coef") - coef = tf.scatter_update(coef, - tf.constant(2), - -(3 + const_fact)) - roots = tf.py_func(np.roots, - [coef], - Tout=tf.complex64, - stateful=False) - - # Filter out the correct root - root_idx = tf.logical_and( - tf.logical_and( - tf.greater(tf.real(roots), tf.constant(0.0)), - tf.less(tf.real(roots), tf.constant(1.0))), - tf.less(tf.abs(tf.imag(roots)), 1e-5)) - - # In case there are two duplicated roots satisfying the above condition - root = tf.reshape(tf.gather(tf.gather(roots, tf.where(root_idx)), - tf.constant(0)), - shape=[]) - + root = self._get_cubic_root() dr = self._h_max / self._h_min - mu = tf.maximum(tf.real(root)**2, ((tf.sqrt(dr) - 1)/(tf.sqrt(dr) + 1))**2) + mu = tf.maximum( + root**2, ((tf.sqrt(dr) - 1) / (tf.sqrt(dr) + 1))**2) return mu + def _yellowfin(self): """YellowFin auto-tuning optimizer based on momentum SGD. @@ -366,11 +458,12 @@ def _yellowfin(self): # squared distance from the optimum of a local quadratic # approximation after a single step while keeping all directions in the # robust region. - self._mu = tf.identity(tf.cond(self._do_tune, self._get_mu_tensor, + self._mu = tf.identity(tf.cond(self._do_tune, + lambda: self._get_mu_tensor(), lambda: self._mu_var)) with tf.control_dependencies([self._mu]): self._lr = tf.identity(tf.cond(self._do_tune, - self._get_lr_tensor, + lambda: self._get_lr_tensor(), lambda: self._lr_var)) # Tune learning rate and momentum. @@ -383,6 +476,11 @@ def _yellowfin(self): yellowfin_ops = tf.group(*yellowfin_ops) return yellowfin_ops + + def get_name(self): + return self._optimizer.get_name() + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): """Applying gradients aand tune hyperparams with YellowFin. @@ -413,14 +511,20 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None): self._grad, self._clip_thresh_var) apply_grad_op = self._momentum_optimizer.apply_gradients( - zip(self._grads_clip, self._vars), global_step=global_step) + zip(self._grads_clip, self._vars), global_step=global_step, name=name) else: apply_grad_op = self._momentum_optimizer.apply_gradients( - zip(self._grad, self._vars), global_step=global_step) + zip(self._grad, self._vars), global_step=global_step, name=name) # Begin lr and mu tuning. with tf.variable_scope("prepare_yellowFin_variables"): - prepare_variables_op = self._prepare_variables() + # the dependencies ideally only need to be after clip is done, + # i.e. dependes on self._grads. However, the control_dependencies + # does not support indexed slice for sparse gradients. + # The alternative dependencies here might be slightly slower due + # to less parallelization. + with tf.control_dependencies([apply_grad_op, ]): + prepare_variables_op = self._prepare_variables() with tf.variable_scope("yellowfin"): with tf.control_dependencies([prepare_variables_op]): @@ -435,6 +539,7 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None): yellowfin_op, self._increment_step_op) + def compute_gradients(self, loss, var_list, @@ -475,6 +580,7 @@ def compute_gradients(self, colocate_gradients_with_ops=colocate_gradients_with_ops, grad_loss=grad_loss) + def minimize(self, loss, global_step=None, @@ -533,4 +639,29 @@ def minimize(self, print("g ", g) print("v ", v) - return self.apply_gradients(grads_and_vars, global_step=global_step) + return self.apply_gradients(grads_and_vars, global_step=global_step, name=name) + + + def get_slot(self, var, name): + """ + Return a slot named `name` created for `var` by + the underlying MomentumOptimizer. + + Args: + var: A variable passed to `minimize()` or `apply_gradients()`. + name: A string. + + Returns: + The `Variable` for the slot if it was created, `None` otherwise. + """ + return self._momentum_optimizer.get_slot(var, name) + + def get_slot_names(self): + """ + Return a list of the names of the slots created by the + underlying MomentumOptimizer. + + Returns: + A list of strings. + """ + return self._momentum_optimizer.get_slot_names() From cb0d4cfc6a6fd533a4130a25a336bf0fbbf253da Mon Sep 17 00:00:00 2001 From: ReDeiPirati <alessio.gozzoli@gmail.com> Date: Sun, 20 Aug 2017 11:39:05 +0200 Subject: [PATCH 0278/4095] Delete comment, unused variable and fix error on gradient clip --- tensor2tensor/utils/yellowfin.py | 88 +++++++++++++++----------------- 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/tensor2tensor/utils/yellowfin.py b/tensor2tensor/utils/yellowfin.py index e9b34a3f0..c90d43b83 100644 --- a/tensor2tensor/utils/yellowfin.py +++ b/tensor2tensor/utils/yellowfin.py @@ -20,7 +20,6 @@ from __future__ import print_function # Dependency imports -import numpy as np import tensorflow as tf from tensorflow.python.framework import ops @@ -97,29 +96,16 @@ def __init__(self, self._mu = momentum # Set lr and mu tensor. - # self._lr_var = tf.Variable(learning_rate, - # dtype=tf.float32, - # name="YF_lr", - # trainable=False) self._lr_var = tf.get_variable("YF_lr", dtype=tf.float32, trainable=False, initializer=learning_rate) - - # self._mu_var = tf.Variable(momentum, - # dtype=tf.float32, - # name="YF_mu", - # trainable=False) self._mu_var = tf.get_variable("YF_mu", dtype=tf.float32, trainable=False, initializer=tf.constant(momentum)) # Tuning factor for learning rates step or decaying scheme. - # self.lr_factor = tf.Variable(1.0, - # dtype=tf.float32, - # name="YF_lr_factor", - # trainable=False) self.lr_factor = tf.get_variable("YF_lr_factor", dtype=tf.float32, trainable=False, @@ -127,14 +113,11 @@ def __init__(self, # Gradient Clipping Threshold. if clip_thresh is not None: - # self._clip_thresh_var = tf.Variable(clip_thresh, - # dtype=tf.float32, - # name="YF_clip_thresh", - # trainable=False) - self._clip_thresh_var = tf.get_variable("YF_clip_thresh", - dtype=tf.float32, - trainable=False, - initializer=tf.constant(clip_thresh)) + self._clip_thresh_var = \ + tf.get_variable("YF_clip_thresh", + dtype=tf.float32, + trainable=False, + initializer=tf.constant(clip_thresh)) else: self._clip_thresh_var = None @@ -151,10 +134,6 @@ def __init__(self, self._moving_averager = None # Step counting. - # self._step = tf.Variable(0, - # dtype=tf.int32, - # name="YF_step", - # trainable=False) self._step = tf.get_variable("YF_step", dtype=tf.int32, trainable=False, @@ -209,6 +188,9 @@ def __init__(self, # and (zero_devias) moving-averages. self._moving_averager = None + # Handling Sparse Matrix + self._sparsity = None + self._sparsity_avg = None def _curvature_range(self): """Curvature range. @@ -216,10 +198,6 @@ def _curvature_range(self): Returns: h_max_t, h_min_t ops """ - # self._curv_win = tf.Variable(np.zeros([self.curvature_window_width,]), - # dtype=tf.float32, - # name="curv_win", - # trainable=False) self._curv_win = tf.get_variable("curv_win", dtype=tf.float32, trainable=False, @@ -244,9 +222,9 @@ def _curvature_range(self): avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t]) with tf.control_dependencies([avg_op]): self._h_min = tf.exp( - tf.identity(self._moving_averager.average(self._h_min_t))) + tf.identity(self._moving_averager.average(self._h_min_t))) self._h_max = tf.exp( - tf.identity(self._moving_averager.average(self._h_max_t))) + tf.identity(self._moving_averager.average(self._h_max_t))) if self._sparsity_debias: self._h_min = self._h_min * self._sparsity_avg self._h_max = self._h_max * self._sparsity_avg @@ -280,9 +258,9 @@ def _grad_variance(self): # Compute Variance self._grad_var = tf.maximum( - tf.constant(1e-6, dtype=self._grad_norm_squared_avg.dtype), - self._grad_norm_squared_avg - - tf.add_n([tf.reduce_sum(val) for val in self._grad_avg_squared])) + tf.constant(1e-6, dtype=self._grad_norm_squared_avg.dtype), + self._grad_norm_squared_avg + - tf.add_n([tf.reduce_sum(val) for val in self._grad_avg_squared])) if self._sparsity_debias: self._grad_var *= self._sparsity_avg return grad_var_ops # C_t @@ -316,12 +294,14 @@ def _dist_to_opt(self): def _grad_sparsity(self): + """ # If the sparse minibatch gradient has 10 percent of its entries # non-zero, its sparsity is 0.1. # The norm of dense gradient averaged from full dataset # are roughly estimated norm of minibatch # sparse gradient norm * sqrt(sparsity) # An extension maybe only correct the sparse blob. + """ non_zero_cnt = tf.add_n([tf.count_nonzero(g) for g in self._grad]) all_entry_cnt = tf.add_n([tf.size(g) for g in self._grad]) self._sparsity = tf.cast(non_zero_cnt, self._grad[0].dtype) \ @@ -377,6 +357,7 @@ def _prepare_variables(self): def _get_cubic_root(self): + """ # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2 # where x = sqrt(mu). # We substitute x, which is sqrt(mu), with x = y + 1. @@ -385,19 +366,27 @@ def _get_cubic_root(self): # We use the Vieta's substution to compute the root. # There is only one real solution y (which is in [0, 1] ). # http://mathworld.wolfram.com/VietasSubstitution.html + """ assert_array = \ [tf.Assert( - tf.logical_not(tf.is_nan(self._dist_to_opt_avg)), [self._dist_to_opt_avg, ]), + tf.logical_not(tf.is_nan(self._dist_to_opt_avg)), + [self._dist_to_opt_avg, ]), \ tf.Assert( - tf.logical_not(tf.is_nan(self._h_min)), [self._h_min,]), + tf.logical_not(tf.is_nan(self._h_min)), + [self._h_min,]), \ tf.Assert( - tf.logical_not(tf.is_nan(self._grad_var)), [self._grad_var,]), + tf.logical_not(tf.is_nan(self._grad_var)), + [self._grad_var,]), \ tf.Assert( - tf.logical_not(tf.is_inf(self._dist_to_opt_avg)), [self._dist_to_opt_avg, ]), + tf.logical_not(tf.is_inf(self._dist_to_opt_avg)), + [self._dist_to_opt_avg, ]), \ tf.Assert( - tf.logical_not(tf.is_inf(self._h_min)), [self._h_min,]), + tf.logical_not(tf.is_inf(self._h_min)), + [self._h_min,]), \ tf.Assert( - tf.logical_not(tf.is_inf(self._grad_var)), [self._grad_var,])] + tf.logical_not(tf.is_inf(self._grad_var)), + [self._grad_var,])] + with tf.control_dependencies(assert_array): p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0 @@ -478,7 +467,8 @@ def _yellowfin(self): def get_name(self): - return self._optimizer.get_name() + """Get Optimizer Name""" + return self._momentum_optimizer.get_name() def apply_gradients(self, grads_and_vars, global_step=None, name=None): @@ -507,14 +497,18 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None): with tf.variable_scope("apply_updates"): # Gradient Clipping? if self._clip_thresh_var is not None: - self._grads_clip, self._grads_norm = tf.clip_by_global_norm( + self._grad, _ = tf.clip_by_global_norm( self._grad, self._clip_thresh_var) apply_grad_op = self._momentum_optimizer.apply_gradients( - zip(self._grads_clip, self._vars), global_step=global_step, name=name) + zip(self._grad, self._vars), + global_step=global_step, + name=name) else: apply_grad_op = self._momentum_optimizer.apply_gradients( - zip(self._grad, self._vars), global_step=global_step, name=name) + zip(self._grad, self._vars), + global_step=global_step, + name=name) # Begin lr and mu tuning. with tf.variable_scope("prepare_yellowFin_variables"): @@ -639,7 +633,9 @@ def minimize(self, print("g ", g) print("v ", v) - return self.apply_gradients(grads_and_vars, global_step=global_step, name=name) + return self.apply_gradients(grads_and_vars, + global_step=global_step, + name=name) def get_slot(self, var, name): From 5926beaf1faf92069b425ce578089200bb7a3c16 Mon Sep 17 00:00:00 2001 From: cclauss <cclauss@bluewin.ch> Date: Tue, 22 Aug 2017 12:11:14 +0200 Subject: [PATCH 0279/4095] from six.moves import xrange for Python 3 xrange() is called on line 53. --- tensor2tensor/data_generators/algorithmic_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/data_generators/algorithmic_test.py b/tensor2tensor/data_generators/algorithmic_test.py index 4ac6d3123..3802d1beb 100644 --- a/tensor2tensor/data_generators/algorithmic_test.py +++ b/tensor2tensor/data_generators/algorithmic_test.py @@ -21,6 +21,7 @@ # Dependency imports +from six.moves import xrange from tensor2tensor.data_generators import algorithmic import tensorflow as tf From cbfe1169392bb582273544c641496d6f43c3cf67 Mon Sep 17 00:00:00 2001 From: cclauss <cclauss@bluewin.ch> Date: Tue, 22 Aug 2017 12:13:26 +0200 Subject: [PATCH 0280/4095] from six.moves import xrange for Python 3 xrange() is called on line 85. --- tensor2tensor/data_generators/lm1b.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index a436e0e6e..f9f220571 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -26,6 +26,7 @@ # Dependency imports +from six.moves import xrange from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators import tokenizer From 89eb8489afc0544f103ebb145bcb12a860b66b9e Mon Sep 17 00:00:00 2001 From: cclauss <cclauss@bluewin.ch> Date: Tue, 22 Aug 2017 12:16:07 +0200 Subject: [PATCH 0281/4095] from six.moves import xrange for Python 3 xrange() is called on lines 117, 126, 128, 156. --- tensor2tensor/layers/common_attention.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 2c3e4b71f..77636ff6c 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -23,6 +23,7 @@ # Dependency imports +from six.moves import xrange from tensor2tensor.layers import common_layers from tensor2tensor.utils import expert_utils From 8dc14c5e1d6a3230b50bdec5e243f0e8d89beb8b Mon Sep 17 00:00:00 2001 From: cclauss <cclauss@bluewin.ch> Date: Tue, 22 Aug 2017 12:18:29 +0200 Subject: [PATCH 0282/4095] from six.moves import xrange for Python 3 xrange() is called on lines 137 and 359. --- tensor2tensor/layers/rev_block.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 4dd1cde03..f85385e68 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -28,6 +28,7 @@ # Dependency imports +from six.moves import xrange import tensorflow as tf from tensorflow.python.framework import dtypes from tensorflow.python.framework import function From 9d48d3560924df8ddde49844d763fc03ba6add45 Mon Sep 17 00:00:00 2001 From: cclauss <cclauss@bluewin.ch> Date: Wed, 23 Aug 2017 03:54:42 +0200 Subject: [PATCH 0283/4095] Simplify native_to_unicode() & unicode_to_native() The first [uses feature detection, instead of version detection](https://docs.python.org/3/howto/pyporting.html#use-feature-detection-instead-of-version-detection) and the second [avoids assigning a lambda expression to a variable](https://docs.quantifiedcode.com/python-anti-patterns/correctness/assigning_a_lambda_to_a_variable.html). --- tensor2tensor/data_generators/text_encoder.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index b628a538f..5a3220876 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -56,19 +56,18 @@ _ESCAPE_CHARS = set(u"\\_u;0123456789") -def native_to_unicode_py2(s): - """Python 2: transform native string to Unicode.""" - return s if isinstance(s, unicode) else s.decode("utf8") +def native_to_unicode(s): + """Transform native string to Unicode.""" + try: # Python 2 + return s if isinstance(s, unicode) else s.decode("utf8") + except NameError: # Python 3: unicode() was dropped + return s # Conversion between Unicode and UTF-8, if required (on Python2) -if six.PY2: - native_to_unicode = native_to_unicode_py2 - unicode_to_native = lambda s: s.encode("utf-8") -else: - # No conversion required on Python3 - native_to_unicode = lambda s: s - unicode_to_native = lambda s: s +def unicode_to_native(s): + """Transform Unicode to native string.""" + return s.encode("utf-8") if six.PY2 else s # No conversion required on Python3 class TextEncoder(object): From 33e798a5fe004bfda0150d929c31728f1a181c45 Mon Sep 17 00:00:00 2001 From: cclauss <cclauss@bluewin.ch> Date: Wed, 23 Aug 2017 13:31:31 +0200 Subject: [PATCH 0284/4095] A much cleaner approach --- tensor2tensor/data_generators/text_encoder.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 5a3220876..f6897d04d 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -56,18 +56,13 @@ _ESCAPE_CHARS = set(u"\\_u;0123456789") -def native_to_unicode(s): - """Transform native string to Unicode.""" - try: # Python 2 - return s if isinstance(s, unicode) else s.decode("utf8") - except NameError: # Python 3: unicode() was dropped - return s - - -# Conversion between Unicode and UTF-8, if required (on Python2) -def unicode_to_native(s): - """Transform Unicode to native string.""" - return s.encode("utf-8") if six.PY2 else s # No conversion required on Python3 +if six.PY2: + def native_to_unicode(s): return s if isinstance(s, unicode) else s.decode("utf8") # noqa: F821 + def unicode_to_native(s): return s.encode("utf-8") +else: + # No conversion required on Python >= 3 + def native_to_unicode(s): return s + def unicode_to_native(s): return s class TextEncoder(object): From 6e825430c776711f58259c8f3c5fd67e9e5af481 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Wed, 23 Aug 2017 22:14:46 +0200 Subject: [PATCH 0285/4095] prevent extracting a tar file which has been already extracted Extracting it again is not only waste of time, but it may fail with Permission denied if the extracted file is not writeable (e.g. -r--r--r--). --- tensor2tensor/data_generators/generator_utils.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index eadca9bd6..fd4ed51d8 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -333,19 +333,18 @@ def generate(): for source in sources: url = source[0] filename = os.path.basename(url) - read_type = "r:gz" if "tgz" in filename else "r" - compressed_file = maybe_download(tmp_dir, filename, url) - with tarfile.open(compressed_file, read_type) as corpus_tar: - corpus_tar.extractall(tmp_dir) - for lang_file in source[1]: tf.logging.info("Reading file: %s" % lang_file) filepath = os.path.join(tmp_dir, lang_file) + if not tf.gfile.Exists(filepath): + read_type = "r:gz" if filename.endswith("tgz") else "r" + with tarfile.open(compressed_file, read_type) as corpus_tar: + corpus_tar.extractall(tmp_dir) # For some datasets a second extraction is necessary. - if ".gz" in lang_file: + if lang_file.endswith(".gz"): new_filepath = os.path.join(tmp_dir, lang_file[:-3]) if tf.gfile.Exists(new_filepath): tf.logging.info( From f8d5ee8f8a7737d10dac82437b32edbd1720f2a3 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Wed, 23 Aug 2017 22:20:28 +0200 Subject: [PATCH 0286/4095] bigger translate_encs_wmt32k training data, tsv support * CzEng1.0 (15M sentence pairs) is part of the WMT training data, but has to be downloaded separately * This commits adds (a bit hacky, I admit) support for - src and trg sentences stored in arbitrary columns of tsv files - wildcard patters to support many (e.g. 100 in case of CzEng) files in tar --- tensor2tensor/data_generators/wmt.py | 107 ++++++++++++++++++--------- 1 file changed, 72 insertions(+), 35 deletions(-) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 93fc27ac5..9acf1982e 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -19,7 +19,9 @@ from __future__ import division from __future__ import print_function +import glob import os +import stat import tarfile # Dependency imports @@ -266,6 +268,10 @@ def bi_vocabs_token_generator(source_path, # English-Czech datasets _ENCS_TRAIN_DATASETS = [ + [ + "https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-1458/data-plaintext-format.tar", + ('tsv', 3, 2, 'data.plaintext-format/*train.gz') + ], [ "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long ("training/news-commentary-v12.cs-en.en", @@ -345,38 +351,64 @@ def _compile_data(tmp_dir, datasets, filename): url = dataset[0] compressed_filename = os.path.basename(url) compressed_filepath = os.path.join(tmp_dir, compressed_filename) - - lang1_filename, lang2_filename = dataset[1] - lang1_filepath = os.path.join(tmp_dir, lang1_filename) - lang2_filepath = os.path.join(tmp_dir, lang2_filename) - is_sgm = (lang1_filename.endswith("sgm") and - lang2_filename.endswith("sgm")) - generator_utils.maybe_download(tmp_dir, compressed_filename, url) - if not (os.path.exists(lang1_filepath) and - os.path.exists(lang2_filepath)): - # For .tar.gz and .tgz files, we read compressed. - mode = "r:gz" if compressed_filepath.endswith("gz") else "r" - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - if lang1_filepath.endswith(".gz"): - new_filepath = lang1_filepath.strip(".gz") - generator_utils.gunzip_file(lang1_filepath, new_filepath) - lang1_filepath = new_filepath - if lang2_filepath.endswith(".gz"): - new_filepath = lang2_filepath.strip(".gz") - generator_utils.gunzip_file(lang2_filepath, new_filepath) - lang2_filepath = new_filepath - with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: - with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: - line1, line2 = lang1_file.readline(), lang2_file.readline() - while line1 or line2: - line1res = _preprocess_sgm(line1, is_sgm) - line2res = _preprocess_sgm(line2, is_sgm) - if line1res or line2res: - lang1_resfile.write(line1res.strip() + "\n") - lang2_resfile.write(line2res.strip() + "\n") + + if dataset[1][0] == 'tsv': + _, src_column, trg_column, glob_pattern = dataset[1] + filenames = glob.glob(os.path.join(tmp_dir, glob_pattern)) + if not filenames: + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" # *.tgz *.tar.gz + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + filenames = glob.glob(os.path.join(tmp_dir, glob_pattern)) + for tsv_filename in filenames: + if tsv_filename.endswith(".gz"): + new_filename = tsv_filename.strip(".gz") + try: + generator_utils.gunzip_file(tsv_filename, new_filename) + except PermissionError: + tsvdir = os.path.dirname(tsv_filename) + os.chmod(tsvdir, os.stat(tsvdir).st_mode | stat.S_IWRITE) + generator_utils.gunzip_file(tsv_filename, new_filename) + tsv_filename = new_filename + with tf.gfile.GFile(tsv_filename, mode="r") as tsv_file: + for line in tsv_file: + if line and "\t" in line: + parts = line.split("\t") + source, target = parts[src_column], parts[trg_column] + lang1_resfile.write(source.strip() + "\n") + lang2_resfile.write(target.strip() + "\n") + else: + lang1_filename, lang2_filename = dataset[1] + lang1_filepath = os.path.join(tmp_dir, lang1_filename) + lang2_filepath = os.path.join(tmp_dir, lang2_filename) + is_sgm = (lang1_filename.endswith("sgm") and + lang2_filename.endswith("sgm")) + + if not (os.path.exists(lang1_filepath) and + os.path.exists(lang2_filepath)): + # For .tar.gz and .tgz files, we read compressed. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + if lang1_filepath.endswith(".gz"): + new_filepath = lang1_filepath.strip(".gz") + generator_utils.gunzip_file(lang1_filepath, new_filepath) + lang1_filepath = new_filepath + if lang2_filepath.endswith(".gz"): + new_filepath = lang2_filepath.strip(".gz") + generator_utils.gunzip_file(lang2_filepath, new_filepath) + lang2_filepath = new_filepath + with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: + with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: line1, line2 = lang1_file.readline(), lang2_file.readline() + while line1 or line2: + line1res = _preprocess_sgm(line1, is_sgm) + line2res = _preprocess_sgm(line2, is_sgm) + if line1res or line2res: + lang1_resfile.write(line1res.strip() + "\n") + lang2_resfile.write(line2res.strip() + "\n") + line1, line2 = lang1_file.readline(), lang2_file.readline() return filename @@ -603,13 +635,18 @@ def vocab_name(self): def generator(self, data_dir, tmp_dir, train): datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - source_datasets + target_datasets) tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) + vocab_datasets = [] + # CzEng contains 100 gz files with tab-separated columns, so let's expect + # it is the first dataset in datasets and use the newly created *.lang{1,2} files instead. + if datasets[0][0].endswith("data-plaintext-format.tar"): + vocab_datasets.append([datasets[0][0], + ["wmt_encs_tok_%s.lang1" % tag, "wmt_encs_tok_%s.lang2" % tag]]) + datasets = datasets[1:] + vocab_datasets += [[item[0], [item[1][0], item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, vocab_datasets) return token_generator(data_path + ".lang1", data_path + ".lang2", symbolizer_vocab, EOS) From 1df0fe9056766b79677597c3a98b9d5fa1c470b5 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Fri, 25 Aug 2017 00:30:42 +0200 Subject: [PATCH 0287/4095] add options save_checkpoints_secs and keep_checkpoint_every_n_hours --- tensor2tensor/utils/trainer_utils.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 22fd727f9..5682ae820 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -70,6 +70,13 @@ "How many recent checkpoints to keep.") flags.DEFINE_bool("experimental_optimize_placement", False, "Optimize ops placement with experimental session options.") +flags.DEFINE_integer("keep_checkpoint_every_n_hours", 10000, + "Number of hours between each checkpoint to be saved. " + "The default value of 10,000 hours effectively disables the feature.") +flags.DEFINE_integer("save_checkpoints_secs", 0, + "Save checkpoints every this many seconds. " + "Default=0 means let tensorflow.contrib.learn.python.learn decide, " + "which is currently equivalent to 600, i.e. 10 minutes.") # Distributed training flags flags.DEFINE_string("master", "", "Address of TensorFlow master.") @@ -203,7 +210,9 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): model_dir=output_dir, gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction, session_config=session_config(), - keep_checkpoint_max=FLAGS.keep_checkpoint_max)) + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, + save_checkpoints_secs=FLAGS.save_checkpoints_secs,)) # Store the hparams in the estimator as well estimator.hparams = hparams return estimator, { From 6259b740208c1fe0dd947f29aef27fcfa25abae2 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 17 Aug 2017 10:20:57 -0700 Subject: [PATCH 0288/4095] Rename RevTransformer to TransformerRevnet PiperOrigin-RevId: 165594482 --- README.md | 6 +- .../data_generators/algorithmic_test.py | 1 - tensor2tensor/data_generators/lm1b.py | 1 - tensor2tensor/data_generators/text_encoder.py | 16 +- tensor2tensor/layers/common_attention.py | 1 - tensor2tensor/layers/rev_block.py | 1 - tensor2tensor/models/models.py | 2 +- ...v_transformer.py => transformer_revnet.py} | 38 +-- ...mer_test.py => transformer_revnet_test.py} | 14 +- tensor2tensor/utils/trainer_utils.py | 11 +- tensor2tensor/utils/yellowfin.py | 279 +++++------------- 11 files changed, 118 insertions(+), 252 deletions(-) rename tensor2tensor/models/{rev_transformer.py => transformer_revnet.py} (88%) rename tensor2tensor/models/{rev_transformer_test.py => transformer_revnet_test.py} (87%) diff --git a/README.md b/README.md index 0c781d97f..bb0f6f534 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ trains an English-German translation model, and lets you use it interactively: pip install tensor2tensor && t2t-trainer \ --generate_data \ --data_dir=~/t2t_data \ - --problems=translate_ende_wmt32k \ + --problems=wmt_ende_tokens_32k \ --model=transformer \ --hparams_set=transformer_base_single_gpu \ --output_dir=~/t2t_train/base \ @@ -72,7 +72,7 @@ pip install tensor2tensor # You can easily swap between them (and add new ones). t2t-trainer --registry_help -PROBLEM=translate_ende_wmt32k +PROBLEM=wmt_ende_tokens_32k MODEL=transformer HPARAMS=transformer_base_single_gpu @@ -277,7 +277,7 @@ registrations. To add a new dataset, subclass [`Problem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py) and register it with `@registry.register_problem`. See -[`TranslateEndeWmt8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +[`WMTEnDeTokens8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) for an example. Also see the [data generators diff --git a/tensor2tensor/data_generators/algorithmic_test.py b/tensor2tensor/data_generators/algorithmic_test.py index 3802d1beb..4ac6d3123 100644 --- a/tensor2tensor/data_generators/algorithmic_test.py +++ b/tensor2tensor/data_generators/algorithmic_test.py @@ -21,7 +21,6 @@ # Dependency imports -from six.moves import xrange from tensor2tensor.data_generators import algorithmic import tensorflow as tf diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index f9f220571..a436e0e6e 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -26,7 +26,6 @@ # Dependency imports -from six.moves import xrange from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators import tokenizer diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index f6897d04d..b628a538f 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -56,13 +56,19 @@ _ESCAPE_CHARS = set(u"\\_u;0123456789") +def native_to_unicode_py2(s): + """Python 2: transform native string to Unicode.""" + return s if isinstance(s, unicode) else s.decode("utf8") + + +# Conversion between Unicode and UTF-8, if required (on Python2) if six.PY2: - def native_to_unicode(s): return s if isinstance(s, unicode) else s.decode("utf8") # noqa: F821 - def unicode_to_native(s): return s.encode("utf-8") + native_to_unicode = native_to_unicode_py2 + unicode_to_native = lambda s: s.encode("utf-8") else: - # No conversion required on Python >= 3 - def native_to_unicode(s): return s - def unicode_to_native(s): return s + # No conversion required on Python3 + native_to_unicode = lambda s: s + unicode_to_native = lambda s: s class TextEncoder(object): diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 77636ff6c..2c3e4b71f 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -23,7 +23,6 @@ # Dependency imports -from six.moves import xrange from tensor2tensor.layers import common_layers from tensor2tensor.utils import expert_utils diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index f85385e68..4dd1cde03 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -28,7 +28,6 @@ # Dependency imports -from six.moves import xrange import tensorflow as tf from tensorflow.python.framework import dtypes from tensorflow.python.framework import function diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py index af609e22c..7c31f4e05 100644 --- a/tensor2tensor/models/models.py +++ b/tensor2tensor/models/models.py @@ -33,12 +33,12 @@ from tensor2tensor.models import lstm from tensor2tensor.models import multimodel from tensor2tensor.models import neural_gpu -from tensor2tensor.models import rev_transformer from tensor2tensor.models import shake_shake from tensor2tensor.models import slicenet from tensor2tensor.models import transformer from tensor2tensor.models import transformer_alternative from tensor2tensor.models import transformer_moe +from tensor2tensor.models import transformer_revnet from tensor2tensor.models import transformer_vae from tensor2tensor.models import xception # pylint: enable=unused-import diff --git a/tensor2tensor/models/rev_transformer.py b/tensor2tensor/models/transformer_revnet.py similarity index 88% rename from tensor2tensor/models/rev_transformer.py rename to tensor2tensor/models/transformer_revnet.py index d1392a1ee..942a00660 100644 --- a/tensor2tensor/models/rev_transformer.py +++ b/tensor2tensor/models/transformer_revnet.py @@ -31,7 +31,7 @@ @registry.register_model -class RevTransformer(transformer.Transformer): +class TransformerRevnet(transformer.Transformer): """Reversible Residual Transformer. Layers are reversible and are recomputed on the backward pass. @@ -63,10 +63,10 @@ def model_fn_body(self, features): 1.0 - hparams.layer_prepostprocess_dropout) decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) - encoder_output = rev_transformer_encoder( + encoder_output = transformer_revnet_encoder( encoder_input, encoder_self_attention_bias, hparams) - decoder_output = rev_transformer_decoder( + decoder_output = transformer_revnet_decoder( decoder_input, encoder_output, decoder_self_attention_bias, encoder_decoder_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) @@ -74,10 +74,10 @@ def model_fn_body(self, features): return decoder_output -def rev_transformer_encoder(encoder_input, - encoder_self_attention_bias, - hparams, - name="encoder"): +def transformer_revnet_encoder(encoder_input, + encoder_self_attention_bias, + hparams, + name="encoder"): """A stack of transformer layers. Args: @@ -137,12 +137,12 @@ def g(x): return common_layers.layer_preprocess(y, hparams) -def rev_transformer_decoder(decoder_input, - encoder_output, - decoder_self_attention_bias, - encoder_decoder_attention_bias, - hparams, - name="decoder"): +def transformer_revnet_decoder(decoder_input, + encoder_output, + decoder_self_attention_bias, + encoder_decoder_attention_bias, + hparams, + name="decoder"): """A stack of transformer layers. Args: @@ -218,8 +218,8 @@ def g(x): @registry.register_hparams -def rev_transformer_base(): - """Base hparams for RevTransformer.""" +def transformer_revnet_base(): + """Base hparams for TransformerRevnet.""" hparams = transformer.transformer_big() # Use settings from transformer_n_da @@ -231,11 +231,11 @@ def rev_transformer_base(): @registry.register_hparams -def rev_transformer_big(): - """Base hparams for RevTransformer.""" - hparams = rev_transformer_base() +def transformer_revnet_big(): + """Base hparams for TransformerRevnet.""" + hparams = transformer_revnet_base() - # The RevTransformer uses significantly less memory than the Transformer. + # The TransformerRevnet uses significantly less memory than the Transformer. # Increase batch size and model size. hparams.batch_size *= 2 hparams.hidden_size *= 2 diff --git a/tensor2tensor/models/rev_transformer_test.py b/tensor2tensor/models/transformer_revnet_test.py similarity index 87% rename from tensor2tensor/models/rev_transformer_test.py rename to tensor2tensor/models/transformer_revnet_test.py index da9e15f72..66b493b0b 100644 --- a/tensor2tensor/models/rev_transformer_test.py +++ b/tensor2tensor/models/transformer_revnet_test.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for RevTransformer.""" +"""Tests for TransformerRevnet.""" from __future__ import absolute_import from __future__ import division @@ -24,13 +24,13 @@ import numpy as np from tensor2tensor.data_generators import problem_hparams -from tensor2tensor.models import rev_transformer +from tensor2tensor.models import transformer_revnet import tensorflow as tf -def rev_transformer_test(): - hparams = rev_transformer.rev_transformer_base() +def transformer_revnet_test(): + hparams = transformer_revnet.transformer_revnet_base() hparams.num_hidden_layers = 2 hparams.hidden_size = 128 hparams.filter_size = 512 @@ -38,14 +38,14 @@ def rev_transformer_test(): return hparams -class RevTransformerTest(tf.test.TestCase): +class TransformerRevnetTest(tf.test.TestCase): def testTransformer(self): batch_size = 3 input_length = 5 target_length = 7 vocab_size = 9 - hparams = rev_transformer_test() + hparams = transformer_revnet_test() p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, vocab_size) hparams.problems = [p_hparams] @@ -58,7 +58,7 @@ def testTransformer(self): "targets": tf.constant(targets, dtype=tf.int32), "target_space_id": tf.constant(1, dtype=tf.int32), } - model = rev_transformer.RevTransformer( + model = transformer_revnet.TransformerRevnet( hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 5682ae820..22fd727f9 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -70,13 +70,6 @@ "How many recent checkpoints to keep.") flags.DEFINE_bool("experimental_optimize_placement", False, "Optimize ops placement with experimental session options.") -flags.DEFINE_integer("keep_checkpoint_every_n_hours", 10000, - "Number of hours between each checkpoint to be saved. " - "The default value of 10,000 hours effectively disables the feature.") -flags.DEFINE_integer("save_checkpoints_secs", 0, - "Save checkpoints every this many seconds. " - "Default=0 means let tensorflow.contrib.learn.python.learn decide, " - "which is currently equivalent to 600, i.e. 10 minutes.") # Distributed training flags flags.DEFINE_string("master", "", "Address of TensorFlow master.") @@ -210,9 +203,7 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): model_dir=output_dir, gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction, session_config=session_config(), - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, - save_checkpoints_secs=FLAGS.save_checkpoints_secs,)) + keep_checkpoint_max=FLAGS.keep_checkpoint_max)) # Store the hparams in the estimator as well estimator.hparams = hparams return estimator, { diff --git a/tensor2tensor/utils/yellowfin.py b/tensor2tensor/utils/yellowfin.py index c90d43b83..aeb14e76e 100644 --- a/tensor2tensor/utils/yellowfin.py +++ b/tensor2tensor/utils/yellowfin.py @@ -20,17 +20,19 @@ from __future__ import print_function # Dependency imports + +import numpy as np import tensorflow as tf from tensorflow.python.framework import ops # Values for gate_gradients. -GATE_NONE = tf.train.Optimizer.GATE_NONE -GATE_OP = tf.train.Optimizer.GATE_OP -GATE_GRAPH = tf.train.Optimizer.GATE_GRAPH +GATE_NONE = 0 +GATE_OP = 1 +GATE_GRAPH = 2 -class YellowFinOptimizer(object): +class YellowFinOptimizer(tf.train.Optimizer): """Optimizer that implements the YellowFin algorithm. See [Zhang et. al., 2017](https://arxiv.org/abs/1706.03471) for details. @@ -43,37 +45,20 @@ def __init__(self, beta=0.999, curvature_window_width=20, zero_debias=True, - delta_mu=0.0, - sparsity_debias=True, - use_locking=False, - name="YellowFin", - use_nesterov=False): + delta_mu=0.0): """Construct a new YellowFin optimizer. - Implemented as a wrapper around tf.train.MomentumOptimizer Args: learning_rate: A Tensor or a floating point value. The learning rate. - Set to 1.0 in the paper. momentum: A Tensor or a floating point value. The momentum. - Set to 0.0 in the paper. clip_thresh: A Tensor or a floating point value. The cliping threshold for - `tf.clip_by_global_norm`. If None, no clipping will be carried out. + tf.clip_by_global_norm. If None, no clipping will be carried out. beta: A float value or a constant float tensor. The smoothing parameter for estimations. curvature_window_width: A int value or a constant int tensor. The curvature window width. zero_debias: A boolean, zero debias moving-averages. delta_mu: For extensions. Not necessary in the basic use. - sparsity_debias: A boolean. Gradient norm and curvature are - biased to larger values when calculated with sparse gradient. - This is useful when the model is very sparse, e.g. LSTM with - word embedding. For non-sparse CNN, turning it off could - slightly accelerate the speed. - use_locking: If True, use locks for update operations. - name: Optional name prefix for the operations created when - applying gradients. Defaults to "YellowFin". - use_nesterov: If True, the underlying MomentumOptimizer uses Nesterov - Momentum. Set to False in the default YellowFin algorithm. Note: clip_thresh is the threshold value on ||lr * gradient||, @@ -96,28 +81,27 @@ def __init__(self, self._mu = momentum # Set lr and mu tensor. - self._lr_var = tf.get_variable("YF_lr", - dtype=tf.float32, - trainable=False, - initializer=learning_rate) - self._mu_var = tf.get_variable("YF_mu", - dtype=tf.float32, - trainable=False, - initializer=tf.constant(momentum)) + self._lr_var = tf.Variable(learning_rate, + dtype=tf.float32, + name="YF_lr", + trainable=False) + self._mu_var = tf.Variable(momentum, + dtype=tf.float32, + name="YF_mu", + trainable=False) # Tuning factor for learning rates step or decaying scheme. - self.lr_factor = tf.get_variable("YF_lr_factor", - dtype=tf.float32, - trainable=False, - initializer=tf.constant(1.0)) + self.lr_factor = tf.Variable(1.0, + dtype=tf.float32, + name="YF_lr_factor", + trainable=False) # Gradient Clipping Threshold. if clip_thresh is not None: - self._clip_thresh_var = \ - tf.get_variable("YF_clip_thresh", - dtype=tf.float32, - trainable=False, - initializer=tf.constant(clip_thresh)) + self._clip_thresh_var = tf.Variable(clip_thresh, + dtype=tf.float32, + name="YF_clip_thresh", + trainable=False) else: self._clip_thresh_var = None @@ -127,18 +111,17 @@ def __init__(self, # Init momentum optimizer. self._momentum_optimizer = tf.train.MomentumOptimizer( - self._lr_m, self._mu_m, use_locking, name, use_nesterov) + self._lr_m, self._mu_m) # Moving average for statistics. self._beta = beta self._moving_averager = None # Step counting. - self._step = tf.get_variable("YF_step", - dtype=tf.int32, - trainable=False, - initializer=tf.constant(0)) - + self._step = tf.Variable(0, + dtype=tf.int32, + name="YF_step", + trainable=False) # YF_step + 1 op. self._increment_step_op = None @@ -147,7 +130,6 @@ def __init__(self, # Moving-averages. self._zero_debias = zero_debias - self._sparsity_debias = sparsity_debias # For curvature range. self.curvature_window_width = curvature_window_width @@ -188,32 +170,27 @@ def __init__(self, # and (zero_devias) moving-averages. self._moving_averager = None - # Handling Sparse Matrix - self._sparsity = None - self._sparsity_avg = None - def _curvature_range(self): """Curvature range. Returns: h_max_t, h_min_t ops """ - self._curv_win = tf.get_variable("curv_win", - dtype=tf.float32, - trainable=False, - shape=[self.curvature_window_width, ], - initializer=tf.zeros_initializer) - # We use log smoothing for curvature range + self._curv_win = tf.Variable(np.zeros([self.curvature_window_width,]), + dtype=tf.float32, + name="curv_win", + trainable=False) + self._curv_win = tf.scatter_update(self._curv_win, self._step % self.curvature_window_width, - tf.log(self._grad_norm_squared)) + self._grad_norm_squared) # Note here the iterations start from iteration 0 valid_window = tf.slice(self._curv_win, tf.constant([0,]), tf.expand_dims( tf.minimum( tf.constant(self.curvature_window_width), - self._step + 1), dim=0)) + self._step + 1), axis=0)) self._h_min_t = tf.reduce_min(valid_window) self._h_max_t = tf.reduce_max(valid_window) @@ -221,17 +198,11 @@ def _curvature_range(self): with tf.control_dependencies([self._h_min_t, self._h_max_t]): avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t]) with tf.control_dependencies([avg_op]): - self._h_min = tf.exp( - tf.identity(self._moving_averager.average(self._h_min_t))) - self._h_max = tf.exp( - tf.identity(self._moving_averager.average(self._h_max_t))) - if self._sparsity_debias: - self._h_min = self._h_min * self._sparsity_avg - self._h_max = self._h_max * self._sparsity_avg + self._h_min = tf.identity(self._moving_averager.average(self._h_min_t)) + self._h_max = tf.identity(self._moving_averager.average(self._h_max_t)) curv_range_ops.append(avg_op) return curv_range_ops # h_max_t, h_min_t - def _grad_variance(self): """Estimate of gradient Variance. @@ -241,7 +212,7 @@ def _grad_variance(self): grad_var_ops = [] tensor_to_avg = [] for t, g in zip(self._vars, self._grad): - if isinstance(g, ops.IndexedSlices): + if isinstance(g, tf.IndexedSlices): tensor_to_avg.append( tf.reshape(tf.unsorted_segment_sum(g.values, g.indices, @@ -255,17 +226,12 @@ def _grad_variance(self): self._grad_avg = [self._moving_averager.average(val) for val in tensor_to_avg] self._grad_avg_squared = [tf.square(val) for val in self._grad_avg] - + self._grad_avg_squared = tf.add_n([tf.reduce_sum(val) + for val in self._grad_avg_squared]) # Compute Variance - self._grad_var = tf.maximum( - tf.constant(1e-6, dtype=self._grad_norm_squared_avg.dtype), - self._grad_norm_squared_avg - - tf.add_n([tf.reduce_sum(val) for val in self._grad_avg_squared])) - if self._sparsity_debias: - self._grad_var *= self._sparsity_avg + self._grad_var = self._grad_norm_squared_avg - self._grad_avg_squared return grad_var_ops # C_t - def _dist_to_opt(self): """Distance to optimum. @@ -273,7 +239,7 @@ def _dist_to_opt(self): D_t ops """ dist_to_opt_ops = [] - # Running average of the norm of gradient + # Running average of the norm of gradeint self._grad_norm = tf.sqrt(self._grad_norm_squared) avg_op = self._moving_averager.apply([self._grad_norm,]) dist_to_opt_ops.append(avg_op) @@ -288,30 +254,8 @@ def _dist_to_opt(self): with tf.control_dependencies([avg_op]): self._dist_to_opt_avg = tf.identity( self._moving_averager.average(self._d_t)) - if self._sparsity_debias: - self._dist_to_opt_avg /= tf.sqrt(self._sparsity_avg) return dist_to_opt_ops # D_t - - def _grad_sparsity(self): - """ - # If the sparse minibatch gradient has 10 percent of its entries - # non-zero, its sparsity is 0.1. - # The norm of dense gradient averaged from full dataset - # are roughly estimated norm of minibatch - # sparse gradient norm * sqrt(sparsity) - # An extension maybe only correct the sparse blob. - """ - non_zero_cnt = tf.add_n([tf.count_nonzero(g) for g in self._grad]) - all_entry_cnt = tf.add_n([tf.size(g) for g in self._grad]) - self._sparsity = tf.cast(non_zero_cnt, self._grad[0].dtype) \ - / tf.cast(all_entry_cnt, self._grad[0].dtype) - avg_op = self._moving_averager.apply([self._sparsity, ]) - with tf.control_dependencies([avg_op]): - self._sparsity_avg = self._moving_averager.average(self._sparsity) - return avg_op - - def _prepare_variables(self): """Prepare Variables for YellowFin. @@ -320,7 +264,7 @@ def _prepare_variables(self): """ self._moving_averager = tf.train.ExponentialMovingAverage( decay=self._beta, zero_debias=self._zero_debias) - # assert self._grad is not None and len(self._grad) > 0 + assert self._grad # List for the returned Operations prepare_variables_op = [] @@ -338,10 +282,6 @@ def _prepare_variables(self): self._grad_norm_squared = [tf.reduce_sum(g_sq) for g_sq in self._grad_squared] - if self._sparsity_debias: - avg_op_sparsity = self._grad_sparsity() - prepare_variables_op.append(avg_op_sparsity) - # The following running average on squared norm of gradient # is shared by grad_var and dist_to_opt avg_op = self._moving_averager.apply(self._grad_norm_squared) @@ -355,47 +295,6 @@ def _prepare_variables(self): prepare_variables_op.append(avg_op) return tf.group(*prepare_variables_op) - - def _get_cubic_root(self): - """ - # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2 - # where x = sqrt(mu). - # We substitute x, which is sqrt(mu), with x = y + 1. - # It gives y^3 + py = q - # where p = (D^2 h_min^2)/(2*C) and q = -p. - # We use the Vieta's substution to compute the root. - # There is only one real solution y (which is in [0, 1] ). - # http://mathworld.wolfram.com/VietasSubstitution.html - """ - assert_array = \ - [tf.Assert( - tf.logical_not(tf.is_nan(self._dist_to_opt_avg)), - [self._dist_to_opt_avg, ]), \ - tf.Assert( - tf.logical_not(tf.is_nan(self._h_min)), - [self._h_min,]), \ - tf.Assert( - tf.logical_not(tf.is_nan(self._grad_var)), - [self._grad_var,]), \ - tf.Assert( - tf.logical_not(tf.is_inf(self._dist_to_opt_avg)), - [self._dist_to_opt_avg, ]), \ - tf.Assert( - tf.logical_not(tf.is_inf(self._h_min)), - [self._h_min,]), \ - tf.Assert( - tf.logical_not(tf.is_inf(self._grad_var)), - [self._grad_var,])] - - with tf.control_dependencies(assert_array): - p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var - w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0 - w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0/3.0) - y = w - p / 3.0 / w - x = y + 1 - return x - - def _get_lr_tensor(self): """Get lr minimzing the surrogate. @@ -405,20 +304,40 @@ def _get_lr_tensor(self): lr = (1.0 - tf.sqrt(self._mu))**2 / self._h_min return lr - def _get_mu_tensor(self): """Get the min mu which minimize the surrogate. Returns: The mu_t. """ - root = self._get_cubic_root() + const_fact = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var + coef = tf.Variable([-1.0, 3.0, 0.0, 1.0], + dtype=tf.float32, + name="cubic_solver_coef") + coef = tf.scatter_update(coef, + tf.constant(2), + -(3 + const_fact)) + roots = tf.py_func(np.roots, + [coef], + Tout=tf.complex64, + stateful=False) + + # Filter out the correct root + root_idx = tf.logical_and( + tf.logical_and( + tf.greater(tf.real(roots), tf.constant(0.0)), + tf.less(tf.real(roots), tf.constant(1.0))), + tf.less(tf.abs(tf.imag(roots)), 1e-5)) + + # In case there are two duplicated roots satisfying the above condition + root = tf.reshape(tf.gather(tf.gather(roots, tf.where(root_idx)), + tf.constant(0)), + shape=[]) + dr = self._h_max / self._h_min - mu = tf.maximum( - root**2, ((tf.sqrt(dr) - 1) / (tf.sqrt(dr) + 1))**2) + mu = tf.maximum(tf.real(root)**2, ((tf.sqrt(dr) - 1)/(tf.sqrt(dr) + 1))**2) return mu - def _yellowfin(self): """YellowFin auto-tuning optimizer based on momentum SGD. @@ -447,12 +366,11 @@ def _yellowfin(self): # squared distance from the optimum of a local quadratic # approximation after a single step while keeping all directions in the # robust region. - self._mu = tf.identity(tf.cond(self._do_tune, - lambda: self._get_mu_tensor(), + self._mu = tf.identity(tf.cond(self._do_tune, self._get_mu_tensor, lambda: self._mu_var)) with tf.control_dependencies([self._mu]): self._lr = tf.identity(tf.cond(self._do_tune, - lambda: self._get_lr_tensor(), + self._get_lr_tensor, lambda: self._lr_var)) # Tune learning rate and momentum. @@ -465,12 +383,6 @@ def _yellowfin(self): yellowfin_ops = tf.group(*yellowfin_ops) return yellowfin_ops - - def get_name(self): - """Get Optimizer Name""" - return self._momentum_optimizer.get_name() - - def apply_gradients(self, grads_and_vars, global_step=None, name=None): """Applying gradients aand tune hyperparams with YellowFin. @@ -497,28 +409,18 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None): with tf.variable_scope("apply_updates"): # Gradient Clipping? if self._clip_thresh_var is not None: - self._grad, _ = tf.clip_by_global_norm( + self._grads_clip, self._grads_norm = tf.clip_by_global_norm( self._grad, self._clip_thresh_var) apply_grad_op = self._momentum_optimizer.apply_gradients( - zip(self._grad, self._vars), - global_step=global_step, - name=name) + zip(self._grads_clip, self._vars), global_step=global_step) else: apply_grad_op = self._momentum_optimizer.apply_gradients( - zip(self._grad, self._vars), - global_step=global_step, - name=name) + zip(self._grad, self._vars), global_step=global_step) # Begin lr and mu tuning. with tf.variable_scope("prepare_yellowFin_variables"): - # the dependencies ideally only need to be after clip is done, - # i.e. dependes on self._grads. However, the control_dependencies - # does not support indexed slice for sparse gradients. - # The alternative dependencies here might be slightly slower due - # to less parallelization. - with tf.control_dependencies([apply_grad_op, ]): - prepare_variables_op = self._prepare_variables() + prepare_variables_op = self._prepare_variables() with tf.variable_scope("yellowfin"): with tf.control_dependencies([prepare_variables_op]): @@ -533,7 +435,6 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None): yellowfin_op, self._increment_step_op) - def compute_gradients(self, loss, var_list, @@ -574,7 +475,6 @@ def compute_gradients(self, colocate_gradients_with_ops=colocate_gradients_with_ops, grad_loss=grad_loss) - def minimize(self, loss, global_step=None, @@ -633,31 +533,4 @@ def minimize(self, print("g ", g) print("v ", v) - return self.apply_gradients(grads_and_vars, - global_step=global_step, - name=name) - - - def get_slot(self, var, name): - """ - Return a slot named `name` created for `var` by - the underlying MomentumOptimizer. - - Args: - var: A variable passed to `minimize()` or `apply_gradients()`. - name: A string. - - Returns: - The `Variable` for the slot if it was created, `None` otherwise. - """ - return self._momentum_optimizer.get_slot(var, name) - - def get_slot_names(self): - """ - Return a list of the names of the slots created by the - underlying MomentumOptimizer. - - Returns: - A list of strings. - """ - return self._momentum_optimizer.get_slot_names() + return self.apply_gradients(grads_and_vars, global_step=global_step) From 4a8a715ded57fda4bc2397966356f65fc26aaaee Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Thu, 17 Aug 2017 14:05:18 -0700 Subject: [PATCH 0289/4095] Big fix in greedy infer when loss is None PiperOrigin-RevId: 165626088 --- tensor2tensor/utils/t2t_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 8fcf2482d..d3fc6dac1 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -327,7 +327,7 @@ def infer_step(recent_output, recent_logits, unused_loss): # Assuming we have one shard for logits. logits = tf.concat([recent_logits, logits[0][:, -1:]], 1) - loss = sum(losses.values()) + loss = sum([l for l in losses.values() if l is not None]) return samples, logits, loss # Create an initial output tensor. This will be passed From 3dd2ec6e8b29621e40b07e6c0138ab8563839456 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 17 Aug 2017 15:26:45 -0700 Subject: [PATCH 0290/4095] Move to Datasets for input pipeline and bump TF requirement to v1.3.0 PiperOrigin-RevId: 165637834 --- setup.py | 4 +- tensor2tensor/utils/data_reader.py | 190 ++++++++++++++---------- tensor2tensor/utils/data_reader_test.py | 12 +- tensor2tensor/utils/input_fn_builder.py | 6 +- 4 files changed, 120 insertions(+), 92 deletions(-) diff --git a/setup.py b/setup.py index dd80dfd48..088f9b14c 100644 --- a/setup.py +++ b/setup.py @@ -26,8 +26,8 @@ 'six', ], extras_require={ - 'tensorflow': ['tensorflow>=1.2.0rc1'], - 'tensorflow_gpu': ['tensorflow-gpu>=1.2.0rc1'], + 'tensorflow': ['tensorflow>=1.3.0'], + 'tensorflow_gpu': ['tensorflow-gpu>=1.3.0'], }, tests_require=['nose'], test_suite='nose.collector', diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 03e7720b6..f6d9c256a 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -20,9 +20,12 @@ import math import os +import random # Dependency imports +import numpy as np + import six from six.moves import zip # pylint: disable=redefined-builtin @@ -88,8 +91,7 @@ def examples_reader(data_sources, by default (if this is None), we decode all items. Returns: - A dictionary mapping each data_field to a corresponding 1D int64 tensor - read from the created Dataset. + A tf.contrib.data.Dataset of dict<feature name, Tensor> """ def decode_record(record): @@ -113,18 +115,17 @@ def decode_record(record): return dict(zip(decode_items, decoded)) with tf.name_scope("examples_in"): - # Read serialized examples using slim parallel_reader. data_files = tf.contrib.slim.parallel_reader.get_data_files(data_sources) - num_readers = min(4 if training else 1, len(data_files)) - _, example_serialized = tf.contrib.slim.parallel_reader.parallel_read( - data_sources, - tf.TFRecordReader, - num_epochs=None if training else 1, - shuffle=training, - capacity=2 * capacity, - min_after_dequeue=capacity, - num_readers=num_readers) - return decode_record(example_serialized) + if training: + random.shuffle(data_files) + dataset = tf.contrib.data.TFRecordDataset(data_files) + num_threads = min(4 if training else 1, len(data_files)) + dataset = dataset.map(decode_record, num_threads=num_threads) + if training: + dataset = dataset.shuffle(capacity) + # Loop inifinitely if training, just once otherwise + dataset = dataset.repeat(None if training else 1) + return dataset def preprocessing(examples, data_file_pattern): @@ -132,9 +133,10 @@ def preprocessing(examples, data_file_pattern): # This function is for obsolete problems only, as we're porting them # all to the Problem class and its preprocess_examples method. Don't add. if "image" in data_file_pattern: + def resize(img, size): - return tf.to_int64(tf.image.resize_images( - img, [size, size], tf.image.ResizeMethod.AREA)) + return tf.to_int64( + tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) if "img2img" in data_file_pattern: inputs = examples["inputs"] @@ -144,7 +146,7 @@ def resize(img, size): inputs = examples["inputs"] # Remove boundaries in CelebA images. Remove 40 pixels each side # vertically and 20 pixels each side horizontally. - inputs = tf.image.crop_to_bounding_box(inputs, 40, 20, 218-80, 178-40) + inputs = tf.image.crop_to_bounding_box(inputs, 40, 20, 218 - 80, 178 - 40) examples["inputs"] = resize(inputs, 8) examples["targets"] = resize(inputs, 32) elif "audio" in data_file_pattern: @@ -218,53 +220,16 @@ def default_example_reading_spec(data_file_pattern): return data_fields, data_items_to_decoders -def input_pipeline(problem, data_file_pattern, capacity, mode, hparams): - """Input pipeline, returns a dictionary of tensors from queues.""" - if problem is None: - data_fields, data_items_to_decoders = default_example_reading_spec( - data_file_pattern) - else: - data_fields, data_items_to_decoders = problem.example_reading_spec() - - if data_file_pattern is None: - # Create placeholders for input, rather than reading data from disk. - return feature_placeholders(data_fields) - - examples = examples_reader( - [data_file_pattern], - data_fields, - training=(mode == tf.contrib.learn.ModeKeys.TRAIN), - capacity=capacity, - data_items_to_decoders=data_items_to_decoders) - - if problem is None: - examples = preprocess_examples_common(examples, hparams) - examples = preprocessing(examples, data_file_pattern) - else: - examples = problem.preprocess_examples(examples, mode, hparams) - - # We do not want int64s as they are not supported on GPUs. - examples = cast_int64_to_int32(examples) - - return examples - - -def batch_examples(examples, batching_scheme): - """Given a queue of examples, create batches of examples with similar lengths. - - We assume that examples is a dictionary with string keys and tensor values, - possibly coming from a queue, e.g., constructed by examples_reader above. - Each tensor in examples is assumed to be 1D. We will put tensors of similar - length into batches togeter. We return a dictionary with the same keys as - examples, and with values being batches of size batch_size. If elements have - different lengths, they are padded with 0s. This function is based on - tf.contrib.training.bucket_by_sequence_length so see there for details. - - For example, if examples is a queue containing [1, 2, 3] and [4], then - this function with batch_size=2 will return a batch [[1, 2, 3], [4, 0, 0]]. +def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, + batching_scheme): + """Input pipeline, returns a dictionary of tensors from queues. Args: - examples: a dictionary with string keys and 1D tensor values. + problem: Problem instance for which to build the input pipeline. + data_file_pattern: file pattern for input files. + capacity: int, data pipeline buffer capacity. + mode: tf.contrib.learn.ModeKeys entry. + hparams: an HParams object. batching_scheme: a dictionary containing "boundaries": a list of integers for the boundaries that will be used for bucketing; see tf.contrib.training.bucket_by_sequence_length @@ -273,27 +238,88 @@ def batch_examples(examples, batching_scheme): "max_length": an integer. We drop sequences which are longer. Returns: - A dictionary with the same keys as examples and with values being batches - of examples padded with 0s, i.e., [batch_size x length] tensors. + dict <feature name, batched and padded Tensor> """ - with tf.name_scope("batch_examples"): - # The queue to bucket on will be chosen based on maximum length. - max_length = 0 - for v in examples.values(): - # For images the sequence length is the size of the spatial dimensions. - sequence_length = (tf.shape(v)[0] if len(v.get_shape()) < 3 else - tf.shape(v)[0] * tf.shape(v)[1]) - max_length = tf.maximum(max_length, sequence_length) - (_, outputs) = tf.contrib.training.bucket_by_sequence_length( - max_length, - examples, - batching_scheme["batch_sizes"], - [b + 1 for b in batching_scheme["boundaries"]], - capacity=2, # Number of full batches to store, we don't need many. - bucket_capacities=[2 * b for b in batching_scheme["batch_sizes"]], - dynamic_pad=True, - keep_input=(max_length <= batching_scheme["max_length"])) - return outputs + with tf.name_scope("input_pipeline"): + if problem is None: + data_fields, data_items_to_decoders = default_example_reading_spec( + data_file_pattern) + else: + data_fields, data_items_to_decoders = problem.example_reading_spec() + + if data_file_pattern is None: + # Create placeholders for input, rather than reading data from disk. + return feature_placeholders(data_fields) + + is_training = mode == tf.contrib.learn.ModeKeys.TRAIN + dataset = examples_reader( + [data_file_pattern], + data_fields, + training=is_training, + capacity=capacity, + data_items_to_decoders=data_items_to_decoders) + + def example_len(ex): + length = 0 + # The queue to bucket on will be chosen based on maximum length. + for v in ex.values(): + # For images the sequence length is the size of the spatial dimensions. + sequence_length = (tf.shape(v)[0] if len(v.get_shape()) < 3 else + tf.shape(v)[0] * tf.shape(v)[1]) + length = tf.maximum(length, sequence_length) + return length + + def preprocess(example): + """Preprocessing for example.""" + if problem is None: + example = preprocess_examples_common(example, hparams) + example = preprocessing(example, data_file_pattern) + else: + example = problem.preprocess_examples(example, mode, hparams) + + # We do not want int64s as they are not supported on GPUs. + example = cast_int64_to_int32(example) + + return example + + def example_to_bucket_id(example): + """Return int64 id of the length bucket for this example.""" + seq_length = example_len(example) + + # From tf.contrib.training.bucket_by_sequence_length: + boundaries = list(batching_scheme["boundaries"]) + buckets_min = [np.iinfo(np.int32).min] + boundaries + buckets_max = boundaries + [np.iinfo(np.int32).max] + conditions_c = tf.logical_and( + tf.less_equal(buckets_min, seq_length), + tf.less(seq_length, buckets_max)) + bucket_id = tf.reduce_min(tf.where(conditions_c)) + + return bucket_id + + def batching_fn(bucket_id, grouped_dataset): + batch_sizes = tf.constant(batching_scheme["batch_sizes"], dtype=tf.int64) + batch_size = batch_sizes[bucket_id] + + # Pad each dimension of each feature so that they match. + padded_shapes = dict( + [(name, [None] * len(shape)) + for name, shape in grouped_dataset.output_shapes.items()]) + return grouped_dataset.padded_batch(batch_size, padded_shapes) + + def allowed_length(ex): + return tf.less_equal(example_len(ex), batching_scheme["max_length"]) + + num_threads = 4 if is_training else 1 + dataset = dataset.map(preprocess, num_threads=num_threads) + dataset = dataset.filter(allowed_length) + + window_size = max(batching_scheme["batch_sizes"]) + dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, + window_size) + + batched_examples = dataset.make_one_shot_iterator().get_next() + return batched_examples def bucket_boundaries(max_length, min_length=8, mantissa_bits=2): @@ -338,7 +364,7 @@ def hparams_to_batching_scheme(hparams, ] batch_sizes = [b * shard_multiplier for b in batch_sizes] max_length *= length_multiplier - boundaries = [boundary * length_multiplier for boundary in boundaries] + boundaries = [boundary * length_multiplier + 1 for boundary in boundaries] return { "boundaries": boundaries, "batch_sizes": batch_sizes, diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index ea98da06d..3d469af20 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -50,19 +50,21 @@ def test_generator(): generator_utils.generate_files(test_generator(), filenames) self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) - examples_train = data_reader.examples_reader( + train_dataset = data_reader.examples_reader( [tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) }, training=True) - examples_eval = data_reader.examples_reader( + examples_train = train_dataset.make_one_shot_iterator().get_next() + eval_dataset = data_reader.examples_reader( [tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64), "floats": tf.VarLenFeature(tf.float32) }, training=False) + examples_eval = eval_dataset.make_one_shot_iterator().get_next() with tf.train.MonitoredSession() as session: # Evaluation data comes in the same order as in the file, check 10. for i in xrange(10): @@ -103,15 +105,17 @@ def test_generator(): generator_utils.generate_files(test_generator(), filenames) self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) - examples_train = data_reader.examples_reader([tmp_file_path + "*"], { + train_dataset = data_reader.examples_reader([tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) }, True) + examples_train = train_dataset.make_one_shot_iterator().get_next() batch_train = data_reader.batch_examples(examples_train, 4) - examples_eval = data_reader.examples_reader([tmp_file_path + "*"], { + eval_dataset = data_reader.examples_reader([tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) }, False) + examples_eval = eval_dataset.make_one_shot_iterator().get_next() batch_eval = data_reader.batch_examples(examples_eval, 2) session, coord = tf.Session(), tf.train.Coordinator() with session.as_default(): diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index d1b68aa02..f8f73a0ee 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -92,11 +92,9 @@ def input_fn(): with tf.device("/cpu:0"): # Input reading on CPU capacity = p_hparams.max_expected_batch_size_per_shard capacity *= num_datashards - examples = data_reader.input_pipeline( + feature_map = data_reader.input_pipeline( problem_instance, data_file_patterns and data_file_patterns[n], - capacity, mode, hparams) - feature_map = data_reader.batch_examples( - examples, + capacity, mode, hparams, data_reader.hparams_to_batching_scheme( hparams, shard_multiplier=num_datashards, From 0aeda96f0cb25542da5989eea7b9f1d4d1320978 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 17 Aug 2017 16:36:58 -0700 Subject: [PATCH 0291/4095] Fix preprocess_examples signature in image problems PiperOrigin-RevId: 165646684 --- tensor2tensor/data_generators/image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index d9a6be6ff..905b084b0 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -199,7 +199,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): "instructions at https://github.com/tensorflow/models/blob/master" "/inception/README.md#getting-started") - def preprocess_examples(self, examples, mode): + def preprocess_examples(self, examples, mode, _): return imagenet_preprocess_examples(examples, mode) @@ -638,7 +638,7 @@ def train_shards(self): def dev_shards(self): return 10 - def preprocess_examples(self, examples, mode): + def preprocess_examples(self, examples, mode, _): return imagenet_preprocess_examples(examples, mode) def generator(self, data_dir, tmp_dir, is_training): From 014f222ffecb566ec013fcc2a215ae5a4b910ccf Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 18 Aug 2017 10:20:39 -0700 Subject: [PATCH 0292/4095] Add rev_block test with conv and batch norm and add bucket_by_sequence_length function PiperOrigin-RevId: 165720429 --- tensor2tensor/layers/rev_block_test.py | 19 ++++++++- tensor2tensor/utils/data_reader.py | 56 +++++++++++++++++-------- tensor2tensor/utils/input_fn_builder.py | 4 +- 3 files changed, 58 insertions(+), 21 deletions(-) diff --git a/tensor2tensor/layers/rev_block_test.py b/tensor2tensor/layers/rev_block_test.py index dd4a62993..a668ff726 100644 --- a/tensor2tensor/layers/rev_block_test.py +++ b/tensor2tensor/layers/rev_block_test.py @@ -55,8 +55,9 @@ def g(x): # pylint: disable=function-redefined if g_side_input is None: g_side_input = [] - x = tf.random_uniform([self.BATCH_SIZE, self.CHANNELS], dtype=tf.float32) - x1, x2 = tf.split(x, 2, axis=1) + if x is None: + x = tf.random_uniform([self.BATCH_SIZE, self.CHANNELS], dtype=tf.float32) + x1, x2 = tf.split(x, 2, axis=-1) with tf.variable_scope("rev_test") as vs: y1_rev, y2_rev = rev_block.rev_block( @@ -121,6 +122,20 @@ def f2(x): self._testRevBlock(f=[f1, f2, f1, f2]) + def testConvAndBatchNorm(self): + + x = tf.random_uniform( + [self.BATCH_SIZE, 10, self.CHANNELS], dtype=tf.float32) + + def f(x): + x = tf.layers.conv1d(x, self.CHANNELS // 2, 3, padding="same") + x = tf.layers.batch_normalization(x, training=True) + x = tf.layers.conv1d(x, self.CHANNELS // 2, 3, padding="same") + x = tf.layers.batch_normalization(x, training=True) + return x + + self._testRevBlock(x=x, f=f) + class FnWithCustomGradTest(tf.test.TestCase): diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index f6d9c256a..ee7da4b19 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -282,12 +282,43 @@ def preprocess(example): return example + def allowed_length(ex): + return tf.less_equal(example_len(ex), batching_scheme["max_length"]) + + num_threads = 4 if is_training else 1 + dataset = dataset.map(preprocess, num_threads=num_threads) + dataset = dataset.filter(allowed_length) + + dataset = bucket_by_sequence_length(dataset, example_len, + batching_scheme["boundaries"], + batching_scheme["batch_sizes"]) + + batched_examples = dataset.make_one_shot_iterator().get_next() + return batched_examples + + +def bucket_by_sequence_length(dataset, example_length_fn, bucket_boundaries, + bucket_batch_sizes): + """Bucket entries in dataset by length. + + Args: + dataset: Dataset of dict<feature name, Tensor>. + example_length_fn: function from example to int, determines the length of + the example, which will determine the bucket it goes into. + bucket_boundaries: list<int>, boundaries of the buckets. + bucket_batch_sizes: list<int>, batch size per bucket. + + Returns: + Dataset of padded and batched examples. + """ + with tf.name_scope("bucket_by_seq_length"): + def example_to_bucket_id(example): """Return int64 id of the length bucket for this example.""" - seq_length = example_len(example) + seq_length = example_length_fn(example) # From tf.contrib.training.bucket_by_sequence_length: - boundaries = list(batching_scheme["boundaries"]) + boundaries = list(bucket_boundaries) buckets_min = [np.iinfo(np.int32).min] + boundaries buckets_max = boundaries + [np.iinfo(np.int32).max] conditions_c = tf.logical_and( @@ -298,7 +329,7 @@ def example_to_bucket_id(example): return bucket_id def batching_fn(bucket_id, grouped_dataset): - batch_sizes = tf.constant(batching_scheme["batch_sizes"], dtype=tf.int64) + batch_sizes = tf.constant(bucket_batch_sizes, dtype=tf.int64) batch_size = batch_sizes[bucket_id] # Pad each dimension of each feature so that they match. @@ -307,22 +338,13 @@ def batching_fn(bucket_id, grouped_dataset): for name, shape in grouped_dataset.output_shapes.items()]) return grouped_dataset.padded_batch(batch_size, padded_shapes) - def allowed_length(ex): - return tf.less_equal(example_len(ex), batching_scheme["max_length"]) - - num_threads = 4 if is_training else 1 - dataset = dataset.map(preprocess, num_threads=num_threads) - dataset = dataset.filter(allowed_length) - - window_size = max(batching_scheme["batch_sizes"]) + window_size = max(bucket_batch_sizes) dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, window_size) - - batched_examples = dataset.make_one_shot_iterator().get_next() - return batched_examples + return dataset -def bucket_boundaries(max_length, min_length=8, mantissa_bits=2): +def _bucket_boundaries(max_length, min_length=8, mantissa_bits=2): """A default set of length-bucket boundaries.""" x = min_length boundaries = [] @@ -356,7 +378,7 @@ def hparams_to_batching_scheme(hparams, a dictionary """ max_length = hparams.max_length or hparams.batch_size - boundaries = bucket_boundaries( + boundaries = _bucket_boundaries( max_length, mantissa_bits=hparams.batching_mantissa_bits) batch_sizes = [ max(1, hparams.batch_size // length) @@ -381,7 +403,7 @@ def constant_batching_scheme(constant_batch_size_in_sequences): Returns: a dictionary """ - boundaries = bucket_boundaries(1024) + boundaries = _bucket_boundaries(1024) batch_sizes = [constant_batch_size_in_sequences] * (1 + len(boundaries)) return { "boundaries": boundaries, diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index f8f73a0ee..c31ba0f31 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -90,8 +90,8 @@ def input_fn(): p_hparams = hparams.problems[n] with tf.name_scope("problem_%d" % n): with tf.device("/cpu:0"): # Input reading on CPU - capacity = p_hparams.max_expected_batch_size_per_shard - capacity *= num_datashards + capacity = ( + p_hparams.max_expected_batch_size_per_shard * num_datashards) feature_map = data_reader.input_pipeline( problem_instance, data_file_patterns and data_file_patterns[n], capacity, mode, hparams, From d5156828a3d7f7fb5e9a309c08bfb49cdeadfcc1 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 18 Aug 2017 22:18:59 -0700 Subject: [PATCH 0293/4095] Minor updates to tensor2tensor README: fixes path of common_hparams.py PiperOrigin-RevId: 165788200 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bb0f6f534..1f77e7134 100644 --- a/README.md +++ b/README.md @@ -217,7 +217,7 @@ and are encoded in [`tf.contrib.training.HParams`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/training/python/training/hparam.py) objects. The `HParams` are available to both the problem specification and the model. A basic set of hyperparameters are defined in -[`common_hparams.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/models/common_hparams.py) +[`common_hparams.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/layers/common_hparams.py) and hyperparameter set functions can compose other hyperparameter set functions. ### Trainer From bc713b42b47e1ae9265f26397e7baf7cc822cae7 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Sun, 20 Aug 2017 22:31:36 -0700 Subject: [PATCH 0294/4095] Better performances for the expert mask computation. PiperOrigin-RevId: 165887935 --- tensor2tensor/layers/common_attention.py | 15 +++++++++++---- tensor2tensor/models/attention_lm_moe.py | 3 ++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 2c3e4b71f..cfd6b37db 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -997,10 +997,17 @@ def self_attention_expert(x, batch_coordinate, mask_right=True): """ depth = x.get_shape().as_list()[-1] length = tf.shape(batch_coordinate)[0] - batch_coordinate = tf.squeeze(batch_coordinate, 1) - bias = tf.to_float( - tf.not_equal(tf.expand_dims(batch_coordinate, 1), - tf.expand_dims(batch_coordinate, 0))) * -1e9 + + with tf.name_scope("expert_mask"): + batch_coordinate = tf.squeeze(batch_coordinate, 1) + # Convert to float first because of b/25387198 + batch_coordinate = tf.to_float(batch_coordinate) + bc_v = tf.expand_dims(batch_coordinate, 1) + bc_h = tf.expand_dims(batch_coordinate, 0) + bias = bc_v - bc_h # Broadcast to create [length, length] mask + bias = tf.minimum(1.0, tf.abs(bias)) # Theshold non zeros to 1.0 + bias *= -1e9 # Set non zeros to -infinity + if mask_right: bias += tf.reshape( attention_bias_lower_triangle(length), [length, length]) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 9c55eadd6..1d1c1519b 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -78,7 +78,8 @@ def postprocess(x, y): moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): - with tf.variable_scope("attention"): + with tf.variable_scope( + "attention_{}".format(hparams.attention_moe_type)): x = preprocess(x) if hparams.attention_moe_type == AttentionMoeType.NONE: y = dp( From 775e6c732022aa25b2ef84247c9ca699fd8c4348 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 21 Aug 2017 10:31:02 -0700 Subject: [PATCH 0295/4095] Port CelebA dataset to Problem and add landmarks and attrs PiperOrigin-RevId: 165944372 --- tensor2tensor/bin/t2t-datagen | 4 - tensor2tensor/data_generators/image.py | 170 ++++++++++++++---- .../data_generators/problem_hparams.py | 14 -- tensor2tensor/utils/data_reader.py | 7 - 4 files changed, 131 insertions(+), 64 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 19de46fbf..f7ea7e1f2 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -42,7 +42,6 @@ from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import image from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wmt @@ -106,9 +105,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: lm1b.generator(FLAGS.tmp_dir, True, characters=True), lambda: lm1b.generator(FLAGS.tmp_dir, False, characters=True) ), - "image_celeba_tune": ( - lambda: image.celeba_generator(FLAGS.tmp_dir, 162770), - lambda: image.celeba_generator(FLAGS.tmp_dir, 19867, 162770)), "inference_snli32k": ( lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 905b084b0..71f4f0920 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -66,7 +66,137 @@ def example_reading_spec(self, label_key=None): return data_fields, data_items_to_decoders -# French street names dataset. +@registry.register_problem("image_celeba_tune") +class ImageCeleba(ImageProblem): + """CelebA dataset, aligned and cropped images.""" + IMG_DATA = ("img_align_celeba.zip", + "https://drive.google.com/uc?export=download&" + "id=0B7EVK8r0v71pZjFTYXZWM3FlRnM") + LANDMARKS_DATA = ("celeba_landmarks_align", + "https://drive.google.com/uc?export=download&" + "id=0B7EVK8r0v71pd0FJY3Blby1HUTQ") + ATTR_DATA = ("celeba_attr", "https://drive.google.com/uc?export=download&" + "id=0B7EVK8r0v71pblRyaVFSWGxPY0U") + + LANDMARK_HEADINGS = ("lefteye_x lefteye_y righteye_x righteye_y " + "nose_x nose_y leftmouth_x leftmouth_y rightmouth_x " + "rightmouth_y").split() + ATTR_HEADINGS = ( + "5_o_Clock_Shadow Arched_Eyebrows Attractive Bags_Under_Eyes Bald Bangs " + "Big_Lips Big_Nose Black_Hair Blond_Hair Blurry Brown_Hair " + "Bushy_Eyebrows Chubby Double_Chin Eyeglasses Goatee Gray_Hair " + "Heavy_Makeup High_Cheekbones Male Mouth_Slightly_Open Mustache " + "Narrow_Eyes No_Beard Oval_Face Pale_Skin Pointy_Nose Receding_Hairline " + "Rosy_Cheeks Sideburns Smiling Straight_Hair Wavy_Hair Wearing_Earrings " + "Wearing_Hat Wearing_Lipstick Wearing_Necklace Wearing_Necktie Young" + ).split() + + def preprocess_examples(self, examples, unused_mode, unused_hparams): + + def resize(img, size): + return tf.to_int64( + tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) + + inputs = examples["inputs"] + # Remove boundaries in CelebA images. Remove 40 pixels each side + # vertically and 20 pixels each side horizontally. + inputs = tf.image.crop_to_bounding_box(inputs, 40, 20, 218 - 80, 178 - 40) + examples["inputs"] = resize(inputs, 8) + examples["targets"] = resize(inputs, 32) + return examples + + def hparams(self, defaults, model_hparams): + p = defaults + p.input_modality = {"inputs": ("image:identity_no_pad", None)} + p.target_modality = ("image:identity_no_pad", None) + p.batch_size_multiplier = 256 + p.max_expected_batch_size_per_shard = 4 + p.input_space_id = 1 + p.target_space_id = 1 + + def generator(self, tmp_dir, how_many, start_from=0): + """Image generator for CELEBA dataset. + + Args: + tmp_dir: path to temporary storage directory. + how_many: how many images and labels to generate. + start_from: from which image to start. + + Yields: + A dictionary representing the images with the following fields: + * image/encoded: the string encoding the image as JPEG, + * image/format: the string "jpeg" representing image format, + """ + out_paths = [] + for fname, url in [self.IMG_DATA, self.LANDMARKS_DATA, self.ATTR_DATA]: + path = generator_utils.maybe_download_from_drive(tmp_dir, fname, url) + out_paths.append(path) + + img_path, landmarks_path, attr_path = out_paths # pylint: disable=unbalanced-tuple-unpacking + unzipped_folder = img_path[:-4] + if not tf.gfile.Exists(unzipped_folder): + zipfile.ZipFile(img_path, "r").extractall(tmp_dir) + + with tf.gfile.Open(landmarks_path) as f: + landmarks_raw = f.read() + + with tf.gfile.Open(attr_path) as f: + attr_raw = f.read() + + def process_landmarks(raw_data): + landmarks = {} + lines = raw_data.split("\n") + headings = lines[1].strip().split() + for line in lines[2:-1]: + values = line.strip().split() + img_name = values[0] + landmark_values = [int(v) for v in values[1:]] + landmarks[img_name] = landmark_values + return landmarks, headings + + def process_attrs(raw_data): + attrs = {} + lines = raw_data.split("\n") + headings = lines[1].strip().split() + for line in lines[2:-1]: + values = line.strip().split() + img_name = values[0] + attr_values = [int(v) for v in values[1:]] + attrs[img_name] = attr_values + return attrs, headings + + img_landmarks, _ = process_landmarks(landmarks_raw) + img_attrs, _ = process_attrs(attr_raw) + + image_files = tf.gfile.Glob(unzipped_folder + "/*.jpg") + for filename in image_files[start_from:start_from + how_many]: + img_name = os.path.basename(filename) + landmarks = img_landmarks[img_name] + attrs = img_attrs[img_name] + + with tf.gfile.Open(filename, "r") as f: + encoded_image_data = f.read() + yield { + "image/encoded": [encoded_image_data], + "image/format": ["jpeg"], + "attributes": attrs, + "landmarks": landmarks, + } + + @property + def train_shards(self): + return 100 + + @property + def dev_shards(self): + return 10 + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + generator_utils.generate_dataset_and_shuffle( + self.generator(tmp_dir, 162770), # train + self.training_filepaths(data_dir, self.train_shards, shuffled=False), + self.generator(tmp_dir, 19867, 162770), # dev + self.dev_filepaths(data_dir, self.dev_shards, shuffled=False)) @registry.register_problem @@ -700,41 +830,3 @@ class ImageMsCocoTokens32k(ImageMsCocoTokens8k): @property def targeted_vocab_size(self): return 2**15 # 32768 - - -# URL and filename for CELEBA data. -_CELEBA_NAME = "img_align_celeba" -_CELEBA_URL = "https://drive.google.com/uc?export=download&id=0B7EVK8r0v71pZjFTYXZWM3FlRnM" - - -def _get_celeba(directory): - """Download and extract CELEBA to directory unless it is there.""" - # path = os.path.join(directory, _CELEBA_NAME) - path = generator_utils.maybe_download_from_drive(directory, _CELEBA_NAME, - _CELEBA_URL) - if not tf.gfile.Exists(path): - zipfile.ZipFile(path + ".zip", "r").extractall(directory) - - -def celeba_generator(tmp_dir, how_many, start_from=0): - """Image generator for CELEBA dataset. - - Args: - tmp_dir: path to temporary storage directory. - how_many: how many images and labels to generate. - start_from: from which image to start. - - Yields: - A dictionary representing the images with the following fields: - * image/encoded: the string encoding the image as JPEG, - * image/format: the string "jpeg" representing image format, - """ - _get_celeba(tmp_dir) - image_files = tf.gfile.Glob(os.path.join(tmp_dir, _CELEBA_NAME) + "/*.jpg") - for filename in image_files[start_from:start_from + how_many]: - with tf.gfile.Open(filename, "r") as f: - encoded_image_data = f.read() - yield { - "image/encoded": [encoded_image_data], - "image/format": ["jpeg"], - } diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 4a6053613..63b835f38 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -461,18 +461,6 @@ def img2img_imagenet(unused_model_hparams): return p -def image_celeba(unused_model_hparams): - """Image CelebA dataset.""" - p = default_problem_hparams() - p.input_modality = {"inputs": ("image:identity_no_pad", None)} - p.target_modality = ("image:identity_no_pad", None) - p.batch_size_multiplier = 256 - p.max_expected_batch_size_per_shard = 4 - p.input_space_id = 1 - p.target_space_id = 1 - return p - - # Dictionary of named hyperparameter settings for various problems. # This is only accessed through the problem_hparams function below. PROBLEM_HPARAMS_MAP = { @@ -503,8 +491,6 @@ def image_celeba(unused_model_hparams): p, "wsj", 2**14, 2**9), "translate_ende_wmt_bpe32k": wmt_ende_bpe32k, - "image_celeba_tune": - image_celeba, "img2img_imagenet": img2img_imagenet, } diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index ee7da4b19..4f95be323 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -142,13 +142,6 @@ def resize(img, size): inputs = examples["inputs"] examples["inputs"] = resize(inputs, 16) examples["targets"] = resize(inputs, 64) - elif "image_celeba" in data_file_pattern: - inputs = examples["inputs"] - # Remove boundaries in CelebA images. Remove 40 pixels each side - # vertically and 20 pixels each side horizontally. - inputs = tf.image.crop_to_bounding_box(inputs, 40, 20, 218 - 80, 178 - 40) - examples["inputs"] = resize(inputs, 8) - examples["targets"] = resize(inputs, 32) elif "audio" in data_file_pattern: # Reshape audio to proper shape sample_count = tf.to_int32(examples.pop("audio/sample_count")) From 8ac4ca8c17eb06a75acda3133a83e2e5461a12bc Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 21 Aug 2017 10:58:45 -0700 Subject: [PATCH 0296/4095] Reverting dataset change until more thoroughly tested PiperOrigin-RevId: 165948341 --- setup.py | 4 +- tensor2tensor/utils/data_reader.py | 215 +++++++++--------------- tensor2tensor/utils/data_reader_test.py | 12 +- tensor2tensor/utils/input_fn_builder.py | 6 +- 4 files changed, 94 insertions(+), 143 deletions(-) diff --git a/setup.py b/setup.py index 088f9b14c..dd80dfd48 100644 --- a/setup.py +++ b/setup.py @@ -26,8 +26,8 @@ 'six', ], extras_require={ - 'tensorflow': ['tensorflow>=1.3.0'], - 'tensorflow_gpu': ['tensorflow-gpu>=1.3.0'], + 'tensorflow': ['tensorflow>=1.2.0rc1'], + 'tensorflow_gpu': ['tensorflow-gpu>=1.2.0rc1'], }, tests_require=['nose'], test_suite='nose.collector', diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 4f95be323..667552d83 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -20,12 +20,9 @@ import math import os -import random # Dependency imports -import numpy as np - import six from six.moves import zip # pylint: disable=redefined-builtin @@ -91,7 +88,8 @@ def examples_reader(data_sources, by default (if this is None), we decode all items. Returns: - A tf.contrib.data.Dataset of dict<feature name, Tensor> + A dictionary mapping each data_field to a corresponding 1D int64 tensor + read from the created Dataset. """ def decode_record(record): @@ -115,17 +113,18 @@ def decode_record(record): return dict(zip(decode_items, decoded)) with tf.name_scope("examples_in"): + # Read serialized examples using slim parallel_reader. data_files = tf.contrib.slim.parallel_reader.get_data_files(data_sources) - if training: - random.shuffle(data_files) - dataset = tf.contrib.data.TFRecordDataset(data_files) - num_threads = min(4 if training else 1, len(data_files)) - dataset = dataset.map(decode_record, num_threads=num_threads) - if training: - dataset = dataset.shuffle(capacity) - # Loop inifinitely if training, just once otherwise - dataset = dataset.repeat(None if training else 1) - return dataset + num_readers = min(4 if training else 1, len(data_files)) + _, example_serialized = tf.contrib.slim.parallel_reader.parallel_read( + data_sources, + tf.TFRecordReader, + num_epochs=None if training else 1, + shuffle=training, + capacity=2 * capacity, + min_after_dequeue=capacity, + num_readers=num_readers) + return decode_record(example_serialized) def preprocessing(examples, data_file_pattern): @@ -213,16 +212,53 @@ def default_example_reading_spec(data_file_pattern): return data_fields, data_items_to_decoders -def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, - batching_scheme): - """Input pipeline, returns a dictionary of tensors from queues. +def input_pipeline(problem, data_file_pattern, capacity, mode, hparams): + """Input pipeline, returns a dictionary of tensors from queues.""" + if problem is None: + data_fields, data_items_to_decoders = default_example_reading_spec( + data_file_pattern) + else: + data_fields, data_items_to_decoders = problem.example_reading_spec() + + if data_file_pattern is None: + # Create placeholders for input, rather than reading data from disk. + return feature_placeholders(data_fields) + + examples = examples_reader( + [data_file_pattern], + data_fields, + training=(mode == tf.contrib.learn.ModeKeys.TRAIN), + capacity=capacity, + data_items_to_decoders=data_items_to_decoders) + + if problem is None: + examples = preprocess_examples_common(examples, hparams) + examples = preprocessing(examples, data_file_pattern) + else: + examples = problem.preprocess_examples(examples, mode, hparams) + + # We do not want int64s as they are not supported on GPUs. + examples = cast_int64_to_int32(examples) + + return examples + + +def batch_examples(examples, batching_scheme): + """Given a queue of examples, create batches of examples with similar lengths. + + We assume that examples is a dictionary with string keys and tensor values, + possibly coming from a queue, e.g., constructed by examples_reader above. + Each tensor in examples is assumed to be 1D. We will put tensors of similar + length into batches togeter. We return a dictionary with the same keys as + examples, and with values being batches of size batch_size. If elements have + different lengths, they are padded with 0s. This function is based on + tf.contrib.training.bucket_by_sequence_length so see there for details. + + For example, if examples is a queue containing [1, 2, 3] and [4], then + this function with batch_size=2 will return a batch [[1, 2, 3], [4, 0, 0]]. Args: - problem: Problem instance for which to build the input pipeline. - data_file_pattern: file pattern for input files. - capacity: int, data pipeline buffer capacity. - mode: tf.contrib.learn.ModeKeys entry. - hparams: an HParams object. + examples: a dictionary with string keys and 1D tensor values. batching_scheme: a dictionary containing "boundaries": a list of integers for the boundaries that will be used for bucketing; see tf.contrib.training.bucket_by_sequence_length @@ -231,113 +267,30 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, "max_length": an integer. We drop sequences which are longer. Returns: - dict <feature name, batched and padded Tensor> - """ - with tf.name_scope("input_pipeline"): - if problem is None: - data_fields, data_items_to_decoders = default_example_reading_spec( - data_file_pattern) - else: - data_fields, data_items_to_decoders = problem.example_reading_spec() - - if data_file_pattern is None: - # Create placeholders for input, rather than reading data from disk. - return feature_placeholders(data_fields) - - is_training = mode == tf.contrib.learn.ModeKeys.TRAIN - dataset = examples_reader( - [data_file_pattern], - data_fields, - training=is_training, - capacity=capacity, - data_items_to_decoders=data_items_to_decoders) - - def example_len(ex): - length = 0 - # The queue to bucket on will be chosen based on maximum length. - for v in ex.values(): - # For images the sequence length is the size of the spatial dimensions. - sequence_length = (tf.shape(v)[0] if len(v.get_shape()) < 3 else - tf.shape(v)[0] * tf.shape(v)[1]) - length = tf.maximum(length, sequence_length) - return length - - def preprocess(example): - """Preprocessing for example.""" - if problem is None: - example = preprocess_examples_common(example, hparams) - example = preprocessing(example, data_file_pattern) - else: - example = problem.preprocess_examples(example, mode, hparams) - - # We do not want int64s as they are not supported on GPUs. - example = cast_int64_to_int32(example) - - return example - - def allowed_length(ex): - return tf.less_equal(example_len(ex), batching_scheme["max_length"]) - - num_threads = 4 if is_training else 1 - dataset = dataset.map(preprocess, num_threads=num_threads) - dataset = dataset.filter(allowed_length) - - dataset = bucket_by_sequence_length(dataset, example_len, - batching_scheme["boundaries"], - batching_scheme["batch_sizes"]) - - batched_examples = dataset.make_one_shot_iterator().get_next() - return batched_examples - - -def bucket_by_sequence_length(dataset, example_length_fn, bucket_boundaries, - bucket_batch_sizes): - """Bucket entries in dataset by length. - - Args: - dataset: Dataset of dict<feature name, Tensor>. - example_length_fn: function from example to int, determines the length of - the example, which will determine the bucket it goes into. - bucket_boundaries: list<int>, boundaries of the buckets. - bucket_batch_sizes: list<int>, batch size per bucket. - - Returns: - Dataset of padded and batched examples. + A dictionary with the same keys as examples and with values being batches + of examples padded with 0s, i.e., [batch_size x length] tensors. """ - with tf.name_scope("bucket_by_seq_length"): - - def example_to_bucket_id(example): - """Return int64 id of the length bucket for this example.""" - seq_length = example_length_fn(example) - - # From tf.contrib.training.bucket_by_sequence_length: - boundaries = list(bucket_boundaries) - buckets_min = [np.iinfo(np.int32).min] + boundaries - buckets_max = boundaries + [np.iinfo(np.int32).max] - conditions_c = tf.logical_and( - tf.less_equal(buckets_min, seq_length), - tf.less(seq_length, buckets_max)) - bucket_id = tf.reduce_min(tf.where(conditions_c)) - - return bucket_id - - def batching_fn(bucket_id, grouped_dataset): - batch_sizes = tf.constant(bucket_batch_sizes, dtype=tf.int64) - batch_size = batch_sizes[bucket_id] - - # Pad each dimension of each feature so that they match. - padded_shapes = dict( - [(name, [None] * len(shape)) - for name, shape in grouped_dataset.output_shapes.items()]) - return grouped_dataset.padded_batch(batch_size, padded_shapes) - - window_size = max(bucket_batch_sizes) - dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, - window_size) - return dataset - - -def _bucket_boundaries(max_length, min_length=8, mantissa_bits=2): + with tf.name_scope("batch_examples"): + # The queue to bucket on will be chosen based on maximum length. + max_length = 0 + for v in examples.values(): + # For images the sequence length is the size of the spatial dimensions. + sequence_length = (tf.shape(v)[0] if len(v.get_shape()) < 3 else + tf.shape(v)[0] * tf.shape(v)[1]) + max_length = tf.maximum(max_length, sequence_length) + (_, outputs) = tf.contrib.training.bucket_by_sequence_length( + max_length, + examples, + batching_scheme["batch_sizes"], + [b + 1 for b in batching_scheme["boundaries"]], + capacity=2, # Number of full batches to store, we don't need many. + bucket_capacities=[2 * b for b in batching_scheme["batch_sizes"]], + dynamic_pad=True, + keep_input=(max_length <= batching_scheme["max_length"])) + return outputs + + +def bucket_boundaries(max_length, min_length=8, mantissa_bits=2): """A default set of length-bucket boundaries.""" x = min_length boundaries = [] @@ -371,7 +324,7 @@ def hparams_to_batching_scheme(hparams, a dictionary """ max_length = hparams.max_length or hparams.batch_size - boundaries = _bucket_boundaries( + boundaries = bucket_boundaries( max_length, mantissa_bits=hparams.batching_mantissa_bits) batch_sizes = [ max(1, hparams.batch_size // length) @@ -379,7 +332,7 @@ def hparams_to_batching_scheme(hparams, ] batch_sizes = [b * shard_multiplier for b in batch_sizes] max_length *= length_multiplier - boundaries = [boundary * length_multiplier + 1 for boundary in boundaries] + boundaries = [boundary * length_multiplier for boundary in boundaries] return { "boundaries": boundaries, "batch_sizes": batch_sizes, @@ -396,7 +349,7 @@ def constant_batching_scheme(constant_batch_size_in_sequences): Returns: a dictionary """ - boundaries = _bucket_boundaries(1024) + boundaries = bucket_boundaries(1024) batch_sizes = [constant_batch_size_in_sequences] * (1 + len(boundaries)) return { "boundaries": boundaries, diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index 3d469af20..ea98da06d 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -50,21 +50,19 @@ def test_generator(): generator_utils.generate_files(test_generator(), filenames) self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) - train_dataset = data_reader.examples_reader( + examples_train = data_reader.examples_reader( [tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) }, training=True) - examples_train = train_dataset.make_one_shot_iterator().get_next() - eval_dataset = data_reader.examples_reader( + examples_eval = data_reader.examples_reader( [tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64), "floats": tf.VarLenFeature(tf.float32) }, training=False) - examples_eval = eval_dataset.make_one_shot_iterator().get_next() with tf.train.MonitoredSession() as session: # Evaluation data comes in the same order as in the file, check 10. for i in xrange(10): @@ -105,17 +103,15 @@ def test_generator(): generator_utils.generate_files(test_generator(), filenames) self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) - train_dataset = data_reader.examples_reader([tmp_file_path + "*"], { + examples_train = data_reader.examples_reader([tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) }, True) - examples_train = train_dataset.make_one_shot_iterator().get_next() batch_train = data_reader.batch_examples(examples_train, 4) - eval_dataset = data_reader.examples_reader([tmp_file_path + "*"], { + examples_eval = data_reader.examples_reader([tmp_file_path + "*"], { "inputs": tf.VarLenFeature(tf.int64), "targets": tf.VarLenFeature(tf.int64) }, False) - examples_eval = eval_dataset.make_one_shot_iterator().get_next() batch_eval = data_reader.batch_examples(examples_eval, 2) session, coord = tf.Session(), tf.train.Coordinator() with session.as_default(): diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index c31ba0f31..1be485d56 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -92,9 +92,11 @@ def input_fn(): with tf.device("/cpu:0"): # Input reading on CPU capacity = ( p_hparams.max_expected_batch_size_per_shard * num_datashards) - feature_map = data_reader.input_pipeline( + examples = data_reader.input_pipeline( problem_instance, data_file_patterns and data_file_patterns[n], - capacity, mode, hparams, + capacity, mode, hparams) + feature_map = data_reader.batch_examples( + examples, data_reader.hparams_to_batching_scheme( hparams, shard_multiplier=num_datashards, From 307547781c1bd533a917ca45129622bdaebcadce Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 21 Aug 2017 13:41:46 -0700 Subject: [PATCH 0297/4095] Add store_to_file option to Text2TextEncoder and minor documentation addition. PiperOrigin-RevId: 165969003 --- tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/problem.py | 5 +++- tensor2tensor/data_generators/text_encoder.py | 28 ++++++++++++------- tensor2tensor/utils/registry.py | 26 +++++++++++++++-- 4 files changed, 47 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 0078eb3f9..ec3a9d0af 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -43,3 +43,4 @@ pass # pylint: enable=g-import-not-at-top # pylint: enable=unused-import + diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 60b1e842b..34127d288 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -116,7 +116,10 @@ class Problem(object): * generate_data(data_dir, tmp_dir) - Generate training and dev datasets into data_dir. - Additonal files, e.g. vocabulary files, should also be written to - data_dir. + data_dir. Vocab files are newline-separated files with each line + containing a token. The standard convention for the filename is to + set it to be + ${Problem.vocab_name}.${Problem.targeted_vocab_size} - Downloads and other files can be written to tmp_dir - If you have a training and dev generator, you can generate the training and dev datasets with diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index b628a538f..ec43efe22 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -206,19 +206,27 @@ def token_gen(): def _init_vocab(self, token_generator): """Initialize vocabulary with tokens from token_generator.""" - self._id_to_token = {} + # Initialize with reserved tokens + self._id_to_token = dict(enumerate(RESERVED_TOKENS)) + self._id_to_token.update( + enumerate(token_generator, start=len(RESERVED_TOKENS))) - # Add reserved tokens - self._id_to_token.update(dict(list(enumerate(RESERVED_TOKENS)))) + # _token_to_id is the reverse of _id_to_token + self._token_to_id = dict((v, k) + for k, v in six.iteritems(self._id_to_token)) + + def store_to_file(self, filename): + """Write vocab file to disk. - token_id = len(RESERVED_TOKENS) - for token in token_generator: - self._id_to_token[token_id] = token - token_id += 1 + Vocab files have one token per line. The file ends in a newline. Reserved + tokens are written to the vocab file as well. - # _token_to_id is the reverse of _id_to_token - self._token_to_id = dict([(v, k) - for k, v in six.iteritems(self._id_to_token)]) + Args: + filename: full path of the file to store the vocab to. + """ + with tf.gfile.Open(filename, "w") as f: + for i in xrange(len(self._id_to_token)): + f.write(self._id_to_token[i] + "\n") def _escape_token(token, alphabet): diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index f5d83cbf1..f1db2f36c 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -90,8 +90,30 @@ def _reset(): ctr.clear() -def _default_name(obj): - return _convert_camel_to_snake(obj.__name__) +def _default_name(obj_class): + """Convert a class name to the registry's default name for the class. + + Args: + obj_class: the name of a class + + Returns: + The registry's default name for the class. + """ + + return _convert_camel_to_snake(obj_class.__name__) + + +def default_object_name(obj): + """Convert an object to the registry's default name for the object class. + + Args: + obj: an object instance + + Returns: + The registry's default name for the class of the object. + """ + + return _default_name(obj.__class__) def register_model(name=None): From 4fc251b8da7c11dce0fe4b6115e4db16fc183553 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 21 Aug 2017 16:45:02 -0700 Subject: [PATCH 0298/4095] Fix reshape bug when an expert receive a batch of size zero PiperOrigin-RevId: 165994020 --- tensor2tensor/layers/common_attention.py | 62 ++++++++++++++---------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index cfd6b37db..55b2be090 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -998,31 +998,43 @@ def self_attention_expert(x, batch_coordinate, mask_right=True): depth = x.get_shape().as_list()[-1] length = tf.shape(batch_coordinate)[0] - with tf.name_scope("expert_mask"): - batch_coordinate = tf.squeeze(batch_coordinate, 1) - # Convert to float first because of b/25387198 - batch_coordinate = tf.to_float(batch_coordinate) - bc_v = tf.expand_dims(batch_coordinate, 1) - bc_h = tf.expand_dims(batch_coordinate, 0) - bias = bc_v - bc_h # Broadcast to create [length, length] mask - bias = tf.minimum(1.0, tf.abs(bias)) # Theshold non zeros to 1.0 - bias *= -1e9 # Set non zeros to -infinity - - if mask_right: - bias += tf.reshape( - attention_bias_lower_triangle(length), [length, length]) - # bias has shape [length, length] - bias = tf.reshape(bias, [1, 1, length, length]) - x = tf.reshape(x, [1, length, depth]) - out = multihead_attention(x, - None, - bias, - total_key_depth=depth, - total_value_depth=depth, - output_depth=depth, - num_heads=1, - dropout_rate=0.0) - out = tf.squeeze(out, 0) + def length_not_null(x, batch_coordinate): + """Branch of the graph only evaluated when length isn't null.""" + with tf.name_scope("expert_mask"): + batch_coordinate = tf.squeeze(batch_coordinate, 1) + # Convert to float first because of b/25387198 + batch_coordinate = tf.to_float(batch_coordinate) + bc_v = tf.expand_dims(batch_coordinate, 1) + bc_h = tf.expand_dims(batch_coordinate, 0) + bias = bc_v - bc_h # Broadcast to create [length, length] mask + bias = tf.minimum(1.0, tf.abs(bias)) # Theshold non zeros to 1.0 + bias *= -1e9 # Set non zeros to -infinity + + if mask_right: + bias += tf.reshape( + attention_bias_lower_triangle(length), [length, length]) + # bias has shape [length, length] + bias = tf.reshape(bias, [1, 1, length, length]) + x = tf.reshape(x, [1, length, depth]) + out = multihead_attention(x, + None, + bias, + total_key_depth=depth, + total_value_depth=depth, + output_depth=depth, + num_heads=1, + dropout_rate=0.0) + out = tf.squeeze(out, 0) + + return out + + # If the length is empty, just forward an empty tensor (avoid having to + # evaluate multihead_attention with tensor having dim equal to zeros) + out = tf.cond( + tf.equal(length, 0), + lambda: tf.zeros(shape=[0, depth], dtype=tf.float32, name="empty_out"), + lambda: length_not_null(x, batch_coordinate), + ) return out # functools.partial(self_attention_expert, mask_right=, depth=) From 0f33a8da95a3a8e705fbca34d36be3cb2708c01f Mon Sep 17 00:00:00 2001 From: Alexander Ku <alexku@google.com> Date: Tue, 22 Aug 2017 09:50:24 -0700 Subject: [PATCH 0299/4095] Parameterize the number of encoder and decoder layers for the Transformer. PiperOrigin-RevId: 166073018 --- tensor2tensor/models/transformer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 0eed2dbdb..47db28c30 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -190,7 +190,7 @@ def transformer_encoder(encoder_input, """ x = encoder_input with tf.variable_scope(name): - for layer in xrange(hparams.num_hidden_layers): + for layer in xrange(hparams.num_encoder_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( @@ -233,7 +233,7 @@ def transformer_decoder(decoder_input, """ x = decoder_input with tf.variable_scope(name): - for layer in xrange(hparams.num_hidden_layers): + for layer in xrange(hparams.num_decoder_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( @@ -324,6 +324,9 @@ def transformer_base(): hparams.shared_embedding_and_softmax_weights = int(True) hparams.add_hparam("filter_size", 2048) # Add new ones like this. + # layer-related flags + hparams.add_hparam("num_encoder_layers", hparams.num_hidden_layers) + hparams.add_hparam("num_decoder_layers", hparams.num_hidden_layers) # attention-related flags hparams.add_hparam("num_heads", 8) hparams.add_hparam("attention_key_channels", 0) From dd66a031e122be5180d15539683e61f339d5cd5e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 22 Aug 2017 16:04:02 -0700 Subject: [PATCH 0300/4095] Port to Dataset (again) with tests PiperOrigin-RevId: 166128391 --- setup.py | 4 +- tensor2tensor/data_generators/problem.py | 23 +- tensor2tensor/utils/data_reader.py | 247 +++++++++++------ tensor2tensor/utils/data_reader_test.py | 312 ++++++++++++++-------- tensor2tensor/utils/input_fn_builder.py | 6 +- tensor2tensor/utils/trainer_utils_test.py | 9 +- 6 files changed, 394 insertions(+), 207 deletions(-) diff --git a/setup.py b/setup.py index dd80dfd48..088f9b14c 100644 --- a/setup.py +++ b/setup.py @@ -26,8 +26,8 @@ 'six', ], extras_require={ - 'tensorflow': ['tensorflow>=1.2.0rc1'], - 'tensorflow_gpu': ['tensorflow-gpu>=1.2.0rc1'], + 'tensorflow': ['tensorflow>=1.3.0'], + 'tensorflow_gpu': ['tensorflow-gpu>=1.3.0'], }, tests_require=['nose'], test_suite='nose.collector', diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 34127d288..e4424e73e 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -203,22 +203,22 @@ def training_filepaths(self, data_dir, num_shards, shuffled): file_basename = self.dataset_filename() if not shuffled: file_basename += generator_utils.UNSHUFFLED_SUFFIX - return generator_utils.train_data_filenames( - file_basename, data_dir, num_shards) + return generator_utils.train_data_filenames(file_basename, data_dir, + num_shards) def dev_filepaths(self, data_dir, num_shards, shuffled): file_basename = self.dataset_filename() if not shuffled: file_basename += generator_utils.UNSHUFFLED_SUFFIX - return generator_utils.dev_data_filenames( - file_basename, data_dir, num_shards) + return generator_utils.dev_data_filenames(file_basename, data_dir, + num_shards) def test_filepaths(self, data_dir, num_shards, shuffled): file_basename = self.dataset_filename() if not shuffled: file_basename += generator_utils.UNSHUFFLED_SUFFIX - return generator_utils.test_data_filenames( - file_basename, data_dir, num_shards) + return generator_utils.test_data_filenames(file_basename, data_dir, + num_shards) def __init__(self, was_reversed=False, was_copy=False): """Create a Problem. @@ -415,10 +415,8 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): generator_utils.shuffle_dataset(all_paths) else: generator_utils.generate_dataset_and_shuffle( - self.generator(data_dir, tmp_dir, True), - self.training_filepaths(data_dir, self.num_shards, shuffled=False), - self.generator(data_dir, tmp_dir, False), - self.dev_filepaths(data_dir, self.num_dev_shards, shuffled=False)) + self.generator(data_dir, tmp_dir, True), train_paths, + self.generator(data_dir, tmp_dir, False), dev_paths) def feature_encoders(self, data_dir): if self.is_character_level: @@ -438,8 +436,9 @@ def hparams(self, defaults, unused_model_hparams): if self.has_inputs: source_vocab_size = self._encoders["inputs"].vocab_size - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, - source_vocab_size)} + p.input_modality = { + "inputs": (registry.Modalities.SYMBOL, source_vocab_size) + } target_vocab_size = self._encoders["targets"].vocab_size p.target_modality = (registry.Modalities.SYMBOL, target_vocab_size) if self.has_inputs: diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 667552d83..199de7a79 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -20,9 +20,12 @@ import math import os +import random # Dependency imports +import numpy as np + import six from six.moves import zip # pylint: disable=redefined-builtin @@ -88,8 +91,7 @@ def examples_reader(data_sources, by default (if this is None), we decode all items. Returns: - A dictionary mapping each data_field to a corresponding 1D int64 tensor - read from the created Dataset. + A tf.contrib.data.Dataset of dict<feature name, Tensor> """ def decode_record(record): @@ -113,18 +115,17 @@ def decode_record(record): return dict(zip(decode_items, decoded)) with tf.name_scope("examples_in"): - # Read serialized examples using slim parallel_reader. data_files = tf.contrib.slim.parallel_reader.get_data_files(data_sources) - num_readers = min(4 if training else 1, len(data_files)) - _, example_serialized = tf.contrib.slim.parallel_reader.parallel_read( - data_sources, - tf.TFRecordReader, - num_epochs=None if training else 1, - shuffle=training, - capacity=2 * capacity, - min_after_dequeue=capacity, - num_readers=num_readers) - return decode_record(example_serialized) + if training: + random.shuffle(data_files) + dataset = tf.contrib.data.TFRecordDataset(data_files) + num_threads = min(4 if training else 1, len(data_files)) + dataset = dataset.map(decode_record, num_threads=num_threads) + if training: + dataset = dataset.shuffle(capacity) + # Loop inifinitely if training, just once otherwise + dataset = dataset.repeat(None if training else 1) + return dataset def preprocessing(examples, data_file_pattern): @@ -212,8 +213,11 @@ def default_example_reading_spec(data_file_pattern): return data_fields, data_items_to_decoders -def input_pipeline(problem, data_file_pattern, capacity, mode, hparams): - """Input pipeline, returns a dictionary of tensors from queues.""" +def read_examples(problem, + data_file_pattern, + capacity, + mode=tf.contrib.learn.ModeKeys.TRAIN): + """Create Dataset of Example for problem and data_file_pattern.""" if problem is None: data_fields, data_items_to_decoders = default_example_reading_spec( data_file_pattern) @@ -224,73 +228,131 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams): # Create placeholders for input, rather than reading data from disk. return feature_placeholders(data_fields) - examples = examples_reader( + is_training = mode == tf.contrib.learn.ModeKeys.TRAIN + dataset = examples_reader( [data_file_pattern], data_fields, - training=(mode == tf.contrib.learn.ModeKeys.TRAIN), + training=is_training, capacity=capacity, data_items_to_decoders=data_items_to_decoders) + return dataset + + +def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, + batching_scheme): + """Input pipeline, returns a dictionary of batched and padded tensors. + + Args: + problem: Problem instance for which to build the input pipeline. + data_file_pattern: file pattern for input files. + capacity: int, data pipeline buffer capacity. + mode: tf.contrib.learn.ModeKeys entry. + hparams: an HParams object. + batching_scheme: a dictionary containing + "boundaries": a list of integers for the boundaries that will be + used for bucketing; see bucket_by_sequence_length for more details. + "batch_sizes": a list of batch sizes corresponding to the buckets + "max_length": an integer. We drop sequences which are longer. + + Returns: + dict <feature name, batched and padded Tensor> + """ + is_training = mode == tf.contrib.learn.ModeKeys.TRAIN + num_threads = 4 if is_training else 1 + with tf.name_scope("input_pipeline"): + dataset = read_examples(problem, data_file_pattern, capacity, mode=mode) + dataset = dataset.map( + lambda ex: _preprocess(ex, problem, data_file_pattern, hparams, mode), + num_threads=num_threads) + dataset = dataset.filter( + lambda ex: _example_too_big(ex, batching_scheme["max_length"])) + + dataset = bucket_by_sequence_length(dataset, _example_length, + batching_scheme["boundaries"], + batching_scheme["batch_sizes"]) + + batched_examples = dataset.make_one_shot_iterator().get_next() + return batched_examples + + +def _preprocess(example, problem, data_file_pattern, hparams, mode): + """Preprocessing for example.""" if problem is None: - examples = preprocess_examples_common(examples, hparams) - examples = preprocessing(examples, data_file_pattern) + example = preprocess_examples_common(example, hparams) + example = preprocessing(example, data_file_pattern) else: - examples = problem.preprocess_examples(examples, mode, hparams) + example = problem.preprocess_examples(example, mode, hparams) # We do not want int64s as they are not supported on GPUs. - examples = cast_int64_to_int32(examples) + example = cast_int64_to_int32(example) + + return example - return examples +def _example_length(example): + length = 0 + # Length of the example is the maximum length of the feature lengths + for v in example.values(): + # For images the sequence length is the size of the spatial dimensions. + feature_length = (tf.shape(v)[0] if len(v.get_shape()) < 3 else + tf.shape(v)[0] * tf.shape(v)[1]) + length = tf.maximum(length, feature_length) + return length -def batch_examples(examples, batching_scheme): - """Given a queue of examples, create batches of examples with similar lengths. - We assume that examples is a dictionary with string keys and tensor values, - possibly coming from a queue, e.g., constructed by examples_reader above. - Each tensor in examples is assumed to be 1D. We will put tensors of similar - length into batches togeter. We return a dictionary with the same keys as - examples, and with values being batches of size batch_size. If elements have - different lengths, they are padded with 0s. This function is based on - tf.contrib.training.bucket_by_sequence_length so see there for details. +def _example_too_big(example, max_length): + return tf.less_equal(_example_length(example), max_length) - For example, if examples is a queue containing [1, 2, 3] and [4], then - this function with batch_size=2 will return a batch [[1, 2, 3], [4, 0, 0]]. + +def bucket_by_sequence_length(dataset, example_length_fn, bucket_boundaries, + bucket_batch_sizes): + """Bucket entries in dataset by length. Args: - examples: a dictionary with string keys and 1D tensor values. - batching_scheme: a dictionary containing - "boundaries": a list of integers for the boundaries that will be - used for bucketing; see tf.contrib.training.bucket_by_sequence_length - for more details. - "batch_sizes": a list of batch sizes corresponding to the buckets - "max_length": an integer. We drop sequences which are longer. + dataset: Dataset of dict<feature name, Tensor>. + example_length_fn: function from example to int, determines the length of + the example, which will determine the bucket it goes into. + bucket_boundaries: list<int>, boundaries of the buckets. + bucket_batch_sizes: list<int>, batch size per bucket. Returns: - A dictionary with the same keys as examples and with values being batches - of examples padded with 0s, i.e., [batch_size x length] tensors. + Dataset of padded and batched examples. """ - with tf.name_scope("batch_examples"): - # The queue to bucket on will be chosen based on maximum length. - max_length = 0 - for v in examples.values(): - # For images the sequence length is the size of the spatial dimensions. - sequence_length = (tf.shape(v)[0] if len(v.get_shape()) < 3 else - tf.shape(v)[0] * tf.shape(v)[1]) - max_length = tf.maximum(max_length, sequence_length) - (_, outputs) = tf.contrib.training.bucket_by_sequence_length( - max_length, - examples, - batching_scheme["batch_sizes"], - [b + 1 for b in batching_scheme["boundaries"]], - capacity=2, # Number of full batches to store, we don't need many. - bucket_capacities=[2 * b for b in batching_scheme["batch_sizes"]], - dynamic_pad=True, - keep_input=(max_length <= batching_scheme["max_length"])) - return outputs - - -def bucket_boundaries(max_length, min_length=8, mantissa_bits=2): + with tf.name_scope("bucket_by_seq_length"): + + def example_to_bucket_id(example): + """Return int64 id of the length bucket for this example.""" + seq_length = example_length_fn(example) + + boundaries = list(bucket_boundaries) + buckets_min = [np.iinfo(np.int32).min] + boundaries + buckets_max = boundaries + [np.iinfo(np.int32).max] + conditions_c = tf.logical_and( + tf.less_equal(buckets_min, seq_length), + tf.less(seq_length, buckets_max)) + bucket_id = tf.reduce_min(tf.where(conditions_c)) + + return bucket_id + + def batching_fn(bucket_id, grouped_dataset): + batch_sizes = tf.constant(bucket_batch_sizes, dtype=tf.int64) + batch_size = batch_sizes[bucket_id] + + # Pad each dimension of each feature so that they match. + padded_shapes = dict( + [(name, [None] * len(shape)) + for name, shape in grouped_dataset.output_shapes.items()]) + return grouped_dataset.padded_batch(batch_size, padded_shapes) + + window_size = max( + max(bucket_batch_sizes) * 10, len(bucket_batch_sizes) * 32) + dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, + window_size) + return dataset + + +def _bucket_boundaries(max_length, min_length=8, mantissa_bits=2): """A default set of length-bucket boundaries.""" x = min_length boundaries = [] @@ -300,39 +362,46 @@ def bucket_boundaries(max_length, min_length=8, mantissa_bits=2): return boundaries -def hparams_to_batching_scheme(hparams, - drop_long_sequences=False, - shard_multiplier=1, - length_multiplier=1): +def _batching_scheme(batch_size=16 * 256, + max_length=None, + batching_mantissa_bits=1, + drop_long_sequences=False, + shard_multiplier=1, + length_multiplier=1): """A batching scheme based on model hyperparameters. Every batch containins a number of sequences divisible by `shard_multiplier`. - If `drop_long_sequences` is True, then sequences longer than - `hparams.batch_size` are dropped. This prevents generating batches with - more than the usual number of tokens, which can cause out-of-memory errors. - Args: - hparams: a hyperparameters. - drop_long_sequences: a boolean. + batch_size: int, total number of tokens in a batch. + max_length: int, sequences longer than this will be skipped. Defaults to + batch_size. + batching_mantissa_bits: int, ??. + drop_long_sequences: bool, if True, then sequences longer than + `max_length` are dropped. This prevents generating batches with + more than the usual number of tokens, which can cause out-of-memory + errors. shard_multiplier: an integer increasing the batch_size to suit splitting across datashards. length_multiplier: an integer multiplier that is used to increase the batch sizes and sequence length tolerance. Returns: - a dictionary + A dictionary with parameters that can be passed to input_pipeline: + * boundaries: list of bucket boundaries + * batch_sizes: list of batch sizes for each length bucket + * max_length: int, maximum length of an example """ - max_length = hparams.max_length or hparams.batch_size - boundaries = bucket_boundaries( - max_length, mantissa_bits=hparams.batching_mantissa_bits) + max_length = max_length or batch_size + boundaries = _bucket_boundaries( + max_length, mantissa_bits=batching_mantissa_bits) + boundaries = [boundary * length_multiplier for boundary in boundaries] + max_length *= length_multiplier + batch_sizes = [ - max(1, hparams.batch_size // length) + max(1, batch_size // length) * shard_multiplier for length in boundaries + [max_length] ] - batch_sizes = [b * shard_multiplier for b in batch_sizes] - max_length *= length_multiplier - boundaries = [boundary * length_multiplier for boundary in boundaries] return { "boundaries": boundaries, "batch_sizes": batch_sizes, @@ -340,6 +409,20 @@ def hparams_to_batching_scheme(hparams, } +def hparams_to_batching_scheme(hparams, + drop_long_sequences=False, + shard_multiplier=1, + length_multiplier=1): + """Wrapper around _batching_scheme with hparams.""" + return _batching_scheme( + max_length=hparams.max_length, + batch_size=hparams.batch_size, + batching_mantissa_bits=hparams.batching_mantissa_bits, + drop_long_sequences=drop_long_sequences, + shard_multiplier=shard_multiplier, + length_multiplier=length_multiplier) + + def constant_batching_scheme(constant_batch_size_in_sequences): """A batching scheme with constant batch size. @@ -349,7 +432,7 @@ def constant_batching_scheme(constant_batch_size_in_sequences): Returns: a dictionary """ - boundaries = bucket_boundaries(1024) + boundaries = _bucket_boundaries(1024) batch_sizes = [constant_batch_size_in_sequences] * (1 + len(boundaries)) return { "boundaries": boundaries, diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index ea98da06d..318fb1cab 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -28,123 +28,223 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem as problem_mod from tensor2tensor.utils import data_reader +from tensor2tensor.utils import registry import tensorflow as tf -class DataReaderTest(tf.test.TestCase): +@registry.register_problem +class TestProblem(problem_mod.Problem): - def testExamplesQueue(self): - tf.set_random_seed(1) - tmp_dir = self.get_temp_dir() - (_, tmp_file_path) = tempfile.mkstemp(dir=tmp_dir) - tmp_file_name = os.path.basename(tmp_file_path) - - # Generate a file with 100 examples. - def test_generator(): - for i in xrange(100): - yield {"inputs": [i], "targets": [i], "floats": [i + 0.5]} - - filenames = generator_utils.train_data_filenames(tmp_file_name, tmp_dir, 1) - generator_utils.generate_files(test_generator(), filenames) - self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) - - examples_train = data_reader.examples_reader( - [tmp_file_path + "*"], { - "inputs": tf.VarLenFeature(tf.int64), - "targets": tf.VarLenFeature(tf.int64) - }, - training=True) - examples_eval = data_reader.examples_reader( - [tmp_file_path + "*"], { - "inputs": tf.VarLenFeature(tf.int64), - "targets": tf.VarLenFeature(tf.int64), - "floats": tf.VarLenFeature(tf.float32) - }, - training=False) - with tf.train.MonitoredSession() as session: - # Evaluation data comes in the same order as in the file, check 10. - for i in xrange(10): - examples = session.run(examples_eval) - self.assertEqual(len(examples["inputs"]), 1) - self.assertEqual(len(examples["targets"]), 1) - self.assertEqual(examples["inputs"][0], i) - self.assertEqual(examples["targets"][0], i) - self.assertEqual(examples["floats"][0], i + 0.5) - # Training data is shuffled. - is_shuffled = False - for i in xrange(10): - examples = session.run(examples_train) - self.assertEqual(len(examples["inputs"]), 1) - self.assertEqual(len(examples["targets"]), 1) - self.assertEqual(examples["inputs"][0], examples["targets"][0]) - if examples["inputs"][0] != i: - is_shuffled = True - self.assertTrue(is_shuffled) - - # Clean up. - os.remove(tmp_file_path + "-train-00000-of-00001") - os.remove(tmp_file_path) - - # TODO(rsepassi): fix and reenable test - def _testBatchExamples(self): - tf.set_random_seed(1) - tmp_dir = self.get_temp_dir() - (_, tmp_file_path) = tempfile.mkstemp(dir=tmp_dir) - tmp_file_name = os.path.basename(tmp_file_path) + def generator(self, data_dir, tmp_dir, is_training): + for i in xrange(30): + yield {"inputs": [i] * (i + 1), "targets": [i], "floats": [i + 0.5]} - # Generate a file with 100 examples, n-th example of length n + 1. - def test_generator(): - for i in xrange(100): - yield {"inputs": [i + 1 for _ in xrange(i + 1)], "targets": [i + 1]} + def generate_data(self, data_dir, tmp_dir, task_id=-1): + train_paths = self.training_filepaths(data_dir, 1, shuffled=True) + dev_paths = self.dev_filepaths(data_dir, 1, shuffled=True) + generator_utils.generate_files( + self.generator(data_dir, tmp_dir, True), train_paths) + generator_utils.generate_files( + self.generator(data_dir, tmp_dir, False), dev_paths) - filenames = generator_utils.train_data_filenames(tmp_file_name, tmp_dir, 1) - generator_utils.generate_files(test_generator(), filenames) - self.assertTrue(tf.gfile.Exists(tmp_file_path + "-train-00000-of-00001")) + def hparams(self, defaults, model_hparams): + pass - examples_train = data_reader.examples_reader([tmp_file_path + "*"], { + def example_reading_spec(self): + data_fields = { "inputs": tf.VarLenFeature(tf.int64), - "targets": tf.VarLenFeature(tf.int64) - }, True) - batch_train = data_reader.batch_examples(examples_train, 4) - examples_eval = data_reader.examples_reader([tmp_file_path + "*"], { - "inputs": tf.VarLenFeature(tf.int64), - "targets": tf.VarLenFeature(tf.int64) - }, False) - batch_eval = data_reader.batch_examples(examples_eval, 2) - session, coord = tf.Session(), tf.train.Coordinator() - with session.as_default(): - tf.train.start_queue_runners(coord=coord) - - # Evaluation data comes in the same order as in the file. - # The first batch will be inputs=[[1, 0], [2, 2]], targets=[[1], [2]]. - examples = session.run(batch_eval) - self.assertAllClose(examples["inputs"], np.array([[1, 0], [2, 2]])) - self.assertAllClose(examples["targets"], np.array([[1], [2]])) - # Check the second batch too. - examples = session.run(batch_eval) - self.assertAllClose(examples["inputs"], - np.array([[3, 3, 3, 0], [4, 4, 4, 4]])) - self.assertAllClose(examples["targets"], np.array([[3], [4]])) - - # Training data is shuffled but shouldn't have too many pads. + "targets": tf.VarLenFeature(tf.int64), + "floats": tf.VarLenFeature(tf.float32), + } + data_items_to_decoders = None + return (data_fields, data_items_to_decoders) + + def preprocess_examples(self, examples, unused_mode, unused_hparams): + examples["new_field"] = tf.constant([42.42]) + return examples + + +def generate_test_data(problem, tmp_dir): + problem.generate_data(tmp_dir, tmp_dir) + filepatterns = data_reader.get_data_filepatterns( + problem.name, tmp_dir, tf.contrib.learn.ModeKeys.TRAIN) + assert tf.gfile.Glob(filepatterns[0]) + return filepatterns + + +class DataReaderTest(tf.test.TestCase): + + @classmethod + def setUpClass(cls): + tf.set_random_seed(1) + cls.problem = registry.problem("test_problem") + cls.filepatterns = generate_test_data(cls.problem, tempfile.gettempdir()) + + @classmethod + def tearDownClass(cls): + # Clean up files + for fp in cls.filepatterns: + files = tf.gfile.Glob(fp) + for f in files: + os.remove(f) + + def testBasicExampleReading(self): + dataset = data_reader.read_examples(self.problem, self.filepatterns[0], 32) + examples = dataset.make_one_shot_iterator().get_next() + with tf.train.MonitoredSession() as sess: + # Check that there are multiple examples that have the right fields of the + # right type (lists of int/float). for _ in xrange(10): - examples = session.run(batch_train) - inputs = examples["inputs"] - # Only 3 out of 4 examples in a batch have padding zeros at all. - pad_per_example = (inputs.size - np.count_nonzero(inputs)) // 3 - # Default bucketing is in steps of 8 until 64 and 32 later. - if int(max(examples["targets"])) < 64: - self.assertLess(pad_per_example, 8) - else: - self.assertLess(pad_per_example, 32) - - # Clean up. - coord.request_stop() - coord.join() - os.remove(tmp_file_path + "-train-00000-of-00001") - os.remove(tmp_file_path) + ex_val = sess.run(examples) + inputs, targets, floats = (ex_val["inputs"], ex_val["targets"], + ex_val["floats"]) + self.assertEqual(np.int64, inputs.dtype) + self.assertEqual(np.int64, targets.dtype) + self.assertEqual(np.float32, floats.dtype) + for field in [inputs, targets, floats]: + self.assertGreater(len(field), 0) + + def testTrainEvalBehavior(self): + train_dataset = data_reader.read_examples(self.problem, + self.filepatterns[0], 16) + train_examples = train_dataset.make_one_shot_iterator().get_next() + eval_dataset = data_reader.read_examples( + self.problem, + self.filepatterns[0], + 16, + mode=tf.contrib.learn.ModeKeys.EVAL) + eval_examples = eval_dataset.make_one_shot_iterator().get_next() + + eval_idxs = [] + with tf.train.MonitoredSession() as sess: + # Train should be shuffled and run through infinitely + for i in xrange(30): + self.assertNotEqual(i, sess.run(train_examples)["inputs"][0]) + + # Eval should not be shuffled and only run through once + for i in xrange(30): + self.assertEqual(i, sess.run(eval_examples)["inputs"][0]) + eval_idxs.append(i) + + with self.assertRaises(tf.errors.OutOfRangeError): + sess.run(eval_examples) + # Should never run because above line should error + eval_idxs.append(30) + + # Ensuring that the above exception handler actually ran and we didn't + # exit the MonitoredSession context. + eval_idxs.append(-1) + + self.assertAllEqual(list(range(30)) + [-1], eval_idxs) + + def testPreprocess(self): + dataset = data_reader.read_examples(self.problem, self.filepatterns[0], 32) + examples = dataset.make_one_shot_iterator().get_next() + examples = data_reader._preprocess(examples, self.problem, None, None, None) + with tf.train.MonitoredSession() as sess: + ex_val = sess.run(examples) + # problem.preprocess_examples has been run + self.assertAllClose([42.42], ex_val["new_field"]) + + # int64 has been cast to int32 + self.assertEqual(np.int32, ex_val["inputs"].dtype) + self.assertEqual(np.int32, ex_val["targets"].dtype) + self.assertEqual(np.float32, ex_val["floats"].dtype) + + def testLengthFilter(self): + max_len = 15 + dataset = data_reader.read_examples(self.problem, self.filepatterns[0], 32) + dataset = dataset.filter( + lambda ex: data_reader._example_too_big(ex, max_len)) + examples = dataset.make_one_shot_iterator().get_next() + with tf.train.MonitoredSession() as sess: + ex_lens = [] + for _ in xrange(max_len): + ex_lens.append(len(sess.run(examples)["inputs"])) + + self.assertAllEqual(list(range(1, max_len + 1)), sorted(ex_lens)) + + def testBatchingSchemeMaxLength(self): + scheme = data_reader._batching_scheme( + batch_size=20, max_length=None, drop_long_sequences=False) + self.assertGreater(scheme["max_length"], 10000) + + scheme = data_reader._batching_scheme( + batch_size=20, max_length=None, drop_long_sequences=True) + self.assertEqual(scheme["max_length"], 20) + + scheme = data_reader._batching_scheme( + batch_size=20, max_length=15, drop_long_sequences=True) + self.assertEqual(scheme["max_length"], 15) + + scheme = data_reader._batching_scheme( + batch_size=20, max_length=15, drop_long_sequences=False) + self.assertGreater(scheme["max_length"], 10000) + + def testBatchingSchemeBuckets(self): + scheme = data_reader._batching_scheme(batch_size=128) + boundaries, batch_sizes = scheme["boundaries"], scheme["batch_sizes"] + self.assertEqual(len(boundaries), len(batch_sizes) - 1) + expected_boundaries = [8, 12, 16, 24, 32, 48, 64, 96] + self.assertEqual(expected_boundaries, boundaries) + expected_batch_sizes = [16, 10, 8, 5, 4, 2, 2, 1, 1] + self.assertEqual(expected_batch_sizes, batch_sizes) + + scheme = data_reader._batching_scheme(batch_size=128, shard_multiplier=2) + boundaries, batch_sizes = scheme["boundaries"], scheme["batch_sizes"] + self.assertAllEqual([bs * 2 for bs in expected_batch_sizes], batch_sizes) + self.assertEqual(expected_boundaries, boundaries) + + scheme = data_reader._batching_scheme(batch_size=128, length_multiplier=2) + boundaries, batch_sizes = scheme["boundaries"], scheme["batch_sizes"] + self.assertAllEqual([b * 2 for b in expected_boundaries], boundaries) + self.assertEqual([max(1, bs // 2) + for bs in expected_batch_sizes], batch_sizes) + + def testBucketBySeqLength(self): + + def example_len(ex): + return tf.shape(ex["inputs"])[0] + + boundaries = [10, 20, 30] + batch_sizes = [10, 8, 4, 2] + + dataset = data_reader.read_examples( + self.problem, + self.filepatterns[0], + 32, + mode=tf.contrib.learn.ModeKeys.EVAL) + dataset = data_reader.bucket_by_sequence_length(dataset, example_len, + boundaries, batch_sizes) + batch = dataset.make_one_shot_iterator().get_next() + + input_vals = [] + obs_batch_sizes = [] + with tf.train.MonitoredSession() as sess: + # Until OutOfRangeError + while True: + batch_val = sess.run(batch) + batch_inputs = batch_val["inputs"] + batch_size, max_len = batch_inputs.shape + obs_batch_sizes.append(batch_size) + for inputs in batch_inputs: + input_val = inputs[0] + input_vals.append(input_val) + # The inputs were constructed such that they were repeated value+1 + # times (i.e. if the inputs value is 7, the example has 7 repeated 8 + # times). + repeat = input_val + 1 + # Check padding + self.assertAllEqual([input_val] * repeat + [0] * (max_len - repeat), + inputs) + + # Check that all inputs came through + self.assertEqual(list(range(30)), sorted(input_vals)) + # Check that we saw variable batch size + self.assertTrue(len(set(obs_batch_sizes)) > 1) if __name__ == "__main__": diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index 1be485d56..c31ba0f31 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -92,11 +92,9 @@ def input_fn(): with tf.device("/cpu:0"): # Input reading on CPU capacity = ( p_hparams.max_expected_batch_size_per_shard * num_datashards) - examples = data_reader.input_pipeline( + feature_map = data_reader.input_pipeline( problem_instance, data_file_patterns and data_file_patterns[n], - capacity, mode, hparams) - feature_map = data_reader.batch_examples( - examples, + capacity, mode, hparams, data_reader.hparams_to_batching_scheme( hparams, shard_multiplier=num_datashards, diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 61156f227..568026ad6 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -19,6 +19,9 @@ from __future__ import division from __future__ import print_function +import os +import shutil + # Dependency imports from tensor2tensor.data_generators import algorithmic @@ -60,9 +63,13 @@ class TrainerUtilsTest(tf.test.TestCase): @classmethod def setUpClass(cls): + tmp_dir = tf.test.get_temp_dir() + shutil.rmtree(tmp_dir) + os.mkdir(tmp_dir) + # Generate a small test dataset FLAGS.problems = "tiny_algo" - TrainerUtilsTest.data_dir = tf.test.get_temp_dir() + TrainerUtilsTest.data_dir = tmp_dir registry.problem(FLAGS.problems).generate_data(TrainerUtilsTest.data_dir, None) From a237064efb4e773906f98dd9ee2efff313c69104 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Tue, 22 Aug 2017 17:41:13 -0700 Subject: [PATCH 0301/4095] change decode_from_dataset to write outside the loop and extra cond for decode_from_file PiperOrigin-RevId: 166140044 --- tensor2tensor/utils/decoding.py | 35 +++++++++++++++++----------- tensor2tensor/utils/trainer_utils.py | 2 +- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 4ba8dc71a..2e430a204 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -74,22 +74,19 @@ def log_fn(inputs, decoded_outputs = " ".join(map(str, outputs.flatten())) decoded_targets = " ".join(map(str, targets.flatten())) else: - decoded_outputs = targets_vocab.decode( - _save_until_eos(outputs.flatten())) - decoded_targets = targets_vocab.decode( - _save_until_eos(targets.flatten())) + decoded_outputs = " ".join(map( + str, targets_vocab.decode(_save_until_eos(outputs.flatten())))) + decoded_targets = " ".join(map( + str, targets_vocab.decode(_save_until_eos(targets.flatten())))) tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) tf.logging.info("Inference results TARGET: %s" % decoded_targets) - if FLAGS.decode_to_file: - output_filepath = FLAGS.decode_to_file + ".outputs." + problem - output_file = tf.gfile.Open(output_filepath, "a") - output_file.write(decoded_outputs + "\n") - target_filepath = FLAGS.decode_to_file + ".targets." + problem - target_file = tf.gfile.Open(target_filepath, "a") - target_file.write(decoded_targets + "\n") + return decoded_outputs, decoded_targets + result_iter = estimator.predict(input_fn=infer_input_fn, as_iterable=True) count = 0 + agg_outputs = [] + agg_targets = [] for result in result_iter: # predictions from the test input. We use it to log inputs and decodes. inputs = result["inputs"] @@ -99,13 +96,25 @@ def log_fn(inputs, output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) for k, beam in enumerate(output_beams): tf.logging.info("BEAM %d:" % k) - log_fn(inputs, targets, beam, problem, count) + o, t = log_fn(inputs, targets, beam, problem, count) + agg_outputs.append(o) + agg_targets.append(t) else: - log_fn(inputs, targets, outputs, problem, count) + o, t = log_fn(inputs, targets, outputs, problem, count) + agg_outputs.append(o) + agg_targets.append(t) count += 1 if FLAGS.decode_num_samples != -1 and count >= FLAGS.decode_num_samples: break + if FLAGS.decode_to_file: + output_filepath = FLAGS.decode_to_file + ".outputs." + problem + output_file = tf.gfile.Open(output_filepath, "w") + target_filepath = FLAGS.decode_to_file + ".targets." + problem + target_file = tf.gfile.Open(target_filepath, "w") + for o, t in zip(agg_outputs, agg_targets): + output_file.write(str(o)+"\n") + target_file.write(str(t)+"\n") tf.logging.info("Completed inference on %d samples." % count) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 22fd727f9..09375b79a 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -343,7 +343,7 @@ def get_data_filepatterns(data_dir, mode): def decode(estimator): if FLAGS.decode_interactive: decoding.decode_interactively(estimator) - elif FLAGS.decode_from_file is not None: + elif FLAGS.decode_from_file is not None and FLAGS.decode_from_file is not "": decoding.decode_from_file(estimator, FLAGS.decode_from_file) elif FLAGS.decode_from_dataset: decoding.decode_from_dataset(estimator) From e5e79cd2b629ce0d0134a25b35cee352400ba844 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 22 Aug 2017 20:19:05 -0700 Subject: [PATCH 0302/4095] Small corrections for TF 1.3, remove 8k wiki (vocab is 21k anyway), play with VAE. PiperOrigin-RevId: 166151697 --- README.md | 6 +- .../data_generators/algorithmic_test.py | 2 + tensor2tensor/data_generators/lm1b.py | 2 + tensor2tensor/data_generators/wiki.py | 9 - tensor2tensor/layers/common_attention.py | 2 + tensor2tensor/layers/common_layers.py | 4 +- tensor2tensor/layers/rev_block.py | 2 + tensor2tensor/models/transformer_vae.py | 175 ++++++------ tensor2tensor/utils/model_builder.py | 2 +- tensor2tensor/utils/yellowfin.py | 261 +++++++++++++----- 10 files changed, 284 insertions(+), 181 deletions(-) diff --git a/README.md b/README.md index 1f77e7134..236d279c2 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ trains an English-German translation model, and lets you use it interactively: pip install tensor2tensor && t2t-trainer \ --generate_data \ --data_dir=~/t2t_data \ - --problems=wmt_ende_tokens_32k \ + --problems=translate_ende_wmt32k \ --model=transformer \ --hparams_set=transformer_base_single_gpu \ --output_dir=~/t2t_train/base \ @@ -72,7 +72,7 @@ pip install tensor2tensor # You can easily swap between them (and add new ones). t2t-trainer --registry_help -PROBLEM=wmt_ende_tokens_32k +PROBLEM=translate_ende_wmt32k MODEL=transformer HPARAMS=transformer_base_single_gpu @@ -277,7 +277,7 @@ registrations. To add a new dataset, subclass [`Problem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py) and register it with `@registry.register_problem`. See -[`WMTEnDeTokens8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +[`TranslateEndeWmt8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) for an example. Also see the [data generators diff --git a/tensor2tensor/data_generators/algorithmic_test.py b/tensor2tensor/data_generators/algorithmic_test.py index 4ac6d3123..bcd61216e 100644 --- a/tensor2tensor/data_generators/algorithmic_test.py +++ b/tensor2tensor/data_generators/algorithmic_test.py @@ -21,6 +21,8 @@ # Dependency imports +from six.moves import xrange # pylint: disable=redefined-builtin + from tensor2tensor.data_generators import algorithmic import tensorflow as tf diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index a436e0e6e..a3771e124 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -26,6 +26,8 @@ # Dependency imports +from six.moves import xrange # pylint: disable=redefined-builtin + from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators import tokenizer diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 3cdbac5db..d1c80f2e1 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -129,12 +129,3 @@ def generator(self, data_dir, tmp_dir, _): encoded = encoder.encode(page) + [EOS] encoded_title = encoder.encode(title) + [EOS] yield {"inputs": encoded_title, "targets": encoded} - - -@registry.register_problem -class LanguagemodelWikiFull8k(problem.Text2TextProblem): - """A language model on full English Wikipedia.""" - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 55b2be090..63bd6f472 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -23,6 +23,8 @@ # Dependency imports +from six.moves import xrange # pylint: disable=redefined-builtin + from tensor2tensor.layers import common_layers from tensor2tensor.utils import expert_utils diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 8621ddcb1..5be5a35a7 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -55,13 +55,13 @@ def hard_tanh(x, saturation_limit=0.9): def inverse_exp_decay(max_step, min_value=0.01): """Inverse-decay exponentially from 0.01 to 1.0 reached at max_step.""" inv_base = tf.exp(tf.log(min_value) / float(max_step)) - step = tf.to_float(tf.contrib.framework.get_global_step()) + step = tf.to_float(tf.train.get_global_step()) return inv_base**tf.maximum(float(max_step) - step, 0.0) def inverse_lin_decay(max_step, min_value=0.01): """Inverse-decay linearly from 0.01 to 1.0 reached at max_step.""" - step = tf.to_float(tf.contrib.framework.get_global_step()) + step = tf.to_float(tf.train.get_global_step()) progress = tf.minimum(step / float(max_step), 1.0) return progress * (1.0 - min_value) + min_value diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 4dd1cde03..7198a953d 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -28,6 +28,8 @@ # Dependency imports +from six.moves import xrange # pylint: disable=redefined-builtin + import tensorflow as tf from tensorflow.python.framework import dtypes from tensorflow.python.framework import function diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 6a3f3afdf..819fa8b9d 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -26,7 +26,6 @@ from tensor2tensor.layers import common_attention from tensor2tensor.layers import common_layers from tensor2tensor.models import transformer -from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -85,37 +84,26 @@ def decompress_step(source, c, hparams, first_relu, name): return tf.reshape(thicker, [shape[0], shape[1] * 2, 1, hparams.hidden_size]) -def top_k_softmax(x, k): - """Calculate softmax(x), select top-k and rescale to sum to 1.""" - x = tf.nn.softmax(x) - top_x, _ = tf.nn.top_k(x, k=k+1) - min_top = tf.reduce_min(top_x, axis=-1, keep_dims=True) - x = tf.nn.relu((x - min_top) + 1e-12) - x /= tf.reduce_sum(x, axis=-1, keep_dims=True) - return x, tf.reduce_max(top_x, axis=-1) +def gumbel_sample(shape): + """Sample from the Gumbel distribution, protect from overflows.""" + uniform_samples = tf.random_uniform(shape, minval=0.00001, maxval=0.99998) + return -tf.log(-tf.log(uniform_samples)) -def top_k_experts(x, k, hparams): - x_shape = tf.shape(x) - x_flat = tf.reshape(x, [-1, x.get_shape().as_list()[-1]]) - is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN - gates, load = expert_utils.noisy_top_k_gating( - x_flat, hparams.v_size, is_training, k) - gates_shape = [x_shape[0], x_shape[1], x_shape[2], hparams.v_size] - gates = tf.reshape(gates, gates_shape) - load_loss = expert_utils.cv_squared(load) - return gates, load_loss - - -def dvae(x, k, hparams, name): +def dvae(x, hparams, name): with tf.variable_scope(name): m = tf.layers.dense(x, hparams.v_size, name="mask") - if k is None: - m = tf.nn.softmax(m) - kl = - tf.reduce_max(m, axis=-1) - else: - m, kl = top_k_softmax(m, k) - return m, 1.0 - tf.reduce_mean(kl) + logsm = tf.nn.log_softmax(m) + # Gumbel-softmax sample. + gumbel_samples = gumbel_sample(tf.shape(m)) + steps = hparams.kl_warmup_steps + gumbel_samples *= common_layers.inverse_exp_decay(steps) + temperature = 1.01 - common_layers.inverse_lin_decay(steps) + s = tf.nn.softmax((logsm + gumbel_samples) / temperature) + m = tf.nn.softmax(m) + kl = - tf.reduce_max(logsm, axis=-1) + tf.summary.histogram("max-log", tf.reshape(kl, [-1])) + return m, tf.reduce_mean(kl), s def vae(x, hparams, name): @@ -145,79 +133,98 @@ def compress(x, c, hparams, name): return cur -def mix(x1, x2, steps, min_prob=0.0, max_prob=1.0, mode="lin"): +def mix(x1, x2, steps, min_prob=0.0, max_prob=1.0, mode="lin", simple=False): + """Mix starting with x2, mixing mixing, going towards x1.""" if mode == "lin": - alpha_p = common_layers.inverse_lin_decay(steps) + 0.001 + alpha_p = common_layers.inverse_lin_decay(steps) else: - alpha_p = common_layers.inverse_exp_decay(steps) + 0.001 + alpha_p = common_layers.inverse_exp_decay(steps) alpha_p = alpha_p * (max_prob - min_prob) + min_prob + if simple: + return alpha_p * x1 + (1.0 - alpha_p) * x2 alpha = tf.random_uniform(tf.shape(x1)) alpha = tf.to_float(tf.less(alpha, alpha_p)) return alpha * x1 + (1.0 - alpha) * x2 -def vae_compress(x, c, hparams, compress_name, decompress_name, reuse=None): +def encode(x, x_space, hparams, name): + """Transformer preparations and encoder.""" + with tf.variable_scope(name): + (encoder_input, encoder_self_attention_bias, + ed) = transformer.transformer_prepare_encoder(x, x_space, hparams) + encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout) + return transformer.transformer_encoder( + encoder_input, encoder_self_attention_bias, hparams), ed + + +def decode(cond_vec, gold, c, ed, hparams): + drop_gold = tf.nn.dropout(gold, 1.0 - hparams.layer_prepostprocess_dropout) + drop_gold += cond_vec + decoder_input = common_layers.shift_left(drop_gold, pad_value=cond_vec) + decoder_input = tf.squeeze(decoder_input, axis=2) + decoder_input = common_attention.add_timing_signal_1d(decoder_input) + bias = common_attention.attention_bias_lower_triangle(tf.shape(gold)[1]) + c = tf.squeeze(c, axis=2) + return transformer.transformer_decoder(decoder_input, c, bias, ed, hparams) + + +def expand_batch(x, mul): + """Expand on batch by mul times.""" + cx = tf.expand_dims(x, axis=1) + x_shape = x.get_shape().as_list() + batch_mul = tf.to_int32(mul) + cx += tf.zeros([1, batch_mul, 1, 1, 1]) + mid_shape = [tf.shape(x)[2]] if len(x_shape) > 3 else [] + end_shape = [x_shape[-1]] if x_shape[-1] else [tf.shape(x)[-1]] + res_shape = [-1, tf.shape(x)[1]] + mid_shape + end_shape + return tf.reshape(cx, res_shape) + + +def vae_compress(x, c, ed, hparams, compress_name, decompress_name, reuse=None): """Compress, then VAE.""" - mix_k = 8 with tf.variable_scope(compress_name, reuse=reuse): cur = compress(x, None, hparams, "compress") # Convolve and ReLu to get state. cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") # z, kl_loss, mu, log_sigma = vae(cur, hparams, name="vae") - z, kl_loss = dvae(cur, None, hparams, name="dvae") - z1, kl_loss1 = top_k_experts(cur, mix_k, hparams) - mu, log_sigma = None, None - - # Mix expert-selection and flat selection. - alpha_p = common_layers.inverse_lin_decay(60000) + 0.001 - z = alpha_p * z1 + (1 - alpha_p) * z - kl_loss += kl_loss1 + z_true, kl_loss, z_gumbel = dvae(cur, hparams, name="dvae") # Compress context. with tf.variable_scope(compress_name, reuse=reuse): compress_c = compress(c, None, hparams, "compress_context") c_z = tf.layers.dense(compress_c, hparams.v_size, name="mask_context") reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( - labels=z, logits=c_z) + labels=z_true, logits=c_z) # If not training, use the predicted z instead of the autoregressive one. - # if hparams.mode != tf.contrib.learn.ModeKeys.TRAIN: - # z = mix(c_z, z, 50000, max_prob=0.3, mode="exp") - # z, _ = top_k_softmax(c_z, mix_k) + if hparams.mode == tf.contrib.learn.ModeKeys.INFER: + z = tf.one_hot(tf.argmax(c_z, axis=-1), hparams.v_size) with tf.variable_scope(decompress_name, reuse=reuse): # Decompress. - z = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense") + z = tf.layers.dense(z_gumbel, hparams.hidden_size, name="z_to_dense") # Leak at the beginning to help train. - z = mix(z, cur, 30000) + z = mix(z, cur, hparams.startup_steps) + # Decompress. + d = z for i in xrange(hparams.num_compress_steps): j = hparams.num_compress_steps - i - 1 - z = residual_conv(z, 1, hparams, "decompress_rc_%d" % j) - z = decompress_step(z, c, hparams, i > 0, "decompress_step_%d" % j) - return z, kl_loss + 0.0001 * reconstruct_loss, mu, log_sigma - - -def encode(x, x_space, hparams, name): - """Transformer preparations and encoder.""" - with tf.variable_scope(name): - (encoder_input, encoder_self_attention_bias, - _) = transformer.transformer_prepare_encoder(x, x_space, hparams) - encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout) - return transformer.transformer_encoder( - encoder_input, encoder_self_attention_bias, hparams) + d = residual_conv(d, 1, hparams, "decompress_rc_%d" % j) + d = decompress_step(d, c, hparams, i > 0, "decompress_step_%d" % j) + k = 2**hparams.num_compress_steps + z_batch = tf.reshape(z, [-1, 1, 1, hparams.hidden_size]) + x_batch = tf.reshape(x + d, [-1, k, 1, hparams.hidden_size]) + # dec_batch = decode(z_batch, x_batch, None, None, hparams) + c = expand_batch(c, tf.shape(x_batch)[0] / tf.shape(x)[0]) + ed = expand_batch(ed, tf.shape(x_batch)[0] / tf.shape(x)[0]) + dec_batch = decode(z_batch, x_batch, c, ed, hparams) + z = tf.reshape(dec_batch, [-1, tf.shape(x)[1], 1, hparams.hidden_size]) -def dropmask(targets, targets_dropout_max, is_training): - if not is_training: - return targets - targets_drop_prob = tf.random_uniform([]) * targets_dropout_max - drop_mask = tf.random_uniform(tf.shape(targets)[:-1]) - drop_mask = tf.to_float(tf.less(drop_mask, targets_drop_prob)) - keep_mask = tf.expand_dims(1.0 - drop_mask, axis=2) - return targets * keep_mask + return z, kl_loss, reconstruct_loss def ffn(x, hparams, name): @@ -239,29 +246,16 @@ def vae_transformer_internal(inputs, targets, target_space, hparams): k = 2**hparams.num_compress_steps inputs, targets = common_layers.pad_to_same_length( inputs, targets, final_length_divisible_by=k) - inputs = encode(inputs, target_space, hparams, "input_enc") + inputs, ed_bias = encode(inputs, target_space, hparams, "input_enc") # Compress and vae. - z, kl_loss, _, _ = vae_compress(tf.expand_dims(targets, axis=2), - tf.expand_dims(inputs, axis=2), - hparams, "vae_compress", "vae_decompress") - - # Join z with inputs, run decoder. - to_decode = common_layers.conv_block( - tf.concat([z, tf.expand_dims(inputs, axis=2)], axis=3), - hparams.hidden_size, [((1, 1), (1, 1))], name="join_z") - ret = encode(tf.squeeze(to_decode, axis=2), target_space, hparams, "dec") - - # For experiments with one-sided decoder: - # decoder_in = tf.squeeze(to_decode, axis=2) - # (decoder_input, decoder_self_attention_bias) = ( - # transformer.transformer_prepare_decoder(decoder_in, hparams)) - # ret = transformer.transformer_decoder( - # decoder_input, inputs, decoder_self_attention_bias, None, hparams) - - kl_loss *= common_layers.inverse_exp_decay(hparams.kl_warmup_steps) * 3.0 - losses = {"kl": kl_loss} - return tf.expand_dims(ret, axis=2), losses + z, kl, r = vae_compress(tf.expand_dims(targets, axis=2), + tf.expand_dims(inputs, axis=2), + ed_bias, hparams, "vae_compress", "vae_decompress") + kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 2.0))*0.5 + r *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 2.0)) + losses = {"kl": kl, "reconstruction": r} + return z, losses @registry.register_model @@ -319,7 +313,8 @@ def transformer_vae_small(): hparams.add_hparam("z_size", 128) hparams.add_hparam("v_size", 1024*8) hparams.add_hparam("num_compress_steps", 4) - hparams.add_hparam("kl_warmup_steps", 50000) + hparams.add_hparam("kl_warmup_steps", 120000) + hparams.add_hparam("startup_steps", 20000) return hparams diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index da33e1e40..7714f3867 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -319,7 +319,7 @@ def nth_model(n): train_op = tf.contrib.layers.optimize_loss( name="training", loss=total_loss, - global_step=tf.contrib.framework.get_global_step(), + global_step=tf.train.get_global_step(), learning_rate=learning_rate, clip_gradients=my_hp.clip_grad_norm or None, gradient_noise_scale=hparams.grad_noise_scale or None, diff --git a/tensor2tensor/utils/yellowfin.py b/tensor2tensor/utils/yellowfin.py index aeb14e76e..450875fa5 100644 --- a/tensor2tensor/utils/yellowfin.py +++ b/tensor2tensor/utils/yellowfin.py @@ -21,18 +21,16 @@ # Dependency imports -import numpy as np import tensorflow as tf -from tensorflow.python.framework import ops # Values for gate_gradients. -GATE_NONE = 0 -GATE_OP = 1 -GATE_GRAPH = 2 +GATE_NONE = tf.train.Optimizer.GATE_NONE +GATE_OP = tf.train.Optimizer.GATE_OP +GATE_GRAPH = tf.train.Optimizer.GATE_GRAPH -class YellowFinOptimizer(tf.train.Optimizer): +class YellowFinOptimizer(object): """Optimizer that implements the YellowFin algorithm. See [Zhang et. al., 2017](https://arxiv.org/abs/1706.03471) for details. @@ -45,20 +43,38 @@ def __init__(self, beta=0.999, curvature_window_width=20, zero_debias=True, - delta_mu=0.0): + delta_mu=0.0, + sparsity_debias=True, + use_locking=False, + name="YellowFin", + use_nesterov=False): """Construct a new YellowFin optimizer. + Implemented as a wrapper around tf.train.MomentumOptimizer + Args: learning_rate: A Tensor or a floating point value. The learning rate. + Set to 1.0 in the paper. momentum: A Tensor or a floating point value. The momentum. + Set to 0.0 in the paper. clip_thresh: A Tensor or a floating point value. The cliping threshold for - tf.clip_by_global_norm. If None, no clipping will be carried out. + `tf.clip_by_global_norm`. If None, no clipping will be carried out. beta: A float value or a constant float tensor. The smoothing parameter for estimations. curvature_window_width: A int value or a constant int tensor. The curvature window width. zero_debias: A boolean, zero debias moving-averages. delta_mu: For extensions. Not necessary in the basic use. + sparsity_debias: A boolean. Gradient norm and curvature are + biased to larger values when calculated with sparse gradient. + This is useful when the model is very sparse, e.g. LSTM with + word embedding. For non-sparse CNN, turning it off could + slightly accelerate the speed. + use_locking: If True, use locks for update operations. + name: Optional name prefix for the operations created when + applying gradients. Defaults to "YellowFin". + use_nesterov: If True, the underlying MomentumOptimizer uses Nesterov + Momentum. Set to False in the default YellowFin algorithm. Note: clip_thresh is the threshold value on ||lr * gradient||, @@ -81,27 +97,28 @@ def __init__(self, self._mu = momentum # Set lr and mu tensor. - self._lr_var = tf.Variable(learning_rate, - dtype=tf.float32, - name="YF_lr", - trainable=False) - self._mu_var = tf.Variable(momentum, - dtype=tf.float32, - name="YF_mu", - trainable=False) + self._lr_var = tf.get_variable("YF_lr", + dtype=tf.float32, + trainable=False, + initializer=learning_rate) + self._mu_var = tf.get_variable("YF_mu", + dtype=tf.float32, + trainable=False, + initializer=tf.constant(momentum)) # Tuning factor for learning rates step or decaying scheme. - self.lr_factor = tf.Variable(1.0, - dtype=tf.float32, - name="YF_lr_factor", - trainable=False) + self.lr_factor = tf.get_variable("YF_lr_factor", + dtype=tf.float32, + trainable=False, + initializer=tf.constant(1.0)) # Gradient Clipping Threshold. if clip_thresh is not None: - self._clip_thresh_var = tf.Variable(clip_thresh, - dtype=tf.float32, - name="YF_clip_thresh", - trainable=False) + self._clip_thresh_var = tf.get_variable( + "YF_clip_thresh", + dtype=tf.float32, + trainable=False, + initializer=tf.constant(clip_thresh)) else: self._clip_thresh_var = None @@ -111,17 +128,18 @@ def __init__(self, # Init momentum optimizer. self._momentum_optimizer = tf.train.MomentumOptimizer( - self._lr_m, self._mu_m) + self._lr_m, self._mu_m, use_locking, name, use_nesterov) # Moving average for statistics. self._beta = beta self._moving_averager = None # Step counting. - self._step = tf.Variable(0, - dtype=tf.int32, - name="YF_step", - trainable=False) + self._step = tf.get_variable("YF_step", + dtype=tf.int32, + trainable=False, + initializer=tf.constant(0)) + # YF_step + 1 op. self._increment_step_op = None @@ -130,6 +148,7 @@ def __init__(self, # Moving-averages. self._zero_debias = zero_debias + self._sparsity_debias = sparsity_debias # For curvature range. self.curvature_window_width = curvature_window_width @@ -170,27 +189,32 @@ def __init__(self, # and (zero_devias) moving-averages. self._moving_averager = None + # Handling Sparse Matrix + self._sparsity = None + self._sparsity_avg = None + def _curvature_range(self): """Curvature range. Returns: h_max_t, h_min_t ops """ - self._curv_win = tf.Variable(np.zeros([self.curvature_window_width,]), - dtype=tf.float32, - name="curv_win", - trainable=False) - + self._curv_win = tf.get_variable("curv_win", + dtype=tf.float32, + trainable=False, + shape=[self.curvature_window_width,], + initializer=tf.zeros_initializer) + # We use log smoothing for curvature range self._curv_win = tf.scatter_update(self._curv_win, self._step % self.curvature_window_width, - self._grad_norm_squared) + tf.log(self._grad_norm_squared)) # Note here the iterations start from iteration 0 valid_window = tf.slice(self._curv_win, tf.constant([0,]), tf.expand_dims( tf.minimum( tf.constant(self.curvature_window_width), - self._step + 1), axis=0)) + self._step + 1), dim=0)) self._h_min_t = tf.reduce_min(valid_window) self._h_max_t = tf.reduce_max(valid_window) @@ -198,8 +222,13 @@ def _curvature_range(self): with tf.control_dependencies([self._h_min_t, self._h_max_t]): avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t]) with tf.control_dependencies([avg_op]): - self._h_min = tf.identity(self._moving_averager.average(self._h_min_t)) - self._h_max = tf.identity(self._moving_averager.average(self._h_max_t)) + self._h_min = tf.exp( + tf.identity(self._moving_averager.average(self._h_min_t))) + self._h_max = tf.exp( + tf.identity(self._moving_averager.average(self._h_max_t))) + if self._sparsity_debias: + self._h_min *= self._sparsity_avg + self._h_max *= self._sparsity_avg curv_range_ops.append(avg_op) return curv_range_ops # h_max_t, h_min_t @@ -226,10 +255,14 @@ def _grad_variance(self): self._grad_avg = [self._moving_averager.average(val) for val in tensor_to_avg] self._grad_avg_squared = [tf.square(val) for val in self._grad_avg] - self._grad_avg_squared = tf.add_n([tf.reduce_sum(val) - for val in self._grad_avg_squared]) + # Compute Variance - self._grad_var = self._grad_norm_squared_avg - self._grad_avg_squared + self._grad_var = tf.maximum( + tf.constant(1e-6, dtype=self._grad_norm_squared_avg.dtype), + self._grad_norm_squared_avg + - tf.add_n([tf.reduce_sum(val) for val in self._grad_avg_squared])) + if self._sparsity_debias: + self._grad_var *= self._sparsity_avg return grad_var_ops # C_t def _dist_to_opt(self): @@ -239,7 +272,7 @@ def _dist_to_opt(self): D_t ops """ dist_to_opt_ops = [] - # Running average of the norm of gradeint + # Running average of the norm of gradient self._grad_norm = tf.sqrt(self._grad_norm_squared) avg_op = self._moving_averager.apply([self._grad_norm,]) dist_to_opt_ops.append(avg_op) @@ -254,8 +287,27 @@ def _dist_to_opt(self): with tf.control_dependencies([avg_op]): self._dist_to_opt_avg = tf.identity( self._moving_averager.average(self._d_t)) + if self._sparsity_debias: + self._dist_to_opt_avg /= tf.sqrt(self._sparsity_avg) return dist_to_opt_ops # D_t + def _grad_sparsity(self): + """Gradient sparsity.""" + # If the sparse minibatch gradient has 10 percent of its entries + # non-zero, its sparsity is 0.1. + # The norm of dense gradient averaged from full dataset + # are roughly estimated norm of minibatch + # sparse gradient norm * sqrt(sparsity) + # An extension maybe only correct the sparse blob. + non_zero_cnt = tf.add_n([tf.count_nonzero(g) for g in self._grad]) + all_entry_cnt = tf.add_n([tf.size(g) for g in self._grad]) + self._sparsity = tf.cast(non_zero_cnt, self._grad[0].dtype) + self._sparsity /= tf.cast(all_entry_cnt, self._grad[0].dtype) + avg_op = self._moving_averager.apply([self._sparsity,]) + with tf.control_dependencies([avg_op]): + self._sparsity_avg = self._moving_averager.average(self._sparsity) + return avg_op + def _prepare_variables(self): """Prepare Variables for YellowFin. @@ -264,7 +316,7 @@ def _prepare_variables(self): """ self._moving_averager = tf.train.ExponentialMovingAverage( decay=self._beta, zero_debias=self._zero_debias) - assert self._grad + # assert self._grad is not None and len(self._grad) > 0 # List for the returned Operations prepare_variables_op = [] @@ -275,13 +327,17 @@ def _prepare_variables(self): # Gradient squared for v, g in zip(self._vars, self._grad): if g is None: continue - with ops.colocate_with(v): + with tf.colocate_with(v): self._grad_squared.append(tf.square(g)) # Norm squared. self._grad_norm_squared = [tf.reduce_sum(g_sq) for g_sq in self._grad_squared] + if self._sparsity_debias: + avg_op_sparsity = self._grad_sparsity() + prepare_variables_op.append(avg_op_sparsity) + # The following running average on squared norm of gradient # is shared by grad_var and dist_to_opt avg_op = self._moving_averager.apply(self._grad_norm_squared) @@ -295,6 +351,44 @@ def _prepare_variables(self): prepare_variables_op.append(avg_op) return tf.group(*prepare_variables_op) + def _get_cubic_root(self): + """Get the cubic root.""" + # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2 + # where x = sqrt(mu). + # We substitute x, which is sqrt(mu), with x = y + 1. + # It gives y^3 + py = q + # where p = (D^2 h_min^2)/(2*C) and q = -p. + # We use the Vieta's substution to compute the root. + # There is only one real solution y (which is in [0, 1] ). + # http://mathworld.wolfram.com/VietasSubstitution.html + assert_array = [ + tf.Assert( + tf.logical_not(tf.is_nan(self._dist_to_opt_avg)), + [self._dist_to_opt_avg,]), + tf.Assert( + tf.logical_not(tf.is_nan(self._h_min)), + [self._h_min,]), + tf.Assert( + tf.logical_not(tf.is_nan(self._grad_var)), + [self._grad_var,]), + tf.Assert( + tf.logical_not(tf.is_inf(self._dist_to_opt_avg)), + [self._dist_to_opt_avg,]), + tf.Assert( + tf.logical_not(tf.is_inf(self._h_min)), + [self._h_min,]), + tf.Assert( + tf.logical_not(tf.is_inf(self._grad_var)), + [self._grad_var,]) + ] + with tf.control_dependencies(assert_array): + p = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var + w3 = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0 + w = tf.sign(w3) * tf.pow(tf.abs(w3), 1.0/3.0) + y = w - p / 3.0 / w + x = y + 1 + return x + def _get_lr_tensor(self): """Get lr minimzing the surrogate. @@ -310,32 +404,10 @@ def _get_mu_tensor(self): Returns: The mu_t. """ - const_fact = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var - coef = tf.Variable([-1.0, 3.0, 0.0, 1.0], - dtype=tf.float32, - name="cubic_solver_coef") - coef = tf.scatter_update(coef, - tf.constant(2), - -(3 + const_fact)) - roots = tf.py_func(np.roots, - [coef], - Tout=tf.complex64, - stateful=False) - - # Filter out the correct root - root_idx = tf.logical_and( - tf.logical_and( - tf.greater(tf.real(roots), tf.constant(0.0)), - tf.less(tf.real(roots), tf.constant(1.0))), - tf.less(tf.abs(tf.imag(roots)), 1e-5)) - - # In case there are two duplicated roots satisfying the above condition - root = tf.reshape(tf.gather(tf.gather(roots, tf.where(root_idx)), - tf.constant(0)), - shape=[]) - + root = self._get_cubic_root() dr = self._h_max / self._h_min - mu = tf.maximum(tf.real(root)**2, ((tf.sqrt(dr) - 1)/(tf.sqrt(dr) + 1))**2) + mu = tf.maximum( + root**2, ((tf.sqrt(dr) - 1) / (tf.sqrt(dr) + 1))**2) return mu def _yellowfin(self): @@ -366,7 +438,8 @@ def _yellowfin(self): # squared distance from the optimum of a local quadratic # approximation after a single step while keeping all directions in the # robust region. - self._mu = tf.identity(tf.cond(self._do_tune, self._get_mu_tensor, + self._mu = tf.identity(tf.cond(self._do_tune, + self._get_mu_tensor, lambda: self._mu_var)) with tf.control_dependencies([self._mu]): self._lr = tf.identity(tf.cond(self._do_tune, @@ -383,6 +456,10 @@ def _yellowfin(self): yellowfin_ops = tf.group(*yellowfin_ops) return yellowfin_ops + def get_name(self): + """Get optimizer name.""" + return self._momentum_optimizer.get_name() + def apply_gradients(self, grads_and_vars, global_step=None, name=None): """Applying gradients aand tune hyperparams with YellowFin. @@ -400,7 +477,6 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None): YellowFin ops(Curvature, Variance, Distance) ops, SingleStep and lr_mu tuning ops, Step increment ops. - """ self._grad, self._vars = zip(*[(g, t) for g, t in grads_and_vars if g is not None]) @@ -409,18 +485,28 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None): with tf.variable_scope("apply_updates"): # Gradient Clipping? if self._clip_thresh_var is not None: - self._grads_clip, self._grads_norm = tf.clip_by_global_norm( + self._grad, _ = tf.clip_by_global_norm( self._grad, self._clip_thresh_var) apply_grad_op = self._momentum_optimizer.apply_gradients( - zip(self._grads_clip, self._vars), global_step=global_step) + zip(self._grad, self._vars), + global_step=global_step, + name=name) else: apply_grad_op = self._momentum_optimizer.apply_gradients( - zip(self._grad, self._vars), global_step=global_step) + zip(self._grad, self._vars), + global_step=global_step, + name=name) # Begin lr and mu tuning. with tf.variable_scope("prepare_yellowFin_variables"): - prepare_variables_op = self._prepare_variables() + # the dependencies ideally only need to be after clip is done, + # i.e. dependes on self._grads. However, the control_dependencies + # does not support indexed slice for sparse gradients. + # The alternative dependencies here might be slightly slower due + # to less parallelization. + with tf.control_dependencies([apply_grad_op,]): + prepare_variables_op = self._prepare_variables() with tf.variable_scope("yellowfin"): with tf.control_dependencies([prepare_variables_op]): @@ -467,6 +553,7 @@ def compute_gradients(self, A list of (gradient, variable) pairs. Variable is always present, but gradient can be None. """ + del global_step, name # Unused for now. return self._momentum_optimizer.compute_gradients( loss, var_list=var_list, @@ -533,4 +620,26 @@ def minimize(self, print("g ", g) print("v ", v) - return self.apply_gradients(grads_and_vars, global_step=global_step) + return self.apply_gradients(grads_and_vars, + global_step=global_step, + name=name) + + def get_slot(self, var, name): + """Return a slot named `name` created for `var`. + + Args: + var: A variable passed to `minimize()` or `apply_gradients()`. + name: A string. + + Returns: + The `Variable` for the slot if it was created, `None` otherwise. + """ + return self._momentum_optimizer.get_slot(var, name) + + def get_slot_names(self): + """Return a list of the names of the slots using MomentumOptimizer. + + Returns: + A list of strings. + """ + return self._momentum_optimizer.get_slot_names() From be7a446d26448561b90662f2dd5d70264aea37ab Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Wed, 23 Aug 2017 10:08:47 -0700 Subject: [PATCH 0303/4095] First stab at "diet" variables. PiperOrigin-RevId: 166216217 --- tensor2tensor/layers/common_layers.py | 37 +++ tensor2tensor/layers/rev_block.py | 14 +- tensor2tensor/models/attention_lm_moe.py | 44 ++- tensor2tensor/utils/diet.py | 361 +++++++++++++++++++++++ tensor2tensor/utils/model_builder.py | 25 +- 5 files changed, 460 insertions(+), 21 deletions(-) create mode 100644 tensor2tensor/utils/diet.py diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 5be5a35a7..6fea85e67 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -1625,3 +1625,40 @@ def ravanbakhsh_set_layer(layer_size, inputs - tf.expand_dims(global_pool_1d(inputs, mask=mask), axis=1), activation_fn=activation_fn, name=name) + + +def underlying_variable_ref(t): + """Find the underlying variable ref, ignoring Identity ops. + + Args: + t: a Tensor + + Returns: + a Tensor that is a variable ref, or None on error. + """ + while t.op.type == "Identity": + t = t.op.inputs[0] + if "Variable" in t.op.type: + return t + else: + return None + + +def underlying_variable(t): + """Find the underlying tf.Variable object. + + Args: + t: a Tensor + + Returns: + a tf.Varaible object. + """ + t = underlying_variable_ref(t) + assert t is not None + # make sure that the graph has a variable index and that it is up-to-date + if not hasattr(tf.get_default_graph(), "var_index"): + tf.get_default_graph().var_index = {} + var_index = tf.get_default_graph().var_index + for v in tf.global_variables()[len(var_index):]: + var_index[v.name] = v + return var_index[t.name] diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 7198a953d..6ca0c676f 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -30,8 +30,8 @@ from six.moves import xrange # pylint: disable=redefined-builtin +from tensor2tensor.layers.common_layers import underlying_variable_ref import tensorflow as tf -from tensorflow.python.framework import dtypes from tensorflow.python.framework import function LAYER_RE = re.compile(".*revlayer_([0-9]*)/([fg])/.*") @@ -151,16 +151,6 @@ def _rev_block_forward(x1, return y1, y2 -def _underlying_variable(t): - """Find the underlying variable ref, ignoring Identity ops.""" - while t.op.type == "Identity": - t = t.op.inputs[0] - if t.dtype == dtypes.float32_ref and "Variable" in t.op.type: - return t - else: - return None - - def fn_with_custom_grad(grad_fn): """Decorator to create a subgraph with a custom gradient function. @@ -331,7 +321,7 @@ def custom_grad_fn(inputs, variables, ys, grad_ys): g_vars_idxs = [[] for _ in range(num_layers)] for i, t in enumerate(variables): - ref = _underlying_variable(t) + ref = underlying_variable_ref(t) # Use the name to identify the layer number and function (f or g) regex = LAYER_RE.match(ref.name) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 1d1c1519b..346cb079b 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -32,6 +32,7 @@ from tensor2tensor.layers import common_attention from tensor2tensor.layers import common_hparams from tensor2tensor.layers import common_layers +from tensor2tensor.utils import diet from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -76,6 +77,14 @@ def postprocess(x, y): 1.0 - hparams.layer_prepostprocess_dropout) extra_loss = 0.0 moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] + if hparams.diet_experts: + hsize, = moe_hidden_sizes + diet_optimizer = diet.DietAdamOptimizer(diet.diet_adam_optimizer_params()) + expert_fn = lambda x: diet.diet_expert(x, hsize, diet_optimizer) + else: + expert_fn = expert_utils.ffn_expert_fn( + hparams.hidden_size, moe_hidden_sizes, + hparams.hidden_size) for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope( @@ -116,9 +125,7 @@ def postprocess(x, y): preprocess(x), hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, input_size=hparams.hidden_size, - expert_fn=expert_utils.ffn_expert_fn( - hparams.hidden_size, moe_hidden_sizes, - hparams.hidden_size), + expert_fn=expert_fn, num_experts=hparams.moe_num_experts, k=hparams.moe_k, loss_coef=hparams.moe_loss_coef) @@ -207,6 +214,7 @@ def attention_lm_moe_base(): # moe params. local attention moe. hparams.add_hparam("attention_moe_type", AttentionMoeType.NONE) hparams.add_hparam("attention_num_experts", 16) + hparams.add_hparam("diet_experts", int(False)) return hparams @@ -253,8 +261,8 @@ def attention_lm_attention_moe_tiny(): """ hparams = attention_lm_moe_small() hparams.moe_layers = "" - hparams.attention_num_experts = 16 - hparams.filter_size = 512 + hparams.attention_num_experts = 128 + hparams.filter_size = 8192 hparams.attention_moe_type = AttentionMoeType.LOCAL return hparams @@ -304,6 +312,32 @@ def attention_lm_moe_large(): return hparams +@registry.register_hparams +def attention_lm_moe_large_diet(): + hparams = attention_lm_moe_large() + hparams.diet_experts = int(True) + return hparams + + +@registry.register_hparams +def attention_lm_moe_32b_diet(): + """Unnecessarily large model with 32B params - because we can.""" + hparams = attention_lm_moe_large_diet() + hparams.moe_hidden_sizes = "16384" + hparams.moe_num_experts = 1024 + return hparams + + +@registry.register_hparams +def attention_lm_moe_24b_diet(): + """Unnecessarily large model with 24B params - because we can.""" + hparams = attention_lm_moe_large_diet() + hparams.moe_hidden_sizes = "12288" + hparams.moe_num_experts = 1024 + hparams.batch_size = 4096 + return hparams + + @registry.register_hparams def attention_lm_moe_translation(): """Version to use for seq2seq.""" diff --git a/tensor2tensor/utils/diet.py b/tensor2tensor/utils/diet.py new file mode 100644 index 000000000..73077ea72 --- /dev/null +++ b/tensor2tensor/utils/diet.py @@ -0,0 +1,361 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Diet varaibles are much more memory-efficient than regular variables. + +Using diet variables, we can reduce memory overhead per parameter from +16 bytes to 2 bytes, allowing for up to 4B parameters per GPU. + +This is an idea by rsepassi about how make this more generally useful. +with diet_variable_scope(diet_options=opts): + custom variable getter that creates vars with diet_options + per variable have fn that does the optimization acc to diet_options +@forward_with_diet_backwards fn decorator +""" + + +from collections import defaultdict +import math +# Dependency imports +from tensor2tensor.layers.common_layers import underlying_variable +import tensorflow as tf +from tensorflow.python.framework import function + + +def diet_adam_optimizer_params(): + """Default hyperparameters for a DietAdamOptimizer. + + Returns: + a hyperparameters object. + """ + return tf.contrib.training.HParams( + quantize=int(True), # use 16-bit fixed-point + quantization_scale=10.0 / tf.int16.max, + optimizer="factored_adam", + learning_rate=1.0, + learning_rate_warmup_steps=2000, + learning_rate_decay_scheme="noam", # "noam" or "none" + epsilon=1e-10, + beta1=0.0, # we can save memory if beta1=0 + beta2=0.98, + randomized_updates=int(True), # use unbiased roundoff in updates + factored_second_moment_accumulator=int(True), # this saves memory + ) + + +class DietAdamOptimizer(object): + """A memory efficient optimizer for memory-efficient variables. + + We employ the following techniques: + - 16-bit fixed-point quantization + - inline updates during backprop, instead of through the optimizer. This + keeps the gradients from staying around in memory. + - momentum is optional - saves a slot if it is off (beta1=0.0). + - "factored second-moment accumulator" + (keep row-wise and col-wise averages instead of full accumulator) + - tighter control over operation ordering to make sure that only a small + portion of the decompressed variables and of the variable gradients + are resident in memory at any given time. + + All together these techniques reduce the memory footprint per parameter to + a little over 2 bytes, allowing for roughly 4B parameters per GPU. This is + roughly an 8x improvement over the naive version. + + Usage: + + Diet variables should be created with the + DietAdamOptimizer.get_variable() method. The resulting variables + have extra fields pointing to the otpimizer and to the accumulator + slots. + + The variable is kept in quantized form, so you need to call + var.optimizer.dequantize(var) to get the value. + + The variables are created with trainable=False, so that they will + not be optimized by an ordinary optimizer. Instead, the user is + responsible for making sure that var.optimizer.update(var, grad) is + called during backprop. The reason for this inline update is to + avoid keeping around the gradients for all variables at once. This + is done with the clever use of defuns and control dependencies. See + diet_expert() for an example of how all of this is done. + + To facilitate fixed-point quantization and to make it easier to + choose a learning rate, all varaibles are initialized with unit + normal initialization. If you want smaller values, downscale on the + outside. + """ + + def __init__(self, hparams): + """Create a DietAdamOptimizer. + + Args: + hparams: a hyperparameters object - see diet_adam_optimizer_params() + """ + self._hparams = hparams + self._global_step = tf.to_float( + tf.contrib.framework.get_global_step()) + 1.0 + self._initializer_dependencies = defaultdict(list) + + @property + def dtype(self): + """The data type used for the variables.""" + return tf.float16 if self._hparams.quantize else tf.float32 + + def get_variable(self, name, shape): + """Create a diet variable. + + Args: + name: a string + shape: a list of integers + + Returns: + a variable + """ + var = tf.get_variable( + name, shape, trainable=False, + dtype=self.dtype, + initializer=self._initializer()) + self._create_slots(var, name) + var.optimizer = self + return var + + def _create_slots(self, var, name): + """Create auxiliary slots for a variable. + + Args: + var: a tf.Variable + name: a string + """ + hparams = self._hparams + shape = var.get_shape().as_list() + if hparams.factored_second_moment_accumulator and len(shape) == 2: + var.adam_vr = tf.get_variable( + name + "_adam_vr", [shape[0], 1], trainable=False, + initializer=tf.zeros_initializer()) + var.adam_vc = tf.get_variable( + name + "_adam_vc", [1, shape[1]], trainable=False, + initializer=tf.zeros_initializer()) + else: + var.adam_v = tf.get_variable( + name + "_adam_v", shape, trainable=False, + initializer=tf.zeros_initializer()) + if hparams.beta1 != 0.0: + var.adam_m = tf.get_variable( + name + "_adam_m", shape, trainable=False, + initializer=tf.zeros_initializer()) + + def _quantize(self, x, randomize=True): + """Quantize to tf.int16, then bitcast to tf.float16. + + The reason for float16 is that for some reason, tensorflow refuses to put + integer variables on gpu. + + Args: + x: a Tensor of type tf.float32 + randomize: a boolean + + Returns: + a Tensor of type tf.float16 + """ + hparams = self._hparams + if not hparams.quantize: + return x + if not randomize: + return tf.bitcast( + tf.cast(x / hparams.quantization_scale, tf.int16), tf.float16) + abs_x = tf.abs(x) + sign_x = tf.sign(x) + y = abs_x / hparams.quantization_scale + y = tf.floor(y + tf.random_uniform(tf.shape(x))) + y = tf.minimum(y, tf.int16.max) * sign_x + q = tf.bitcast(tf.cast(y, tf.int16), tf.float16) + return q + + def dequantize(self, q): + """Approximate inverse of _quantize(). + + Args: + q: a Tensor with type tf.float16 + + Returns: + a Tensor with type tf.float32 + """ + hparams = self._hparams + if hparams.quantize: + return tf.to_float(tf.bitcast(q, tf.int16)) * hparams.quantization_scale + else: + return q + + def _initializer(self): + """Returns an initializer function. + + Returns: + a function + """ + hparams = self._hparams + device = tf.constant(1.0).device + def _initializer(shape, dtype=self.dtype, partition_info=None): + assert dtype == self.dtype + del partition_info + # make sure no two initializers run simultaneously (to conserve memory) + with tf.control_dependencies(self._initializer_dependencies[device]): + float_range = math.sqrt(3) + ret = tf.random_uniform(shape, -float_range, float_range) + if hparams.quantize: + ret = self._quantize(ret, randomize=False) + self._initializer_dependencies[device] = [ret] + return ret + return _initializer + + def update(self, var, grad): + """Update a diet varaible given a gradient. + + Args: + var: a variable + grad: a Tensor + + Returns: + an update op. Make sure that something depends on this + op if you want it to run. + """ + hparams = self._hparams + var = underlying_variable(var) + # compute learning rate + lrate = hparams.learning_rate + if hparams.learning_rate_decay_scheme == "noam": + lrate *= tf.minimum( + self._global_step * hparams.learning_rate_warmup_steps ** -1.5, + self._global_step ** -0.5) + else: + assert hparams.learning_rate_decay_scheme == "none" + lrate *= tf.minumum( + self._global_step / hparams.learning_rate_warmup_steps, 1.0) + # compute adjustment due to second moment + grad_squared = tf.square(grad) + beta2_pow = tf.pow(hparams.beta2, self._global_step) + if hparams.factored_second_moment_accumulator and len(var.shape) == 2: + vr_update = tf.assign( + var.adam_vr, + var.adam_vr * hparams.beta2 + + tf.reduce_mean(grad_squared, 1, keep_dims=True) * + (1.0 - hparams.beta2)) + vc_update = tf.assign( + var.adam_vc, + var.adam_vc * hparams.beta2 + + tf.reduce_mean(grad_squared, 0, keep_dims=True) * + (1.0 - hparams.beta2)) + with tf.control_dependencies([vr_update, vc_update]): + vr = tf.sqrt(var.adam_vr / (1.0 - beta2_pow)) + hparams.epsilon + vc = tf.sqrt(var.adam_vc / (1.0 - beta2_pow)) + hparams.epsilon + vc /= tf.reduce_mean(vc) + denom = vr * vc + else: + v_update = tf.assign( + var.adam_v, + var.adam_v * hparams.beta2 + grad_squared * (1.0 - hparams.beta2)) + with tf.control_dependencies([v_update]): + denom = tf.sqrt(var.adam_v / (1.0 - beta2_pow)) + hparams.epsilon + # compute momentum if applicable + if hparams.beta1 != 0.0: + m_update = tf.assign( + var.adam_m, var.adam_m * hparams.beta1 + grad * (1.0 - hparams.beta1)) + with tf.control_dependencies([m_update]): + grad = var.adam_m + subtrahend = lrate * grad / denom + new_val = self._quantize(self.dequantize(var) - subtrahend) + return tf.assign(var, new_val) + + +def dependency_dict(): + """Get or create a defaultdict(list) that is stored in the default graph. + + This is used when we want to make sure that certain operations are performed + sequentially. + + example use - make sure calls to foo on the same device execute sequentially: + + def foo(x, device) + key = "foo " + device + with tf.device(device): + with tf.control_dependencies(dependency_dict()[key]): + y = bar(x) + dependency_dict()[key] = y + return y + + Returns: + a defaultdict whose default value is the empty list + """ + if not hasattr(tf.get_default_graph(), "dependency_dict"): + setattr(tf.get_default_graph(), "dependency_dict", defaultdict(list)) + return tf.get_default_graph().dependency_dict + + +def _diet_expert_internal(x, w0, w1): + h = tf.matmul(x, w0) + h = tf.nn.relu(h) + y = tf.matmul(h, w1) + y *= tf.rsqrt(tf.to_float(tf.shape(w0)[0] * tf.shape(w1)[0])) + y.set_shape(x.get_shape()) + return y + + +def _diet_expert_grad(op, dy): + x, w0, w1 = op.inputs + w0_var = underlying_variable(w0) + w1_var = underlying_variable(w1) + key = "diet_expert_backward_deps " + dy.device + with tf.control_dependencies(dependency_dict()[key]): + w0 = w0_var.optimizer.dequantize(w0_var) + w1 = w1_var.optimizer.dequantize(w1_var) + y = _diet_expert_internal(x, w0, w1) + dx, dw0, dw1 = tf.gradients(ys=[y], xs=[x, w0, w1], grad_ys=[dy]) + w0_update = w0_var.optimizer.update(w0_var, dw0) + w1_update = w1_var.optimizer.update(w1_var, dw1) + with tf.control_dependencies([w0_update, w1_update]): + dx = tf.identity(dx) + dependency_dict()[key] = [dx] + return dx, None, None + + +def diet_expert(x, hidden_size, optimizer): + """A two-layer feed-forward network with relu activation on hidden layer. + + Uses diet variables. + Recompuets hidden layer on backprop to save activation memory. + + Args: + x: a Tensor with shape [batch, io_size] + hidden_size: an integer + optimizer: a DietAdamOptimizer or some such class + + Returns: + a Tensor with shape [batch, io_size] + """ + @function.Defun(python_grad_func=_diet_expert_grad, + shape_func=lambda _: (x.get_shape(),)) + def _diet_expert_fn(x, w0, w1): + w0 = optimizer.dequantize(w0) + w1 = optimizer.dequantize(w1) + return _diet_expert_internal(x, w0, w1) + + with tf.device(x.device): + _, io_size = x.get_shape().as_list() + w0_var = optimizer.get_variable("w0", [io_size, hidden_size]) + w1_var = optimizer.get_variable("w1", [hidden_size, io_size]) + key = "diet_expert_forward_deps " + x.device + with tf.control_dependencies(dependency_dict()[key]): + ret = _diet_expert_fn(x, w0_var, w1_var) + dependency_dict()[key] = [ret] + return ret diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 7714f3867..fdc4226d0 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -46,6 +46,24 @@ IMAGE_DECODE_LENGTH = 100 +def log_variable_sizes(var_list, tag): + """Log the sizes and shapes of variables, and the total size. + + Args: + var_list: a list of varaibles + tag: a string + """ + name_to_var = {v.name: v for v in var_list} + total_size = 0 + for v_name in sorted(list(name_to_var)): + v = name_to_var[v_name] + v_size = int(np.prod(np.array(v.shape.as_list()))) + tf.logging.info("Weight %s\tshape %s\tsize %d", + v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) + total_size += v_size + tf.logging.info("%s Total size: %d", tag, total_size) + + def build_model_fn(model, hparams): """Returns a function to build the model. @@ -288,8 +306,6 @@ def nth_model(n): for v_name in sorted(list(all_weights)): v = all_weights[v_name] v_size = int(np.prod(np.array(v.shape.as_list()))) - tf.logging.info("Weight %s\tshape %s\tsize %d", - v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) total_size += v_size if my_hp.weight_decay > 0.0 and len(v.shape.as_list()) > 1: # Add weight regularization if set and the weight is not a bias (dim>1). @@ -305,11 +321,12 @@ def nth_model(n): noise_op = v.assign_add(noise) with tf.control_dependencies([noise_op]): total_loss = tf.identity(total_loss) - tf.logging.info("Total trainable variables size: %d", total_size) if my_hp.weight_decay > 0.0: total_loss += weight_decay_loss * my_hp.weight_decay total_loss = tf.identity(total_loss, name="total_loss") - + log_variable_sizes(tf.trainable_variables(), "Trainable Variables") + diet_vars = [v for v in tf.global_variables() if hasattr(v, "optimizer")] + log_variable_sizes(diet_vars, "Diet Varaibles") # Define the train_op for the TRAIN mode. opt = _ConditionalOptimizer(my_hp.optimizer, learning_rate, my_hp) tf.logging.info("Computing gradients for global model_fn.") From 31b688aaa22811b94fac5f5ca72e9f52680dfda3 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 23 Aug 2017 13:30:55 -0700 Subject: [PATCH 0304/4095] Add hparams to control the attention k,v,q size and add default base attention expert model. PiperOrigin-RevId: 166245826 --- tensor2tensor/layers/common_attention.py | 32 ++++++++++++++++++------ tensor2tensor/models/attention_lm_moe.py | 17 ++++++++++++- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 63bd6f472..d69e68f80 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -977,7 +977,13 @@ def coordinate_tensor(shape, axis): return tf.zeros(shape, dtype=tf.int32) + tf.reshape(r, r_shape) -def self_attention_expert(x, batch_coordinate, mask_right=True): +def self_attention_expert( + x, + batch_coordinate, + mask_right=True, + attention_kq_size=None, + attention_v_size=None, +): """Implementing attention that runs inside each expert. Args: @@ -988,6 +994,8 @@ def self_attention_expert(x, batch_coordinate, mask_right=True): positions from different sequences don't attend to each other. mask_right: A bool. If true, we will not attend to positions on the right, just as decoder self attention. + attention_kq_size (int): dimension used for the attention key, and query + attention_v_size (int): dimension used for the attention value Returns: out: A tensor of shape [batch, depth]. @@ -1000,6 +1008,9 @@ def self_attention_expert(x, batch_coordinate, mask_right=True): depth = x.get_shape().as_list()[-1] length = tf.shape(batch_coordinate)[0] + attention_kq_size = attention_kq_size or depth + attention_v_size = attention_v_size or depth + def length_not_null(x, batch_coordinate): """Branch of the graph only evaluated when length isn't null.""" with tf.name_scope("expert_mask"): @@ -1021,8 +1032,8 @@ def length_not_null(x, batch_coordinate): out = multihead_attention(x, None, bias, - total_key_depth=depth, - total_value_depth=depth, + total_key_depth=attention_kq_size, + total_value_depth=attention_v_size, output_depth=depth, num_heads=1, dropout_rate=0.0) @@ -1042,8 +1053,14 @@ def length_not_null(x, batch_coordinate): # functools.partial(self_attention_expert, mask_right=, depth=) -def local_expert_attention(x, k, loss_coef, attention_num_experts, train=True, - mask_right=True): +def local_expert_attention( + x, + k, + loss_coef, + attention_num_experts, + train=True, + **kwargs +): """Attention using a mixture of experts. Positions sent to the same expert can attend to each other. @@ -1056,8 +1073,7 @@ def local_expert_attention(x, k, loss_coef, attention_num_experts, train=True, loss_coef: a scalar. A multiplier for the expert loss attention_num_experts: The number of experts to use train: a boolean for the current mode - mask_right: A boolean. If true, we will mask out positions to the right - for self-attention. + **kwargs: Arguments to forward to self_attention_expert Returns: y: a Tensor with shape [batch, length, depth] @@ -1071,7 +1087,7 @@ def local_expert_attention(x, k, loss_coef, attention_num_experts, train=True, return expert_utils.local_moe( x, train, - partial(self_attention_expert, mask_right=mask_right), + partial(self_attention_expert, **kwargs), attention_num_experts, k=k, loss_coef=loss_coef, diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 346cb079b..6a5f6a0b3 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -110,7 +110,9 @@ def postprocess(x, y): loss_coef=1e-2, attention_num_experts=hparams.attention_num_experts, train=hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, - mask_right=True) + mask_right=True, + attention_kq_size=hparams.attention_kq_size, + attention_v_size=hparams.attention_v_size) # TODO(avaswani, epot, noam): Do we need to divide by num shards ? extra_loss += tf.add_n(loss)/dp.n else: @@ -214,10 +216,23 @@ def attention_lm_moe_base(): # moe params. local attention moe. hparams.add_hparam("attention_moe_type", AttentionMoeType.NONE) hparams.add_hparam("attention_num_experts", 16) + # Key, query and value dimensions for the attention + hparams.add_hparam("attention_kq_size", 64) + hparams.add_hparam("attention_v_size", 64) hparams.add_hparam("diet_experts", int(False)) return hparams +@registry.register_hparams +def attention_lm_moe_base_ae(): + """Base model with attention expert.""" + hparams = attention_lm_moe_base() + hparams.attention_moe_type = AttentionMoeType.LOCAL + hparams.max_length = hparams.batch_size + hparams.eval_drop_long_sequences = int(True) + return hparams + + @registry.register_hparams def attention_lm_moe_small(): """Cheap model for single-gpu training. From 7a949f0167564a0add22a8658088e153b92c31c5 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Wed, 23 Aug 2017 16:11:09 -0700 Subject: [PATCH 0305/4095] Add a more memory-efficient version of matmul->softmax->cross_entropy. The method is to split the batch and only realize the logits for one part of the batch at a time. PiperOrigin-RevId: 166270103 --- tensor2tensor/layers/common_hparams.py | 2 + tensor2tensor/layers/common_layers.py | 171 +++++++++++++++++++++ tensor2tensor/layers/common_layers_test.py | 78 ++++++++++ tensor2tensor/layers/modalities.py | 18 ++- tensor2tensor/layers/modalities_test.py | 38 ++++- 5 files changed, 300 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 6bb4d3e9d..0173ebb2c 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -68,6 +68,8 @@ def basic_params1(): learning_rate=0.1, sampling_method="argmax", # "argmax" or "random" problem_choice="adaptive", # "uniform", "adaptive", "distributed" + # expand the logits a piece at a time - saves memory. + factored_logits=int(False), multiply_embedding_mode="sqrt_depth", # Parameters related to mixtures of experts. moe_hidden_sizes="2048", # hidden layer sizes (comma-separated) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 6fea85e67..53cb4581a 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -29,6 +29,7 @@ import tensorflow as tf from tensorflow.python.framework import function +from tensorflow.python.framework import ops # This is a global setting. When turned off, no @function.Defun is used. allow_defun = False @@ -1424,6 +1425,7 @@ def padded_cross_entropy(logits, Args: logits: a `Tensor` with shape `[batch, timesteps, vocab_size]`. + optionally a FactoredTensor. labels: an integer `Tensor` with shape `[batch, timesteps]`. label_smoothing: a floating point `Scalar`. weights_fn: A function from labels to weights. @@ -1433,6 +1435,12 @@ def padded_cross_entropy(logits, loss_numerator: a `Scalar`. Sum of losses. loss_denominator: a `Scalar. The number of non-padding target tokens. """ + if isinstance(logits, FactoredTensor): + return padded_cross_entropy_factored(logits, + labels, + label_smoothing, + weights_fn=weights_fn, + reduce_sum=reduce_sum) confidence = 1.0 - label_smoothing vocab_size = tf.shape(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): @@ -1662,3 +1670,166 @@ def underlying_variable(t): for v in tf.global_variables()[len(var_index):]: var_index[v.name] = v return var_index[t.name] + + +def approximate_split(x, num_splits, axis=0): + """Split approximately equally into num_splits parts. + + Args: + x: a Tensor + num_splits: an integer + axis: an integer. + + Returns: + a list of num_splits Tensors. + """ + size = tf.shape(x)[axis] + size_splits = [ + tf.div(size + i, num_splits) for i in xrange(num_splits)] + return tf.split(x, size_splits, axis=axis) + + +class FactoredTensor(object): + """A concise factored representation of Tensor as two tensors. + + This class represents the tensor tf.matmul(a, b, transpose_b=True) + by storing the values of Tensors a and b. + + The reason for this is that the product may be too big to fully realize at + once, so it can be realized a part at a time. + + "a" may have extra leading dimensions, in which case they are flattened out + before computing the matrix product, then re-expanded afterwards. + """ + + def __init__(self, a, b): + self._a = a + self._b = b + + @property + def a(self): + return self._a + + @property + def b(self): + return self._b + + def to_tensor(self): + inner_dim = tf.shape(self.b)[1] + result_dim = tf.shape(self.b)[0] + flat_a = tf.reshape(self.a, [-1, inner_dim]) + product = tf.matmul(flat_a, self.b, transpose_b=True) + product_shape = tf.concat([tf.shape(self.a)[:-1], [result_dim]], 0) + product = tf.reshape(product, product_shape) + product.set_shape(self.a.get_shape().as_list()[:-1] + + [self.b.get_shape()[0]]) + return product + + +def _convert_factored_tensor_to_tensor(value, *args, **kwargs): + # call ops.convert_to_tensor to handle optional arguments appropriately + return ops.internal_convert_to_tensor(value.to_tensor(), *args, **kwargs) + + +tf.register_tensor_conversion_function(FactoredTensor, + _convert_factored_tensor_to_tensor) + + +def smoothing_cross_entropy_factored_grad(op, dy): + """Gradient function for smoothing_cross_entropy_factored.""" + a = op.inputs[0] + b = op.inputs[1] + labels = op.inputs[2] + confidence = op.inputs[3] + num_splits = 32 + vocab_size = tf.shape(b)[0] + labels = approximate_split(labels, num_splits) + a = approximate_split(a, num_splits) + dy = approximate_split(dy, num_splits) + b_grad = None + a_grad_parts = [] + deps = [] + for part in xrange(num_splits): + with tf.control_dependencies(deps): + logits = tf.matmul(a[part], b, transpose_b=True) + output_part = smoothing_cross_entropy( + logits, labels[part], vocab_size, confidence) + a_grad_part, b_grad_part = tf.gradients( + ys=[output_part], + xs=[a[part], b], + grad_ys=[dy[part]]) + a_grad_parts.append(a_grad_part) + if part > 0: + b_grad += b_grad_part + else: + b_grad = b_grad_part + deps = [b_grad, a_grad_part] + a_grad = tf.concat(a_grad_parts, 0) + return a_grad, b_grad, None, None + + +@function.Defun(noinline=True, + python_grad_func=smoothing_cross_entropy_factored_grad, + compiled=True, separate_compiled_gradients=True) +def smoothing_cross_entropy_factored( + a, b, labels, confidence): + """Memory-efficient computation of smoothing cross-entropy. + + Avoids realizing the entire logits matrix at once. + + Args: + a: a Tensor with shape [batch, inner_dim] + b: a Tensor with shape [vocab_size, inner_dim] + labels: an integer Tensor with shape [batch] + confidence: a float + + Returns: + A Tensor with shape [batch] + """ + num_splits = 32 + vocab_size = tf.shape(b)[0] + labels = approximate_split(labels, num_splits) + a = approximate_split(a, num_splits) + parts = [] + for part in xrange(num_splits): + with tf.control_dependencies(parts[-1:]): + logits = tf.matmul(a[part], b, transpose_b=True) + parts.append( + smoothing_cross_entropy(logits, labels[part], vocab_size, confidence)) + return tf.concat(parts, 0) + + +def padded_cross_entropy_factored(factored_logits, + labels, + label_smoothing, + weights_fn=weights_nonzero, + reduce_sum=True): + """Memory-efficient computation of smoothing cross-entropy. + + Avoids realizing the entire logits matrix at once. + + Args: + factored_logits: a `FactoredTensor` representing a Tensor + with shape `[batch, timesteps, vocab_size]`. + labels: an integer `Tensor` with shape `[batch, timesteps]`. + label_smoothing: a floating point `Scalar`. + weights_fn: A function from labels to weights. + reduce_sum: a Boolean, whether to sum at the end or not. + + Returns: + loss_numerator: a `Scalar`. Sum of losses. + loss_denominator: a `Scalar. The number of non-padding target tokens. + """ + a = factored_logits.a + b = factored_logits.b + confidence = 1.0 - label_smoothing + with tf.name_scope("padded_cross_entropy_factored", [a, b, labels]): + labels_flat = tf.reshape(labels, [-1]) + a_flat = tf.reshape(a, [-1, tf.shape(b)[1]]) + xent = smoothing_cross_entropy_factored( + a_flat, b, labels_flat, tf.convert_to_tensor(confidence)) + xent = tf.reshape(xent, tf.shape(labels)) + weights = weights_fn(labels) + if not reduce_sum: + return xent * weights, weights + return tf.reduce_sum(xent * weights), tf.reduce_sum(weights) diff --git a/tensor2tensor/layers/common_layers_test.py b/tensor2tensor/layers/common_layers_test.py index 3cf3f3374..f251ac5b8 100644 --- a/tensor2tensor/layers/common_layers_test.py +++ b/tensor2tensor/layers/common_layers_test.py @@ -392,6 +392,84 @@ def testRavanbakhshSetLayer(self): actual = session.run(layer) self.assertEqual(actual.shape, (5, 4, 32)) + def testPaddingCrossEntropyFactored(self): + vocab_size = 19 + rows = 5 + cols = 4 + depth = 11 + label_smoothing = 0.1 + features = np.random.rand(rows, cols, depth) + weights = np.random.rand(vocab_size, depth) + labels = np.random.randint(0, vocab_size - 1, size=(rows, cols)) + with self.test_session() as session: + features = tf.to_float(features) + weights = tf.to_float(weights) + labels = tf.to_int32(labels) + logits = tf.matmul(tf.reshape(features, [rows * cols, depth]), + weights, transpose_b=True) + logits = tf.reshape(logits, [rows, cols, vocab_size]) + loss_num, loss_den = common_layers.padded_cross_entropy( + logits, labels, label_smoothing=label_smoothing, reduce_sum=False) + factored_logits = common_layers.FactoredTensor(features, weights) + loss_num_f, loss_den_f = common_layers.padded_cross_entropy_factored( + factored_logits, labels=labels, label_smoothing=label_smoothing, + reduce_sum=False) + num, den, num_f, den_f = session.run( + [loss_num, loss_den, loss_num_f, loss_den_f]) + self.assertEqual(num.shape, (rows, cols)) + self.assertEqual(den.shape, (rows, cols)) + self.assertEqual(num_f.shape, (rows, cols)) + self.assertEqual(den_f.shape, (rows, cols)) + self.assertAllClose(num, num_f) + self.assertAllClose(den, den_f) + + def testPaddingCrossEntropyFactoredGrad(self): + vocab_size = 19 + rows = 5 + cols = 4 + depth = 11 + label_smoothing = 0.1 + features = np.random.rand(rows, cols, depth) + weights = np.random.rand(vocab_size, depth) + labels = np.random.randint(0, vocab_size - 1, size=(rows, cols)) + with self.test_session() as session: + features = tf.to_float(features) + weights = tf.to_float(weights) + labels = tf.to_int32(labels) + logits = tf.matmul(tf.reshape(features, [rows * cols, depth]), + weights, transpose_b=True) + logits = tf.reshape(logits, [rows, cols, vocab_size]) + loss_num, loss_den = common_layers.padded_cross_entropy( + logits, labels, label_smoothing=label_smoothing, reduce_sum=False) + factored_logits = common_layers.FactoredTensor(features, weights) + loss_num_factored, loss_den_factored = ( + common_layers.padded_cross_entropy_factored( + factored_logits, labels=labels, label_smoothing=label_smoothing, + reduce_sum=False)) + df, dw = tf.gradients(ys=[loss_num, loss_den], xs=[features, weights]) + df_factored, dw_factored = tf.gradients( + ys=[loss_num_factored, loss_den_factored], xs=[features, weights]) + actual_df, actual_dw, actual_df_factored, actual_dw_factored = ( + session.run([df, dw, df_factored, dw_factored])) + self.assertEqual(actual_df.shape, (rows, cols, depth)) + self.assertEqual(actual_dw.shape, (vocab_size, depth)) + self.assertEqual(actual_df_factored.shape, (rows, cols, depth)) + self.assertEqual(actual_dw_factored.shape, (vocab_size, depth)) + self.assertAllClose(actual_df, actual_df_factored) + self.assertAllClose(actual_dw, actual_dw_factored) + + def testFactoredTensorImplicitConversion(self): + a = np.random.rand(3, 4, 5) + b = np.random.rand(6, 5) + c = np.random.rand(3, 4, 6) + with self.test_session() as session: + # a factored representation of a Tensor of shape (3, 4, 6) + factored = common_layers.FactoredTensor(tf.to_float(a), tf.to_float(b)) + # implicitly converts factored to a Tensor (performing the matmul) + d = factored + tf.to_float(c) + out = session.run(d) + self.assertEqual(out.shape, (3, 4, 6)) + if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 01728ba24..57652dbec 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -112,12 +112,18 @@ def top(self, body_output, _): reuse = False with tf.variable_scope(scope_name, reuse=reuse): var = self._get_weights() - shape = tf.shape(body_output)[:-1] - body_output = tf.reshape(body_output, [-1, self._body_input_depth]) - logits = tf.matmul(body_output, var, transpose_b=True) - logits = tf.reshape(logits, tf.concat([shape, [self._vocab_size]], 0)) - # insert a channels dimension - return tf.expand_dims(logits, 3) + if (self._model_hparams.factored_logits and + self._model_hparams.mode == tf.contrib.learn.ModeKeys.TRAIN): + # insert channels dimension + body_output = tf.expand_dims(body_output, 3) + logits = common_layers.FactoredTensor(body_output, var) + else: + shape = tf.shape(body_output)[:-1] + body_output = tf.reshape(body_output, [-1, self._body_input_depth]) + logits = tf.matmul(body_output, var, transpose_b=True) + logits = tf.reshape( + logits, tf.concat([shape, [1, self._vocab_size]], 0)) + return logits @registry.register_image_modality diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index 0ccd13777..5813422ab 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -65,7 +65,43 @@ def testSymbolModalityTargets(self): symbol_modality_num_shards=4, hidden_size=hidden_size, label_smoothing=0.2, - shared_embedding_and_softmax_weights=0) + shared_embedding_and_softmax_weights=0, + factored_logits=0, + mode=tf.contrib.learn.ModeKeys.TRAIN) + body_output = -1 + np.random.random_integers( + 100, size=(batch_size, length, height, hidden_size)) + targets = -1 + np.random.random_integers( + vocab_size, size=(batch_size, length, height, 1)) + m = modalities.SymbolModality(model_hparams, vocab_size) + data_parallelism = expert_utils.Parallelism( + ["/device:CPU:0"] * num_datashards, reuse=True) + with self.test_session() as session: + sharded_body_output = tf.split(tf.to_float(body_output), num_datashards) + sharded_targets = tf.split(targets, num_datashards) + sharded_logits = m.top_sharded(sharded_body_output, sharded_targets, + data_parallelism) + train_loss = m.loss_sharded(sharded_logits, sharded_targets, + data_parallelism) + logits = tf.concat(sharded_logits, 0) + session.run(tf.global_variables_initializer()) + res1, res2 = session.run((logits, train_loss)) + self.assertEqual(res1.shape, (batch_size, length, height, 1, vocab_size)) + self.assertEqual(res2.shape, ()) + + def testSymbolModalityTargetsFactored(self): + batch_size = 10 + num_datashards = 5 + length = 6 + height = 7 + hidden_size = 9 + vocab_size = 11 + model_hparams = tf.contrib.training.HParams( + symbol_modality_num_shards=4, + hidden_size=hidden_size, + label_smoothing=0.2, + shared_embedding_and_softmax_weights=0, + factored_logits=1, + mode=tf.contrib.learn.ModeKeys.TRAIN) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( From 6db9fa4536e23c9af140ca4d7725efe4ae1e7ef1 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 23 Aug 2017 22:20:36 -0700 Subject: [PATCH 0306/4095] calling empty preprocessing sequence by name PiperOrigin-RevId: 166300528 --- tensor2tensor/layers/common_hparams.py | 12 ++++++++---- tensor2tensor/layers/common_layers.py | 2 ++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 0173ebb2c..d4751bb0d 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -79,13 +79,17 @@ def basic_params1(): # Sequences of operations to perform on layer input and layer output. # Used by common_layers.layer_preprocess, common_layers.layer_postprocess # Each character repsesnts an operation: - # d: apply dropout - # n: apply normalization (see norm_type and norm_epsilon) - # a: add layer input (residual connection - only during postprocess) + # none: no preprocessing + # d: apply dropout + # n: apply normalization (see norm_type and norm_epsilon) + # a: add layer input (residual connection - only during postprocess) + # The special string "none" is used instead of the empty string + # to indicate no pre/postprocesisng, since the empty string causes + # trouble for hyperparameter tuning. # TODO(noam): The current settings ("", "dan") are the published version # of the transformer. ("n", "da") seems better for harder-to-learn # models, so it should probably be the default. - layer_preprocess_sequence="", + layer_preprocess_sequence="none", layer_postprocess_sequence="dan", # dropout rate to use during layer_preprocess and layer_postprocess layer_prepostprocess_dropout=0.1, diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 53cb4581a..a0d70c6c8 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -519,6 +519,8 @@ def layer_prepostprocess(previous_value, a Tensor """ with tf.variable_scope(name): + if sequence == "none": + return x for c in sequence: if c == "a": x += previous_value From 8194a07eca8b03561692f2709167fe5463bdbe87 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 24 Aug 2017 18:50:14 -0700 Subject: [PATCH 0307/4095] Fix bug in TokenTextEncoder where saving and loading a vocab wouldn't produce the same vocab, and minor fix handling of reserved tokens in SubwordTextEncoder. PiperOrigin-RevId: 166425490 --- .../data_generators/generator_utils.py | 25 +++++- tensor2tensor/data_generators/text_encoder.py | 85 ++++++++++++++++--- .../data_generators/text_encoder_test.py | 46 ++++++++++ 3 files changed, 139 insertions(+), 17 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index eadca9bd6..3e1086d37 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -301,9 +301,24 @@ def gunzip_file(gz_path, new_path): def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, generator_fn): - """Inner implementation for vocab generators.""" - vocab_filepath = os.path.join(data_dir, vocab_filename) - if tf.gfile.Exists(vocab_filepath): + """Inner implementation for vocab generators. + + Args: + data_dir: The base directory where data and vocab files are stored. If None, + then do not save the vocab even if it doesn't exist. + vocab_filename: relative filename where vocab file is stored + vocab_size: target size of the vocabulary constructed by SubwordTextEncoder + generator_fn: a generator that produces tokens from the vocabulary + + Returns: + A SubwordTextEncoder vocabulary object. + """ + if data_dir is None: + vocab_filepath = None + else: + vocab_filepath = os.path.join(data_dir, vocab_filename) + + if vocab_filepath is not None and tf.gfile.Exists(vocab_filepath): tf.logging.info("Found vocab file: %s", vocab_filepath) vocab = text_encoder.SubwordTextEncoder(vocab_filepath) return vocab @@ -316,7 +331,9 @@ def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, vocab = text_encoder.SubwordTextEncoder.build_to_target_size( vocab_size, token_counts, 1, 1e3) - vocab.store_to_file(vocab_filepath) + + if vocab_filepath is not None: + vocab.store_to_file(vocab_filepath) return vocab diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index ec43efe22..a6093b7ce 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -160,7 +160,22 @@ def __init__(self, reverse=False, vocab_list=None, num_reserved_ids=NUM_RESERVED_TOKENS): - """Initialize from a file or list, one token per line.""" + """Initialize from a file or list, one token per line. + + Handling of reserved tokens works as follows: + - When initializing from a list, we add reserved tokens to the vocab. + - When initializing from a file, we do not add reserved tokens to the vocab. + - When saving vocab files, we save reserved tokens to the file. + + Args: + vocab_filename: If not None, the full filename to read vocab from. If this + is not None, then vocab_list should be None. + reverse: Boolean indicating if tokens should be reversed during encoding + and decoding. + vocab_list: If not None, a list of elements of the vocabulary. If this is + not None, then vocab_filename should be None. + num_reserved_ids: Number of IDs to save for reserved tokens like <EOS>. + """ super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse if vocab_filename: @@ -186,30 +201,47 @@ def _safe_id_to_token(self, idx): return self._id_to_token.get(idx, "ID_%d" % idx) def _init_vocab_from_file(self, filename): - """Load vocab from a file.""" + """Load vocab from a file. + Args: + filename: The file to load vocabulary from. + """ def token_gen(): with tf.gfile.Open(filename) as f: for line in f: token = line.strip() yield token - self._init_vocab(token_gen()) + self._init_vocab(token_gen(), add_reserved_tokens=False) def _init_vocab_from_list(self, vocab_list): + """Initialize tokens from a list of tokens. + It is ok if reserved tokens appear in the vocab list. They will be + removed. The set of tokens in vocab_list should be unique. + + Args: + vocab_list: A list of tokens. + """ def token_gen(): for token in vocab_list: - yield token + if token not in RESERVED_TOKENS: + yield token self._init_vocab(token_gen()) - def _init_vocab(self, token_generator): + def _init_vocab(self, token_generator, add_reserved_tokens=True): """Initialize vocabulary with tokens from token_generator.""" - # Initialize with reserved tokens - self._id_to_token = dict(enumerate(RESERVED_TOKENS)) + + self._id_to_token = {} + non_reserved_start_index = 0 + + if add_reserved_tokens: + self._id_to_token.update(enumerate(RESERVED_TOKENS)) + non_reserved_start_index = len(RESERVED_TOKENS) + self._id_to_token.update( - enumerate(token_generator, start=len(RESERVED_TOKENS))) + enumerate(token_generator, start=non_reserved_start_index)) # _token_to_id is the reverse of _id_to_token self._token_to_id = dict((v, k) @@ -222,7 +254,7 @@ def store_to_file(self, filename): tokens are written to the vocab file as well. Args: - filename: full path of the file to store the vocab to. + filename: Full path of the file to store the vocab to. """ with tf.gfile.Open(filename, "w") as f: for i in xrange(len(self._id_to_token)): @@ -311,7 +343,12 @@ class SubwordTextEncoder(TextEncoder): """ def __init__(self, filename=None): - """Initialize and read from a file, if provided.""" + """Initialize and read from a file, if provided. + + Args: + filename: filename from which to read vocab. If None, do not load a + vocab + """ self._alphabet = set() if filename is not None: self._load_from_file(filename) @@ -565,8 +602,26 @@ def dump(self): for i, s in sorted(subtoken_strings))) def _init_subtokens_from_list(self, subtoken_strings, reserved=0): - """Initialize token information from a list of subtoken strings.""" - self._all_subtoken_strings = [u""] * reserved + subtoken_strings + """Initialize token information from a list of subtoken strings. + + Args: + subtoken_strings: a list of subtokens + reserved: number of spaces to save at the beginning for reserved tokens + + Raises: + ValueError: if reserved is not 0 or len(RESERVED_TOKENS). In this case, it + is not clear what the space is being reserved for, or when it will be + filled in. + """ + if reserved == 0: + self._all_subtoken_strings = subtoken_strings + elif reserved == len(RESERVED_TOKENS): + self._all_subtoken_strings = RESERVED_TOKENS + subtoken_strings + else: + # TODO(dtarlow): or should we fall back to the previous behavior and + # insert copies of "" for each reserved count? + raise ValueError("Unexpected value for reserved. What is being reserved?") + # we remember the maximum length of any subtoken to avoid having to # check arbitrarily long strings. self._max_subtoken_len = max([len(s) for s in subtoken_strings]) @@ -583,7 +638,11 @@ def _init_alphabet_from_tokens(self, tokens): self._alphabet |= _ESCAPE_CHARS def _load_from_file(self, filename): - """Load from a file.""" + """Load from a file. + + Args: + filename: filename to load vocabulary from + """ subtoken_strings = [] with tf.gfile.Open(filename) as f: for line in f: diff --git a/tensor2tensor/data_generators/text_encoder_test.py b/tensor2tensor/data_generators/text_encoder_test.py index 4142f8699..eadfcfb5e 100644 --- a/tensor2tensor/data_generators/text_encoder_test.py +++ b/tensor2tensor/data_generators/text_encoder_test.py @@ -21,6 +21,8 @@ from __future__ import unicode_literals import collections +import os +import shutil # Dependency imports import mock @@ -47,6 +49,50 @@ def test_unescape_token(self): 'Foo! Bar.\nunder_score back\\slash', unescaped) +class TokenTextEncoderTest(tf.test.TestCase): + + @classmethod + def setUpClass(cls): + """Make sure the test dir exists and is empty.""" + cls.test_temp_dir = os.path.join(tf.test.get_temp_dir(), 'encoder_test') + shutil.rmtree(cls.test_temp_dir, ignore_errors=True) + os.mkdir(cls.test_temp_dir) + + def test_save_and_reload(self): + """Test that saving and reloading doesn't change the vocab. + + Note that this test reads and writes to the filesystem, which necessitates + that this test size be "large". + """ + + corpus = 'A B C D E F G H I J K L M N O P Q R S T U V W X Y Z' + vocab_filename = os.path.join(self.test_temp_dir, 'abc.vocab') + + # Make text encoder from a list and store vocab to fake filesystem. + encoder = text_encoder.TokenTextEncoder(None, vocab_list=corpus.split()) + encoder.store_to_file(vocab_filename) + + # Load back the saved vocab file from the fake_filesystem. + new_encoder = text_encoder.TokenTextEncoder(vocab_filename) + + self.assertEqual(encoder._id_to_token, new_encoder._id_to_token) + self.assertEqual(encoder._token_to_id, new_encoder._token_to_id) + + def test_reserved_tokens_in_corpus(self): + """Test that we handle reserved tokens appearing in the corpus.""" + corpus = 'A B {} D E F {} G {}'.format(text_encoder.EOS, + text_encoder.EOS, + text_encoder.PAD) + + encoder = text_encoder.TokenTextEncoder(None, vocab_list=corpus.split()) + + all_tokens = encoder._id_to_token.values() + + # If reserved tokens are removed correctly, then the set of tokens will + # be unique. + self.assertEqual(len(all_tokens), len(set(all_tokens))) + + class SubwordTextEncoderTest(tf.test.TestCase): def test_encode_decode(self): From bde34993b31e46ff151f21e3a7580b7a92889632 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 24 Aug 2017 19:07:48 -0700 Subject: [PATCH 0308/4095] Illustrate how to call T2T models from raw TF session, small cleanup. PiperOrigin-RevId: 166426686 --- tensor2tensor/utils/model_builder.py | 13 ++++--- tensor2tensor/utils/trainer_utils.py | 28 ++++++++++---- tensor2tensor/utils/trainer_utils_test.py | 46 +++++++++++++++++++++++ 3 files changed, 73 insertions(+), 14 deletions(-) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index fdc4226d0..e16a99772 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -226,9 +226,13 @@ def nth_model(n): ops.append( loss_moving_avg.assign(loss_moving_avg * 0.9 + loss_value * 0.1)) total_loss += loss_value - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - # Total loss was already constructed on input. - loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) + try: # Total loss avg might be reused or not, we try both. + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + # Total loss was already constructed on input. + loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) + except ValueError: + loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n, + initializer=100.0, trainable=False) ops.append( loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1)) with tf.variable_scope("train_stats"): # Count steps for this problem. @@ -266,9 +270,6 @@ def nth_model(n): sharded_logits, total_loss = result_list[1:], result_list[0] if mode == tf.contrib.learn.ModeKeys.EVAL: logits = tf.concat(sharded_logits, 0) - if FLAGS.eval_print: - logits = tf.Print( - logits, [features["inputs"], logits], "EVAL PRINT", summarize=10000) # For evaluation, return the logits layer as our predictions. run_info["predictions"] = logits train_op = None diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 09375b79a..fa9d9233e 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -62,7 +62,6 @@ flags.DEFINE_integer("train_steps", 250000, "The number of steps to run training for.") flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") -flags.DEFINE_bool("eval_print", False, "Print eval logits and predictions.") flags.DEFINE_bool("eval_run_autoregressive", False, "Run eval autoregressively where we condition on previous" "generated output instead of the actual target.") @@ -70,6 +69,13 @@ "How many recent checkpoints to keep.") flags.DEFINE_bool("experimental_optimize_placement", False, "Optimize ops placement with experimental session options.") +flags.DEFINE_integer("keep_checkpoint_every_n_hours", 10000, + "Number of hours between each checkpoint to be saved. " + "The default value 10,000 hours effectively disables it.") +flags.DEFINE_integer("save_checkpoints_secs", 0, + "Save checkpoints every this many seconds. " + "Default=0 means let tensorflow.contrib.learn.python.learn" + " decide, which is currently set to 600 = 10 minutes.") # Distributed training flags flags.DEFINE_string("master", "", "Address of TensorFlow master.") @@ -143,7 +149,8 @@ def experiment_fn(output_dir): def create_experiment(output_dir, data_dir, model_name, train_steps, eval_steps): """Create Experiment.""" - hparams = create_hparams(FLAGS.hparams_set, data_dir) + hparams = create_hparams(FLAGS.hparams_set, FLAGS.problems, data_dir, + passed_hparams=FLAGS.hparams) estimator, input_fns = create_experiment_components( hparams=hparams, output_dir=output_dir, @@ -196,14 +203,17 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): worker_replicas=FLAGS.worker_replicas, worker_id=FLAGS.worker_id) estimator = tf.contrib.learn.Estimator( - model_fn=model_builder.build_model_fn(model_name, hparams=hparams), + model_fn=model_builder.build_model_fn(model_name, hparams), model_dir=output_dir, config=tf.contrib.learn.RunConfig( master=FLAGS.master, model_dir=output_dir, gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction, session_config=session_config(), - keep_checkpoint_max=FLAGS.keep_checkpoint_max)) + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, + save_checkpoints_secs=FLAGS.save_checkpoints_secs)) + # Store the hparams in the estimator as well estimator.hparams = hparams return estimator, { @@ -239,7 +249,7 @@ def add_problem_hparams(hparams, problems): return hparams -def create_hparams(params_id, data_dir): +def create_hparams(params_id, problems, data_dir, passed_hparams=None): """Returns hyperparameters, including any flag value overrides. If the hparams FLAG is set, then it will use any values specified in @@ -248,7 +258,9 @@ def create_hparams(params_id, data_dir): Args: params_id: which set of parameters to choose (must be in _PARAMS above). + problems: the string with problem names to get problem_hparams from. data_dir: the directory containing the training data. + passed_hparams: command-line overrides for some hparams. Returns: The hyperparameters as a tf.contrib.training.HParams object. @@ -256,10 +268,10 @@ def create_hparams(params_id, data_dir): hparams = registry.hparams(params_id)() hparams.add_hparam("data_dir", data_dir) # Command line flags override any of the preceding hyperparameter values. - if FLAGS.hparams: - hparams = hparams.parse(FLAGS.hparams) + if passed_hparams: + hparams = hparams.parse(passed_hparams) - return add_problem_hparams(hparams, FLAGS.problems) + return add_problem_hparams(hparams, problems) def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 568026ad6..6cc654d26 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -27,6 +27,7 @@ from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import generator_utils from tensor2tensor.models import transformer +from tensor2tensor.utils import model_builder from tensor2tensor.utils import registry from tensor2tensor.utils import trainer_utils @@ -92,6 +93,51 @@ def testSingleStep(self): eval_steps=1) exp.test() + def testSingleEvalStepRawSession(self): + """Illustrate how to run a T2T model in a raw session.""" + + # Set model name, hparams, problems as would be set on command line. + model_name = "transformer" + FLAGS.hparams_set = "transformer_test" + FLAGS.problems = "tiny_algo" + data_dir = "/tmp" # Used only when a vocab file or such like is needed. + + # Create the problem object, hparams, model_fn, placeholders, features dict. + encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir) + hparams = trainer_utils.create_hparams( + FLAGS.hparams_set, FLAGS.problems, data_dir) + model_fn = model_builder.build_model_fn(model_name, hparams) + inputs_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. + batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1]) # Make it 4D. + targets_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. + batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1]) # Make it 4D. + features = {"inputs": batch_inputs, + "problem_choice": 0, # We run on the first problem here. + "input_space_id": hparams.problems[0].input_space_id, + "target_space_id": hparams.problems[0].target_space_id} + + # Now set a mode and create the graph by invoking model_fn. + mode = tf.contrib.learn.ModeKeys.EVAL + predictions_dict, _, _ = model_fn( # In INFER mode targets can be None. + features, batch_targets, mode) + predictions = tf.squeeze( # These are not images, axis=2,3 are not needed. + predictions_dict["predictions"], axis=[2, 3]) + + # Having the graph, let's run it on some data. + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + inputs = "0 1 0" + targets = "0 1 0" + # Encode from raw string to numpy input array using problem encoders. + inputs_numpy = encoders["inputs"].encode(inputs) + targets_numpy = encoders["targets"].encode(targets) + # Feed the encoded inputs and targets and run session. + feed = {inputs_ph: inputs_numpy, targets_ph: targets_numpy} + np_predictions = sess.run(predictions, feed) + # Check that the result has the correct shape: batch x length x vocab_size + # where, for us, batch = 1, length = 3, vocab_size = 4. + self.assertEqual(np_predictions.shape, (1, 3, 4)) + if __name__ == "__main__": tf.test.main() From e75c1183031ccb313071bc4d62f8a91e7dffaccf Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Fri, 25 Aug 2017 14:09:48 -0700 Subject: [PATCH 0309/4095] Decrease loss for small batches to defend against the new dataset API. PiperOrigin-RevId: 166523059 --- tensor2tensor/utils/model_builder.py | 30 +++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index e16a99772..24c17ca9e 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -168,6 +168,9 @@ def model_fn(features, targets, mode): dp = devices.data_parallelism() + tf.get_variable_scope().set_initializer(initializer()) + is_training = mode == tf.contrib.learn.ModeKeys.TRAIN + # Add input statistics for incoming features. with tf.name_scope("input_stats"): for (k, v) in six.iteritems(features): @@ -175,13 +178,28 @@ def model_fn(features, targets, mode): tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // dp.n) tf.summary.scalar("%s_length" % k, tf.shape(v)[1]) nonpadding = tf.to_float(tf.not_equal(v, 0)) - tf.summary.scalar("%s_nonpadding_tokens" % k, - tf.reduce_sum(nonpadding)) + nonpadding_tokens = tf.reduce_sum(nonpadding) + if k == "targets": + targets_nonpadding_tokens = nonpadding_tokens + tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens) tf.summary.scalar("%s_nonpadding_fraction" % k, tf.reduce_mean(nonpadding)) - tf.get_variable_scope().set_initializer(initializer()) - train = mode == tf.contrib.learn.ModeKeys.TRAIN + # The new data reader occasionally emits very small batches, which + # cause the examples in those batches to be grossly overweighted. + # We decrease the loss proportionally to the ratio of the size of this + # batch to the size of the largest training batch ever. + # TODO(noam): to be more sophisticated, we could keep separate + # maxima based on problem choice. + max_nonpadding_var = tf.get_variable( + "max_nonpadding", shape=[], + initializer=tf.ones_initializer(), trainable=False) + max_nonpadding = tf.maximum(max_nonpadding_var, targets_nonpadding_tokens) + if is_training: + with tf.control_dependencies( + [tf.assign(max_nonpadding_var, max_nonpadding)]): + small_batch_multiplier = targets_nonpadding_tokens / max_nonpadding + tf.summary.scalar("small_batch_multiplier", small_batch_multiplier) # Get multi-problem logits and loss based on features["problem_choice"]. loss_variable_names = [] @@ -204,7 +222,7 @@ def nth_model(n): alpha=FLAGS.decode_alpha, decode_length=FLAGS.decode_extra_length) # In distributed mode, we build graph for problem=0 and problem=worker_id. - skipping_is_on = my_hp.problem_choice == "distributed" and train + skipping_is_on = my_hp.problem_choice == "distributed" and is_training problem_worker_id = FLAGS.worker_id % len(my_hp.problems) skip_this_one = n != 0 and n % FLAGS.worker_replicas != problem_worker_id # On worker 0 also build graph for problems <= 1. @@ -324,6 +342,8 @@ def nth_model(n): total_loss = tf.identity(total_loss) if my_hp.weight_decay > 0.0: total_loss += weight_decay_loss * my_hp.weight_decay + if is_training: + total_loss *= small_batch_multiplier total_loss = tf.identity(total_loss, name="total_loss") log_variable_sizes(tf.trainable_variables(), "Trainable Variables") diet_vars = [v for v in tf.global_variables() if hasattr(v, "optimizer")] From 92c1fa6df02ff4219b942cce2b7a71473f311519 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 25 Aug 2017 14:36:14 -0700 Subject: [PATCH 0310/4095] Corrections for the Dataset API, play with VAEs. PiperOrigin-RevId: 166526788 --- tensor2tensor/data_generators/text_encoder.py | 24 ++++--- tensor2tensor/models/transformer_vae.py | 70 ++++++++++++++----- tensor2tensor/utils/data_reader.py | 46 +++++++++++- 3 files changed, 109 insertions(+), 31 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index a6093b7ce..c8a3bd1f9 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -56,19 +56,21 @@ _ESCAPE_CHARS = set(u"\\_u;0123456789") -def native_to_unicode_py2(s): - """Python 2: transform native string to Unicode.""" - return s if isinstance(s, unicode) else s.decode("utf8") +# Conversion between Unicode and UTF-8, if required (on Python2). +if six.PY2: + def native_to_unicode(s): + return s if isinstance(s, unicode) else s.decode("utf8") -# Conversion between Unicode and UTF-8, if required (on Python2) -if six.PY2: - native_to_unicode = native_to_unicode_py2 - unicode_to_native = lambda s: s.encode("utf-8") -else: - # No conversion required on Python3 - native_to_unicode = lambda s: s - unicode_to_native = lambda s: s + def unicode_to_native(s): + return s.encode("utf-8") +else: # No conversion required on Python >= 3. + + def native_to_unicode(s): + return s + + def unicode_to_native(s): + return s class TextEncoder(object): diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 819fa8b9d..34f6a1c39 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -97,13 +97,13 @@ def dvae(x, hparams, name): # Gumbel-softmax sample. gumbel_samples = gumbel_sample(tf.shape(m)) steps = hparams.kl_warmup_steps - gumbel_samples *= common_layers.inverse_exp_decay(steps) - temperature = 1.01 - common_layers.inverse_lin_decay(steps) + gumbel_samples *= common_layers.inverse_exp_decay(steps) * 0.1 + temperature = 1.2 - common_layers.inverse_lin_decay(steps) s = tf.nn.softmax((logsm + gumbel_samples) / temperature) m = tf.nn.softmax(m) kl = - tf.reduce_max(logsm, axis=-1) tf.summary.histogram("max-log", tf.reshape(kl, [-1])) - return m, tf.reduce_mean(kl), s + return m, s, tf.reduce_mean(kl) def vae(x, hparams, name): @@ -118,6 +118,27 @@ def vae(x, hparams, name): return z, tf.reduce_mean(kl), mu, log_sigma +def nearest(x, means, hparams): + """Find the nearest means to elements in x.""" + x, means = tf.stop_gradient(x), tf.stop_gradient(means) + x_flat = tf.reshape(x, [-1, hparams.hidden_size]) + # dist = tf.reduce_sum(tf.square(x_flat - tf.expand_dims(means, 0)), axis=2) + dist = - tf.matmul(x_flat, means, transpose_b=True) + _, nearest_idx = tf.nn.top_k(- dist, k=1) + nearest_hot = tf.one_hot(tf.squeeze(nearest_idx, axis=1), hparams.v_size) + nearest_hot = tf.reshape(nearest_hot, [tf.shape(x)[0], tf.shape(x)[1], + 1, hparams.v_size]) + return tf.stop_gradient(nearest_hot) + + +def kmeans(x, means, hparams, name): + with tf.variable_scope(name): + x_means_hot = nearest(x, means, hparams) + x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) + kl = tf.reduce_sum(tf.square(x - x_means), axis=-1) + return x_means_hot, x_means_hot, tf.reduce_mean(kl) * 100.0 + + def compress(x, c, hparams, name): """Compress.""" with tf.variable_scope(name): @@ -157,14 +178,17 @@ def encode(x, x_space, hparams, name): encoder_input, encoder_self_attention_bias, hparams), ed -def decode(cond_vec, gold, c, ed, hparams): +def decode(cond_vec, cond_add, gold, c, ed, hparams): + """Transformer decoder.""" drop_gold = tf.nn.dropout(gold, 1.0 - hparams.layer_prepostprocess_dropout) - drop_gold += cond_vec decoder_input = common_layers.shift_left(drop_gold, pad_value=cond_vec) + if cond_add is not None: + decoder_input += cond_add decoder_input = tf.squeeze(decoder_input, axis=2) decoder_input = common_attention.add_timing_signal_1d(decoder_input) bias = common_attention.attention_bias_lower_triangle(tf.shape(gold)[1]) - c = tf.squeeze(c, axis=2) + if c is not None: + c = tf.squeeze(c, axis=2) return transformer.transformer_decoder(decoder_input, c, bias, ed, hparams) @@ -187,13 +211,18 @@ def vae_compress(x, c, ed, hparams, compress_name, decompress_name, reuse=None): # Convolve and ReLu to get state. cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") + cur = tf.nn.l2_normalize(cur, dim=3) + means = tf.get_variable("z_to_dense", [hparams.v_size, hparams.hidden_size]) + means = tf.nn.l2_normalize(means, dim=1) # z, kl_loss, mu, log_sigma = vae(cur, hparams, name="vae") - z_true, kl_loss, z_gumbel = dvae(cur, hparams, name="dvae") + # z_true, z_sample, kl_loss = dvae(cur, hparams, name="dvae") + z_true, z_sample, kl_loss = kmeans(cur, means, hparams, name="kmeans") # Compress context. with tf.variable_scope(compress_name, reuse=reuse): compress_c = compress(c, None, hparams, "compress_context") - c_z = tf.layers.dense(compress_c, hparams.v_size, name="mask_context") + dec_c = decode(None, compress_c, cur, None, None, hparams) + c_z = tf.layers.dense(dec_c, hparams.v_size, name="mask_context") reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( labels=z_true, logits=c_z) @@ -203,11 +232,17 @@ def vae_compress(x, c, ed, hparams, compress_name, decompress_name, reuse=None): with tf.variable_scope(decompress_name, reuse=reuse): # Decompress. - z = tf.layers.dense(z_gumbel, hparams.hidden_size, name="z_to_dense") + z_sample_flat = tf.reshape(z_sample, [-1, hparams.v_size]) + z = tf.matmul(z_sample_flat, means) + z = tf.reshape(z, [tf.shape(z_sample)[0], tf.shape(z_sample)[1], + 1, hparams.hidden_size]) # Leak at the beginning to help train. z = mix(z, cur, hparams.startup_steps) + # Dropout for better autoencoding. + z = tf.nn.dropout(z, keep_prob=0.9) + # Decompress. d = z for i in xrange(hparams.num_compress_steps): @@ -217,11 +252,12 @@ def vae_compress(x, c, ed, hparams, compress_name, decompress_name, reuse=None): k = 2**hparams.num_compress_steps z_batch = tf.reshape(z, [-1, 1, 1, hparams.hidden_size]) - x_batch = tf.reshape(x + d, [-1, k, 1, hparams.hidden_size]) - # dec_batch = decode(z_batch, x_batch, None, None, hparams) + x_batch = tf.reshape(x, [-1, k, 1, hparams.hidden_size]) + d_batch = tf.reshape(d, [-1, k, 1, hparams.hidden_size]) + # dec_batch = decode(z_batch, d_batch, x_batch, None, None, hparams) c = expand_batch(c, tf.shape(x_batch)[0] / tf.shape(x)[0]) ed = expand_batch(ed, tf.shape(x_batch)[0] / tf.shape(x)[0]) - dec_batch = decode(z_batch, x_batch, c, ed, hparams) + dec_batch = decode(z_batch, d_batch, x_batch, c, ed, hparams) z = tf.reshape(dec_batch, [-1, tf.shape(x)[1], 1, hparams.hidden_size]) return z, kl_loss, reconstruct_loss @@ -252,7 +288,7 @@ def vae_transformer_internal(inputs, targets, target_space, hparams): z, kl, r = vae_compress(tf.expand_dims(targets, axis=2), tf.expand_dims(inputs, axis=2), ed_bias, hparams, "vae_compress", "vae_decompress") - kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 2.0))*0.5 + kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 2.0)) r *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 2.0)) losses = {"kl": kl, "reconstruction": r} return z, losses @@ -290,7 +326,7 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, samples = tf.concat(sharded_samples, 0) # More steps. - how_many_more_steps = 20 + how_many_more_steps = 2 for _ in xrange(how_many_more_steps): with tf.variable_scope(tf.get_variable_scope(), reuse=True): features["targets"] = samples @@ -311,10 +347,10 @@ def transformer_vae_small(): hparams.batch_size = 2048 hparams.learning_rate_warmup_steps = 4000 hparams.add_hparam("z_size", 128) - hparams.add_hparam("v_size", 1024*8) + hparams.add_hparam("v_size", 1024*32) hparams.add_hparam("num_compress_steps", 4) - hparams.add_hparam("kl_warmup_steps", 120000) - hparams.add_hparam("startup_steps", 20000) + hparams.add_hparam("kl_warmup_steps", 60000) + hparams.add_hparam("startup_steps", 30000) return hparams diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 199de7a79..dbbd8e936 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function +import fractions import math import os import random @@ -271,7 +272,9 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, dataset = bucket_by_sequence_length(dataset, _example_length, batching_scheme["boundaries"], batching_scheme["batch_sizes"]) - + max_batch_size = max(batching_scheme["batch_sizes"]) + # We reshuffle the batches to prevent many long-sequence batches at once. + dataset = dataset.shuffle(max_batch_size * 3) batched_examples = dataset.make_one_shot_iterator().get_next() return batched_examples @@ -305,6 +308,36 @@ def _example_too_big(example, max_length): return tf.less_equal(_example_length(example), max_length) +def _lcm(l): + """Least common multiple of integers in a list.""" + if not l: + raise ValueError("LCD of an empty list.") + if len(l) == 1: + return l[0] + x = l[0] + y = _lcm(l[1:]) + return x * y // fractions.gcd(x, y) + + +def _closest_small_primes(x): + """Closest number to x which has only 2, 3, 5 as prime factors, 3,5 once.""" + assert x > 0 + def is_small_primes(x, covered3, covered5): + if x % 2 == 0: + return is_small_primes(x // 2, covered3, covered5) + if x % 3 == 0 and not covered3: + return is_small_primes(x // 3, True, covered5) + if x % 5 == 0 and not covered5: + return is_small_primes(x // 5, covered3, True) + return x == 1 + for i in xrange(x): + if is_small_primes(x - i, False, False): + return x - i + # We search for higher numbers too, but only 8 of them to not increase much. + if i < 9 and is_small_primes(x + i, False, False): + return x + i + + def bucket_by_sequence_length(dataset, example_length_fn, bucket_boundaries, bucket_batch_sizes): """Bucket entries in dataset by length. @@ -319,6 +352,14 @@ def bucket_by_sequence_length(dataset, example_length_fn, bucket_boundaries, Returns: Dataset of padded and batched examples. """ + # Since the Datasets API only allows a single constant for window_size, + # and it needs divide all bucket_batch_sizes, we first make sure they only + # have a few primes in them so that their LCM doesn't explode quickly. + # TODO(lukaszkaiser): remove this adjustment when Dataset API improves. + bucket_batch_sizes1 = [_closest_small_primes(b) for b in bucket_batch_sizes] + tf.logging.info("Corrected bucket_batch_sizes from %s to %s." + % (str(bucket_batch_sizes), str(bucket_batch_sizes1))) + bucket_batch_sizes = bucket_batch_sizes1 with tf.name_scope("bucket_by_seq_length"): def example_to_bucket_id(example): @@ -345,8 +386,7 @@ def batching_fn(bucket_id, grouped_dataset): for name, shape in grouped_dataset.output_shapes.items()]) return grouped_dataset.padded_batch(batch_size, padded_shapes) - window_size = max( - max(bucket_batch_sizes) * 10, len(bucket_batch_sizes) * 32) + window_size = _lcm(bucket_batch_sizes) dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, window_size) return dataset From 8e7a5d9d64672b800971861214bc83870918358f Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 25 Aug 2017 14:38:41 -0700 Subject: [PATCH 0311/4095] Make diet variables more generic PiperOrigin-RevId: 166527108 --- tensor2tensor/layers/common_layers.py | 184 ++++++-- tensor2tensor/layers/common_layers_test.py | 93 +++- tensor2tensor/layers/rev_block.py | 93 +--- tensor2tensor/layers/rev_block_test.py | 77 ---- tensor2tensor/models/attention_lm_moe.py | 17 +- tensor2tensor/utils/diet.py | 487 ++++++++++----------- tensor2tensor/utils/diet_test.py | 70 +++ 7 files changed, 565 insertions(+), 456 deletions(-) create mode 100644 tensor2tensor/utils/diet_test.py diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index a0d70c6c8..ad899bfbf 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -18,7 +18,10 @@ from __future__ import division from __future__ import print_function +from collections import defaultdict +import contextlib import math +import random # Dependency imports @@ -486,14 +489,8 @@ def apply_norm(x, norm_type, depth, epsilon): "'noam', 'none'.") -def layer_prepostprocess(previous_value, - x, - sequence, - dropout_rate, - norm_type, - depth, - epsilon, - name): +def layer_prepostprocess(previous_value, x, sequence, dropout_rate, norm_type, + depth, epsilon, name): """Apply a sequence of functions to the input or output of a layer. The sequence is specified as a string which may contain the following @@ -556,7 +553,8 @@ def layer_preprocess(layer_input, hparams): assert "a" not in hparams.layer_preprocess_sequence, ( "No residual connections allowed in hparams.layer_preprocess_sequence") return layer_prepostprocess( - None, layer_input, + None, + layer_input, sequence=hparams.layer_preprocess_sequence, dropout_rate=hparams.layer_prepostprocess_dropout, norm_type=hparams.norm_type, @@ -588,7 +586,8 @@ def layer_postprocess(layer_input, layer_output, hparams): a Tensor """ return layer_prepostprocess( - layer_input, layer_output, + layer_input, + layer_output, sequence=hparams.layer_postprocess_sequence, dropout_rate=hparams.layer_prepostprocess_dropout, norm_type=hparams.norm_type, @@ -1438,11 +1437,12 @@ def padded_cross_entropy(logits, loss_denominator: a `Scalar. The number of non-padding target tokens. """ if isinstance(logits, FactoredTensor): - return padded_cross_entropy_factored(logits, - labels, - label_smoothing, - weights_fn=weights_fn, - reduce_sum=reduce_sum) + return padded_cross_entropy_factored( + logits, + labels, + label_smoothing, + weights_fn=weights_fn, + reduce_sum=reduce_sum) confidence = 1.0 - label_smoothing vocab_size = tf.shape(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): @@ -1637,6 +1637,37 @@ def ravanbakhsh_set_layer(layer_size, name=name) +def fn_device_dependency_dict(): + """State container for fn_device_dependency.""" + if not hasattr(tf.get_default_graph(), "dependency_dict"): + setattr(tf.get_default_graph(), "dependency_dict", defaultdict(list)) + return tf.get_default_graph().dependency_dict + + +@contextlib.contextmanager +def fn_device_dependency(name, device=""): + """Add control deps for name and device.""" + key = name + "_" + device + outs = [] + + def body(): + with tf.control_dependencies(fn_device_dependency_dict()[key]): + yield outs + assert outs + + deps = outs + if isinstance(outs[0], list) or isinstance(outs[0], tuple): + assert len(outs) == 1 + deps = outs[0] + fn_device_dependency_dict()[key] = deps + + if device: + with tf.device(device): + return body() + else: + return body() + + def underlying_variable_ref(t): """Find the underlying variable ref, ignoring Identity ops. @@ -1686,8 +1717,7 @@ def approximate_split(x, num_splits, axis=0): a list of num_splits Tensors. """ size = tf.shape(x)[axis] - size_splits = [ - tf.div(size + i, num_splits) for i in xrange(num_splits)] + size_splits = [tf.div(size + i, num_splits) for i in xrange(num_splits)] return tf.split(x, size_splits, axis=axis) @@ -1723,8 +1753,8 @@ def to_tensor(self): product = tf.matmul(flat_a, self.b, transpose_b=True) product_shape = tf.concat([tf.shape(self.a)[:-1], [result_dim]], 0) product = tf.reshape(product, product_shape) - product.set_shape(self.a.get_shape().as_list()[:-1] - + [self.b.get_shape()[0]]) + product.set_shape(self.a.get_shape().as_list()[:-1] + + [self.b.get_shape()[0]]) return product @@ -1754,12 +1784,10 @@ def smoothing_cross_entropy_factored_grad(op, dy): for part in xrange(num_splits): with tf.control_dependencies(deps): logits = tf.matmul(a[part], b, transpose_b=True) - output_part = smoothing_cross_entropy( - logits, labels[part], vocab_size, confidence) + output_part = smoothing_cross_entropy(logits, labels[part], vocab_size, + confidence) a_grad_part, b_grad_part = tf.gradients( - ys=[output_part], - xs=[a[part], b], - grad_ys=[dy[part]]) + ys=[output_part], xs=[a[part], b], grad_ys=[dy[part]]) a_grad_parts.append(a_grad_part) if part > 0: b_grad += b_grad_part @@ -1770,11 +1798,12 @@ def smoothing_cross_entropy_factored_grad(op, dy): return a_grad, b_grad, None, None -@function.Defun(noinline=True, - python_grad_func=smoothing_cross_entropy_factored_grad, - compiled=True, separate_compiled_gradients=True) -def smoothing_cross_entropy_factored( - a, b, labels, confidence): +@function.Defun( + noinline=True, + python_grad_func=smoothing_cross_entropy_factored_grad, + compiled=True, + separate_compiled_gradients=True) +def smoothing_cross_entropy_factored(a, b, labels, confidence): """Memory-efficient computation of smoothing cross-entropy. Avoids realizing the entire logits matrix at once. @@ -1828,10 +1857,103 @@ def padded_cross_entropy_factored(factored_logits, with tf.name_scope("padded_cross_entropy_factored", [a, b, labels]): labels_flat = tf.reshape(labels, [-1]) a_flat = tf.reshape(a, [-1, tf.shape(b)[1]]) - xent = smoothing_cross_entropy_factored( - a_flat, b, labels_flat, tf.convert_to_tensor(confidence)) + xent = smoothing_cross_entropy_factored(a_flat, b, labels_flat, + tf.convert_to_tensor(confidence)) xent = tf.reshape(xent, tf.shape(labels)) weights = weights_fn(labels) if not reduce_sum: return xent * weights, weights return tf.reduce_sum(xent * weights), tf.reduce_sum(weights) + + +def fn_with_custom_grad(grad_fn, use_global_vars=False): + """Decorator to create a subgraph with a custom gradient function. + + The subgraph created by the decorated function is NOT put in a Defun and so + does not suffer from the limitations of the Defun (all subgraph ops on the + same device, no summaries). + + Args: + grad_fn: function with signature + (inputs, variables, outputs, output_grads) -> (grad_inputs, grad_vars), + all of which are lists of Tensors. + use_global_vars: if True, variables will be the global variables created. + If False, will be the trainable variables. + + Returns: + Decorator for function such that the gradient is defined by grad_fn. + """ + + def dec(fn): + + def wrapped(*args): + return _fn_with_custom_grad( + fn, args, grad_fn, use_global_vars=use_global_vars) + + return wrapped + + return dec + + +def _fn_with_custom_grad(fn, inputs, grad_fn, use_global_vars=False): + """Create a subgraph with a custom gradient. + + Args: + fn: function that takes inputs as arguments and produces 1 or more Tensors. + inputs: list<Tensor>, will be passed as fn(*inputs). + grad_fn: function with signature + (inputs, vars, outputs, output_grads) -> (grad_inputs, grad_vars), + all of which are lists of Tensors. + use_global_vars: if True, variables will be the global variables created. + If False, will be the trainable variables. + + Returns: + fn(*inputs) + """ + with tf.variable_scope(None, default_name="fn_with_custom_grad") as vs: + inputs = list(inputs) + outputs = fn(*inputs) + if use_global_vars: + train_vars = list(vs.global_variables()) + else: + train_vars = list(vs.trainable_variables()) + + if grad_fn is None: + return outputs + else: + if not (isinstance(outputs, tuple) or isinstance(outputs, list)): + outputs = [outputs] + outputs = list(outputs) + + in_types = [t.dtype for t in inputs] + out_types = [t.dtype for t in outputs] + var_types = [t.dtype for t in train_vars] + + def custom_grad_fn(op, *dys): + """Custom grad fn applying grad_fn for identity Defun.""" + dys = list(dys) + fn_inputs = op.inputs[:len(inputs)] + fn_vars = op.inputs[len(inputs):len(inputs) + len(train_vars)] + fn_outputs = op.inputs[len(inputs) + len(train_vars):] + assert len(fn_outputs) == len(outputs) + assert len(fn_outputs) == len(dys) + + grad_inputs, grad_vars = grad_fn(fn_inputs, fn_vars, fn_outputs, dys) + grad_outputs = [None] * len(fn_outputs) + return tuple(grad_inputs + grad_vars + grad_outputs) + + # The Defun takes as input the original inputs, the trainable variables + # created in fn, and the outputs. In the forward it passes through the + # outputs. In the backwards, it produces gradients for the original inputs + # and the trainable variables. + @function.Defun( + *(in_types + var_types + out_types), + func_name="identity_custom_grad%d" % random.randint(1, 10**9), + python_grad_func=custom_grad_fn, + shape_func=lambda _: [t.get_shape() for t in outputs]) + def identity(*args): + outs = args[len(inputs) + len(train_vars):] + return tuple([tf.identity(t) for t in outs]) + + id_out = identity(*(inputs + train_vars + outputs)) + return id_out diff --git a/tensor2tensor/layers/common_layers_test.py b/tensor2tensor/layers/common_layers_test.py index f251ac5b8..61023938f 100644 --- a/tensor2tensor/layers/common_layers_test.py +++ b/tensor2tensor/layers/common_layers_test.py @@ -405,14 +405,16 @@ def testPaddingCrossEntropyFactored(self): features = tf.to_float(features) weights = tf.to_float(weights) labels = tf.to_int32(labels) - logits = tf.matmul(tf.reshape(features, [rows * cols, depth]), - weights, transpose_b=True) + logits = tf.matmul( + tf.reshape(features, [rows * cols, depth]), weights, transpose_b=True) logits = tf.reshape(logits, [rows, cols, vocab_size]) loss_num, loss_den = common_layers.padded_cross_entropy( logits, labels, label_smoothing=label_smoothing, reduce_sum=False) factored_logits = common_layers.FactoredTensor(features, weights) loss_num_f, loss_den_f = common_layers.padded_cross_entropy_factored( - factored_logits, labels=labels, label_smoothing=label_smoothing, + factored_logits, + labels=labels, + label_smoothing=label_smoothing, reduce_sum=False) num, den, num_f, den_f = session.run( [loss_num, loss_den, loss_num_f, loss_den_f]) @@ -436,15 +438,17 @@ def testPaddingCrossEntropyFactoredGrad(self): features = tf.to_float(features) weights = tf.to_float(weights) labels = tf.to_int32(labels) - logits = tf.matmul(tf.reshape(features, [rows * cols, depth]), - weights, transpose_b=True) + logits = tf.matmul( + tf.reshape(features, [rows * cols, depth]), weights, transpose_b=True) logits = tf.reshape(logits, [rows, cols, vocab_size]) loss_num, loss_den = common_layers.padded_cross_entropy( logits, labels, label_smoothing=label_smoothing, reduce_sum=False) factored_logits = common_layers.FactoredTensor(features, weights) loss_num_factored, loss_den_factored = ( common_layers.padded_cross_entropy_factored( - factored_logits, labels=labels, label_smoothing=label_smoothing, + factored_logits, + labels=labels, + label_smoothing=label_smoothing, reduce_sum=False)) df, dw = tf.gradients(ys=[loss_num, loss_den], xs=[features, weights]) df_factored, dw_factored = tf.gradients( @@ -471,5 +475,82 @@ def testFactoredTensorImplicitConversion(self): self.assertEqual(out.shape, (3, 4, 6)) +class FnWithCustomGradTest(tf.test.TestCase): + + def testCorrectness(self): + + w = tf.random_uniform([6, 10]) + + def fn(a, b, c): + return tf.layers.dense( + a, + 10, + use_bias=False, + kernel_initializer=lambda shape, dtype, partition_info: w + ) + tf.matmul(b, c) + + def grad_fn(inputs, variables, outputs, grad_outputs): + outputs = outputs[0] + grad_outputs = grad_outputs[0] + grad_inputs = tf.gradients(outputs, inputs, grad_ys=grad_outputs) + grad_vars = tf.gradients(outputs, variables, grad_ys=grad_outputs) + return grad_inputs, grad_vars + + custom_fn = common_layers.fn_with_custom_grad(grad_fn)(fn) + + a = tf.random_uniform([11, 6]) + b = tf.random_uniform([11, 7]) + c = tf.random_uniform([7, 10]) + + out = fn(a, b, c) + custom_out = custom_fn(a, b, c) + self.assertEqual(out.get_shape().as_list(), + custom_out.get_shape().as_list()) + + loss = tf.reduce_mean(out) + custom_loss = tf.reduce_mean(custom_out) + + grads = tf.gradients(loss, [a, b, c] + [tf.trainable_variables()[0]]) + custom_grads = tf.gradients(custom_loss, + [a, b, c] + [tf.trainable_variables()[1]]) + + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + out_val, custom_out_val, grads_val, custom_grads_val = sess.run( + [out, custom_out, grads, custom_grads]) + self.assertAllClose(out_val, custom_out_val) + for g1, g2 in zip(grads_val, custom_grads_val): + self.assertAllClose(g1, g2) + + def testCustomGrad(self): + + def fn(a, b, c): + return tf.layers.dense(a, 10, use_bias=False) + tf.matmul(b, c) + + def grad_fn(inputs, variables, unused_outputs, unused_grad_outputs): + grad_inputs = [tf.ones_like(t) * (i + 1.) for i, t in enumerate(inputs)] + grad_vars = [ + tf.ones_like(t) * (i + len(inputs) + 1.) + for i, t in enumerate(variables) + ] + return grad_inputs, grad_vars + + a = tf.random_uniform([11, 6]) + b = tf.random_uniform([11, 7]) + c = tf.random_uniform([7, 10]) + w = tf.random_uniform([6, 10]) + out = common_layers.fn_with_custom_grad(grad_fn)(fn)(a, b, c) + loss = tf.reduce_mean(out) + grads = tf.gradients(loss, [a, b, c, tf.trainable_variables()[0]]) + expected_grads = [ + tf.ones_like(t) * (i + 1.) for i, t in enumerate([a, b, c, w]) + ] + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + g_val, eg_val = sess.run([grads, expected_grads]) + for g1, g2 in zip(g_val, eg_val): + self.assertAllClose(g1, g2) + + if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 6ca0c676f..9def9f481 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -23,16 +23,14 @@ from __future__ import division from __future__ import print_function -import random import re # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.layers.common_layers import underlying_variable_ref +from tensor2tensor.layers import common_layers import tensorflow as tf -from tensorflow.python.framework import function LAYER_RE = re.compile(".*revlayer_([0-9]*)/([fg])/.*") @@ -151,91 +149,6 @@ def _rev_block_forward(x1, return y1, y2 -def fn_with_custom_grad(grad_fn): - """Decorator to create a subgraph with a custom gradient function. - - The subgraph created by the decorated function is NOT put in a Defun and so - does not suffer from the limitations of the Defun (all subgraph ops on the - same device, no summaries). - - Args: - grad_fn: function with signature - (inputs, variables, outputs, output_grads) -> (grad_inputs, grad_vars), - all of which are lists of Tensors. - - Returns: - Decorator for function such that the gradient is defined by grad_fn. - """ - - def dec(fn): - - def wrapped(*args): - return _fn_with_custom_grad(fn, args, grad_fn) - - return wrapped - - return dec - - -def _fn_with_custom_grad(fn, inputs, grad_fn): - """Create a subgraph with a custom gradient. - - Args: - fn: function that takes inputs as arguments and produces 1 or more Tensors. - inputs: list<Tensor>, will be passed as fn(*inputs). - grad_fn: function with signature - (inputs, vars, outputs, output_grads) -> (grad_inputs, grad_vars), - all of which are lists of Tensors. - - Returns: - fn(*inputs) - """ - with tf.variable_scope(None, default_name="fn_with_custom_grad") as vs: - inputs = list(inputs) - outputs = fn(*inputs) - train_vars = list(vs.trainable_variables()) - - if grad_fn is None: - return outputs - else: - if not (isinstance(outputs, tuple) or isinstance(outputs, list)): - outputs = [outputs] - outputs = list(outputs) - - in_types = [t.dtype for t in inputs] - out_types = [t.dtype for t in outputs] - var_types = [t.dtype for t in train_vars] - - def custom_grad_fn(op, *dys): - """Custom grad fn applying grad_fn for identity Defun.""" - dys = list(dys) - fn_inputs = op.inputs[:len(inputs)] - fn_vars = op.inputs[len(inputs):len(inputs) + len(train_vars)] - fn_outputs = op.inputs[len(inputs) + len(train_vars):] - assert len(fn_outputs) == len(outputs) - assert len(fn_outputs) == len(dys) - - grad_inputs, grad_vars = grad_fn(fn_inputs, fn_vars, fn_outputs, dys) - grad_outputs = [None] * len(fn_outputs) - return tuple(grad_inputs + grad_vars + grad_outputs) - - # The Defun takes as input the original inputs, the trainable variables - # created in fn, and the outputs. In the forward it passes through the - # outputs. In the backwards, it produces gradients for the original inputs - # and the trainable variables. - @function.Defun( - *(in_types + var_types + out_types), - func_name="identity_custom_grad%d" % random.randint(1, 10**9), - python_grad_func=custom_grad_fn, - shape_func=lambda _: [t.get_shape() for t in outputs]) - def identity(*args): - outs = args[len(inputs) + len(train_vars):] - return tuple([tf.identity(t) for t in outs]) - - id_out = identity(*(inputs + train_vars + outputs)) - return id_out - - def rev_block(x1, x2, f, @@ -321,7 +234,7 @@ def custom_grad_fn(inputs, variables, ys, grad_ys): g_vars_idxs = [[] for _ in range(num_layers)] for i, t in enumerate(variables): - ref = underlying_variable_ref(t) + ref = common_layers.underlying_variable_ref(t) # Use the name to identify the layer number and function (f or g) regex = LAYER_RE.match(ref.name) @@ -387,7 +300,7 @@ def custom_grad_fn(inputs, variables, ys, grad_ys): return [grad_x1, grad_x2] + side_input_grads, variable_grads # Need a forward function with positional arguments - @fn_with_custom_grad(custom_grad_fn if is_training else None) + @common_layers.fn_with_custom_grad(custom_grad_fn if is_training else None) def forward(x1, x2, *side_inputs): f_side = side_inputs[:len(f_side_input)] g_side = side_inputs[len(f_side_input):] diff --git a/tensor2tensor/layers/rev_block_test.py b/tensor2tensor/layers/rev_block_test.py index a668ff726..5aecc8ea3 100644 --- a/tensor2tensor/layers/rev_block_test.py +++ b/tensor2tensor/layers/rev_block_test.py @@ -137,82 +137,5 @@ def f(x): self._testRevBlock(x=x, f=f) -class FnWithCustomGradTest(tf.test.TestCase): - - def testCorrectness(self): - - w = tf.random_uniform([6, 10]) - - def fn(a, b, c): - return tf.layers.dense( - a, - 10, - use_bias=False, - kernel_initializer=lambda shape, dtype, partition_info: w - ) + tf.matmul(b, c) - - def grad_fn(inputs, variables, outputs, grad_outputs): - outputs = outputs[0] - grad_outputs = grad_outputs[0] - grad_inputs = tf.gradients(outputs, inputs, grad_ys=grad_outputs) - grad_vars = tf.gradients(outputs, variables, grad_ys=grad_outputs) - return grad_inputs, grad_vars - - custom_fn = rev_block.fn_with_custom_grad(grad_fn)(fn) - - a = tf.random_uniform([11, 6]) - b = tf.random_uniform([11, 7]) - c = tf.random_uniform([7, 10]) - - out = fn(a, b, c) - custom_out = custom_fn(a, b, c) - self.assertEqual(out.get_shape().as_list(), - custom_out.get_shape().as_list()) - - loss = tf.reduce_mean(out) - custom_loss = tf.reduce_mean(custom_out) - - grads = tf.gradients(loss, [a, b, c] + [tf.trainable_variables()[0]]) - custom_grads = tf.gradients(custom_loss, - [a, b, c] + [tf.trainable_variables()[1]]) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - out_val, custom_out_val, grads_val, custom_grads_val = sess.run( - [out, custom_out, grads, custom_grads]) - self.assertAllClose(out_val, custom_out_val) - for g1, g2 in zip(grads_val, custom_grads_val): - self.assertAllClose(g1, g2) - - def testCustomGrad(self): - - def fn(a, b, c): - return tf.layers.dense(a, 10, use_bias=False) + tf.matmul(b, c) - - def grad_fn(inputs, variables, unused_outputs, unused_grad_outputs): - grad_inputs = [tf.ones_like(t) * (i + 1.) for i, t in enumerate(inputs)] - grad_vars = [ - tf.ones_like(t) * (i + len(inputs) + 1.) - for i, t in enumerate(variables) - ] - return grad_inputs, grad_vars - - a = tf.random_uniform([11, 6]) - b = tf.random_uniform([11, 7]) - c = tf.random_uniform([7, 10]) - w = tf.random_uniform([6, 10]) - out = rev_block.fn_with_custom_grad(grad_fn)(fn)(a, b, c) - loss = tf.reduce_mean(out) - grads = tf.gradients(loss, [a, b, c, tf.trainable_variables()[0]]) - expected_grads = [ - tf.ones_like(t) * (i + 1.) for i, t in enumerate([a, b, c, w]) - ] - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - g_val, eg_val = sess.run([grads, expected_grads]) - for g1, g2 in zip(g_val, eg_val): - self.assertAllClose(g1, g2) - - if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 6a5f6a0b3..5bb63c303 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -79,12 +79,14 @@ def postprocess(x, y): moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] if hparams.diet_experts: hsize, = moe_hidden_sizes - diet_optimizer = diet.DietAdamOptimizer(diet.diet_adam_optimizer_params()) - expert_fn = lambda x: diet.diet_expert(x, hsize, diet_optimizer) + + def _diet_expert(x): + return diet.diet_expert(x, hsize, diet.diet_adam_optimizer_params()) + + expert_fn = _diet_expert else: expert_fn = expert_utils.ffn_expert_fn( - hparams.hidden_size, moe_hidden_sizes, - hparams.hidden_size) + hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope( @@ -114,7 +116,7 @@ def postprocess(x, y): attention_kq_size=hparams.attention_kq_size, attention_v_size=hparams.attention_v_size) # TODO(avaswani, epot, noam): Do we need to divide by num shards ? - extra_loss += tf.add_n(loss)/dp.n + extra_loss += tf.add_n(loss) / dp.n else: raise ValueError("Only {} supported for now.".format( AttentionMoeType.get_choices())) @@ -158,9 +160,8 @@ def attention_lm_moe_prepare_decoder(targets, hparams): to implement masked attention and possibly baises for diagonal alignments """ if hparams.prepend_mode == "prepend_inputs_full_attention": - decoder_self_attention_bias = ( - common_attention.attention_bias_prepended( - common_attention.embedding_to_padding(targets))) + decoder_self_attention_bias = (common_attention.attention_bias_prepended( + common_attention.embedding_to_padding(targets))) else: decoder_self_attention_bias = ( common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) diff --git a/tensor2tensor/utils/diet.py b/tensor2tensor/utils/diet.py index 73077ea72..4ff44de5b 100644 --- a/tensor2tensor/utils/diet.py +++ b/tensor2tensor/utils/diet.py @@ -13,25 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Diet varaibles are much more memory-efficient than regular variables. +"""Diet variables are much more memory-efficient than regular variables. Using diet variables, we can reduce memory overhead per parameter from 16 bytes to 2 bytes, allowing for up to 4B parameters per GPU. -This is an idea by rsepassi about how make this more generally useful. -with diet_variable_scope(diet_options=opts): - custom variable getter that creates vars with diet_options - per variable have fn that does the optimization acc to diet_options -@forward_with_diet_backwards fn decorator +Functions that build subgraphs with variables can be made to use diet variables +by using the fn_with_diet_vars decorator. """ - from collections import defaultdict +import copy import math # Dependency imports -from tensor2tensor.layers.common_layers import underlying_variable +from tensor2tensor.layers import common_layers import tensorflow as tf -from tensorflow.python.framework import function def diet_adam_optimizer_params(): @@ -43,19 +39,66 @@ def diet_adam_optimizer_params(): return tf.contrib.training.HParams( quantize=int(True), # use 16-bit fixed-point quantization_scale=10.0 / tf.int16.max, - optimizer="factored_adam", + optimizer="DietAdam", learning_rate=1.0, learning_rate_warmup_steps=2000, learning_rate_decay_scheme="noam", # "noam" or "none" epsilon=1e-10, beta1=0.0, # we can save memory if beta1=0 beta2=0.98, - randomized_updates=int(True), # use unbiased roundoff in updates factored_second_moment_accumulator=int(True), # this saves memory ) -class DietAdamOptimizer(object): +def diet_expert(x, hidden_size, params): + """A two-layer feed-forward network with relu activation on hidden layer. + + Uses diet variables. + Recompuets hidden layer on backprop to save activation memory. + + Args: + x: a Tensor with shape [batch, io_size] + hidden_size: an integer + params: a diet variable HParams object. + + Returns: + a Tensor with shape [batch, io_size] + """ + + @fn_with_diet_vars(params) + def diet_expert_internal(x): + dim = x.get_shape().as_list()[-1] + h = tf.layers.dense(x, hidden_size, activation=tf.nn.relu, use_bias=False) + y = tf.layers.dense(h, dim, use_bias=False) + y *= tf.rsqrt(tf.to_float(dim * hidden_size)) + return y + + return diet_expert_internal(x) + + +class DietVariableOptimizer(object): + """Base class for Diet variable optimizers.""" + + def __init__(self, params): + self._params = params + self._global_step = tf.train.get_or_create_global_step() + + @property + def params(self): + return self._params + + @property + def global_step(self): + return self._global_step + + def create_slots(self, var): + raise NotImplementedError() + + def update_variable(self, var, grad_var): + raise NotImplementedError() + + +class DietAdamOptimizer(DietVariableOptimizer): """A memory efficient optimizer for memory-efficient variables. We employ the following techniques: @@ -97,265 +140,221 @@ class DietAdamOptimizer(object): outside. """ - def __init__(self, hparams): - """Create a DietAdamOptimizer. + def create_slots(self, var): + """Create the factorized Adam accumulators for diet variables.""" + params = self.params + shape = var.get_shape().as_list() - Args: - hparams: a hyperparameters object - see diet_adam_optimizer_params() - """ - self._hparams = hparams - self._global_step = tf.to_float( - tf.contrib.framework.get_global_step()) + 1.0 - self._initializer_dependencies = defaultdict(list) + if not hasattr(params, "slots"): + params.slots = defaultdict(dict) - @property - def dtype(self): - """The data type used for the variables.""" - return tf.float16 if self._hparams.quantize else tf.float32 - - def get_variable(self, name, shape): - """Create a diet variable. - - Args: - name: a string - shape: a list of integers - - Returns: - a variable - """ - var = tf.get_variable( - name, shape, trainable=False, - dtype=self.dtype, - initializer=self._initializer()) - self._create_slots(var, name) - var.optimizer = self - return var - - def _create_slots(self, var, name): - """Create auxiliary slots for a variable. - - Args: - var: a tf.Variable - name: a string - """ - hparams = self._hparams - shape = var.get_shape().as_list() - if hparams.factored_second_moment_accumulator and len(shape) == 2: - var.adam_vr = tf.get_variable( - name + "_adam_vr", [shape[0], 1], trainable=False, + name = var.op.name + slots = params.slots[name] + + if params.factored_second_moment_accumulator and len(shape) == 2: + slots["adam_vr"] = tf.get_variable( + name + "_adam_vr", [shape[0], 1], + trainable=False, initializer=tf.zeros_initializer()) - var.adam_vc = tf.get_variable( - name + "_adam_vc", [1, shape[1]], trainable=False, + slots["adam_vc"] = tf.get_variable( + name + "_adam_vc", [1, shape[1]], + trainable=False, initializer=tf.zeros_initializer()) else: - var.adam_v = tf.get_variable( - name + "_adam_v", shape, trainable=False, + slots["adam_v"] = tf.get_variable( + name + "_adam_v", + shape, + trainable=False, initializer=tf.zeros_initializer()) - if hparams.beta1 != 0.0: - var.adam_m = tf.get_variable( - name + "_adam_m", shape, trainable=False, + if params.beta1 != 0.0: + slots["adam_m"] = tf.get_variable( + name + "_adam_m", + shape, + trainable=False, initializer=tf.zeros_initializer()) - def _quantize(self, x, randomize=True): - """Quantize to tf.int16, then bitcast to tf.float16. - - The reason for float16 is that for some reason, tensorflow refuses to put - integer variables on gpu. - - Args: - x: a Tensor of type tf.float32 - randomize: a boolean - - Returns: - a Tensor of type tf.float16 - """ - hparams = self._hparams - if not hparams.quantize: - return x - if not randomize: - return tf.bitcast( - tf.cast(x / hparams.quantization_scale, tf.int16), tf.float16) - abs_x = tf.abs(x) - sign_x = tf.sign(x) - y = abs_x / hparams.quantization_scale - y = tf.floor(y + tf.random_uniform(tf.shape(x))) - y = tf.minimum(y, tf.int16.max) * sign_x - q = tf.bitcast(tf.cast(y, tf.int16), tf.float16) - return q - - def dequantize(self, q): - """Approximate inverse of _quantize(). - - Args: - q: a Tensor with type tf.float16 + def update_variable(self, var, grad_var): + """Update the variable and its slots.""" + params = self.params + global_step = tf.to_float(self.global_step) + 1 - Returns: - a Tensor with type tf.float32 - """ - hparams = self._hparams - if hparams.quantize: - return tf.to_float(tf.bitcast(q, tf.int16)) * hparams.quantization_scale - else: - return q - - def _initializer(self): - """Returns an initializer function. - - Returns: - a function - """ - hparams = self._hparams - device = tf.constant(1.0).device - def _initializer(shape, dtype=self.dtype, partition_info=None): - assert dtype == self.dtype - del partition_info - # make sure no two initializers run simultaneously (to conserve memory) - with tf.control_dependencies(self._initializer_dependencies[device]): - float_range = math.sqrt(3) - ret = tf.random_uniform(shape, -float_range, float_range) - if hparams.quantize: - ret = self._quantize(ret, randomize=False) - self._initializer_dependencies[device] = [ret] - return ret - return _initializer - - def update(self, var, grad): - """Update a diet varaible given a gradient. - - Args: - var: a variable - grad: a Tensor - - Returns: - an update op. Make sure that something depends on this - op if you want it to run. - """ - hparams = self._hparams - var = underlying_variable(var) # compute learning rate - lrate = hparams.learning_rate - if hparams.learning_rate_decay_scheme == "noam": - lrate *= tf.minimum( - self._global_step * hparams.learning_rate_warmup_steps ** -1.5, - self._global_step ** -0.5) + lrate = params.learning_rate + if params.learning_rate_decay_scheme == "noam": + lrate *= tf.minimum(global_step * params.learning_rate_warmup_steps**-1.5, + global_step**-0.5) else: - assert hparams.learning_rate_decay_scheme == "none" - lrate *= tf.minumum( - self._global_step / hparams.learning_rate_warmup_steps, 1.0) + assert params.learning_rate_decay_scheme == "none" + lrate *= tf.minumum(global_step / params.learning_rate_warmup_steps, 1.0) + # compute adjustment due to second moment - grad_squared = tf.square(grad) - beta2_pow = tf.pow(hparams.beta2, self._global_step) - if hparams.factored_second_moment_accumulator and len(var.shape) == 2: - vr_update = tf.assign( - var.adam_vr, - var.adam_vr * hparams.beta2 + - tf.reduce_mean(grad_squared, 1, keep_dims=True) * - (1.0 - hparams.beta2)) - vc_update = tf.assign( - var.adam_vc, - var.adam_vc * hparams.beta2 + - tf.reduce_mean(grad_squared, 0, keep_dims=True) * - (1.0 - hparams.beta2)) + slots = params.slots[var.op.name] + grad_squared = tf.square(grad_var) + beta2_pow = tf.pow(params.beta2, global_step) + if params.factored_second_moment_accumulator and len(var.shape) == 2: + vr_update = tf.assign(slots["adam_vr"], slots["adam_vr"] * params.beta2 + + tf.reduce_mean(grad_squared, 1, keep_dims=True) * + (1.0 - params.beta2)) + vc_update = tf.assign(slots["adam_vc"], slots["adam_vc"] * params.beta2 + + tf.reduce_mean(grad_squared, 0, keep_dims=True) * + (1.0 - params.beta2)) with tf.control_dependencies([vr_update, vc_update]): - vr = tf.sqrt(var.adam_vr / (1.0 - beta2_pow)) + hparams.epsilon - vc = tf.sqrt(var.adam_vc / (1.0 - beta2_pow)) + hparams.epsilon + vr = tf.sqrt(slots["adam_vr"] / (1.0 - beta2_pow)) + params.epsilon + vc = tf.sqrt(slots["adam_vc"] / (1.0 - beta2_pow)) + params.epsilon vc /= tf.reduce_mean(vc) denom = vr * vc else: - v_update = tf.assign( - var.adam_v, - var.adam_v * hparams.beta2 + grad_squared * (1.0 - hparams.beta2)) + v_update = tf.assign(slots["adam_v"], + slots["adam_v"] * params.beta2 + grad_squared * + (1.0 - params.beta2)) with tf.control_dependencies([v_update]): - denom = tf.sqrt(var.adam_v / (1.0 - beta2_pow)) + hparams.epsilon + denom = tf.sqrt(slots["adam_v"] / (1.0 - beta2_pow)) + params.epsilon + # compute momentum if applicable - if hparams.beta1 != 0.0: - m_update = tf.assign( - var.adam_m, var.adam_m * hparams.beta1 + grad * (1.0 - hparams.beta1)) + if params.beta1 != 0.0: + m_update = tf.assign(slots["adam_m"], + slots["adam_m"] * params.beta1 + grad_var * + (1.0 - params.beta1)) with tf.control_dependencies([m_update]): - grad = var.adam_m - subtrahend = lrate * grad / denom - new_val = self._quantize(self.dequantize(var) - subtrahend) + grad_var = slots["adam_m"] + + # update var + subtrahend = lrate * grad_var / denom + new_val = _quantize(_dequantize(var, params) - subtrahend, params) return tf.assign(var, new_val) -def dependency_dict(): - """Get or create a defaultdict(list) that is stored in the default graph. +def _create_diet_optimizer(params): + if params.optimizer == "DietAdam": + return DietAdamOptimizer(params) + else: + raise ValueError("Unrecognized diet optimizer") - This is used when we want to make sure that certain operations are performed - sequentially. - example use - make sure calls to foo on the same device execute sequentially: +def _quantize(x, params, randomize=True): + """Quantize x according to params, optionally randomizing the rounding.""" + if not params.quantize: + return x - def foo(x, device) - key = "foo " + device - with tf.device(device): - with tf.control_dependencies(dependency_dict()[key]): - y = bar(x) - dependency_dict()[key] = y - return y + if not randomize: + return tf.bitcast( + tf.cast(x / params.quantization_scale, tf.int16), tf.float16) - Returns: - a defaultdict whose default value is the empty list - """ - if not hasattr(tf.get_default_graph(), "dependency_dict"): - setattr(tf.get_default_graph(), "dependency_dict", defaultdict(list)) - return tf.get_default_graph().dependency_dict - - -def _diet_expert_internal(x, w0, w1): - h = tf.matmul(x, w0) - h = tf.nn.relu(h) - y = tf.matmul(h, w1) - y *= tf.rsqrt(tf.to_float(tf.shape(w0)[0] * tf.shape(w1)[0])) - y.set_shape(x.get_shape()) - return y - - -def _diet_expert_grad(op, dy): - x, w0, w1 = op.inputs - w0_var = underlying_variable(w0) - w1_var = underlying_variable(w1) - key = "diet_expert_backward_deps " + dy.device - with tf.control_dependencies(dependency_dict()[key]): - w0 = w0_var.optimizer.dequantize(w0_var) - w1 = w1_var.optimizer.dequantize(w1_var) - y = _diet_expert_internal(x, w0, w1) - dx, dw0, dw1 = tf.gradients(ys=[y], xs=[x, w0, w1], grad_ys=[dy]) - w0_update = w0_var.optimizer.update(w0_var, dw0) - w1_update = w1_var.optimizer.update(w1_var, dw1) - with tf.control_dependencies([w0_update, w1_update]): - dx = tf.identity(dx) - dependency_dict()[key] = [dx] - return dx, None, None - - -def diet_expert(x, hidden_size, optimizer): - """A two-layer feed-forward network with relu activation on hidden layer. + abs_x = tf.abs(x) + sign_x = tf.sign(x) + y = abs_x / params.quantization_scale + y = tf.floor(y + tf.random_uniform(tf.shape(x))) + y = tf.minimum(y, tf.int16.max) * sign_x + q = tf.bitcast(tf.cast(y, tf.int16), tf.float16) + return q - Uses diet variables. - Recompuets hidden layer on backprop to save activation memory. - Args: - x: a Tensor with shape [batch, io_size] - hidden_size: an integer - optimizer: a DietAdamOptimizer or some such class +def _dequantize(q, params): + """Dequantize q according to params.""" + if not params.quantize: + return q + return tf.to_float(tf.bitcast(q, tf.int16)) * params.quantization_scale - Returns: - a Tensor with shape [batch, io_size] - """ - @function.Defun(python_grad_func=_diet_expert_grad, - shape_func=lambda _: (x.get_shape(),)) - def _diet_expert_fn(x, w0, w1): - w0 = optimizer.dequantize(w0) - w1 = optimizer.dequantize(w1) - return _diet_expert_internal(x, w0, w1) - - with tf.device(x.device): - _, io_size = x.get_shape().as_list() - w0_var = optimizer.get_variable("w0", [io_size, hidden_size]) - w1_var = optimizer.get_variable("w1", [hidden_size, io_size]) - key = "diet_expert_forward_deps " + x.device - with tf.control_dependencies(dependency_dict()[key]): - ret = _diet_expert_fn(x, w0_var, w1_var) - dependency_dict()[key] = [ret] + +def make_diet_var_getter(params): + """Create a custom variable getter for diet variables according to params.""" + + def diet_var_initializer(shape, dtype, partition_info=None): + del dtype + del partition_info + + with common_layers.fn_device_dependency("diet_init") as out_deps: + float_range = math.sqrt(3) + ret = tf.random_uniform(shape, -float_range, float_range) + if params.quantize: + ret = _quantize(ret, params, randomize=False) + out_deps.append(ret) return ret + + def diet_var_getter(getter, **kwargs): + """Get diet variable and return it dequantized.""" + if params.quantize: + kwargs["dtype"] = tf.float16 + kwargs["initializer"] = diet_var_initializer + kwargs["trainable"] = False + + base_var = getter(**kwargs) + + dequantized = _dequantize(base_var, params) + + if not hasattr(params, "dequantized"): + params.dequantized = defaultdict(list) + params.dequantized[base_var.name].append(dequantized) + + return dequantized + + return diet_var_getter + + +def _fn_with_diet_vars(fn, args, params): + """Call function with args; use diet variables according to params.""" + + vs_ctr = [] + + def grad_fn(inputs, variables, outputs, output_grads): + del outputs # recomputing below + with common_layers.fn_device_dependency("diet_grad", + output_grads[0].device) as out_dep: + with tf.variable_scope(vs_ctr[0], reuse=True): + outputs = fn(*inputs) + + variables = [common_layers.underlying_variable_ref(v) for v in variables] + dequantized_variables = [ + params.dequantized[v.name][-1] for v in variables + ] + + grads = tf.gradients(outputs, inputs + dequantized_variables, + output_grads) + grad_inputs = grads[:len(inputs)] + grad_variables = grads[len(inputs):] + + opt = _create_diet_optimizer(params) + + # Apply grad_variables here + var_updates = [] + for v, dv in zip(variables, grad_variables): + with tf.variable_scope(vs_ctr[0].name): + opt.create_slots(v) + update_op = opt.update_variable(v, dv) + var_updates.append(update_op) + + with tf.control_dependencies(var_updates): + grad_inputs = [tf.identity(dx) for dx in grad_inputs] + + out_dep.append(grad_inputs) + + return grad_inputs, [None] * len(variables) + + @common_layers.fn_with_custom_grad(grad_fn, use_global_vars=True) + def forward(*inputs): + with tf.variable_scope( + None, default_name="diet", + custom_getter=make_diet_var_getter(params)) as vs: + vs_ctr.append(vs) + outputs = fn(*inputs) + return outputs + + with common_layers.fn_device_dependency("diet_forward", + args[0].device) as out_dep: + outputs = forward(*args) + out_dep.append(outputs) + return outputs + + +def fn_with_diet_vars(params): + """Decorator for graph-building function to use diet variables.""" + params = copy.copy(params) + + def dec(fn): + + def wrapped(*args): + return _fn_with_diet_vars(fn, args, params) + + return wrapped + + return dec diff --git a/tensor2tensor/utils/diet_test.py b/tensor2tensor/utils/diet_test.py new file mode 100644 index 000000000..9c0c570cc --- /dev/null +++ b/tensor2tensor/utils/diet_test.py @@ -0,0 +1,70 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for common layers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.utils import diet + +import tensorflow as tf + + +class DietVarTest(tf.test.TestCase): + + def testDiet(self): + + params = diet.diet_adam_optimizer_params() + + @diet.fn_with_diet_vars(params) + def model_fn(x): + y = tf.layers.dense(x, 10, use_bias=False) + return y + + @diet.fn_with_diet_vars(params) + def model_fn2(x): + y = tf.layers.dense(x, 10, use_bias=False) + return y + + x = tf.random_uniform((10, 10)) + y = model_fn(x) + 10. + y = model_fn2(y) + 10. + grads = tf.gradients(y, [x]) + with tf.control_dependencies(grads): + incr_step = tf.assign_add(tf.train.get_or_create_global_step(), 1) + + train_op = tf.group(incr_step, *grads) + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + orig_vals = sess.run(tf.global_variables()) + for _ in xrange(10): + sess.run(train_op) + new_vals = sess.run(tf.global_variables()) + + different = [] + for old, new in zip(orig_vals, new_vals): + try: + self.assertAllClose(old, new) + except AssertionError: + different.append(True) + self.assertEqual(len(different), len(tf.global_variables())) + + +if __name__ == "__main__": + tf.test.main() From b52130f1290e17a39b48e20114506d841b7fadf6 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 25 Aug 2017 14:58:49 -0700 Subject: [PATCH 0312/4095] Open source IPython Notebook for creating visualization from the Transformers paper. PiperOrigin-RevId: 166529624 --- .../TransformerVisualization.ipynb | 477 ++++++++++++++++++ tensor2tensor/visualization/attention.js | 363 +++++++++++++ tensor2tensor/visualization/attention.py | 150 ++++++ 3 files changed, 990 insertions(+) create mode 100644 tensor2tensor/visualization/TransformerVisualization.ipynb create mode 100644 tensor2tensor/visualization/attention.js create mode 100644 tensor2tensor/visualization/attention.py diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb new file mode 100644 index 000000000..ef1c7b45d --- /dev/null +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Your Own Visualizations!\n", + "Instructions:\n", + "1. Install tensor2tensor and train up a Transformer model following the instruction in the repository https://github.com/tensorflow/tensor2tensor.\n", + "2. Update cell 3 to point to your checkpoint, it is currently set up to read from the default checkpoint location that would be created from following the instructions above.\n", + "3. If you used custom hyper parameters then update cell 4.\n", + "4. Run the notebook!" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import absolute_import\n", + "from __future__ import division\n", + "from __future__ import print_function\n", + "\n", + "import json\n", + "\n", + "import tensorflow as tf\n", + "import numpy as np\n", + "\n", + "from tensor2tensor.utils import trainer_utils as utils\n", + "from tensor2tensor.visualization import attention" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "require.config({\n", + " paths: {\n", + " d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'\n", + " }\n", + "});" + ], + "text/plain": [ + "<IPython.core.display.Javascript object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%javascript\n", + "require.config({\n", + " paths: {\n", + " d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'\n", + " }\n", + "});" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/llion/t2t_train/wmt_ende_tokens_32k/transformer-transformer_base_single_gpu\n" + ] + } + ], + "source": [ + "import os\n", + "# PUT THE MODEL YOU WANT TO LOAD HERE!\n", + "\n", + "PROBLEM = 'wmt_ende_tokens_32k'\n", + "MODEL = 'transformer'\n", + "HPARAMS = 'transformer_base_single_gpu'\n", + "\n", + "DATA_DIR=os.path.expanduser('~/t2t_data')\n", + "TRAIN_DIR=os.path.expanduser('~/t2t_train/%s/%s-%s' % (PROBLEM, MODEL, HPARAMS))\n", + "print(TRAIN_DIR)\n", + "\n", + "FLAGS = tf.flags.FLAGS\n", + "FLAGS.problems = PROBLEM\n", + "FLAGS.hparams_set = HPARAMS\n", + "FLAGS.data_dir = DATA_DIR\n", + "FLAGS.model = MODEL" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:datashard_devices: ['gpu:0']\n", + "INFO:tensorflow:caching_devices: None\n" + ] + } + ], + "source": [ + "hparams = utils.create_hparams(HPARAMS, DATA_DIR)\n", + "\n", + "# SET EXTRA HYPER PARAMS HERE!\n", + "# e.g.\n", + "# hparams.batch_size = 1024\n", + "\n", + "num_datashards = utils.devices.data_parallelism().n\n", + "\n", + "problems_data = utils.get_data_filepatterns(\n", + " DATA_DIR, tf.contrib.learn.ModeKeys.EVAL)\n", + "input_fn = utils.input_fn_builder.build_input_fn(\n", + " mode=tf.contrib.learn.ModeKeys.EVAL,\n", + " hparams=hparams,\n", + " data_file_patterns=problems_data,\n", + " num_datashards=num_datashards)\n", + "\n", + "inputs, target = input_fn()\n", + "features = inputs\n", + "features['targets'] = target" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def encode(string):\n", + " subtokenizer = hparams.problems[0].vocabulary['inputs']\n", + " return [subtokenizer.encode(string) + [1] + [0]]\n", + "\n", + "def decode(ids):\n", + " return hparams.problems[0].vocabulary['targets'].decode(np.squeeze(ids))\n", + "\n", + "def to_tokens(ids):\n", + " ids = np.squeeze(ids)\n", + " subtokenizer = hparams.problems[0].vocabulary['targets']\n", + " tokens = []\n", + " for _id in ids:\n", + " if _id == 0:\n", + " tokens.append('<PAD>')\n", + " elif _id == 1:\n", + " tokens.append('<EOS>')\n", + " else:\n", + " tokens.append(subtokenizer._subtoken_id_to_subtoken_string(_id))\n", + " return tokens" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:datashard_devices: ['gpu:0']\n", + "INFO:tensorflow:caching_devices: None\n", + "INFO:tensorflow:Doing model_fn_body took 1.881 sec.\n", + "INFO:tensorflow:This model_fn took 2.023 sec.\n" + ] + } + ], + "source": [ + "model_fn=utils.model_builder.build_model_fn(MODEL, hparams=hparams)\n", + "sharded_logits, training_loss, extra_loss = model_fn(features, target, tf.contrib.learn.ModeKeys.EVAL)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:datashard_devices: ['gpu:0']\n", + "INFO:tensorflow:caching_devices: None\n", + "INFO:tensorflow:Beam Decoding with beam size 4\n", + "INFO:tensorflow:Doing model_fn_body took 1.393 sec.\n", + "INFO:tensorflow:This model_fn took 1.504 sec.\n" + ] + } + ], + "source": [ + "with tf.variable_scope(tf.get_variable_scope(), reuse=True):\n", + " beam_out = model_fn(features, target, tf.contrib.learn.ModeKeys.INFER)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Restoring parameters from /home/llion/t2t_train/wmt_ende_tokens_32k/transformer-transformer_base_single_gpu/model.ckpt-250000\n", + "INFO:tensorflow:Starting standard services.\n", + "INFO:tensorflow:Saving checkpoint to path /home/llion/t2t_train/wmt_ende_tokens_32k/transformer-transformer_base_single_gpu/model.ckpt\n", + "INFO:tensorflow:Starting queue runners.\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sv = tf.train.Supervisor(\n", + " logdir=TRAIN_DIR,\n", + " global_step=tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step'))\n", + "sess = sv.PrepareSession(config=tf.ConfigProto(allow_soft_placement=True))\n", + "sv.StartQueueRunners(\n", + " sess,\n", + " tf.get_default_graph().get_collection(tf.GraphKeys.QUEUE_RUNNERS))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Get the attention tensors from the graph.\n", + "# This need to be done using the training graph since the inference uses a tf.while_loop\n", + "# and you cant fetch tensors from inside a while_loop.\n", + "\n", + "enc_atts = []\n", + "dec_atts = []\n", + "encdec_atts = []\n", + "\n", + "for i in range(hparams.num_hidden_layers):\n", + " enc_att = tf.get_default_graph().get_operation_by_name(\n", + " \"body/model/parallel_0/body/encoder/layer_%i/self_attention/multihead_attention/dot_product_attention/attention_weights\" % i).values()[0]\n", + " dec_att = tf.get_default_graph().get_operation_by_name(\n", + " \"body/model/parallel_0/body/decoder/layer_%i/self_attention/multihead_attention/dot_product_attention/attention_weights\" % i).values()[0]\n", + " encdec_att = tf.get_default_graph().get_operation_by_name(\n", + " \"body/model/parallel_0/body/decoder/layer_%i/encdec_attention/multihead_attention/dot_product_attention/attention_weights\" % i).values()[0]\n", + "\n", + " enc_atts.append(enc_att)\n", + " dec_atts.append(dec_att)\n", + " encdec_atts.append(encdec_att)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test translation from the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:global_step/sec: 0\n", + "Input: For example, during the 2008 general election in Florida, 33% of early voters were African-Americans, who accounted however for only 13% of voters in the State.\n", + "Gold: Beispielsweise waren bei den allgemeinen Wahlen 2008 in Florida 33% der Wähler, die im Voraus gewählt haben, Afro-Amerikaner, obwohl sie nur 13% der Wähler des Bundesstaates ausmachen.\n", + "Gold out: So waren 33 den allgemeinen Wahlen im in der a 33 % der Frühjungdie nur Land die wurden, die ro- Amerikaner, die sie nur 13 % der Wähler im Staates staats betra.\n", + "INFO:tensorflow:Recording summary at step 250000.\n" + ] + } + ], + "source": [ + "inp, out, logits = sess.run([inputs['inputs'], target, sharded_logits['predictions']])\n", + "\n", + "print(\"Input: \", decode(inp[0]))\n", + "print(\"Gold: \", decode(out[0]))\n", + "logits = np.squeeze(logits[0])\n", + "tokens = np.argmax(logits, axis=1)\n", + "print(\"Gold out: \", decode(tokens))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualize Custom Sentence" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "eng = \"I have three dogs.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ich habe drei Hunde.\n" + ] + } + ], + "source": [ + "inp_ids = encode(eng)\n", + "beam_decode = sess.run(beam_out[0]['outputs'], {\n", + " inputs['inputs']: np.expand_dims(np.expand_dims(inp_ids, axis=2), axis=3),\n", + "})\n", + "trans = decode(beam_decode[0])\n", + "print(trans)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "der = decode(beam_decode[0])\n", + "output_ids = encode(der)\n", + "\n", + "# Get attentions\n", + "np_enc_atts, np_dec_atts, np_encdec_atts = sess.run([enc_atts, dec_atts, encdec_atts], {\n", + " inputs['inputs']: np.expand_dims(np.expand_dims(inp_ids, axis=2), axis=3),\n", + " target: np.expand_dims(np.expand_dims(output_ids, axis=2), axis=3),\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "IPython.OutputArea.prototype._should_scroll = function(lines) {\n", + " return false;\n", + "}" + ], + "text/plain": [ + "<IPython.core.display.Javascript object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%javascript\n", + "IPython.OutputArea.prototype._should_scroll = function(lines) {\n", + " return false;\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interpreting the Visualizations\n", + "- The layers drop down allow you to view the different Transformer layers, 0-indexed of course.\n", + " - Tip: The first layer, last layer and 2nd to last layer are usually the most interpretable.\n", + "- The attention dropdown allows you to select different pairs of encoder-decoder attentions:\n", + " - All: Shows all types of attentions together. NOTE: There is no relation between heads of the same color - between the decoder self attention and decoder-encoder attention since they do not share parameters.\n", + " - Input - Input: Shows only the encoder self-attention.\n", + " - Input - Output: Shows the decoder’s attention on the encoder. NOTE: Every decoder layer attends to the final layer of encoder so the visualization will show the attention on the final encoder layer regardless of what layer is selected in the drop down.\n", + " - Output - Output: Shows only the decoder self-attention. NOTE: The visualization might be slightly misleading in the first layer since the text shown is the target of the decoder, the input to the decoder at layer 0 is this text with a GO symbol prepreded.\n", + "- The colored squares represent the different attention heads.\n", + " - You can hide or show a given head by clicking on it’s color.\n", + " - Double clicking a color will hide all other colors, double clicking on a color when it’s the only head showing will show all the heads again.\n", + "- You can hover over a word to see the individual attention weights for just that position.\n", + " - Hovering over the words on the left will show what that position attended to.\n", + " - Hovering over the words on the right will show what positions attended to it.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "inp_text = to_tokens(inp_ids)\n", + "out_text = to_tokens(output_ids)\n", + "\n", + "attention.show(inp_text, out_text, np_enc_atts, np_dec_atts, np_encdec_atts)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/tensor2tensor/visualization/attention.js b/tensor2tensor/visualization/attention.js new file mode 100644 index 000000000..ae2deb6bd --- /dev/null +++ b/tensor2tensor/visualization/attention.js @@ -0,0 +1,363 @@ +/** + * @fileoverview Transformer Visualization D3 javascript code. + */ + +requirejs(['jquery', 'd3'], +function($, d3) { + +var attention = window.attention; + +const TEXT_SIZE = 15; +const BOXWIDTH = TEXT_SIZE * 8; +const BOXHEIGHT = TEXT_SIZE * 1.5; +const WIDTH = 2000; +const HEIGHT = attention.all.bot_text.length * BOXHEIGHT * 2 + 100; +const MATRIX_WIDTH = 150; +const head_colours = d3.scale.category10(); +const CHECKBOX_SIZE = 20; + +function lighten(colour) { + var c = d3.hsl(colour); + var increment = (1 - c.l) * 0.6; + c.l += increment; + c.s -= increment; + return c; +} + +function transpose(mat) { + return mat[0].map(function(col, i) { + return mat.map(function(row) { + return row[i]; + }); + }); +} + +function zip(a, b) { + return a.map(function (e, i) { + return [e, b[i]]; + }); +} + + +function renderVis(id, top_text, bot_text, attention_heads, config) { + $(id).empty(); + var svg = d3.select(id) + .append('svg') + .attr("width", WIDTH) + .attr("height", HEIGHT); + + var att_data = []; + for (var i=0; i < attention_heads.length; i++) { + var att_trans = transpose(attention_heads[i]); + att_data.push(zip(attention_heads[i], att_trans)); + } + + renderText(svg, top_text, true, att_data, 0); + renderText(svg, bot_text, false, att_data, MATRIX_WIDTH + BOXWIDTH); + + renderAttentionHighlights(svg, att_data); + + svg.append("g").classed("attention_heads", true); + + renderAttention(svg, attention_heads); + + draw_checkboxes(config, 0, svg, attention_heads); +} + + +function renderText(svg, text, is_top, att_data, left_pos) { + var id = is_top ? "top" : "bottom"; + var textContainer = svg.append("svg:g") + .attr("id", id); + + textContainer.append("g").classed("attention_boxes", true) + .selectAll("g") + .data(att_data) + .enter() + .append("g") + .selectAll("rect") + .data(function(d) {return d;}) + .enter() + .append("rect") + .attr("x", function(d, i, j) { + return left_pos + box_offset(j); + }) + .attr("y", function(d, i) { + return (+1) * BOXHEIGHT; + }) + .attr("width", BOXWIDTH/active_heads()) + .attr("height", function() { return BOXHEIGHT; }) + .attr("fill", function(d, i, j) { + return head_colours(j); + }) + .style("opacity", 0.0); + + + var tokenContainer = textContainer.append("g").selectAll("g") + .data(text) + .enter() + .append("g"); + + tokenContainer.append("rect") + .classed("background", true) + .style("opacity", 0.0) + .attr("fill", "lightgray") + .attr("x", left_pos) + .attr("y", function(d, i) { + return (i+1) * BOXHEIGHT; + }) + .attr("width", BOXWIDTH) + .attr("height", BOXHEIGHT); + + var theText = tokenContainer.append("text") + .text(function(d) { return d; }) + .attr("font-size", TEXT_SIZE + "px") + .style("cursor", "default") + .style("-webkit-user-select", "none") + .attr("x", left_pos) + .attr("y", function(d, i) { + return (i+1) * BOXHEIGHT; + }); + + if (is_top) { + theText.style("text-anchor", "end") + .attr("dx", BOXWIDTH - TEXT_SIZE) + .attr("dy", TEXT_SIZE); + } else { + theText.style("text-anchor", "start") + .attr("dx", + TEXT_SIZE) + .attr("dy", TEXT_SIZE); + } + + tokenContainer.on("mouseover", function(d, index) { + textContainer.selectAll(".background") + .style("opacity", function(d, i) { + return i == index ? 1.0 : 0.0; + }); + + svg.selectAll(".attention_heads").style("display", "none"); + + svg.selectAll(".line_heads") // To get the nesting to work. + .selectAll(".att_lines") + .attr("stroke-opacity", function(d) { + return 1.0; + }) + .attr("y1", function(d, i) { + if (is_top) { + return (index+1) * BOXHEIGHT + (BOXHEIGHT/2); + } else { + return (i+1) * BOXHEIGHT + (BOXHEIGHT/2); + } + }) + .attr("x1", BOXWIDTH) + .attr("y2", function(d, i) { + if (is_top) { + return (i+1) * BOXHEIGHT + (BOXHEIGHT/2); + } else { + return (index+1) * BOXHEIGHT + (BOXHEIGHT/2); + } + }) + .attr("x2", BOXWIDTH + MATRIX_WIDTH) + .attr("stroke-width", 2) + .attr("stroke", function(d, i, j) { + return head_colours(j); + }) + .attr("stroke-opacity", function(d, i, j) { + if (is_top) {d = d[0];} else {d = d[1];} + if (config.head_vis[j]) { + if (d) { + return d[index]; + } else { + return 0.0; + } + } else { + return 0.0; + } + }); + + + function updateAttentionBoxes() { + var id = is_top ? "bottom" : "top"; + var the_left_pos = is_top ? MATRIX_WIDTH + BOXWIDTH : 0; + svg.select("#" + id) + .selectAll(".attention_boxes") + .selectAll("g") + .selectAll("rect") + .attr("x", function(d, i, j) { return the_left_pos + box_offset(j); }) + .attr("y", function(d, i) { return (i+1) * BOXHEIGHT; }) + .attr("width", BOXWIDTH/active_heads()) + .attr("height", function() { return BOXHEIGHT; }) + .style("opacity", function(d, i, j) { + if (is_top) {d = d[0];} else {d = d[1];} + if (config.head_vis[j]) + if (d) { + return d[index]; + } else { + return 0.0; + } + else + return 0.0; + + }); + } + + updateAttentionBoxes(); + }); + + textContainer.on("mouseleave", function() { + d3.select(this).selectAll(".background") + .style("opacity", 0.0); + + svg.selectAll(".att_lines").attr("stroke-opacity", 0.0); + svg.selectAll(".attention_heads").style("display", "inline"); + svg.selectAll(".attention_boxes") + .selectAll("g") + .selectAll("rect") + .style("opacity", 0.0); + }); +} + +function renderAttentionHighlights(svg, attention) { + var line_container = svg.append("g"); + line_container.selectAll("g") + .data(attention) + .enter() + .append("g") + .classed("line_heads", true) + .selectAll("line") + .data(function(d){return d;}) + .enter() + .append("line").classed("att_lines", true); +} + +function renderAttention(svg, attention_heads) { + var line_container = svg.selectAll(".attention_heads"); + line_container.html(null); + for(var h=0; h<attention_heads.length; h++) { + for(var a=0; a<attention_heads[h].length; a++) { + for(var s=0; s<attention_heads[h][a].length; s++) { + line_container.append("line") + .attr("y1", (s+1) * BOXHEIGHT + (BOXHEIGHT/2)) + .attr("x1", BOXWIDTH) + .attr("y2", (a+1) * BOXHEIGHT + (BOXHEIGHT/2)) + .attr("x2", BOXWIDTH + MATRIX_WIDTH) + .attr("stroke-width", 2) + .attr("stroke", head_colours(h)) + .attr("stroke-opacity", function() { + if (config.head_vis[h]) { + return attention_heads[h][a][s]/active_heads(); + } else { + return 0.0; + } + }()); + } + } + } +} + +// Checkboxes +function box_offset(i) { + var num_head_above = config.head_vis.reduce( + function(acc, val, cur) {return val && cur < i ? acc + 1: acc;}, 0); + return num_head_above*(BOXWIDTH / active_heads()); +} + +function active_heads() { + return config.head_vis.reduce(function(acc, val) { + return val ? acc + 1: acc; + }, 0); +} + +function draw_checkboxes(config, top, svg, attention_heads) { + var checkboxContainer = svg.append("g"); + var checkbox = checkboxContainer.selectAll("rect") + .data(config.head_vis) + .enter() + .append("rect") + .attr("fill", function(d, i) { + return head_colours(i); + }) + .attr("x", function(d, i) { + return (i+1) * CHECKBOX_SIZE; + }) + .attr("y", top) + .attr("width", CHECKBOX_SIZE) + .attr("height", CHECKBOX_SIZE); + + function update_checkboxes() { + checkboxContainer.selectAll("rect") + .data(config.head_vis) + .attr("fill", function(d, i) { + var head_colour = head_colours(i); + var colour = d ? head_colour : lighten(head_colour); + return colour; + }); + } + + update_checkboxes(); + + checkbox.on("click", function(d, i) { + if (config.head_vis[i] && active_heads() == 1) return; + config.head_vis[i] = !config.head_vis[i]; + update_checkboxes(); + renderAttention(svg, attention_heads); + }); + + checkbox.on("dblclick", function(d, i) { + // If we double click on the only active head then reset + if (config.head_vis[i] && active_heads() == 1) { + config.head_vis = new Array(config.num_heads).fill(true); + } else { + config.head_vis = new Array(config.num_heads).fill(false); + config.head_vis[i] = true; + } + update_checkboxes(); + renderAttention(svg, attention_heads); + }); +} + +var config = { + layer: 0, + att_type: 'all', +}; + +function visualize() { + var num_heads = attention['all']['att'][0].length; + config.head_vis = new Array(num_heads).fill(true); + config.num_heads = num_heads; + config.attention = attention; + + render(); +} + +function render() { + var conf = config.attention[config.att_type]; + + var top_text = conf.top_text; + var bot_text = conf.bot_text; + var attention = conf.att[config.layer]; + + $("#vis svg").empty(); + renderVis("#vis", top_text, bot_text, attention, config); +} + +$("#layer").empty(); +for(var i=0; i<6; i++) { + $("#layer").append($("<option />").val(i).text(i)); +} + +$("#layer").on('change', function(e) { + config.layer = +e.currentTarget.value; + render(); +}); + +$("#att_type").on('change', function(e) { + config.att_type = e.currentTarget.value; + render(); +}); + +$("button").on('click', visualize); + +visualize(); + +}); diff --git a/tensor2tensor/visualization/attention.py b/tensor2tensor/visualization/attention.py new file mode 100644 index 000000000..2c1f61c9c --- /dev/null +++ b/tensor2tensor/visualization/attention.py @@ -0,0 +1,150 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for postprocessing and displaying tranformer attentions. + +This module is deigned to be called from an ipython notebook. +""" + +import json +import os + +from IPython.display import HTML +from IPython.display import Javascript + +import numpy as np + +vis_html = """ + <span style="user-select:none"> + Layer: <select id="layer"></select> + Attention: <select id="att_type"> + <option value="all">All</option> + <option value="inp_inp">Input - Input</option> + <option value="inp_out">Input - Output</option> + <option value="out_out">Output - Output</option> + </select> + </span> + <div id='vis'></div> +""" + + +__location__ = os.path.realpath( + os.path.join(os.getcwd(), os.path.dirname(__file__))) +vis_js = open(os.path.join(__location__, 'attention.js')).read() + + +def show(inp_text, out_text, enc_atts, dec_atts, encdec_atts): + attention = _get_attention( + inp_text, out_text, enc_atts, dec_atts, encdec_atts) + att_json = json.dumps(attention) + _show_attention(att_json) + + +def _show_attention(att_json): + display(HTML(vis_html)) # pylint: disable=undefined-variable + display(Javascript('window.attention = %s' % att_json)) # pylint: disable=undefined-variable + display(Javascript(vis_js)) # pylint: disable=undefined-variable + + +def _get_attention(inp_text, out_text, enc_atts, dec_atts, encdec_atts): + """Compute representation of the attention ready for the d3 visualization. + + Args: + inp_text: list of strings, words to be displayed on the left of the vis + out_text: list of strings, words to be displayed on the right of the vis + enc_atts: numpy array, encoder self-attentions + [num_layers, batch_size, num_heads, enc_length, enc_length] + dec_atts: numpy array, decoder self-attentions + [num_layers, batch_size, num_heads, dec_length, dec_length] + encdec_atts: numpy array, encoder-decoder attentions + [num_layers, batch_size, num_heads, enc_length, dec_length] + + Returns: + Dictionary of attention representations with the structure: + { + 'all': Representations for showing all attentions at the same time. + 'inp_inp': Representations for showing encoder self-attentions + 'inp_out': Representations for showing encoder-decoder attentions + 'out_out': Representations for showing decoder self-attentions + } + and each sub-dictionary has structure: + { + 'att': list of inter attentions matrices, one for each attention head + 'top_text': list of strings, words to be displayed on the left of the vis + 'bot_text': list of strings, words to be displayed on the right of the vis + } + """ + def get_full_attention(layer): + """Get the full input+output - input+output attentions.""" + enc_att = enc_atts[layer][0], + dec_att = dec_atts[layer][0], + encdec_att = encdec_atts[layer][0] + enc_att = np.transpose(enc_att, [0, 2, 1]) + dec_att = np.transpose(dec_att, [0, 2, 1]) + encdec_att = np.transpose(encdec_att, [0, 2, 1]) + # [heads, query_length, memory_length] + enc_length = enc_att.shape[1] + dec_length = dec_att.shape[1] + num_heads = enc_att.shape[0] + first = np.concatenate([enc_att, encdec_att], axis=2) + second = np.concatenate( + [np.zeros((num_heads, dec_length, enc_length)), dec_att], axis=2) + full_att = np.concatenate([first, second], axis=1) + return [ha.T.tolist() for ha in full_att] + + def get_inp_inp_attention(layer): + att = np.transpose(enc_atts[layer][0], (0, 2, 1)) + return [ha.T.tolist() for ha in att] + + def get_out_inp_attention(layer): + att = np.transpose(encdec_atts[layer][0], (0, 2, 1)) + return [ha.T.tolist() for ha in att] + + def get_out_out_attention(layer): + att = np.transpose(dec_atts[layer][0], (0, 2, 1)) + return [ha.T.tolist() for ha in att] + + def get_attentions(get_attention_fn): + num_layers = len(enc_atts) + attentions = [] + for i in range(num_layers): + attentions.append(get_attention_fn(i)) + + return attentions + + attentions = { + 'all': { + 'att': get_attentions(get_full_attention), + 'top_text': inp_text + out_text, + 'bot_text': inp_text + out_text, + }, + 'inp_inp': { + 'att': get_attentions(get_inp_inp_attention), + 'top_text': inp_text, + 'bot_text': inp_text, + }, + 'inp_out': { + 'att': get_attentions(get_out_inp_attention), + 'top_text': inp_text, + 'bot_text': out_text, + }, + 'out_out': { + 'att': get_attentions(get_out_out_attention), + 'top_text': out_text, + 'bot_text': out_text, + }, + } + + return attentions From f4a3ac9fe3daa5bcdfddbc104738af0f5004e090 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 25 Aug 2017 15:15:47 -0700 Subject: [PATCH 0313/4095] v1.1.10 PiperOrigin-RevId: 166532006 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 088f9b14c..90a554533 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.9', + version='1.1.10', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From a56155131b64a82b6a09e11e305c345df91107b0 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 25 Aug 2017 15:39:08 -0700 Subject: [PATCH 0314/4095] Big release bump, small corrections. PiperOrigin-RevId: 166535137 --- setup.py | 2 +- tensor2tensor/models/transformer_vae.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 90a554533..f32e8508c 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.1.10', + version='1.2.0', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 34f6a1c39..fa6b3f397 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -121,6 +121,7 @@ def vae(x, hparams, name): def nearest(x, means, hparams): """Find the nearest means to elements in x.""" x, means = tf.stop_gradient(x), tf.stop_gradient(means) + means = tf.nn.l2_normalize(means, dim=1) x_flat = tf.reshape(x, [-1, hparams.hidden_size]) # dist = tf.reduce_sum(tf.square(x_flat - tf.expand_dims(means, 0)), axis=2) dist = - tf.matmul(x_flat, means, transpose_b=True) @@ -213,7 +214,6 @@ def vae_compress(x, c, ed, hparams, compress_name, decompress_name, reuse=None): cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") cur = tf.nn.l2_normalize(cur, dim=3) means = tf.get_variable("z_to_dense", [hparams.v_size, hparams.hidden_size]) - means = tf.nn.l2_normalize(means, dim=1) # z, kl_loss, mu, log_sigma = vae(cur, hparams, name="vae") # z_true, z_sample, kl_loss = dvae(cur, hparams, name="dvae") z_true, z_sample, kl_loss = kmeans(cur, means, hparams, name="kmeans") @@ -288,7 +288,7 @@ def vae_transformer_internal(inputs, targets, target_space, hparams): z, kl, r = vae_compress(tf.expand_dims(targets, axis=2), tf.expand_dims(inputs, axis=2), ed_bias, hparams, "vae_compress", "vae_decompress") - kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 2.0)) + kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.5)) r *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 2.0)) losses = {"kl": kl, "reconstruction": r} return z, losses From ab1e7664c514394ebeb073616632891119849e3d Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 25 Aug 2017 16:01:48 -0700 Subject: [PATCH 0315/4095] Add beam search test. PiperOrigin-RevId: 166537858 --- tensor2tensor/models/transformer_test.py | 71 +++++++++++++++++------- 1 file changed, 50 insertions(+), 21 deletions(-) diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 8f4d26339..391824524 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -29,36 +29,65 @@ import tensorflow as tf +BATCH_SIZE = 3 +INPUT_LENGTH = 5 +TARGET_LENGTH = 7 +VOCAB_SIZE = 9 + + class TransformerTest(tf.test.TestCase): - def _testTransformer(self, net): - batch_size = 3 - input_length = 5 - target_length = 7 - vocab_size = 9 - hparams = transformer.transformer_tiny() - p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, - vocab_size) + def getModel(self): + hparams = transformer.transformer_small() + p_hparams = problem_hparams.test_problem_hparams( + hparams, VOCAB_SIZE, VOCAB_SIZE) hparams.problems = [p_hparams] inputs = -1 + np.random.random_integers( - vocab_size, size=(batch_size, input_length, 1, 1)) + VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1)) targets = -1 + np.random.random_integers( - vocab_size, size=(batch_size, target_length, 1, 1)) + VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1)) + features = { + "inputs": tf.constant(inputs, dtype=tf.int32), + "targets": tf.constant(targets, dtype=tf.int32), + "target_space_id": tf.constant(1, dtype=tf.int32), + } + + return transformer.Transformer( + hparams, tf.contrib.learn.ModeKeys.INFER, p_hparams), features + + def testTransformer(self): + model, features = self.getModel() + shadred_logits, _ = model.model_fn(features) + logits = tf.concat(shadred_logits, 0) with self.test_session() as session: - features = { - "inputs": tf.constant(inputs, dtype=tf.int32), - "targets": tf.constant(targets, dtype=tf.int32), - "target_space_id": tf.constant(1, dtype=tf.int32), - } - model = net(hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) - shadred_logits, _ = model.model_fn(features) - logits = tf.concat(shadred_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) - self.assertEqual(res.shape, (batch_size, target_length, 1, 1, vocab_size)) + self.assertEqual(res.shape, (BATCH_SIZE, TARGET_LENGTH, 1, 1, VOCAB_SIZE)) - def testTransformer(self): - self._testTransformer(transformer.Transformer) + def testBeamDecodeVsGreedy(self): + model, features = self.getModel() + + decode_length = 20 + + greedy_result, _, _ = model._greedy_infer( + features, decode_length, last_position_only=True) + greedy_result = tf.squeeze(greedy_result, axis=[2, 3]) + + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + beam_res = model._beam_decode( + features, + decode_length, + beam_size=1, + top_beams=1, + last_position_only=True, + alpha=1.0) + + with self.test_session() as session: + session.run(tf.global_variables_initializer()) + greedy_res, beam_res = session.run([greedy_result, beam_res]) + + self.assertEqual(beam_res.shape, (BATCH_SIZE, INPUT_LENGTH + decode_length)) + self.assertAllClose(greedy_res, beam_res) if __name__ == "__main__": From ebc09f0368d13592f0d0fa790f0cf06d0817e7d1 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Sat, 26 Aug 2017 00:37:09 -0700 Subject: [PATCH 0316/4095] Add __init__.py to visualization folder. PiperOrigin-RevId: 166566900 --- .../visualization/TransformerVisualization.ipynb | 4 ++-- tensor2tensor/visualization/__init__.py | 16 ++++++++++++++++ tensor2tensor/visualization/attention.py | 4 ++-- 3 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 tensor2tensor/visualization/__init__.py diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index ef1c7b45d..ff0710f5d 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -86,7 +86,7 @@ "import os\n", "# PUT THE MODEL YOU WANT TO LOAD HERE!\n", "\n", - "PROBLEM = 'wmt_ende_tokens_32k'\n", + "PROBLEM = 'translate_ende_wmt32k'\n", "MODEL = 'transformer'\n", "HPARAMS = 'transformer_base_single_gpu'\n", "\n", @@ -118,7 +118,7 @@ } ], "source": [ - "hparams = utils.create_hparams(HPARAMS, DATA_DIR)\n", + "hparams = utils.create_hparams(HPARAMS, PROBLEM, DATA_DIR)\n", "\n", "# SET EXTRA HYPER PARAMS HERE!\n", "# e.g.\n", diff --git a/tensor2tensor/visualization/__init__.py b/tensor2tensor/visualization/__init__.py new file mode 100644 index 000000000..b62605264 --- /dev/null +++ b/tensor2tensor/visualization/__init__.py @@ -0,0 +1,16 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + diff --git a/tensor2tensor/visualization/attention.py b/tensor2tensor/visualization/attention.py index 2c1f61c9c..280b95b5c 100644 --- a/tensor2tensor/visualization/attention.py +++ b/tensor2tensor/visualization/attention.py @@ -88,8 +88,8 @@ def _get_attention(inp_text, out_text, enc_atts, dec_atts, encdec_atts): """ def get_full_attention(layer): """Get the full input+output - input+output attentions.""" - enc_att = enc_atts[layer][0], - dec_att = dec_atts[layer][0], + enc_att = enc_atts[layer][0] + dec_att = dec_atts[layer][0] encdec_att = encdec_atts[layer][0] enc_att = np.transpose(enc_att, [0, 2, 1]) dec_att = np.transpose(dec_att, [0, 2, 1]) From 65636dfb88162dfe3f00e827e0e560477362d428 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Mon, 28 Aug 2017 11:06:17 -0700 Subject: [PATCH 0317/4095] modify the hacked-up batching scheme to prevent excessively-long shuffle queues. All of this ugly logic will hopefully go away once the Datasets API supports different batch sizes per bucket. PiperOrigin-RevId: 166728240 --- tensor2tensor/layers/common_hparams.py | 25 +++-- tensor2tensor/models/transformer.py | 4 - tensor2tensor/utils/data_reader.py | 116 ++++++++++++------------ tensor2tensor/utils/data_reader_test.py | 50 +++++++--- 4 files changed, 110 insertions(+), 85 deletions(-) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index d4751bb0d..2e33c9e94 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -33,14 +33,6 @@ def basic_params1(): """A set of basic hyperparameters.""" return tf.contrib.training.HParams( batch_size=4096, # in tokens per batch per gpu - # This flag controls the number of length buckets in the data reader. - # Too many buckets slows down data reading - this needs fixing. - # Too few buckets mean lots of wasted padding. - # If this value is 1, we have buckets with maximum lengths: - # [8, 12, 16, 24, 32, 48 ... (max_length or batch_size)] - # If this value is 2, we have buckets with maximum lengths: - # [8, 10, 12, 14, 16, 20, 24 ... (max_length or batch_size)] - batching_mantissa_bits=1, num_hidden_layers=4, kernel_height=3, kernel_width=1, @@ -98,9 +90,22 @@ def basic_params1(): # epsilon parameter to normalization function norm_epsilon=1e-6, symbol_modality_num_shards=16, - # setting the max length in a minibatch. 0 means default behavior, - # max_length = hparams.batch_size * length_multiplier + # During training, we drop sequences whose inputs or targets are longer + # than max_length. + # If max_length==0, we use hparams.batch_size instead. max_length=0, + # Maximum length in the smallest length bucket. Setting this + # flag too high will result in wasteful padding of short + # sequences. Due to some (hopefully) temporary hacks in the + # data reading and batching code, setting this flag too low + # results in a very long batch-shuffling queue. + # TODO(noam): change this once the Datasets API changes. + min_length_bucket=8, + # This flag controls the number of length buckets in the data + # reader. The buckets have maximum lengths from + # min_bucket_length to (max_length or batch_size), increasing + # (approximately) by factors of length_bucket_step. + length_bucket_step=1.1, # If set to True, drop sequences longer than max_length during eval. # This affects the validity of the evaluation metrics. eval_drop_long_sequences=int(False), diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 47db28c30..105d9eb32 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -378,7 +378,6 @@ def transformer_big(): hparams.hidden_size = 1024 hparams.filter_size = 4096 hparams.num_heads = 16 - hparams.batching_mantissa_bits = 2 hparams.layer_prepostprocess_dropout = 0.3 return hparams @@ -390,7 +389,6 @@ def transformer_big_single_gpu(): hparams.layer_prepostprocess_dropout = 0.1 hparams.learning_rate_warmup_steps = 16000 hparams.optimizer_adam_beta2 = 0.998 - hparams.batching_mantissa_bits = 3 return hparams @@ -400,7 +398,6 @@ def transformer_base_single_gpu(): hparams = transformer_base() hparams.batch_size = 2048 hparams.learning_rate_warmup_steps = 16000 - hparams.batching_mantissa_bits = 2 return hparams @@ -593,7 +590,6 @@ def transformer_big_dr1(): hparams.filter_size = 4096 hparams.num_heads = 16 hparams.layer_prepostprocess_dropout = 0.1 - hparams.batching_mantissa_bits = 2 return hparams diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index dbbd8e936..d55911f19 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -18,8 +18,6 @@ from __future__ import division from __future__ import print_function -import fractions -import math import os import random @@ -271,10 +269,11 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, dataset = bucket_by_sequence_length(dataset, _example_length, batching_scheme["boundaries"], - batching_scheme["batch_sizes"]) - max_batch_size = max(batching_scheme["batch_sizes"]) + batching_scheme["batch_sizes"], + batching_scheme["window_size"]) # We reshuffle the batches to prevent many long-sequence batches at once. - dataset = dataset.shuffle(max_batch_size * 3) + if batching_scheme["shuffle_queue_size"] is not None: + dataset = dataset.shuffle(batching_scheme["shuffle_queue_size"]) batched_examples = dataset.make_one_shot_iterator().get_next() return batched_examples @@ -308,38 +307,8 @@ def _example_too_big(example, max_length): return tf.less_equal(_example_length(example), max_length) -def _lcm(l): - """Least common multiple of integers in a list.""" - if not l: - raise ValueError("LCD of an empty list.") - if len(l) == 1: - return l[0] - x = l[0] - y = _lcm(l[1:]) - return x * y // fractions.gcd(x, y) - - -def _closest_small_primes(x): - """Closest number to x which has only 2, 3, 5 as prime factors, 3,5 once.""" - assert x > 0 - def is_small_primes(x, covered3, covered5): - if x % 2 == 0: - return is_small_primes(x // 2, covered3, covered5) - if x % 3 == 0 and not covered3: - return is_small_primes(x // 3, True, covered5) - if x % 5 == 0 and not covered5: - return is_small_primes(x // 5, covered3, True) - return x == 1 - for i in xrange(x): - if is_small_primes(x - i, False, False): - return x - i - # We search for higher numbers too, but only 8 of them to not increase much. - if i < 9 and is_small_primes(x + i, False, False): - return x + i - - def bucket_by_sequence_length(dataset, example_length_fn, bucket_boundaries, - bucket_batch_sizes): + bucket_batch_sizes, window_size): """Bucket entries in dataset by length. Args: @@ -348,18 +317,11 @@ def bucket_by_sequence_length(dataset, example_length_fn, bucket_boundaries, the example, which will determine the bucket it goes into. bucket_boundaries: list<int>, boundaries of the buckets. bucket_batch_sizes: list<int>, batch size per bucket. + window_size: an integer divisible by all elements of bucket_batch_sizes Returns: Dataset of padded and batched examples. """ - # Since the Datasets API only allows a single constant for window_size, - # and it needs divide all bucket_batch_sizes, we first make sure they only - # have a few primes in them so that their LCM doesn't explode quickly. - # TODO(lukaszkaiser): remove this adjustment when Dataset API improves. - bucket_batch_sizes1 = [_closest_small_primes(b) for b in bucket_batch_sizes] - tf.logging.info("Corrected bucket_batch_sizes from %s to %s." - % (str(bucket_batch_sizes), str(bucket_batch_sizes1))) - bucket_batch_sizes = bucket_batch_sizes1 with tf.name_scope("bucket_by_seq_length"): def example_to_bucket_id(example): @@ -386,25 +348,27 @@ def batching_fn(bucket_id, grouped_dataset): for name, shape in grouped_dataset.output_shapes.items()]) return grouped_dataset.padded_batch(batch_size, padded_shapes) - window_size = _lcm(bucket_batch_sizes) dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, window_size) return dataset -def _bucket_boundaries(max_length, min_length=8, mantissa_bits=2): +def _bucket_boundaries(max_length, min_length=8, length_bucket_step=1.1): """A default set of length-bucket boundaries.""" + assert min_length <= max_length + assert length_bucket_step > 1.0 x = min_length boundaries = [] while x < max_length: boundaries.append(x) - x += 2**max(0, int(math.log(x, 2)) - mantissa_bits) + x = max(x + 1, int(x * length_bucket_step)) return boundaries -def _batching_scheme(batch_size=16 * 256, - max_length=None, - batching_mantissa_bits=1, +def _batching_scheme(batch_size, + max_length, + min_length_bucket, + length_bucket_step, drop_long_sequences=False, shard_multiplier=1, length_multiplier=1): @@ -416,7 +380,8 @@ def _batching_scheme(batch_size=16 * 256, batch_size: int, total number of tokens in a batch. max_length: int, sequences longer than this will be skipped. Defaults to batch_size. - batching_mantissa_bits: int, ??. + min_length_bucket: int + length_bucket_step: float greater than 1.0 drop_long_sequences: bool, if True, then sequences longer than `max_length` are dropped. This prevents generating batches with more than the usual number of tokens, which can cause out-of-memory @@ -434,19 +399,47 @@ def _batching_scheme(batch_size=16 * 256, """ max_length = max_length or batch_size boundaries = _bucket_boundaries( - max_length, mantissa_bits=batching_mantissa_bits) + max_length, min_length_bucket, length_bucket_step) boundaries = [boundary * length_multiplier for boundary in boundaries] max_length *= length_multiplier - batch_sizes = [ - max(1, batch_size // length) * shard_multiplier - for length in boundaries + [max_length] + max(1, batch_size // length) for length in boundaries + [max_length] ] - return { + max_batch_size = max(batch_sizes) + # Since the Datasets API only allows a single constant for window_size, + # and it needs divide all bucket_batch_sizes, we pick a highly-compoisite + # window size and then round down all batch sizes to divisors of that window + # size, so that a window can always be divided evenly into batches. + # TODO(noam): remove this when Dataset API improves. + highly_composite_numbers = [ + 1, 2, 4, 6, 12, 24, 36, 48, 60, 120, 180, 240, 360, 720, 840, 1260, 1680, + 2520, 5040, 7560, 10080, 15120, 20160, 25200, 27720, 45360, 50400, 55440, + 83160, 110880, 166320, 221760, 277200, 332640, 498960, 554400, 665280, + 720720, 1081080, 1441440, 2162160, 2882880, 3603600, 4324320, 6486480, + 7207200, 8648640, 10810800, 14414400, 17297280, 21621600, 32432400, + 36756720, 43243200, 61261200, 73513440, 110270160] + window_size = max([ + i for i in highly_composite_numbers if i <= 3 * max_batch_size]) + divisors = [i for i in xrange(1, window_size + 1) if window_size % i == 0] + batch_sizes = [max([d for d in divisors if d <= bs]) for bs in batch_sizes] + window_size *= shard_multiplier + batch_sizes = [bs * shard_multiplier for bs in batch_sizes] + # The Datasets API splits one window into multiple batches, which + # produces runs of many consecutive batches of the same size. This + # is bad for training. To solve this, we will shuffle the batches + # using a queue which must be several times as large as the maximum + # number of batches per window. + max_batches_per_window = window_size // min(batch_sizes) + shuffle_queue_size = max_batches_per_window * 3 + ret = { "boundaries": boundaries, "batch_sizes": batch_sizes, - "max_length": (max_length if drop_long_sequences else 10**9) + "max_length": (max_length if drop_long_sequences else 10**9), + "shuffle_queue_size": shuffle_queue_size, + "window_size": window_size, } + tf.logging.info("batching_scheme = %s" % ret) + return ret def hparams_to_batching_scheme(hparams, @@ -455,9 +448,10 @@ def hparams_to_batching_scheme(hparams, length_multiplier=1): """Wrapper around _batching_scheme with hparams.""" return _batching_scheme( - max_length=hparams.max_length, batch_size=hparams.batch_size, - batching_mantissa_bits=hparams.batching_mantissa_bits, + max_length=hparams.max_length, + min_length_bucket=hparams.min_length_bucket, + length_bucket_step=hparams.length_bucket_step, drop_long_sequences=drop_long_sequences, shard_multiplier=shard_multiplier, length_multiplier=length_multiplier) @@ -477,7 +471,9 @@ def constant_batching_scheme(constant_batch_size_in_sequences): return { "boundaries": boundaries, "batch_sizes": batch_sizes, - "max_length": 10**9 + "max_length": 10**9, + "shuffle_queue_size": None, + "window_size": constant_batch_size_in_sequences, } diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index 318fb1cab..991669a99 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -169,36 +169,62 @@ def testLengthFilter(self): def testBatchingSchemeMaxLength(self): scheme = data_reader._batching_scheme( - batch_size=20, max_length=None, drop_long_sequences=False) + batch_size=20, max_length=None, + min_length_bucket=8, length_bucket_step=1.1, + drop_long_sequences=False) self.assertGreater(scheme["max_length"], 10000) scheme = data_reader._batching_scheme( - batch_size=20, max_length=None, drop_long_sequences=True) + batch_size=20, max_length=None, + min_length_bucket=8, length_bucket_step=1.1, + drop_long_sequences=True) self.assertEqual(scheme["max_length"], 20) scheme = data_reader._batching_scheme( - batch_size=20, max_length=15, drop_long_sequences=True) + batch_size=20, max_length=15, + min_length_bucket=8, length_bucket_step=1.1, + drop_long_sequences=True) self.assertEqual(scheme["max_length"], 15) scheme = data_reader._batching_scheme( - batch_size=20, max_length=15, drop_long_sequences=False) + batch_size=20, max_length=15, + min_length_bucket=8, length_bucket_step=1.1, + drop_long_sequences=False) self.assertGreater(scheme["max_length"], 10000) def testBatchingSchemeBuckets(self): - scheme = data_reader._batching_scheme(batch_size=128) + scheme = data_reader._batching_scheme( + batch_size=128, + max_length=0, + min_length_bucket=8, + length_bucket_step=1.1) boundaries, batch_sizes = scheme["boundaries"], scheme["batch_sizes"] self.assertEqual(len(boundaries), len(batch_sizes) - 1) - expected_boundaries = [8, 12, 16, 24, 32, 48, 64, 96] + expected_boundaries = [ + 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28, + 30, 33, 36, 39, 42, 46, 50, 55, 60, 66, 72, 79, 86, 94, 103, 113, 124] self.assertEqual(expected_boundaries, boundaries) - expected_batch_sizes = [16, 10, 8, 5, 4, 2, 2, 1, 1] + expected_batch_sizes = [ + 16, 12, 12, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 3, 3, 3, + 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1] self.assertEqual(expected_batch_sizes, batch_sizes) - scheme = data_reader._batching_scheme(batch_size=128, shard_multiplier=2) + scheme = data_reader._batching_scheme( + batch_size=128, + max_length=0, + min_length_bucket=8, + length_bucket_step=1.1, + shard_multiplier=2) boundaries, batch_sizes = scheme["boundaries"], scheme["batch_sizes"] self.assertAllEqual([bs * 2 for bs in expected_batch_sizes], batch_sizes) self.assertEqual(expected_boundaries, boundaries) - scheme = data_reader._batching_scheme(batch_size=128, length_multiplier=2) + scheme = data_reader._batching_scheme( + batch_size=128, + max_length=0, + min_length_bucket=8, + length_bucket_step=1.1, + length_multiplier=2) boundaries, batch_sizes = scheme["boundaries"], scheme["batch_sizes"] self.assertAllEqual([b * 2 for b in expected_boundaries], boundaries) self.assertEqual([max(1, bs // 2) @@ -211,14 +237,16 @@ def example_len(ex): boundaries = [10, 20, 30] batch_sizes = [10, 8, 4, 2] + window_size = 40 dataset = data_reader.read_examples( self.problem, self.filepatterns[0], 32, mode=tf.contrib.learn.ModeKeys.EVAL) - dataset = data_reader.bucket_by_sequence_length(dataset, example_len, - boundaries, batch_sizes) + dataset = data_reader.bucket_by_sequence_length( + dataset, example_len, + boundaries, batch_sizes, window_size) batch = dataset.make_one_shot_iterator().get_next() input_vals = [] From 814a4723e7e185ad27ad3b7f8ca749ad253ca9a5 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 28 Aug 2017 12:50:15 -0700 Subject: [PATCH 0318/4095] Make an outline for docs. PiperOrigin-RevId: 166743249 --- docs/example_life.md | 17 +++++++ docs/index.md | 34 ++++++++----- docs/new_model.md | 16 ++++++ docs/new_problem.md | 16 ++++++ docs/walkthrough.md | 118 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 189 insertions(+), 12 deletions(-) create mode 100644 docs/example_life.md create mode 100644 docs/new_model.md create mode 100644 docs/new_problem.md create mode 100644 docs/walkthrough.md diff --git a/docs/example_life.md b/docs/example_life.md new file mode 100644 index 000000000..074bcb7c3 --- /dev/null +++ b/docs/example_life.md @@ -0,0 +1,17 @@ +# T2T: Life of an Example + +[![PyPI +version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) +[![GitHub +Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](https://github.com/tensorflow/tensor2tensor/issues) +[![Contributions +welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) +[![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) +[![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) + +This document show how a training example passes through the T2T pipeline, +and how all its parts are connected to work together. + +## The Life of an Example + +TODO: complete. diff --git a/docs/index.md b/docs/index.md index a5eeba137..b5ee118f4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,11 +1,4 @@ -# T2T: Tensor2Tensor Transformers - -Check us out on -<a href=https://github.com/tensorflow/tensor2tensor> -GitHub -<img src="https://github.com/favicon.ico" width="16"> -</a> -. +# Tensor2Tensor Docs Index [![PyPI version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) @@ -16,8 +9,25 @@ welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CO [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) -See our -[README](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/README.md) -for documentation. -More documentation and tutorials coming soon... +Welcome to Tensor2Tensor! + +Tensor2Tensor, or T2T for short, is a library we use to create, +investigate and deploy deep learning models. This page hosts our +documentation, from basic tutorials to full code documentation. + +## Basics + +* [Walkthrough: Install and Run](walkthrough.md) +* [Tutorial: Train on Your Data](new_problem.md) +* [Tutorial: Create Your Own Model](new_model.md) + +## Deep Dive + +* [Life of an Example](example_life.md): how all parts of T2T are connected and work together + +## Code documentation + +See our +[README](https://github.com/tensorflow/tensor2tensor/blob/master/README.md) +for now, code docs coming. diff --git a/docs/new_model.md b/docs/new_model.md new file mode 100644 index 000000000..5968c8325 --- /dev/null +++ b/docs/new_model.md @@ -0,0 +1,16 @@ +# T2T: Create Your Own Model + +[![PyPI +version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) +[![GitHub +Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](https://github.com/tensorflow/tensor2tensor/issues) +[![Contributions +welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) +[![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) +[![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) + +Here we show how to create your own model in T2T. + +## The T2TModel class + +TODO: complete. diff --git a/docs/new_problem.md b/docs/new_problem.md new file mode 100644 index 000000000..98669e8c8 --- /dev/null +++ b/docs/new_problem.md @@ -0,0 +1,16 @@ +# T2T: Train on Your Own Data + +[![PyPI +version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) +[![GitHub +Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](https://github.com/tensorflow/tensor2tensor/issues) +[![Contributions +welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) +[![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) +[![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) + +Here we show how to hook-up your own data to train T2T models on it. + +## The Problem class + +TODO: complete. diff --git a/docs/walkthrough.md b/docs/walkthrough.md new file mode 100644 index 000000000..ba4c86872 --- /dev/null +++ b/docs/walkthrough.md @@ -0,0 +1,118 @@ +# T2T Install and Run Walkthrough + +[![PyPI +version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) +[![GitHub +Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](https://github.com/tensorflow/tensor2tensor/issues) +[![Contributions +welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) +[![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) +[![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) + +Here is a one-command version that installs tensor2tensor, downloads the data, +trains an English-German translation model, and lets you use it interactively: +``` +pip install tensor2tensor && t2t-trainer \ + --generate_data \ + --data_dir=~/t2t_data \ + --problems=translate_ende_wmt32k \ + --model=transformer \ + --hparams_set=transformer_base_single_gpu \ + --output_dir=~/t2t_train/base \ + --decode_interactive +``` + +## Walkthrough + +Here's a walkthrough training a good English-to-German translation +model using the Transformer model from [*Attention Is All You +Need*](https://arxiv.org/abs/1706.03762) on WMT data. + +``` +pip install tensor2tensor + +# See what problems, models, and hyperparameter sets are available. +# You can easily swap between them (and add new ones). +t2t-trainer --registry_help + +PROBLEM=translate_ende_wmt32k +MODEL=transformer +HPARAMS=transformer_base_single_gpu + +DATA_DIR=$HOME/t2t_data +TMP_DIR=/tmp/t2t_datagen +TRAIN_DIR=$HOME/t2t_train/$PROBLEM/$MODEL-$HPARAMS + +mkdir -p $DATA_DIR $TMP_DIR $TRAIN_DIR + +# Generate data +t2t-datagen \ + --data_dir=$DATA_DIR \ + --tmp_dir=$TMP_DIR \ + --problem=$PROBLEM + +# Train +# * If you run out of memory, add --hparams='batch_size=1024'. +t2t-trainer \ + --data_dir=$DATA_DIR \ + --problems=$PROBLEM \ + --model=$MODEL \ + --hparams_set=$HPARAMS \ + --output_dir=$TRAIN_DIR + +# Decode + +DECODE_FILE=$DATA_DIR/decode_this.txt +echo "Hello world" >> $DECODE_FILE +echo "Goodbye world" >> $DECODE_FILE + +BEAM_SIZE=4 +ALPHA=0.6 + +t2t-trainer \ + --data_dir=$DATA_DIR \ + --problems=$PROBLEM \ + --model=$MODEL \ + --hparams_set=$HPARAMS \ + --output_dir=$TRAIN_DIR \ + --train_steps=0 \ + --eval_steps=0 \ + --decode_beam_size=$BEAM_SIZE \ + --decode_alpha=$ALPHA \ + --decode_from_file=$DECODE_FILE + +cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes +``` + +--- + +## Installation + +``` +# Assumes tensorflow or tensorflow-gpu installed +pip install tensor2tensor + +# Installs with tensorflow-gpu requirement +pip install tensor2tensor[tensorflow_gpu] + +# Installs with tensorflow (cpu) requirement +pip install tensor2tensor[tensorflow] +``` + +Binaries: + +``` +# Data generator +t2t-datagen + +# Trainer +t2t-trainer --registry_help +``` + +Library usage: + +``` +python -c "from tensor2tensor.models.transformer import Transformer" +``` + +--- From 6d00c8bc4bfb277a4e814267eb1700f4093fbf74 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 28 Aug 2017 14:10:44 -0700 Subject: [PATCH 0319/4095] For the experts: Remove padding, add summaries and better params. PiperOrigin-RevId: 166755082 --- tensor2tensor/layers/common_attention.py | 8 +- tensor2tensor/models/attention_lm_moe.py | 26 +++-- tensor2tensor/utils/expert_utils.py | 106 ++++++++++++++++- tensor2tensor/utils/expert_utils_test.py | 143 +++++++++++++++++++++++ 4 files changed, 274 insertions(+), 9 deletions(-) create mode 100644 tensor2tensor/utils/expert_utils_test.py diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index d69e68f80..1faea6401 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -1008,6 +1008,8 @@ def self_attention_expert( depth = x.get_shape().as_list()[-1] length = tf.shape(batch_coordinate)[0] + tf.summary.scalar("batch_size", length, family="experts_stats_batch_size") + attention_kq_size = attention_kq_size or depth attention_v_size = attention_v_size or depth @@ -1059,6 +1061,7 @@ def local_expert_attention( loss_coef, attention_num_experts, train=True, + pad_remover=None, **kwargs ): """Attention using a mixture of experts. @@ -1073,6 +1076,7 @@ def local_expert_attention( loss_coef: a scalar. A multiplier for the expert loss attention_num_experts: The number of experts to use train: a boolean for the current mode + pad_remover (PadRemover): A util object containing the padding position **kwargs: Arguments to forward to self_attention_expert Returns: @@ -1093,4 +1097,6 @@ def local_expert_attention( loss_coef=loss_coef, pass_x=True, pass_gates=False, - additional_dispatch_params=additional_dispatch_params) + additional_dispatch_params=additional_dispatch_params, + pad_remover=pad_remover + ) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 5bb63c303..03073d144 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -70,7 +70,7 @@ def preprocess(x): def postprocess(x, y): return dp(common_layers.layer_postprocess, x, y, hparams) - (decoder_input, decoder_self_attention_bias) = dp( + (decoder_input, decoder_self_attention_bias, pad_remover) = dp( attention_lm_moe_prepare_decoder, targets, hparams) x = dp(tf.nn.dropout, decoder_input, @@ -87,6 +87,7 @@ def _diet_expert(x): else: expert_fn = expert_utils.ffn_expert_fn( hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) + for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope( @@ -109,9 +110,10 @@ def _diet_expert(x): common_attention.local_expert_attention, x, k=2, - loss_coef=1e-2, + loss_coef=hparams.attention_load_balance, attention_num_experts=hparams.attention_num_experts, train=hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + pad_remover=pad_remover, mask_right=True, attention_kq_size=hparams.attention_kq_size, attention_v_size=hparams.attention_v_size) @@ -158,17 +160,22 @@ def attention_lm_moe_prepare_decoder(targets, hparams): decoder_input: a Tensor, bottom of decoder stack decoder_self_attention_bias: a Tensor, containing large negative values to implement masked attention and possibly baises for diagonal alignments + pad_remover (expert_utils.PadRemover): an util object to remove padding """ + targets_pad_mask = common_attention.embedding_to_padding(targets) + with tf.name_scope("pad_remover"): + pad_remover = expert_utils.PadRemover(targets_pad_mask) + if hparams.prepend_mode == "prepend_inputs_full_attention": - decoder_self_attention_bias = (common_attention.attention_bias_prepended( - common_attention.embedding_to_padding(targets))) + decoder_self_attention_bias = ( + common_attention.attention_bias_prepended(targets_pad_mask)) else: decoder_self_attention_bias = ( common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) decoder_input = common_layers.shift_left_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) - return (decoder_input, decoder_self_attention_bias) + return (decoder_input, decoder_self_attention_bias, pad_remover) @registry.register_hparams @@ -218,8 +225,10 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_moe_type", AttentionMoeType.NONE) hparams.add_hparam("attention_num_experts", 16) # Key, query and value dimensions for the attention - hparams.add_hparam("attention_kq_size", 64) - hparams.add_hparam("attention_v_size", 64) + hparams.add_hparam("attention_kq_size", 128) + hparams.add_hparam("attention_v_size", 256) + # Loss coef for load balancing + hparams.add_hparam("attention_load_balance", 2e-2) hparams.add_hparam("diet_experts", int(False)) return hparams @@ -231,6 +240,9 @@ def attention_lm_moe_base_ae(): hparams.attention_moe_type = AttentionMoeType.LOCAL hparams.max_length = hparams.batch_size hparams.eval_drop_long_sequences = int(True) + hparams.batching_mantissa_bits = 2 # More buckets + hparams.learning_rate = 0.05 + hparams.learning_rate_warmup_steps = 10000 return hparams diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 6f26f20fa..fb1d1fac0 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -436,6 +436,87 @@ def noisy_top_k_gating(x, return gates, load +class PadRemover(object): + """Helper to remove padding from a tensor before sending to the experts. + + The padding is computed for one reference tensor containing the padding mask + and then can be applied to any other tensor of shape [dim_origin,...]. + + Ex: + input = [ + [tok1, tok2], + [tok3, tok4], + [0, 0], + [0, 0], + [tok5, tok6], + [0, 0], + ] + output = [ + [tok1, tok2], + [tok3, tok4], + [tok5, tok6], + ] + """ + + def __init__(self, pad_mask): + """Compute and store the location of the padding. + + Args: + pad_mask (tf.Tensor): Reference padding tensor of shape + [batch_size,length] or [dim_origin] (dim_origin=batch_size*length) + containing non-zeros positive values to indicate padding location. + """ + self.nonpad_ids = None + self.dim_origin = None + + with tf.name_scope("pad_reduce/get_ids"): + pad_mask = tf.reshape(pad_mask, [-1]) # Flatten the batch + # nonpad_ids contains coordinates of zeros rows (as pad_mask is + # float32, checking zero equality is done with |x| < epsilon, with + # epsilon=1e-9 as standard, here pad_mask only contains positive values + # so tf.abs would be redundant) + self.nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9)) + self.dim_origin = tf.shape(pad_mask)[:1] + + def remove(self, x): + """Remove padding from the given tensor. + + Args: + x (tf.Tensor): of shape [dim_origin,...] + + Returns: + a tensor of shape [dim_compressed,...] with dim_compressed <= dim_origin + """ + with tf.name_scope("pad_reduce/remove"): + x_shape = x.get_shape().as_list() + x = tf.gather_nd( + x, + indices=self.nonpad_ids, + ) + # This is a hack but for some reason, gather_nd return a tensor of + # undefined shape, so the shape is set up manually + x.set_shape([None] + x_shape[1:]) + return x + + def restore(self, x): + """Add padding back to the given tensor. + + Args: + x (tf.Tensor): of shape [dim_compressed,...] + + Returns: + a tensor of shape [dim_origin,...] with dim_compressed >= dim_origin. The + dim is restored from the original reference tensor + """ + with tf.name_scope("pad_reduce/restore"): + x = tf.scatter_nd( + indices=self.nonpad_ids, + updates=x, + shape=tf.concat([self.dim_origin, tf.shape(x)[1:]], axis=0), + ) + return x + + class SparseDispatcher(object): """Helper for implementing a mixture of experts. @@ -766,6 +847,7 @@ def local_moe(x, pass_x=True, pass_gates=False, additional_dispatch_params=None, + pad_remover=None, name=None): """Call a local mixture of experts. @@ -782,6 +864,8 @@ def local_moe(x, additional_dispatch_params: The extra tensors that need to be sent to each expert. Examples include batch batch coordinates (see common_attention.local_expert_attention) + pad_remover (PadRemover): If given, the padding is removed/restored before + sending to the experts name: a string Returns: @@ -791,8 +875,18 @@ def local_moe(x, training loss of the model. The backpropagation of this loss encourages all experts to be approximately equally used across a batch. """ + with tf.variable_scope(name, default_name="local_moe"): x_flat = flatten_all_but_last(x) + + # Remove the padding tokens + if pad_remover: + x_flat = pad_remover.remove(x_flat) + tf.summary.scalar( # Should match the targets_nonpadding_tokens + "nonpadding_tokens", + tf.shape(x_flat)[0], + family="experts_stats") + # The gates indicate which batch elements go to which tensors. # load is a measure of approximately how many examples go to each expert gates, load = noisy_top_k_gating( @@ -805,17 +899,27 @@ def local_moe(x, noise_epsilon=1e-2) # This magic object helps us shuffle data between datashards and experts. dispatcher = SparseDispatcher(num_experts, gates) + + # Set up expert_fn arguments expert_kwargs = {} if pass_x: expert_kwargs["x"] = dispatcher.dispatch(x_flat) if pass_gates: expert_kwargs["gates"] = dispatcher.expert_to_gates() for k, v in six.iteritems(additional_dispatch_params or {}): - expert_kwargs[k] = dispatcher.dispatch(flatten_all_but_last(v)) + v = flatten_all_but_last(v) + if pad_remover: + v = pad_remover.remove(v) + expert_kwargs[k] = dispatcher.dispatch(v) + ep = Parallelism([DEFAULT_DEV_STRING] * num_experts) expert_outputs = ep(expert_fn, **expert_kwargs) + y_flat = dispatcher.combine(expert_outputs) + if pad_remover: + y_flat = pad_remover.restore(y_flat) y = reshape_like(y_flat, x) + importance = tf.reduce_sum(gates, 0) loss = loss_coef * (cv_squared(importance) + cv_squared(load)) return y, loss diff --git a/tensor2tensor/utils/expert_utils_test.py b/tensor2tensor/utils/expert_utils_test.py new file mode 100644 index 000000000..25d14753b --- /dev/null +++ b/tensor2tensor/utils/expert_utils_test.py @@ -0,0 +1,143 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for google3.third_party.py.tensor2tensor.utils.expert_utils.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports +from tensor2tensor.layers import common_attention +from tensor2tensor.utils import expert_utils +import tensorflow as tf + + +class ExpertUtilsTest(tf.test.TestCase): + + def _verify_value(self, sess, tensor, expected): + output = sess.run(tensor) + self.assertAllClose(output, expected, 1e-9) + + def testPadRemover(self): + """Check that the padding remover is working correctly.""" + x_1 = tf.constant([ + [1, 2, 3], + [4, 5, 6], + [7, 8, 9], + [0, 0, 0], # pad + [0, 0, 0], # pad + [0, 0, 0], # pad + [10, 11, 12], + [13, 14, 15], + [0, 0, 0], # pad + ], dtype=tf.float32) + # Get padding mask + x_pad_mask = common_attention.embedding_to_padding(x_1) + x_2 = tf.constant([ + [1], + [2], + [3], + [4], # pad + [5], # pad + [6], # pad + [7], + [8], + [9], # pad + ], dtype=tf.float32) + x_3 = tf.constant([ + 1, + 2, + 3, + 4, # pad + 5, # pad + 6, # pad + 7, + 8, + 9, # pad + ], dtype=tf.float32) + + pad_remover = expert_utils.PadRemover(x_pad_mask) + + y_1 = pad_remover.remove(x_1) + y_2 = pad_remover.remove(x_2) + y_3 = pad_remover.remove(x_3) + + z_1 = pad_remover.restore(y_1 * 2) + z_2 = pad_remover.restore(y_2 * 2) + z_3 = pad_remover.restore(y_3 * 2) + + with self.test_session() as sess: + # Padding should have been removed + self._verify_value(sess, y_1, [ + [1., 2., 3.], + [4., 5., 6.], + [7., 8., 9.], + [10., 11., 12.], + [13., 14., 15.], + ]) + self._verify_value(sess, y_2, [ + [1.], + [2.], + [3.], + [7.], + [8.], + ]) + self._verify_value(sess, y_3, [ + 1., + 2., + 3., + 7., + 8., + ]) + + # Padding should have been restored + self._verify_value(sess, z_1, [ + [2., 4., 6.], + [8., 10., 12.], + [14., 16, 18.], + [0., 0., 0.], + [0., 0., 0.], + [0., 0., 0.], + [20., 22., 24.], + [26., 28., 30.], + [0., 0., 0.], + ]) + self._verify_value(sess, z_2, [ + [2.], + [4.], + [6.], + [0.], # pad + [0.], # pad + [0.], # pad + [14.], + [16.], + [0.], # pad + ]) + self._verify_value(sess, z_3, [ + 2., + 4., + 6., + 0., # pad + 0., # pad + 0., # pad + 14., + 16., + 0., # pad + ]) + + +if __name__ == '__main__': + tf.test.main() From ab90f71b8027dd11f34df77e93e949a5e1cbd043 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 28 Aug 2017 17:18:05 -0700 Subject: [PATCH 0320/4095] Fix setup.py for visualization PiperOrigin-RevId: 166781071 --- setup.py | 8 +++++++- tensor2tensor/utils/expert_utils_test.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f32e8508c..78ba7ba9f 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,13 @@ url='http://github.com/tensorflow/tensor2tensor', license='Apache 2.0', packages=find_packages(), - package_data={'tensor2tensor.data_generators': ['test_data/*']}, + package_data={ + 'tensor2tensor.data_generators': ['test_data/*'], + 'tensor2tensor.visualization': [ + 'attention.js', + 'TransformerVisualization.ipynb' + ], + }, scripts=[ 'tensor2tensor/bin/t2t-trainer', 'tensor2tensor/bin/t2t-datagen', diff --git a/tensor2tensor/utils/expert_utils_test.py b/tensor2tensor/utils/expert_utils_test.py index 25d14753b..93af9c78c 100644 --- a/tensor2tensor/utils/expert_utils_test.py +++ b/tensor2tensor/utils/expert_utils_test.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for google3.third_party.py.tensor2tensor.utils.expert_utils.""" +"""Tests for tensor2tensor.utils.expert_utils.""" from __future__ import absolute_import from __future__ import division From 684f0d02da8468a6e3b32e0d5682bc8c4b28610b Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 28 Aug 2017 17:25:27 -0700 Subject: [PATCH 0321/4095] Migrate En-De BPE translation to Problem, add UNK option in TokenTextEncoder. PiperOrigin-RevId: 166781929 --- tensor2tensor/bin/t2t-datagen | 5 -- .../data_generators/problem_hparams.py | 78 ------------------- tensor2tensor/data_generators/text_encoder.py | 10 ++- tensor2tensor/data_generators/wmt.py | 49 +++++++++--- 4 files changed, 47 insertions(+), 95 deletions(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index f7ea7e1f2..b960d6106 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -92,11 +92,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9), lambda: wsj_parsing.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 2**14, 2**9)), - "translate_ende_wmt_bpe32k": ( - lambda: wmt.ende_bpe_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True), - lambda: wmt.ende_bpe_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False)), "languagemodel_1b32k": ( lambda: lm1b.generator(FLAGS.tmp_dir, True), lambda: lm1b.generator(FLAGS.tmp_dir, False) diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 63b835f38..17f89920f 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -267,55 +267,6 @@ def audio_timit_tokens(model_hparams, wrong_vocab_size): return p -def audio_wsj_characters(unused_model_hparams): - """English audio transcription benchmark.""" - p = default_problem_hparams() - p.input_modality = { - "inputs": (registry.Modalities.AUDIO, None), - } - p.target_modality = (registry.Modalities.SYMBOL, 256) - p.vocabulary = { - "inputs": text_encoder.TextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - p.batch_size_multiplier = 512 - p.loss_multiplier = 2.0 - p.input_space_id = 13 - p.target_space_id = 2 - return p - - -def audio_wsj_tokens(model_hparams, wrong_vocab_size): - """English audio transcription benchmark. - - Args: - model_hparams: a tf.contrib.training.HParams - wrong_vocab_size: a number used in the filename indicating the approximate - vocabulary size. This is not to be confused with the actual vocabulary - size. - Returns: - a tf.contrib.training.HParams - """ - p = default_problem_hparams() - # This vocab file must be present within the data directory. - vocab_filename = os.path.join(model_hparams.data_dir, - "vocab.endefr.%d" % wrong_vocab_size) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.AUDIO, None), - } - p.target_modality = (registry.Modalities.SYMBOL, subtokenizer.vocab_size) - p.vocabulary = { - "inputs": text_encoder.TextEncoder(), - "targets": subtokenizer, - } - p.batch_size_multiplier = 512 - p.loss_multiplier = 2.0 - p.input_space_id = 12 - p.target_space_id = 3 - return p - - def lm1b_32k(model_hparams): """Billion-word language-modeling benchmark, 32k subword vocabulary.""" p = default_problem_hparams() @@ -345,25 +296,6 @@ def lm1b_characters(unused_model_hparams): return p -def wmt_ende_bpe32k(model_hparams): - """English to German translation benchmark.""" - p = default_problem_hparams() - vocab_size = 40960 - modality_spec = (registry.Modalities.SYMBOL, vocab_size) - p.input_modality = {"inputs": modality_spec} - p.target_modality = modality_spec - # This vocab file must be present within the data directory. - vocab_filename = os.path.join(model_hparams.data_dir, "vocab.bpe.32000") - p.vocabulary = { - "inputs": text_encoder.TokenTextEncoder(vocab_filename=vocab_filename), - "targets": text_encoder.TokenTextEncoder(vocab_filename=vocab_filename), - } - p.loss_multiplier = 1.4 - p.input_space_id = 4 - p.target_space_id = 9 - return p - - def wmt_parsing_characters(model_hparams): """English to parse tree translation benchmark.""" del model_hparams # Unused. @@ -472,14 +404,6 @@ def img2img_imagenet(unused_model_hparams): lambda p: audio_timit_tokens(p, 2**13), "audio_timit_tokens_8k_test": lambda p: audio_timit_tokens(p, 2**13), - "audio_wsj_characters_tune": - audio_wsj_characters, - "audio_wsj_characters_test": - audio_wsj_characters, - "audio_wsj_tokens_8k_tune": - lambda p: audio_wsj_tokens(p, 2**13), - "audio_wsj_tokens_8k_test": - lambda p: audio_wsj_tokens(p, 2**13), "languagemodel_1b_characters": lm1b_characters, "languagemodel_1b32k": @@ -489,8 +413,6 @@ def img2img_imagenet(unused_model_hparams): "parsing_english_ptb16k": lambda p: wsj_parsing_tokens( # pylint: disable=g-long-lambda p, "wsj", 2**14, 2**9), - "translate_ende_wmt_bpe32k": - wmt_ende_bpe32k, "img2img_imagenet": img2img_imagenet, } diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index c8a3bd1f9..f05393313 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -161,6 +161,7 @@ def __init__(self, vocab_filename, reverse=False, vocab_list=None, + replace_oov=None, num_reserved_ids=NUM_RESERVED_TOKENS): """Initialize from a file or list, one token per line. @@ -176,10 +177,13 @@ def __init__(self, and decoding. vocab_list: If not None, a list of elements of the vocabulary. If this is not None, then vocab_filename should be None. + replace_oov: If not None, every out-of-vocabulary token seen when + encoding will be replaced by this string (which must be in vocab). num_reserved_ids: Number of IDs to save for reserved tokens like <EOS>. """ super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids) self._reverse = reverse + self._replace_oov = replace_oov if vocab_filename: self._init_vocab_from_file(vocab_filename) else: @@ -188,7 +192,11 @@ def __init__(self, def encode(self, sentence): """Converts a space-separated string of tokens to a list of ids.""" - ret = [self._token_to_id[tok] for tok in sentence.strip().split()] + tokens = sentence.strip().split() + if self._replace_oov is not None: + tokens = [t if t in self._token_to_id else self._replace_oov + for t in tokens] + ret = [self._token_to_id[tok] for tok in tokens] return ret[::-1] if self._reverse else ret def decode(self, ids): diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 93fc27ac5..8d6cdae6f 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -305,17 +305,44 @@ def _get_wmt_ende_bpe_dataset(directory, filename): return train_path -def ende_bpe_token_generator(data_dir, tmp_dir, train): - """Instance of token generator for the WMT en->de task, training set.""" - dataset_path = ("train.tok.clean.bpe.32000" - if train else "newstest2013.tok.bpe.32000") - train_path = _get_wmt_ende_bpe_dataset(tmp_dir, dataset_path) - token_tmp_path = os.path.join(tmp_dir, "vocab.bpe.32000") - token_path = os.path.join(data_dir, "vocab.bpe.32000") - tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) - token_vocab = text_encoder.TokenTextEncoder(vocab_filename=token_path) - return token_generator(train_path + ".en", train_path + ".de", token_vocab, - EOS) +@registry.register_problem +class TranslateEndeWmtBpe32k(TranslateProblem): + """Problem spec for WMT En-De translation, BPE version.""" + + @property + def targeted_vocab_size(self): + return 32000 + + @property + def vocab_name(self): + return "vocab.bpe" + + def feature_encoders(self, data_dir): + vocab_filename = os.path.join(data_dir, self.vocab_file) + encoder = text_encoder.TokenTextEncoder(vocab_filename, replace_oov="UNK") + return {"inputs": encoder, "targets": encoder} + + def generator(self, data_dir, tmp_dir, train): + """Instance of token generator for the WMT en->de task, training set.""" + dataset_path = ("train.tok.clean.bpe.32000" + if train else "newstest2013.tok.bpe.32000") + train_path = _get_wmt_ende_bpe_dataset(tmp_dir, dataset_path) + token_tmp_path = os.path.join(tmp_dir, self.vocab_file) + token_path = os.path.join(data_dir, self.vocab_file) + tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) + with tf.gfile.GFile(token_path, mode="a") as f: + f.write("UNK\n") # Add UNK to the vocab. + token_vocab = text_encoder.TokenTextEncoder(token_path, replace_oov="UNK") + return token_generator(train_path + ".en", train_path + ".de", + token_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_BPE_TOK + + @property + def target_space_id(self): + return problem.SpaceID.DE_BPE_TOK def _preprocess_sgm(line, is_sgm): From da6643d04c5580d8a6ec91d65fec68c9c002fd1f Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 28 Aug 2017 17:42:03 -0700 Subject: [PATCH 0322/4095] Use more robust method for showing the visualizations. PiperOrigin-RevId: 166783838 --- tensor2tensor/visualization/attention.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/visualization/attention.py b/tensor2tensor/visualization/attention.py index 280b95b5c..bc4238081 100644 --- a/tensor2tensor/visualization/attention.py +++ b/tensor2tensor/visualization/attention.py @@ -21,8 +21,7 @@ import json import os -from IPython.display import HTML -from IPython.display import Javascript +import IPython.display as display import numpy as np @@ -53,9 +52,9 @@ def show(inp_text, out_text, enc_atts, dec_atts, encdec_atts): def _show_attention(att_json): - display(HTML(vis_html)) # pylint: disable=undefined-variable - display(Javascript('window.attention = %s' % att_json)) # pylint: disable=undefined-variable - display(Javascript(vis_js)) # pylint: disable=undefined-variable + display.display(display.HTML(vis_html)) + display.display(display.Javascript('window.attention = %s' % att_json)) + display.display(display.Javascript(vis_js)) def _get_attention(inp_text, out_text, enc_atts, dec_atts, encdec_atts): From 03a386137623e1bf456c4fb905f80c69116f17b2 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 28 Aug 2017 18:10:31 -0700 Subject: [PATCH 0323/4095] Small simplification is vis notebook. PiperOrigin-RevId: 166786625 --- tensor2tensor/visualization/TransformerVisualization.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index ff0710f5d..e3fb8f958 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -381,8 +381,7 @@ }, "outputs": [], "source": [ - "der = decode(beam_decode[0])\n", - "output_ids = encode(der)\n", + "output_ids = beam_decode\n", "\n", "# Get attentions\n", "np_enc_atts, np_dec_atts, np_encdec_atts = sess.run([enc_atts, dec_atts, encdec_atts], {\n", From 4cc039abc94c9d9720da4515d4b041576f0b84b9 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 28 Aug 2017 18:21:57 -0700 Subject: [PATCH 0324/4095] Add a cyclic linear learning rate scheme, play with VAE. PiperOrigin-RevId: 166787878 --- tensor2tensor/models/transformer_vae.py | 14 +++++++++++--- tensor2tensor/utils/model_builder.py | 8 ++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index fa6b3f397..90277e522 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -137,7 +137,7 @@ def kmeans(x, means, hparams, name): x_means_hot = nearest(x, means, hparams) x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) kl = tf.reduce_sum(tf.square(x - x_means), axis=-1) - return x_means_hot, x_means_hot, tf.reduce_mean(kl) * 100.0 + return x_means_hot, x_means_hot, tf.reduce_mean(kl) * 10.0 def compress(x, c, hparams, name): @@ -213,10 +213,12 @@ def vae_compress(x, c, ed, hparams, compress_name, decompress_name, reuse=None): cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") cur = tf.nn.l2_normalize(cur, dim=3) + cur_n = hparams.kmeans_lr_factor * cur + cur_n += (1.0 - hparams.kmeans_lr_factor) * tf.stop_gradient(cur) means = tf.get_variable("z_to_dense", [hparams.v_size, hparams.hidden_size]) # z, kl_loss, mu, log_sigma = vae(cur, hparams, name="vae") # z_true, z_sample, kl_loss = dvae(cur, hparams, name="dvae") - z_true, z_sample, kl_loss = kmeans(cur, means, hparams, name="kmeans") + z_true, z_sample, kl_loss = kmeans(cur_n, means, hparams, name="kmeans") # Compress context. with tf.variable_scope(compress_name, reuse=reuse): @@ -239,6 +241,11 @@ def vae_compress(x, c, ed, hparams, compress_name, decompress_name, reuse=None): # Leak at the beginning to help train. z = mix(z, cur, hparams.startup_steps) + prob_z = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.8 + prob_z = prob_z if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0 + z = tf.cond(tf.less(tf.random_uniform([]), prob_z), + lambda: z, lambda: cur) + z = tf.layers.dense(z, hparams.hidden_size, name="unnormalize") # Dropout for better autoencoding. z = tf.nn.dropout(z, keep_prob=0.9) @@ -289,7 +296,7 @@ def vae_transformer_internal(inputs, targets, target_space, hparams): tf.expand_dims(inputs, axis=2), ed_bias, hparams, "vae_compress", "vae_decompress") kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.5)) - r *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 2.0)) + r *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.5)) losses = {"kl": kl, "reconstruction": r} return z, losses @@ -364,5 +371,6 @@ def transformer_vae_base(): hparams.relu_dropout = 0.0 hparams.dropout = 0.0 hparams.num_hidden_layers = 4 + hparams.kmeans_lr_factor = 0.002 hparams.z_size = 256 return hparams diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 24c17ca9e..2c92a0d90 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -104,6 +104,14 @@ def learning_rate_decay(): elif hparams.learning_rate_decay_scheme == "cosine": cycle_steps = hparams.learning_rate_cosine_cycle_steps return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) + elif hparams.learning_rate_decay_scheme == "cyclelinear10x": + # Cycle the rate linearly by 10x every warmup_steps, up and down. + cycle_steps = hparams.learning_rate_warmup_steps + cycle_position = step % (2 * cycle_steps) + cycle_position = tf.to_float( # Normalize to the interval [-1, 1]. + cycle_position - cycle_steps) / float(cycle_steps) + cycle_position = 1.0 - tf.abs(cycle_position) # 0 to 1 and back to 0. + return (cycle_position + 0.01) * 10.0 # 10x difference each cycle. inv_base = tf.exp(tf.log(0.01) / warmup_steps) inv_decay = inv_base**(warmup_steps - step) From e742509a05af7dbc23e22bcb0f06787c5894a31f Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Mon, 28 Aug 2017 19:08:28 -0700 Subject: [PATCH 0325/4095] Bug fix, no access to targets during decoding. Move to correct place PiperOrigin-RevId: 166791783 --- tensor2tensor/utils/model_builder.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 2c92a0d90..15a81dd5f 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -193,17 +193,18 @@ def model_fn(features, targets, mode): tf.summary.scalar("%s_nonpadding_fraction" % k, tf.reduce_mean(nonpadding)) - # The new data reader occasionally emits very small batches, which - # cause the examples in those batches to be grossly overweighted. - # We decrease the loss proportionally to the ratio of the size of this - # batch to the size of the largest training batch ever. - # TODO(noam): to be more sophisticated, we could keep separate - # maxima based on problem choice. - max_nonpadding_var = tf.get_variable( - "max_nonpadding", shape=[], - initializer=tf.ones_initializer(), trainable=False) - max_nonpadding = tf.maximum(max_nonpadding_var, targets_nonpadding_tokens) if is_training: + # The new data reader occasionally emits very small batches, which + # cause the examples in those batches to be grossly overweighted. + # We decrease the loss proportionally to the ratio of the size of this + # batch to the size of the largest training batch ever. + # TODO(noam): to be more sophisticated, we could keep separate + # maxima based on problem choice. + max_nonpadding_var = tf.get_variable( + "max_nonpadding", shape=[], + initializer=tf.ones_initializer(), trainable=False) + max_nonpadding = tf.maximum(max_nonpadding_var, + targets_nonpadding_tokens) with tf.control_dependencies( [tf.assign(max_nonpadding_var, max_nonpadding)]): small_batch_multiplier = targets_nonpadding_tokens / max_nonpadding From ee3296f4ed62292517307563026a08c2e9b02fef Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 28 Aug 2017 20:10:59 -0700 Subject: [PATCH 0326/4095] Fix decode_from_dataset so that it decodes from multiple batches again PiperOrigin-RevId: 166795991 --- tensor2tensor/utils/decoding.py | 163 ++++++++++++++---------- tensor2tensor/utils/input_fn_builder.py | 17 +++ tensor2tensor/utils/model_builder.py | 11 +- tensor2tensor/utils/trainer_utils.py | 14 +- 4 files changed, 131 insertions(+), 74 deletions(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 2e430a204..3f00c25a9 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -37,85 +37,114 @@ FLAGS = tf.flags.FLAGS -def decode_from_dataset(estimator): +def _decode_from_dataset_log_results(inputs, + targets, + outputs, + problem_name, + prediction_idx, + inputs_vocab, + targets_vocab, + save_images=False, + model_dir=None, + identity_output=False): + """Log inference results.""" + if "image" in problem_name and save_images: + save_path = os.path.join(model_dir, "%s_prediction_%d.jpg" % + (problem_name, prediction_idx)) + show_and_save_image(inputs / 255., save_path) + elif inputs_vocab: + decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) + tf.logging.info("Inference results INPUT: %s" % decoded_inputs) + + if identity_output: + decoded_outputs = "".join(map(str, outputs.flatten())) + decoded_targets = "".join(map(str, targets.flatten())) + else: + decoded_outputs = "".join( + map(str, targets_vocab.decode(_save_until_eos(outputs.flatten())))) + decoded_targets = "".join( + map(str, targets_vocab.decode(_save_until_eos(targets.flatten())))) + + tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) + tf.logging.info("Inference results TARGET: %s" % decoded_targets) + return decoded_outputs, decoded_targets + + +def decode_from_dataset(estimator, + problem_names, + return_beams=False, + beam_size=1, + max_predictions=-1, + decode_to_file=None, + save_images=False, + identity_output=False): + tf.logging.info("Performing local inference from dataset for %s.", + str(problem_names)) hparams = estimator.hparams - for i, problem in enumerate(FLAGS.problems.split("-")): - inputs_vocab = hparams.problems[i].vocabulary.get("inputs", None) - targets_vocab = hparams.problems[i].vocabulary["targets"] - tf.logging.info("Performing local inference.") + + for problem_idx, problem_name in enumerate(problem_names): + # Build the inference input function infer_problems_data = data_reader.get_data_filepatterns( - FLAGS.problems, hparams.data_dir, tf.contrib.learn.ModeKeys.INFER) + problem_name, hparams.data_dir, tf.contrib.learn.ModeKeys.INFER) infer_input_fn = input_fn_builder.build_input_fn( mode=tf.contrib.learn.ModeKeys.INFER, hparams=hparams, data_file_patterns=infer_problems_data, num_datashards=devices.data_parallelism().n, - fixed_problem=i) - - def log_fn(inputs, - targets, - outputs, - problem, - j, - inputs_vocab=inputs_vocab, - targets_vocab=targets_vocab): - """Log inference results.""" - if "image" in problem and FLAGS.decode_save_images: - save_path = os.path.join(estimator.model_dir, - "%s_prediction_%d.jpg" % (problem, j)) - show_and_save_image(inputs / 255., save_path) - elif inputs_vocab: - decoded_inputs = inputs_vocab.decode( - _save_until_eos(inputs.flatten())) - tf.logging.info("Inference results INPUT: %s" % decoded_inputs) + fixed_problem=problem_idx) - if FLAGS.identity_output: - decoded_outputs = " ".join(map(str, outputs.flatten())) - decoded_targets = " ".join(map(str, targets.flatten())) - else: - decoded_outputs = " ".join(map( - str, targets_vocab.decode(_save_until_eos(outputs.flatten())))) - decoded_targets = " ".join(map( - str, targets_vocab.decode(_save_until_eos(targets.flatten())))) - - tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) - tf.logging.info("Inference results TARGET: %s" % decoded_targets) - return decoded_outputs, decoded_targets - - result_iter = estimator.predict(input_fn=infer_input_fn, as_iterable=True) - count = 0 - agg_outputs = [] - agg_targets = [] - for result in result_iter: - # predictions from the test input. We use it to log inputs and decodes. - inputs = result["inputs"] - targets = result["targets"] - outputs = result["outputs"] - if FLAGS.decode_return_beams: - output_beams = np.split(outputs, FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - o, t = log_fn(inputs, targets, beam, problem, count) - agg_outputs.append(o) - agg_targets.append(t) - else: - o, t = log_fn(inputs, targets, outputs, problem, count) - agg_outputs.append(o) - agg_targets.append(t) + # Get the predictions as an iterable + predictions = estimator.predict(input_fn=infer_input_fn, as_iterable=True) + + # Prepare output file writers if decode_to_file passed + if decode_to_file: + output_filepath = decode_to_file + ".outputs." + problem_name + target_filepath = decode_to_file + ".targets." + problem_name - count += 1 - if FLAGS.decode_num_samples != -1 and count >= FLAGS.decode_num_samples: - break - if FLAGS.decode_to_file: - output_filepath = FLAGS.decode_to_file + ".outputs." + problem output_file = tf.gfile.Open(output_filepath, "w") - target_filepath = FLAGS.decode_to_file + ".targets." + problem target_file = tf.gfile.Open(target_filepath, "w") - for o, t in zip(agg_outputs, agg_targets): - output_file.write(str(o)+"\n") - target_file.write(str(t)+"\n") - tf.logging.info("Completed inference on %d samples." % count) + + problem_hparams = hparams.problems[problem_idx] + inputs_vocab = problem_hparams.vocabulary.get("inputs", None) + targets_vocab = problem_hparams.vocabulary["targets"] + for num_predictions, prediction in enumerate(predictions): + inputs = prediction["inputs"] + targets = prediction["targets"] + outputs = prediction["outputs"] + + # Log predictions + decoded_outputs = [] + if return_beams: + output_beams = np.split(outputs, beam_size, axis=0) + for i, beam in enumerate(output_beams): + tf.logging.info("BEAM %d:" % i) + decoded = _decode_from_dataset_log_results( + inputs, targets, beam, problem_name, num_predictions, + inputs_vocab, targets_vocab, save_images, estimator.model_dir, + identity_output) + decoded_outputs.append(decoded) + else: + decoded = _decode_from_dataset_log_results( + inputs, targets, outputs, problem_name, num_predictions, + inputs_vocab, targets_vocab, save_images, estimator.model_dir, + identity_output) + decoded_outputs.append(decoded) + + # Write out predictions if decode_to_file passed + if decode_to_file: + for decoded_output, decoded_target in decoded_outputs: + output_file.write(str(decoded_output) + "\n") + target_file.write(str(decoded_target) + "\n") + + if max_predictions >= 0 and num_predictions >= max_predictions: + break + + if decode_to_file: + output_file.close() + target_file.close() + + tf.logging.info("Completed inference on %d samples." % num_predictions) # pylint: disable=undefined-loop-variable def decode_from_file(estimator, filename): diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index c31ba0f31..bef95d58f 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -183,6 +183,12 @@ def input_fn(): if mode == tf.contrib.learn.ModeKeys.INFER: rand_feature_map["infer_targets"] = rand_target rand_target = None + # This is because of a bug in the tf.contrib.learn Estimator that + # short-circuits prediction if it doesn't see a QueueRunner. + # DummyQueueRunner implements the minimal expected interface but does + # nothing. + # TODO(rsepassi): Remove once we move to core Estimator. + tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, DummyQueueRunner()) return rand_feature_map, rand_target return input_fn @@ -195,3 +201,14 @@ def cond_on_index(fn, index_tensor, cur_idx, max_idx): return tf.cond( tf.equal(index_tensor, cur_idx), lambda: fn(cur_idx), lambda: cond_on_index(fn, index_tensor, cur_idx + 1, max_idx)) + + +class DummyQueueRunner(object): + """Can stand-in for a QueueRunner but does nothing.""" + + def __init__(self): + pass + + def create_threads(self, sess, coord=None, daemon=False, start=False): + del sess, coord, daemon, start + return [] diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 15a81dd5f..0212f9a4d 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -201,8 +201,10 @@ def model_fn(features, targets, mode): # TODO(noam): to be more sophisticated, we could keep separate # maxima based on problem choice. max_nonpadding_var = tf.get_variable( - "max_nonpadding", shape=[], - initializer=tf.ones_initializer(), trainable=False) + "max_nonpadding", + shape=[], + initializer=tf.ones_initializer(), + trainable=False) max_nonpadding = tf.maximum(max_nonpadding_var, targets_nonpadding_tokens) with tf.control_dependencies( @@ -212,6 +214,7 @@ def model_fn(features, targets, mode): # Get multi-problem logits and loss based on features["problem_choice"]. loss_variable_names = [] + def nth_model(n): """Build the model for the n-th problem, plus some added variables.""" model_class = registry.model(model)( @@ -258,8 +261,8 @@ def nth_model(n): # Total loss was already constructed on input. loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) except ValueError: - loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n, - initializer=100.0, trainable=False) + loss_moving_avg = tf.get_variable( + "problem_%d/total_loss" % n, initializer=100.0, trainable=False) ops.append( loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1)) with tf.variable_scope("train_stats"): # Count steps for this problem. diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index fa9d9233e..36fdcf6be 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -149,8 +149,8 @@ def experiment_fn(output_dir): def create_experiment(output_dir, data_dir, model_name, train_steps, eval_steps): """Create Experiment.""" - hparams = create_hparams(FLAGS.hparams_set, FLAGS.problems, data_dir, - passed_hparams=FLAGS.hparams) + hparams = create_hparams( + FLAGS.hparams_set, FLAGS.problems, data_dir, passed_hparams=FLAGS.hparams) estimator, input_fns = create_experiment_components( hparams=hparams, output_dir=output_dir, @@ -358,4 +358,12 @@ def decode(estimator): elif FLAGS.decode_from_file is not None and FLAGS.decode_from_file is not "": decoding.decode_from_file(estimator, FLAGS.decode_from_file) elif FLAGS.decode_from_dataset: - decoding.decode_from_dataset(estimator) + decoding.decode_from_dataset( + estimator, + FLAGS.problems.split("-"), + return_beams=FLAGS.decode_return_beams, + beam_size=FLAGS.decode_beam_size, + max_predictions=FLAGS.decode_num_samples, + decode_to_file=FLAGS.decode_to_file, + save_images=FLAGS.decode_save_images, + identity_output=FLAGS.identity_output) From a2cf057c63e6863968eaab8874cb541189559ae1 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 29 Aug 2017 11:48:42 -0700 Subject: [PATCH 0327/4095] Add edit distance as metric as additional evaluation criteria. PiperOrigin-RevId: 166879585 --- tensor2tensor/utils/metrics.py | 46 +++++++++++++++++++++++++++++ tensor2tensor/utils/metrics_test.py | 23 +++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index e5cb88ddf..baff66669 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -42,6 +42,7 @@ class Metrics(object): R2 = "r_squared" ROUGE_2_F = "rouge_2_fscore" ROUGE_L_F = "rouge_L_fscore" + EDIT_DISTANCE = "edit_distance" def padded_rmse(predictions, labels, weights_fn=common_layers.weights_all): @@ -122,6 +123,50 @@ def padded_sequence_accuracy(predictions, return correct_seq, tf.constant(1.0) +def sequence_edit_distance(predictions, + labels, + weights_fn=common_layers.weights_nonzero): + """Average edit distance, ignoring padding 0s. + + The score returned is the edit distance divided by the total length of + reference truth and the weight returned is the total length of the truth. + + Args: + predictions: Tensor of shape [`batch_size`, `length`, 1, `num_classes`] and + type tf.float32 representing the logits, 0-padded. + labels: Tensor of shape [`batch_size`, `length`, 1, 1] and type tf.int32 + representing the labels of same length as logits and 0-padded. + weights_fn: ignored. The weights returned are the total length of the ground + truth labels, excluding 0-paddings. + + Returns: + (edit distance / reference length, reference length) + + Raises: + ValueError: if weights_fn is not common_layers.weights_nonzero. + """ + if weights_fn is not common_layers.weights_nonzero: + raise ValueError("Only weights_nonzero can be used for this metric.") + + with tf.variable_scope("edit_distance", values=[predictions, labels]): + # Transform logits into sequence classes by taking max at every step. + predictions = tf.to_int32( + tf.squeeze(tf.argmax(predictions, axis=-1), axis=(2, 3))) + nonzero_idx = tf.where(tf.not_equal(predictions, 0)) + sparse_outputs = tf.SparseTensor(nonzero_idx, + tf.gather_nd(predictions, nonzero_idx), + tf.shape(predictions, out_type=tf.int64)) + labels = tf.squeeze(labels, axis=(2, 3)) + nonzero_idx = tf.where(tf.not_equal(labels, 0)) + label_sparse_outputs = tf.SparseTensor(nonzero_idx, + tf.gather_nd(labels, nonzero_idx), + tf.shape(labels, out_type=tf.int64)) + distance = tf.reduce_sum( + tf.edit_distance(sparse_outputs, label_sparse_outputs, normalize=False)) + reference_length = tf.to_float(tf.shape(nonzero_idx)[0]) + return distance / reference_length, reference_length + + def padded_neg_log_perplexity(predictions, labels, weights_fn=common_layers.weights_nonzero): @@ -234,4 +279,5 @@ def problem_metric_fn(predictions, labels, weights): Metrics.R2: padded_variance_explained, Metrics.ROUGE_2_F: rouge.rouge_2_fscore, Metrics.ROUGE_L_F: rouge.rouge_l_fscore, + Metrics.EDIT_DISTANCE: sequence_edit_distance, } diff --git a/tensor2tensor/utils/metrics_test.py b/tensor2tensor/utils/metrics_test.py index 0d78e632c..528fd4755 100644 --- a/tensor2tensor/utils/metrics_test.py +++ b/tensor2tensor/utils/metrics_test.py @@ -72,6 +72,29 @@ def testSequenceAccuracyMetric(self): actual = session.run(a) self.assertEqual(actual, expected) + def testSequenceEditDistanceMetric(self): + predictions = np.array([[3, 4, 5, 1, 0, 0], + [2, 1, 3, 4, 0, 0], + [2, 1, 3, 4, 0, 0]]) + # Targets are just a bit different: + # - first sequence has a different prediction + # - second sequence has a different prediction and one extra step + # - third sequence is identical + targets = np.array([[5, 4, 5, 1, 0, 0], + [2, 5, 3, 4, 1, 0], + [2, 1, 3, 4, 0, 0]]) + # Reshape to match expected input format by metric fns. + predictions = np.reshape(predictions, [3, 6, 1, 1]) + targets = np.reshape(targets, [3, 6, 1, 1]) + with self.test_session() as session: + scores, weight = metrics.sequence_edit_distance( + tf.one_hot(predictions, depth=6, dtype=tf.float32), + tf.constant(targets, dtype=tf.int32)) + session.run(tf.global_variables_initializer()) + actual_scores, actual_weight = session.run([scores, weight]) + self.assertAlmostEqual(actual_scores, 3.0 / 13) + self.assertEqual(actual_weight, 13) + def testNegativeLogPerplexity(self): predictions = np.random.randint(4, size=(12, 12, 12, 1)) targets = np.random.randint(4, size=(12, 12, 12, 1)) From a8ee62ab7a15d9436da967591990e29cd0ee0c14 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 29 Aug 2017 14:38:09 -0700 Subject: [PATCH 0328/4095] Add IMDB sentiment classification dataset PiperOrigin-RevId: 166905238 --- tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/image.py | 3 +- tensor2tensor/data_generators/imdb.py | 124 ++++++++++++++++++ .../data_generators/problem_hparams.py | 4 +- tensor2tensor/layers/modalities.py | 25 ++-- tensor2tensor/utils/model_builder.py | 18 +-- 6 files changed, 150 insertions(+), 25 deletions(-) create mode 100644 tensor2tensor/data_generators/imdb.py diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index ec3a9d0af..2a16a802a 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -26,6 +26,7 @@ from tensor2tensor.data_generators import desc2code from tensor2tensor.data_generators import ice_parsing from tensor2tensor.data_generators import image +from tensor2tensor.data_generators import imdb from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import ptb from tensor2tensor.data_generators import snli diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 71f4f0920..fbe91d70e 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -272,7 +272,8 @@ def hparams(self, defaults, model_hparams): small_modality = "%s:small_image_modality" % registry.Modalities.IMAGE modality = small_modality if self.is_small else registry.Modalities.IMAGE p.input_modality = {"inputs": (modality, None)} - p.target_modality = (registry.Modalities.CLASS_LABEL, self.num_classes) + p.target_modality = ("%s:2d" % registry.Modalities.CLASS_LABEL, + self.num_classes) p.batch_size_multiplier = 4 if self.is_small else 256 p.max_expected_batch_size_per_shard = 8 if self.is_small else 2 p.loss_multiplier = 3.0 if self.is_small else 1.0 diff --git a/tensor2tensor/data_generators/imdb.py b/tensor2tensor/data_generators/imdb.py new file mode 100644 index 000000000..0710a2532 --- /dev/null +++ b/tensor2tensor/data_generators/imdb.py @@ -0,0 +1,124 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""IMDB Sentiment Classification Problem.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import registry + +import tensorflow as tf + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + + +@registry.register_problem +class SentimentIMDB(problem.Problem): + """IMDB sentiment classification.""" + URL = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz" + + @property + def num_shards(self): + return 10 + + @property + def vocab_file(self): + return "sentiment_imdb.vocab" + + @property + def targeted_vocab_size(self): + return 2**15 + + def doc_generator(self, imdb_dir, dataset, include_label=False): + dirs = [(os.path.join(imdb_dir, dataset, "pos"), True), (os.path.join( + imdb_dir, dataset, "neg"), False)] + + for d, label in dirs: + for filename in os.listdir(d): + with tf.gfile.Open(os.path.join(d, filename)) as imdb_f: + doc = imdb_f.read().strip() + if include_label: + yield doc, label + else: + yield doc + + def generator(self, data_dir, tmp_dir, train): + """Generate examples.""" + # Download and extract + compressed_filename = os.path.basename(self.URL) + download_path = generator_utils.maybe_download(tmp_dir, compressed_filename, + self.URL) + imdb_dir = os.path.join(tmp_dir, "aclImdb") + if not tf.gfile.Exists(imdb_dir): + with tarfile.open(download_path, "r:gz") as tar: + tar.extractall(tmp_dir) + + # Generate vocab + encoder = generator_utils.get_or_generate_vocab_inner( + data_dir, self.vocab_file, self.targeted_vocab_size, + lambda: self.doc_generator(imdb_dir, "train")) + + # Generate examples + dataset = "train" if train else "test" + for doc, label in self.doc_generator(imdb_dir, dataset, include_label=True): + yield { + "inputs": encoder.encode(doc) + [EOS], + "targets": [int(label)], + } + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + train_paths = self.training_filepaths( + data_dir, self.num_shards, shuffled=False) + dev_paths = self.dev_filepaths(data_dir, 1, shuffled=False) + generator_utils.generate_dataset_and_shuffle( + self.generator(data_dir, tmp_dir, True), train_paths, + self.generator(data_dir, tmp_dir, False), dev_paths) + + def hparams(self, defaults, model_hparams): + p = defaults + source_vocab_size = self._encoders["inputs"].vocab_size + p.input_modality = { + "inputs": (registry.Modalities.SYMBOL, source_vocab_size) + } + p.target_modality = (registry.Modalities.CLASS_LABEL, 2) + p.input_space_id = problem.SpaceID.EN_TOK + p.target_space_id = problem.SpaceID.GENERIC + + def feature_encoders(self, data_dir): + vocab_filename = os.path.join(data_dir, self.vocab_file) + encoder = text_encoder.SubwordTextEncoder(vocab_filename) + return { + "inputs": encoder, + "targets": text_encoder.TextEncoder(), + } + + def example_reading_spec(self): + data_fields = { + "inputs": tf.VarLenFeature(tf.int64), + "targets": tf.FixedLenFeature([1], tf.int64), + } + data_items_to_decoders = None + return (data_fields, data_items_to_decoders) diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 17f89920f..38dc05939 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -147,8 +147,8 @@ def default_problem_hparams(): # Modalities used to map from input features to a space compatible with # chosen model architecture. One modality spec (which is a 2-tuple, # (modality_full_name, vocab_size)) per feature key. modality_full_name is - # a string type:name, e.g. class_label:class_label_2d. Leaving off the - # name uses the default modality for that type (e.g. class_label == + # a string type:name, e.g. class_label:2d. Leaving off the name uses the + # default modality for that type (e.g. class_label == # class_label:default). input_modality={}, diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 57652dbec..e03e6835e 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -361,9 +361,9 @@ def xnet_resblock(x, filters, res_relu, name): "compress_block_final") -@registry.register_class_label_modality("default") +@registry.register_class_label_modality("2d") class ClassLabelModality(modality.Modality): - """Used for label data.""" + """Used for label data; if is2d=True, uses Xception flow to logits.""" def __init__(self, model_hparams, vocab_size, is2d=True): super(ClassLabelModality, self).__init__(model_hparams, vocab_size) @@ -397,9 +397,11 @@ def targets_bottom(self, x): def top(self, body_output, _): """Transform inputs from model space to target space. - Perform the Xception "Exit flow", consisting of a single residual block and - two separable convolutional upscalings followed by global spatial average - pooling. + If instantiated with is2d=True, perform the Xception "Exit flow", consisting + of a single residual block and two separable convolutional upscalings + followed by global spatial average pooling. + + Otherwise, a single linear layer to logits. Args: body_output: A Tensor with shape [batch, ?, ?, body_output_size]. @@ -417,11 +419,12 @@ def top(self, body_output, _): spatial_dim = tf.to_int32(spatial_dim_float) x_depth = int(x.get_shape()[3]) x = tf.reshape(x, [-1, spatial_dim, spatial_dim, x_depth]) - x = common_layers.conv_block_downsample(x, self._kernel, self._strides, - self._padding) - x = tf.nn.relu(x) - x = tf.reduce_mean(x, axis=[1, 2], keep_dims=True) - res = common_layers.conv(x, self._vocab_size, (1, 1)) + x = common_layers.conv_block_downsample(x, self._kernel, self._strides, + self._padding) + x = tf.nn.relu(x) + x = tf.reduce_mean(x, axis=[1, 2], keep_dims=True) + + res = tf.layers.dense(x, self._vocab_size) return tf.expand_dims(res, 3) def loss(self, top_out, targets, weights_fn=common_layers.weights_all): @@ -431,7 +434,7 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_all): top_out, targets, weights_fn=weights_fn) -@registry.register_class_label_modality("class_label_2d") +@registry.register_class_label_modality("default") class ClassLabel1DModality(ClassLabelModality): """Used for label data.""" diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 0212f9a4d..f2632aa94 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -164,12 +164,6 @@ def model_fn(features, targets, mode): features = _interactive_input_tensor_to_features_dict(features, my_hp) elif FLAGS.decode_from_file: features = _decode_input_tensor_to_features_dict(features, my_hp) - # A dictionary containing: - # - problem_choice: A Tensor containing an integer indicating which problem - # was selected for this run. - # - predictions: A Tensor containing the model's output predictions. - run_info = dict() - run_info["problem_choice"] = features["problem_choice"] if targets is not None: features["targets"] = targets @@ -299,11 +293,13 @@ def nth_model(n): sharded_logits, total_loss = result_list[1:], result_list[0] if mode == tf.contrib.learn.ModeKeys.EVAL: - logits = tf.concat(sharded_logits, 0) # For evaluation, return the logits layer as our predictions. - run_info["predictions"] = logits - train_op = None - return run_info, total_loss, None + logits = tf.concat(sharded_logits, 0) + ret = { + "predictions": logits, + "problem_choice": features["problem_choice"], + } + return ret, total_loss, None assert mode == tf.contrib.learn.ModeKeys.TRAIN @@ -385,7 +381,7 @@ def nth_model(n): del summaries[i] tf.logging.info("Global model_fn finished.") - return run_info, total_loss, train_op + return {"problem_choice": features["problem_choice"]}, total_loss, train_op return model_fn From 1fc6766a52ef21c298778373ce45da4924a57c12 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 29 Aug 2017 14:46:15 -0700 Subject: [PATCH 0329/4095] Create a Problem class for the lm1b dataset. PiperOrigin-RevId: 166906734 --- tensor2tensor/data_generators/lm1b.py | 96 +++++++++++++------ tensor2tensor/data_generators/text_encoder.py | 25 +++-- .../data_generators/text_encoder_test.py | 27 ++++++ 3 files changed, 115 insertions(+), 33 deletions(-) diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index a3771e124..4688911d7 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -29,8 +29,10 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators import tokenizer +from tensor2tensor.utils import registry import tensorflow as tf @@ -53,7 +55,7 @@ def _original_vocab(tmp_dir): """ vocab_url = ("http://download.tensorflow.org/models/LM_LSTM_CNN/" "vocab-2016-09-10.txt") - vocab_filename = os.path.basename(vocab_url) + vocab_filename = os.path.basename(vocab_url + ".en") vocab_filepath = os.path.join(tmp_dir, vocab_filename) if not os.path.exists(vocab_filepath): generator_utils.maybe_download(tmp_dir, vocab_filename, vocab_url) @@ -140,29 +142,69 @@ def _get_or_build_subword_text_encoder(tmp_dir): return ret -def generator(tmp_dir, train, characters=False): - """Generator for lm1b sentences. - - Args: - tmp_dir: a string. - train: a boolean. - characters: a boolean - - Yields: - A dictionary {"inputs": [0], "targets": [<subword ids>]} - """ - _maybe_download_corpus(tmp_dir) - original_vocab = _original_vocab(tmp_dir) - files = (_train_data_filenames(tmp_dir) if train - else [_dev_data_filename(tmp_dir)]) - if characters: - encoder = text_encoder.ByteTextEncoder() - else: - encoder = _get_or_build_subword_text_encoder(tmp_dir) - for filepath in files: - tf.logging.info("filepath = %s", filepath) - for line in tf.gfile.Open(filepath): - tokens = encoder.encode( - _replace_oov(original_vocab, text_encoder.native_to_unicode(line))) - tokens.append(EOS) - yield {"inputs": [0], "targets": tokens} +@registry.register_problem("languagemodel_1b32k") +class LanguagemodelLm1b(problem.Text2TextProblem): + """A language model on full English Wikipedia.""" + + @property + def is_character_level(self): + return False + + @property + def has_inputs(self): + return True + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def num_shards(self): + return 10 + + @property + def vocab_name(self): + return "vocab-2016-09-10.txt.en" + + @property + def use_subword_tokenizer(self): + return True + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + @property + def use_train_shards_for_dev(self): + return True + + def generator(self, tmp_dir, train, characters=False): + """Generator for lm1b sentences. + + Args: + tmp_dir: a string. + train: a boolean. + characters: a boolean + + Yields: + A dictionary {"inputs": [0], "targets": [<subword ids>]} + """ + _maybe_download_corpus(tmp_dir) + original_vocab = _original_vocab(tmp_dir) + files = (_train_data_filenames(tmp_dir) if train + else [_dev_data_filename(tmp_dir)]) + if characters: + encoder = text_encoder.ByteTextEncoder() + else: + encoder = _get_or_build_subword_text_encoder(tmp_dir) + for filepath in files: + tf.logging.info("filepath = %s", filepath) + for line in tf.gfile.Open(filepath): + tokens = encoder.encode( + _replace_oov(original_vocab, text_encoder.native_to_unicode(line))) + tokens.append(EOS) + yield {"inputs": [0], "targets": tokens} diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index f05393313..fe9f0ad57 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -647,19 +647,32 @@ def _init_alphabet_from_tokens(self, tokens): self._alphabet = {c for token in tokens for c in token} self._alphabet |= _ESCAPE_CHARS - def _load_from_file(self, filename): - """Load from a file. + def _load_from_file_object(self, f): + """Load from a file object. Args: - filename: filename to load vocabulary from + f: File object to load vocabulary from """ subtoken_strings = [] - with tf.gfile.Open(filename) as f: - for line in f: - subtoken_strings.append(native_to_unicode(line.strip()[1:-1])) + for line in f: + s = line.strip() + # Some vocab files wrap words in single quotes, but others don't + if (len(s) > 1 and ((s.startswith("'") and s.endswith("'")) or + (s.startswith("\"") and s.endswith("\"")))): + s = s[1:-1] + subtoken_strings.append(native_to_unicode(s)) self._init_subtokens_from_list(subtoken_strings) self._init_alphabet_from_tokens(subtoken_strings) + def _load_from_file(self, filename): + """Load from a file. + + Args: + filename: Filename to load vocabulary from + """ + with tf.gfile.Open(filename) as f: + self._load_from_file_object(f) + def store_to_file(self, filename): with tf.gfile.Open(filename, "w") as f: for subtoken_string in self._all_subtoken_strings: diff --git a/tensor2tensor/data_generators/text_encoder_test.py b/tensor2tensor/data_generators/text_encoder_test.py index eadfcfb5e..0886449ee 100644 --- a/tensor2tensor/data_generators/text_encoder_test.py +++ b/tensor2tensor/data_generators/text_encoder_test.py @@ -21,6 +21,7 @@ from __future__ import unicode_literals import collections +import io import os import shutil @@ -31,6 +32,14 @@ import tensorflow as tf +class NativeToUnicodeTest(tf.test.TestCase): + + def test_native_to_unicode(self): + s = r'foo bar' + self.assertIsInstance(text_encoder.native_to_unicode(s), unicode) + self.assertEqual(text_encoder.native_to_unicode(s), u'foo bar') + + class EscapeUnescapeTokenTest(tf.test.TestCase): def test_escape_token(self): @@ -186,6 +195,24 @@ def test_raises_exception_when_not_encodable(self): with self.assertRaises(AssertionError): encoder.encode(original) + def test_load_from_file(self): + # Test a vocab file with words not wrapped with single quotes + encoder = text_encoder.SubwordTextEncoder() + correct_vocab = ['the', 'and', 'of'] + vocab = io.StringIO('the\n' + 'and\n' + 'of\n') + encoder._load_from_file_object(vocab) + self.assertEqual(encoder._all_subtoken_strings, correct_vocab) + + # Test a vocab file with words wrapped in single quotes + encoder = text_encoder.SubwordTextEncoder() + vocab = io.StringIO('\'the\'\n' + '\'and\'\n' + '\'of\'\n') + encoder._load_from_file_object(vocab) + self.assertEqual(encoder._all_subtoken_strings, correct_vocab) + if __name__ == '__main__': tf.test.main() From 357c9d42ed2bc2a3c6b6646163fae53b211ffb18 Mon Sep 17 00:00:00 2001 From: Katherine Lee <katherinelee@google.com> Date: Tue, 29 Aug 2017 15:25:02 -0700 Subject: [PATCH 0330/4095] Adding example problem to T2T documentation PiperOrigin-RevId: 166912906 --- docs/new_problem.md | 232 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 229 insertions(+), 3 deletions(-) diff --git a/docs/new_problem.md b/docs/new_problem.md index 98669e8c8..4dd4246bf 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -9,8 +9,234 @@ welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CO [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) -Here we show how to hook-up your own data to train T2T models on it. +Let's add a new dataset together and train the transformer model. We'll be learning to define English words by training the transformer to "translate" between English words and their definitions on a character level. -## The Problem class +# About the Problem -TODO: complete. +For each problem we want to tackle we create a new problem class and register it. Let's call our problem `Word2def`. + +Since many text2text problems share similar methods, there's already a class called `Text2TextProblem` that extends the base problem class, `Problem` (both found in `problem.py`). + +For our problem, we can go ahead and create the file `word2def.py` in the `data_generators` folder and add our new problem, `Word2def`, which extends `TranslateProblem`. Let's also register it while we're at it so we can specify the problem through flags. + +```python +@registry.register_problem() +class Word2def(problem.Text2TextProblem): + """Problem spec for English word to dictionary definition.""" + return NotImplementedError() +``` + +We need to implement the following methods from `Text2TextProblem` in our new class: +* is_character_level +* targeted_vocab_size +* generator +* input_space_id +* target_space_id +* num_shards +* vocab_name +* use_subword_tokenizer + +Let's tackle them one by one: + +**input_space_id, target_space_id, is_character_level, targeted_vocab_size, use_subword_tokenizer**: + +SpaceIDs tell Tensor2Tensor what sort of space the input and target tensors are in. These are things like, EN_CHR (English character), EN_TOK (English token), AUDIO_WAV (audio waveform), IMAGE, DNA (genetic bases). The complete list can be found at `data_generators/problem.py` in the class `SpaceID`. + +Since we're generating definitions and feeding in words at the character level, we set `is_character_level` to true, and use the same SpaceID, EN_CHR, for both input and target. Additionally, since we aren't using tokens, we don't need to give a `targeted_vocab_size` or define `use_subword_tokenizer`. + +**vocab_name**: + +`vocab_name` will be used to name your vocabulary files. We can call ours `'vocab.word2def.en'` + +**num_shards**: + +The number of shards to break data files into. + +```python +@registry.register_problem() +class Word2def(problem.Text2TextProblem): + """Problem spec for English word to dictionary definition.""" + def is_character_level(self): + return True + + @property + def vocab_name(self): + return "vocab.word2def.en" + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def num_shards(self): + return 100 + + @property + def use_subword_tokenizer(self): + return False +``` + +**generator**: + +We're almost done. `generator` generates the training and evaluation data and stores them in files like "word2def_train.lang1" in your DATA_DIR. Thankfully several commonly used methods like `character_generator`, and `token_generator` are already written in the file `wmt.py`. We will import `character_generator` and write: +```python + def generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS + tag = "train" if train else "dev" + return character_generator(datasets[0], datasets[1], character_vocab, EOS) +``` + +Now our `word2def.py` file looks like the below: + +```python +@registry.register_problem() +class Word2def(problem.Text2TextProblem): + """Problem spec for English word to dictionary definition.""" + @property + def is_character_level(self): + return True + + @property + def vocab_name(self): + return "vocab.word2def.en" + + def generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS + tag = "train" if train else "dev" + return character_generator(datasets[0], datasets[1], character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def num_shards(self): + return 100 + + @property + def use_subword_tokenizer(self): + return False +``` + +## Data: +Now we need to tell Tensor2Tensor where our data is located. + +I've gone ahead and split all words into a train and test set and saved them in files called `words.train.txt`, `words.test.txt`, +`definitions.train.txt`, and `definitions.test.txt` in a directory called `LOCATION_OF_DATA/`. Let's tell T2T where these files are: + +```python +# English Word2def datasets +_WORD2DEF_TRAIN_DATASETS = [ + [ + "LOCATION_OF_DATA/", ("words_train.txt", "definitions_train.txt") + ] +] +_WORD2DEF_TEST_DATASETS = [ + [ + "LOCATION_OF_DATA", ("words_test.txt", "definitions_test.txt") + ] +] +``` + +## Putting it all together + +Now our `word2def.py` file looks like: (with the correct imports) +```python +""" Problem definition for word to dictionary definition. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile # do we need this import + +import google3 + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators.wmt import character_generator + +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# English Word2def datasets +_WORD2DEF_TRAIN_DATASETS = [ + LOCATION_OF_DATA+'words_train.txt', + LOCATION_OF_DATA+'definitions_train.txt' +] + +_WORD2DEF_TEST_DATASETS = [ + LOCATION_OF_DATA+'words_test.txt', + LOCATION_OF_DATA+'definitions_test.txt' +] + +@registry.register_problem() +class Word2def(problem.Text2TextProblem): + """Problem spec for English word to dictionary definition.""" + @property + def is_character_level(self): + return True + + @property + def vocab_name(self): + return "vocab.word2def.en" + + def generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS + tag = "train" if train else "dev" + return character_generator(datasets[0], datasets[1], character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def num_shards(self): + return 100 + + @property + def use_subword_tokenizer(self): + return False + +``` + +# Hyperparameters +All hyperparamters inherit from `_default_hparams()` in `problem.py.` If you would like to customize your hyperparameters, add another method to the file `problem_hparams.py`. + +# Run the problem +Now that we've gotten our problem set up, let's train a model and generate definitions. + +We specify our problem name, the model, and hparams. +```bash +PROBLEM=word2def +MODEL=transformer +HPARAMS=transofmer_base_single_gpu +``` + +The rest of the steps are as given in the [walkthrough](walkthrough.md). + + +What if we wanted to train a model to generate words given definitions? In T2T, we can change the problem name to be `PROBLEM=word2def_rev`. + +All done. Let us know what definitions your model generated. From ffe238633a3474df6f42eda93d132f79cd6e0dba Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 29 Aug 2017 15:42:35 -0700 Subject: [PATCH 0331/4095] Added optional memory-efficient versions of conv-hidden-relu and self-attention. PiperOrigin-RevId: 166915506 --- tensor2tensor/layers/common_attention.py | 149 ++++++++++++++++++ tensor2tensor/layers/common_attention_test.py | 44 ++++++ tensor2tensor/layers/common_layers.py | 123 ++++++++++++++- tensor2tensor/layers/common_layers_test.py | 37 +++++ tensor2tensor/models/attention_lm_moe.py | 63 ++++++-- 5 files changed, 398 insertions(+), 18 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 1faea6401..253e9bee5 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -30,6 +30,8 @@ import tensorflow as tf +from tensorflow.python.framework import function + def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): """Adds a bunch of sinusoids of different frequencies to a Tensor. @@ -1100,3 +1102,150 @@ def local_expert_attention( additional_dispatch_params=additional_dispatch_params, pad_remover=pad_remover ) + + +def scaled_dot_product_attention_simple(q, k, v, bias, name=None): + """scaled dot-product attention. One head. One spatial dimension. + + Args: + q: a Tensor with shape [batch, length_q, depth_k] + k: a Tensor with shape [batch, length_kv, depth_k] + v: a Tensor with shape [batch, length_kv, depth_v] + bias: optional Tensor broadcastable to [batch, length_q, length_kv] + name: an optional string + + Returns: + A Tensor. + """ + with tf.variable_scope( + name, default_name="scaled_dot_product_attention_simple"): + scalar = tf.rsqrt(tf.to_float(tf.shape(q)[2])) + logits = tf.matmul(q * scalar, k, transpose_b=True) + if bias is not None: + logits += bias + weights = tf.nn.softmax(logits, name="attention_weights") + return tf.matmul(weights, v) + + +_function_cache = {} + + +def multihead_self_attention_memory_efficient(x, + bias, + num_heads, + head_size=None, + epsilon=1e-6, + forget=True, + test_vars=None, + name=None): + """Multihead scaled-dot-product self-attention. + + Includes layer norm. + + Returns multihead-self-attention(layer_norm(x)) + + Computes one attention head at a time to avoid exhausting memory. + + If forget=True, then forget all forwards activations and recompute on + the backwards pass. + + Args: + x: a Tensor with shape [batch, length, input_size] + bias: an attention bias tensor broadcastable to [batch, 1, length, length] + num_heads: an integer + head_size: an optional integer - defaults to input_size/num_heads + epsilon: a float, for layer norm + forget: a boolean - forget forwards activations and recompute on backprop + test_vars: optional tuple of variables for testing purposes + name: an optional string + + Returns: + A Tensor. + """ + io_size = x.get_shape().as_list()[-1] + if head_size is None: + assert io_size % num_heads == 0 + head_size = io_size / num_heads + + def forward_internal(x, wqkv, wo, attention_bias, norm_scale, norm_bias): + """Forward function.""" + n = common_layers.layer_norm_compute_python( + x, epsilon, norm_scale, norm_bias) + wqkv_split = tf.unstack(wqkv, num=num_heads) + wo_split = tf.unstack(wo, num=num_heads) + y = 0 + for h in xrange(num_heads): + with tf.control_dependencies([y] if h > 0 else []): + combined = tf.nn.conv1d(n, wqkv_split[h], 1, "SAME") + q, k, v = tf.split(combined, 3, axis=2) + o = scaled_dot_product_attention_simple(q, k, v, attention_bias) + y += tf.nn.conv1d(o, wo_split[h], 1, "SAME") + return y + + key = ("multihead_self_attention_memory_efficient %s %s" % + (num_heads, epsilon)) + if not forget: + forward_fn = forward_internal + elif key in _function_cache: + forward_fn = _function_cache[key] + else: + @function.Defun(compiled=True) + def grad_fn(x, wqkv, wo, attention_bias, norm_scale, norm_bias, dy): + with tf.control_dependencies([dy]): + n = common_layers.layer_norm_compute_python( + x, epsilon, norm_scale, norm_bias) + wqkv_split = tf.unstack(wqkv, num=num_heads) + wo_split = tf.unstack(wo, num=num_heads) + deps = [] + dwqkvs = [] + dwos = [] + dn = 0 + for h in xrange(num_heads): + with tf.control_dependencies(deps): + combined = tf.nn.conv1d(n, wqkv_split[h], 1, "SAME") + q, k, v = tf.split(combined, 3, axis=2) + o = scaled_dot_product_attention_simple(q, k, v, attention_bias) + partial_y = tf.nn.conv1d(o, wo_split[h], 1, "SAME") + pdn, dwqkvh, dwoh = tf.gradients( + ys=[partial_y], + xs=[n, wqkv_split[h], wo_split[h]], + grad_ys=[dy]) + dn += pdn + dwqkvs.append(dwqkvh) + dwos.append(dwoh) + deps = [dn, dwqkvh, dwoh] + dwqkv = tf.stack(dwqkvs) + dwo = tf.stack(dwos) + with tf.control_dependencies(deps): + dx, dnorm_scale, dnorm_bias = tf.gradients( + ys=[n], xs=[x, norm_scale, norm_bias], grad_ys=[dn]) + return (dx, dwqkv, dwo, tf.zeros_like(attention_bias), + dnorm_scale, dnorm_bias) + + @function.Defun(grad_func=grad_fn, compiled=True, + separate_compiled_gradients=True) + def forward_fn(x, wqkv, wo, attention_bias, norm_scale, norm_bias): + return forward_internal( + x, wqkv, wo, attention_bias, norm_scale, norm_bias) + _function_cache[key] = forward_fn + + if bias is not None: + bias = tf.squeeze(bias, 1) + with tf.variable_scope(name, default_name="multihead_attention", values=[x]): + # TODO(noam): it would be nice to save memory by casting x to float16 + # here, but this causes problems with the gradients. Figure out if there + # is a way to leave the gradients as float32. + if test_vars is not None: + wqkv, wo, norm_scale, norm_bias = list(test_vars) + else: + wqkv = tf.get_variable( + "wqkv", [num_heads, 1, io_size, 3 * head_size], + initializer=tf.random_normal_initializer(stddev=io_size**-0.5)) + wo = tf.get_variable( + "wo", [num_heads, 1, head_size, io_size], + initializer=tf.random_normal_initializer( + stddev=(head_size * num_heads)**-0.5)) + norm_scale, norm_bias = common_layers.layer_norm_vars(io_size) + y = forward_fn(x, wqkv, wo, bias, norm_scale, norm_bias) + y.set_shape(x.get_shape()) + return y diff --git a/tensor2tensor/layers/common_attention_test.py b/tensor2tensor/layers/common_attention_test.py index e49999fbb..6664bcc2d 100644 --- a/tensor2tensor/layers/common_attention_test.py +++ b/tensor2tensor/layers/common_attention_test.py @@ -23,6 +23,7 @@ import numpy as np from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_layers import tensorflow as tf @@ -117,6 +118,49 @@ def testLocalUnmaskedAttention2DMatchingBlockLength(self): res = session.run(a) self.assertEqual(res.shape, (5, 4, 25, 25, 16)) + def testMultiheadSelfAttentionMemoryEfficient(self): + num_heads = 4 + io_size = 16 + batch = 2 + length = 7 + head_size = 5 + x = np.random.rand(batch, length, io_size) + dy = np.random.rand(batch, length, io_size) + with self.test_session() as session: + x = tf.to_float(x) + dy = tf.to_float(dy) + bias = common_attention.attention_bias_lower_triangle(length) + wqkv = tf.get_variable( + "wqkv", [num_heads, 1, io_size, 3 * head_size], + initializer=tf.random_normal_initializer(stddev=io_size**-0.5)) + wo = tf.get_variable( + "wo", [num_heads, 1, head_size, io_size], + initializer=tf.random_normal_initializer( + stddev=(head_size * num_heads)**-0.5)) + norm_scale, norm_bias = common_layers.layer_norm_vars(io_size) + y = common_attention.multihead_self_attention_memory_efficient( + x, bias, num_heads, head_size=head_size, forget=False, + test_vars=(wqkv, wo, norm_scale, norm_bias)) + y_forget = common_attention.multihead_self_attention_memory_efficient( + x, bias, num_heads, head_size=head_size, forget=True, + test_vars=(wqkv, wo, norm_scale, norm_bias)) + dx, dwqkv, dwo, dnorm_scale, dnorm_bias = tf.gradients( + ys=[y], xs=[x, wqkv, wo, norm_scale, norm_bias], grad_ys=[dy]) + dx_f, dwqkv_f, dwo_f, dnorm_scale_f, dnorm_bias_f = tf.gradients( + ys=[y_forget], xs=[x, wqkv, wo, norm_scale, norm_bias], grad_ys=[dy]) + session.run(tf.global_variables_initializer()) + (y, y_forget, + dx, dwqkv, dwo, dnorm_scale, dnorm_bias, + dx_f, dwqkv_f, dwo_f, dnorm_scale_f, dnorm_bias_f) = session.run( + [y, y_forget, + dx, dwqkv, dwo, dnorm_scale, dnorm_bias, + dx_f, dwqkv_f, dwo_f, dnorm_scale_f, dnorm_bias_f]) + self.assertAllClose(y, y_forget) + self.assertAllClose(dwo, dwo_f) + self.assertAllClose(dwqkv, dwqkv_f) + self.assertAllClose(dnorm_scale, dnorm_scale_f) + self.assertAllClose(dnorm_bias, dnorm_bias_f) + self.assertAllClose(dx, dx_f) if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index ad899bfbf..4b09e70cb 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -425,6 +425,15 @@ def conv_fn(inputs, filters, kernel_size, **kwargs): return conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs) +def layer_norm_vars(filters): + """Create Variables for layer norm.""" + scale = tf.get_variable( + "layer_norm_scale", [filters], initializer=tf.ones_initializer()) + bias = tf.get_variable( + "layer_norm_bias", [filters], initializer=tf.zeros_initializer()) + return scale, bias + + def layer_norm_compute_python(x, epsilon, scale, bias): """Layer norm raw computation.""" mean = tf.reduce_mean(x, axis=[-1], keep_dims=True) @@ -1773,7 +1782,7 @@ def smoothing_cross_entropy_factored_grad(op, dy): b = op.inputs[1] labels = op.inputs[2] confidence = op.inputs[3] - num_splits = 32 + num_splits = 16 vocab_size = tf.shape(b)[0] labels = approximate_split(labels, num_splits) a = approximate_split(a, num_splits) @@ -1817,7 +1826,7 @@ def smoothing_cross_entropy_factored(a, b, labels, confidence): Returns: A Tensor with shape [batch] """ - num_splits = 32 + num_splits = 16 vocab_size = tf.shape(b)[0] labels = approximate_split(labels, num_splits) a = approximate_split(a, num_splits) @@ -1957,3 +1966,113 @@ def identity(*args): id_out = identity(*(inputs + train_vars + outputs)) return id_out + + +_function_cache = {} + + +def conv_hidden_relu_memory_efficient(x, + filter_size, + epsilon=1e-6, + forget=True, + test_vars=None, + name=None): + """LayerNorm, Conv, ReLU, Conv. + + All convolutions have kernel size 1. + + returns conv(relu(conv(layer_norm(x)))) + + Args: + x: input Tensor with shape [batch, length, io_size] + filter_size: an integer - size of the hidden layer. + epsilon: a float (for layer norm) + forget: a boolean - forget forwards activations and recompute on backprop + test_vars: optional tuple of variables for testing purposes + name: an optional string + + Returns: + a Tensor with shape [batch, length, io_size] + """ + io_size = x.get_shape().as_list()[-1] + + def forward_internal(x, f1, f2, scale, bias): + """Forward function.""" + # split batch-wise to avoid exhausting memory in cast the batch is large + # and the hidden layer is large. + num_splits = 4 + x_flat = tf.reshape(x, [-1, 1, tf.shape(x)[2]]) + xs = approximate_split(x_flat, num_splits) + ys = [] + for i in xrange(num_splits): + with tf.control_dependencies(ys[-1:]): + n = layer_norm_compute_python(xs[i], epsilon, scale, bias) + y = tf.nn.conv1d(n, f1, 1, "SAME") + y = tf.nn.relu(y) + y = tf.nn.conv1d(y, f2, 1, "SAME") + ys.append(y) + y = tf.concat(ys, 0) + y = tf.reshape(y, tf.shape(x)) + return y + key = ("conv_hidden_relu_memory_efficient %s" % epsilon) + if not forget: + forward_fn = forward_internal + elif key in _function_cache: + forward_fn = _function_cache[key] + else: + @function.Defun(compiled=True) + def grad_fn(x, f1, f2, scale, bias, dy): + with tf.control_dependencies([dy]): + num_splits = 4 + x_shape = tf.shape(x) + flat_shape = [-1, 1, x_shape[2]] + x = tf.reshape(x, flat_shape) + dy = tf.reshape(dy, flat_shape) + xs = approximate_split(x, num_splits) + dys = approximate_split(dy, num_splits) + dxs = [] + df1 = 0 + df2 = 0 + dscale = 0 + dbias = 0 + deps = [] + for i in xrange(num_splits): + with tf.control_dependencies(deps): + n = layer_norm_compute_python(xs[i], epsilon, scale, bias) + y = tf.nn.conv1d(n, f1, 1, "SAME") + y = tf.nn.relu(y) + y = tf.nn.conv1d(y, f2, 1, "SAME") + dxi, pdf1, pdf2, pdscale, pdbias = tf.gradients( + ys=[y], xs=[xs[i], f1, f2, scale, bias], grad_ys=[dys[i]]) + df1 += pdf1 + df2 += pdf2 + dscale += pdscale + dbias += pdbias + dxs.append(dxi) + deps = [dxi, df1, df2, dscale, dbias] + with tf.control_dependencies(deps): + dx = tf.concat(dxs, 0) + dx = tf.reshape(dx, x_shape) + return dx, df1, df2, dscale, dbias + + @function.Defun(grad_func=grad_fn, compiled=True, + separate_compiled_gradients=True) + def forward_fn(x, f1, f2, scale, bias): + return forward_internal(x, f1, f2, scale, bias) + + with tf.variable_scope(name, default_name="ffn2", values=[x]): + # TODO(noam): it would be nice to save memory by casting x to float16 + # here, but this causes problems with the gradients. Figure out if there + # is a way to leave the gradients as float32. + if test_vars is not None: + f1, f2, scale, bias = list(test_vars) + else: + f1 = tf.get_variable("f1", [1, io_size, filter_size]) + f2 = tf.get_variable("f2", [1, filter_size, io_size]) + scale, bias = layer_norm_vars(io_size) + if forget: + y = forward_fn(x, f1, f2, scale, bias) + else: + y = forward_internal(x, f1, f2, scale, bias) + y.set_shape(x.get_shape()) + return y diff --git a/tensor2tensor/layers/common_layers_test.py b/tensor2tensor/layers/common_layers_test.py index 61023938f..d11f8ce2c 100644 --- a/tensor2tensor/layers/common_layers_test.py +++ b/tensor2tensor/layers/common_layers_test.py @@ -474,6 +474,43 @@ def testFactoredTensorImplicitConversion(self): out = session.run(d) self.assertEqual(out.shape, (3, 4, 6)) + def testConvHiddenReluMemoryEfficient(self): + batch = 3 + length = 23 + io_size = 16 + filter_size = 7 + x = np.random.rand(batch, length, io_size) + dy = np.random.rand(batch, length, io_size) + with self.test_session() as session: + x = tf.to_float(x) + dy = tf.to_float(dy) + f1 = tf.get_variable("f1", [1, io_size, filter_size]) + f2 = tf.get_variable("f2", [1, filter_size, io_size]) + norm_scale, norm_bias = common_layers.layer_norm_vars(io_size) + y = common_layers.conv_hidden_relu_memory_efficient( + x, filter_size, forget=False, + test_vars=(f1, f2, norm_scale, norm_bias)) + y_forget = common_layers.conv_hidden_relu_memory_efficient( + x, filter_size, forget=True, + test_vars=(f1, f2, norm_scale, norm_bias)) + dx, df1, df2, dnorm_scale, dnorm_bias = tf.gradients( + ys=[y], xs=[x, f1, f2, norm_scale, norm_bias], grad_ys=[dy]) + dx_f, df1_f, df2_f, dnorm_scale_f, dnorm_bias_f = tf.gradients( + ys=[y_forget], xs=[x, f1, f2, norm_scale, norm_bias], grad_ys=[dy]) + session.run(tf.global_variables_initializer()) + (y, y_forget, + dx, df1, df2, dnorm_scale, dnorm_bias, + dx_f, df1_f, df2_f, dnorm_scale_f, dnorm_bias_f) = session.run( + [y, y_forget, + dx, df1, df2, dnorm_scale, dnorm_bias, + dx_f, df1_f, df2_f, dnorm_scale_f, dnorm_bias_f]) + self.assertAllClose(y, y_forget) + self.assertAllClose(df2, df2_f) + self.assertAllClose(df1, df1_f) + self.assertAllClose(dnorm_scale, dnorm_scale_f) + self.assertAllClose(dnorm_bias, dnorm_bias_f) + self.assertAllClose(dx, dx_f) + class FnWithCustomGradTest(tf.test.TestCase): diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 03073d144..3b72ea9c2 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -40,16 +40,18 @@ import tensorflow as tf -class AttentionMoeType(object): - NONE = "none" - LOCAL = "local" - GLOBAL = "global" +class AttentionType(object): + MULTIHEAD = "multihead" + LOCAL_EXPERTS = "local_experts" + GLOBAL_MOE = "global_experts" + MEMORY_EFFICIENT = "memory_efficient" @staticmethod def get_choices(): return [ - AttentionMoeType.NONE, - AttentionMoeType.LOCAL, + AttentionType.MULTIHEAD, + AttentionType.LOCAL_EXPERTS, + AttentionType.MEMORY_EFFICIENT, ] @@ -91,12 +93,11 @@ def _diet_expert(x): for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope( - "attention_{}".format(hparams.attention_moe_type)): - x = preprocess(x) - if hparams.attention_moe_type == AttentionMoeType.NONE: + "attention_{}".format(hparams.attention_type)): + if hparams.attention_type == AttentionType.MULTIHEAD: y = dp( common_attention.multihead_attention, - x, + preprocess(x), None, decoder_self_attention_bias, hparams.attention_key_channels or hparams.hidden_size, @@ -105,10 +106,18 @@ def _diet_expert(x): hparams.num_heads, hparams.attention_dropout, name="decoder_self_attention") - elif hparams.attention_moe_type == AttentionMoeType.LOCAL: + elif hparams.attention_type == AttentionType.MEMORY_EFFICIENT: + assert hparams.layer_preprocess_sequence == "n" + y = dp( + common_attention.multihead_self_attention_memory_efficient, + x, + decoder_self_attention_bias, + hparams.num_heads, + name="decoder_self_attention") + elif hparams.attention_type == AttentionType.LOCAL_EXPERTS: y, loss = dp( common_attention.local_expert_attention, - x, + preprocess(x), k=2, loss_coef=hparams.attention_load_balance, attention_num_experts=hparams.attention_num_experts, @@ -121,7 +130,7 @@ def _diet_expert(x): extra_loss += tf.add_n(loss) / dp.n else: raise ValueError("Only {} supported for now.".format( - AttentionMoeType.get_choices())) + AttentionType.get_choices())) x = postprocess(x, y) with tf.variable_scope("ffn"): if str(layer) in hparams.moe_layers.split(","): @@ -136,6 +145,12 @@ def _diet_expert(x): k=hparams.moe_k, loss_coef=hparams.moe_loss_coef) extra_loss += loss + elif hparams.memory_efficient_ffn: + assert hparams.layer_preprocess_sequence == "n" + y = dp( + common_layers.conv_hidden_relu_memory_efficient, + x, + hparams.filter_size) else: y = dp( common_layers.conv_hidden_relu, @@ -222,7 +237,7 @@ def attention_lm_moe_base(): hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("moe_layers", "2") # comma separated list of layer numbers # moe params. local attention moe. - hparams.add_hparam("attention_moe_type", AttentionMoeType.NONE) + hparams.add_hparam("attention_type", AttentionType.MULTIHEAD) hparams.add_hparam("attention_num_experts", 16) # Key, query and value dimensions for the attention hparams.add_hparam("attention_kq_size", 128) @@ -230,6 +245,7 @@ def attention_lm_moe_base(): # Loss coef for load balancing hparams.add_hparam("attention_load_balance", 2e-2) hparams.add_hparam("diet_experts", int(False)) + hparams.add_hparam("memory_efficient_ffn", int(False)) return hparams @@ -237,7 +253,7 @@ def attention_lm_moe_base(): def attention_lm_moe_base_ae(): """Base model with attention expert.""" hparams = attention_lm_moe_base() - hparams.attention_moe_type = AttentionMoeType.LOCAL + hparams.attention_type = AttentionType.LOCAL_EXPERTS hparams.max_length = hparams.batch_size hparams.eval_drop_long_sequences = int(True) hparams.batching_mantissa_bits = 2 # More buckets @@ -291,7 +307,7 @@ def attention_lm_attention_moe_tiny(): hparams.moe_layers = "" hparams.attention_num_experts = 128 hparams.filter_size = 8192 - hparams.attention_moe_type = AttentionMoeType.LOCAL + hparams.attention_type = AttentionType.LOCAL_EXPERTS return hparams @@ -347,6 +363,21 @@ def attention_lm_moe_large_diet(): return hparams +@registry.register_hparams +def attention_lm_moe_memory_efficient(): + """Memory-efficient version.""" + hparams = attention_lm_moe_large() + hparams.diet_experts = int(True) + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + hparams.layer_prepostprocess_dropout = 0.0 + hparams.memory_efficient_ffn = True + hparams.attention_type = AttentionType.MEMORY_EFFICIENT + hparams.num_heads = 8 + hparams.factored_logits = int(True) + return hparams + + @registry.register_hparams def attention_lm_moe_32b_diet(): """Unnecessarily large model with 32B params - because we can.""" From 5bf1e82eb9b7200dda958a25e3f0fd474b4a041f Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 29 Aug 2017 15:52:47 -0700 Subject: [PATCH 0332/4095] v1.2.1 PiperOrigin-RevId: 166916922 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 78ba7ba9f..358322d90 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.2.0', + version='1.2.1', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 8353ef283fcdae4fc09c8bec7d925a5882dd102a Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 29 Aug 2017 16:04:17 -0700 Subject: [PATCH 0333/4095] Finish LM1B transfer to Problem, add CNN+DailyMail dataset, style corrections. PiperOrigin-RevId: 166918589 --- tensor2tensor/bin/t2t-datagen | 9 -- tensor2tensor/data_generators/all_problems.py | 1 + .../data_generators/cnn_dailymail.py | 137 ++++++++++++++++++ tensor2tensor/data_generators/imdb.py | 2 +- tensor2tensor/data_generators/lm1b.py | 21 ++- .../data_generators/problem_hparams.py | 33 ----- tensor2tensor/data_generators/text_encoder.py | 4 +- .../data_generators/text_encoder_test.py | 94 ++++++------ tensor2tensor/data_generators/wiki.py | 3 +- 9 files changed, 206 insertions(+), 98 deletions(-) create mode 100644 tensor2tensor/data_generators/cnn_dailymail.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index b960d6106..cb6253524 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -42,7 +42,6 @@ from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing @@ -92,14 +91,6 @@ _SUPPORTED_PROBLEM_GENERATORS = { FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9), lambda: wsj_parsing.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 2**14, 2**9)), - "languagemodel_1b32k": ( - lambda: lm1b.generator(FLAGS.tmp_dir, True), - lambda: lm1b.generator(FLAGS.tmp_dir, False) - ), - "languagemodel_1b_characters": ( - lambda: lm1b.generator(FLAGS.tmp_dir, True, characters=True), - lambda: lm1b.generator(FLAGS.tmp_dir, False, characters=True) - ), "inference_snli32k": ( lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 2a16a802a..f9afa895b 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -23,6 +23,7 @@ from tensor2tensor.data_generators import algorithmic_math from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import cipher +from tensor2tensor.data_generators import cnn_dailymail from tensor2tensor.data_generators import desc2code from tensor2tensor.data_generators import ice_parsing from tensor2tensor.data_generators import image diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py new file mode 100644 index 000000000..db4deae4e --- /dev/null +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -0,0 +1,137 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for the CNN and Daily Mail datasets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +import six +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import registry + +import tensorflow as tf + + +# Links to data from http://cs.nyu.edu/~kcho/DMQA/ +_CNN_STORIES_DRIVE_URL = "https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfTHk4NFg2SndKcjQ" + +_DAILYMAIL_STORIES_DRIVE_URL = "https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfM1BxdkxVaTY2bWs" + + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + + +def _maybe_download_corpora(tmp_dir): + """Download corpora if necessary and unzip them. + + Args: + tmp_dir: directory containing dataset. + + Returns: + filepath of the downloaded corpus file. + """ + cnn_filename = "cnn_stories.tgz" + dailymail_filename = "dailymail_stories.tgz" + cnn_finalpath = os.path.join(tmp_dir, "cnn/stories/") + dailymail_finalpath = os.path.join(tmp_dir, "dailymail/stories/") + if not tf.gfile.Exists(cnn_finalpath): + cnn_file = generator_utils.maybe_download_from_drive( + tmp_dir, cnn_filename, _CNN_STORIES_DRIVE_URL) + with tarfile.open(cnn_file, "r:gz") as cnn_tar: + cnn_tar.extractall(tmp_dir) + if not tf.gfile.Exists(dailymail_finalpath): + dailymail_file = generator_utils.maybe_download_from_drive( + tmp_dir, dailymail_filename, _CNN_STORIES_DRIVE_URL) + with tarfile.open(dailymail_file, "r:gz") as dailymail_tar: + dailymail_tar.extractall(tmp_dir) + return [cnn_finalpath, dailymail_finalpath] + + +def story_generator(tmp_dir): + paths = _maybe_download_corpora(tmp_dir) + for path in paths: + for story_file in tf.gfile.Glob(path + "*"): + story = u"" + for line in tf.gfile.Open(story_file): + line = unicode(line, "utf-8") if six.PY2 else line.decode("utf-8") + story += line + yield story + + +def _story_summary_split(story): + end_pos = story.find("\n\n") # Upto first empty line. + assert end_pos != -1 + return story[:end_pos], story[end_pos:].strip() + + +@registry.register_problem +class SummarizeCnnDailymail32k(problem.Text2TextProblem): + """Summarize CNN and Daily Mail articles to their first paragraph.""" + + @property + def is_character_level(self): + return False + + @property + def has_inputs(self): + return True + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def num_shards(self): + return 100 + + @property + def vocab_name(self): + return "vocab.cnndailymail" + + @property + def use_subword_tokenizer(self): + return True + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + @property + def use_train_shards_for_dev(self): + return True + + def generator(self, data_dir, tmp_dir, _): + encoder = generator_utils.get_or_generate_vocab_inner( + data_dir, self.vocab_file, self.targeted_vocab_size, + lambda: story_generator(tmp_dir)) + for story in story_generator(tmp_dir): + summary, rest = _story_summary_split(story) + encoded_summary = encoder.encode(summary) + [EOS] + encoded_story = encoder.encode(rest) + [EOS] + yield {"inputs": encoded_story, "targets": encoded_summary} diff --git a/tensor2tensor/data_generators/imdb.py b/tensor2tensor/data_generators/imdb.py index 0710a2532..281a03bee 100644 --- a/tensor2tensor/data_generators/imdb.py +++ b/tensor2tensor/data_generators/imdb.py @@ -50,7 +50,7 @@ def vocab_file(self): @property def targeted_vocab_size(self): - return 2**15 + return 2**13 # 8k vocab suffices for this small dataset. def doc_generator(self, imdb_dir, dataset, include_label=False): dirs = [(os.path.join(imdb_dir, dataset, "pos"), True), (os.path.join( diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index 4688911d7..d45e4fe1e 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -142,9 +142,9 @@ def _get_or_build_subword_text_encoder(tmp_dir): return ret -@registry.register_problem("languagemodel_1b32k") -class LanguagemodelLm1b(problem.Text2TextProblem): - """A language model on full English Wikipedia.""" +@registry.register_problem +class LanguagemodelLm1b32k(problem.Text2TextProblem): + """A language model on the 1B words corpus.""" @property def is_character_level(self): @@ -156,6 +156,8 @@ def has_inputs(self): @property def input_space_id(self): + # Ratio of dev tokens (including eos) to dev words (including eos) + # 176884 / 159658 = 1.107893; multiply ppx by this to compare results. return problem.SpaceID.EN_TOK @property @@ -164,11 +166,11 @@ def target_space_id(self): @property def num_shards(self): - return 10 + return 100 @property def vocab_name(self): - return "vocab-2016-09-10.txt.en" + return "vocab.lm1b.en" @property def use_subword_tokenizer(self): @@ -208,3 +210,12 @@ def generator(self, tmp_dir, train, characters=False): _replace_oov(original_vocab, text_encoder.native_to_unicode(line))) tokens.append(EOS) yield {"inputs": [0], "targets": tokens} + + +@registry.register_problem +class LanguagemodelLm1bCharacters(LanguagemodelLm1b32k): + """A language model on the 1B words corpus, character level.""" + + @property + def is_character_level(self): + return True diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 38dc05939..e002329bc 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -267,35 +267,6 @@ def audio_timit_tokens(model_hparams, wrong_vocab_size): return p -def lm1b_32k(model_hparams): - """Billion-word language-modeling benchmark, 32k subword vocabulary.""" - p = default_problem_hparams() - # ratio of dev tokens (including eos) to dev words (including eos) - # 176884 / 159658 = 1.107893 - p.perplexity_exponent = 1.107893 - p.input_modality = {} - encoder = text_encoder.SubwordTextEncoder( - os.path.join(model_hparams.data_dir, "lm1b_32k.subword_text_encoder")) - p.target_modality = (registry.Modalities.SYMBOL, encoder.vocab_size) - p.vocabulary = {"targets": encoder} - p.target_space_id = 3 - return p - - -def lm1b_characters(unused_model_hparams): - """Billion-word language-modeling benchmark, 32k subword vocabulary.""" - p = default_problem_hparams() - # ratio of dev tokens (including eos) to dev words (including eos) - # 826189 / 159658 = 5.174742 - p.perplexity_exponent = 5.174742 - p.input_modality = {} - encoder = text_encoder.ByteTextEncoder() - p.target_modality = (registry.Modalities.SYMBOL, encoder.vocab_size) - p.vocabulary = {"targets": encoder} - p.target_space_id = 2 - return p - - def wmt_parsing_characters(model_hparams): """English to parse tree translation benchmark.""" del model_hparams # Unused. @@ -404,10 +375,6 @@ def img2img_imagenet(unused_model_hparams): lambda p: audio_timit_tokens(p, 2**13), "audio_timit_tokens_8k_test": lambda p: audio_timit_tokens(p, 2**13), - "languagemodel_1b_characters": - lm1b_characters, - "languagemodel_1b32k": - lm1b_32k, "parsing_english_ptb8k": lambda p: wmt_parsing_tokens(p, 2**13), "parsing_english_ptb16k": diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index fe9f0ad57..ac9260cfa 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -657,8 +657,8 @@ def _load_from_file_object(self, f): for line in f: s = line.strip() # Some vocab files wrap words in single quotes, but others don't - if (len(s) > 1 and ((s.startswith("'") and s.endswith("'")) or - (s.startswith("\"") and s.endswith("\"")))): + if ((s.startswith("'") and s.endswith("'")) or + (s.startswith("\"") and s.endswith("\""))): s = s[1:-1] subtoken_strings.append(native_to_unicode(s)) self._init_subtokens_from_list(subtoken_strings) diff --git a/tensor2tensor/data_generators/text_encoder_test.py b/tensor2tensor/data_generators/text_encoder_test.py index 0886449ee..c13078808 100644 --- a/tensor2tensor/data_generators/text_encoder_test.py +++ b/tensor2tensor/data_generators/text_encoder_test.py @@ -35,27 +35,27 @@ class NativeToUnicodeTest(tf.test.TestCase): def test_native_to_unicode(self): - s = r'foo bar' + s = r"foo bar" self.assertIsInstance(text_encoder.native_to_unicode(s), unicode) - self.assertEqual(text_encoder.native_to_unicode(s), u'foo bar') + self.assertEqual(text_encoder.native_to_unicode(s), u"foo bar") class EscapeUnescapeTokenTest(tf.test.TestCase): def test_escape_token(self): escaped = text_encoder._escape_token( - 'Foo! Bar.\nunder_score back\\slash', - set('abcdefghijklmnopqrstuvwxyz .\n') | text_encoder._ESCAPE_CHARS) + "Foo! Bar.\nunder_score back\\slash", + set("abcdefghijklmnopqrstuvwxyz .\n") | text_encoder._ESCAPE_CHARS) self.assertEqual( - '\\70;oo\\33; \\66;ar.\\10;under\\uscore back\\\\slash_', escaped) + "\\70;oo\\33; \\66;ar.\\10;under\\uscore back\\\\slash_", escaped) def test_unescape_token(self): unescaped = text_encoder._unescape_token( - '\\70;oo\\33; \\66;ar.\\10;under\\uscore back\\\\slash_') + "\\70;oo\\33; \\66;ar.\\10;under\\uscore back\\\\slash_") self.assertEqual( - 'Foo! Bar.\nunder_score back\\slash', unescaped) + "Foo! Bar.\nunder_score back\\slash", unescaped) class TokenTextEncoderTest(tf.test.TestCase): @@ -63,7 +63,7 @@ class TokenTextEncoderTest(tf.test.TestCase): @classmethod def setUpClass(cls): """Make sure the test dir exists and is empty.""" - cls.test_temp_dir = os.path.join(tf.test.get_temp_dir(), 'encoder_test') + cls.test_temp_dir = os.path.join(tf.test.get_temp_dir(), "encoder_test") shutil.rmtree(cls.test_temp_dir, ignore_errors=True) os.mkdir(cls.test_temp_dir) @@ -74,8 +74,8 @@ def test_save_and_reload(self): that this test size be "large". """ - corpus = 'A B C D E F G H I J K L M N O P Q R S T U V W X Y Z' - vocab_filename = os.path.join(self.test_temp_dir, 'abc.vocab') + corpus = "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z" + vocab_filename = os.path.join(self.test_temp_dir, "abc.vocab") # Make text encoder from a list and store vocab to fake filesystem. encoder = text_encoder.TokenTextEncoder(None, vocab_list=corpus.split()) @@ -89,7 +89,7 @@ def test_save_and_reload(self): def test_reserved_tokens_in_corpus(self): """Test that we handle reserved tokens appearing in the corpus.""" - corpus = 'A B {} D E F {} G {}'.format(text_encoder.EOS, + corpus = "A B {} D E F {} G {}".format(text_encoder.EOS, text_encoder.EOS, text_encoder.PAD) @@ -106,14 +106,14 @@ class SubwordTextEncoderTest(tf.test.TestCase): def test_encode_decode(self): corpus = ( - 'This is a corpus of text that provides a bunch of tokens from which ' - 'to build a vocabulary. It will be used when strings are encoded ' - 'with a TextEncoder subclass. The encoder was coded by a coder.') - token_counts = collections.Counter(corpus.split(' ')) - alphabet = set(corpus) ^ {' '} + "This is a corpus of text that provides a bunch of tokens from which " + "to build a vocabulary. It will be used when strings are encoded " + "with a TextEncoder subclass. The encoder was coded by a coder.") + token_counts = collections.Counter(corpus.split(" ")) + alphabet = set(corpus) ^ {" "} - original = 'This is a coded sentence encoded by the SubwordTextEncoder.' - token_counts.update(original.split(' ')) + original = "This is a coded sentence encoded by the SubwordTextEncoder." + token_counts.update(original.split(" ")) encoder = text_encoder.SubwordTextEncoder.build_to_target_size( 100, token_counts, 2, 10) @@ -127,31 +127,31 @@ def test_encode_decode(self): # they should appear in the vocabulary even though they are substrings # of other included strings. subtoken_strings = {encoder._all_subtoken_strings[i] for i in encoded} - self.assertIn('encoded_', subtoken_strings) - self.assertIn('coded_', subtoken_strings) - self.assertIn('TextEncoder', encoder._all_subtoken_strings) - self.assertIn('coder', encoder._all_subtoken_strings) + self.assertIn("encoded_", subtoken_strings) + self.assertIn("coded_", subtoken_strings) + self.assertIn("TextEncoder", encoder._all_subtoken_strings) + self.assertIn("coder", encoder._all_subtoken_strings) - # Every character in the corpus should be in the encoder's alphabet and + # Every character in the corpus should be in the encoders alphabet and # its subtoken vocabulary. self.assertTrue(alphabet.issubset(encoder._alphabet)) for a in alphabet: self.assertIn(a, encoder._all_subtoken_strings) def test_unicode(self): - corpus = 'Cat emoticons. \U0001F638 \U0001F639 \U0001F63A \U0001F63B' - token_counts = collections.Counter(corpus.split(' ')) + corpus = "Cat emoticons. \U0001F638 \U0001F639 \U0001F63A \U0001F63B" + token_counts = collections.Counter(corpus.split(" ")) encoder = text_encoder.SubwordTextEncoder.build_to_target_size( 100, token_counts, 2, 10) - self.assertIn('\U0001F638', encoder._alphabet) - self.assertIn('\U0001F63B', encoder._all_subtoken_strings) + self.assertIn("\U0001F638", encoder._alphabet) + self.assertIn("\U0001F63B", encoder._all_subtoken_strings) def test_small_vocab(self): - corpus = 'The quick brown fox jumps over the lazy dog' - token_counts = collections.Counter(corpus.split(' ')) - alphabet = set(corpus) ^ {' '} + corpus = "The quick brown fox jumps over the lazy dog" + token_counts = collections.Counter(corpus.split(" ")) + alphabet = set(corpus) ^ {" "} encoder = text_encoder.SubwordTextEncoder.build_to_target_size( 10, token_counts, 2, 10) @@ -164,12 +164,12 @@ def test_small_vocab(self): self.assertIn(a, encoder._all_subtoken_strings) def test_encodable_when_not_in_alphabet(self): - corpus = 'the quick brown fox jumps over the lazy dog' - token_counts = collections.Counter(corpus.split(' ')) + corpus = "the quick brown fox jumps over the lazy dog" + token_counts = collections.Counter(corpus.split(" ")) encoder = text_encoder.SubwordTextEncoder.build_to_target_size( 100, token_counts, 2, 10) - original = 'This has UPPER CASE letters that are out of alphabet' + original = "This has UPPER CASE letters that are out of alphabet" # Early versions could have an infinite loop when breaking into subtokens # if there was any out-of-alphabet characters in the encoded string. @@ -177,19 +177,19 @@ def test_encodable_when_not_in_alphabet(self): decoded = encoder.decode(encoded) self.assertEqual(original, decoded) - encoded_str = ''.join(encoder._all_subtoken_strings[i] for i in encoded) - self.assertIn('\\84;', encoded_str) + encoded_str = "".join(encoder._all_subtoken_strings[i] for i in encoded) + self.assertIn("\\84;", encoded_str) - @mock.patch.object(text_encoder, '_ESCAPE_CHARS', new=set('\\_;13579')) + @mock.patch.object(text_encoder, "_ESCAPE_CHARS", new=set("\\_;13579")) def test_raises_exception_when_not_encodable(self): - corpus = 'the quick brown fox jumps over the lazy dog' - token_counts = collections.Counter(corpus.split(' ')) + corpus = "the quick brown fox jumps over the lazy dog" + token_counts = collections.Counter(corpus.split(" ")) # Deliberately exclude some required encoding chars from the alphabet # and token list, making some strings unencodable. encoder = text_encoder.SubwordTextEncoder.build_to_target_size( 100, token_counts, 2, 10) - original = 'This has UPPER CASE letters that are out of alphabet' + original = "This has UPPER CASE letters that are out of alphabet" # Previously there was a bug which produced an infinite loop in this case. with self.assertRaises(AssertionError): @@ -198,21 +198,21 @@ def test_raises_exception_when_not_encodable(self): def test_load_from_file(self): # Test a vocab file with words not wrapped with single quotes encoder = text_encoder.SubwordTextEncoder() - correct_vocab = ['the', 'and', 'of'] - vocab = io.StringIO('the\n' - 'and\n' - 'of\n') + correct_vocab = ["the", "and", "of"] + vocab = io.StringIO("the\n" + "and\n" + "of\n") encoder._load_from_file_object(vocab) self.assertEqual(encoder._all_subtoken_strings, correct_vocab) # Test a vocab file with words wrapped in single quotes encoder = text_encoder.SubwordTextEncoder() - vocab = io.StringIO('\'the\'\n' - '\'and\'\n' - '\'of\'\n') + vocab = io.StringIO("\"the\"\n" + "\"and\"\n" + "\"of\"\n") encoder._load_from_file_object(vocab) self.assertEqual(encoder._all_subtoken_strings, correct_vocab) -if __name__ == '__main__': +if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index d1c80f2e1..9610cb1d8 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -31,6 +31,7 @@ from tensor2tensor.data_generators import text_encoder from tensor2tensor.utils import registry +import tensorflow as tf # End-of-sentence marker. EOS = text_encoder.EOS_ID @@ -49,7 +50,7 @@ def _maybe_download_corpus(tmp_dir): "enwiki-20170620-pages-articles-multistream.xml.bz2") corpus_filename = os.path.basename(corpus_url) corpus_filepath = os.path.join(tmp_dir, corpus_filename) - if not os.path.exists(corpus_filepath): + if not tf.gfile.Exists(corpus_filepath): generator_utils.maybe_download(tmp_dir, corpus_filename, corpus_url) return corpus_filepath From b54b7110f5860599acdead43704485a5d4263cb5 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 29 Aug 2017 16:08:05 -0700 Subject: [PATCH 0334/4095] fix docs PiperOrigin-RevId: 166919072 --- docs/new_problem.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/new_problem.md b/docs/new_problem.md index 4dd4246bf..c859c6eba 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -162,8 +162,6 @@ from __future__ import print_function import os import tarfile # do we need this import -import google3 - from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder From a3be70a2b5647a55491d3f77ce3ddee69850b4b7 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 29 Aug 2017 16:17:11 -0700 Subject: [PATCH 0335/4095] Correct cyclic lr scheme, docs, play with AE. PiperOrigin-RevId: 166920305 --- docs/example_life.md | 19 ++- docs/index.md | 1 + tensor2tensor/models/transformer_vae.py | 154 ++++++++++++------------ tensor2tensor/utils/model_builder.py | 2 +- 4 files changed, 100 insertions(+), 76 deletions(-) diff --git a/docs/example_life.md b/docs/example_life.md index 074bcb7c3..2983f5077 100644 --- a/docs/example_life.md +++ b/docs/example_life.md @@ -14,4 +14,21 @@ and how all its parts are connected to work together. ## The Life of an Example -TODO: complete. +A training example passes the following stages in T2T: +* raw input (text from command line or file) +* encoded input after [Problem.feature_encoder](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L173) function `encode` is usually a sparse tensor, e.g., a vector of `tf.int32`s +* batched input after [data input pipeline](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/data_reader.py#L242) where the inputs, after [Problem.preprocess_examples](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L188) are grouped by their length and made into batches. +* dense input after being processed by a [Modality](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/modality.py#L30) function `bottom`. +* dense output after [T2T.model_fn_body](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/t2t_model.py#L542) +* back to sparse output through [Modality](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/modality.py#L30) function `top`. +* if decoding, back through [Problem.feature_encoder](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L173) function `decode` to display on the screen. + +We go into these phases step by step below. + +## Feature Encoders + +TODO: describe [Problem.feature_encoder](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L173) which is a dict of encoders that have `encode` and `decode` functions. + +## Modalities + +TODO: describe [Modality](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/modality.py#L30) which has `bottom` and `top` but also sharded versions and one for targets. diff --git a/docs/index.md b/docs/index.md index b5ee118f4..9394809b3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -25,6 +25,7 @@ documentation, from basic tutorials to full code documentation. ## Deep Dive * [Life of an Example](example_life.md): how all parts of T2T are connected and work together +* [Distributed Training](distributed_training.md) ## Code documentation diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 90277e522..1c566e996 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""VAE Transformer.""" +"""AE Transformer.""" from __future__ import absolute_import from __future__ import division @@ -32,10 +32,9 @@ import tensorflow as tf -def residual_conv(x, repeat, hparams, name, reuse=None): +def residual_conv(x, repeat, k, hparams, name, reuse=None): """A stack of convolution blocks with residual connections.""" with tf.variable_scope(name, reuse=reuse): - k = (3, 1) dilations_and_kernels = [((1, 1), k) for _ in xrange(3)] for i in xrange(repeat): with tf.variable_scope("repeat_%d" % i): @@ -72,15 +71,19 @@ def interleave(x, y, axis=1): return tf.concat([x, y], axis=axis+1) -def decompress_step(source, c, hparams, first_relu, name): +def decompress_step(source, c, hparams, first_relu, is_2d, name): """Decompression function.""" with tf.variable_scope(name): shape = tf.shape(source) if c is not None: source = attend(source, c, hparams, "decompress_attend") + multiplier = 4 if is_2d else 2 + kernel = (1, 1) if is_2d else (1, 1) thicker = common_layers.conv_block( - source, hparams.hidden_size * 2, [((1, 1), (1, 1))], + source, hparams.hidden_size * multiplier, [((1, 1), kernel)], first_relu=first_relu, name="decompress_conv") + if is_2d: + return tf.depth_to_space(thicker, 2) return tf.reshape(thicker, [shape[0], shape[1] * 2, 1, hparams.hidden_size]) @@ -90,7 +93,7 @@ def gumbel_sample(shape): return -tf.log(-tf.log(uniform_samples)) -def dvae(x, hparams, name): +def dae(x, hparams, name): with tf.variable_scope(name): m = tf.layers.dense(x, hparams.v_size, name="mask") logsm = tf.nn.log_softmax(m) @@ -128,7 +131,7 @@ def nearest(x, means, hparams): _, nearest_idx = tf.nn.top_k(- dist, k=1) nearest_hot = tf.one_hot(tf.squeeze(nearest_idx, axis=1), hparams.v_size) nearest_hot = tf.reshape(nearest_hot, [tf.shape(x)[0], tf.shape(x)[1], - 1, hparams.v_size]) + tf.shape(x)[2], hparams.v_size]) return tf.stop_gradient(nearest_hot) @@ -137,21 +140,23 @@ def kmeans(x, means, hparams, name): x_means_hot = nearest(x, means, hparams) x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) kl = tf.reduce_sum(tf.square(x - x_means), axis=-1) - return x_means_hot, x_means_hot, tf.reduce_mean(kl) * 10.0 + return x_means_hot, tf.reduce_mean(kl) * 10.0 -def compress(x, c, hparams, name): +def compress(x, c, is_2d, hparams, name): """Compress.""" with tf.variable_scope(name): # Run compression by strided convs. cur = x + k1 = (3, 3) if is_2d else (3, 1) + k2 = (2, 2) if is_2d else (2, 1) for i in xrange(hparams.num_compress_steps): if c is not None: cur = attend(cur, c, hparams, "compress_attend_%d" % i) - cur = residual_conv(cur, 1, hparams, "compress_rc_%d" % i) + cur = residual_conv(cur, 1, k1, hparams, "compress_rc_%d" % i) cur = common_layers.conv_block( - cur, hparams.hidden_size, [((1, 1), (2, 1))], - strides=(2, 1), name="compress_%d" % i) + cur, hparams.hidden_size, [((1, 1), k2)], + strides=k2, name="compress_%d" % i) return cur @@ -188,7 +193,7 @@ def decode(cond_vec, cond_add, gold, c, ed, hparams): decoder_input = tf.squeeze(decoder_input, axis=2) decoder_input = common_attention.add_timing_signal_1d(decoder_input) bias = common_attention.attention_bias_lower_triangle(tf.shape(gold)[1]) - if c is not None: + if c is not None and len(c.get_shape()) > 3: c = tf.squeeze(c, axis=2) return transformer.transformer_decoder(decoder_input, c, bias, ed, hparams) @@ -205,10 +210,10 @@ def expand_batch(x, mul): return tf.reshape(cx, res_shape) -def vae_compress(x, c, ed, hparams, compress_name, decompress_name, reuse=None): - """Compress, then VAE.""" - with tf.variable_scope(compress_name, reuse=reuse): - cur = compress(x, None, hparams, "compress") +def ae_compress(x, is_2d, hparams, name, reuse=None): + """Compress, then AE.""" + with tf.variable_scope(name, reuse=reuse): + cur = compress(x, None, is_2d, hparams, "compress") # Convolve and ReLu to get state. cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") @@ -216,58 +221,51 @@ def vae_compress(x, c, ed, hparams, compress_name, decompress_name, reuse=None): cur_n = hparams.kmeans_lr_factor * cur cur_n += (1.0 - hparams.kmeans_lr_factor) * tf.stop_gradient(cur) means = tf.get_variable("z_to_dense", [hparams.v_size, hparams.hidden_size]) - # z, kl_loss, mu, log_sigma = vae(cur, hparams, name="vae") - # z_true, z_sample, kl_loss = dvae(cur, hparams, name="dvae") - z_true, z_sample, kl_loss = kmeans(cur_n, means, hparams, name="kmeans") - - # Compress context. - with tf.variable_scope(compress_name, reuse=reuse): - compress_c = compress(c, None, hparams, "compress_context") - dec_c = decode(None, compress_c, cur, None, None, hparams) - c_z = tf.layers.dense(dec_c, hparams.v_size, name="mask_context") - reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( - labels=z_true, logits=c_z) + hot, loss = kmeans(cur_n, means, hparams, name="kmeans") + # We need a linear layer to undo the l2-normalization. + cur = tf.layers.dense(cur, hparams.hidden_size, name="unnormalize") + return cur, hot, loss - # If not training, use the predicted z instead of the autoregressive one. - if hparams.mode == tf.contrib.learn.ModeKeys.INFER: - z = tf.one_hot(tf.argmax(c_z, axis=-1), hparams.v_size) - with tf.variable_scope(decompress_name, reuse=reuse): - # Decompress. - z_sample_flat = tf.reshape(z_sample, [-1, hparams.v_size]) - z = tf.matmul(z_sample_flat, means) - z = tf.reshape(z, [tf.shape(z_sample)[0], tf.shape(z_sample)[1], - 1, hparams.hidden_size]) +def ae_embed(hot, hparams, name, reuse=None): + with tf.variable_scope(name, reuse=reuse): + means = tf.get_variable("z_to_dense", [hparams.v_size, hparams.hidden_size]) + hot_flat = tf.reshape(hot, [-1, hparams.v_size]) + emb = tf.matmul(hot_flat, means) + emb = tf.reshape(emb, [tf.shape(hot)[0], tf.shape(hot)[1], + tf.shape(hot)[2], hparams.hidden_size]) + return tf.layers.dense(emb, hparams.hidden_size, + name="unnormalize", reuse=reuse) + +def ae_decompress(z, ae, x, is_2d, hparams, name, reuse=None): + """Decompress from z, leaking from ae.""" + with tf.variable_scope(name + "_decompress", reuse=reuse): # Leak at the beginning to help train. - z = mix(z, cur, hparams.startup_steps) + z = mix(z, ae, hparams.startup_steps) prob_z = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.8 - prob_z = prob_z if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0 + prob_z = prob_z if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN else 1.0 z = tf.cond(tf.less(tf.random_uniform([]), prob_z), - lambda: z, lambda: cur) - z = tf.layers.dense(z, hparams.hidden_size, name="unnormalize") + lambda: z, lambda: ae) # Dropout for better autoencoding. - z = tf.nn.dropout(z, keep_prob=0.9) + z = tf.nn.dropout(z, keep_prob=1.0 - hparams.z_dropout) # Decompress. d = z for i in xrange(hparams.num_compress_steps): j = hparams.num_compress_steps - i - 1 - d = residual_conv(d, 1, hparams, "decompress_rc_%d" % j) - d = decompress_step(d, c, hparams, i > 0, "decompress_step_%d" % j) + d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j) + d = decompress_step(d, None, hparams, i > 0, is_2d, "decompress_%d" % j) k = 2**hparams.num_compress_steps z_batch = tf.reshape(z, [-1, 1, 1, hparams.hidden_size]) x_batch = tf.reshape(x, [-1, k, 1, hparams.hidden_size]) d_batch = tf.reshape(d, [-1, k, 1, hparams.hidden_size]) - # dec_batch = decode(z_batch, d_batch, x_batch, None, None, hparams) - c = expand_batch(c, tf.shape(x_batch)[0] / tf.shape(x)[0]) - ed = expand_batch(ed, tf.shape(x_batch)[0] / tf.shape(x)[0]) - dec_batch = decode(z_batch, d_batch, x_batch, c, ed, hparams) + dec_batch = decode(z_batch, d_batch, x_batch, None, None, hparams) z = tf.reshape(dec_batch, [-1, tf.shape(x)[1], 1, hparams.hidden_size]) - return z, kl_loss, reconstruct_loss + return z def ffn(x, hparams, name): @@ -277,35 +275,42 @@ def ffn(x, hparams, name): return common_layers.layer_postprocess(x, y, hparams) -def vae_transformer_internal(inputs, targets, target_space, hparams): - """VAE Transformer, main step used for training.""" - with tf.variable_scope("vae_transformer"): - # Prepare inputs, targets, and k. - inputs = common_layers.flatten4d3d(inputs) - input_len = tf.shape(inputs)[1] # Double input size to cover targets. - inputs = tf.pad(inputs, [[0, 0], [0, input_len], [0, 0]]) - inputs.set_shape([None, None, hparams.hidden_size]) - targets = common_layers.flatten4d3d(targets) +def ae_transformer_internal(inputs, targets, target_space, hparams): + """AE Transformer, main step used for training.""" + with tf.variable_scope("ae_transformer"): + # Prepare inputs, targets, k. k = 2**hparams.num_compress_steps - inputs, targets = common_layers.pad_to_same_length( - inputs, targets, final_length_divisible_by=k) - inputs, ed_bias = encode(inputs, target_space, hparams, "input_enc") - - # Compress and vae. - z, kl, r = vae_compress(tf.expand_dims(targets, axis=2), - tf.expand_dims(inputs, axis=2), - ed_bias, hparams, "vae_compress", "vae_decompress") + _, targets = common_layers.pad_to_same_length( + targets, targets, final_length_divisible_by=k) + inputs = common_layers.flatten4d3d(inputs) + inputs, ed = encode(inputs, target_space, hparams, "input_enc") + + # Compress and ae. + ae, hot, kl = ae_compress(targets, False, hparams, "ae") + emb = ae_embed(hot, hparams, "ae", reuse=True) + + # Compress context and run autoregressive decoder on emb-hot. + dec_c = decode(None, None, emb, inputs, ed, hparams) + c_z = tf.layers.dense(dec_c, hparams.v_size, name="mask_context") + reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( + labels=hot, logits=c_z) + # If not training, use the predicted z instead of the autoregressive one. + if hparams.mode == tf.contrib.learn.ModeKeys.INFER: + hot = tf.one_hot(tf.argmax(c_z, axis=-1), hparams.v_size) + + # Decompress, pass for ae loss. + z = ae_decompress(emb, ae, targets, False, hparams, "ae") kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.5)) - r *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.5)) - losses = {"kl": kl, "reconstruction": r} + reconstruct_loss *= common_layers.inverse_exp_decay(hparams.startup_steps) + losses = {"kl": kl, "reconstruction": reconstruct_loss} return z, losses @registry.register_model -class TransformerVAE(t2t_model.T2TModel): +class TransformerAE(t2t_model.T2TModel): def model_fn_body(self, features): - return vae_transformer_internal( + return ae_transformer_internal( features["inputs"], features["targets"], features["target_space_id"], self._hparams) @@ -348,7 +353,7 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, @registry.register_hparams -def transformer_vae_small(): +def transformer_ae_small(): """Set of hyperparameters.""" hparams = transformer.transformer_small() hparams.batch_size = 2048 @@ -358,19 +363,20 @@ def transformer_vae_small(): hparams.add_hparam("num_compress_steps", 4) hparams.add_hparam("kl_warmup_steps", 60000) hparams.add_hparam("startup_steps", 30000) + hparams.add_hparam("kmeans_lr_factor", 0.002) + hparams.add_hparam("z_dropout", 0.1) return hparams @registry.register_hparams -def transformer_vae_base(): +def transformer_ae_base(): """Set of hyperparameters.""" - hparams = transformer_vae_small() + hparams = transformer_ae_small() hparams.hidden_size = 512 hparams.filter_size = 2048 hparams.attention_dropout = 0.0 hparams.relu_dropout = 0.0 hparams.dropout = 0.0 hparams.num_hidden_layers = 4 - hparams.kmeans_lr_factor = 0.002 hparams.z_size = 256 return hparams diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index f2632aa94..34af6c827 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -111,7 +111,7 @@ def learning_rate_decay(): cycle_position = tf.to_float( # Normalize to the interval [-1, 1]. cycle_position - cycle_steps) / float(cycle_steps) cycle_position = 1.0 - tf.abs(cycle_position) # 0 to 1 and back to 0. - return (cycle_position + 0.01) * 10.0 # 10x difference each cycle. + return (cycle_position + 0.1) * 3.0 # 10x difference each cycle (0.3-3). inv_base = tf.exp(tf.log(0.01) / warmup_steps) inv_decay = inv_base**(warmup_steps - step) From f715f858177d7e4cc1e7b0fe6a3fc7a05acdedeb Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 29 Aug 2017 16:18:50 -0700 Subject: [PATCH 0336/4095] Separate CLI t2t_decoder PiperOrigin-RevId: 166920562 --- README.md | 19 ++++-- docs/walkthrough.md | 15 ++++- setup.py | 1 + tensor2tensor/bin/t2t-decoder | 90 ++++++++++++++++++++++++++++ tensor2tensor/utils/trainer_utils.py | 32 ++-------- 5 files changed, 123 insertions(+), 34 deletions(-) create mode 100644 tensor2tensor/bin/t2t-decoder diff --git a/README.md b/README.md index 236d279c2..4e56d7855 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ You can chat with us and other users on with T2T announcements. Here is a one-command version that installs tensor2tensor, downloads the data, -trains an English-German translation model, and lets you use it interactively: +trains an English-German translation model, and evaluates it: ``` pip install tensor2tensor && t2t-trainer \ --generate_data \ @@ -37,7 +37,18 @@ pip install tensor2tensor && t2t-trainer \ --problems=translate_ende_wmt32k \ --model=transformer \ --hparams_set=transformer_base_single_gpu \ - --output_dir=~/t2t_train/base \ + --output_dir=~/t2t_train/base +``` + +You can decode from the model interactively: + +``` +t2t-decoder \ + --data_dir=~/t2t_data \ + --problems=translate_ende_wmt32k \ + --model=transformer \ + --hparams_set=transformer_base_single_gpu \ + --output_dir=~/t2t_train/base --decode_interactive ``` @@ -106,14 +117,12 @@ echo "Goodbye world" >> $DECODE_FILE BEAM_SIZE=4 ALPHA=0.6 -t2t-trainer \ +t2t-decoder \ --data_dir=$DATA_DIR \ --problems=$PROBLEM \ --model=$MODEL \ --hparams_set=$HPARAMS \ --output_dir=$TRAIN_DIR \ - --train_steps=0 \ - --eval_steps=0 \ --decode_beam_size=$BEAM_SIZE \ --decode_alpha=$ALPHA \ --decode_from_file=$DECODE_FILE diff --git a/docs/walkthrough.md b/docs/walkthrough.md index ba4c86872..57d7a03f4 100644 --- a/docs/walkthrough.md +++ b/docs/walkthrough.md @@ -10,7 +10,7 @@ welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CO [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) Here is a one-command version that installs tensor2tensor, downloads the data, -trains an English-German translation model, and lets you use it interactively: +trains an English-German translation model, and evaluates it: ``` pip install tensor2tensor && t2t-trainer \ --generate_data \ @@ -18,7 +18,18 @@ pip install tensor2tensor && t2t-trainer \ --problems=translate_ende_wmt32k \ --model=transformer \ --hparams_set=transformer_base_single_gpu \ - --output_dir=~/t2t_train/base \ + --output_dir=~/t2t_train/base +``` + +You can decode from the model interactively: + +``` +t2t-decoder \ + --data_dir=~/t2t_data \ + --problems=translate_ende_wmt32k \ + --model=transformer \ + --hparams_set=transformer_base_single_gpu \ + --output_dir=~/t2t_train/base --decode_interactive ``` diff --git a/setup.py b/setup.py index 358322d90..b51070c77 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ scripts=[ 'tensor2tensor/bin/t2t-trainer', 'tensor2tensor/bin/t2t-datagen', + 'tensor2tensor/bin/t2t-decoder', 'tensor2tensor/bin/t2t-make-tf-configs', ], install_requires=[ diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder new file mode 100644 index 000000000..5c3eeb293 --- /dev/null +++ b/tensor2tensor/bin/t2t-decoder @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Decode from trained T2T models. + +This binary performs inference using the Estimator API. + +Example usage to decode from dataset: + + t2t-decoder \ + --data_dir ~/data \ + --problems=algorithmic_identity_binary40 \ + --model=transformer + --hparams_set=transformer_base + +Set FLAGS.decode_interactive or FLAGS.decode_from_file for alternative decode +sources. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +# Dependency imports + +from tensor2tensor.utils import decoding +from tensor2tensor.utils import trainer_utils +from tensor2tensor.utils import usr_dir + +import tensorflow as tf + +flags = tf.flags +FLAGS = flags.FLAGS + +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_model calls, that will then be " + "available to the t2t-decoder.") + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + trainer_utils.log_registry() + trainer_utils.validate_flags() + data_dir = os.path.expanduser(FLAGS.data_dir) + output_dir = os.path.expanduser(FLAGS.output_dir) + + hparams = trainer_utils.create_hparams( + FLAGS.hparams_set, FLAGS.problems, data_dir, passed_hparams=FLAGS.hparams) + estimator, _ = trainer_utils.create_experiment_components( + hparams=hparams, + output_dir=output_dir, + data_dir=data_dir, + model_name=FLAGS.model) + + if FLAGS.decode_interactive: + decoding.decode_interactively(estimator) + elif FLAGS.decode_from_file: + decoding.decode_from_file(estimator, FLAGS.decode_from_file) + else: + decoding.decode_from_dataset( + estimator, + FLAGS.problems.split("-"), + return_beams=FLAGS.decode_return_beams, + beam_size=FLAGS.decode_beam_size, + max_predictions=FLAGS.decode_num_samples, + decode_to_file=FLAGS.decode_to_file, + save_images=FLAGS.decode_save_images, + identity_output=FLAGS.identity_output) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 36fdcf6be..a747b9a09 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -27,7 +27,6 @@ from tensor2tensor.data_generators import problem_hparams from tensor2tensor.models import models # pylint: disable=unused-import from tensor2tensor.utils import data_reader -from tensor2tensor.utils import decoding from tensor2tensor.utils import devices from tensor2tensor.utils import input_fn_builder from tensor2tensor.utils import metrics @@ -101,16 +100,13 @@ flags.DEFINE_string("ps_job", "/job:ps", "name of ps job") flags.DEFINE_integer("ps_replicas", 0, "How many ps replicas.") -# Decode flags -# Set one of {decode_from_dataset, decode_interactive, decode_from_file} to -# decode. -flags.DEFINE_bool("decode_from_dataset", False, "Decode from dataset on disk.") -flags.DEFINE_bool("decode_use_last_position_only", False, - "In inference, use last position only for speedup.") +# Decoding flags +flags.DEFINE_string("decode_from_file", None, "Path to decode file") flags.DEFINE_bool("decode_interactive", False, "Interactive local inference mode.") +flags.DEFINE_bool("decode_use_last_position_only", False, + "In inference, use last position only for speedup.") flags.DEFINE_bool("decode_save_images", False, "Save inference input images.") -flags.DEFINE_string("decode_from_file", None, "Path to decode file") flags.DEFINE_string("decode_to_file", None, "Path to inference output file") flags.DEFINE_integer("decode_shards", 1, "How many shards to decode.") flags.DEFINE_integer("decode_problem_id", 0, "Which problem to decode.") @@ -128,7 +124,7 @@ "Maximum number of ids in input. Or <= 0 for no max.") flags.DEFINE_bool("identity_output", False, "To print the output as identity") flags.DEFINE_integer("decode_num_samples", -1, - "Number of samples to decode. Currently used in" + "Number of samples to decode. Currently used in " "decode_from_dataset. Use -1 for all.") @@ -303,7 +299,6 @@ def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): if exp.train_steps > 0 or exp.eval_steps > 0: tf.logging.info("Performing local training and evaluation.") exp.train_and_evaluate() - decode(exp.estimator) else: # Perform distributed training/evaluation. learn_runner.run( @@ -350,20 +345,3 @@ def session_config(): def get_data_filepatterns(data_dir, mode): return data_reader.get_data_filepatterns(FLAGS.problems, data_dir, mode) - - -def decode(estimator): - if FLAGS.decode_interactive: - decoding.decode_interactively(estimator) - elif FLAGS.decode_from_file is not None and FLAGS.decode_from_file is not "": - decoding.decode_from_file(estimator, FLAGS.decode_from_file) - elif FLAGS.decode_from_dataset: - decoding.decode_from_dataset( - estimator, - FLAGS.problems.split("-"), - return_beams=FLAGS.decode_return_beams, - beam_size=FLAGS.decode_beam_size, - max_predictions=FLAGS.decode_num_samples, - decode_to_file=FLAGS.decode_to_file, - save_images=FLAGS.decode_save_images, - identity_output=FLAGS.identity_output) From 210ff829e697b87810cda80cf71c475450cf7378 Mon Sep 17 00:00:00 2001 From: Francesco Mosconi <git@mosconi.me> Date: Wed, 30 Aug 2017 23:24:37 -0700 Subject: [PATCH 0337/4095] FIX: missing xrange import --- tensor2tensor/utils/data_reader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index d55911f19..cde91cc7b 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -27,6 +27,7 @@ import six from six.moves import zip # pylint: disable=redefined-builtin +from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import problem_hparams from tensor2tensor.data_generators.problem import preprocess_examples_common From e2fc3d55256ab0185b9f67f043aa9153da2e5367 Mon Sep 17 00:00:00 2001 From: Brian Albertalli <brian.albertalli@gmail.com> Date: Sat, 2 Sep 2017 13:17:22 -0400 Subject: [PATCH 0338/4095] Clean up of new_problem.md I don't want to step on anyone's toes, but I think this cleanup of new_problem.md would be a lot easier for new users to understand (no unused imports, consistent class references, etc.) It's a great guide otherwise! --- docs/new_problem.md | 48 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/new_problem.md b/docs/new_problem.md index c859c6eba..617921910 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -17,13 +17,15 @@ For each problem we want to tackle we create a new problem class and register it Since many text2text problems share similar methods, there's already a class called `Text2TextProblem` that extends the base problem class, `Problem` (both found in `problem.py`). -For our problem, we can go ahead and create the file `word2def.py` in the `data_generators` folder and add our new problem, `Word2def`, which extends `TranslateProblem`. Let's also register it while we're at it so we can specify the problem through flags. +For our problem, we can go ahead and create the file `word2def.py` in the `data_generators` folder and add our new problem, `Word2def`, which extends `Text2TextProblem`. Let's also register it while we're at it so we can specify the problem through flags. ```python -@registry.register_problem() +@registry.register_problem class Word2def(problem.Text2TextProblem): """Problem spec for English word to dictionary definition.""" - return NotImplementedError() + @property + def is_character_level(self): + ... ``` We need to implement the following methods from `Text2TextProblem` in our new class: @@ -56,6 +58,8 @@ The number of shards to break data files into. @registry.register_problem() class Word2def(problem.Text2TextProblem): """Problem spec for English word to dictionary definition.""" + + @property def is_character_level(self): return True @@ -87,7 +91,6 @@ We're almost done. `generator` generates the training and evaluation data and st def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS - tag = "train" if train else "dev" return character_generator(datasets[0], datasets[1], character_vocab, EOS) ``` @@ -108,7 +111,6 @@ class Word2def(problem.Text2TextProblem): def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS - tag = "train" if train else "dev" return character_generator(datasets[0], datasets[1], character_vocab, EOS) @property @@ -137,14 +139,13 @@ I've gone ahead and split all words into a train and test set and saved them in ```python # English Word2def datasets _WORD2DEF_TRAIN_DATASETS = [ - [ - "LOCATION_OF_DATA/", ("words_train.txt", "definitions_train.txt") - ] + LOCATION_OF_DATA + 'words_train.txt', + LOCATION_OF_DATA + 'definitions_train.txt' ] + _WORD2DEF_TEST_DATASETS = [ - [ - "LOCATION_OF_DATA", ("words_test.txt", "definitions_test.txt") - ] + LOCATION_OF_DATA + 'words_test.txt', + LOCATION_OF_DATA + 'definitions_test.txt' ] ``` @@ -155,24 +156,14 @@ Now our `word2def.py` file looks like: (with the correct imports) """ Problem definition for word to dictionary definition. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import os -import tarfile # do we need this import -from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators.wmt import character_generator from tensor2tensor.utils import registry -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - # English Word2def datasets _WORD2DEF_TRAIN_DATASETS = [ LOCATION_OF_DATA+'words_train.txt', @@ -198,7 +189,6 @@ class Word2def(problem.Text2TextProblem): def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS - tag = "train" if train else "dev" return character_generator(datasets[0], datasets[1], character_vocab, EOS) @property @@ -220,7 +210,17 @@ class Word2def(problem.Text2TextProblem): ``` # Hyperparameters -All hyperparamters inherit from `_default_hparams()` in `problem.py.` If you would like to customize your hyperparameters, add another method to the file `problem_hparams.py`. +All hyperparamters inherit from `_default_hparams()` in `problem.py.` If you would like to customize your hyperparameters, register a new hyperparameter set in `word2def.py` like the example provided in the walkthrough. For example: + +```python +from tensor2tensor.models import transformer + +@registry.register_hparams +def word2def_hparams(self): + hparams = transformer.transformer_base_single_gpu() # Or whatever you'd like to build off. + hparams.batch_size = 1024 + return hparams +``` # Run the problem Now that we've gotten our problem set up, let's train a model and generate definitions. @@ -229,7 +229,7 @@ We specify our problem name, the model, and hparams. ```bash PROBLEM=word2def MODEL=transformer -HPARAMS=transofmer_base_single_gpu +HPARAMS=word2def_hparams ``` The rest of the steps are as given in the [walkthrough](walkthrough.md). From 406db600c0acc98138ea8610d48121d7a7faa07e Mon Sep 17 00:00:00 2001 From: Francesco Mosconi <git@mosconi.me> Date: Thu, 7 Sep 2017 15:43:35 -0700 Subject: [PATCH 0339/4095] Added links to files in doc and corrected a few typos (#282) * better documentation with links * fixed line permalink --- README.md | 3 +-- docs/new_problem.md | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 4e56d7855..58a58aa17 100644 --- a/README.md +++ b/README.md @@ -214,8 +214,7 @@ on the task (e.g. fed through a final linear transform to produce logits for a softmax over classes). All models are imported in [`models.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/models/models.py), inherit from `T2TModel` - defined in -[`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py) -- and are registered with +[`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py) - and are registered with [`@registry.register_model`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/registry.py). ### Hyperparameter Sets diff --git a/docs/new_problem.md b/docs/new_problem.md index 617921910..e69a7dfdb 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -15,9 +15,9 @@ Let's add a new dataset together and train the transformer model. We'll be learn For each problem we want to tackle we create a new problem class and register it. Let's call our problem `Word2def`. -Since many text2text problems share similar methods, there's already a class called `Text2TextProblem` that extends the base problem class, `Problem` (both found in `problem.py`). +Since many text2text problems share similar methods, there's already a class called [`Text2TextProblem`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L354) that extends the base problem class, `Problem` (both found in [`problem.py`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py)). -For our problem, we can go ahead and create the file `word2def.py` in the `data_generators` folder and add our new problem, `Word2def`, which extends `Text2TextProblem`. Let's also register it while we're at it so we can specify the problem through flags. +For our problem, we can go ahead and create the file `word2def.py` in the [`data_generators`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/) folder and add our new problem, `Word2def`, which extends [`Text2TextProblem`](https://github.com/tensorflow/tensor2tensor/blob/24071ba07d5a14c170044c5e60a24bda8179fb7a/tensor2tensor/data_generators/problem.py#L354). Let's also register it while we're at it so we can specify the problem through flags. ```python @registry.register_problem @@ -28,7 +28,7 @@ class Word2def(problem.Text2TextProblem): ... ``` -We need to implement the following methods from `Text2TextProblem` in our new class: +We need to implement the following methods from [`Text2TextProblem`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L354) in our new class: * is_character_level * targeted_vocab_size * generator @@ -42,7 +42,7 @@ Let's tackle them one by one: **input_space_id, target_space_id, is_character_level, targeted_vocab_size, use_subword_tokenizer**: -SpaceIDs tell Tensor2Tensor what sort of space the input and target tensors are in. These are things like, EN_CHR (English character), EN_TOK (English token), AUDIO_WAV (audio waveform), IMAGE, DNA (genetic bases). The complete list can be found at `data_generators/problem.py` in the class `SpaceID`. +SpaceIDs tell Tensor2Tensor what sort of space the input and target tensors are in. These are things like, EN_CHR (English character), EN_TOK (English token), AUDIO_WAV (audio waveform), IMAGE, DNA (genetic bases). The complete list can be found at [`data_generators/problem.py`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py) in the class `SpaceID`. Since we're generating definitions and feeding in words at the character level, we set `is_character_level` to true, and use the same SpaceID, EN_CHR, for both input and target. Additionally, since we aren't using tokens, we don't need to give a `targeted_vocab_size` or define `use_subword_tokenizer`. @@ -86,7 +86,7 @@ class Word2def(problem.Text2TextProblem): **generator**: -We're almost done. `generator` generates the training and evaluation data and stores them in files like "word2def_train.lang1" in your DATA_DIR. Thankfully several commonly used methods like `character_generator`, and `token_generator` are already written in the file `wmt.py`. We will import `character_generator` and write: +We're almost done. `generator` generates the training and evaluation data and stores them in files like "word2def_train.lang1" in your DATA_DIR. Thankfully several commonly used methods like `character_generator`, and `token_generator` are already written in the file [`wmt.py`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/wmt.py). We will import `character_generator` and [`text_encoder`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/text_encoder.py) to write: ```python def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() @@ -151,7 +151,7 @@ _WORD2DEF_TEST_DATASETS = [ ## Putting it all together -Now our `word2def.py` file looks like: (with the correct imports) +Now our `word2def.py` file looks like: ```python """ Problem definition for word to dictionary definition. """ From 8f83adf8cc916ded00da8f6f300ce3459f76917b Mon Sep 17 00:00:00 2001 From: Edward Misback <elmisback@users.noreply.github.com> Date: Thu, 7 Sep 2017 18:43:58 -0400 Subject: [PATCH 0340/4095] Fix module docstring typo (#274) --- tensor2tensor/visualization/attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/visualization/attention.py b/tensor2tensor/visualization/attention.py index bc4238081..6109f9cc6 100644 --- a/tensor2tensor/visualization/attention.py +++ b/tensor2tensor/visualization/attention.py @@ -15,7 +15,7 @@ """Module for postprocessing and displaying tranformer attentions. -This module is deigned to be called from an ipython notebook. +This module is designed to be called from an ipython notebook. """ import json From 594da6340fc814743a2b7b8dd545a9965e036241 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 30 Aug 2017 07:21:14 -0700 Subject: [PATCH 0341/4095] internal. PiperOrigin-RevId: 166990178 --- README.md | 3 +- docs/new_problem.md | 58 ++++++++++++------------ tensor2tensor/utils/data_reader.py | 1 - tensor2tensor/visualization/attention.py | 2 +- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 58a58aa17..4e56d7855 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,8 @@ on the task (e.g. fed through a final linear transform to produce logits for a softmax over classes). All models are imported in [`models.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/models/models.py), inherit from `T2TModel` - defined in -[`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py) - and are registered with +[`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py) +- and are registered with [`@registry.register_model`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/registry.py). ### Hyperparameter Sets diff --git a/docs/new_problem.md b/docs/new_problem.md index e69a7dfdb..c859c6eba 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -15,20 +15,18 @@ Let's add a new dataset together and train the transformer model. We'll be learn For each problem we want to tackle we create a new problem class and register it. Let's call our problem `Word2def`. -Since many text2text problems share similar methods, there's already a class called [`Text2TextProblem`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L354) that extends the base problem class, `Problem` (both found in [`problem.py`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py)). +Since many text2text problems share similar methods, there's already a class called `Text2TextProblem` that extends the base problem class, `Problem` (both found in `problem.py`). -For our problem, we can go ahead and create the file `word2def.py` in the [`data_generators`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/) folder and add our new problem, `Word2def`, which extends [`Text2TextProblem`](https://github.com/tensorflow/tensor2tensor/blob/24071ba07d5a14c170044c5e60a24bda8179fb7a/tensor2tensor/data_generators/problem.py#L354). Let's also register it while we're at it so we can specify the problem through flags. +For our problem, we can go ahead and create the file `word2def.py` in the `data_generators` folder and add our new problem, `Word2def`, which extends `TranslateProblem`. Let's also register it while we're at it so we can specify the problem through flags. ```python -@registry.register_problem +@registry.register_problem() class Word2def(problem.Text2TextProblem): """Problem spec for English word to dictionary definition.""" - @property - def is_character_level(self): - ... + return NotImplementedError() ``` -We need to implement the following methods from [`Text2TextProblem`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L354) in our new class: +We need to implement the following methods from `Text2TextProblem` in our new class: * is_character_level * targeted_vocab_size * generator @@ -42,7 +40,7 @@ Let's tackle them one by one: **input_space_id, target_space_id, is_character_level, targeted_vocab_size, use_subword_tokenizer**: -SpaceIDs tell Tensor2Tensor what sort of space the input and target tensors are in. These are things like, EN_CHR (English character), EN_TOK (English token), AUDIO_WAV (audio waveform), IMAGE, DNA (genetic bases). The complete list can be found at [`data_generators/problem.py`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py) in the class `SpaceID`. +SpaceIDs tell Tensor2Tensor what sort of space the input and target tensors are in. These are things like, EN_CHR (English character), EN_TOK (English token), AUDIO_WAV (audio waveform), IMAGE, DNA (genetic bases). The complete list can be found at `data_generators/problem.py` in the class `SpaceID`. Since we're generating definitions and feeding in words at the character level, we set `is_character_level` to true, and use the same SpaceID, EN_CHR, for both input and target. Additionally, since we aren't using tokens, we don't need to give a `targeted_vocab_size` or define `use_subword_tokenizer`. @@ -58,8 +56,6 @@ The number of shards to break data files into. @registry.register_problem() class Word2def(problem.Text2TextProblem): """Problem spec for English word to dictionary definition.""" - - @property def is_character_level(self): return True @@ -86,11 +82,12 @@ class Word2def(problem.Text2TextProblem): **generator**: -We're almost done. `generator` generates the training and evaluation data and stores them in files like "word2def_train.lang1" in your DATA_DIR. Thankfully several commonly used methods like `character_generator`, and `token_generator` are already written in the file [`wmt.py`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/wmt.py). We will import `character_generator` and [`text_encoder`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/text_encoder.py) to write: +We're almost done. `generator` generates the training and evaluation data and stores them in files like "word2def_train.lang1" in your DATA_DIR. Thankfully several commonly used methods like `character_generator`, and `token_generator` are already written in the file `wmt.py`. We will import `character_generator` and write: ```python def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS + tag = "train" if train else "dev" return character_generator(datasets[0], datasets[1], character_vocab, EOS) ``` @@ -111,6 +108,7 @@ class Word2def(problem.Text2TextProblem): def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS + tag = "train" if train else "dev" return character_generator(datasets[0], datasets[1], character_vocab, EOS) @property @@ -139,31 +137,42 @@ I've gone ahead and split all words into a train and test set and saved them in ```python # English Word2def datasets _WORD2DEF_TRAIN_DATASETS = [ - LOCATION_OF_DATA + 'words_train.txt', - LOCATION_OF_DATA + 'definitions_train.txt' + [ + "LOCATION_OF_DATA/", ("words_train.txt", "definitions_train.txt") + ] ] - _WORD2DEF_TEST_DATASETS = [ - LOCATION_OF_DATA + 'words_test.txt', - LOCATION_OF_DATA + 'definitions_test.txt' + [ + "LOCATION_OF_DATA", ("words_test.txt", "definitions_test.txt") + ] ] ``` ## Putting it all together -Now our `word2def.py` file looks like: +Now our `word2def.py` file looks like: (with the correct imports) ```python """ Problem definition for word to dictionary definition. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import os +import tarfile # do we need this import +from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators.wmt import character_generator from tensor2tensor.utils import registry +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + # English Word2def datasets _WORD2DEF_TRAIN_DATASETS = [ LOCATION_OF_DATA+'words_train.txt', @@ -189,6 +198,7 @@ class Word2def(problem.Text2TextProblem): def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS + tag = "train" if train else "dev" return character_generator(datasets[0], datasets[1], character_vocab, EOS) @property @@ -210,17 +220,7 @@ class Word2def(problem.Text2TextProblem): ``` # Hyperparameters -All hyperparamters inherit from `_default_hparams()` in `problem.py.` If you would like to customize your hyperparameters, register a new hyperparameter set in `word2def.py` like the example provided in the walkthrough. For example: - -```python -from tensor2tensor.models import transformer - -@registry.register_hparams -def word2def_hparams(self): - hparams = transformer.transformer_base_single_gpu() # Or whatever you'd like to build off. - hparams.batch_size = 1024 - return hparams -``` +All hyperparamters inherit from `_default_hparams()` in `problem.py.` If you would like to customize your hyperparameters, add another method to the file `problem_hparams.py`. # Run the problem Now that we've gotten our problem set up, let's train a model and generate definitions. @@ -229,7 +229,7 @@ We specify our problem name, the model, and hparams. ```bash PROBLEM=word2def MODEL=transformer -HPARAMS=word2def_hparams +HPARAMS=transofmer_base_single_gpu ``` The rest of the steps are as given in the [walkthrough](walkthrough.md). diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index cde91cc7b..d55911f19 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -27,7 +27,6 @@ import six from six.moves import zip # pylint: disable=redefined-builtin -from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import problem_hparams from tensor2tensor.data_generators.problem import preprocess_examples_common diff --git a/tensor2tensor/visualization/attention.py b/tensor2tensor/visualization/attention.py index 6109f9cc6..bc4238081 100644 --- a/tensor2tensor/visualization/attention.py +++ b/tensor2tensor/visualization/attention.py @@ -15,7 +15,7 @@ """Module for postprocessing and displaying tranformer attentions. -This module is designed to be called from an ipython notebook. +This module is deigned to be called from an ipython notebook. """ import json From 98f55734aa8f49aa00aec5cb27a90887e96b5682 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Wed, 30 Aug 2017 19:48:53 -0700 Subject: [PATCH 0342/4095] Add some logging/debug messages. Remove padding for all layers when local experts (both attention and fc) PiperOrigin-RevId: 167086679 --- tensor2tensor/layers/common_attention.py | 34 +++++++-- tensor2tensor/models/attention_lm_moe.py | 94 ++++++++++++++++++++++-- tensor2tensor/utils/expert_utils.py | 15 ---- 3 files changed, 117 insertions(+), 26 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 253e9bee5..975ed94ae 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -33,6 +33,9 @@ from tensorflow.python.framework import function +_expert_count = 0 + + def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): """Adds a bunch of sinusoids of different frequencies to a Tensor. @@ -1007,9 +1010,22 @@ def self_attention_expert( expert_fn=functools.partial(self_attention_expert, mask_right=) ) """ + depth = x.get_shape().as_list()[-1] length = tf.shape(batch_coordinate)[0] + # Print a warning message if one of the expert isn't used (useful at + # inference where summaries aren't used and the gating function don't add + # noise) + global _expert_count # Hack to make each expert have a unique id + _expert_count += 1 + length = tf.cond( + tf.equal(length, 0), + lambda: tf.Print( # pylint: disable=g-long-lambda + length, [length], "Expert {} empty: ".format(_expert_count)), + lambda: length, + ) + tf.summary.scalar("batch_size", length, family="experts_stats_batch_size") attention_kq_size = attention_kq_size or depth @@ -1063,7 +1079,7 @@ def local_expert_attention( loss_coef, attention_num_experts, train=True, - pad_remover=None, + batch_coordinate=None, **kwargs ): """Attention using a mixture of experts. @@ -1072,23 +1088,30 @@ def local_expert_attention( The mixture of experts is "local" in that it is replicated on each datashard. + local_moe flatten all batches so to avoid problems with padding (ex: all + padding going to the same expert, self attention attending to non null + padding tokens,...), the padding should be removed before. + Args: - x: a Tensor with shape [batch, length, depth] + x: a Tensor with shape [batch, length, depth] or [1, batch*length, depth] k: The number of experts to dispatch each example to loss_coef: a scalar. A multiplier for the expert loss attention_num_experts: The number of experts to use train: a boolean for the current mode - pad_remover (PadRemover): A util object containing the padding position + batch_coordinate (tf.Tensor): int32 tensor of shape [1, batch*length, 1] + containing the batch ids. If None, deduced from first dim of x. **kwargs: Arguments to forward to self_attention_expert Returns: y: a Tensor with shape [batch, length, depth] loss: a Scalar """ + if batch_coordinate is None: + batch_coordinate = tf.expand_dims( + coordinate_tensor(tf.shape(x)[:-1], axis=0), axis=-1) with tf.variable_scope("local_expert_attention"): additional_dispatch_params = { - "batch_coordinate": tf.expand_dims( - coordinate_tensor(tf.shape(x)[:-1], axis=0), axis=-1) + "batch_coordinate": batch_coordinate } return expert_utils.local_moe( x, @@ -1100,7 +1123,6 @@ def local_expert_attention( pass_x=True, pass_gates=False, additional_dispatch_params=additional_dispatch_params, - pad_remover=pad_remover ) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 3b72ea9c2..191d4aa04 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -25,6 +25,8 @@ from __future__ import division from __future__ import print_function +import functools + # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin @@ -40,6 +42,9 @@ import tensorflow as tf +ModeKeys = tf.contrib.learn.ModeKeys # pylint: disable=invalid-name + + class AttentionType(object): MULTIHEAD = "multihead" LOCAL_EXPERTS = "local_experts" @@ -90,6 +95,37 @@ def _diet_expert(x): expert_fn = expert_utils.ffn_expert_fn( hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) + if hparams.attention_type == AttentionType.LOCAL_EXPERTS: + # As preprocess and postprocess are called with batch of size one (all + # batches concatenated), we just make sure that batch_norm is not use ( + # should not either way) + assert hparams.norm_type != "batch" + + dp_remove_pad = functools.partial( + dp, remove_pad, pad_remover=pad_remover, mode=hparams.mode) + dp_restore_pad = functools.partial( + dp, restore_pad, ref_x=x, pad_remover=pad_remover, mode=hparams.mode) + elif (hparams.attention_type == AttentionType.MULTIHEAD or + hparams.attention_type == AttentionType.MEMORY_EFFICIENT): + # Using identity function: No effect + dp_remove_pad = lambda x: (x, None) + dp_restore_pad = lambda x: x + else: + raise ValueError("Only {} supported for now.".format( + AttentionType.get_choices())) + + def print_shape(x, suffix): + # To help debugging, print the input/output shapes at inference and eval + # Inference for long sequences can take a long time, so that's help to + # see the progession of the generation + if hparams.mode == ModeKeys.TRAIN: + return x + return tf.Print(x, [tf.shape(x)], "shape_x_{}".format(suffix)) + + x = dp(print_shape, x, "in") + x, batch_coordinate = dp_remove_pad(x) + x = dp(print_shape, x, "in_flat") + for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope( @@ -118,11 +154,11 @@ def _diet_expert(x): y, loss = dp( common_attention.local_expert_attention, preprocess(x), - k=2, + k=hparams.attention_moe_k, loss_coef=hparams.attention_load_balance, attention_num_experts=hparams.attention_num_experts, - train=hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, - pad_remover=pad_remover, + train=hparams.mode == ModeKeys.TRAIN, + batch_coordinate=batch_coordinate, mask_right=True, attention_kq_size=hparams.attention_kq_size, attention_v_size=hparams.attention_v_size) @@ -138,7 +174,7 @@ def _diet_expert(x): dp, self._ps_devices, preprocess(x), - hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + hparams.mode == ModeKeys.TRAIN, input_size=hparams.hidden_size, expert_fn=expert_fn, num_experts=hparams.moe_num_experts, @@ -160,6 +196,9 @@ def _diet_expert(x): dropout=hparams.relu_dropout) x = postprocess(x, y) x = preprocess(x) + + x = dp_restore_pad(x) + decoder_output = dp(tf.expand_dims, x, 2) return decoder_output, extra_loss @@ -187,12 +226,56 @@ def attention_lm_moe_prepare_decoder(targets, hparams): else: decoder_self_attention_bias = ( common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) + # TODO(epot): The padding remover should take into account that the input is + # shifted. decoder_input = common_layers.shift_left_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) return (decoder_input, decoder_self_attention_bias, pad_remover) +def remove_pad(x, pad_remover, mode): + """Remove padding by concatenating all dimension into one. + + Args: + x (tf.Tensor): input of shape [batch_size, length, depth] + pad_remover (obj): a PadRemover object + mode (ModeKeys): infer, train or eval. If inference, the padding remover is + not applied + + Returns: + tf.Tensor of shape [1,length_nonpad,depth] where + length_nonpad <= batch_size*length + """ + # Compute the batch coordinate before flattening all batches + batch_coordinate = tf.expand_dims( + common_attention.coordinate_tensor(tf.shape(x)[:-1], axis=0), axis=-1) + batch_coordinate = expert_utils.flatten_all_but_last(batch_coordinate) + + # Concatenate all tokens (without padding) + x = expert_utils.flatten_all_but_last(x) + + # Remove padding for training and eval + if mode != ModeKeys.INFER: + # This is a hack to allows inference when the <go> token + # is detected as padding and removed. This works for now because there is + # no padding at inference. + batch_coordinate = pad_remover.remove(batch_coordinate) + x = pad_remover.remove(x) + + batch_coordinate = tf.expand_dims(batch_coordinate, axis=0) + x = tf.expand_dims(x, axis=0) # Now batch_size=1 + return x, batch_coordinate + + +def restore_pad(x, ref_x, pad_remover, mode): + x = tf.squeeze(x, axis=0) + if mode != ModeKeys.INFER: + x = pad_remover.restore(x) + x = expert_utils.reshape_like(x, ref_x) + return x + + @registry.register_hparams def attention_lm_moe_base(): """Set of hyperparameters. @@ -238,6 +321,7 @@ def attention_lm_moe_base(): hparams.add_hparam("moe_layers", "2") # comma separated list of layer numbers # moe params. local attention moe. hparams.add_hparam("attention_type", AttentionType.MULTIHEAD) + hparams.add_hparam("attention_moe_k", 2) hparams.add_hparam("attention_num_experts", 16) # Key, query and value dimensions for the attention hparams.add_hparam("attention_kq_size", 128) @@ -256,7 +340,7 @@ def attention_lm_moe_base_ae(): hparams.attention_type = AttentionType.LOCAL_EXPERTS hparams.max_length = hparams.batch_size hparams.eval_drop_long_sequences = int(True) - hparams.batching_mantissa_bits = 2 # More buckets + hparams.min_length_bucket = 256 # Avoid cyclic problems for big batches hparams.learning_rate = 0.05 hparams.learning_rate_warmup_steps = 10000 return hparams diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index fb1d1fac0..16820ff37 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -847,7 +847,6 @@ def local_moe(x, pass_x=True, pass_gates=False, additional_dispatch_params=None, - pad_remover=None, name=None): """Call a local mixture of experts. @@ -864,8 +863,6 @@ def local_moe(x, additional_dispatch_params: The extra tensors that need to be sent to each expert. Examples include batch batch coordinates (see common_attention.local_expert_attention) - pad_remover (PadRemover): If given, the padding is removed/restored before - sending to the experts name: a string Returns: @@ -879,14 +876,6 @@ def local_moe(x, with tf.variable_scope(name, default_name="local_moe"): x_flat = flatten_all_but_last(x) - # Remove the padding tokens - if pad_remover: - x_flat = pad_remover.remove(x_flat) - tf.summary.scalar( # Should match the targets_nonpadding_tokens - "nonpadding_tokens", - tf.shape(x_flat)[0], - family="experts_stats") - # The gates indicate which batch elements go to which tensors. # load is a measure of approximately how many examples go to each expert gates, load = noisy_top_k_gating( @@ -908,16 +897,12 @@ def local_moe(x, expert_kwargs["gates"] = dispatcher.expert_to_gates() for k, v in six.iteritems(additional_dispatch_params or {}): v = flatten_all_but_last(v) - if pad_remover: - v = pad_remover.remove(v) expert_kwargs[k] = dispatcher.dispatch(v) ep = Parallelism([DEFAULT_DEV_STRING] * num_experts) expert_outputs = ep(expert_fn, **expert_kwargs) y_flat = dispatcher.combine(expert_outputs) - if pad_remover: - y_flat = pad_remover.restore(y_flat) y = reshape_like(y_flat, x) importance = tf.reduce_sum(gates, 0) From 473089b113e7644aa9b3a7f8794f237d3f41e24f Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Wed, 30 Aug 2017 20:20:07 -0700 Subject: [PATCH 0343/4095] Correct CNN+DailyMail generator, make TransforerAE work with 2d input. PiperOrigin-RevId: 167088556 --- .../data_generators/cnn_dailymail.py | 4 +- tensor2tensor/layers/modalities.py | 45 ++++--------- tensor2tensor/models/cycle_gan.py | 66 +------------------ tensor2tensor/models/transformer_vae.py | 46 +++++++++---- 4 files changed, 48 insertions(+), 113 deletions(-) diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index db4deae4e..93e846a0b 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -53,8 +53,8 @@ def _maybe_download_corpora(tmp_dir): filepath of the downloaded corpus file. """ cnn_filename = "cnn_stories.tgz" - dailymail_filename = "dailymail_stories.tgz" cnn_finalpath = os.path.join(tmp_dir, "cnn/stories/") + dailymail_filename = "dailymail_stories.tgz" dailymail_finalpath = os.path.join(tmp_dir, "dailymail/stories/") if not tf.gfile.Exists(cnn_finalpath): cnn_file = generator_utils.maybe_download_from_drive( @@ -63,7 +63,7 @@ def _maybe_download_corpora(tmp_dir): cnn_tar.extractall(tmp_dir) if not tf.gfile.Exists(dailymail_finalpath): dailymail_file = generator_utils.maybe_download_from_drive( - tmp_dir, dailymail_filename, _CNN_STORIES_DRIVE_URL) + tmp_dir, dailymail_filename, _DAILYMAIL_STORIES_DRIVE_URL) with tarfile.open(dailymail_file, "r:gz") as dailymail_tar: dailymail_tar.extractall(tmp_dir) return [cnn_finalpath, dailymail_finalpath] diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index e03e6835e..c93a05433 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -141,17 +141,11 @@ def top_dimensionality(self): def bottom(self, inputs): with tf.variable_scope(self.name): inputs = common_layers.standardize_images(inputs) - # TODO(lukaszkaiser): summaries here don't work in multi-problem case yet. - # tf.summary.image("inputs", inputs, max_outputs=2) - if self._model_hparams.compress_steps > 0: - strides = (2, 2) - else: - strides = (1, 1) + tf.summary.image("inputs", inputs, max_outputs=2) return common_layers.conv_block( inputs, self._body_input_depth, [((1, 1), (3, 3))], first_relu=False, - strides=strides, padding="SAME", force2d=True, name="small_image_conv") @@ -159,43 +153,26 @@ def bottom(self, inputs): def targets_bottom(self, inputs): with tf.variable_scope(self.name): # Reshape inputs to 2-d tensor and embed the RGB pixel values. + shape = tf.shape(inputs) inputs = common_layers.flatten4d3d(inputs) ret = common_layers.embedding( - inputs, + tf.to_int32(inputs), self.top_dimensionality, self._body_input_depth, name="input_rgb_embedding") if self._model_hparams.multiply_embedding_mode == "sqrt_depth": ret *= self._body_input_depth**0.5 - return ret + ret = tf.reshape(ret, [shape[0], shape[1], shape[2], + self._body_input_depth * 3]) + return tf.layers.dense(ret, self._body_input_depth) def top(self, body_output, _): with tf.variable_scope("rgb_softmax"): - # separate embedding for each channel - # assuming the body output returns a tensor of shape - # [batch_size, rows, cols, channels, self._body_input_depth] - body_output_split = tf.split(body_output, self._channels, axis=3) - output_rgb_embedding_var = tf.get_variable( - "output_rgb_embedding", - [self._channels, self.top_dimensionality, self._body_input_depth], - initializer=tf.random_normal_initializer(0.0, self._body_input_depth - **-0.5)) - # compute logits separately for each channel - rgb_channel_logits = [] - for i in self._channels: - shape = tf.shape(body_output_split[i])[:-1] - body_output = tf.reshape(body_output_split[i], - [-1, self._body_input_depth]) - channel_logits = tf.matmul( - body_output, output_rgb_embedding_var[i], transpose_b=True) - rgb_channel_logits.append( - tf.reshape(channel_logits, - tf.concat([shape, [self.top_dimensionality]], 0))) - - logits = tf.concat(rgb_channel_logits, axis=3) - # Reshape logits to conform to CIFAR image shapes (32 by 32 by 3) - - return logits + shape = tf.shape(body_output) + dim = body_output.get_shape().as_list()[-1] // 3 + out = tf.reshape(body_output, [shape[0], shape[1], shape[2], + self._channels, dim]) + return tf.layers.dense(out, self.top_dimensionality) def loss(self, top_out, targets, weights_fn=common_layers.weights_all): # Call the default implementation, but weight 1.0 on 0s by default. diff --git a/tensor2tensor/models/cycle_gan.py b/tensor2tensor/models/cycle_gan.py index c17becbbe..4cf1a5871 100644 --- a/tensor2tensor/models/cycle_gan.py +++ b/tensor2tensor/models/cycle_gan.py @@ -124,74 +124,10 @@ def model_fn_body(self, features): self._hparams) -def cycle_vae_gan_internal(inputs, targets, _, hparams): - """Cycle GAN, main step used for training.""" - with tf.variable_scope("cycle_vae_gan"): - # Embed inputs and targets. - inputs_orig, targets_orig = tf.to_int32(inputs), tf.to_int32(targets) - k = 2**hparams.num_compress_steps - inputs_orig, targets_orig = common_layers.pad_to_same_length( - inputs_orig, targets_orig, final_length_divisible_by=k) - inputs = common_layers.embedding( - inputs_orig, hparams.vocab_size, hparams.hidden_size, "embed") - targets = common_layers.embedding( - targets_orig, hparams.vocab_size, hparams.hidden_size, - "embed", reuse=True) - - # Split the batch into input-input and target-target parts. - inputs1, _ = split_on_batch(inputs) - _, targets2 = split_on_batch(targets) - - # Input-input part. - inp1_back, kl_loss1, inp1_mu, inp1_log_sigma = transformer_vae.vae_compress( - inputs1, None, hparams, "inp2hyp", "hyp2inp") - inp1_hyp = tf.concat([inp1_mu, inp1_log_sigma], axis=3) - - # Target-target part. - tgt2_back, kl_loss2, tgt2_mu, tgt2_log_sigma = transformer_vae.vae_compress( - targets2, None, hparams, "tgt2hyp", "hyp2tgt") - tgt2_hyp = tf.concat([tgt2_mu, tgt2_log_sigma], axis=3) - - # Reconstruction losses. - inp1_orig, _ = split_on_batch(inputs_orig) - _, tgt2_orig = split_on_batch(targets_orig) - inp1_loss = reconstruct_loss( - inp1_back, tf.squeeze(inp1_orig, axis=3), hparams) - tgt2_loss = reconstruct_loss( - tgt2_back, tf.squeeze(tgt2_orig, axis=3), hparams, reuse=True) - - # Discriminator loss. - dloss = discriminate_loss(inp1_hyp, tgt2_hyp, False, hparams, "dloss") - - # Reconstruct targets from inputs. - tgt, _, _, _ = transformer_vae.vae_compress( - inputs, None, hparams, "inp2hyp", "hyp2tgt", reuse=True) - tgt = tf.layers.dense(tgt, hparams.vocab_size, name="softmax", reuse=True) - # We use the reconstruction only for tracking progress, no gradients here! - tgt = tf.stop_gradient(tf.expand_dims(tgt, axis=2)) - - kl_rev_decay = common_layers.inverse_exp_decay(hparams.kl_warmup_steps) - losses = {"input_input": hparams.cycle_loss_multiplier * inp1_loss, - "target_target": hparams.cycle_loss_multiplier * tgt2_loss, - "input_kl": kl_loss1 * kl_rev_decay * 15.0, - "target_kl": kl_loss2 * kl_rev_decay * 15.0, - "discriminator": dloss} - return tgt, losses - - -@registry.register_model -class CycleVaeGAN(t2t_model.T2TModel): - - def model_fn_body(self, features): - return cycle_vae_gan_internal( - features["inputs"], features["targets"], features["target_space_id"], - self._hparams) - - @registry.register_hparams def cycle_gan_small(): """Set of hyperparameters.""" - hparams = transformer_vae.transformer_vae_small() + hparams = transformer_vae.transformer_ae_small() hparams.batch_size = 2048 hparams.input_modalities = "inputs:symbol:identity" hparams.target_modality = "symbol:identity" diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 1c566e996..025f8d631 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -253,18 +253,25 @@ def ae_decompress(z, ae, x, is_2d, hparams, name, reuse=None): # Decompress. d = z + k = (3, 3) if is_2d else (3, 1) for i in xrange(hparams.num_compress_steps): j = hparams.num_compress_steps - i - 1 - d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j) + d = residual_conv(d, 1, k, hparams, "decompress_rc_%d" % j) d = decompress_step(d, None, hparams, i > 0, is_2d, "decompress_%d" % j) - k = 2**hparams.num_compress_steps - z_batch = tf.reshape(z, [-1, 1, 1, hparams.hidden_size]) - x_batch = tf.reshape(x, [-1, k, 1, hparams.hidden_size]) - d_batch = tf.reshape(d, [-1, k, 1, hparams.hidden_size]) - dec_batch = decode(z_batch, d_batch, x_batch, None, None, hparams) - z = tf.reshape(dec_batch, [-1, tf.shape(x)[1], 1, hparams.hidden_size]) - + # Autoregressive part. + if not is_2d: # Currently we don't do it autoregressively for 2d problems. + k = 2**(hparams.num_compress_steps * (2 if is_2d else 1)) + z_batch = tf.reshape(z, [-1, 1, 1, hparams.hidden_size]) + x_batch = tf.reshape(x, [-1, k, 1, hparams.hidden_size]) + d_batch = tf.reshape(d, [-1, k, 1, hparams.hidden_size]) + dec_batch = decode(z_batch, d_batch, x_batch, None, None, hparams) + else: # For non-autoregressive. + dec_batch = d + z = tf.reshape(dec_batch, [-1, tf.shape(x)[1], tf.shape(x)[2], + hparams.hidden_size]) + if is_2d: + z = tf.layers.dense(z, hparams.hidden_size * 3) return z @@ -286,11 +293,14 @@ def ae_transformer_internal(inputs, targets, target_space, hparams): inputs, ed = encode(inputs, target_space, hparams, "input_enc") # Compress and ae. - ae, hot, kl = ae_compress(targets, False, hparams, "ae") + ae, hot, kl = ae_compress(targets, hparams.is_2d, hparams, "ae") + tf.summary.histogram("hot", tf.reshape(tf.argmax(hot, axis=-1), [-1])) emb = ae_embed(hot, hparams, "ae", reuse=True) # Compress context and run autoregressive decoder on emb-hot. - dec_c = decode(None, None, emb, inputs, ed, hparams) + emb_flat = tf.expand_dims(common_layers.flatten4d3d(emb), axis=2) + dec_c = decode(None, None, emb_flat, inputs, ed, hparams) + dec_c = tf.reshape(dec_c, tf.shape(emb)) c_z = tf.layers.dense(dec_c, hparams.v_size, name="mask_context") reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( labels=hot, logits=c_z) @@ -299,8 +309,8 @@ def ae_transformer_internal(inputs, targets, target_space, hparams): hot = tf.one_hot(tf.argmax(c_z, axis=-1), hparams.v_size) # Decompress, pass for ae loss. - z = ae_decompress(emb, ae, targets, False, hparams, "ae") - kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.5)) + z = ae_decompress(emb, ae, targets, hparams.is_2d, hparams, "ae") + kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.8)) reconstruct_loss *= common_layers.inverse_exp_decay(hparams.startup_steps) losses = {"kl": kl, "reconstruction": reconstruct_loss} return z, losses @@ -365,6 +375,18 @@ def transformer_ae_small(): hparams.add_hparam("startup_steps", 30000) hparams.add_hparam("kmeans_lr_factor", 0.002) hparams.add_hparam("z_dropout", 0.1) + hparams.add_hparam("is_2d", 0) + return hparams + + +@registry.register_hparams +def transformer_ae_cifar(): + hparams = transformer_ae_small() + hparams.batch_size = 1024 * 16 + hparams.num_compress_steps = 2 + hparams.v_size = 1024 * 16 + hparams.startup_steps = 120000 + hparams.is_2d = 1 return hparams From c7636a372e5575c040cfcb8a574bd0b0387da53e Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 31 Aug 2017 14:46:43 -0700 Subject: [PATCH 0344/4095] Transformer hparams fall back on `num_hidden_layers` PiperOrigin-RevId: 167194460 --- tensor2tensor/models/transformer.py | 17 +++++++++------- tensor2tensor/models/transformer_test.py | 25 ------------------------ 2 files changed, 10 insertions(+), 32 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 105d9eb32..41bfa5b7f 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -190,7 +190,8 @@ def transformer_encoder(encoder_input, """ x = encoder_input with tf.variable_scope(name): - for layer in xrange(hparams.num_encoder_layers): + for layer in xrange( + hparams.num_encoder_layers or hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( @@ -233,7 +234,8 @@ def transformer_decoder(decoder_input, """ x = decoder_input with tf.variable_scope(name): - for layer in xrange(hparams.num_decoder_layers): + for layer in xrange( + hparams.num_decoder_layers or hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( @@ -323,11 +325,12 @@ def transformer_base(): hparams.label_smoothing = 0.1 hparams.shared_embedding_and_softmax_weights = int(True) - hparams.add_hparam("filter_size", 2048) # Add new ones like this. - # layer-related flags - hparams.add_hparam("num_encoder_layers", hparams.num_hidden_layers) - hparams.add_hparam("num_decoder_layers", hparams.num_hidden_layers) - # attention-related flags + # Add new ones like this. + hparams.add_hparam("filter_size", 2048) + # Layer-related flags. If zero, these fall back on hparams.num_hidden_layers. + hparams.add_hparam("num_encoder_layers", 0) + hparams.add_hparam("num_decoder_layers", 0) + # Attention-related flags. hparams.add_hparam("num_heads", 8) hparams.add_hparam("attention_key_channels", 0) hparams.add_hparam("attention_value_channels", 0) diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 391824524..6c0eee203 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -64,31 +64,6 @@ def testTransformer(self): res = session.run(logits) self.assertEqual(res.shape, (BATCH_SIZE, TARGET_LENGTH, 1, 1, VOCAB_SIZE)) - def testBeamDecodeVsGreedy(self): - model, features = self.getModel() - - decode_length = 20 - - greedy_result, _, _ = model._greedy_infer( - features, decode_length, last_position_only=True) - greedy_result = tf.squeeze(greedy_result, axis=[2, 3]) - - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - beam_res = model._beam_decode( - features, - decode_length, - beam_size=1, - top_beams=1, - last_position_only=True, - alpha=1.0) - - with self.test_session() as session: - session.run(tf.global_variables_initializer()) - greedy_res, beam_res = session.run([greedy_result, beam_res]) - - self.assertEqual(beam_res.shape, (BATCH_SIZE, INPUT_LENGTH + decode_length)) - self.assertAllClose(greedy_res, beam_res) - if __name__ == "__main__": tf.test.main() From a317801dd7594b8b60a846e974d31ed426a8eeba Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 31 Aug 2017 15:13:22 -0700 Subject: [PATCH 0345/4095] Speed up Transformer using PadRemover PiperOrigin-RevId: 167198565 --- tensor2tensor/layers/common_attention.py | 16 +++++++++++++ tensor2tensor/models/transformer.py | 29 ++++++++++++++++++++---- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 975ed94ae..7ed7799d0 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -215,6 +215,22 @@ def attention_bias_ignore_padding(memory_padding): return tf.expand_dims(tf.expand_dims(ret, axis=1), axis=1) +def attention_bias_to_padding(attention_bias): + """Inverse of attention_bias_ignore_padding(). + + Args: + attention_bias: a `Tensor` with shape [batch, 1, 1, memory_length], as + returned by attention_bias_ignore_padding(). + + Returns: + a Tensor with shape [batch, memory_length] with 1.0 in padding positions + and 0.0 in non-padding positions. + """ + # `attention_bias` is a large negative number in padding positions and 0.0 + # elsewhere. + return tf.squeeze(tf.to_float(tf.less(attention_bias, -1)), axis=[1, 2]) + + def attention_bias_prepend_inputs_full_attention(padding): """Create a bias tensor for prepend_mode="prepend_inputs_full_attention". diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 41bfa5b7f..86b920dc5 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -30,6 +30,7 @@ from tensor2tensor.layers import common_attention from tensor2tensor.layers import common_hparams from tensor2tensor.layers import common_layers +from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -50,8 +51,8 @@ def model_fn_body(self, features): targets = common_layers.flatten4d3d(targets) (encoder_input, encoder_self_attention_bias, - encoder_decoder_attention_bias) = (transformer_prepare_encoder( - inputs, target_space, hparams)) + encoder_decoder_attention_bias) = transformer_prepare_encoder( + inputs, target_space, hparams) (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder( targets, hparams) @@ -202,8 +203,11 @@ def transformer_encoder(encoder_input, hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): + pad_remover = expert_utils.PadRemover( + common_attention.attention_bias_to_padding( + encoder_self_attention_bias)) y = transformer_ffn_layer( - common_layers.layer_preprocess(x, hparams), hparams) + common_layers.layer_preprocess(x, hparams), hparams, pad_remover) x = common_layers.layer_postprocess(x, y, hparams) # if normalization is done in layer_preprocess, then it shuold also be done # on the output, since the output can grow very large, being the sum of @@ -265,22 +269,37 @@ def transformer_decoder(decoder_input, return common_layers.layer_preprocess(x, hparams) -def transformer_ffn_layer(x, hparams): +def transformer_ffn_layer(x, hparams, pad_remover=None): """Feed-forward layer in the transformer. Args: x: a Tensor of shape [batch_size, length, hparams.hidden_size] hparams: hyperparmeters for model + pad_remover: an expert_utils.PadRemover object tracking the padding + positions. If provided, when using convolutional settings, the padding + is removed before applying the convolution, and restored afterward. This + can give a significant speedup. Returns: a Tensor of shape [batch_size, length, hparams.hidden_size] """ if hparams.ffn_layer == "conv_hidden_relu": - return common_layers.conv_hidden_relu( + # In simple convolution mode, use `pad_remover` to speed up processing. + if pad_remover: + original_shape = tf.shape(x) + # Collapse `x` across examples, and remove padding positions. + x = tf.reshape(x, tf.concat([[-1], tf.shape(x)[2:]], axis=0)) + x = tf.expand_dims(pad_remover.remove(x), axis=0) + conv_output = common_layers.conv_hidden_relu( x, hparams.filter_size, hparams.hidden_size, dropout=hparams.relu_dropout) + if pad_remover: + # Restore `conv_output` to the original shape of `x`, including padding. + conv_output = tf.reshape( + pad_remover.restore(tf.squeeze(conv_output, axis=0)), original_shape) + return conv_output elif hparams.ffn_layer == "parameter_attention": return common_attention.parameter_attention( x, hparams.parameter_attention_key_channels or hparams.hidden_size, From 1a9bdacf2fc4f87faa4da74908487a626e06c2db Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 1 Sep 2017 12:55:56 -0700 Subject: [PATCH 0346/4095] Bug fix and better documentation for normalizer_fn. PiperOrigin-RevId: 167312851 --- tensor2tensor/layers/common_layers.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 4b09e70cb..264c11cf6 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -628,11 +628,22 @@ def conv_block_internal(conv_fn, Returns: a Tensor. """ + name = kwargs.pop("name") if "name" in kwargs else None mask = kwargs.pop("mask") if "mask" in kwargs else None - norm = kwargs.pop("normalizer_fn") if "normalizer_fn" in kwargs else None - if norm is None and "normalizer_fn" not in kwargs: + + # Usage for normalize_fn kwarg: + # if not specified, use layer norm + # if given normalize_fn=None, don't use any normalization + # if given normalize_fn=norm, use the specified norm function + + use_layer_norm = "normalizer_fn" not in kwargs + norm = kwargs.pop("normalizer_fn", None) + use_normalizer_fn = use_layer_norm or norm + + if use_layer_norm: norm = lambda x, name: layer_norm(x, filters, name=name) + with tf.variable_scope(name, "conv_block", [inputs]): cur, counter = inputs, -1 for dilation_rate, kernel_size in dilation_rates_and_kernel_sizes: @@ -660,7 +671,7 @@ def conv_block_internal(conv_fn, name="conv_block_%d" % counter, use_bias=norm is None, **kwargs) - if norm is not None: + if use_normalizer_fn: cur = norm(cur, name="conv_block_norm_%d" % counter) return cur From 956e767af673be6292e9b2d06e5ce15688ba76d9 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 1 Sep 2017 12:59:51 -0700 Subject: [PATCH 0347/4095] Use new dynamic window size group_by_window functionality in an OSS-compatible way PiperOrigin-RevId: 167313309 --- tensor2tensor/utils/data_reader.py | 41 +++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index d55911f19..09ef159a4 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -267,12 +267,14 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, dataset = dataset.filter( lambda ex: _example_too_big(ex, batching_scheme["max_length"])) - dataset = bucket_by_sequence_length(dataset, _example_length, - batching_scheme["boundaries"], - batching_scheme["batch_sizes"], - batching_scheme["window_size"]) + dataset = bucket_by_sequence_length( + dataset, _example_length, batching_scheme["boundaries"], + batching_scheme["batch_sizes"], batching_scheme["window_size"]) # We reshuffle the batches to prevent many long-sequence batches at once. - if batching_scheme["shuffle_queue_size"] is not None: + # TODO(rsepassi): Rm hasattr call once new dynamic window size functionality + # is in a stable TF release. + if (batching_scheme["shuffle_queue_size"] is not None and + not hasattr(dataset, "apply")): dataset = dataset.shuffle(batching_scheme["shuffle_queue_size"]) batched_examples = dataset.make_one_shot_iterator().get_next() return batched_examples @@ -338,6 +340,12 @@ def example_to_bucket_id(example): return bucket_id + def window_size_fn(bucket_id): + # window size = batch size + batch_sizes = tf.constant(bucket_batch_sizes, dtype=tf.int64) + window_size = batch_sizes[bucket_id] + return window_size + def batching_fn(bucket_id, grouped_dataset): batch_sizes = tf.constant(bucket_batch_sizes, dtype=tf.int64) batch_size = batch_sizes[bucket_id] @@ -348,8 +356,16 @@ def batching_fn(bucket_id, grouped_dataset): for name, shape in grouped_dataset.output_shapes.items()]) return grouped_dataset.padded_batch(batch_size, padded_shapes) - dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, - window_size) + # TODO(rsepassi): Rm branch once the new group_by_window functionality is in + # a stable TF release. + if hasattr(dataset, "apply"): + # If the Dataset supports dynamic window size, use it. + dataset = dataset.apply( + tf.contrib.data.group_by_window, + args=(example_to_bucket_id, batching_fn, None, window_size_fn)) + else: + dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, + window_size) return dataset @@ -398,8 +414,8 @@ def _batching_scheme(batch_size, * max_length: int, maximum length of an example """ max_length = max_length or batch_size - boundaries = _bucket_boundaries( - max_length, min_length_bucket, length_bucket_step) + boundaries = _bucket_boundaries(max_length, min_length_bucket, + length_bucket_step) boundaries = [boundary * length_multiplier for boundary in boundaries] max_length *= length_multiplier batch_sizes = [ @@ -417,9 +433,10 @@ def _batching_scheme(batch_size, 83160, 110880, 166320, 221760, 277200, 332640, 498960, 554400, 665280, 720720, 1081080, 1441440, 2162160, 2882880, 3603600, 4324320, 6486480, 7207200, 8648640, 10810800, 14414400, 17297280, 21621600, 32432400, - 36756720, 43243200, 61261200, 73513440, 110270160] - window_size = max([ - i for i in highly_composite_numbers if i <= 3 * max_batch_size]) + 36756720, 43243200, 61261200, 73513440, 110270160 + ] + window_size = max( + [i for i in highly_composite_numbers if i <= 3 * max_batch_size]) divisors = [i for i in xrange(1, window_size + 1) if window_size % i == 0] batch_sizes = [max([d for d in divisors if d <= bs]) for bs in batch_sizes] window_size *= shard_multiplier From f76ea08833639613287b2c46fa079bf5ef88207e Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 1 Sep 2017 13:12:17 -0700 Subject: [PATCH 0348/4095] Fixed typo. PiperOrigin-RevId: 167314859 --- tensor2tensor/utils/yellowfin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/yellowfin.py b/tensor2tensor/utils/yellowfin.py index 450875fa5..a3f6a18a1 100644 --- a/tensor2tensor/utils/yellowfin.py +++ b/tensor2tensor/utils/yellowfin.py @@ -602,7 +602,7 @@ def minimize(self, Raises: ValueError: if no gradients are provided for any variable. """ - grads_and_vars = self._optimizer.compute_gradients( + grads_and_vars = self._momentum_optimizer.compute_gradients( loss, var_list=var_list, gate_gradients=gate_gradients, From 0f3d76cc266c6a96f8093cd2ddca6bfc6f3cd721 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 4 Sep 2017 04:38:00 -0700 Subject: [PATCH 0349/4095] Enable easy dataset construction from Problems with Problem.dataset PiperOrigin-RevId: 167485065 --- .../data_generators/gene_expression.py | 5 +- tensor2tensor/data_generators/ice_parsing.py | 2 +- tensor2tensor/data_generators/image.py | 14 +- tensor2tensor/data_generators/imdb.py | 2 +- tensor2tensor/data_generators/problem.py | 176 +++++++++++++++++- tensor2tensor/models/gene_expression_test.py | 2 +- tensor2tensor/models/multimodel_test.py | 2 +- tensor2tensor/models/slicenet_test.py | 2 +- tensor2tensor/problems.py | 36 ++++ tensor2tensor/problems_test.py | 60 ++++++ tensor2tensor/utils/data_reader.py | 2 + tensor2tensor/utils/trainer_utils.py | 2 +- 12 files changed, 284 insertions(+), 21 deletions(-) create mode 100644 tensor2tensor/problems.py create mode 100644 tensor2tensor/problems_test.py diff --git a/tensor2tensor/data_generators/gene_expression.py b/tensor2tensor/data_generators/gene_expression.py index 0607aad15..43d5a6702 100644 --- a/tensor2tensor/data_generators/gene_expression.py +++ b/tensor2tensor/data_generators/gene_expression.py @@ -142,7 +142,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): # Shuffle generator_utils.shuffle_dataset(all_filepaths) - def hparams(self, defaults, model_hparams): + def hparams(self, defaults, unused_model_hparams): p = defaults vocab_size = self._encoders["inputs"].vocab_size p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)} @@ -159,9 +159,8 @@ def example_reading_spec(self): data_items_to_decoders = None return (data_fields, data_items_to_decoders) - def preprocess_examples(self, examples, mode, hparams): + def preprocess_examples(self, examples, mode, unused_hparams): del mode - del hparams # Reshape targets to contain num_output_predictions per output timestep examples["targets"] = tf.reshape(examples["targets"], diff --git a/tensor2tensor/data_generators/ice_parsing.py b/tensor2tensor/data_generators/ice_parsing.py index 4fb0424bb..2aa261cd4 100644 --- a/tensor2tensor/data_generators/ice_parsing.py +++ b/tensor2tensor/data_generators/ice_parsing.py @@ -109,7 +109,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): self.targeted_vocab_size), self.dev_filepaths(data_dir, 1, shuffled=False)) - def hparams(self, defaults, model_hparams): + def hparams(self, defaults, unused_model_hparams): p = defaults source_vocab_size = self._encoders["inputs"].vocab_size p.input_modality = {"inputs": (registry.Modalities.SYMBOL, diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index fbe91d70e..03cea1d02 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -105,7 +105,7 @@ def resize(img, size): examples["targets"] = resize(inputs, 32) return examples - def hparams(self, defaults, model_hparams): + def hparams(self, defaults, unused_model_hparams): p = defaults p.input_modality = {"inputs": ("image:identity_no_pad", None)} p.target_modality = ("image:identity_no_pad", None) @@ -229,7 +229,7 @@ def feature_encoders(self, data_dir): "targets": text_encoder.SubwordTextEncoder(vocab_filename) } - def hparams(self, defaults, model_hparams): + def hparams(self, defaults, unused_model_hparams): p = defaults p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} vocab_size = self._encoders["targets"].vocab_size @@ -267,7 +267,7 @@ def dev_shards(self): def generator(self, data_dir, tmp_dir, is_training): raise NotImplementedError() - def hparams(self, defaults, model_hparams): + def hparams(self, defaults, unused_model_hparams): p = defaults small_modality = "%s:small_image_modality" % registry.Modalities.IMAGE modality = small_modality if self.is_small else registry.Modalities.IMAGE @@ -349,7 +349,7 @@ def is_small(self): def num_classes(self): return 1000 - def preprocess_examples(self, examples, mode, hparams): + def preprocess_examples(self, examples, mode, unused_hparams): # Just resize with area. if self._was_reversed: examples["inputs"] = tf.to_int64( @@ -565,7 +565,7 @@ def cifar10_generator(tmp_dir, training, how_many, start_from=0): @registry.register_problem class ImageCifar10Tune(ImageMnistTune): - def preprocess_examples(self, examples, mode, hparams): + def preprocess_examples(self, examples, mode, unused_hparams): if mode == tf.contrib.learn.ModeKeys.TRAIN: examples["inputs"] = common_layers.cifar_image_augmentation( examples["inputs"]) @@ -591,7 +591,7 @@ def generator(self, data_dir, tmp_dir, is_training): @registry.register_problem class ImageCifar10Plain(ImageCifar10): - def preprocess_examples(self, examples, mode, hparams): + def preprocess_examples(self, examples, mode, unused_hparams): return examples @@ -730,7 +730,7 @@ def feature_encoders(self, data_dir): encoder = text_encoder.SubwordTextEncoder(vocab_filename) return {"targets": encoder} - def hparams(self, defaults, model_hparams): + def hparams(self, defaults, unused_model_hparams): p = defaults p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} encoder = self._encoders["targets"] diff --git a/tensor2tensor/data_generators/imdb.py b/tensor2tensor/data_generators/imdb.py index 281a03bee..4216747c4 100644 --- a/tensor2tensor/data_generators/imdb.py +++ b/tensor2tensor/data_generators/imdb.py @@ -97,7 +97,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): self.generator(data_dir, tmp_dir, True), train_paths, self.generator(data_dir, tmp_dir, False), dev_paths) - def hparams(self, defaults, model_hparams): + def hparams(self, defaults, unused_model_hparams): p = defaults source_vocab_size = self._encoders["inputs"].vocab_size p.input_modality = { diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index e4424e73e..d0ed6ad2c 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -18,10 +18,14 @@ from __future__ import division from __future__ import print_function +import collections import os +import random # Dependency imports +import six + from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder from tensor2tensor.utils import metrics @@ -30,6 +34,7 @@ import tensorflow as tf + class SpaceID(object): """Input and target space ids. Add more as needed.""" # Generic / unknown output space (default) @@ -92,6 +97,14 @@ class SpaceID(object): CPP_TOK = 28 +def default_model_hparams(): + return tf.contrib.training.HParams( + max_input_seq_length=0, + max_target_seq_length=0, + prepend_mode="none", + data_dir=None) + + def preprocess_examples_common(examples, hparams): """Preprocessing steps common to all models.""" if hparams.max_input_seq_length > 0: @@ -232,14 +245,23 @@ def __init__(self, was_reversed=False, was_copy=False): self._was_reversed = was_reversed self._was_copy = was_copy self._encoders = None + self._hparams = None + self._feature_info = None - def internal_build_encoders(self, data_dir): - self._encoders = self.feature_encoders(data_dir) + def get_feature_encoders(self, data_dir=None): + if self._encoders is None: + self._encoders = self.feature_encoders(data_dir) + return self._encoders - def internal_hparams(self, model_hparams): + def get_hparams(self, model_hparams=None): """Returns problem_hparams.""" + if self._hparams is not None: + return self._hparams + + assert model_hparams is not None + if self._encoders is None: - self.internal_build_encoders(model_hparams.data_dir) + self.get_feature_encoders(model_hparams.data_dir) hp = _default_hparams() ret = self.hparams(hp, model_hparams) @@ -255,7 +277,9 @@ def internal_hparams(self, model_hparams): _reverse_problem_hparams(hp) if self._was_copy: _copy_problem_hparams(hp) - return hp + + self._hparams = hp + return self._hparams def maybe_reverse_features(self, feature_map): if not self._was_reversed: @@ -268,6 +292,148 @@ def maybe_copy_features(self, feature_map): return feature_map["targets"] = feature_map["inputs"] + def dataset(self, + mode, + data_dir=None, + num_threads=None, + output_buffer_size=None, + shuffle_files=None, + hparams=None): + """Build a Dataset for this problem. + + Args: + mode: tf.estimator.ModeKeys; determines which files to read from. + data_dir: directory that contains data files. + num_threads: int, number of threads to use for decode and preprocess + Dataset.map calls. + output_buffer_size: int, how many elements to prefetch in Dataset.map + calls. + shuffle_files: whether to shuffle input files. Default behavior (i.e. when + shuffle_files=None) is to shuffle if mode == TRAIN. + hparams: tf.contrib.training.HParams; hparams to be passed to + Problem.preprocess_examples and Problem.hparams. If None, will use a + default set that is a no-op. + + Returns: + Dataset containing dict<feature name, Tensor>. + """ + assert data_dir + + if hparams is None: + hparams = default_model_hparams() + + if not hasattr(hparams, "data_dir"): + hparams.add_hparam("data_dir", data_dir) + if not hparams.data_dir: + hparams.data_dir = data_dir + # Construct the Problem's hparams so that items within it are accessible + _ = self.get_hparams(hparams) + + base_filename = self.dataset_filename() + path = os.path.join(data_dir, base_filename) + + # TODO(rsepassi): handle ModeKeys.PREDICT with placeholders + is_training = mode == tf.estimator.ModeKeys.TRAIN + if is_training: + suffix = "train" + elif mode == tf.estimator.ModeKeys.EVAL: + suffix = "dev" + else: + assert mode == "test" + suffix = "test" + + filepattern = "%s-%s*" % (path, suffix) + data_fields, data_items_to_decoders = self.example_reading_spec() + if data_items_to_decoders is None: + data_items_to_decoders = { + field: tf.contrib.slim.tfexample_decoder.Tensor(field) + for field in data_fields + } + + data_files = tf.contrib.slim.parallel_reader.get_data_files(filepattern) + if shuffle_files or shuffle_files is None and is_training: + random.shuffle(data_files) + dataset = tf.contrib.data.TFRecordDataset(data_files) + + def decode_record(record): + """Serialized Example to dict of <feature name, Tensor>.""" + decoder = tf.contrib.slim.tfexample_decoder.TFExampleDecoder( + data_fields, data_items_to_decoders) + + decode_items = list(data_items_to_decoders) + decoded = decoder.decode(record, items=decode_items) + return dict(zip(decode_items, decoded)) + + def preprocess(example): + example = self.preprocess_examples(example, mode, hparams) + self.maybe_reverse_features(example) + self.maybe_copy_features(example) + return example + + dataset = dataset.map(decode_record, num_threads=num_threads) + dataset = dataset.map( + preprocess, + num_threads=num_threads, + output_buffer_size=output_buffer_size) + + return dataset + + @property + def feature_info(self): + """Retrieve dict<feature name, FeatureInfo>. + + Must first call Problem.get_hparams or Problem.dataset to have the problem's + internal hparams already constructed. + + Returns: + dict<feature name, FeatureInfo> + """ + if self._feature_info is not None: + return self._feature_info + + assert self._hparams is not None + + hp = self.get_hparams() + input_mods = hp.input_modality + target_mod = hp.target_modality + vocabs = hp.vocabulary + in_id = hp.input_space_id + out_id = hp.target_space_id + + features = collections.defaultdict(FeatureInfo) + + for name, mod_spec in six.iteritems(input_mods): + mod, vocab_size = mod_spec + finfo = features[name] + finfo.modality = mod + finfo.vocab_size = vocab_size + + mod, vocab_size = target_mod + features["targets"].modality = mod + features["targets"].vocab_size = vocab_size + + for name, encoder in six.iteritems(vocabs): + features[name].encoder = encoder + + features["inputs"].space_id = in_id + features["targets"].space_id = out_id + + self._feature_info = features + return features + + +class FeatureInfo(object): + + def __init__(self, + encoder=None, + modality=None, + vocab_size=None, + space_id=None): + self.encoder = encoder + self.modality = modality + self.vocab_size = vocab_size + self.space_id = space_id + def _copy_problem_hparams(p_hparams): """Use input modality, vocab, and space id for target.""" diff --git a/tensor2tensor/models/gene_expression_test.py b/tensor2tensor/models/gene_expression_test.py index cc4cd1200..e46e81859 100644 --- a/tensor2tensor/models/gene_expression_test.py +++ b/tensor2tensor/models/gene_expression_test.py @@ -70,7 +70,7 @@ def testGeneExpressionModels(self): gene_expression_conv_test())] for model_cls, hparams in models_hparams: hparams.add_hparam("data_dir", None) - p_hparams = gene_data.GenomicsExpressionCage10().internal_hparams(hparams) + p_hparams = gene_data.GenomicsExpressionCage10().get_hparams(hparams) hparams.problems = [p_hparams] self._testModel(hparams, model_cls) diff --git a/tensor2tensor/models/multimodel_test.py b/tensor2tensor/models/multimodel_test.py index 73a8436cc..ab60bae97 100644 --- a/tensor2tensor/models/multimodel_test.py +++ b/tensor2tensor/models/multimodel_test.py @@ -38,7 +38,7 @@ def testMultiModel(self): hparams = multimodel.multimodel_tiny() hparams.add_hparam("data_dir", "") problem = registry.problem("image_cifar10") - p_hparams = problem.internal_hparams(hparams) + p_hparams = problem.get_hparams(hparams) hparams.problems = [p_hparams] with self.test_session() as session: features = { diff --git a/tensor2tensor/models/slicenet_test.py b/tensor2tensor/models/slicenet_test.py index 388acde1b..c3a064a85 100644 --- a/tensor2tensor/models/slicenet_test.py +++ b/tensor2tensor/models/slicenet_test.py @@ -39,7 +39,7 @@ def testSliceNet(self): hparams = slicenet.slicenet_params1_tiny() hparams.add_hparam("data_dir", "") problem = registry.problem("image_cifar10") - p_hparams = problem.internal_hparams(hparams) + p_hparams = problem.get_hparams(hparams) hparams.problems = [p_hparams] with self.test_session() as session: features = { diff --git a/tensor2tensor/problems.py b/tensor2tensor/problems.py new file mode 100644 index 000000000..1e94c7bad --- /dev/null +++ b/tensor2tensor/problems.py @@ -0,0 +1,36 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Access T2T Problems. + +See problems_test.py for basic usage. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import +from tensor2tensor.utils import registry + + +def problem(name): + return registry.problem(name) + + +def available(): + return sorted(registry.list_problems()) diff --git a/tensor2tensor/problems_test.py b/tensor2tensor/problems_test.py new file mode 100644 index 000000000..de101e6e7 --- /dev/null +++ b/tensor2tensor/problems_test.py @@ -0,0 +1,60 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""tensor2tensor.problems test.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor import problems + +import tensorflow as tf + +MODES = tf.estimator.ModeKeys + + +class ProblemsTest(tf.test.TestCase): + + def testBuildDataset(self): + # See all the available problems + self.assertTrue(len(problems.available()) > 10) + + # Retrieve a problem by name + problem = problems.problem("translate_ende_wmt8k") + + # Access train and dev datasets through Problem + train_dataset = problem.dataset(MODES.TRAIN) + dev_dataset = problem.dataset(MODES.EVAL) + + # Access vocab size and other info (e.g. the data encoders used to + # encode/decode data for the feature, used below) through feature_info. + feature_info = problem.feature_info + self.assertTrue(feature_info["inputs"].vocab_size > 0) + self.assertTrue(feature_info["targets"].vocab_size > 0) + + train_example = train_dataset.make_one_shot_iterator().get_next() + dev_example = dev_dataset.make_one_shot_iterator().get_next() + + with tf.Session() as sess: + train_ex_val, _ = sess.run([train_example, dev_example]) + _ = feature_info["inputs"].encoder.decode(train_ex_val["inputs"]) + _ = feature_info["targets"].encoder.decode(train_ex_val["targets"]) + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 09ef159a4..681f3598b 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -260,6 +260,8 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, num_threads = 4 if is_training else 1 with tf.name_scope("input_pipeline"): + # TODO(rsepassi): Once all problems use the Problem class, rm example + # reading, parsing, and preprocessing. Use Problem.dataset instead. dataset = read_examples(problem, data_file_pattern, capacity, mode=mode) dataset = dataset.map( lambda ex: _preprocess(ex, problem, data_file_pattern, hparams, mode), diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index a747b9a09..8539f4eb1 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -237,7 +237,7 @@ def add_problem_hparams(hparams, problems): if problem is None: p_hparams = problem_hparams.problem_hparams(problem_name, hparams) else: - p_hparams = problem.internal_hparams(hparams) + p_hparams = problem.get_hparams(hparams) hparams.problem_instances.append(problem) hparams.problems.append(p_hparams) From 636d2e1fd089290f28eaa45f2476cc00ce67d7a4 Mon Sep 17 00:00:00 2001 From: Katherine Lee <katherinelee@google.com> Date: Mon, 4 Sep 2017 14:44:19 -0700 Subject: [PATCH 0350/4095] Add strokes SpaceID. PiperOrigin-RevId: 167518694 --- tensor2tensor/data_generators/all_problems.py | 1 - tensor2tensor/data_generators/problem.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index f9afa895b..52354704d 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -45,4 +45,3 @@ pass # pylint: enable=g-import-not-at-top # pylint: enable=unused-import - diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index d0ed6ad2c..302c51fa7 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -95,6 +95,8 @@ class SpaceID(object): PY_TOK = 27 # C++ CPP_TOK = 28 + # Strokes + STROKES = 29 def default_model_hparams(): From c25325be184bd555a1b0df0af021699996435f79 Mon Sep 17 00:00:00 2001 From: Katherine Lee <katherinelee@google.com> Date: Mon, 4 Sep 2017 15:36:23 -0700 Subject: [PATCH 0351/4095] Merge from GitHub PiperOrigin-RevId: 167520632 --- docs/new_problem.md | 48 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/new_problem.md b/docs/new_problem.md index c859c6eba..d581a3a1b 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -17,13 +17,15 @@ For each problem we want to tackle we create a new problem class and register it Since many text2text problems share similar methods, there's already a class called `Text2TextProblem` that extends the base problem class, `Problem` (both found in `problem.py`). -For our problem, we can go ahead and create the file `word2def.py` in the `data_generators` folder and add our new problem, `Word2def`, which extends `TranslateProblem`. Let's also register it while we're at it so we can specify the problem through flags. +For our problem, we can go ahead and create the file `word2def.py` in the `data_generators` folder and add our new problem, `Word2def`, which extends `Text2TextProblem`. Let's also register it while we're at it so we can specify the problem through flags. ```python -@registry.register_problem() +@registry.register_problem class Word2def(problem.Text2TextProblem): """Problem spec for English word to dictionary definition.""" - return NotImplementedError() + @property + def is_character_level(self): + ... ``` We need to implement the following methods from `Text2TextProblem` in our new class: @@ -56,6 +58,8 @@ The number of shards to break data files into. @registry.register_problem() class Word2def(problem.Text2TextProblem): """Problem spec for English word to dictionary definition.""" + + @property def is_character_level(self): return True @@ -87,7 +91,6 @@ We're almost done. `generator` generates the training and evaluation data and st def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS - tag = "train" if train else "dev" return character_generator(datasets[0], datasets[1], character_vocab, EOS) ``` @@ -108,7 +111,6 @@ class Word2def(problem.Text2TextProblem): def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS - tag = "train" if train else "dev" return character_generator(datasets[0], datasets[1], character_vocab, EOS) @property @@ -137,14 +139,13 @@ I've gone ahead and split all words into a train and test set and saved them in ```python # English Word2def datasets _WORD2DEF_TRAIN_DATASETS = [ - [ - "LOCATION_OF_DATA/", ("words_train.txt", "definitions_train.txt") - ] + LOCATION_OF_DATA + 'words_train.txt', + LOCATION_OF_DATA + 'definitions_train.txt' ] + _WORD2DEF_TEST_DATASETS = [ - [ - "LOCATION_OF_DATA", ("words_test.txt", "definitions_test.txt") - ] + LOCATION_OF_DATA + 'words_test.txt', + LOCATION_OF_DATA + 'definitions_test.txt' ] ``` @@ -155,24 +156,14 @@ Now our `word2def.py` file looks like: (with the correct imports) """ Problem definition for word to dictionary definition. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import os -import tarfile # do we need this import -from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder from tensor2tensor.data_generators.wmt import character_generator from tensor2tensor.utils import registry -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - # English Word2def datasets _WORD2DEF_TRAIN_DATASETS = [ LOCATION_OF_DATA+'words_train.txt', @@ -198,7 +189,6 @@ class Word2def(problem.Text2TextProblem): def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() datasets = _WORD2DEF_TRAIN_DATASETS if train else _WORD2DEF_TEST_DATASETS - tag = "train" if train else "dev" return character_generator(datasets[0], datasets[1], character_vocab, EOS) @property @@ -220,7 +210,17 @@ class Word2def(problem.Text2TextProblem): ``` # Hyperparameters -All hyperparamters inherit from `_default_hparams()` in `problem.py.` If you would like to customize your hyperparameters, add another method to the file `problem_hparams.py`. +All hyperparamters inherit from `_default_hparams()` in `problem.py.` If you would like to customize your hyperparameters, register a new hyperparameter set in `word2def.py` like the example provided in the walkthrough. For example: + +```python +from tensor2tensor.models import transformer + +@registry.register_hparams +def word2def_hparams(self): + hparams = transformer.transformer_base_single_gpu() # Or whatever you'd like to build off. + hparams.batch_size = 1024 + return hparams +``` # Run the problem Now that we've gotten our problem set up, let's train a model and generate definitions. @@ -229,7 +229,7 @@ We specify our problem name, the model, and hparams. ```bash PROBLEM=word2def MODEL=transformer -HPARAMS=transofmer_base_single_gpu +HPARAMS=word2def_hparams ``` The rest of the steps are as given in the [walkthrough](walkthrough.md). From 0de6f8c53204ebbce4cdabaaa32182d69571ad6c Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 5 Sep 2017 14:05:23 -0700 Subject: [PATCH 0352/4095] Save metadata (flags, hparams) on train PiperOrigin-RevId: 167628142 --- tensor2tensor/utils/trainer_utils.py | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 8539f4eb1..ee3445e26 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function +import os import sys # Dependency imports @@ -147,6 +148,8 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, """Create Experiment.""" hparams = create_hparams( FLAGS.hparams_set, FLAGS.problems, data_dir, passed_hparams=FLAGS.hparams) + if FLAGS.worker_id == 0: + save_metadata(output_dir, hparams) estimator, input_fns = create_experiment_components( hparams=hparams, output_dir=output_dir, @@ -245,6 +248,37 @@ def add_problem_hparams(hparams, problems): return hparams +def save_metadata(output_dir, hparams): + """Saves FLAGS and hparams to output_dir.""" + # Save FLAGS in txt file + if hasattr(FLAGS, "flags_into_string"): + flags_str = FLAGS.flags_into_string() + t2t_flags_str = "\n".join([ + "--%s=%s" % (f.name, f.value) + for f in FLAGS.flags_by_module_dict()[ + "tensor2tensor.utils.trainer_utils"] + ]) + else: + flags_dict = FLAGS.__dict__["__flags"] + flags_str = "\n".join( + ["--%s=%s" % (name, str(f.value)) for (name, f) in flags_dict.items()]) + t2t_flags_str = None + + flags_txt = os.path.join(output_dir, "flags.txt") + with tf.gfile.Open(flags_txt, "w") as f: + f.write(flags_str) + + if t2t_flags_str: + t2t_flags_txt = os.path.join(output_dir, "flags_t2t.txt") + with tf.gfile.Open(t2t_flags_txt, "w") as f: + f.write(t2t_flags_str) + + # Save hparams as hparams.json + hparams_fname = os.path.join(output_dir, "hparams.json") + with tf.gfile.Open(hparams_fname, "w") as f: + f.write(hparams.to_json()) + + def create_hparams(params_id, problems, data_dir, passed_hparams=None): """Returns hyperparameters, including any flag value overrides. From c46684f79620ae695e4c79708e3064ab2aea8b7d Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Wed, 6 Sep 2017 08:55:04 -0700 Subject: [PATCH 0353/4095] Attention experts uses local info for the FC. Fix long max_length size when batch_size is set through command line. Minor cleanup PiperOrigin-RevId: 167726943 --- tensor2tensor/models/attention_lm_moe.py | 63 ++++++++++++++++-------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 191d4aa04..eccf349c9 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -101,29 +101,30 @@ def _diet_expert(x): # should not either way) assert hparams.norm_type != "batch" + tf.logging.info("Applying Padding Remover for the attention experts") + dp_remove_pad = functools.partial( dp, remove_pad, pad_remover=pad_remover, mode=hparams.mode) dp_restore_pad = functools.partial( dp, restore_pad, ref_x=x, pad_remover=pad_remover, mode=hparams.mode) - elif (hparams.attention_type == AttentionType.MULTIHEAD or - hparams.attention_type == AttentionType.MEMORY_EFFICIENT): + else: # Using identity function: No effect - dp_remove_pad = lambda x: (x, None) + dp_remove_pad = lambda x: x dp_restore_pad = lambda x: x - else: - raise ValueError("Only {} supported for now.".format( - AttentionType.get_choices())) - def print_shape(x, suffix): + def print_shape(x, suffix, debug=False): # To help debugging, print the input/output shapes at inference and eval # Inference for long sequences can take a long time, so that's help to # see the progession of the generation - if hparams.mode == ModeKeys.TRAIN: + if not debug and hparams.mode == ModeKeys.TRAIN: return x return tf.Print(x, [tf.shape(x)], "shape_x_{}".format(suffix)) + batch_coordinate = dp(get_batch_coordinate, x) + batch_coordinate = dp_remove_pad(batch_coordinate) + x = dp(print_shape, x, "in") - x, batch_coordinate = dp_remove_pad(x) + x = dp_remove_pad(x) x = dp(print_shape, x, "in_flat") for layer in xrange(hparams.num_hidden_layers): @@ -188,12 +189,31 @@ def print_shape(x, suffix): x, hparams.filter_size) else: + x_in = preprocess(x) + additional_conv_params = dict() + if hparams.use_sepconv: + # Restore padding so sequences don't attend to each others + # restore_pad will apply a reshape like x_ref, to restore the + # original shape. Here this works because the last dimension is + # constant between the output of attention and the original input + # but it shouldn't necessarily be the case. + x_in = dp_restore_pad(x_in) + additional_conv_params = dict( + padding="LEFT", + # Parameters copied from the transformer model + kernel_size=(3, 1), + second_kernel_size=(31, 1), + ) y = dp( common_layers.conv_hidden_relu, - preprocess(x), + x_in, hparams.filter_size, hparams.hidden_size, - dropout=hparams.relu_dropout) + dropout=hparams.relu_dropout, + **additional_conv_params + ) + if hparams.use_sepconv: + y = dp_remove_pad(y) x = postprocess(x, y) x = preprocess(x) @@ -234,6 +254,14 @@ def attention_lm_moe_prepare_decoder(targets, hparams): return (decoder_input, decoder_self_attention_bias, pad_remover) +def get_batch_coordinate(x): + """Return a flat int32 tensor of shape [1, batch_size*length, 1].""" + # Compute the batch coordinate before flattening all batches + batch_coordinate = tf.expand_dims( + common_attention.coordinate_tensor(tf.shape(x)[:-1], axis=0), axis=-1) + return batch_coordinate + + def remove_pad(x, pad_remover, mode): """Remove padding by concatenating all dimension into one. @@ -247,11 +275,6 @@ def remove_pad(x, pad_remover, mode): tf.Tensor of shape [1,length_nonpad,depth] where length_nonpad <= batch_size*length """ - # Compute the batch coordinate before flattening all batches - batch_coordinate = tf.expand_dims( - common_attention.coordinate_tensor(tf.shape(x)[:-1], axis=0), axis=-1) - batch_coordinate = expert_utils.flatten_all_but_last(batch_coordinate) - # Concatenate all tokens (without padding) x = expert_utils.flatten_all_but_last(x) @@ -260,12 +283,10 @@ def remove_pad(x, pad_remover, mode): # This is a hack to allows inference when the <go> token # is detected as padding and removed. This works for now because there is # no padding at inference. - batch_coordinate = pad_remover.remove(batch_coordinate) x = pad_remover.remove(x) - batch_coordinate = tf.expand_dims(batch_coordinate, axis=0) x = tf.expand_dims(x, axis=0) # Now batch_size=1 - return x, batch_coordinate + return x def restore_pad(x, ref_x, pad_remover, mode): @@ -328,6 +349,7 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_v_size", 256) # Loss coef for load balancing hparams.add_hparam("attention_load_balance", 2e-2) + hparams.add_hparam("use_sepconv", int(False)) hparams.add_hparam("diet_experts", int(False)) hparams.add_hparam("memory_efficient_ffn", int(False)) return hparams @@ -338,7 +360,8 @@ def attention_lm_moe_base_ae(): """Base model with attention expert.""" hparams = attention_lm_moe_base() hparams.attention_type = AttentionType.LOCAL_EXPERTS - hparams.max_length = hparams.batch_size + hparams.use_sepconv = int(True) + hparams.max_length = 0 # max_length == batch_size hparams.eval_drop_long_sequences = int(True) hparams.min_length_bucket = 256 # Avoid cyclic problems for big batches hparams.learning_rate = 0.05 From 5767beceb71c56222f73cb41e70641c380636cb9 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 6 Sep 2017 10:11:01 -0700 Subject: [PATCH 0354/4095] ClassLabelEncoder to map class ids to names PiperOrigin-RevId: 167736101 --- tensor2tensor/data_generators/image.py | 23 +++++++++++++++ tensor2tensor/data_generators/imdb.py | 2 +- tensor2tensor/data_generators/problem.py | 17 +++++++---- tensor2tensor/data_generators/text_encoder.py | 29 +++++++++++++++++++ 4 files changed, 64 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 03cea1d02..8d142d239 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -264,6 +264,17 @@ def train_shards(self): def dev_shards(self): return 1 + @property + def class_labels(self): + return ["ID_%d" % i for i in range(self.num_classes)] + + def feature_encoders(self, data_dir): + del data_dir + return { + "inputs": text_encoder.TextEncoder(), + "targets": text_encoder.ClassLabelEncoder(self.class_labels) + } + def generator(self, data_dir, tmp_dir, is_training): raise NotImplementedError() @@ -491,6 +502,10 @@ def is_small(self): def num_classes(self): return 10 + @property + def class_labels(self): + return [str(c) for c in range(self.num_classes)] + @property def train_shards(self): return 10 @@ -564,6 +579,14 @@ def cifar10_generator(tmp_dir, training, how_many, start_from=0): @registry.register_problem class ImageCifar10Tune(ImageMnistTune): + """Cifar-10 Tune.""" + + @property + def class_labels(self): + return [ + "airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", + "ship", "truck" + ] def preprocess_examples(self, examples, mode, unused_hparams): if mode == tf.contrib.learn.ModeKeys.TRAIN: diff --git a/tensor2tensor/data_generators/imdb.py b/tensor2tensor/data_generators/imdb.py index 4216747c4..d7eadcd1d 100644 --- a/tensor2tensor/data_generators/imdb.py +++ b/tensor2tensor/data_generators/imdb.py @@ -112,7 +112,7 @@ def feature_encoders(self, data_dir): encoder = text_encoder.SubwordTextEncoder(vocab_filename) return { "inputs": encoder, - "targets": text_encoder.TextEncoder(), + "targets": text_encoder.ClassLabelEncoder(["neg", "pos"]), } def example_reading_spec(self): diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 302c51fa7..4aa4862ef 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -300,7 +300,8 @@ def dataset(self, num_threads=None, output_buffer_size=None, shuffle_files=None, - hparams=None): + hparams=None, + preprocess=True): """Build a Dataset for this problem. Args: @@ -315,6 +316,8 @@ def dataset(self, hparams: tf.contrib.training.HParams; hparams to be passed to Problem.preprocess_examples and Problem.hparams. If None, will use a default set that is a no-op. + preprocess: bool, whether to map the Dataset through + Problem.preprocess_examples. Returns: Dataset containing dict<feature name, Tensor>. @@ -366,17 +369,19 @@ def decode_record(record): decoded = decoder.decode(record, items=decode_items) return dict(zip(decode_items, decoded)) - def preprocess(example): + def _preprocess(example): example = self.preprocess_examples(example, mode, hparams) self.maybe_reverse_features(example) self.maybe_copy_features(example) return example dataset = dataset.map(decode_record, num_threads=num_threads) - dataset = dataset.map( - preprocess, - num_threads=num_threads, - output_buffer_size=output_buffer_size) + + if preprocess: + dataset = dataset.map( + _preprocess, + num_threads=num_threads, + output_buffer_size=output_buffer_size) return dataset diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index ac9260cfa..97ab88402 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -154,6 +154,35 @@ def vocab_size(self): return 2**8 + self._num_reserved_ids +class ClassLabelEncoder(TextEncoder): + """Encoder for class labels.""" + + def __init__(self, class_labels=None, class_labels_fname=None): + super(ClassLabelEncoder, self).__init__(num_reserved_ids=0) + + assert class_labels or class_labels_fname + assert not (class_labels and class_labels_fname) + + if class_labels_fname: + with tf.gfile.Open(class_labels_fname) as f: + class_labels = [label.strip() for label in f.readlines()] + + self._class_labels = class_labels + + def encode(self, label_str): + return self._class_labels.index(label_str) + + def decode(self, label_id): + if isinstance(label_id, list): + assert len(label_id) == 1 + label_id, = label_id + return self._class_labels[label_id] + + @property + def vocab_size(self): + return len(self._class_labels) + + class TokenTextEncoder(TextEncoder): """Encoder based on a user-supplied vocabulary (file or list).""" From 78d8ddb349870400c89cd08c1c3e70bcc92f1f5f Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Wed, 6 Sep 2017 14:56:10 -0700 Subject: [PATCH 0355/4095] Add attention 2D functions over local_attention_2d PiperOrigin-RevId: 167777554 --- tensor2tensor/layers/common_attention.py | 119 +++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 7ed7799d0..1053a69af 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -328,6 +328,19 @@ def split_heads(x, num_heads): return tf.transpose(split_last_dimension(x, num_heads), [0, 2, 1, 3]) +def split_heads_2d(x, num_heads): + """Split channels (dimension 4) into multiple heads (becomes dimension 1). + + Args: + x: a Tensor with shape [batch, height, width, channels] + num_heads: an integer + + Returns: + a Tensor with shape [batch, num_heads, height, width, channels / num_heads] + """ + return tf.transpose(split_last_dimension(x, num_heads), [0, 3, 1, 2, 4]) + + def combine_heads(x): """Inverse of split_heads. @@ -340,6 +353,18 @@ def combine_heads(x): return combine_last_two_dimensions(tf.transpose(x, [0, 2, 1, 3])) +def combine_heads_2d(x): + """Inverse of split_heads_2d function. + + Args: + x: a Tensor with shape [batch, num_heads, height, width, channels/num_heads] + + Returns: + a Tensor with shape [batch, height, width, channels] + """ + return combine_last_two_dimensions(tf.transpose(x, [0, 2, 3, 1, 4])) + + def attention_image_summary(attn, image_shapes=None): """Compute color image summary. @@ -768,6 +793,43 @@ def compute_qkv(query_antecedent, memory_antecedent, total_key_depth, return q, k, v +def compute_qkv_2d(query_antecedent, memory_antecedent, total_key_depth, + total_value_depth): + """Computes query, key and value of a 4D tensor. + + Args: + query_antecedent: a Tensor with shape [batch, h, w, depth_k] + memory_antecedent: a Tensor with shape [batch, h, w, depth_k] + total_key_depth: an integer + total_value_depth: and integer + + Returns: + q, k, v : [batch, h, w, depth_k] tensors + """ + # self attention with single position q, k, and v. + if memory_antecedent is None: + combined = tf.layers.conv2d( + query_antecedent, + total_key_depth * 2 + total_value_depth, (1, 1), + name="qkv_transform") + q, k, v = tf.split( + combined, [total_key_depth, total_key_depth, total_value_depth], + axis=-1) + return q, k, v + + # Encoder decoder attention. + q = common_layers.conv1d( + query_antecedent, total_key_depth, 1, name="q_transform") + combined = common_layers.conv1d( + memory_antecedent, + total_key_depth + total_value_depth, + 1, + name="kv_transform") + k, v = tf.split(combined, [total_key_depth, total_value_depth], axis=2) + + return q, k, v + + def multihead_attention(query_antecedent, memory_antecedent, bias, @@ -849,6 +911,63 @@ def multihead_attention(query_antecedent, return x +def multihead_attention_2d(query_antecedent, + memory_antecedent, + total_key_depth, + total_value_depth, + output_depth, + num_heads, + attention_type="local_attention_2d", + block_length=128, + block_width=128, + name=None): + """2d Multihead scaled-dot-product attention with inp/output transformations. + + Args: + query_antecedent: a Tensor with shape [batch, h, w, depth_k] + memory_antecedent: a Tensor with shape [batch, h, w, depth_k] + total_key_depth: an integer + total_value_depth: an integer + output_depth: an integer + num_heads: an integer dividing total_key_depth and total_value_depth + attention_type: String, type of attention function to use. + block_length: an integer - relevant for "local_attention_2d" + block_width: an integer - relevant for "local_attention_2d" + name: an optional string + + Returns: + A Tensor of shape [batch, h, w, depth_k] + + Raises: + ValueError: if the key depth or value depth are not divisible by the + number of attention heads. + """ + if total_key_depth % num_heads != 0: + raise ValueError("Key depth (%d) must be divisible by the number of " + "attention heads (%d)." % (total_key_depth, num_heads)) + if total_value_depth % num_heads != 0: + raise ValueError("Value depth (%d) must be divisible by the number of " + "attention heads (%d)." % (total_value_depth, num_heads)) + with tf.variable_scope( + name, + default_name="multihead_attention", + values=[query_antecedent, memory_antecedent]): + q, k, v = compute_qkv_2d(query_antecedent, memory_antecedent, + total_key_depth, total_value_depth) + + q = split_heads_2d(q, num_heads) + k = split_heads_2d(k, num_heads) + v = split_heads_2d(v, num_heads) + key_depth_per_head = total_key_depth // num_heads + q *= key_depth_per_head**-0.5 + if attention_type == "local_attention_2d": + x = local_attention_2d( + q, k, v, block_length=block_length, filter_flange=block_width) + x = tf.squeeze(combine_heads_2d(x), axis=-2) + x = common_layers.conv1d(x, output_depth, 1, name="output_transform") + return x + + def ffn_self_attention_layer(x, filter_depth, output_depth, From 4794c20af3e0d104e38985a37cfa7244185cd13e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 6 Sep 2017 16:13:02 -0700 Subject: [PATCH 0356/4095] GPU mem fraction default 0.95 to rm allocation error msg PiperOrigin-RevId: 167788682 --- tensor2tensor/utils/trainer_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index ee3445e26..3248d9ca9 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -93,7 +93,7 @@ flags.DEFINE_integer("worker_gpu", 1, "How many GPUs to use.") flags.DEFINE_integer("worker_replicas", 1, "How many workers to use.") flags.DEFINE_integer("worker_id", 0, "Which worker task are we.") -flags.DEFINE_float("worker_gpu_memory_fraction", 1., +flags.DEFINE_float("worker_gpu_memory_fraction", 0.95, "Fraction of GPU memory to allocate.") flags.DEFINE_integer("ps_gpu", 0, "How many GPUs to use per ps.") flags.DEFINE_string("gpu_order", "", "Optional order for daisy-chaining gpus." From 74044ea9768fd41e90166305d041d67457955bfd Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 6 Sep 2017 16:32:29 -0700 Subject: [PATCH 0357/4095] Share one PadRemover across all Transformer encoder layers PiperOrigin-RevId: 167791186 --- tensor2tensor/models/transformer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 86b920dc5..38766ec19 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -191,6 +191,8 @@ def transformer_encoder(encoder_input, """ x = encoder_input with tf.variable_scope(name): + pad_remover = expert_utils.PadRemover( + common_attention.attention_bias_to_padding(encoder_self_attention_bias)) for layer in xrange( hparams.num_encoder_layers or hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): @@ -203,9 +205,6 @@ def transformer_encoder(encoder_input, hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): - pad_remover = expert_utils.PadRemover( - common_attention.attention_bias_to_padding( - encoder_self_attention_bias)) y = transformer_ffn_layer( common_layers.layer_preprocess(x, hparams), hparams, pad_remover) x = common_layers.layer_postprocess(x, y, hparams) From 665dbe8b92f827d68a7671fa15cbb6f0231de1ad Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Wed, 6 Sep 2017 16:43:15 -0700 Subject: [PATCH 0358/4095] 2d masked local attention. Each memory block can attend to a memory region top-left, top, and top-right. The mask ensures that we don't peek into the future. Refactored some functions out of local_attention_2d so that they could be shared. PiperOrigin-RevId: 167792489 --- tensor2tensor/layers/common_attention.py | 263 +++++++++++++----- tensor2tensor/layers/common_attention_test.py | 8 +- 2 files changed, 196 insertions(+), 75 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 1053a69af..84f8d2d9a 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -22,6 +22,7 @@ import math # Dependency imports +import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin @@ -354,10 +355,11 @@ def combine_heads(x): def combine_heads_2d(x): - """Inverse of split_heads_2d function. + """Inverse of split_heads_2d. Args: - x: a Tensor with shape [batch, num_heads, height, width, channels/num_heads] + x: a Tensor with shape + [batch, num_heads, height, width, channels / num_heads] Returns: a Tensor with shape [batch, height, width, channels] @@ -627,8 +629,8 @@ def pad_l_and_r(x, pad_length): def local_attention_2d(q, k, v, - block_length=128, - filter_flange=100, + query_shape=(8, 16), + memory_flange=(8, 16), name=None): """strided block local self-attention. @@ -636,8 +638,9 @@ def local_attention_2d(q, q: a Tensor with shape [batch, heads, h, w, depth_k] k: a Tensor with shape [batch, heads, h, w, depth_k] v: a Tensor with shape [batch, heads, h, w, depth_v] - block_length: an integer indicating the side length of each square block. - filter_flange: an integer indicating how much to look around each block. + query_shape: an tuple indicating the height and width of each query block. + memory_flange: an integer indicating how much to look in height and width + from each query block. name: an optional string Returns: @@ -651,68 +654,26 @@ def local_attention_2d(q, num_heads = tf.shape(q)[1] original_length = tf.shape(q)[2] * tf.shape(q)[3] - def reshape_range(tensor, i, j, shape): - """Reshapes a tensor between dimensions i and j.""" - target_shape = tf.concat( - [tf.shape(tensor)[:i], shape, tf.shape(tensor)[j:]], - axis=0) - return tf.reshape(tensor, target_shape) - - def pad_to_multiple(x, d): - """Making sure x is a multiple of d.""" - height_padding = -tf.shape(x)[1] % d - width_padding = -tf.shape(x)[2] % d - paddings = [[0, 0], [0, 0], [0, height_padding], - [0, width_padding], [0, 0]] - return tf.pad(x, paddings) - - def gather_indices(x, block_length, stride): - """Getting gather indices.""" - # making an identity matrix kernel - kernel = tf.eye(block_length ** 2) - kernel = reshape_range(kernel, 0, 1, [block_length, block_length, 1]) - # making indices [1, h, w, 1] to appy convs - indices = tf.range(0, tf.shape(x)[2] * tf.shape(x)[3], delta=1) - indices = tf.reshape(indices, [1, tf.shape(x)[2], tf.shape(x)[3], 1]) - indices = tf.nn.conv2d( - tf.cast(indices, tf.float32), - kernel, - strides=[1, stride, stride, 1], - padding="VALID") - # making indices [num_blocks, dim] to gather - num_blocks = tf.reduce_prod(tf.shape(indices)[:2]) - indices = tf.reshape(indices, [num_blocks, -1]) - return tf.cast(indices, tf.int32) - - def gather_blocks(x, indices): - """Gathers flattened blocks from x.""" - x_shape = tf.shape(x) - x = reshape_range(x, 2, 4, [tf.reduce_prod(x_shape[2:4])]) - # [length, batch, heads, dim] - x_t = tf.transpose(x, [2, 0, 1, 3]) - x_new = tf.gather(x_t, indices) - # returns [batch, heads, num_blocks, block_length ** 2, dim] - return tf.transpose(x_new, [2, 3, 0, 1, 4]) - - q = pad_to_multiple(q, block_length) - k = pad_to_multiple(k, block_length) - v = pad_to_multiple(v, block_length) + q = pad_to_multiple_2d(q, query_shape) + k = pad_to_multiple_2d(k, query_shape) + v = pad_to_multiple_2d(v, query_shape) # Setting up k and v values - paddings = [[0, 0], [0, 0], [filter_flange, filter_flange], - [filter_flange, filter_flange], [0, 0]] + paddings = [[0, 0], [0, 0], [memory_flange[0], memory_flange[1]], + [memory_flange[0], memory_flange[1]], [0, 0]] k = tf.pad(k, paddings) v = tf.pad(v, paddings) # Setting up q blocks - q_indices = gather_indices(q, block_length, block_length) - q_new = gather_blocks(q, q_indices) + q_indices = gather_indices_2d(q, query_shape, query_shape) + q_new = gather_blocks_2d(q, q_indices) # Setting up k and v blocks - full_filter_width = block_length + 2 * filter_flange - k_and_v_indices = gather_indices(k, full_filter_width, block_length) - k_new = gather_blocks(k, k_and_v_indices) - v_new = gather_blocks(v, k_and_v_indices) + memory_shape = (query_shape[0]+2*memory_flange[0], + query_shape[1]+2*memory_flange[1]) + k_and_v_indices = gather_indices_2d(k, memory_shape, query_shape) + k_new = gather_blocks_2d(k, k_and_v_indices) + v_new = gather_blocks_2d(v, k_and_v_indices) attention_bias = tf.expand_dims( tf.to_float(embedding_to_padding(k_new)) * -1e9, axis=-2) @@ -729,6 +690,159 @@ def gather_blocks(x, indices): return tf.reshape(output, v_shape) +def pad_to_multiple_2d(x, block_shape): + """Making sure x is a multiple of shape.""" + old_shape = x.get_shape().dims + last = old_shape[-1] + height_padding = -tf.shape(x)[1] % block_shape[0] + width_padding = -tf.shape(x)[2] % block_shape[1] + paddings = [[0, 0], [0, 0], [0, height_padding], + [0, width_padding], [0, 0]] + padded_x = tf.pad(x, paddings) + padded_shape = padded_x.get_shape().as_list() + padded_shape = padded_shape[:-1]+[last] + padded_x.set_shape(padded_shape) + return padded_x + + +def reshape_range(tensor, i, j, shape): + """Reshapes a tensor between dimensions i and j.""" + target_shape = tf.concat( + [tf.shape(tensor)[:i], shape, tf.shape(tensor)[j:]], + axis=0) + return tf.reshape(tensor, target_shape) + + +def gather_blocks_2d(x, indices): + """Gathers flattened blocks from x.""" + x_shape = tf.shape(x) + x = reshape_range(x, 2, 4, [tf.reduce_prod(x_shape[2:4])]) + # [length, batch, heads, dim] + x_t = tf.transpose(x, [2, 0, 1, 3]) + x_new = tf.gather(x_t, indices) + # returns [batch, heads, num_blocks, block_length ** 2, dim] + return tf.transpose(x_new, [2, 3, 0, 1, 4]) + + +def gather_indices_2d(x, block_shape, block_stride): + """Getting gather indices.""" + # making an identity matrix kernel + kernel = tf.eye(block_shape[0]*block_shape[1]) + kernel = reshape_range(kernel, 0, 1, [block_shape[0], block_shape[1], 1]) + # making indices [1, h, w, 1] to appy convs + indices = tf.range(0, tf.shape(x)[2] * tf.shape(x)[3], delta=1) + indices = tf.reshape(indices, [1, tf.shape(x)[2], tf.shape(x)[3], 1]) + indices = tf.nn.conv2d( + tf.cast(indices, tf.float32), + kernel, + strides=[1, block_stride[0], block_stride[1], 1], + padding="VALID") + # making indices [num_blocks, dim] to gather + num_blocks = tf.reduce_prod(tf.shape(indices)[:3]) + indices = tf.reshape(indices, [num_blocks, -1]) + return tf.cast(indices, tf.int32) + + +def masked_local_attention_2d(q, + k, + v, + query_shape=(8, 16), + memory_flange=(8, 16), + name=None): + """strided block local self-attention. + + Args: + q: a Tensor with shape [batch, heads, h, w, depth_k] + k: a Tensor with shape [batch, heads, h, w, depth_k] + v: a Tensor with shape [batch, heads, h, w, depth_v] + query_shape: an tuple indicating the height and width of each query block. + query_shape = block_shape + memory_flange: an integer indicating how much to look in height and width + from each query block. + memory shape = query_shape + (block_flange[0], 2*block_flange[1]) + name: an optional string + + Returns: + a Tensor of shape [batch, heads, h, w, depth_v] + """ + with tf.variable_scope( + name, default_name="local_masked_self_attention_2d", values=[q, k, v]): + v_shape = tf.shape(v) + depth_v = tf.shape(v)[4] + batch_size = tf.shape(q)[0] + num_heads = tf.shape(q)[1] + original_length = tf.shape(q)[2] * tf.shape(q)[3] + def make_mask(query_shape, memory_flange): + """creates a mask. + + The query mask can look to the left, top left, top, and top right, but + not the right. Inside the query, we have the standard raster scan + masking. + Args: + query_shape: A tuple of ints (query_height, query_width) + memory_flange: A tuple of ints + (memory_flange_height, memory_flange_width) + + Returns: + A tensor of shape query_size, memory_size + """ + + query_triangle = tf.matrix_band_part( + tf.ones([np.prod(query_shape), np.prod(query_shape)]), -1, 0) + split_query_masks = tf.split(query_triangle, query_shape[0], axis=1) + mask_pieces = [ + tf.concat( + [tf.ones([np.prod(query_shape), memory_flange[1]]), + split_query_masks[i], + tf.zeros([np.prod(query_shape), memory_flange[1]]) + ], axis=1) for i in range(query_shape[0])] + + final_mask = tf.concat( + [tf.ones( + [np.prod(query_shape), + (query_shape[1]+2*memory_flange[1])*memory_flange[0]]), + tf.concat(mask_pieces, axis=1) + ], axis=1) + # 0. is visible location, 1.0 is masked. + return 1. - final_mask + q = pad_to_multiple_2d(q, query_shape) + k = pad_to_multiple_2d(k, query_shape) + v = pad_to_multiple_2d(v, query_shape) + # Setting up k and v values. Padding top, left, and right + paddings = [[0, 0], [0, 0], [memory_flange[0], 0], + [memory_flange[1], memory_flange[1]], [0, 0]] + k = tf.pad(k, paddings) + v = tf.pad(v, paddings) + # Setting up q blocks + q_indices = gather_indices_2d(q, query_shape, query_shape) + q_new = gather_blocks_2d(q, q_indices) + # Setting up k and v blocks + memory_shape = (query_shape[0]+memory_flange[0], + query_shape[1]+memory_flange[1]*2) + k_and_v_indices = gather_indices_2d(k, memory_shape, query_shape) + k_new = gather_blocks_2d(k, k_and_v_indices) + v_new = gather_blocks_2d(v, k_and_v_indices) + logits = tf.matmul(q_new, k_new, transpose_b=True) + # Combining the mask for padding and visible region + attention_mask_shape = [np.prod(query_shape), + (query_shape[0]+memory_flange[0])* + (query_shape[1]+2*memory_flange[1])] + attention_mask = tf.cast(make_mask(query_shape, memory_flange), tf.bool) + # reshaping attention mask to have same dims as logits + attention_mask = tf.reshape(attention_mask, [1, 1, 1]+attention_mask_shape) + padding_mask = tf.expand_dims( + tf.cast(embedding_to_padding(k_new), tf.bool), axis=-2) + attention_bias = ( + tf.to_float(tf.logical_or(attention_mask, padding_mask)) *-1e9) + attention = tf.nn.softmax(logits + attention_bias) + output = tf.matmul(attention, v_new) + output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) + # Remove the padding if introduced + output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) + # [batch, heads, h, w, depth_v] + return tf.reshape(output, v_shape) + + def compute_qkv(query_antecedent, memory_antecedent, total_key_depth, total_value_depth, q_filter_width=1, kv_filter_width=1, q_padding="VALID", kv_padding="VALID"): @@ -795,7 +909,7 @@ def compute_qkv(query_antecedent, memory_antecedent, total_key_depth, def compute_qkv_2d(query_antecedent, memory_antecedent, total_key_depth, total_value_depth): - """Computes query, key and value of a 4D tensor. + """Computes query, key and value. Args: query_antecedent: a Tensor with shape [batch, h, w, depth_k] @@ -806,7 +920,7 @@ def compute_qkv_2d(query_antecedent, memory_antecedent, total_key_depth, Returns: q, k, v : [batch, h, w, depth_k] tensors """ - # self attention with single position q, k, and v. + # self attention with single position q, k, and v if memory_antecedent is None: combined = tf.layers.conv2d( query_antecedent, @@ -817,7 +931,7 @@ def compute_qkv_2d(query_antecedent, memory_antecedent, total_key_depth, axis=-1) return q, k, v - # Encoder decoder attention. + # Encoder decoder attention q = common_layers.conv1d( query_antecedent, total_key_depth, 1, name="q_transform") combined = common_layers.conv1d( @@ -918,8 +1032,8 @@ def multihead_attention_2d(query_antecedent, output_depth, num_heads, attention_type="local_attention_2d", - block_length=128, - block_width=128, + query_shape=(8, 16), + memory_flange=(8, 16), name=None): """2d Multihead scaled-dot-product attention with inp/output transformations. @@ -931,8 +1045,8 @@ def multihead_attention_2d(query_antecedent, output_depth: an integer num_heads: an integer dividing total_key_depth and total_value_depth attention_type: String, type of attention function to use. - block_length: an integer - relevant for "local_attention_2d" - block_width: an integer - relevant for "local_attention_2d" + query_shape: an tuple indicating the height and width of each query block. + memory_flange: an integer indicating how much to look in height and width name: an optional string Returns: @@ -954,7 +1068,7 @@ def multihead_attention_2d(query_antecedent, values=[query_antecedent, memory_antecedent]): q, k, v = compute_qkv_2d(query_antecedent, memory_antecedent, total_key_depth, total_value_depth) - + # after splitting, shape is [batch, heads, h, w, depth] q = split_heads_2d(q, num_heads) k = split_heads_2d(k, num_heads) v = split_heads_2d(v, num_heads) @@ -962,9 +1076,16 @@ def multihead_attention_2d(query_antecedent, q *= key_depth_per_head**-0.5 if attention_type == "local_attention_2d": x = local_attention_2d( - q, k, v, block_length=block_length, filter_flange=block_width) - x = tf.squeeze(combine_heads_2d(x), axis=-2) - x = common_layers.conv1d(x, output_depth, 1, name="output_transform") + q, k, v, query_shape=query_shape, memory_flange=memory_flange) + else: + x = masked_local_attention_2d(q, k, v, query_shape=query_shape, + memory_flange=memory_flange) + x = combine_heads_2d(x) + x = tf.layers.conv2d( + x, + output_depth, + (1, 1), + name="output_transform") return x diff --git a/tensor2tensor/layers/common_attention_test.py b/tensor2tensor/layers/common_attention_test.py index 6664bcc2d..d8f6f2b39 100644 --- a/tensor2tensor/layers/common_attention_test.py +++ b/tensor2tensor/layers/common_attention_test.py @@ -98,8 +98,8 @@ def testLocalUnmaskedAttention2D(self): tf.constant(x, dtype=tf.float32), tf.constant(y, dtype=tf.float32), tf.constant(y, dtype=tf.float32), - block_length=4, - filter_flange=3) + query_shape=(4, 4), + memory_flange=(3, 3)) session.run(tf.global_variables_initializer()) res = session.run(a) self.assertEqual(res.shape, (5, 4, 25, 25, 16)) @@ -112,8 +112,8 @@ def testLocalUnmaskedAttention2DMatchingBlockLength(self): tf.constant(x, dtype=tf.float32), tf.constant(y, dtype=tf.float32), tf.constant(y, dtype=tf.float32), - block_length=5, - filter_flange=3) + query_shape=(5, 5), + memory_flange=(3, 3)) session.run(tf.global_variables_initializer()) res = session.run(a) self.assertEqual(res.shape, (5, 4, 25, 25, 16)) From 2ebead2f451d30107c43f6f061998496978f5279 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 6 Sep 2017 18:14:18 -0700 Subject: [PATCH 0359/4095] Move to core Estimator and improve decoding PiperOrigin-RevId: 167802133 --- tensor2tensor/data_generators/image.py | 4 +- tensor2tensor/layers/modalities.py | 2 +- tensor2tensor/layers/modalities_test.py | 4 +- tensor2tensor/models/attention_lm_moe.py | 6 +- tensor2tensor/models/bluenet_test.py | 2 +- tensor2tensor/models/bytenet_test.py | 2 +- tensor2tensor/models/gene_expression_test.py | 2 +- tensor2tensor/models/lstm.py | 4 +- tensor2tensor/models/lstm_test.py | 4 +- tensor2tensor/models/multimodel.py | 4 +- tensor2tensor/models/multimodel_test.py | 2 +- tensor2tensor/models/neural_gpu_test.py | 2 +- tensor2tensor/models/shake_shake.py | 2 +- tensor2tensor/models/slicenet_test.py | 2 +- tensor2tensor/models/transformer_moe.py | 4 +- tensor2tensor/models/transformer_revnet.py | 4 +- .../models/transformer_revnet_test.py | 2 +- tensor2tensor/models/transformer_test.py | 2 +- tensor2tensor/models/transformer_vae.py | 4 +- tensor2tensor/models/xception_test.py | 2 +- tensor2tensor/utils/data_reader.py | 10 +- tensor2tensor/utils/data_reader_test.py | 6 +- tensor2tensor/utils/decoding.py | 326 ++++++++++++------ tensor2tensor/utils/input_fn_builder.py | 19 +- tensor2tensor/utils/metrics.py | 13 +- tensor2tensor/utils/model_builder.py | 286 ++++++--------- tensor2tensor/utils/t2t_model.py | 4 +- tensor2tensor/utils/trainer_utils.py | 40 +-- tensor2tensor/utils/trainer_utils_test.py | 9 +- .../TransformerVisualization.ipynb | 16 +- 30 files changed, 427 insertions(+), 362 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 8d142d239..06942ed3f 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -313,7 +313,7 @@ def resize(img): return tf.to_int64(tf.image.resize_images(img, [299, 299])) inputs = tf.cast(examples["inputs"], tf.int64) - if mode == tf.contrib.learn.ModeKeys.TRAIN: + if mode == tf.estimator.ModeKeys.TRAIN: examples["inputs"] = tf.cond( # Preprocess 90% of the time. tf.less(tf.random_uniform([]), 0.9), lambda img=inputs: preprocess(img), @@ -589,7 +589,7 @@ def class_labels(self): ] def preprocess_examples(self, examples, mode, unused_hparams): - if mode == tf.contrib.learn.ModeKeys.TRAIN: + if mode == tf.estimator.ModeKeys.TRAIN: examples["inputs"] = common_layers.cifar_image_augmentation( examples["inputs"]) return examples diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index c93a05433..1d606ec1d 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -113,7 +113,7 @@ def top(self, body_output, _): with tf.variable_scope(scope_name, reuse=reuse): var = self._get_weights() if (self._model_hparams.factored_logits and - self._model_hparams.mode == tf.contrib.learn.ModeKeys.TRAIN): + self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN): # insert channels dimension body_output = tf.expand_dims(body_output, 3) logits = common_layers.FactoredTensor(body_output, var) diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index 5813422ab..93dda6d09 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -67,7 +67,7 @@ def testSymbolModalityTargets(self): label_smoothing=0.2, shared_embedding_and_softmax_weights=0, factored_logits=0, - mode=tf.contrib.learn.ModeKeys.TRAIN) + mode=tf.estimator.ModeKeys.TRAIN) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( @@ -101,7 +101,7 @@ def testSymbolModalityTargetsFactored(self): label_smoothing=0.2, shared_embedding_and_softmax_weights=0, factored_logits=1, - mode=tf.contrib.learn.ModeKeys.TRAIN) + mode=tf.estimator.ModeKeys.TRAIN) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index eccf349c9..596d5b01d 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -42,7 +42,7 @@ import tensorflow as tf -ModeKeys = tf.contrib.learn.ModeKeys # pylint: disable=invalid-name +ModeKeys = tf.estimator.ModeKeys # pylint: disable=invalid-name class AttentionType(object): @@ -279,7 +279,7 @@ def remove_pad(x, pad_remover, mode): x = expert_utils.flatten_all_but_last(x) # Remove padding for training and eval - if mode != ModeKeys.INFER: + if mode != ModeKeys.PREDICT: # This is a hack to allows inference when the <go> token # is detected as padding and removed. This works for now because there is # no padding at inference. @@ -291,7 +291,7 @@ def remove_pad(x, pad_remover, mode): def restore_pad(x, ref_x, pad_remover, mode): x = tf.squeeze(x, axis=0) - if mode != ModeKeys.INFER: + if mode != ModeKeys.PREDICT: x = pad_remover.restore(x) x = expert_utils.reshape_like(x, ref_x) return x diff --git a/tensor2tensor/models/bluenet_test.py b/tensor2tensor/models/bluenet_test.py index 70b8defe9..d559fd953 100644 --- a/tensor2tensor/models/bluenet_test.py +++ b/tensor2tensor/models/bluenet_test.py @@ -45,7 +45,7 @@ def testBlueNet(self): "targets": tf.constant(y, dtype=tf.int32), } model = bluenet.BlueNet( - hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) diff --git a/tensor2tensor/models/bytenet_test.py b/tensor2tensor/models/bytenet_test.py index 536d348e7..56f421153 100644 --- a/tensor2tensor/models/bytenet_test.py +++ b/tensor2tensor/models/bytenet_test.py @@ -44,7 +44,7 @@ def testByteNet(self): "targets": tf.constant(y, dtype=tf.int32), } model = bytenet.ByteNet( - hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) diff --git a/tensor2tensor/models/gene_expression_test.py b/tensor2tensor/models/gene_expression_test.py index e46e81859..ea02572d0 100644 --- a/tensor2tensor/models/gene_expression_test.py +++ b/tensor2tensor/models/gene_expression_test.py @@ -55,7 +55,7 @@ def _testModel(self, hparams, model_cls): "targets": tf.constant(targets, dtype=tf.float32), } p_hparams, = hparams.problems - sharded_logits, _ = model_cls(hparams, tf.contrib.learn.ModeKeys.TRAIN, + sharded_logits, _ = model_cls(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams).model_fn(features) logits = tf.concat(sharded_logits, 0) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index d79b04494..9f909433e 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -251,7 +251,7 @@ def lstm_seq2seq_internal_attention(inputs, targets, hparams, train): class LSTMSeq2seq(t2t_model.T2TModel): def model_fn_body(self, features): - train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN + train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN return lstm_seq2seq_internal(features["inputs"], features["targets"], self._hparams, train) @@ -260,7 +260,7 @@ def model_fn_body(self, features): class LSTMSeq2seqAttention(t2t_model.T2TModel): def model_fn_body(self, features): - train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN + train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN return lstm_seq2seq_internal_attention( features["inputs"], features["targets"], self._hparams, train) diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index 7da3d2380..c1190d016 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -44,7 +44,7 @@ def testLSTMSeq2Seq(self): "inputs": tf.constant(x, dtype=tf.int32), "targets": tf.constant(y, dtype=tf.int32), } - model = lstm.LSTMSeq2seq(hparams, tf.contrib.learn.ModeKeys.TRAIN, + model = lstm.LSTMSeq2seq(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) @@ -69,7 +69,7 @@ def testLSTMSeq2SeqAttention(self): "targets": tf.constant(y, dtype=tf.int32), } model = lstm.LSTMSeq2seqAttention( - hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index c8d515c8d..5df8fcd3c 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -74,7 +74,7 @@ def residual_fn3(x, y, z, hparams): def conv_experts(xs, hparams, dp, ps, padding, mask, layer_id): """Convolutions + Mixture-of-Experts layer.""" del layer_id # Unused. - train = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + train = hparams.mode == tf.estimator.ModeKeys.TRAIN, conv_out = dp(conv_res_step, xs, hparams, padding, mask) loss = 0.0 moe_hidden_sizes = [hparams.filter_size] @@ -109,7 +109,7 @@ def prepare_decoder(targets, target_space_emb): class MultiModel(t2t_model.T2TModel): def model_fn_body_sharded(self, sharded_features): - train = self._hparams.mode == tf.contrib.learn.ModeKeys.TRAIN + train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN dp = self._data_parallelism hparams = self._hparams diff --git a/tensor2tensor/models/multimodel_test.py b/tensor2tensor/models/multimodel_test.py index ab60bae97..3aff41029 100644 --- a/tensor2tensor/models/multimodel_test.py +++ b/tensor2tensor/models/multimodel_test.py @@ -47,7 +47,7 @@ def testMultiModel(self): "target_space_id": tf.constant(1, dtype=tf.int32), } model = multimodel.MultiModel( - hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) diff --git a/tensor2tensor/models/neural_gpu_test.py b/tensor2tensor/models/neural_gpu_test.py index b7a1e98f7..164623699 100644 --- a/tensor2tensor/models/neural_gpu_test.py +++ b/tensor2tensor/models/neural_gpu_test.py @@ -50,7 +50,7 @@ def testNeuralGPU(self): "inputs": tf.constant(inputs, dtype=tf.int32), "targets": tf.constant(targets, dtype=tf.int32) } - model = neural_gpu.NeuralGPU(hparams, tf.contrib.learn.ModeKeys.TRAIN, + model = neural_gpu.NeuralGPU(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) shadred_logits, _ = model.model_fn(features) logits = tf.concat(shadred_logits, 0) diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py index a7b379e11..a4dd2385a 100644 --- a/tensor2tensor/models/shake_shake.py +++ b/tensor2tensor/models/shake_shake.py @@ -64,7 +64,7 @@ def shake_shake_block(x, conv_filters, stride, hparams): skip = downsampling_residual_branch(x, conv_filters) # TODO(rshin): Use different alpha for each image in batch. - if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN: + if hparams.mode == tf.estimator.ModeKeys.TRAIN: if hparams.shakeshake_type == "batch": shaken = common_layers.shakeshake2(branch1, branch2) elif hparams.shakeshake_type == "image": diff --git a/tensor2tensor/models/slicenet_test.py b/tensor2tensor/models/slicenet_test.py index c3a064a85..faf028737 100644 --- a/tensor2tensor/models/slicenet_test.py +++ b/tensor2tensor/models/slicenet_test.py @@ -47,7 +47,7 @@ def testSliceNet(self): "targets": tf.constant(y, dtype=tf.int32), "target_space_id": tf.constant(1, dtype=tf.int32), } - model = slicenet.SliceNet(hparams, tf.contrib.learn.ModeKeys.TRAIN, + model = slicenet.SliceNet(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) diff --git a/tensor2tensor/models/transformer_moe.py b/tensor2tensor/models/transformer_moe.py index 669b1842b..c8a32a667 100644 --- a/tensor2tensor/models/transformer_moe.py +++ b/tensor2tensor/models/transformer_moe.py @@ -91,7 +91,7 @@ def postprocess(x, y): dp, self._ps_devices, preprocess(x), - hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + hparams.mode == tf.estimator.ModeKeys.TRAIN, input_size=hparams.hidden_size, expert_fn=expert_fn, num_experts=hparams.moe_num_experts, @@ -140,7 +140,7 @@ def postprocess(x, y): dp, self._ps_devices, preprocess(x), - hparams.mode == tf.contrib.learn.ModeKeys.TRAIN, + hparams.mode == tf.estimator.ModeKeys.TRAIN, input_size=hparams.hidden_size, expert_fn=expert_fn, num_experts=hparams.moe_num_experts, diff --git a/tensor2tensor/models/transformer_revnet.py b/tensor2tensor/models/transformer_revnet.py index 942a00660..7275c370a 100644 --- a/tensor2tensor/models/transformer_revnet.py +++ b/tensor2tensor/models/transformer_revnet.py @@ -131,7 +131,7 @@ def g(x): g, num_layers=hparams.num_hidden_layers, f_side_input=[encoder_self_attention_bias], - is_training=hparams.mode == tf.contrib.learn.ModeKeys.TRAIN) + is_training=hparams.mode == tf.estimator.ModeKeys.TRAIN) y = tf.concat([y1, y2], axis=-1) return common_layers.layer_preprocess(y, hparams) @@ -212,7 +212,7 @@ def g(x): decoder_self_attention_bias, encoder_decoder_attention_bias, encoder_output ], - is_training=hparams.mode == tf.contrib.learn.ModeKeys.TRAIN) + is_training=hparams.mode == tf.estimator.ModeKeys.TRAIN) y = tf.concat([y1, y2], axis=-1) return common_layers.layer_preprocess(y, hparams) diff --git a/tensor2tensor/models/transformer_revnet_test.py b/tensor2tensor/models/transformer_revnet_test.py index 66b493b0b..f9bc8cfb2 100644 --- a/tensor2tensor/models/transformer_revnet_test.py +++ b/tensor2tensor/models/transformer_revnet_test.py @@ -59,7 +59,7 @@ def testTransformer(self): "target_space_id": tf.constant(1, dtype=tf.int32), } model = transformer_revnet.TransformerRevnet( - hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) grads = tf.gradients( diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 6c0eee203..9e450a670 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -53,7 +53,7 @@ def getModel(self): } return transformer.Transformer( - hparams, tf.contrib.learn.ModeKeys.INFER, p_hparams), features + hparams, tf.estimator.ModeKeys.PREDICT, p_hparams), features def testTransformer(self): model, features = self.getModel() diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 025f8d631..e3279495a 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -244,7 +244,7 @@ def ae_decompress(z, ae, x, is_2d, hparams, name, reuse=None): # Leak at the beginning to help train. z = mix(z, ae, hparams.startup_steps) prob_z = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.8 - prob_z = prob_z if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN else 1.0 + prob_z = prob_z if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 z = tf.cond(tf.less(tf.random_uniform([]), prob_z), lambda: z, lambda: ae) @@ -305,7 +305,7 @@ def ae_transformer_internal(inputs, targets, target_space, hparams): reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( labels=hot, logits=c_z) # If not training, use the predicted z instead of the autoregressive one. - if hparams.mode == tf.contrib.learn.ModeKeys.INFER: + if hparams.mode == tf.estimator.ModeKeys.PREDICT: hot = tf.one_hot(tf.argmax(c_z, axis=-1), hparams.v_size) # Decompress, pass for ae loss. diff --git a/tensor2tensor/models/xception_test.py b/tensor2tensor/models/xception_test.py index 776d1306a..eb4c6db20 100644 --- a/tensor2tensor/models/xception_test.py +++ b/tensor2tensor/models/xception_test.py @@ -44,7 +44,7 @@ def testXception(self): "targets": tf.constant(y, dtype=tf.int32), } model = xception.Xception( - hparams, tf.contrib.learn.ModeKeys.TRAIN, p_hparams) + hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 681f3598b..f48665078 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -215,7 +215,7 @@ def default_example_reading_spec(data_file_pattern): def read_examples(problem, data_file_pattern, capacity, - mode=tf.contrib.learn.ModeKeys.TRAIN): + mode=tf.estimator.ModeKeys.TRAIN): """Create Dataset of Example for problem and data_file_pattern.""" if problem is None: data_fields, data_items_to_decoders = default_example_reading_spec( @@ -227,7 +227,7 @@ def read_examples(problem, # Create placeholders for input, rather than reading data from disk. return feature_placeholders(data_fields) - is_training = mode == tf.contrib.learn.ModeKeys.TRAIN + is_training = mode == tf.estimator.ModeKeys.TRAIN dataset = examples_reader( [data_file_pattern], data_fields, @@ -245,7 +245,7 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, problem: Problem instance for which to build the input pipeline. data_file_pattern: file pattern for input files. capacity: int, data pipeline buffer capacity. - mode: tf.contrib.learn.ModeKeys entry. + mode: tf.estimator.ModeKeys entry. hparams: an HParams object. batching_scheme: a dictionary containing "boundaries": a list of integers for the boundaries that will be @@ -256,7 +256,7 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, Returns: dict <feature name, batched and padded Tensor> """ - is_training = mode == tf.contrib.learn.ModeKeys.TRAIN + is_training = mode == tf.estimator.ModeKeys.TRAIN num_threads = 4 if is_training else 1 with tf.name_scope("input_pipeline"): @@ -505,7 +505,7 @@ def get_data_filepatterns(problems, data_dir, mode): except ValueError: problem, _, _ = problem_hparams.parse_problem_name(problem) path = os.path.join(data_dir, problem) - if mode == tf.contrib.learn.ModeKeys.TRAIN: + if mode == tf.estimator.ModeKeys.TRAIN: datasets.append("%s-train*" % path) else: datasets.append("%s-dev*" % path) diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index 991669a99..aed2598c7 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -70,7 +70,7 @@ def preprocess_examples(self, examples, unused_mode, unused_hparams): def generate_test_data(problem, tmp_dir): problem.generate_data(tmp_dir, tmp_dir) filepatterns = data_reader.get_data_filepatterns( - problem.name, tmp_dir, tf.contrib.learn.ModeKeys.TRAIN) + problem.name, tmp_dir, tf.estimator.ModeKeys.TRAIN) assert tf.gfile.Glob(filepatterns[0]) return filepatterns @@ -115,7 +115,7 @@ def testTrainEvalBehavior(self): self.problem, self.filepatterns[0], 16, - mode=tf.contrib.learn.ModeKeys.EVAL) + mode=tf.estimator.ModeKeys.EVAL) eval_examples = eval_dataset.make_one_shot_iterator().get_next() eval_idxs = [] @@ -243,7 +243,7 @@ def example_len(ex): self.problem, self.filepatterns[0], 32, - mode=tf.contrib.learn.ModeKeys.EVAL) + mode=tf.estimator.ModeKeys.EVAL) dataset = data_reader.bucket_by_sequence_length( dataset, example_len, boundaries, batch_sizes, window_size) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 3f00c25a9..ea1a5fa01 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -36,17 +36,20 @@ FLAGS = tf.flags.FLAGS - -def _decode_from_dataset_log_results(inputs, - targets, - outputs, - problem_name, - prediction_idx, - inputs_vocab, - targets_vocab, - save_images=False, - model_dir=None, - identity_output=False): +# Number of samples to draw for an image input (in such cases as captioning) +IMAGE_DECODE_LENGTH = 100 + + +def log_decode_results(inputs, + outputs, + problem_name, + prediction_idx, + inputs_vocab, + targets_vocab, + targets=None, + save_images=False, + model_dir=None, + identity_output=False): """Log inference results.""" if "image" in problem_name and save_images: save_path = os.path.join(model_dir, "%s_prediction_%d.jpg" % @@ -56,17 +59,21 @@ def _decode_from_dataset_log_results(inputs, decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) tf.logging.info("Inference results INPUT: %s" % decoded_inputs) + decoded_targets = None if identity_output: decoded_outputs = "".join(map(str, outputs.flatten())) - decoded_targets = "".join(map(str, targets.flatten())) + if targets is not None: + decoded_targets = "".join(map(str, targets.flatten())) else: decoded_outputs = "".join( map(str, targets_vocab.decode(_save_until_eos(outputs.flatten())))) - decoded_targets = "".join( - map(str, targets_vocab.decode(_save_until_eos(targets.flatten())))) + if targets is not None: + decoded_targets = "".join( + map(str, targets_vocab.decode(_save_until_eos(targets.flatten())))) tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) - tf.logging.info("Inference results TARGET: %s" % decoded_targets) + if targets is not None: + tf.logging.info("Inference results TARGET: %s" % decoded_targets) return decoded_outputs, decoded_targets @@ -80,22 +87,22 @@ def decode_from_dataset(estimator, identity_output=False): tf.logging.info("Performing local inference from dataset for %s.", str(problem_names)) - hparams = estimator.hparams + hparams = estimator.params for problem_idx, problem_name in enumerate(problem_names): # Build the inference input function infer_problems_data = data_reader.get_data_filepatterns( - problem_name, hparams.data_dir, tf.contrib.learn.ModeKeys.INFER) + problem_name, hparams.data_dir, tf.estimator.ModeKeys.PREDICT) infer_input_fn = input_fn_builder.build_input_fn( - mode=tf.contrib.learn.ModeKeys.INFER, + mode=tf.estimator.ModeKeys.PREDICT, hparams=hparams, data_file_patterns=infer_problems_data, num_datashards=devices.data_parallelism().n, fixed_problem=problem_idx) # Get the predictions as an iterable - predictions = estimator.predict(input_fn=infer_input_fn, as_iterable=True) + predictions = estimator.predict(infer_input_fn) # Prepare output file writers if decode_to_file passed if decode_to_file: @@ -119,16 +126,30 @@ def decode_from_dataset(estimator, output_beams = np.split(outputs, beam_size, axis=0) for i, beam in enumerate(output_beams): tf.logging.info("BEAM %d:" % i) - decoded = _decode_from_dataset_log_results( - inputs, targets, beam, problem_name, num_predictions, - inputs_vocab, targets_vocab, save_images, estimator.model_dir, - identity_output) + decoded = log_decode_results( + inputs, + beam, + problem_name, + num_predictions, + inputs_vocab, + targets_vocab, + save_images=save_images, + model_dir=estimator.model_dir, + identity_output=identity_output, + targets=targets) decoded_outputs.append(decoded) else: - decoded = _decode_from_dataset_log_results( - inputs, targets, outputs, problem_name, num_predictions, - inputs_vocab, targets_vocab, save_images, estimator.model_dir, - identity_output) + decoded = log_decode_results( + inputs, + outputs, + problem_name, + num_predictions, + inputs_vocab, + targets_vocab, + save_images=save_images, + model_dir=estimator.model_dir, + identity_output=identity_output, + targets=targets) decoded_outputs.append(decoded) # Write out predictions if decode_to_file passed @@ -149,43 +170,40 @@ def decode_from_dataset(estimator, def decode_from_file(estimator, filename): """Compute predictions on entries in filename and write them out.""" - hparams = estimator.hparams + hparams = estimator.params problem_id = FLAGS.decode_problem_id inputs_vocab = hparams.problems[problem_id].vocabulary["inputs"] targets_vocab = hparams.problems[problem_id].vocabulary["targets"] + problem_name = FLAGS.problems.split("-")[problem_id] tf.logging.info("Performing decoding from a file.") sorted_inputs, sorted_keys = _get_sorted_inputs(filename) num_decode_batches = (len(sorted_inputs) - 1) // FLAGS.decode_batch_size + 1 - input_fn = _decode_batch_input_fn(problem_id, num_decode_batches, - sorted_inputs, inputs_vocab) - decodes = [] - for _ in range(num_decode_batches): - result_iter = estimator.predict( - input_fn=input_fn.next if six.PY2 else input_fn.__next__, - as_iterable=True) - for result in result_iter: - - def log_fn(inputs, outputs): - decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) - tf.logging.info("Inference results INPUT: %s" % decoded_inputs) - - decoded_outputs = targets_vocab.decode( - _save_until_eos(outputs.flatten())) - tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) - return decoded_outputs - - if FLAGS.decode_return_beams: - beam_decodes = [] - output_beams = np.split( - result["outputs"], FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(output_beams): - tf.logging.info("BEAM %d:" % k) - beam_decodes.append(log_fn(result["inputs"], beam)) - decodes.append("\t".join(beam_decodes)) + def input_fn(): + input_gen = _decode_batch_input_fn(problem_id, num_decode_batches, + sorted_inputs, inputs_vocab) + gen_fn = make_input_fn_from_generator(input_gen) + example = gen_fn() + return _decode_input_tensor_to_features_dict(example, hparams) - else: - decodes.append(log_fn(result["inputs"], result["outputs"])) + decodes = [] + result_iter = estimator.predict(input_fn) + for result in result_iter: + if FLAGS.decode_return_beams: + beam_decodes = [] + output_beams = np.split(result["outputs"], FLAGS.decode_beam_size, axis=0) + for k, beam in enumerate(output_beams): + tf.logging.info("BEAM %d:" % k) + decoded_outputs, _ = log_decode_results(result["inputs"], beam, + problem_name, None, + inputs_vocab, targets_vocab) + beam_decodes.append(decoded_outputs) + decodes.append("\t".join(beam_decodes)) + else: + decoded_outputs, _ = log_decode_results(result["inputs"], + result["outputs"], problem_name, + None, inputs_vocab, targets_vocab) + decodes.append(decoded_outputs) # Reversing the decoded inputs and outputs because they were reversed in # _decode_batch_input_fn @@ -210,33 +228,63 @@ def log_fn(inputs, outputs): outfile.write("%s\n" % (decodes[sorted_keys[index]])) -def decode_interactively(estimator): - hparams = estimator.hparams +def make_input_fn_from_generator(gen): + """Use py_func to yield elements from the given generator.""" + first_ex = six.next(gen) + flattened = tf.contrib.framework.nest.flatten(first_ex) + types = [t.dtype for t in flattened] + shapes = [[None] * len(t.shape) for t in flattened] + first_ex_list = [first_ex] + + def py_func(): + if first_ex_list: + example = first_ex_list.pop() + else: + example = six.next(gen) + return tf.contrib.framework.nest.flatten(example) - infer_input_fn = _interactive_input_fn(hparams) - for problem_idx, example in infer_input_fn: + def input_fn(): + flat_example = tf.py_func(py_func, [], types) + _ = [t.set_shape(shape) for t, shape in zip(flat_example, shapes)] + example = tf.contrib.framework.nest.pack_sequence_as(first_ex, flat_example) + return example + + return input_fn + + +def decode_interactively(estimator): + """Interactive decoding.""" + hparams = estimator.params + + def input_fn(): + gen_fn = make_input_fn_from_generator(_interactive_input_fn(hparams)) + example = gen_fn() + example = _interactive_input_tensor_to_features_dict(example, hparams) + return example + + result_iter = estimator.predict(input_fn) + for result in result_iter: + problem_idx = result["problem_choice"] targets_vocab = hparams.problems[problem_idx].vocabulary["targets"] - result_iter = estimator.predict(input_fn=lambda e=example: e) - for result in result_iter: - if FLAGS.decode_return_beams: - beams = np.split(result["outputs"], FLAGS.decode_beam_size, axis=0) - scores = None - if "scores" in result: - scores = np.split(result["scores"], FLAGS.decode_beam_size, axis=0) - for k, beam in enumerate(beams): - tf.logging.info("BEAM %d:" % k) - beam_string = targets_vocab.decode(_save_until_eos(beam.flatten())) - if scores is not None: - tf.logging.info("%s\tScore:%f" % (beam_string, scores[k])) - else: - tf.logging.info(beam_string) - else: - if FLAGS.identity_output: - tf.logging.info(" ".join(map(str, result["outputs"].flatten()))) + + if FLAGS.decode_return_beams: + beams = np.split(result["outputs"], FLAGS.decode_beam_size, axis=0) + scores = None + if "scores" in result: + scores = np.split(result["scores"], FLAGS.decode_beam_size, axis=0) + for k, beam in enumerate(beams): + tf.logging.info("BEAM %d:" % k) + beam_string = targets_vocab.decode(_save_until_eos(beam.flatten())) + if scores is not None: + tf.logging.info("%s\tScore:%f" % (beam_string, scores[k])) else: - tf.logging.info( - targets_vocab.decode( - _save_until_eos(result["outputs"].flatten()))) + tf.logging.info(beam_string) + else: + if FLAGS.identity_output: + tf.logging.info(" ".join(map(str, result["outputs"].flatten()))) + else: + tf.logging.info( + targets_vocab.decode(_save_until_eos(result["outputs"].flatten()))) def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, @@ -264,9 +312,10 @@ def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, assert len(input_ids) <= batch_length x = input_ids + [0] * (batch_length - len(input_ids)) final_batch_inputs.append(x) + yield { - "inputs": np.array(final_batch_inputs), - "problem_choice": np.array(problem_id) + "inputs": np.array(final_batch_inputs).astype(np.int32), + "problem_choice": np.array(problem_id).astype(np.int32), } @@ -277,7 +326,7 @@ def _interactive_input_fn(hparams): whole graph, then we are stuck encoding all of the input as one fixed-size numpy array. - We yield int64 arrays with shape [const_array_size]. The format is: + We yield int32 arrays with shape [const_array_size]. The format is: [num_samples, decode_length, len(input ids), <input ids>, <padding>] Args: @@ -288,7 +337,7 @@ def _interactive_input_fn(hparams): Raises: Exception: when `input_type` is invalid. """ - num_samples = 3 + num_samples = 1 decode_length = 100 input_type = "text" problem_id = 0 @@ -304,12 +353,13 @@ def _interactive_input_fn(hparams): pass while True: prompt = ("INTERACTIVE MODE num_samples=%d decode_length=%d \n" - " it=<input_type> ('text' or 'image' or 'label')\n" - " pr=<problem_num> (set the problem number)\n" + " it=<input_type> ('text' or 'image' or 'label', default: " + "text)\n" + " pr=<problem_num> (set the problem number, default: 0)\n" " in=<input_problem> (set the input problem number)\n" " ou=<output_problem> (set the output problem number)\n" - " ns=<num_samples> (changes number of samples)\n" - " dl=<decode_length> (changes decode length)\n" + " ns=<num_samples> (changes number of samples, default: 1)\n" + " dl=<decode_length> (changes decode length, default: 100)\n" " <%s> (decode)\n" " q (quit)\n" ">" % (num_samples, decode_length, "source_string" @@ -344,23 +394,23 @@ def _interactive_input_fn(hparams): x = [num_samples, decode_length, len(input_ids)] + input_ids assert len(x) < const_array_size x += [0] * (const_array_size - len(x)) - yield problem_id, { - "inputs": np.array(x), - "problem_choice": np.array(problem_id) + yield { + "inputs": np.array(x).astype(np.int32), + "problem_choice": np.array(problem_id).astype(np.int32) } elif input_type == "image": input_path = input_string img = read_image(input_path) - yield problem_id, { - "inputs": img, - "problem_choice": np.array(problem_id) + yield { + "inputs": img.astype(np.int32), + "problem_choice": np.array(problem_id).astype(np.int32) } elif input_type == "label": input_ids = [int(input_string)] x = [num_samples, decode_length, len(input_ids)] + input_ids - yield problem_id, { - "inputs": np.array(x), - "problem_choice": np.array(problem_id) + yield { + "inputs": np.array(x).astype(np.int32), + "problem_choice": np.array(problem_id).astype(np.int32) } else: raise Exception("Unsupported input type.") @@ -423,3 +473,85 @@ def _save_until_eos(hyp): except ValueError: # No EOS_ID: return the array as-is. return hyp + + +def _interactive_input_tensor_to_features_dict(feature_map, hparams): + """Convert the interactive input format (see above) to a dictionary. + + Args: + feature_map: a dictionary with keys `problem_choice` and `input` containing + Tensors. + hparams: model hyperparameters + + Returns: + a features dictionary, as expected by the decoder. + """ + inputs = tf.convert_to_tensor(feature_map["inputs"]) + input_is_image = False if len(inputs.get_shape()) < 3 else True + + def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring + if input_is_image: + x = tf.image.resize_images(x, [299, 299]) + x = tf.reshape(x, [1, 299, 299, -1]) + x = tf.to_int32(x) + else: + # Remove the batch dimension. + num_samples = x[0] + length = x[2] + x = tf.slice(x, [3], tf.to_int32([length])) + x = tf.reshape(x, [1, -1, 1, 1]) + # Transform into a batch of size num_samples to get that many random + # decodes. + x = tf.tile(x, tf.to_int32([num_samples, 1, 1, 1])) + + p_hparams = hparams.problems[problem_choice] + return (tf.constant(p_hparams.input_space_id), + tf.constant(p_hparams.target_space_id), x) + + input_space_id, target_space_id, x = input_fn_builder.cond_on_index( + input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) + + features = {} + features["problem_choice"] = tf.convert_to_tensor( + feature_map["problem_choice"]) + features["input_space_id"] = input_space_id + features["target_space_id"] = target_space_id + features["decode_length"] = (IMAGE_DECODE_LENGTH + if input_is_image else inputs[1]) + features["inputs"] = x + return features + + +def _decode_input_tensor_to_features_dict(feature_map, hparams): + """Convert the interactive input format (see above) to a dictionary. + + Args: + feature_map: a dictionary with keys `problem_choice` and `input` containing + Tensors. + hparams: model hyperparameters + + Returns: + a features dictionary, as expected by the decoder. + """ + inputs = tf.convert_to_tensor(feature_map["inputs"]) + input_is_image = False + + def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring + p_hparams = hparams.problems[problem_choice] + # Add a third empty dimension dimension + x = tf.expand_dims(x, axis=[2]) + x = tf.to_int32(x) + return (tf.constant(p_hparams.input_space_id), + tf.constant(p_hparams.target_space_id), x) + + input_space_id, target_space_id, x = input_fn_builder.cond_on_index( + input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) + + features = {} + features["problem_choice"] = feature_map["problem_choice"] + features["input_space_id"] = input_space_id + features["target_space_id"] = target_space_id + features["decode_length"] = (IMAGE_DECODE_LENGTH + if input_is_image else tf.shape(x)[1] + 50) + features["inputs"] = x + return features diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index bef95d58f..abec8d4ad 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -47,7 +47,7 @@ def build_input_fn(mode, evaluation, and testing prediction. Args: - mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. + mode: The execution mode, as defined in tf.estimator.ModeKeys. hparams: HParams object. data_file_patterns: The list of file patterns to use to read in data. Set to `None` if you want to create a placeholder for the input data. The @@ -98,7 +98,7 @@ def input_fn(): data_reader.hparams_to_batching_scheme( hparams, shard_multiplier=num_datashards, - drop_long_sequences=(mode == tf.contrib.learn.ModeKeys.TRAIN + drop_long_sequences=(mode == tf.estimator.ModeKeys.TRAIN or hparams.eval_drop_long_sequences), length_multiplier=(p_hparams.batch_size_multiplier))) @@ -137,7 +137,7 @@ def input_fn(): trainable=False)) if fixed_problem is None: if (hparams.problem_choice == "uniform" or - mode != tf.contrib.learn.ModeKeys.TRAIN): + mode != tf.estimator.ModeKeys.TRAIN): problem_choice = tf.random_uniform( [], maxval=problem_count, dtype=tf.int32) elif hparams.problem_choice == "adaptive": @@ -169,7 +169,7 @@ def input_fn(): inp_id.set_shape([]) tgt_id.set_shape([]) # Forced shape obfuscation is necessary for inference. - if mode == tf.contrib.learn.ModeKeys.INFER: + if mode == tf.estimator.ModeKeys.PREDICT: rand_inputs._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access rand_target._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access @@ -180,15 +180,14 @@ def input_fn(): "input_space_id": inp_id, "target_space_id": tgt_id } - if mode == tf.contrib.learn.ModeKeys.INFER: + if mode == tf.estimator.ModeKeys.PREDICT: rand_feature_map["infer_targets"] = rand_target rand_target = None - # This is because of a bug in the tf.contrib.learn Estimator that - # short-circuits prediction if it doesn't see a QueueRunner. - # DummyQueueRunner implements the minimal expected interface but does - # nothing. - # TODO(rsepassi): Remove once we move to core Estimator. + # This is because of a bug in the Estimator that short-circuits prediction + # if it doesn't see a QueueRunner. DummyQueueRunner implements the + # minimal expected interface but does nothing. tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, DummyQueueRunner()) + return rand_feature_map, rand_target return input_fn diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index baff66669..5bfad5338 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -20,8 +20,6 @@ # Dependency imports -import six - from tensor2tensor.layers import common_layers from tensor2tensor.utils import bleu_hook from tensor2tensor.utils import rouge @@ -197,6 +195,8 @@ def create_evaluation_metrics(problems, model_hparams): model_hparams: a set of hparams. Returns: + Dict <metric name, metric function>. The metric functions have signature + (predictions, labels, problem_choice) -> (metric Tensor, update op). A dictionary with keys that are strings naming the evaluation metrics and values that are functions taking arguments of (predictions, targets), returning a tuple of a tensor of the @@ -210,8 +210,7 @@ def create_evaluation_metrics(problems, model_hparams): def make_problem_specific_metric_fn(metric_fn, problem_idx, weights_fn): """Create a metric fn conditioned on problem_idx.""" - def problem_metric_fn(predictions, labels, weights): - problem_choice = weights + def problem_metric_fn(predictions, labels, problem_choice): (scores, weights) = tf.cond( tf.equal(problem_idx, problem_choice), lambda: metric_fn(predictions, labels, weights_fn=weights_fn), @@ -258,11 +257,7 @@ def problem_metric_fn(predictions, labels, weights): metric_fn, problem_idx, weights_fn) eval_metrics["metrics-%s/%s" % (problem_name, metric)] = problem_metric_fn - return { - k: tf.contrib.learn.MetricSpec( - v, prediction_key="predictions", weight_key="problem_choice") - for (k, v) in six.iteritems(eval_metrics) - } + return eval_metrics # Metrics are functions that take predictions and labels and return diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 34af6c827..21ef96b28 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -33,6 +33,7 @@ from tensor2tensor.models import models # pylint: disable=unused-import from tensor2tensor.utils import devices from tensor2tensor.utils import input_fn_builder +from tensor2tensor.utils import metrics from tensor2tensor.utils import registry from tensor2tensor.utils import yellowfin @@ -42,9 +43,6 @@ # TODO(rsepassi): Rm dep on FLAGS here FLAGS = tf.flags.FLAGS -# Number of samples to draw for an image input (in such cases as captioning) -IMAGE_DECODE_LENGTH = 100 - def log_variable_sizes(var_list, tag): """Log the sizes and shapes of variables, and the total size. @@ -64,90 +62,30 @@ def log_variable_sizes(var_list, tag): tf.logging.info("%s Total size: %d", tag, total_size) -def build_model_fn(model, hparams): +def build_model_fn(model): """Returns a function to build the model. Args: model: The name of the model to use. - hparams: The hyperparameters. Returns: A function to build the model's graph. This function is called by the Estimator object to construct the graph. """ - def initializer(): - if hparams.initializer == "orthogonal": - return tf.orthogonal_initializer(gain=hparams.initializer_gain) - elif hparams.initializer == "uniform": - max_val = 0.1 * hparams.initializer_gain - return tf.random_uniform_initializer(-max_val, max_val) - elif hparams.initializer == "normal_unit_scaling": - return init_ops.variance_scaling_initializer( - hparams.initializer_gain, mode="fan_avg", distribution="normal") - elif hparams.initializer == "uniform_unit_scaling": - return init_ops.variance_scaling_initializer( - hparams.initializer_gain, mode="fan_avg", distribution="uniform") - else: - raise ValueError("Unrecognized initializer: %s" % hparams.initializer) - - def learning_rate_decay(): - """Inverse-decay learning rate until warmup_steps, then decay.""" - warmup_steps = tf.to_float( - hparams.learning_rate_warmup_steps * FLAGS.worker_replicas) - step = tf.to_float(tf.contrib.framework.get_global_step()) - if hparams.learning_rate_decay_scheme == "noam": - return 5000.0 * hparams.hidden_size**-0.5 * tf.minimum( - (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) - elif hparams.learning_rate_decay_scheme == "exp100k": - return 0.94**(step // 100000) - elif hparams.learning_rate_decay_scheme == "cosine": - cycle_steps = hparams.learning_rate_cosine_cycle_steps - return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) - elif hparams.learning_rate_decay_scheme == "cyclelinear10x": - # Cycle the rate linearly by 10x every warmup_steps, up and down. - cycle_steps = hparams.learning_rate_warmup_steps - cycle_position = step % (2 * cycle_steps) - cycle_position = tf.to_float( # Normalize to the interval [-1, 1]. - cycle_position - cycle_steps) / float(cycle_steps) - cycle_position = 1.0 - tf.abs(cycle_position) # 0 to 1 and back to 0. - return (cycle_position + 0.1) * 3.0 # 10x difference each cycle (0.3-3). - - inv_base = tf.exp(tf.log(0.01) / warmup_steps) - inv_decay = inv_base**(warmup_steps - step) - if hparams.learning_rate_decay_scheme == "sqrt": - decay = _sqrt_decay(step - warmup_steps) - elif hparams.learning_rate_decay_scheme == "exp10k": - decay = _exp_decay_after(step - warmup_steps, 0.9995, - FLAGS.train_steps - warmup_steps - 10000) - elif hparams.learning_rate_decay_scheme == "exp50k": - decay = _exp_decay_after(step - warmup_steps, 0.99995, - FLAGS.train_steps - warmup_steps - 50000) - elif hparams.learning_rate_decay_scheme == "exp500k": - decay = _exp_decay_after(step - warmup_steps, 0.9999955, - FLAGS.train_steps - warmup_steps - 500000) - elif hparams.learning_rate_decay_scheme == "none": - decay = tf.constant(1.0) - else: - raise ValueError("Unrecognized learning rate decay scheme: %s" % - hparams.learning_rate_decay_scheme) - return tf.cond( - step < warmup_steps, - lambda: inv_decay, - lambda: decay, - name="learning_rate_decay_warump_cond") - - def model_fn(features, targets, mode): + def model_fn(features, labels, mode, params): """Creates the prediction, loss, and train ops. Args: features: A dictionary of tensors keyed by the feature name. - targets: A tensor representing the labels (targets). - mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. + labels: A tensor representing the labels. + mode: The execution mode, as defined in tf.estimator.ModeKeys. + params: model HParams. Returns: - A tuple consisting of the prediction, loss, and train_op. + An EstimatorSpec. """ + hparams = params # Deep-copy the model hparams between modes to eliminate # side-effects caused by abuse of the linked problem_hparams # objects which are used to share modality objects between @@ -159,19 +97,76 @@ def model_fn(features, targets, mode): # could be created once per mode and passed to the constructor of # t2t_model. my_hp = copy.deepcopy(hparams) - if mode == tf.contrib.learn.ModeKeys.INFER: - if FLAGS.decode_interactive: - features = _interactive_input_tensor_to_features_dict(features, my_hp) - elif FLAGS.decode_from_file: - features = _decode_input_tensor_to_features_dict(features, my_hp) - if targets is not None: - features["targets"] = targets + def initializer(): + if hparams.initializer == "orthogonal": + return tf.orthogonal_initializer(gain=hparams.initializer_gain) + elif hparams.initializer == "uniform": + max_val = 0.1 * hparams.initializer_gain + return tf.random_uniform_initializer(-max_val, max_val) + elif hparams.initializer == "normal_unit_scaling": + return init_ops.variance_scaling_initializer( + hparams.initializer_gain, mode="fan_avg", distribution="normal") + elif hparams.initializer == "uniform_unit_scaling": + return init_ops.variance_scaling_initializer( + hparams.initializer_gain, mode="fan_avg", distribution="uniform") + else: + raise ValueError("Unrecognized initializer: %s" % hparams.initializer) + + def learning_rate_decay(): + """Inverse-decay learning rate until warmup_steps, then decay.""" + warmup_steps = tf.to_float( + hparams.learning_rate_warmup_steps * FLAGS.worker_replicas) + step = tf.to_float(tf.contrib.framework.get_global_step()) + if hparams.learning_rate_decay_scheme == "noam": + return 5000.0 * hparams.hidden_size**-0.5 * tf.minimum( + (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) + elif hparams.learning_rate_decay_scheme == "exp100k": + return 0.94**(step // 100000) + elif hparams.learning_rate_decay_scheme == "cosine": + cycle_steps = hparams.learning_rate_cosine_cycle_steps + return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) + elif hparams.learning_rate_decay_scheme == "cyclelinear10x": + # Cycle the rate linearly by 10x every warmup_steps, up and down. + cycle_steps = hparams.learning_rate_warmup_steps + cycle_position = step % (2 * cycle_steps) + cycle_position = tf.to_float( # Normalize to the interval [-1, 1]. + cycle_position - cycle_steps) / float(cycle_steps) + cycle_position = 1.0 - tf.abs(cycle_position) # 0 to 1 and back to 0. + return ( + cycle_position + 0.1) * 3.0 # 10x difference each cycle (0.3-3). + + inv_base = tf.exp(tf.log(0.01) / warmup_steps) + inv_decay = inv_base**(warmup_steps - step) + if hparams.learning_rate_decay_scheme == "sqrt": + decay = _sqrt_decay(step - warmup_steps) + elif hparams.learning_rate_decay_scheme == "exp10k": + decay = _exp_decay_after(step - warmup_steps, 0.9995, + FLAGS.train_steps - warmup_steps - 10000) + elif hparams.learning_rate_decay_scheme == "exp50k": + decay = _exp_decay_after(step - warmup_steps, 0.99995, + FLAGS.train_steps - warmup_steps - 50000) + elif hparams.learning_rate_decay_scheme == "exp500k": + decay = _exp_decay_after(step - warmup_steps, 0.9999955, + FLAGS.train_steps - warmup_steps - 500000) + elif hparams.learning_rate_decay_scheme == "none": + decay = tf.constant(1.0) + else: + raise ValueError("Unrecognized learning rate decay scheme: %s" % + hparams.learning_rate_decay_scheme) + return tf.cond( + step < warmup_steps, + lambda: inv_decay, + lambda: decay, + name="learning_rate_decay_warump_cond") + + if labels is not None: + features["targets"] = labels dp = devices.data_parallelism() tf.get_variable_scope().set_initializer(initializer()) - is_training = mode == tf.contrib.learn.ModeKeys.TRAIN + is_training = mode == tf.estimator.ModeKeys.TRAIN # Add input statistics for incoming features. with tf.name_scope("input_stats"): @@ -218,7 +213,7 @@ def nth_model(n): n, dp, devices.ps_devices(all_workers=True)) - if mode == tf.contrib.learn.ModeKeys.INFER: + if mode == tf.estimator.ModeKeys.PREDICT: return model_class.infer( features, beam_size=FLAGS.decode_beam_size, @@ -235,7 +230,7 @@ def nth_model(n): # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1) if (FLAGS.eval_run_autoregressive and - mode == tf.contrib.learn.ModeKeys.EVAL): + mode == tf.estimator.ModeKeys.EVAL): sharded_logits, losses_dict = model_class.eval_autoregressive(features) else: sharded_logits, losses_dict = model_class.model_fn( @@ -272,36 +267,50 @@ def nth_model(n): features["problem_choice"], 0, len(my_hp.problems) - 1) - if mode == tf.contrib.learn.ModeKeys.INFER: + if mode == tf.estimator.ModeKeys.PREDICT: # Beam search in sequence model returns both decodes withe key "outputs" # and scores with they key "scores". If return list is a dict, we expect # that it will have keys "outputs", a tensor of int32 and scores, a # tensor of floats. This is useful if we want to return scores from # estimator.predict if not isinstance(result_list, dict): - ret = {"outputs": result_list}, None, None + predictions = {"outputs": result_list} else: - ret = { + predictions = { "outputs": result_list["outputs"], "scores": result_list["scores"] - }, None, None + } + if "inputs" in features: - ret[0]["inputs"] = features["inputs"] + predictions["inputs"] = features["inputs"] if "infer_targets" in features: - ret[0]["targets"] = features["infer_targets"] - return ret + predictions["targets"] = features["infer_targets"] + predictions["problem_choice"] = (features["problem_choice"] * tf.ones( + (tf.shape(features["inputs"])[0],), dtype=tf.int32)) + + return tf.estimator.EstimatorSpec(mode, predictions=predictions) sharded_logits, total_loss = result_list[1:], result_list[0] - if mode == tf.contrib.learn.ModeKeys.EVAL: + if mode == tf.estimator.ModeKeys.EVAL: # For evaluation, return the logits layer as our predictions. logits = tf.concat(sharded_logits, 0) - ret = { - "predictions": logits, - "problem_choice": features["problem_choice"], - } - return ret, total_loss, None - assert mode == tf.contrib.learn.ModeKeys.TRAIN + eval_metrics_fns = metrics.create_evaluation_metrics( + zip(FLAGS.problems.split("-"), hparams.problem_instances), hparams) + _check_autotune_metrics(eval_metrics_fns) + + eval_metrics = {} + for metric_name, metric_fn in six.iteritems(eval_metrics_fns): + eval_metrics[metric_name] = metric_fn(logits, labels, + features["problem_choice"]) + + return tf.estimator.EstimatorSpec( + mode, + predictions={"predictions": logits}, + eval_metric_ops=eval_metrics, + loss=total_loss) + + assert mode == tf.estimator.ModeKeys.TRAIN # Some training statistics. with tf.name_scope("training_stats"): @@ -381,7 +390,11 @@ def nth_model(n): del summaries[i] tf.logging.info("Global model_fn finished.") - return {"problem_choice": features["problem_choice"]}, total_loss, train_op + return tf.estimator.EstimatorSpec( + mode, + predictions={"problem_choice": features["problem_choice"]}, + loss=total_loss, + train_op=train_op) return model_fn @@ -431,81 +444,8 @@ def _exp_decay_after(step, rate, from_which_step): name="exponential_decay_step_cond") -def _interactive_input_tensor_to_features_dict(feature_map, hparams): - """Convert the interactive input format (see above) to a dictionary. - - Args: - feature_map: a dictionary with keys `problem_choice` and `input` containing - Tensors. - hparams: model hyperparameters - - Returns: - a features dictionary, as expected by the decoder. - """ - inputs = tf.constant(feature_map["inputs"]) - input_is_image = False if len(inputs.shape) < 3 else True - - def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring - p_hparams = hparams.problems[problem_choice] - if not input_is_image: - # Remove the batch dimension. - num_samples = x[0] - length = x[2] - x = tf.slice(x, [3], tf.to_int32([length])) - x = tf.reshape(x, [1, -1, 1, 1]) - # Transform into a batch of size num_samples to get that many random - # decodes. - x = tf.tile(x, tf.to_int32([num_samples, 1, 1, 1])) - else: - x = tf.image.resize_images(x, [299, 299]) - x = tf.reshape(x, [1, 299, 299, -1]) - x = tf.to_int32(x) - return (tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id), x) - - input_space_id, target_space_id, x = input_fn_builder.cond_on_index( - input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) - - features = {} - features["problem_choice"] = tf.constant(feature_map["problem_choice"]) - features["input_space_id"] = input_space_id - features["target_space_id"] = target_space_id - features["decode_length"] = (IMAGE_DECODE_LENGTH - if input_is_image else inputs[1]) - features["inputs"] = x - return features - - -def _decode_input_tensor_to_features_dict(feature_map, hparams): - """Convert the interactive input format (see above) to a dictionary. - - Args: - feature_map: a dictionary with keys `problem_choice` and `input` containing - Tensors. - hparams: model hyperparameters - - Returns: - a features dictionary, as expected by the decoder. - """ - inputs = tf.constant(feature_map["inputs"]) - input_is_image = False - - def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring - p_hparams = hparams.problems[problem_choice] - # Add a third empty dimension dimension - x = tf.expand_dims(x, axis=[2]) - x = tf.to_int32(x) - return (tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id), x) - - input_space_id, target_space_id, x = input_fn_builder.cond_on_index( - input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) - - features = {} - features["problem_choice"] = feature_map["problem_choice"] - features["input_space_id"] = input_space_id - features["target_space_id"] = target_space_id - features["decode_length"] = (IMAGE_DECODE_LENGTH - if input_is_image else tf.shape(x)[1] + 50) - features["inputs"] = x - return features +def _check_autotune_metrics(metrics_dict): + if (hasattr(FLAGS, "autotune") and FLAGS.autotune and + FLAGS.objective not in metrics_dict): + raise ValueError("Tuning objective %s not among evaluation metrics %s" % + (FLAGS.objective, metrics_dict.keys())) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index d3fc6dac1..32627f7e3 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -69,7 +69,7 @@ def __init__(self, Args: hparams: a hyperparameters object. - mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. + mode: The execution mode, as defined in tf.estimator.ModeKeys. problem_hparams: a hyperparameters object. problem_idx: an integer. data_parallelism: a expert_utils.parallelism @@ -86,7 +86,7 @@ def __init__(self, hparams = copy.copy(hparams) hparams.add_hparam("mode", mode) # When not in training mode, set all forms of dropout to zero. - if mode != tf.contrib.learn.ModeKeys.TRAIN: + if mode != tf.estimator.ModeKeys.TRAIN: for key in hparams.values(): if key[-len("dropout"):] == "dropout": setattr(hparams, key, 0.0) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 3248d9ca9..83db7c007 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -30,7 +30,6 @@ from tensor2tensor.utils import data_reader from tensor2tensor.utils import devices from tensor2tensor.utils import input_fn_builder -from tensor2tensor.utils import metrics from tensor2tensor.utils import model_builder from tensor2tensor.utils import registry @@ -155,12 +154,6 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, output_dir=output_dir, data_dir=data_dir, model_name=model_name) - eval_metrics = metrics.create_evaluation_metrics( - zip(FLAGS.problems.split("-"), hparams.problem_instances), hparams) - if (hasattr(FLAGS, "autotune") and FLAGS.autotune and - FLAGS.objective not in eval_metrics): - raise ValueError("Tuning objective %s not among evaluation metrics %s" % - (FLAGS.objective, eval_metrics.keys())) train_monitors = [] eval_hooks = [] if FLAGS.tfdbg: @@ -169,9 +162,8 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, eval_hooks.append(hook) return tf.contrib.learn.Experiment( estimator=estimator, - train_input_fn=input_fns[tf.contrib.learn.ModeKeys.TRAIN], - eval_input_fn=input_fns[tf.contrib.learn.ModeKeys.EVAL], - eval_metrics=eval_metrics, + train_input_fn=input_fns[tf.estimator.ModeKeys.TRAIN], + eval_input_fn=input_fns[tf.estimator.ModeKeys.EVAL], train_steps=train_steps, eval_steps=eval_steps, min_eval_frequency=FLAGS.local_eval_frequency, @@ -185,39 +177,37 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): num_datashards = devices.data_parallelism().n train_input_fn = input_fn_builder.build_input_fn( - mode=tf.contrib.learn.ModeKeys.TRAIN, + mode=tf.estimator.ModeKeys.TRAIN, hparams=hparams, data_file_patterns=get_data_filepatterns(data_dir, - tf.contrib.learn.ModeKeys.TRAIN), + tf.estimator.ModeKeys.TRAIN), num_datashards=num_datashards, worker_replicas=FLAGS.worker_replicas, worker_id=FLAGS.worker_id) eval_input_fn = input_fn_builder.build_input_fn( - mode=tf.contrib.learn.ModeKeys.EVAL, + mode=tf.estimator.ModeKeys.EVAL, hparams=hparams, data_file_patterns=get_data_filepatterns(data_dir, - tf.contrib.learn.ModeKeys.EVAL), + tf.estimator.ModeKeys.EVAL), num_datashards=num_datashards, worker_replicas=FLAGS.worker_replicas, worker_id=FLAGS.worker_id) - estimator = tf.contrib.learn.Estimator( - model_fn=model_builder.build_model_fn(model_name, hparams), + estimator = tf.estimator.Estimator( + model_fn=model_builder.build_model_fn(model_name), model_dir=output_dir, + params=hparams, config=tf.contrib.learn.RunConfig( master=FLAGS.master, - model_dir=output_dir, gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction, session_config=session_config(), keep_checkpoint_max=FLAGS.keep_checkpoint_max, keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, save_checkpoints_secs=FLAGS.save_checkpoints_secs)) - # Store the hparams in the estimator as well - estimator.hparams = hparams return estimator, { - tf.contrib.learn.ModeKeys.TRAIN: train_input_fn, - tf.contrib.learn.ModeKeys.EVAL: eval_input_fn + tf.estimator.ModeKeys.TRAIN: train_input_fn, + tf.estimator.ModeKeys.EVAL: eval_input_fn } @@ -330,9 +320,15 @@ def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): if schedule == "local_run": # Run the local demo. exp = exp_fn(output_dir) - if exp.train_steps > 0 or exp.eval_steps > 0: + if exp.train_steps > 0 and exp.eval_steps > 0: tf.logging.info("Performing local training and evaluation.") exp.train_and_evaluate() + elif exp.train_steps > 0: + tf.logging.info("Performing local training.") + exp.train() + elif exp.eval_steps > 0: + tf.logging.info("Performing local evaluation.") + exp.evaluate(delay_secs=0) else: # Perform distributed training/evaluation. learn_runner.run( diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 6cc654d26..e71fc16c2 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -106,7 +106,7 @@ def testSingleEvalStepRawSession(self): encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir) hparams = trainer_utils.create_hparams( FLAGS.hparams_set, FLAGS.problems, data_dir) - model_fn = model_builder.build_model_fn(model_name, hparams) + model_fn = model_builder.build_model_fn(model_name) inputs_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1]) # Make it 4D. targets_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. @@ -117,9 +117,10 @@ def testSingleEvalStepRawSession(self): "target_space_id": hparams.problems[0].target_space_id} # Now set a mode and create the graph by invoking model_fn. - mode = tf.contrib.learn.ModeKeys.EVAL - predictions_dict, _, _ = model_fn( # In INFER mode targets can be None. - features, batch_targets, mode) + mode = tf.estimator.ModeKeys.EVAL + estimator_spec = model_fn( # In INFER mode targets can be None. + features, batch_targets, mode, hparams) + predictions_dict = estimator_spec.predictions predictions = tf.squeeze( # These are not images, axis=2,3 are not needed. predictions_dict["predictions"], axis=[2, 3]) diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index e3fb8f958..166e0c9c5 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -127,9 +127,9 @@ "num_datashards = utils.devices.data_parallelism().n\n", "\n", "problems_data = utils.get_data_filepatterns(\n", - " DATA_DIR, tf.contrib.learn.ModeKeys.EVAL)\n", + " DATA_DIR, tf.estimator.ModeKeys.EVAL)\n", "input_fn = utils.input_fn_builder.build_input_fn(\n", - " mode=tf.contrib.learn.ModeKeys.EVAL,\n", + " mode=tf.estimator.ModeKeys.EVAL,\n", " hparams=hparams,\n", " data_file_patterns=problems_data,\n", " num_datashards=num_datashards)\n", @@ -192,8 +192,9 @@ } ], "source": [ - "model_fn=utils.model_builder.build_model_fn(MODEL, hparams=hparams)\n", - "sharded_logits, training_loss, extra_loss = model_fn(features, target, tf.contrib.learn.ModeKeys.EVAL)" + "model_fn=utils.model_builder.build_model_fn(MODEL)\n", + "spec = model_fn(features, target, tf.estimator.ModeKeys.EVAL, hparams)\n", + "predictions_dict = spec.predictions", ] }, { @@ -215,7 +216,8 @@ ], "source": [ "with tf.variable_scope(tf.get_variable_scope(), reuse=True):\n", - " beam_out = model_fn(features, target, tf.contrib.learn.ModeKeys.INFER)" + " spec = model_fn(features, target, tf.estimator.ModeKeys.PREDICT, hparams)\n", + " beam_out = spec.predictions['outputs']", ] }, { @@ -324,7 +326,7 @@ } ], "source": [ - "inp, out, logits = sess.run([inputs['inputs'], target, sharded_logits['predictions']])\n", + "inp, out, logits = sess.run([inputs['inputs'], target, predictions_dict['predictions']])\n", "\n", "print(\"Input: \", decode(inp[0]))\n", "print(\"Gold: \", decode(out[0]))\n", @@ -366,7 +368,7 @@ ], "source": [ "inp_ids = encode(eng)\n", - "beam_decode = sess.run(beam_out[0]['outputs'], {\n", + "beam_decode = sess.run(beam_out, {\n", " inputs['inputs']: np.expand_dims(np.expand_dims(inp_ids, axis=2), axis=3),\n", "})\n", "trans = decode(beam_decode[0])\n", From cb181de23926052a042ee5e6fa9bda0d21dc8f23 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 6 Sep 2017 18:55:59 -0700 Subject: [PATCH 0360/4095] Enable passing padded_shapes in padded_batch; log_device_placement FLAG PiperOrigin-RevId: 167805495 --- tensor2tensor/models/transformer.py | 17 ++++++---- tensor2tensor/utils/data_reader.py | 44 ++++++++++++++++++------- tensor2tensor/utils/data_reader_test.py | 2 +- tensor2tensor/utils/trainer_utils.py | 5 ++- 4 files changed, 48 insertions(+), 20 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 38766ec19..d3a406a29 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -191,10 +191,13 @@ def transformer_encoder(encoder_input, """ x = encoder_input with tf.variable_scope(name): - pad_remover = expert_utils.PadRemover( - common_attention.attention_bias_to_padding(encoder_self_attention_bias)) - for layer in xrange( - hparams.num_encoder_layers or hparams.num_hidden_layers): + pad_remover = None + if hparams.use_pad_remover: + pad_remover = expert_utils.PadRemover( + common_attention.attention_bias_to_padding( + encoder_self_attention_bias)) + for layer in xrange(hparams.num_encoder_layers or + hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( @@ -237,8 +240,8 @@ def transformer_decoder(decoder_input, """ x = decoder_input with tf.variable_scope(name): - for layer in xrange( - hparams.num_decoder_layers or hparams.num_hidden_layers): + for layer in xrange(hparams.num_decoder_layers or + hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( @@ -362,6 +365,8 @@ def transformer_base(): hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("nbr_decoder_problems", 1) hparams.add_hparam("proximity_bias", int(False)) + hparams.add_hparam("use_pad_remover", int(True)) + return hparams diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index f48665078..e89b9b808 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -267,11 +267,23 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, lambda ex: _preprocess(ex, problem, data_file_pattern, hparams, mode), num_threads=num_threads) dataset = dataset.filter( - lambda ex: _example_too_big(ex, batching_scheme["max_length"])) + lambda ex: example_valid_size(ex, batching_scheme["max_length"])) + + bucket_id_fn = _example_length + if len(batching_scheme["boundaries"]) == 1: + bucket_id_fn = lambda _: tf.constant(0) + + if "padded_shapes" not in batching_scheme: + batching_scheme["padded_shapes"] = None dataset = bucket_by_sequence_length( - dataset, _example_length, batching_scheme["boundaries"], - batching_scheme["batch_sizes"], batching_scheme["window_size"]) + dataset, + bucket_id_fn, + batching_scheme["boundaries"], + batching_scheme["batch_sizes"], + batching_scheme["window_size"], + padded_shapes=batching_scheme["padded_shapes"]) + # We reshuffle the batches to prevent many long-sequence batches at once. # TODO(rsepassi): Rm hasattr call once new dynamic window size functionality # is in a stable TF release. @@ -307,12 +319,16 @@ def _example_length(example): return length -def _example_too_big(example, max_length): +def example_valid_size(example, max_length): return tf.less_equal(_example_length(example), max_length) -def bucket_by_sequence_length(dataset, example_length_fn, bucket_boundaries, - bucket_batch_sizes, window_size): +def bucket_by_sequence_length(dataset, + example_length_fn, + bucket_boundaries, + bucket_batch_sizes, + window_size, + padded_shapes=None): """Bucket entries in dataset by length. Args: @@ -322,6 +338,8 @@ def bucket_by_sequence_length(dataset, example_length_fn, bucket_boundaries, bucket_boundaries: list<int>, boundaries of the buckets. bucket_batch_sizes: list<int>, batch size per bucket. window_size: an integer divisible by all elements of bucket_batch_sizes + padded_shapes: dict<feature name, list<int>>, optional, shapes of the + features with None where feature should be padded to max in that dim. Returns: Dataset of padded and batched examples. @@ -351,12 +369,7 @@ def window_size_fn(bucket_id): def batching_fn(bucket_id, grouped_dataset): batch_sizes = tf.constant(bucket_batch_sizes, dtype=tf.int64) batch_size = batch_sizes[bucket_id] - - # Pad each dimension of each feature so that they match. - padded_shapes = dict( - [(name, [None] * len(shape)) - for name, shape in grouped_dataset.output_shapes.items()]) - return grouped_dataset.padded_batch(batch_size, padded_shapes) + return padded_batch(grouped_dataset, batch_size, padded_shapes) # TODO(rsepassi): Rm branch once the new group_by_window functionality is in # a stable TF release. @@ -371,6 +384,13 @@ def batching_fn(bucket_id, grouped_dataset): return dataset +def padded_batch(dataset, batch_size, padded_shapes=None): + padded_shapes = padded_shapes or dict( + [(name, [None] * len(shape)) + for name, shape in dataset.output_shapes.items()]) + return dataset.padded_batch(batch_size, padded_shapes) + + def _bucket_boundaries(max_length, min_length=8, length_bucket_step=1.1): """A default set of length-bucket boundaries.""" assert min_length <= max_length diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index aed2598c7..f03ce6da2 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -158,7 +158,7 @@ def testLengthFilter(self): max_len = 15 dataset = data_reader.read_examples(self.problem, self.filepatterns[0], 32) dataset = dataset.filter( - lambda ex: data_reader._example_too_big(ex, max_len)) + lambda ex: data_reader.example_valid_size(ex, max_len)) examples = dataset.make_one_shot_iterator().get_next() with tf.train.MonitoredSession() as sess: ex_lens = [] diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 83db7c007..08359ea5c 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -75,6 +75,8 @@ "Save checkpoints every this many seconds. " "Default=0 means let tensorflow.contrib.learn.python.learn" " decide, which is currently set to 600 = 10 minutes.") +flags.DEFINE_bool("log_device_placement", False, + "Whether to log device placement.") # Distributed training flags flags.DEFINE_string("master", "", "Address of TensorFlow master.") @@ -369,7 +371,8 @@ def session_config(): config = tf.ConfigProto( allow_soft_placement=True, graph_options=graph_options, - gpu_options=gpu_options) + gpu_options=gpu_options, + log_device_placement=FLAGS.log_device_placement) return config From ad57b3b2a4bd401464010778a39784644a055c9f Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 6 Sep 2017 22:18:07 -0700 Subject: [PATCH 0361/4095] correct transformer ranged hparams PiperOrigin-RevId: 167817267 --- tensor2tensor/models/transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index d3a406a29..a2e76dd13 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -664,6 +664,6 @@ def transformer_base_range(rhp): rhp.set_discrete("learning_rate_warmup_steps", [1000, 2000, 4000, 8000, 16000]) rhp.set_float("initializer_gain", 0.5, 2.0) - rhp.set_float("optimizer_adam_beta2", 0.85, 0.95) + rhp.set_float("optimizer_adam_beta1", 0.85, 0.95) rhp.set_float("optimizer_adam_beta2", 0.97, 0.99) rhp.set_float("weight_decay", 0.0, 2.0) From 772337a811579a32078228d43e9572ccad4a669a Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 7 Sep 2017 07:05:14 -0700 Subject: [PATCH 0362/4095] bug fix to link function for log_poisson loss PiperOrigin-RevId: 167855204 --- tensor2tensor/layers/modalities.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 1d606ec1d..8e76c8051 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -475,19 +475,16 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_all): @registry.register_real_modality("log_poisson_loss") class RealLogPoissonLossModality(RealL2LossModality): """Modality for real (i.e. float) vectors with log Poisson regression loss. - - * Top is a linear projection to vocab size followed by a softplus - transform (log(exp(features) + 1)). """ - def top(self, body_output, _): - with tf.variable_scope("real"): - return tf.nn.softplus(tf.layers.dense(body_output, self._vocab_size)) + def bottom(self, x): + return x def loss(self, top_out, targets, weights_fn=common_layers.weights_all): predictions = top_out with tf.name_scope("log_possion"): weights = weights_fn(targets) + lp_loss = tf.nn.log_poisson_loss(targets, predictions) return tf.reduce_sum(lp_loss * weights), tf.reduce_sum(weights) From 73af26b1968efee44b41c0efaafa66bc393d2a29 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 7 Sep 2017 11:40:57 -0700 Subject: [PATCH 0363/4095] change default initializer in lstm to uniform_unit_scaling PiperOrigin-RevId: 167888817 --- tensor2tensor/models/lstm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 9f909433e..d1c3101b4 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -272,6 +272,7 @@ def lstm_attention(): hparams.batch_size = 1024 hparams.hidden_size = 128 hparams.num_hidden_layers = 2 + hparams.initializer = "uniform_unit_scaling" # Attention hparams.add_hparam("attn_vec_size", hparams.hidden_size) From 5fcc9bc8d813607bc2e93d680ea6a08cacf83a2c Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Thu, 7 Sep 2017 15:33:38 -0700 Subject: [PATCH 0364/4095] Expert now process each sequence individually to reduce the attention matrix size. PiperOrigin-RevId: 167921622 --- tensor2tensor/layers/common_attention.py | 82 ++++++++++++++------- tensor2tensor/models/attention_lm_moe.py | 7 ++ tensor2tensor/utils/expert_utils.py | 91 ++++++++++++++++++++++++ 3 files changed, 154 insertions(+), 26 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 84f8d2d9a..3f3885b10 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -441,7 +441,9 @@ def dot_product_attention(q, weights = tf.nn.softmax(logits, name="attention_weights") # dropping out the attention links for each of the heads weights = tf.nn.dropout(weights, 1.0 - dropout_rate) - if not tf.get_variable_scope().reuse: + if (not tf.get_variable_scope().reuse and + # Summaries don't work well within tf.while_loop() + "/while/" not in tf.contrib.framework.get_name_scope()): attention_image_summary(weights, image_shapes) return tf.matmul(weights, v) @@ -1242,6 +1244,7 @@ def self_attention_expert( x, batch_coordinate, mask_right=True, + split_batch=False, attention_kq_size=None, attention_v_size=None, ): @@ -1255,6 +1258,9 @@ def self_attention_expert( positions from different sequences don't attend to each other. mask_right: A bool. If true, we will not attend to positions on the right, just as decoder self attention. + split_batch (bool): If True, each sequence of the batch is processed + individually on a loop. If False, the sequences are processed all at + once and a mask is applied to isolate the sequences from each others attention_kq_size (int): dimension used for the attention key, and query attention_v_size (int): dimension used for the attention value @@ -1289,32 +1295,58 @@ def self_attention_expert( def length_not_null(x, batch_coordinate): """Branch of the graph only evaluated when length isn't null.""" + + # Mask between the sequences (not used if map_ids is used) with tf.name_scope("expert_mask"): - batch_coordinate = tf.squeeze(batch_coordinate, 1) + batch_coord_float = tf.squeeze(batch_coordinate, 1) # Convert to float first because of b/25387198 - batch_coordinate = tf.to_float(batch_coordinate) - bc_v = tf.expand_dims(batch_coordinate, 1) - bc_h = tf.expand_dims(batch_coordinate, 0) - bias = bc_v - bc_h # Broadcast to create [length, length] mask - bias = tf.minimum(1.0, tf.abs(bias)) # Theshold non zeros to 1.0 - bias *= -1e9 # Set non zeros to -infinity - - if mask_right: - bias += tf.reshape( + batch_coord_float = tf.to_float(batch_coord_float) + bc_v = tf.expand_dims(batch_coord_float, 1) + bc_h = tf.expand_dims(batch_coord_float, 0) + bias_batch = bc_v - bc_h # Broadcast to create [length, length] mask + # Theshold non zeros to 1.0 + bias_batch = tf.minimum(1.0, tf.abs(bias_batch)) + bias_batch *= -1e9 # Set non zeros to -infinity + + def add_or_set_if(prev_bias, new_bias, condition): + """Add the bias together while concidering the None case.""" + if not condition: + return prev_bias + elif prev_bias is None: + return new_bias + else: + return prev_bias + new_bias + + def mask_and_call_attention(x): + """Function applied once for each sequence of the batch.""" + + # Mask to prevent sequences of attenting to the future + length = tf.shape(x)[1] # x has shape [1, length,...] + bias_past = tf.reshape( attention_bias_lower_triangle(length), [length, length]) - # bias has shape [length, length] - bias = tf.reshape(bias, [1, 1, length, length]) - x = tf.reshape(x, [1, length, depth]) - out = multihead_attention(x, - None, - bias, - total_key_depth=attention_kq_size, - total_value_depth=attention_v_size, - output_depth=depth, - num_heads=1, - dropout_rate=0.0) - out = tf.squeeze(out, 0) - + # bias has shape [length, length] + bias_past = tf.reshape(bias_past, [1, 1, length, length]) + + bias = None + bias = add_or_set_if(bias, bias_past, mask_right) + bias = add_or_set_if(bias, bias_batch, not split_batch) + + return multihead_attention( + x, + None, + bias, + total_key_depth=attention_kq_size, + total_value_depth=attention_v_size, + output_depth=depth, + num_heads=1, + dropout_rate=0.0) + + if split_batch: + out = expert_utils.map_ids(x, batch_coordinate, mask_and_call_attention) + else: + x = tf.reshape(x, [1, length, depth]) + out = mask_and_call_attention(x) + out = tf.squeeze(out, 0) return out # If the length is empty, just forward an empty tensor (avoid having to @@ -1326,8 +1358,6 @@ def length_not_null(x, batch_coordinate): ) return out -# functools.partial(self_attention_expert, mask_right=, depth=) - def local_expert_attention( x, diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 596d5b01d..87d456b7d 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -127,6 +127,8 @@ def print_shape(x, suffix, debug=False): x = dp_remove_pad(x) x = dp(print_shape, x, "in_flat") + assert hparams.batch_size >= hparams.max_length + for layer in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): with tf.variable_scope( @@ -161,6 +163,7 @@ def print_shape(x, suffix, debug=False): train=hparams.mode == ModeKeys.TRAIN, batch_coordinate=batch_coordinate, mask_right=True, + split_batch=bool(hparams.attention_split_batch), attention_kq_size=hparams.attention_kq_size, attention_v_size=hparams.attention_v_size) # TODO(avaswani, epot, noam): Do we need to divide by num shards ? @@ -344,6 +347,7 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_type", AttentionType.MULTIHEAD) hparams.add_hparam("attention_moe_k", 2) hparams.add_hparam("attention_num_experts", 16) + hparams.add_hparam("attention_split_batch", int(False)) # Key, query and value dimensions for the attention hparams.add_hparam("attention_kq_size", 128) hparams.add_hparam("attention_v_size", 256) @@ -366,6 +370,9 @@ def attention_lm_moe_base_ae(): hparams.min_length_bucket = 256 # Avoid cyclic problems for big batches hparams.learning_rate = 0.05 hparams.learning_rate_warmup_steps = 10000 + # According to noam, ("n", "da") seems better for harder-to-learn models + # hparams.layer_preprocess_sequence = "n" + # hparams.layer_postprocess_sequence = "da" return hparams diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 16820ff37..8865b9271 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -23,6 +23,7 @@ from __future__ import division from __future__ import print_function +import functools import math # Dependency imports @@ -60,6 +61,27 @@ def convert_gradient_to_tensor(x): return x +def add_name_scope(scope): + """Return a decorator which add a TF name scope to a function. + + Args: + scope (str): name of the name scope + + Returns: + fct: the add_scope decorator + """ + def decorator(f): + + @functools.wraps(f) + def decorated(*args, **kwargs): + with tf.name_scope(scope): + return f(*args, **kwargs) + + return decorated + + return decorator + + class Parallelism(object): """Helper class for creating sets of parallel function calls. @@ -517,6 +539,75 @@ def restore(self, x): return x +@add_name_scope("map_ids") +def map_ids(x, indices, map_fn): + """Apply a function to each coordinate ids of a multidimentional tensor. + + This allows to process each sequence of a batch independently. This is + similar to tf.map_fn but with tensor where the batch dim has been flatten. + + Warning: The indices ids have to be contigous and orderd in memory as the + output vector for each of the ids are simply concatenated after being + processed. + Ex: if your indices are [0,2,2,1,2,0], the output will contains the processed + rows in the following order: [0,0,1,2,2,2] + + Args: + x (Tensor): The tensor to be dispatched of shape [length,...] + indices (Tensor): A int32 tensor of size [length, 1] containing the batch + coordinate of x + map_fn (fct): Function called for every ids of the original tensor. Take + as input a tensor of same rank than x and from shape [length_id,...] with + length_id <= length. Isn't called if length_id == 0 + + Returns: + a tensor of same shape as x, where each elements has been processed + """ + indices = tf.reshape(indices, [-1]) + + t_i = tf.constant(0) + # batch_coordinates start at 0 + t_batch_size = tf.reduce_max(indices) + 1 + + # ta_stack_out will store the intermediate results for each individual id + # As alternative to tf.TensorArray, scatter_update could potentially be used + # but that would require an additional mutable tensor. + ta_stack_out = tf.TensorArray( + x.dtype, + size=t_batch_size, + ) + + # Then we iterate over each sequence individually and compute the + # transformation for each id + while_condition = lambda t_i, *args: tf.less(t_i, t_batch_size) + def body(t_i, ta_stack_out): + """Loop body.""" + # Gather the ids + current_ids = tf.to_int32(tf.where(tf.equal(indices, t_i))) + t_row = tf.gather_nd(x, indices=current_ids) + + # TODO(epot): Should not call map_fn if t_row size is 0 + + # Apply transformation to each id + # Restore batch_dim=1 as most function expect [batch_dim, length, ...] as + # input + t_row = tf.expand_dims(t_row, axis=0) + t_row = map_fn(t_row) + t_row = tf.squeeze(t_row, axis=0) # Squeeze for concatenation + ta_stack_out = ta_stack_out.write(t_i, t_row) + + return [tf.add(t_i, 1), ta_stack_out] # ++i + + # Run the loop, equivalent to: + # stack_out = [] + # while i < batch_size: + # stack_out.expand(map_fn(x[indices==i])) + _, ta_stack_out = tf.while_loop(while_condition, body, [t_i, ta_stack_out]) + + # Merge all results + return ta_stack_out.concat() + + class SparseDispatcher(object): """Helper for implementing a mixture of experts. From 327c8d23999048596c1e9a7a59abc369ffd1ee4e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 7 Sep 2017 17:08:25 -0700 Subject: [PATCH 0365/4095] Merge PRs #274 #282 PiperOrigin-RevId: 167933701 --- README.md | 4 +-- docs/new_problem.md | 38 +++++++++++++++++++----- tensor2tensor/visualization/attention.py | 2 +- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4e56d7855..bec411f1e 100644 --- a/README.md +++ b/README.md @@ -214,8 +214,8 @@ on the task (e.g. fed through a final linear transform to produce logits for a softmax over classes). All models are imported in [`models.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/models/models.py), inherit from `T2TModel` - defined in -[`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py) -- and are registered with +[`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py) - +and are registered with [`@registry.register_model`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/registry.py). ### Hyperparameter Sets diff --git a/docs/new_problem.md b/docs/new_problem.md index d581a3a1b..ab5dd5e26 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -15,9 +15,17 @@ Let's add a new dataset together and train the transformer model. We'll be learn For each problem we want to tackle we create a new problem class and register it. Let's call our problem `Word2def`. -Since many text2text problems share similar methods, there's already a class called `Text2TextProblem` that extends the base problem class, `Problem` (both found in `problem.py`). - -For our problem, we can go ahead and create the file `word2def.py` in the `data_generators` folder and add our new problem, `Word2def`, which extends `Text2TextProblem`. Let's also register it while we're at it so we can specify the problem through flags. +Since many text2text problems share similar methods, there's already a class +called `Text2TextProblem` that extends the base problem class, `Problem` +(both found in +[`problem.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py)). + +For our problem, we can go ahead and create the file `word2def.py` in the +[`data_generators`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/) +folder and add our new problem, `Word2def`, which extends +[`Text2TextProblem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py). +Let's also register it while we're at it so we can specify the problem through +flags. ```python @registry.register_problem @@ -28,7 +36,9 @@ class Word2def(problem.Text2TextProblem): ... ``` -We need to implement the following methods from `Text2TextProblem` in our new class: +We need to implement the following methods from +[`Text2TextProblem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py). +in our new class: * is_character_level * targeted_vocab_size * generator @@ -42,7 +52,12 @@ Let's tackle them one by one: **input_space_id, target_space_id, is_character_level, targeted_vocab_size, use_subword_tokenizer**: -SpaceIDs tell Tensor2Tensor what sort of space the input and target tensors are in. These are things like, EN_CHR (English character), EN_TOK (English token), AUDIO_WAV (audio waveform), IMAGE, DNA (genetic bases). The complete list can be found at `data_generators/problem.py` in the class `SpaceID`. +SpaceIDs tell Tensor2Tensor what sort of space the input and target tensors are +in. These are things like, EN_CHR (English character), EN_TOK (English token), +AUDIO_WAV (audio waveform), IMAGE, DNA (genetic bases). The complete list can be +found at +[`data_generators/problem.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py). +in the class `SpaceID`. Since we're generating definitions and feeding in words at the character level, we set `is_character_level` to true, and use the same SpaceID, EN_CHR, for both input and target. Additionally, since we aren't using tokens, we don't need to give a `targeted_vocab_size` or define `use_subword_tokenizer`. @@ -86,7 +101,15 @@ class Word2def(problem.Text2TextProblem): **generator**: -We're almost done. `generator` generates the training and evaluation data and stores them in files like "word2def_train.lang1" in your DATA_DIR. Thankfully several commonly used methods like `character_generator`, and `token_generator` are already written in the file `wmt.py`. We will import `character_generator` and write: +We're almost done. `generator` generates the training and evaluation data and +stores them in files like "word2def_train.lang1" in your DATA_DIR. Thankfully +several commonly used methods like `character_generator`, and `token_generator` +are already written in the file +[`wmt.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py). +We will import `character_generator` and +[`text_encoder`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/text_encoder.py) +to write: + ```python def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() @@ -151,7 +174,8 @@ _WORD2DEF_TEST_DATASETS = [ ## Putting it all together -Now our `word2def.py` file looks like: (with the correct imports) +Now our `word2def.py` file looks like: + ```python """ Problem definition for word to dictionary definition. """ diff --git a/tensor2tensor/visualization/attention.py b/tensor2tensor/visualization/attention.py index bc4238081..6109f9cc6 100644 --- a/tensor2tensor/visualization/attention.py +++ b/tensor2tensor/visualization/attention.py @@ -15,7 +15,7 @@ """Module for postprocessing and displaying tranformer attentions. -This module is deigned to be called from an ipython notebook. +This module is designed to be called from an ipython notebook. """ import json From 0c0016a81424088e96df9fc6d712ce9b6ad90226 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 7 Sep 2017 17:17:12 -0700 Subject: [PATCH 0366/4095] Fix error message when problem is misspecified PiperOrigin-RevId: 167934726 --- .../data_generators/problem_hparams.py | 5 +--- tensor2tensor/utils/registry.py | 30 +++++++++---------- tensor2tensor/utils/registry_test.py | 12 ++++---- tensor2tensor/utils/trainer_utils.py | 18 +++++++++-- 4 files changed, 37 insertions(+), 28 deletions(-) diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index e002329bc..f4880e4d9 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -40,9 +40,6 @@ def problem_hparams(problem_name, model_hparams): Returns: a tf.contrib.training.HParams - - Raises: - ValueError: if problem_name is unknown. """ base_name, was_reversed, was_copy = parse_problem_name(problem_name) p = _lookup_problem_hparams_fn(base_name)(model_hparams) @@ -78,7 +75,7 @@ def _lookup_problem_hparams_fn(name): if name not in PROBLEM_HPARAMS_MAP: map_str = "* " + "\n* ".join(sorted(PROBLEM_HPARAMS_MAP.keys())) error_msg = "%s not in the supported set of problems:\n%s" % (name, map_str) - raise ValueError(error_msg) + raise LookupError(error_msg) return PROBLEM_HPARAMS_MAP.get(name) diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index f1db2f36c..2b708b4ce 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -123,7 +123,7 @@ def decorator(model_cls, registration_name=None): """Registers & returns model_cls with registration_name or default name.""" model_name = registration_name or _default_name(model_cls) if model_name in _MODELS: - raise ValueError("Model %s already registered." % model_name) + raise LookupError("Model %s already registered." % model_name) _MODELS[model_name] = model_cls return model_cls @@ -137,7 +137,7 @@ def decorator(model_cls, registration_name=None): def model(name): if name not in _MODELS: - raise ValueError("Model %s never registered." % name) + raise LookupError("Model %s never registered." % name) return _MODELS[name] @@ -152,7 +152,7 @@ def decorator(hp_fn, registration_name=None): """Registers & returns hp_fn with registration_name or default name.""" hp_name = registration_name or _default_name(hp_fn) if hp_name in _HPARAMS: - raise ValueError("HParams set %s already registered." % hp_name) + raise LookupError("HParams set %s already registered." % hp_name) _HPARAMS[hp_name] = hp_fn return hp_fn @@ -166,7 +166,7 @@ def decorator(hp_fn, registration_name=None): def hparams(name): if name not in _HPARAMS: - raise ValueError("HParams set %s never registered." % name) + raise LookupError("HParams set %s never registered." % name) return _HPARAMS[name] @@ -181,7 +181,7 @@ def decorator(rhp_fn, registration_name=None): """Registers & returns hp_fn with registration_name or default name.""" rhp_name = registration_name or _default_name(rhp_fn) if rhp_name in _RANGED_HPARAMS: - raise ValueError("RangedHParams set %s already registered." % rhp_name) + raise LookupError("RangedHParams set %s already registered." % rhp_name) # Check that the fn takes a single argument args, varargs, keywords, _ = inspect.getargspec(rhp_fn) if len(args) != 1 or varargs is not None or keywords is not None: @@ -201,7 +201,7 @@ def decorator(rhp_fn, registration_name=None): def ranged_hparams(name): if name not in _RANGED_HPARAMS: - raise ValueError("RangedHParams set %s never registered." % name) + raise LookupError("RangedHParams set %s never registered." % name) return _RANGED_HPARAMS[name] @@ -216,7 +216,7 @@ def decorator(p_cls, registration_name=None): """Registers & returns p_cls with registration_name or default name.""" p_name = registration_name or _default_name(p_cls) if p_name in _PROBLEMS: - raise ValueError("Problem %s already registered." % p_name) + raise LookupError("Problem %s already registered." % p_name) _PROBLEMS[p_name] = p_cls p_cls.name = p_name @@ -258,7 +258,7 @@ def parse_problem_name(problem_name): base_name, was_reversed, was_copy = parse_problem_name(name) if base_name not in _PROBLEMS: - raise ValueError("Problem %s never registered." % name) + raise LookupError("Problem %s never registered." % name) return _PROBLEMS[base_name](was_reversed, was_copy) @@ -270,8 +270,8 @@ def _internal_get_modality(name, mod_collection, collection_str): if name is None: name = "default" if name not in mod_collection: - raise ValueError("%s modality %s never registered." % (collection_str, - name)) + raise LookupError("%s modality %s never registered." % (collection_str, + name)) return mod_collection[name] @@ -312,8 +312,8 @@ def decorator(mod_cls, registration_name=None): """Registers & returns mod_cls with registration_name or default name.""" mod_name = registration_name or _default_name(mod_cls) if mod_name in mod_collection: - raise ValueError("%s modality %s already registered." % (collection_str, - mod_name)) + raise LookupError("%s modality %s already registered." % (collection_str, + mod_name)) mod_collection[mod_name] = mod_cls return mod_cls @@ -391,7 +391,7 @@ def create_modality(modality_spec, model_hparams): Modality instance. Raises: - ValueError: if modality_type is not recognized. See Modalities class for + LookupError: if modality_type is not recognized. See Modalities class for accepted types. """ retrieval_fns = { @@ -406,8 +406,8 @@ def create_modality(modality_spec, model_hparams): modality_full_name, vocab_size = modality_spec modality_type, modality_name = parse_modality_name(modality_full_name) if modality_type not in retrieval_fns: - raise ValueError("Modality type %s not recognized. Options are: %s" % - (modality_type, list(_MODALITIES))) + raise LookupError("Modality type %s not recognized. Options are: %s" % + (modality_type, list(_MODALITIES))) return retrieval_fns[modality_type](modality_name)(model_hparams, vocab_size) diff --git a/tensor2tensor/utils/registry_test.py b/tensor2tensor/utils/registry_test.py index 62c24b054..d97dc6bdc 100644 --- a/tensor2tensor/utils/registry_test.py +++ b/tensor2tensor/utils/registry_test.py @@ -63,7 +63,7 @@ def model_fn(): self.assertTrue(model is model_fn) def testUnknownModel(self): - with self.assertRaisesRegexp(ValueError, "never registered"): + with self.assertRaisesRegexp(LookupError, "never registered"): registry.model("not_registered") def testDuplicateRegistration(self): @@ -72,7 +72,7 @@ def testDuplicateRegistration(self): def m1(): pass - with self.assertRaisesRegexp(ValueError, "already registered"): + with self.assertRaisesRegexp(LookupError, "already registered"): @registry.register_model("m1") def m2(): @@ -137,9 +137,9 @@ def my_hparams_range(_): self.assertTrue(registry.ranged_hparams("a") is my_hparams_range) def testUnknownHparams(self): - with self.assertRaisesRegexp(ValueError, "never registered"): + with self.assertRaisesRegexp(LookupError, "never registered"): registry.hparams("not_registered") - with self.assertRaisesRegexp(ValueError, "never registered"): + with self.assertRaisesRegexp(LookupError, "never registered"): registry.ranged_hparams("not_registered") def testDuplicateRegistration(self): @@ -148,7 +148,7 @@ def testDuplicateRegistration(self): def hp1(): pass - with self.assertRaisesRegexp(ValueError, "already registered"): + with self.assertRaisesRegexp(LookupError, "already registered"): @registry.register_hparams("hp1") def hp2(): @@ -158,7 +158,7 @@ def hp2(): def rhp1(_): pass - with self.assertRaisesRegexp(ValueError, "already registered"): + with self.assertRaisesRegexp(LookupError, "already registered"): @registry.register_ranged_hparams("rhp1") def rhp2(_): diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 08359ea5c..be5e5530f 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -149,7 +149,7 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, """Create Experiment.""" hparams = create_hparams( FLAGS.hparams_set, FLAGS.problems, data_dir, passed_hparams=FLAGS.hparams) - if FLAGS.worker_id == 0: + if FLAGS.worker_id == 0 and FLAGS.schedule in ["local_run", "train"]: save_metadata(output_dir, hparams) estimator, input_fns = create_experiment_components( hparams=hparams, @@ -226,11 +226,23 @@ def add_problem_hparams(hparams, problems): for problem_name in problems.split("-"): try: problem = registry.problem(problem_name) - except ValueError: + except LookupError: problem = None if problem is None: - p_hparams = problem_hparams.problem_hparams(problem_name, hparams) + try: + p_hparams = problem_hparams.problem_hparams(problem_name, hparams) + except LookupError: + # The problem is not in the set of registered Problems nor in the old + # set of problem_hparams. + all_problem_names = sorted( + list(problem_hparams.PROBLEM_HPARAMS_MAP) + + registry.list_problems()) + error_lines = [ + "%s not in the set of supported problems:" % problem_name + ] + all_problem_names + error_msg = "\n * ".join(error_lines) + raise LookupError(error_msg) else: p_hparams = problem.get_hparams(hparams) From 6d004bdc853e2fc7fe6aa341dfefbb89d6b17963 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Thu, 7 Sep 2017 22:56:03 -0700 Subject: [PATCH 0367/4095] Edit 2d scope name PiperOrigin-RevId: 167958304 --- tensor2tensor/layers/common_attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 3f3885b10..c5a0c60cb 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -1066,7 +1066,7 @@ def multihead_attention_2d(query_antecedent, "attention heads (%d)." % (total_value_depth, num_heads)) with tf.variable_scope( name, - default_name="multihead_attention", + default_name="multihead_attention_2d", values=[query_antecedent, memory_antecedent]): q, k, v = compute_qkv_2d(query_antecedent, memory_antecedent, total_key_depth, total_value_depth) From c99d5b5d350feb33ecb99f1bbbc74a2660e8a46b Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Fri, 8 Sep 2017 09:33:02 -0700 Subject: [PATCH 0368/4095] log diet variables properly PiperOrigin-RevId: 168006293 --- tensor2tensor/utils/model_builder.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 21ef96b28..1540c0f88 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -38,6 +38,7 @@ from tensor2tensor.utils import yellowfin import tensorflow as tf +from tensorflow.python.framework import dtypes from tensorflow.python.ops import init_ops # TODO(rsepassi): Rm dep on FLAGS here @@ -363,7 +364,9 @@ def nth_model(n): total_loss *= small_batch_multiplier total_loss = tf.identity(total_loss, name="total_loss") log_variable_sizes(tf.trainable_variables(), "Trainable Variables") - diet_vars = [v for v in tf.global_variables() if hasattr(v, "optimizer")] + diet_vars = [ + v for v in tf.global_variables() if v.dtype == dtypes.float16_ref + ] log_variable_sizes(diet_vars, "Diet Varaibles") # Define the train_op for the TRAIN mode. opt = _ConditionalOptimizer(my_hp.optimizer, learning_rate, my_hp) From 8f5fcc2d0ef416bbf06b2e2e777100da071292a1 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Fri, 8 Sep 2017 13:27:18 -0700 Subject: [PATCH 0369/4095] add wiki-scramble dataset. PiperOrigin-RevId: 168037859 --- tensor2tensor/data_generators/wiki.py | 117 +++++++++++++++++++++++ tensor2tensor/models/attention_lm_moe.py | 35 +++++-- 2 files changed, 145 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 9610cb1d8..6f6c97686 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -25,6 +25,8 @@ import bz2file +import numpy as np + import six from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem @@ -130,3 +132,118 @@ def generator(self, data_dir, tmp_dir, _): encoded = encoder.encode(page) + [EOS] encoded_title = encoder.encode(title) + [EOS] yield {"inputs": encoded_title, "targets": encoded} + + +class LanguagemodelWikiScramble(problem.Text2TextProblem): + """Language modeling on English wikipedia. + + "targets" is a sequence of sequence_length tokens - a fragment of an article. + "inputs" is a copy of "targets", but with a random scramble_fraction of the + tokens randomly permuted. + + This dataset is intended to test parallel (non-autoregressive) prediction + of the target sequence given the input sequence. + """ + + @property + def sequence_length(self): + raise NotImplementedError() + + @property + def scramble_fraction(self): + raise NotImplementedError() + + @property + def is_character_level(self): + return False + + @property + def has_inputs(self): + return True + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def num_shards(self): + return 1000 + + @property + def vocab_name(self): + return "vocab.wiki" + + @property + def use_subword_tokenizer(self): + return True + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def use_train_shards_for_dev(self): + return True + + @property + def max_cases(self): + return (2 ** 30) / self.sequence_length + + def scramble(self, seq): + seq = np.array(seq) + num_permute = int(self.sequence_length * self.scramble_fraction) + full_permutation = np.random.permutation(self.sequence_length) + inverse_full_permutation = np.argsort(full_permutation) + partial_permutation = np.random.permutation(num_permute) + seq = seq[full_permutation] + seq = np.concatenate( + (seq[:num_permute][partial_permutation], seq[num_permute:])) + seq = seq[inverse_full_permutation] + seq = list(seq) + return seq + + def generator(self, data_dir, tmp_dir, _): + encoder = generator_utils.get_or_generate_vocab_inner( + data_dir, self.vocab_file, self.targeted_vocab_size, + lambda: page_generator(tmp_dir, max_docs=1000)) + case_num = 0 + for page in page_generator(tmp_dir): + encoded = encoder.encode(page) + for i in xrange(len(encoded) // self.sequence_length): + case_num += 1 + if self.max_cases and case_num > self.max_cases: + return + targets = encoded[ + i * self.sequence_length:(i + 1) * self.sequence_length] + inputs = self.scramble(targets) + yield {"inputs": inputs, "targets": targets} + + +@registry.register_problem +class LanguagemodelWikiScramble1k50(LanguagemodelWikiScramble): + """Sequence length 1024, 50% scrambed.""" + + @property + def sequence_length(self): + return 1024 + + @property + def scramble_fraction(self): + return 0.5 + + +@registry.register_problem +class LanguagemodelWikiScramble8k50(LanguagemodelWikiScramble): + """Sequence length 8192, 50% scrambed.""" + + @property + def sequence_length(self): + return 8192 + + @property + def scramble_fraction(self): + return 0.5 diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 87d456b7d..cd54ce64e 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -68,8 +68,14 @@ def model_fn_body_sharded(self, sharded_features): # Remove dropout if not training hparams = self._hparams dp = self._data_parallelism - targets = sharded_features["targets"] - targets = dp(tf.squeeze, targets, 2) + if hparams.use_inputs: + decoder_input = dp(tf.squeeze, sharded_features["inputs"], 2) + decoder_self_attention_bias = None + else: + targets = sharded_features["targets"] + targets = dp(tf.squeeze, targets, 2) + (decoder_input, decoder_self_attention_bias, pad_remover) = dp( + attention_lm_moe_prepare_decoder, targets, hparams) def preprocess(x): return dp(common_layers.layer_preprocess, x, hparams) @@ -77,9 +83,6 @@ def preprocess(x): def postprocess(x, y): return dp(common_layers.layer_postprocess, x, y, hparams) - (decoder_input, decoder_self_attention_bias, pad_remover) = dp( - attention_lm_moe_prepare_decoder, targets, hparams) - x = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) extra_loss = 0.0 @@ -95,7 +98,8 @@ def _diet_expert(x): expert_fn = expert_utils.ffn_expert_fn( hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) - if hparams.attention_type == AttentionType.LOCAL_EXPERTS: + if (hparams.attention_type == AttentionType.LOCAL_EXPERTS + and not hparams.use_inputs): # As preprocess and postprocess are called with batch of size one (all # batches concatenated), we just make sure that batch_norm is not use ( # should not either way) @@ -162,7 +166,7 @@ def print_shape(x, suffix, debug=False): attention_num_experts=hparams.attention_num_experts, train=hparams.mode == ModeKeys.TRAIN, batch_coordinate=batch_coordinate, - mask_right=True, + mask_right=not hparams.use_inputs, split_batch=bool(hparams.attention_split_batch), attention_kq_size=hparams.attention_kq_size, attention_v_size=hparams.attention_v_size) @@ -356,6 +360,9 @@ def attention_lm_moe_base(): hparams.add_hparam("use_sepconv", int(False)) hparams.add_hparam("diet_experts", int(False)) hparams.add_hparam("memory_efficient_ffn", int(False)) + # if True, we learn a non-autoregressive model from "inputs" to "targets". + # if False, we learn an autoregressive model to generate "targets" + hparams.add_hparam("use_inputs", int(False)) return hparams @@ -526,3 +533,17 @@ def attention_lm_moe_translation(): hparams.moe_layers = "0,1,2,3,4,5" hparams.shared_embedding_and_softmax_weights = int(True) return hparams + + +@registry.register_hparams +def attention_lm_moe_unscramble_base(): + """Version to use with languagemodel_wiki_scramble1k50.""" + hparams = attention_lm_no_moe_small() + hparams.use_inputs = True + hparams.min_length_bucket = 1024 + hparams.max_length = 1024 + hparams.batch_size = 5000 + hparams.layer_prepostprocess_dropout = 0.0 + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + return hparams From 1991f7b8addb657abe41bb633e1d909edade56ce Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Fri, 8 Sep 2017 13:51:17 -0700 Subject: [PATCH 0370/4095] Add option for local attention in attention_lm_moe. PiperOrigin-RevId: 168041046 --- tensor2tensor/models/attention_lm_moe.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index cd54ce64e..adbb871b5 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -148,6 +148,8 @@ def print_shape(x, suffix, debug=False): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, + attention_type=("local_mask_right" if hparams.attention_local + else "dot_product"), name="decoder_self_attention") elif hparams.attention_type == AttentionType.MEMORY_EFFICIENT: assert hparams.layer_preprocess_sequence == "n" @@ -349,6 +351,7 @@ def attention_lm_moe_base(): hparams.add_hparam("moe_layers", "2") # comma separated list of layer numbers # moe params. local attention moe. hparams.add_hparam("attention_type", AttentionType.MULTIHEAD) + hparams.add_hparam("attention_local", int(False)) hparams.add_hparam("attention_moe_k", 2) hparams.add_hparam("attention_num_experts", 16) hparams.add_hparam("attention_split_batch", int(False)) @@ -383,6 +386,18 @@ def attention_lm_moe_base_ae(): return hparams +@registry.register_hparams +def attention_lm_moe_base_local(): + """Base model with attention expert.""" + hparams = attention_lm_moe_base() + hparams.attention_local = int(True) + hparams.use_sepconv = int(True) + hparams.max_length = 0 # max_length == batch_size + hparams.eval_drop_long_sequences = int(True) + hparams.min_length_bucket = 256 # Avoid cyclic problems for big batches + return hparams + + @registry.register_hparams def attention_lm_moe_small(): """Cheap model for single-gpu training. From 1d769553d3e9e4942229a705a526080626c6d16d Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 8 Sep 2017 14:45:11 -0700 Subject: [PATCH 0371/4095] v1.2.2 PiperOrigin-RevId: 168048958 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b51070c77..119eeea7e 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.2.1', + version='1.2.2', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From b8e59e746919a80f0ccd30dbf87426928c856218 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 8 Sep 2017 14:46:56 -0700 Subject: [PATCH 0372/4095] open source fixes PiperOrigin-RevId: 168049257 --- tensor2tensor/utils/trainer_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index be5e5530f..5ab3db70c 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -265,7 +265,7 @@ def save_metadata(output_dir, hparams): else: flags_dict = FLAGS.__dict__["__flags"] flags_str = "\n".join( - ["--%s=%s" % (name, str(f.value)) for (name, f) in flags_dict.items()]) + ["--%s=%s" % (name, str(f)) for (name, f) in flags_dict.items()]) t2t_flags_str = None flags_txt = os.path.join(output_dir, "flags.txt") From b9cfedc8de073c338eea512e651961c724e127bd Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Sat, 9 Sep 2017 19:05:39 +0200 Subject: [PATCH 0373/4095] xrange for Python3, again (#292) --- tensor2tensor/utils/data_reader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index e89b9b808..2ec3f9e73 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -27,6 +27,7 @@ import six from six.moves import zip # pylint: disable=redefined-builtin +from six.moves import xrange from tensor2tensor.data_generators import problem_hparams from tensor2tensor.data_generators.problem import preprocess_examples_common From a7ca5b1161439420761914ce0df416495ec1f164 Mon Sep 17 00:00:00 2001 From: cclauss <cclauss@bluewin.ch> Date: Sat, 9 Sep 2017 19:07:14 +0200 Subject: [PATCH 0374/4095] diet_test: xrange(10) --> range(10) for Python 3 (#289) xrange() was removed in Python 3. For counting to 10, `from six.moves import xrange` would be overkill. --- tensor2tensor/utils/diet_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/diet_test.py b/tensor2tensor/utils/diet_test.py index 9c0c570cc..d5815db2e 100644 --- a/tensor2tensor/utils/diet_test.py +++ b/tensor2tensor/utils/diet_test.py @@ -53,7 +53,7 @@ def model_fn2(x): with self.test_session() as sess: sess.run(tf.global_variables_initializer()) orig_vals = sess.run(tf.global_variables()) - for _ in xrange(10): + for _ in range(10): sess.run(train_op) new_vals = sess.run(tf.global_variables()) From 783e1f4ce2a047f109f7cb3291193ecc2a7a589f Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 8 Sep 2017 15:25:08 -0700 Subject: [PATCH 0375/4095] Internal merge PiperOrigin-RevId: 168054464 --- tensor2tensor/utils/data_reader.py | 1 - tensor2tensor/utils/diet_test.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 2ec3f9e73..e89b9b808 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -27,7 +27,6 @@ import six from six.moves import zip # pylint: disable=redefined-builtin -from six.moves import xrange from tensor2tensor.data_generators import problem_hparams from tensor2tensor.data_generators.problem import preprocess_examples_common diff --git a/tensor2tensor/utils/diet_test.py b/tensor2tensor/utils/diet_test.py index d5815db2e..9c0c570cc 100644 --- a/tensor2tensor/utils/diet_test.py +++ b/tensor2tensor/utils/diet_test.py @@ -53,7 +53,7 @@ def model_fn2(x): with self.test_session() as sess: sess.run(tf.global_variables_initializer()) orig_vals = sess.run(tf.global_variables()) - for _ in range(10): + for _ in xrange(10): sess.run(train_op) new_vals = sess.run(tf.global_variables()) From abf81e790b40c7b0b46659c1051dfd20517d4c13 Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Fri, 8 Sep 2017 16:08:01 -0700 Subject: [PATCH 0376/4095] Fixed a bug in pad_to_multiple_2d. The height and width were incorrect. PiperOrigin-RevId: 168060083 --- tensor2tensor/layers/common_attention.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index c5a0c60cb..42ae089cd 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -696,8 +696,8 @@ def pad_to_multiple_2d(x, block_shape): """Making sure x is a multiple of shape.""" old_shape = x.get_shape().dims last = old_shape[-1] - height_padding = -tf.shape(x)[1] % block_shape[0] - width_padding = -tf.shape(x)[2] % block_shape[1] + height_padding = -tf.shape(x)[2] % block_shape[0] + width_padding = -tf.shape(x)[3] % block_shape[1] paddings = [[0, 0], [0, 0], [0, height_padding], [0, width_padding], [0, 0]] padded_x = tf.pad(x, paddings) From 7b961a72d5b01a4562d9d88e79c4e70dd9a64024 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Fri, 8 Sep 2017 20:53:00 -0700 Subject: [PATCH 0377/4095] Port img2img_imagenet to Problem and remove dependency from data_reader and problem_hparams PiperOrigin-RevId: 168080917 --- tensor2tensor/data_generators/image.py | 35 +++++++++++++++++++ .../data_generators/problem_hparams.py | 14 -------- tensor2tensor/utils/data_reader.py | 12 +------ 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 06942ed3f..64b9d8639 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -360,6 +360,12 @@ def is_small(self): def num_classes(self): return 1000 + def generate_data(self, data_dir, tmp_dir, task_id=-1): + # TODO(lukaszkaiser): find a better way than printing this. + print("To generate the ImageNet dataset in the proper format, follow " + "instructions at https://github.com/tensorflow/models/blob/master" + "/inception/README.md#getting-started") + def preprocess_examples(self, examples, mode, unused_hparams): # Just resize with area. if self._was_reversed: @@ -373,6 +379,35 @@ def preprocess_examples(self, examples, mode, unused_hparams): return examples +@registry.register_problem +class Img2imgImagenet(ImageProblem): + """Imagenet rescaled to 8x8 for input and 32x32 for output.""" + + def dataset_filename(self): + return "image_imagenet" # Reuse Imagenet data. + + def preprocess_examples(self, examples, unused_mode, unused_hparams): + + def resize(img, size): + return tf.to_int64( + tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) + + inputs = examples["inputs"] + # For Img2Img resize input and output images as desired. + examples["inputs"] = resize(inputs, 8) + examples["targets"] = resize(inputs, 32) + return examples + + def hparams(self, defaults, unused_model_hparams): + p = defaults + p.input_modality = {"inputs": ("image:identity_no_pad", None)} + p.target_modality = ("image:identity_no_pad", None) + p.batch_size_multiplier = 256 + p.max_expected_batch_size_per_shard = 4 + p.input_space_id = 1 + p.target_space_id = 1 + + def image_generator(images, labels): """Generator for images that takes image and labels lists and creates pngs. diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index f4880e4d9..147fc7538 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -349,18 +349,6 @@ def wsj_parsing_tokens(model_hparams, prefix, wrong_source_vocab_size, return p -def img2img_imagenet(unused_model_hparams): - """Image 2 Image for imagenet dataset.""" - p = default_problem_hparams() - p.input_modality = {"inputs": ("image:identity", None)} - p.target_modality = ("image:identity", None) - p.batch_size_multiplier = 256 - p.max_expected_batch_size_per_shard = 4 - p.input_space_id = 1 - p.target_space_id = 1 - return p - - # Dictionary of named hyperparameter settings for various problems. # This is only accessed through the problem_hparams function below. PROBLEM_HPARAMS_MAP = { @@ -377,6 +365,4 @@ def img2img_imagenet(unused_model_hparams): "parsing_english_ptb16k": lambda p: wsj_parsing_tokens( # pylint: disable=g-long-lambda p, "wsj", 2**14, 2**9), - "img2img_imagenet": - img2img_imagenet, } diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index e89b9b808..36faa9e66 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -131,17 +131,7 @@ def preprocessing(examples, data_file_pattern): """Preprocessing of examples.""" # This function is for obsolete problems only, as we're porting them # all to the Problem class and its preprocess_examples method. Don't add. - if "image" in data_file_pattern: - - def resize(img, size): - return tf.to_int64( - tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) - - if "img2img" in data_file_pattern: - inputs = examples["inputs"] - examples["inputs"] = resize(inputs, 16) - examples["targets"] = resize(inputs, 64) - elif "audio" in data_file_pattern: + if "audio" in data_file_pattern: # Reshape audio to proper shape sample_count = tf.to_int32(examples.pop("audio/sample_count")) sample_width = tf.to_int32(examples.pop("audio/sample_width")) From c0569d209b0d0a91fab7eba731e98032c0997bdb Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Sat, 9 Sep 2017 22:02:19 -0700 Subject: [PATCH 0378/4095] internal merge of #292, #289 PiperOrigin-RevId: 168132768 --- tensor2tensor/utils/data_reader.py | 1 + tensor2tensor/utils/diet_test.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 36faa9e66..834e631ac 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -26,6 +26,7 @@ import numpy as np import six +from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin from tensor2tensor.data_generators import problem_hparams diff --git a/tensor2tensor/utils/diet_test.py b/tensor2tensor/utils/diet_test.py index 9c0c570cc..d5815db2e 100644 --- a/tensor2tensor/utils/diet_test.py +++ b/tensor2tensor/utils/diet_test.py @@ -53,7 +53,7 @@ def model_fn2(x): with self.test_session() as sess: sess.run(tf.global_variables_initializer()) orig_vals = sess.run(tf.global_variables()) - for _ in xrange(10): + for _ in range(10): sess.run(train_op) new_vals = sess.run(tf.global_variables()) From 42f089b5b45761051065b5e321f4d71dc5a3105c Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Sun, 10 Sep 2017 10:59:03 -0700 Subject: [PATCH 0379/4095] Refactor and move decode flags into --decode_hparams PiperOrigin-RevId: 168160609 --- README.md | 4 +- tensor2tensor/bin/t2t-decoder | 25 +- tensor2tensor/models/README.md | 4 +- tensor2tensor/models/__init__.py | 28 + tensor2tensor/models/models.py | 44 -- tensor2tensor/utils/decoding.py | 114 +-- tensor2tensor/utils/input_fn_builder.py | 221 +++--- tensor2tensor/utils/metrics.py | 37 +- tensor2tensor/utils/model_builder.py | 719 +++++++++--------- tensor2tensor/utils/trainer_utils.py | 50 +- tensor2tensor/utils/trainer_utils_test.py | 26 +- .../TransformerVisualization.ipynb | 5 +- 12 files changed, 686 insertions(+), 591 deletions(-) delete mode 100644 tensor2tensor/models/models.py diff --git a/README.md b/README.md index bec411f1e..445218ca7 100644 --- a/README.md +++ b/README.md @@ -211,8 +211,8 @@ modality-independent tensors. input/output modality or task. Models take dense tensors in and produce dense tensors that may then be transformed in a final step by a **modality** depending on the task (e.g. fed through a final linear transform to produce logits for a -softmax over classes). All models are imported in -[`models.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/models/models.py), +softmax over classes). All models are imported in the +[`models` subpackage](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/models/__init__.py), inherit from `T2TModel` - defined in [`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py) - and are registered with diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index 5c3eeb293..8da8ae5a2 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -46,6 +46,12 @@ import tensorflow as tf flags = tf.flags FLAGS = flags.FLAGS +flags.DEFINE_string("decode_from_file", None, "Path to decode file") +flags.DEFINE_string("decode_to_file", None, + "Path prefix to inference output file") +flags.DEFINE_bool("decode_interactive", False, + "Interactive local inference mode.") +flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") flags.DEFINE_string("t2t_usr_dir", "", "Path to a Python module that will be imported. The " "__init__.py file should include the necessary imports. " @@ -70,20 +76,17 @@ def main(_): data_dir=data_dir, model_name=FLAGS.model) + decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) + decode_hp.add_hparam("shards", FLAGS.decode_shards) if FLAGS.decode_interactive: - decoding.decode_interactively(estimator) + decoding.decode_interactively(estimator, decode_hp) elif FLAGS.decode_from_file: - decoding.decode_from_file(estimator, FLAGS.decode_from_file) + decoding.decode_from_file(estimator, FLAGS.decode_from_file, decode_hp, + FLAGS.decode_to_file) else: - decoding.decode_from_dataset( - estimator, - FLAGS.problems.split("-"), - return_beams=FLAGS.decode_return_beams, - beam_size=FLAGS.decode_beam_size, - max_predictions=FLAGS.decode_num_samples, - decode_to_file=FLAGS.decode_to_file, - save_images=FLAGS.decode_save_images, - identity_output=FLAGS.identity_output) + decoding.decode_from_dataset(estimator, + FLAGS.problems.split("-"), decode_hp, + FLAGS.decode_to_file) if __name__ == "__main__": diff --git a/tensor2tensor/models/README.md b/tensor2tensor/models/README.md index 69050b9b2..3da3e0f82 100644 --- a/tensor2tensor/models/README.md +++ b/tensor2tensor/models/README.md @@ -4,13 +4,13 @@ This directory contains T2T models, their hyperparameters, and a number of common layers and hyperparameter settings to help construct new models. Common building blocks are in `common_layers.py` and `common_attention.py`. Common hyperparameters are in `common_hparams.py`. Models are imported in -`models.py`. +`__init__.py`. ## Adding a new model. To add a model to the built-in set, create a new file (see, e.g., `neural_gpu.py`) and write your model class inheriting from `T2TModel` there and -decorate it with `registry.register_model`. Import it in `models.py`. +decorate it with `registry.register_model`. Import it in `__init__.py`. It is now available to use with the trainer binary (`t2t-trainer`) using the `--model=model_name` flag. diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index 3f714ce1f..acebef809 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -13,3 +13,31 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Models defined in T2T. Imports here force registration.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +# pylint: disable=unused-import + +from tensor2tensor.layers import modalities +from tensor2tensor.models import attention_lm +from tensor2tensor.models import attention_lm_moe +from tensor2tensor.models import bluenet +from tensor2tensor.models import bytenet +from tensor2tensor.models import cycle_gan +from tensor2tensor.models import gene_expression +from tensor2tensor.models import lstm +from tensor2tensor.models import multimodel +from tensor2tensor.models import neural_gpu +from tensor2tensor.models import shake_shake +from tensor2tensor.models import slicenet +from tensor2tensor.models import transformer +from tensor2tensor.models import transformer_alternative +from tensor2tensor.models import transformer_moe +from tensor2tensor.models import transformer_revnet +from tensor2tensor.models import transformer_vae +from tensor2tensor.models import xception +# pylint: enable=unused-import diff --git a/tensor2tensor/models/models.py b/tensor2tensor/models/models.py deleted file mode 100644 index 7c31f4e05..000000000 --- a/tensor2tensor/models/models.py +++ /dev/null @@ -1,44 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Models defined in T2T. Imports here force registration.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -# pylint: disable=unused-import - -from tensor2tensor.layers import modalities -from tensor2tensor.models import attention_lm -from tensor2tensor.models import attention_lm_moe -from tensor2tensor.models import bluenet -from tensor2tensor.models import bytenet -from tensor2tensor.models import cycle_gan -from tensor2tensor.models import gene_expression -from tensor2tensor.models import lstm -from tensor2tensor.models import multimodel -from tensor2tensor.models import neural_gpu -from tensor2tensor.models import shake_shake -from tensor2tensor.models import slicenet -from tensor2tensor.models import transformer -from tensor2tensor.models import transformer_alternative -from tensor2tensor.models import transformer_moe -from tensor2tensor.models import transformer_revnet -from tensor2tensor.models import transformer_vae -from tensor2tensor.models import xception -# pylint: enable=unused-import diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index ea1a5fa01..d84fd740b 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -40,6 +40,24 @@ IMAGE_DECODE_LENGTH = 100 +def decode_hparams(overrides=""): + """Hyperparameters for decoding.""" + hp = tf.contrib.training.HParams( + use_last_position_only=False, + save_images=False, + problem_idx=0, + extra_length=50, + batch_size=32, + beam_size=4, + alpha=0.6, + return_beams=False, + max_input_size=-1, + identity_output=False, + num_samples=-1) + hp = hp.parse(overrides) + return hp + + def log_decode_results(inputs, outputs, problem_name, @@ -79,12 +97,8 @@ def log_decode_results(inputs, def decode_from_dataset(estimator, problem_names, - return_beams=False, - beam_size=1, - max_predictions=-1, - decode_to_file=None, - save_images=False, - identity_output=False): + decode_hp, + decode_to_file=None): tf.logging.info("Performing local inference from dataset for %s.", str(problem_names)) hparams = estimator.params @@ -106,8 +120,11 @@ def decode_from_dataset(estimator, # Prepare output file writers if decode_to_file passed if decode_to_file: - output_filepath = decode_to_file + ".outputs." + problem_name - target_filepath = decode_to_file + ".targets." + problem_name + output_filepath = _decode_filename(decode_to_file, problem_name, + decode_hp) + parts = output_filepath.split(".") + parts[-1] = "targets" + target_filepath = ".".join(parts) output_file = tf.gfile.Open(output_filepath, "w") target_file = tf.gfile.Open(target_filepath, "w") @@ -122,8 +139,8 @@ def decode_from_dataset(estimator, # Log predictions decoded_outputs = [] - if return_beams: - output_beams = np.split(outputs, beam_size, axis=0) + if decode_hp.return_beams: + output_beams = np.split(outputs, decode_hp.beam_size, axis=0) for i, beam in enumerate(output_beams): tf.logging.info("BEAM %d:" % i) decoded = log_decode_results( @@ -133,9 +150,9 @@ def decode_from_dataset(estimator, num_predictions, inputs_vocab, targets_vocab, - save_images=save_images, + save_images=decode_hp.save_images, model_dir=estimator.model_dir, - identity_output=identity_output, + identity_output=decode_hp.identity_output, targets=targets) decoded_outputs.append(decoded) else: @@ -146,9 +163,9 @@ def decode_from_dataset(estimator, num_predictions, inputs_vocab, targets_vocab, - save_images=save_images, + save_images=decode_hp.save_images, model_dir=estimator.model_dir, - identity_output=identity_output, + identity_output=decode_hp.identity_output, targets=targets) decoded_outputs.append(decoded) @@ -158,7 +175,8 @@ def decode_from_dataset(estimator, output_file.write(str(decoded_output) + "\n") target_file.write(str(decoded_target) + "\n") - if max_predictions >= 0 and num_predictions >= max_predictions: + if (decode_hp.num_samples >= 0 and + num_predictions >= decode_hp.num_samples): break if decode_to_file: @@ -168,20 +186,21 @@ def decode_from_dataset(estimator, tf.logging.info("Completed inference on %d samples." % num_predictions) # pylint: disable=undefined-loop-variable -def decode_from_file(estimator, filename): +def decode_from_file(estimator, filename, decode_hp, decode_to_file=None): """Compute predictions on entries in filename and write them out.""" hparams = estimator.params - problem_id = FLAGS.decode_problem_id + problem_id = decode_hp.problem_idx inputs_vocab = hparams.problems[problem_id].vocabulary["inputs"] targets_vocab = hparams.problems[problem_id].vocabulary["targets"] problem_name = FLAGS.problems.split("-")[problem_id] tf.logging.info("Performing decoding from a file.") - sorted_inputs, sorted_keys = _get_sorted_inputs(filename) - num_decode_batches = (len(sorted_inputs) - 1) // FLAGS.decode_batch_size + 1 + sorted_inputs, sorted_keys = _get_sorted_inputs(filename, decode_hp.shards) + num_decode_batches = (len(sorted_inputs) - 1) // decode_hp.batch_size + 1 def input_fn(): - input_gen = _decode_batch_input_fn(problem_id, num_decode_batches, - sorted_inputs, inputs_vocab) + input_gen = _decode_batch_input_fn( + problem_id, num_decode_batches, sorted_inputs, inputs_vocab, + decode_hp.batch_size, decode_hp.max_input_size) gen_fn = make_input_fn_from_generator(input_gen) example = gen_fn() return _decode_input_tensor_to_features_dict(example, hparams) @@ -189,9 +208,9 @@ def input_fn(): decodes = [] result_iter = estimator.predict(input_fn) for result in result_iter: - if FLAGS.decode_return_beams: + if decode_hp.return_beams: beam_decodes = [] - output_beams = np.split(result["outputs"], FLAGS.decode_beam_size, axis=0) + output_beams = np.split(result["outputs"], decode_hp.beam_size, axis=0) for k, beam in enumerate(output_beams): tf.logging.info("BEAM %d:" % k) decoded_outputs, _ = log_decode_results(result["inputs"], beam, @@ -211,23 +230,31 @@ def input_fn(): decodes.reverse() # Dumping inputs and outputs to file filename.decodes in # format result\tinput in the same order as original inputs - if FLAGS.decode_to_file: - output_filename = FLAGS.decode_to_file + if decode_to_file: + output_filename = decode_to_file else: output_filename = filename - if FLAGS.decode_shards > 1: + if decode_hp.shards > 1: base_filename = output_filename + ("%.2d" % FLAGS.worker_id) else: base_filename = output_filename - decode_filename = (base_filename + "." + FLAGS.model + "." + FLAGS.hparams_set - + ".beam" + str(FLAGS.decode_beam_size) + ".alpha" + - str(FLAGS.decode_alpha) + ".decodes") + decode_filename = _decode_filename(base_filename, problem_name, decode_hp) tf.logging.info("Writing decodes into %s" % decode_filename) outfile = tf.gfile.Open(decode_filename, "w") for index in range(len(sorted_inputs)): outfile.write("%s\n" % (decodes[sorted_keys[index]])) +def _decode_filename(base_filename, problem_name, decode_hp): + return "{base}.{model}.{hp}.{problem}.beam{beam}.alpha{alpha}.decodes".format( + base=base_filename, + model=FLAGS.model, + hp=FLAGS.hparams_set, + problem=problem_name, + beam=str(decode_hp.beam_size), + alpha=str(decode_hp.alpha)) + + def make_input_fn_from_generator(gen): """Use py_func to yield elements from the given generator.""" first_ex = six.next(gen) @@ -252,7 +279,7 @@ def input_fn(): return input_fn -def decode_interactively(estimator): +def decode_interactively(estimator, decode_hp): """Interactive decoding.""" hparams = estimator.params @@ -267,11 +294,11 @@ def input_fn(): problem_idx = result["problem_choice"] targets_vocab = hparams.problems[problem_idx].vocabulary["targets"] - if FLAGS.decode_return_beams: - beams = np.split(result["outputs"], FLAGS.decode_beam_size, axis=0) + if decode_hp.return_beams: + beams = np.split(result["outputs"], decode_hp.beam_size, axis=0) scores = None if "scores" in result: - scores = np.split(result["scores"], FLAGS.decode_beam_size, axis=0) + scores = np.split(result["scores"], decode_hp.beam_size, axis=0) for k, beam in enumerate(beams): tf.logging.info("BEAM %d:" % k) beam_string = targets_vocab.decode(_save_until_eos(beam.flatten())) @@ -280,7 +307,7 @@ def input_fn(): else: tf.logging.info(beam_string) else: - if FLAGS.identity_output: + if decode_hp.identity_output: tf.logging.info(" ".join(map(str, result["outputs"].flatten()))) else: tf.logging.info( @@ -288,7 +315,7 @@ def input_fn(): def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, - vocabulary): + vocabulary, batch_size, max_input_size): tf.logging.info(" batch %d" % num_decode_batches) # First reverse all the input sentences so that if you're going to get OOMs, # you'll see it in the first batch @@ -297,12 +324,11 @@ def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, tf.logging.info("Decoding batch %d" % b) batch_length = 0 batch_inputs = [] - for inputs in sorted_inputs[b * FLAGS.decode_batch_size:( - b + 1) * FLAGS.decode_batch_size]: + for inputs in sorted_inputs[b * batch_size:(b + 1) * batch_size]: input_ids = vocabulary.encode(inputs) - if FLAGS.decode_max_input_size > 0: + if max_input_size > 0: # Subtract 1 for the EOS_ID. - input_ids = input_ids[:FLAGS.decode_max_input_size - 1] + input_ids = input_ids[:max_input_size - 1] input_ids.append(text_encoder.EOS_ID) batch_inputs.append(input_ids) if len(input_ids) > batch_length: @@ -437,11 +463,13 @@ def show_and_save_image(img, save_path): plt.savefig(save_path) -def _get_sorted_inputs(filename): +def _get_sorted_inputs(filename, num_shards=1): """Returning inputs sorted according to length. Args: filename: path to file with inputs, 1 per line. + num_shards: number of input shards. If > 1, will read from file filename.XX, + where XX is FLAGS.worker_id. Returns: a sorted list of inputs @@ -449,7 +477,7 @@ def _get_sorted_inputs(filename): """ tf.logging.info("Getting sorted inputs") # read file and sort inputs according them according to input length. - if FLAGS.decode_shards > 1: + if num_shards > 1: decode_filename = filename + ("%.2d" % FLAGS.worker_id) else: decode_filename = filename @@ -509,7 +537,7 @@ def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring tf.constant(p_hparams.target_space_id), x) input_space_id, target_space_id, x = input_fn_builder.cond_on_index( - input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) + input_fn, feature_map["problem_choice"], len(hparams.problems) - 1) features = {} features["problem_choice"] = tf.convert_to_tensor( @@ -545,7 +573,7 @@ def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring tf.constant(p_hparams.target_space_id), x) input_space_id, target_space_id, x = input_fn_builder.cond_on_index( - input_fn, feature_map["problem_choice"], 0, len(hparams.problems) - 1) + input_fn, feature_map["problem_choice"], len(hparams.problems) - 1) features = {} features["problem_choice"] = feature_map["problem_choice"] diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index abec8d4ad..cfa782e8d 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -81,125 +81,115 @@ def input_fn(): Raises: ValueError: if one of the parameters has an unsupported value. """ - problem_count, batches = len(hparams.problems), [] - with tf.name_scope("input_reader"): - for n in xrange(problem_count): - if fixed_problem is not None and n != fixed_problem: + problem_count = len(hparams.problems) + problem_batches = [] + with tf.name_scope("input_fn"): + for problem_idx in xrange(problem_count): + if fixed_problem is not None and problem_idx != fixed_problem: continue - problem_instance = hparams.problem_instances[n] - p_hparams = hparams.problems[n] - with tf.name_scope("problem_%d" % n): - with tf.device("/cpu:0"): # Input reading on CPU - capacity = ( - p_hparams.max_expected_batch_size_per_shard * num_datashards) - feature_map = data_reader.input_pipeline( - problem_instance, data_file_patterns and data_file_patterns[n], - capacity, mode, hparams, - data_reader.hparams_to_batching_scheme( - hparams, - shard_multiplier=num_datashards, - drop_long_sequences=(mode == tf.estimator.ModeKeys.TRAIN - or hparams.eval_drop_long_sequences), - length_multiplier=(p_hparams.batch_size_multiplier))) - - # Reverse inputs and targets features if the problem was reversed. - if problem_instance is not None: - problem_instance.maybe_reverse_features(feature_map) - problem_instance.maybe_copy_features(feature_map) - else: - if p_hparams.was_reversed: - inputs = feature_map["inputs"] - targets = feature_map["targets"] - feature_map["inputs"] = targets - feature_map["targets"] = inputs - # Use the inputs as the targets if the problem is a copy problem. - if p_hparams.was_copy: - feature_map["targets"] = feature_map["inputs"] - - # Ensure inputs and targets are proper rank. - while len(feature_map["inputs"].get_shape()) != 4: - feature_map["inputs"] = tf.expand_dims(feature_map["inputs"], axis=-1) - while len(feature_map["targets"].get_shape()) != 4: - feature_map["targets"] = tf.expand_dims( - feature_map["targets"], axis=-1) - - batches.append((feature_map["inputs"], feature_map["targets"], - tf.constant(n), tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id))) + problem_instance = hparams.problem_instances[problem_idx] + p_hparams = hparams.problems[problem_idx] + problem_filepatterns = (data_file_patterns and + data_file_patterns[problem_idx]) + feature_map = features_for_problem( + problem_instance, + p_hparams, + hparams, + problem_filepatterns, + num_datashards, + mode, + name="problem_%d" % problem_idx) + problem_batches.append(feature_map) # We choose which problem to process. loss_moving_avgs = [] # Need loss moving averages for that. - for n in xrange(problem_count): + for problem_idx in xrange(problem_count): with tf.variable_scope("losses_avg"): loss_moving_avgs.append( tf.get_variable( - "problem_%d/total_loss" % n, initializer=100.0, + "problem_%d/total_loss" % problem_idx, + initializer=100.0, trainable=False)) if fixed_problem is None: - if (hparams.problem_choice == "uniform" or - mode != tf.estimator.ModeKeys.TRAIN): - problem_choice = tf.random_uniform( - [], maxval=problem_count, dtype=tf.int32) - elif hparams.problem_choice == "adaptive": - loss_moving_avgs = tf.stack(loss_moving_avgs) - problem_choice = tf.multinomial( - tf.reshape(loss_moving_avgs, [1, -1]), 1) - problem_choice = tf.to_int32(tf.squeeze(problem_choice)) - elif hparams.problem_choice == "distributed": - assert worker_replicas >= problem_count - assert worker_replicas % problem_count == 0 - problem_choice = tf.to_int32(worker_id % problem_count) - else: - raise ValueError( - "Value of hparams.problem_choice is %s and must be " - "one of [uniform, adaptive, distributed]" % hparams.problem_choice) - - # Inputs and targets conditional on problem_choice. - rand_inputs, rand_target, choice, inp_id, tgt_id = cond_on_index( - lambda n: batches[n], problem_choice, 0, problem_count - 1) + problem_choice = _problem_choice(hparams.problem_choice, mode, + problem_count, loss_moving_avgs, + worker_replicas, worker_id) + + # Problem conditional on problem_choice. + feature_map = cond_on_index( + lambda problem_idx: problem_batches[problem_idx], problem_choice, + problem_count - 1) else: problem_choice = tf.constant(fixed_problem) # Take the only constructed batch, which is the fixed_problem. - rand_inputs, rand_target, choice, inp_id, tgt_id = batches[0] + feature_map = problem_batches[0] + + feature_map["problem_choice"] = problem_choice # Set shapes so the ranks are clear. - rand_inputs.set_shape([None, None, None, None]) - rand_target.set_shape([None, None, None, None]) - choice.set_shape([]) - inp_id.set_shape([]) - tgt_id.set_shape([]) - # Forced shape obfuscation is necessary for inference. - if mode == tf.estimator.ModeKeys.PREDICT: - rand_inputs._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access - rand_target._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access - - # Final feature map. - rand_feature_map = { - "inputs": rand_inputs, - "problem_choice": choice, - "input_space_id": inp_id, - "target_space_id": tgt_id - } + feature_map["inputs"].set_shape([None, None, None, None]) + feature_map["targets"].set_shape([None, None, None, None]) + feature_map["problem_choice"].set_shape([]) + feature_map["input_space_id"].set_shape([]) + feature_map["target_space_id"].set_shape([]) + if mode == tf.estimator.ModeKeys.PREDICT: - rand_feature_map["infer_targets"] = rand_target - rand_target = None + feature_map["infer_targets"] = feature_map["targets"] + # Forced shape obfuscation is necessary for inference. + feature_map["inputs"]._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access + feature_map["targets"]._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access + # This is because of a bug in the Estimator that short-circuits prediction # if it doesn't see a QueueRunner. DummyQueueRunner implements the # minimal expected interface but does nothing. tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, DummyQueueRunner()) + return feature_map, None - return rand_feature_map, rand_target + return feature_map, feature_map["targets"] return input_fn -def cond_on_index(fn, index_tensor, cur_idx, max_idx): +def _problem_choice(choice_mode, mode, problem_count, loss_moving_avgs, + worker_replicas, worker_id): + """Return idx of problem based on choice_mode and mode.""" + if choice_mode == "uniform" or mode != tf.estimator.ModeKeys.TRAIN: + problem_choice = tf.random_uniform([], maxval=problem_count, dtype=tf.int32) + elif choice_mode == "adaptive": + loss_moving_avgs = tf.stack(loss_moving_avgs) + problem_choice = tf.multinomial(tf.reshape(loss_moving_avgs, [1, -1]), 1) + problem_choice = tf.to_int32(tf.squeeze(problem_choice)) + elif choice_mode == "distributed": + assert worker_replicas >= problem_count + assert worker_replicas % problem_count == 0 + problem_choice = tf.to_int32(worker_id % problem_count) + else: + raise ValueError("Value of hparams.problem_choice is %s and must be " + "one of [uniform, adaptive, distributed]" % choice_mode) + + return problem_choice + + +def cond_on_index(fn, index_tensor, max_idx, cur_idx=0): """Call fn(index_tensor) using tf.cond in [cur_id, max_idx].""" + + # Because tf.cond expects fn to return a flat list of Tensors, we flatten the + # output of fn. By capturing the original output here in orig_out, we can pack + # the flat sequence into the original structure. + orig_out = [] + + def wrapped_fn(): + out = fn(cur_idx) + orig_out.append(out) + return tf.contrib.framework.nest.flatten(out) + if cur_idx == max_idx: - return fn(cur_idx) - return tf.cond( - tf.equal(index_tensor, cur_idx), lambda: fn(cur_idx), - lambda: cond_on_index(fn, index_tensor, cur_idx + 1, max_idx)) + flat_out = wrapped_fn() + else: + flat_out = tf.cond( + tf.equal(index_tensor, cur_idx), wrapped_fn, + lambda: cond_on_index(fn, index_tensor, max_idx, cur_idx + 1)) + return tf.contrib.framework.nest.pack_sequence_as(orig_out[0], flat_out) class DummyQueueRunner(object): @@ -211,3 +201,48 @@ def __init__(self): def create_threads(self, sess, coord=None, daemon=False, start=False): del sess, coord, daemon, start return [] + + +def features_for_problem(problem_instance, + p_hparams, + hparams, + data_filepatterns, + num_datashards, + mode, + name="problem_inputs"): + """Feature map for Problem.""" + with tf.name_scope(name): + with tf.device("/cpu:0"): # Input reading on CPU + capacity = (p_hparams.max_expected_batch_size_per_shard * num_datashards) + feature_map = data_reader.input_pipeline( + problem_instance, data_filepatterns, capacity, mode, hparams, + data_reader.hparams_to_batching_scheme( + hparams, + shard_multiplier=num_datashards, + drop_long_sequences=(mode == tf.estimator.ModeKeys.TRAIN or + hparams.eval_drop_long_sequences), + length_multiplier=(p_hparams.batch_size_multiplier))) + + # Reverse inputs and targets features if the problem was reversed. + if problem_instance is not None: + problem_instance.maybe_reverse_features(feature_map) + problem_instance.maybe_copy_features(feature_map) + else: + if p_hparams.was_reversed: + inputs = feature_map["inputs"] + targets = feature_map["targets"] + feature_map["inputs"] = targets + feature_map["targets"] = inputs + # Use the inputs as the targets if the problem is a copy problem. + if p_hparams.was_copy: + feature_map["targets"] = feature_map["inputs"] + + # Ensure inputs and targets are proper rank. + while len(feature_map["inputs"].get_shape()) != 4: + feature_map["inputs"] = tf.expand_dims(feature_map["inputs"], axis=-1) + while len(feature_map["targets"].get_shape()) != 4: + feature_map["targets"] = tf.expand_dims(feature_map["targets"], axis=-1) + + feature_map["input_space_id"] = tf.constant(p_hparams.input_space_id) + feature_map["target_space_id"] = tf.constant(p_hparams.target_space_id) + return feature_map diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 5bfad5338..2f469cbf0 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -18,6 +18,8 @@ from __future__ import division from __future__ import print_function +import inspect + # Dependency imports from tensor2tensor.layers import common_layers @@ -195,12 +197,9 @@ def create_evaluation_metrics(problems, model_hparams): model_hparams: a set of hparams. Returns: - Dict <metric name, metric function>. The metric functions have signature - (predictions, labels, problem_choice) -> (metric Tensor, update op). - A dictionary with keys that are strings naming the evaluation - metrics and values that are functions taking arguments of - (predictions, targets), returning a tuple of a tensor of the - metric's value together with an op to update the metric's value. + dict<metric name, metric function>. The metric functions have signature + (Tensor predictions, features) -> (metric Tensor, update op), where features + is a dict with keys {targets, problem_choice}. Raises: ValueError: if the metrics specified by a problem are not recognized (i.e. @@ -210,10 +209,23 @@ def create_evaluation_metrics(problems, model_hparams): def make_problem_specific_metric_fn(metric_fn, problem_idx, weights_fn): """Create a metric fn conditioned on problem_idx.""" - def problem_metric_fn(predictions, labels, problem_choice): + def problem_metric_fn(predictions, features): + """Metric fn.""" + labels = features.get("targets", None) + problem_choice = features.get("problem_choice", 0) + + # Send along the entire features dict if the metric fn has the kwarg + # "features". + kwargs = {} + args, _, keywords, _ = inspect.getargspec(metric_fn) + if "features" in args or keywords: + kwargs["features"] = features + + def wrapped_metric_fn(): + return metric_fn(predictions, labels, weights_fn=weights_fn, **kwargs) + (scores, weights) = tf.cond( - tf.equal(problem_idx, problem_choice), - lambda: metric_fn(predictions, labels, weights_fn=weights_fn), + tf.equal(problem_idx, problem_choice), wrapped_metric_fn, lambda: (tf.constant(0.0), tf.constant(0.0))) # The tf.metrics.mean function assures correct aggregation. return tf.metrics.mean(scores, weights) @@ -241,9 +253,8 @@ def problem_metric_fn(predictions, labels, problem_choice): class_output = "image" in problem_name and "coco" not in problem_name real_output = "gene_expression" in problem_name if model_hparams.prepend_mode != "none": - assert ( - model_hparams.prepend_mode == "prepend_inputs_masked_attention" or - model_hparams.prepend_mode == "prepend_inputs_full_attention") + assert (model_hparams.prepend_mode == "prepend_inputs_masked_attention" or + model_hparams.prepend_mode == "prepend_inputs_full_attention") assert not class_output weights_fn = common_layers.weights_prepend_inputs_to_targets elif class_output or real_output: @@ -262,6 +273,8 @@ def problem_metric_fn(predictions, labels, problem_choice): # Metrics are functions that take predictions and labels and return # a tensor of metrics and a tensor of weights. +# If the function has "features" as an argument, it will receive the whole +# features dict as well. # The results are passed to tf.metrics.mean to accumulate properly. METRICS_FNS = { Metrics.ACC: padded_accuracy, diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 1540c0f88..bda22f4ee 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -30,7 +30,7 @@ from six.moves import xrange # pylint: enable=redefined-builtin -from tensor2tensor.models import models # pylint: disable=unused-import +from tensor2tensor import models # pylint: disable=unused-import from tensor2tensor.utils import devices from tensor2tensor.utils import input_fn_builder from tensor2tensor.utils import metrics @@ -39,54 +39,291 @@ import tensorflow as tf from tensorflow.python.framework import dtypes -from tensorflow.python.ops import init_ops -# TODO(rsepassi): Rm dep on FLAGS here -FLAGS = tf.flags.FLAGS - -def log_variable_sizes(var_list, tag): - """Log the sizes and shapes of variables, and the total size. +def model_fn(model, + features, + mode, + hparams, + problem_names, + train_steps=100000, + worker_id=0, + worker_replicas=1, + eval_run_autoregressive=False, + decode_hparams=None, + autotune=False, + objective=None): + """Builds the model for all modes. + + * TRAIN: Constructs loss and train_op + * EVAL: Constructs the loss and eval metrics + * PREDICT: Constructs the predictions Args: - var_list: a list of varaibles - tag: a string - """ - name_to_var = {v.name: v for v in var_list} - total_size = 0 - for v_name in sorted(list(name_to_var)): - v = name_to_var[v_name] - v_size = int(np.prod(np.array(v.shape.as_list()))) - tf.logging.info("Weight %s\tshape %s\tsize %d", - v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) - total_size += v_size - tf.logging.info("%s Total size: %d", tag, total_size) - - -def build_model_fn(model): - """Returns a function to build the model. - - Args: - model: The name of the model to use. + model: str, name of model. + features: dict<feature name, Tensor>. Expected to have keys + {inputs, targets, problem_choice}. + mode: tf.estimator.ModeKeys. + hparams: model HParams. + problem_names: list of str, names of the problems. + train_steps: int, total number of training steps. Used to compute learning + rate decay. + worker_id: int, id of this worker. + worker_replicas: int, number of workers. + eval_run_autoregressive: bool, whether to run evaluation autoregressively. + decode_hparams: HParams for decode settings. Used when mode == PREDICT. + autotune: bool, whether this model is being used for autotuning. + objective: str, the objective if autotune==True. Returns: - A function to build the model's graph. This function is called by - the Estimator object to construct the graph. + tf.estimator.EstimatorSpec """ + assert len(problem_names) == len(hparams.problem_instances) + decode_hp = decode_hparams + + # TODO(rsepassi): This still depends on FLAGS. Rm eventually. + dp = devices.data_parallelism() + + tf.get_variable_scope().set_initializer(_get_variable_initializer(hparams)) + is_training = mode == tf.estimator.ModeKeys.TRAIN + + # Add input statistics for incoming features. + with tf.name_scope("input_stats"): + for (k, v) in six.iteritems(features): + if isinstance(v, tf.Tensor) and v.get_shape().ndims > 1: + tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // dp.n) + tf.summary.scalar("%s_length" % k, tf.shape(v)[1]) + nonpadding = tf.to_float(tf.not_equal(v, 0)) + nonpadding_tokens = tf.reduce_sum(nonpadding) + if k == "targets": + targets_nonpadding_tokens = nonpadding_tokens + tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens) + tf.summary.scalar("%s_nonpadding_fraction" % k, + tf.reduce_mean(nonpadding)) + + # Get multi-problem logits and loss based on features["problem_choice"]. + loss_variable_names = [] + + def nth_model(n): + """Build the model for the n-th problem, plus some added variables.""" + model_class = registry.model(model)( + hparams, + mode, + hparams.problems[n], + n, + dp, + devices.ps_devices(all_workers=True)) + if mode == tf.estimator.ModeKeys.PREDICT: + return model_class.infer( + features, + beam_size=decode_hp.beam_size, + top_beams=(decode_hp.beam_size if decode_hp.return_beams else 1), + last_position_only=decode_hp.use_last_position_only, + alpha=decode_hp.alpha, + decode_length=decode_hp.extra_length) + # In distributed mode, we build graph for problem=0 and problem=worker_id. + skipping_is_on = hparams.problem_choice == "distributed" and is_training + problem_worker_id = worker_id % len(hparams.problems) + skip_this_one = n != 0 and n % worker_replicas != problem_worker_id + # On worker 0 also build graph for problems <= 1. + # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. + skip_this_one = skip_this_one and (worker_id != 0 or n > 1) + if eval_run_autoregressive and mode == tf.estimator.ModeKeys.EVAL: + sharded_logits, losses_dict = model_class.eval_autoregressive(features) + else: + sharded_logits, losses_dict = model_class.model_fn( + features, skip=(skipping_is_on and skip_this_one)) + with tf.variable_scope("losses_avg"): + total_loss, ops = 0.0, [] + for loss_key, loss_value in six.iteritems(losses_dict): + loss_name = "problem_%d/%s_loss" % (n, loss_key) + loss_moving_avg = tf.get_variable( + loss_name, initializer=100.0, trainable=False) + loss_variable_names.append(loss_name) + ops.append( + loss_moving_avg.assign(loss_moving_avg * 0.9 + loss_value * 0.1)) + total_loss += loss_value + try: # Total loss avg might be reused or not, we try both. + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + # Total loss was already constructed on input. + loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) + except ValueError: + loss_moving_avg = tf.get_variable( + "problem_%d/total_loss" % n, initializer=100.0, trainable=False) + ops.append( + loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1)) + with tf.variable_scope("train_stats"): # Count steps for this problem. + problem_steps = tf.get_variable( + "problem_%d_steps" % n, initializer=0, trainable=False) + ops.append(problem_steps.assign_add(1)) + with tf.control_dependencies(ops): # Make sure the ops run. + # Ensure the loss is a scalar here. + total_loss = tf.reshape(total_loss, [], name="total_loss_control_id") + return [total_loss, tf.concat(sharded_logits, 0)] + + model_output = input_fn_builder.cond_on_index( + nth_model, + index_tensor=features["problem_choice"], + max_idx=len(hparams.problems) - 1) + + if mode == tf.estimator.ModeKeys.PREDICT: + # If beam searching, model_output will be a dict with keys "outputs" and + # "scores". + if isinstance(model_output, dict): + outputs = model_output["outputs"] + scores = model_output["scores"] + else: + outputs = model_output + scores = None + + batched_problem_choice = (features["problem_choice"] * tf.ones( + (tf.shape(features["inputs"])[0],), dtype=tf.int32)) + predictions = { + "outputs": outputs, + "scores": scores, + "inputs": features.get("inputs", None), + "targets": features.get("infer_targets", None), + "problem_choice": batched_problem_choice, + } + _del_dict_nones(predictions) + return tf.estimator.EstimatorSpec(mode, predictions=predictions) + + total_loss, logits = model_output + + if mode == tf.estimator.ModeKeys.EVAL: + eval_metrics_fns = metrics.create_evaluation_metrics( + zip(problem_names, hparams.problem_instances), hparams) + _check_autotune_metrics( + eval_metrics_fns, autotune=autotune, objective=objective) + + eval_metrics = {} + for metric_name, metric_fn in six.iteritems(eval_metrics_fns): + eval_metrics[metric_name] = metric_fn(logits, features) - def model_fn(features, labels, mode, params): - """Creates the prediction, loss, and train ops. - - Args: - features: A dictionary of tensors keyed by the feature name. - labels: A tensor representing the labels. - mode: The execution mode, as defined in tf.estimator.ModeKeys. - params: model HParams. - - Returns: - An EstimatorSpec. - """ - hparams = params + return tf.estimator.EstimatorSpec( + mode, + predictions={"predictions": logits}, + eval_metric_ops=eval_metrics, + loss=total_loss) + + assert mode == tf.estimator.ModeKeys.TRAIN + + # Set learning rate + learning_rate = hparams.learning_rate * _learning_rate_decay( + hparams, num_worker_replicas=worker_replicas, num_train_steps=train_steps) + learning_rate /= math.sqrt(float(worker_replicas)) + + # Get global step + global_step = tf.train.get_or_create_global_step() + + # Some training statistics. + with tf.name_scope("training_stats"): + tf.summary.scalar("learning_rate", learning_rate) + for n in xrange(len(hparams.problems)): + names_and_vars = [] + with tf.variable_scope("losses_avg", reuse=True): + total_loss_var = tf.get_variable("problem_%d/total_loss" % n) + names_and_vars.append(("total_loss", total_loss_var)) + with tf.variable_scope("losses_avg", reuse=True): + for loss_name in loss_variable_names: + if loss_name.startswith("problem_%d/" % n): + loss_var = tf.get_variable(loss_name) + loss_suffix = loss_name[loss_name.index("/") + 1:] + names_and_vars.append((loss_suffix, loss_var)) + for (loss_name, loss_var) in names_and_vars: + tf.summary.scalar("loss_avg_%d/%s" % (n, loss_name), loss_var) + with tf.variable_scope("train_stats", reuse=True): + nth_steps = tf.get_variable("problem_%d_steps" % n, dtype=tf.int32) + tf.summary.scalar("problem_%d_frequency" % n, + tf.to_float(nth_steps) / + (tf.to_float(global_step) + 1.0)) + + # Add weight decay and noise. + total_size, weight_decay_loss = 0, 0.0 + all_weights = {v.name: v for v in tf.trainable_variables()} + for v_name in sorted(list(all_weights)): + v = all_weights[v_name] + v_size = int(np.prod(np.array(v.shape.as_list()))) + total_size += v_size + if hparams.weight_decay > 0.0 and len(v.shape.as_list()) > 1: + # Add weight regularization if set and the weight is not a bias (dim>1). + with tf.device(v._ref().device): # pylint: disable=protected-access + v_loss = tf.nn.l2_loss(v) / v_size + weight_decay_loss += v_loss + is_body = len(v_name) > 5 and v_name[:5] == "body/" + if hparams.weight_noise > 0.0 and is_body: + # Add weight noise if set in hparams. + with tf.device(v._ref().device): # pylint: disable=protected-access + scale = learning_rate * 0.001 + noise = tf.truncated_normal(v.shape) * hparams.weight_noise * scale + noise_op = v.assign_add(noise) + with tf.control_dependencies([noise_op]): + total_loss = tf.identity(total_loss) + if hparams.weight_decay > 0.0: + total_loss += weight_decay_loss * hparams.weight_decay + + # The new data reader occasionally emits very small batches, which + # cause the examples in those batches to be grossly overweighted. + # We decrease the loss proportionally to the ratio of the size of this + # batch to the size of the largest training batch ever. + # TODO(noam): to be more sophisticated, we could keep separate + # maxima based on problem choice. + max_nonpadding_var = tf.get_variable( + "max_nonpadding", + shape=[], + initializer=tf.ones_initializer(), + trainable=False) + max_nonpadding = tf.maximum(max_nonpadding_var, targets_nonpadding_tokens) + with tf.control_dependencies([tf.assign(max_nonpadding_var, max_nonpadding)]): + small_batch_multiplier = targets_nonpadding_tokens / max_nonpadding + tf.summary.scalar("small_batch_multiplier", small_batch_multiplier) + total_loss *= small_batch_multiplier + + # Log variable sizes + _log_variable_sizes(tf.trainable_variables(), "Trainable Variables") + diet_vars = [ + v for v in tf.global_variables() if v.dtype == dtypes.float16_ref + ] + _log_variable_sizes(diet_vars, "Diet Varaibles") + + # Optimize + total_loss = tf.identity(total_loss, name="total_loss") + opt = _ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) + opt_summaries = ["learning_rate", "loss"] + if hparams.summarize_grads: + opt_summaries.extend(["gradients", "gradient_norm"]) + tf.logging.info("Computing gradients for global model_fn.") + train_op = tf.contrib.layers.optimize_loss( + name="training", + loss=total_loss, + global_step=global_step, + learning_rate=learning_rate, + clip_gradients=hparams.clip_grad_norm or None, + gradient_noise_scale=hparams.grad_noise_scale or None, + optimizer=opt, + summaries=opt_summaries, + colocate_gradients_with_ops=True) + + # Remove summaries that will fail to run because they are in conditionals. + # TODO(cwhipkey): Test with this code removed, later in 2017. + summaries = tf.get_collection_ref(tf.GraphKeys.SUMMARIES) + for i in reversed(range(len(summaries))): + if summaries[i].name.startswith("cond_"): + del summaries[i] + + tf.logging.info("Global model_fn finished.") + return tf.estimator.EstimatorSpec( + mode, + predictions={"problem_choice": features["problem_choice"]}, + loss=total_loss, + train_op=train_op) + + +def build_model_fn(model, **kwargs): + """Returns a function to build the model. See model_fn.""" + + # Model function as expected by Estimator + def wrapping_model_fn(features, labels, mode, params): # Deep-copy the model hparams between modes to eliminate # side-effects caused by abuse of the linked problem_hparams # objects which are used to share modality objects between @@ -97,309 +334,16 @@ def model_fn(features, labels, mode, params): # share the modality objects between problems. This dictionary # could be created once per mode and passed to the constructor of # t2t_model. - my_hp = copy.deepcopy(hparams) - - def initializer(): - if hparams.initializer == "orthogonal": - return tf.orthogonal_initializer(gain=hparams.initializer_gain) - elif hparams.initializer == "uniform": - max_val = 0.1 * hparams.initializer_gain - return tf.random_uniform_initializer(-max_val, max_val) - elif hparams.initializer == "normal_unit_scaling": - return init_ops.variance_scaling_initializer( - hparams.initializer_gain, mode="fan_avg", distribution="normal") - elif hparams.initializer == "uniform_unit_scaling": - return init_ops.variance_scaling_initializer( - hparams.initializer_gain, mode="fan_avg", distribution="uniform") - else: - raise ValueError("Unrecognized initializer: %s" % hparams.initializer) - - def learning_rate_decay(): - """Inverse-decay learning rate until warmup_steps, then decay.""" - warmup_steps = tf.to_float( - hparams.learning_rate_warmup_steps * FLAGS.worker_replicas) - step = tf.to_float(tf.contrib.framework.get_global_step()) - if hparams.learning_rate_decay_scheme == "noam": - return 5000.0 * hparams.hidden_size**-0.5 * tf.minimum( - (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) - elif hparams.learning_rate_decay_scheme == "exp100k": - return 0.94**(step // 100000) - elif hparams.learning_rate_decay_scheme == "cosine": - cycle_steps = hparams.learning_rate_cosine_cycle_steps - return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) - elif hparams.learning_rate_decay_scheme == "cyclelinear10x": - # Cycle the rate linearly by 10x every warmup_steps, up and down. - cycle_steps = hparams.learning_rate_warmup_steps - cycle_position = step % (2 * cycle_steps) - cycle_position = tf.to_float( # Normalize to the interval [-1, 1]. - cycle_position - cycle_steps) / float(cycle_steps) - cycle_position = 1.0 - tf.abs(cycle_position) # 0 to 1 and back to 0. - return ( - cycle_position + 0.1) * 3.0 # 10x difference each cycle (0.3-3). - - inv_base = tf.exp(tf.log(0.01) / warmup_steps) - inv_decay = inv_base**(warmup_steps - step) - if hparams.learning_rate_decay_scheme == "sqrt": - decay = _sqrt_decay(step - warmup_steps) - elif hparams.learning_rate_decay_scheme == "exp10k": - decay = _exp_decay_after(step - warmup_steps, 0.9995, - FLAGS.train_steps - warmup_steps - 10000) - elif hparams.learning_rate_decay_scheme == "exp50k": - decay = _exp_decay_after(step - warmup_steps, 0.99995, - FLAGS.train_steps - warmup_steps - 50000) - elif hparams.learning_rate_decay_scheme == "exp500k": - decay = _exp_decay_after(step - warmup_steps, 0.9999955, - FLAGS.train_steps - warmup_steps - 500000) - elif hparams.learning_rate_decay_scheme == "none": - decay = tf.constant(1.0) - else: - raise ValueError("Unrecognized learning rate decay scheme: %s" % - hparams.learning_rate_decay_scheme) - return tf.cond( - step < warmup_steps, - lambda: inv_decay, - lambda: decay, - name="learning_rate_decay_warump_cond") + hparams = copy.deepcopy(params) + del params if labels is not None: features["targets"] = labels + del labels - dp = devices.data_parallelism() - - tf.get_variable_scope().set_initializer(initializer()) - is_training = mode == tf.estimator.ModeKeys.TRAIN - - # Add input statistics for incoming features. - with tf.name_scope("input_stats"): - for (k, v) in six.iteritems(features): - if isinstance(v, tf.Tensor) and v.get_shape().ndims > 1: - tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // dp.n) - tf.summary.scalar("%s_length" % k, tf.shape(v)[1]) - nonpadding = tf.to_float(tf.not_equal(v, 0)) - nonpadding_tokens = tf.reduce_sum(nonpadding) - if k == "targets": - targets_nonpadding_tokens = nonpadding_tokens - tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens) - tf.summary.scalar("%s_nonpadding_fraction" % k, - tf.reduce_mean(nonpadding)) - - if is_training: - # The new data reader occasionally emits very small batches, which - # cause the examples in those batches to be grossly overweighted. - # We decrease the loss proportionally to the ratio of the size of this - # batch to the size of the largest training batch ever. - # TODO(noam): to be more sophisticated, we could keep separate - # maxima based on problem choice. - max_nonpadding_var = tf.get_variable( - "max_nonpadding", - shape=[], - initializer=tf.ones_initializer(), - trainable=False) - max_nonpadding = tf.maximum(max_nonpadding_var, - targets_nonpadding_tokens) - with tf.control_dependencies( - [tf.assign(max_nonpadding_var, max_nonpadding)]): - small_batch_multiplier = targets_nonpadding_tokens / max_nonpadding - tf.summary.scalar("small_batch_multiplier", small_batch_multiplier) - - # Get multi-problem logits and loss based on features["problem_choice"]. - loss_variable_names = [] - - def nth_model(n): - """Build the model for the n-th problem, plus some added variables.""" - model_class = registry.model(model)( - my_hp, - mode, - my_hp.problems[n], - n, - dp, - devices.ps_devices(all_workers=True)) - if mode == tf.estimator.ModeKeys.PREDICT: - return model_class.infer( - features, - beam_size=FLAGS.decode_beam_size, - top_beams=(FLAGS.decode_beam_size - if FLAGS.decode_return_beams else 1), - last_position_only=FLAGS.decode_use_last_position_only, - alpha=FLAGS.decode_alpha, - decode_length=FLAGS.decode_extra_length) - # In distributed mode, we build graph for problem=0 and problem=worker_id. - skipping_is_on = my_hp.problem_choice == "distributed" and is_training - problem_worker_id = FLAGS.worker_id % len(my_hp.problems) - skip_this_one = n != 0 and n % FLAGS.worker_replicas != problem_worker_id - # On worker 0 also build graph for problems <= 1. - # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. - skip_this_one = skip_this_one and (FLAGS.worker_id != 0 or n > 1) - if (FLAGS.eval_run_autoregressive and - mode == tf.estimator.ModeKeys.EVAL): - sharded_logits, losses_dict = model_class.eval_autoregressive(features) - else: - sharded_logits, losses_dict = model_class.model_fn( - features, skip=(skipping_is_on and skip_this_one)) - with tf.variable_scope("losses_avg"): - total_loss, ops = 0.0, [] - for loss_key, loss_value in six.iteritems(losses_dict): - loss_name = "problem_%d/%s_loss" % (n, loss_key) - loss_moving_avg = tf.get_variable( - loss_name, initializer=100.0, trainable=False) - loss_variable_names.append(loss_name) - ops.append( - loss_moving_avg.assign(loss_moving_avg * 0.9 + loss_value * 0.1)) - total_loss += loss_value - try: # Total loss avg might be reused or not, we try both. - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - # Total loss was already constructed on input. - loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) - except ValueError: - loss_moving_avg = tf.get_variable( - "problem_%d/total_loss" % n, initializer=100.0, trainable=False) - ops.append( - loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1)) - with tf.variable_scope("train_stats"): # Count steps for this problem. - problem_steps = tf.get_variable( - "problem_%d_steps" % n, initializer=0, trainable=False) - ops.append(problem_steps.assign_add(1)) - with tf.control_dependencies(ops): # Make sure the ops run. - # Ensure the loss is a scalar here. - total_loss = tf.reshape(total_loss, [], name="total_loss_control_id") - return [total_loss] + sharded_logits # Need to flatten for cond later. - - result_list = input_fn_builder.cond_on_index(nth_model, - features["problem_choice"], 0, - len(my_hp.problems) - 1) + return model_fn(model, features, mode, hparams, **kwargs) - if mode == tf.estimator.ModeKeys.PREDICT: - # Beam search in sequence model returns both decodes withe key "outputs" - # and scores with they key "scores". If return list is a dict, we expect - # that it will have keys "outputs", a tensor of int32 and scores, a - # tensor of floats. This is useful if we want to return scores from - # estimator.predict - if not isinstance(result_list, dict): - predictions = {"outputs": result_list} - else: - predictions = { - "outputs": result_list["outputs"], - "scores": result_list["scores"] - } - - if "inputs" in features: - predictions["inputs"] = features["inputs"] - if "infer_targets" in features: - predictions["targets"] = features["infer_targets"] - predictions["problem_choice"] = (features["problem_choice"] * tf.ones( - (tf.shape(features["inputs"])[0],), dtype=tf.int32)) - - return tf.estimator.EstimatorSpec(mode, predictions=predictions) - - sharded_logits, total_loss = result_list[1:], result_list[0] - if mode == tf.estimator.ModeKeys.EVAL: - # For evaluation, return the logits layer as our predictions. - logits = tf.concat(sharded_logits, 0) - - eval_metrics_fns = metrics.create_evaluation_metrics( - zip(FLAGS.problems.split("-"), hparams.problem_instances), hparams) - _check_autotune_metrics(eval_metrics_fns) - - eval_metrics = {} - for metric_name, metric_fn in six.iteritems(eval_metrics_fns): - eval_metrics[metric_name] = metric_fn(logits, labels, - features["problem_choice"]) - - return tf.estimator.EstimatorSpec( - mode, - predictions={"predictions": logits}, - eval_metric_ops=eval_metrics, - loss=total_loss) - - assert mode == tf.estimator.ModeKeys.TRAIN - - # Some training statistics. - with tf.name_scope("training_stats"): - learning_rate = my_hp.learning_rate * learning_rate_decay() - learning_rate /= math.sqrt(float(FLAGS.worker_replicas)) - tf.summary.scalar("learning_rate", learning_rate) - global_step = tf.to_float(tf.contrib.framework.get_global_step()) - for n in xrange(len(my_hp.problems)): - names_and_vars = [] - with tf.variable_scope("losses_avg", reuse=True): - total_loss_var = tf.get_variable("problem_%d/total_loss" % n) - names_and_vars.append(("total_loss", total_loss_var)) - with tf.variable_scope("losses_avg", reuse=True): - for loss_name in loss_variable_names: - if loss_name.startswith("problem_%d/" % n): - loss_var = tf.get_variable(loss_name) - loss_suffix = loss_name[loss_name.index("/") + 1:] - names_and_vars.append((loss_suffix, loss_var)) - for (loss_name, loss_var) in names_and_vars: - tf.summary.scalar("loss_avg_%d/%s" % (n, loss_name), loss_var) - with tf.variable_scope("train_stats", reuse=True): - nth_steps = tf.get_variable("problem_%d_steps" % n, dtype=tf.int32) - tf.summary.scalar("problem_%d_frequency" % n, - tf.to_float(nth_steps) / (global_step + 1.0)) - - # Log trainable weights and add decay. - total_size, weight_decay_loss = 0, 0.0 - all_weights = {v.name: v for v in tf.trainable_variables()} - for v_name in sorted(list(all_weights)): - v = all_weights[v_name] - v_size = int(np.prod(np.array(v.shape.as_list()))) - total_size += v_size - if my_hp.weight_decay > 0.0 and len(v.shape.as_list()) > 1: - # Add weight regularization if set and the weight is not a bias (dim>1). - with tf.device(v._ref().device): # pylint: disable=protected-access - v_loss = tf.nn.l2_loss(v) / v_size - weight_decay_loss += v_loss - is_body = len(v_name) > 5 and v_name[:5] == "body/" - if my_hp.weight_noise > 0.0 and is_body: - # Add weight noise if set in my_hp. - with tf.device(v._ref().device): # pylint: disable=protected-access - scale = learning_rate * 0.001 - noise = tf.truncated_normal(v.shape) * my_hp.weight_noise * scale - noise_op = v.assign_add(noise) - with tf.control_dependencies([noise_op]): - total_loss = tf.identity(total_loss) - if my_hp.weight_decay > 0.0: - total_loss += weight_decay_loss * my_hp.weight_decay - if is_training: - total_loss *= small_batch_multiplier - total_loss = tf.identity(total_loss, name="total_loss") - log_variable_sizes(tf.trainable_variables(), "Trainable Variables") - diet_vars = [ - v for v in tf.global_variables() if v.dtype == dtypes.float16_ref - ] - log_variable_sizes(diet_vars, "Diet Varaibles") - # Define the train_op for the TRAIN mode. - opt = _ConditionalOptimizer(my_hp.optimizer, learning_rate, my_hp) - tf.logging.info("Computing gradients for global model_fn.") - opt_summaries = ["learning_rate", "loss"] - if hparams.summarize_grads: - opt_summaries.extend(["gradients", "gradient_norm"]) - train_op = tf.contrib.layers.optimize_loss( - name="training", - loss=total_loss, - global_step=tf.train.get_global_step(), - learning_rate=learning_rate, - clip_gradients=my_hp.clip_grad_norm or None, - gradient_noise_scale=hparams.grad_noise_scale or None, - optimizer=opt, - summaries=opt_summaries, - colocate_gradients_with_ops=True) - - # Remove summaries that will fail to run because they are in conditionals. - # TODO(cwhipkey): Test with this code removed, later in 2017. - summaries = tf.get_collection_ref(tf.GraphKeys.SUMMARIES) - for i in range(len(summaries) - 1, -1, -1): - if summaries[i].name.startswith("cond_"): - del summaries[i] - - tf.logging.info("Global model_fn finished.") - return tf.estimator.EstimatorSpec( - mode, - predictions={"problem_choice": features["problem_choice"]}, - loss=total_loss, - train_op=train_op) - - return model_fn + return wrapping_model_fn class _ConditionalOptimizer(tf.train.Optimizer): @@ -447,8 +391,97 @@ def _exp_decay_after(step, rate, from_which_step): name="exponential_decay_step_cond") -def _check_autotune_metrics(metrics_dict): - if (hasattr(FLAGS, "autotune") and FLAGS.autotune and - FLAGS.objective not in metrics_dict): +def _check_autotune_metrics(metrics_dict, autotune=False, objective=None): + if not autotune: + return + + if objective not in metrics_dict: raise ValueError("Tuning objective %s not among evaluation metrics %s" % - (FLAGS.objective, metrics_dict.keys())) + (objective, metrics_dict.keys())) + + +def _log_variable_sizes(var_list, tag): + """Log the sizes and shapes of variables, and the total size. + + Args: + var_list: a list of varaibles + tag: a string + """ + name_to_var = {v.name: v for v in var_list} + total_size = 0 + for v_name in sorted(list(name_to_var)): + v = name_to_var[v_name] + v_size = int(np.prod(np.array(v.shape.as_list()))) + tf.logging.info("Weight %s\tshape %s\tsize %d", + v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) + total_size += v_size + tf.logging.info("%s Total size: %d", tag, total_size) + + +def _get_variable_initializer(hparams): + if hparams.initializer == "orthogonal": + return tf.orthogonal_initializer(gain=hparams.initializer_gain) + elif hparams.initializer == "uniform": + max_val = 0.1 * hparams.initializer_gain + return tf.random_uniform_initializer(-max_val, max_val) + elif hparams.initializer == "normal_unit_scaling": + return tf.variance_scaling_initializer( + hparams.initializer_gain, mode="fan_avg", distribution="normal") + elif hparams.initializer == "uniform_unit_scaling": + return tf.variance_scaling_initializer( + hparams.initializer_gain, mode="fan_avg", distribution="uniform") + else: + raise ValueError("Unrecognized initializer: %s" % hparams.initializer) + + +def _learning_rate_decay(hparams, num_worker_replicas=1, num_train_steps=1): + """Inverse-decay learning rate until warmup_steps, then decay.""" + warmup_steps = tf.to_float( + hparams.learning_rate_warmup_steps * num_worker_replicas) + step = tf.to_float(tf.contrib.framework.get_global_step()) + if hparams.learning_rate_decay_scheme == "noam": + return 5000.0 * hparams.hidden_size**-0.5 * tf.minimum( + (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) + elif hparams.learning_rate_decay_scheme == "exp100k": + return 0.94**(step // 100000) + elif hparams.learning_rate_decay_scheme == "cosine": + cycle_steps = hparams.learning_rate_cosine_cycle_steps + return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) + elif hparams.learning_rate_decay_scheme == "cyclelinear10x": + # Cycle the rate linearly by 10x every warmup_steps, up and down. + cycle_steps = hparams.learning_rate_warmup_steps + cycle_position = step % (2 * cycle_steps) + cycle_position = tf.to_float( # Normalize to the interval [-1, 1]. + cycle_position - cycle_steps) / float(cycle_steps) + cycle_position = 1.0 - tf.abs(cycle_position) # 0 to 1 and back to 0. + return (cycle_position + 0.1) * 3.0 # 10x difference each cycle (0.3-3). + + inv_base = tf.exp(tf.log(0.01) / warmup_steps) + inv_decay = inv_base**(warmup_steps - step) + if hparams.learning_rate_decay_scheme == "sqrt": + decay = _sqrt_decay(step - warmup_steps) + elif hparams.learning_rate_decay_scheme == "exp10k": + decay = _exp_decay_after(step - warmup_steps, 0.9995, + num_train_steps - warmup_steps - 10000) + elif hparams.learning_rate_decay_scheme == "exp50k": + decay = _exp_decay_after(step - warmup_steps, 0.99995, + num_train_steps - warmup_steps - 50000) + elif hparams.learning_rate_decay_scheme == "exp500k": + decay = _exp_decay_after(step - warmup_steps, 0.9999955, + num_train_steps - warmup_steps - 500000) + elif hparams.learning_rate_decay_scheme == "none": + decay = tf.constant(1.0) + else: + raise ValueError("Unrecognized learning rate decay scheme: %s" % + hparams.learning_rate_decay_scheme) + return tf.cond( + step < warmup_steps, + lambda: inv_decay, + lambda: decay, + name="learning_rate_decay_warump_cond") + + +def _del_dict_nones(d): + for k in d.keys(): + if d[k] is None: + del d[k] diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 5ab3db70c..8ed7fb678 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -24,10 +24,11 @@ # Dependency imports +from tensor2tensor import models # pylint: disable=unused-import from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import from tensor2tensor.data_generators import problem_hparams -from tensor2tensor.models import models # pylint: disable=unused-import from tensor2tensor.utils import data_reader +from tensor2tensor.utils import decoding from tensor2tensor.utils import devices from tensor2tensor.utils import input_fn_builder from tensor2tensor.utils import model_builder @@ -103,31 +104,10 @@ flags.DEFINE_integer("ps_replicas", 0, "How many ps replicas.") # Decoding flags -flags.DEFINE_string("decode_from_file", None, "Path to decode file") -flags.DEFINE_bool("decode_interactive", False, - "Interactive local inference mode.") -flags.DEFINE_bool("decode_use_last_position_only", False, - "In inference, use last position only for speedup.") -flags.DEFINE_bool("decode_save_images", False, "Save inference input images.") -flags.DEFINE_string("decode_to_file", None, "Path to inference output file") -flags.DEFINE_integer("decode_shards", 1, "How many shards to decode.") -flags.DEFINE_integer("decode_problem_id", 0, "Which problem to decode.") -flags.DEFINE_integer("decode_extra_length", 50, "Added decode length.") -flags.DEFINE_integer("decode_batch_size", 32, "Batch size for decoding. " - "The decodes will be written to <filename>.decodes in" - "format result\tinput") -flags.DEFINE_integer("decode_beam_size", 4, "The beam size for beam decoding") -flags.DEFINE_float("decode_alpha", 0.6, "Alpha for length penalty") -flags.DEFINE_bool("decode_return_beams", False, - "Whether to return 1 (False) or all (True) beams. The \n " - "output file will have the format " - "<beam1>\t<beam2>..\t<input>") -flags.DEFINE_integer("decode_max_input_size", -1, - "Maximum number of ids in input. Or <= 0 for no max.") -flags.DEFINE_bool("identity_output", False, "To print the output as identity") -flags.DEFINE_integer("decode_num_samples", -1, - "Number of samples to decode. Currently used in " - "decode_from_dataset. Use -1 for all.") +flags.DEFINE_string( + "decode_hparams", "", + "Comma-separated list of name=value pairs to control decode behavior. " + "See decoding.decode_hparams for defaults.") def make_experiment_fn(data_dir, model_name, train_steps, eval_steps): @@ -195,8 +175,24 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): num_datashards=num_datashards, worker_replicas=FLAGS.worker_replicas, worker_id=FLAGS.worker_id) + + autotune = False + objective = None + if hasattr(FLAGS, "autotune"): + autotune = FLAGS.autotune + objective = FLAGS.objective + model_fn = model_builder.build_model_fn( + model_name, + problem_names=FLAGS.problems.split("-"), + train_steps=FLAGS.train_steps, + worker_id=FLAGS.worker_id, + worker_replicas=FLAGS.worker_replicas, + eval_run_autoregressive=FLAGS.eval_run_autoregressive, + decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), + autotune=autotune, + objective=objective) estimator = tf.estimator.Estimator( - model_fn=model_builder.build_model_fn(model_name), + model_fn=model_fn, model_dir=output_dir, params=hparams, config=tf.contrib.learn.RunConfig( diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index e71fc16c2..6045dd2e0 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -102,27 +102,31 @@ def testSingleEvalStepRawSession(self): FLAGS.problems = "tiny_algo" data_dir = "/tmp" # Used only when a vocab file or such like is needed. - # Create the problem object, hparams, model_fn, placeholders, features dict. + # Create the problem object, hparams, placeholders, features dict. encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir) - hparams = trainer_utils.create_hparams( - FLAGS.hparams_set, FLAGS.problems, data_dir) - model_fn = model_builder.build_model_fn(model_name) + hparams = trainer_utils.create_hparams(FLAGS.hparams_set, FLAGS.problems, + data_dir) inputs_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1]) # Make it 4D. + # In INFER mode targets can be None. targets_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1]) # Make it 4D. - features = {"inputs": batch_inputs, - "problem_choice": 0, # We run on the first problem here. - "input_space_id": hparams.problems[0].input_space_id, - "target_space_id": hparams.problems[0].target_space_id} + features = { + "inputs": batch_inputs, + "targets": batch_targets, + "problem_choice": 0, # We run on the first problem here. + "input_space_id": hparams.problems[0].input_space_id, + "target_space_id": hparams.problems[0].target_space_id + } # Now set a mode and create the graph by invoking model_fn. mode = tf.estimator.ModeKeys.EVAL - estimator_spec = model_fn( # In INFER mode targets can be None. - features, batch_targets, mode, hparams) + estimator_spec = model_builder.model_fn( + model_name, features, mode, hparams, problem_names=[FLAGS.problems]) predictions_dict = estimator_spec.predictions predictions = tf.squeeze( # These are not images, axis=2,3 are not needed. - predictions_dict["predictions"], axis=[2, 3]) + predictions_dict["predictions"], + axis=[2, 3]) # Having the graph, let's run it on some data. with self.test_session() as sess: diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index 166e0c9c5..d927c3eb4 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -192,8 +192,7 @@ } ], "source": [ - "model_fn=utils.model_builder.build_model_fn(MODEL)\n", - "spec = model_fn(features, target, tf.estimator.ModeKeys.EVAL, hparams)\n", + "spec = utils.model_builder.model_fn(MODEL, features, tf.estimator.ModeKeys.EVAL, hparams, problem_names=[PROBLEM])\n", "predictions_dict = spec.predictions", ] }, @@ -216,7 +215,7 @@ ], "source": [ "with tf.variable_scope(tf.get_variable_scope(), reuse=True):\n", - " spec = model_fn(features, target, tf.estimator.ModeKeys.PREDICT, hparams)\n", + " spec = utils.model_builder.model_fn(MODEL, features, tf.estimator.ModeKeys.PREDICT, hparams, problem_names=[PROBLEM])\n", " beam_out = spec.predictions['outputs']", ] }, From 76195816f6b073a6fe1823f467883b45f0d878ce Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Sun, 10 Sep 2017 11:08:00 -0700 Subject: [PATCH 0380/4095] Travis CI config for Py2 and Py3, plus fixes PiperOrigin-RevId: 168160985 --- .gitignore | 2 ++ .travis.yml | 16 ++++++++++++++++ setup.py | 4 ++-- .../data_generators/algorithmic_math.py | 7 +++---- tensor2tensor/data_generators/dna_encoder.py | 5 +++-- .../data_generators/generator_utils_test.py | 6 ++++-- .../data_generators/text_encoder_test.py | 7 +++++-- tensor2tensor/layers/common_layers.py | 2 +- tensor2tensor/layers/rev_block.py | 4 ++-- 9 files changed, 38 insertions(+), 15 deletions(-) create mode 100644 .travis.yml diff --git a/.gitignore b/.gitignore index c9dd3db88..1cfdcd120 100644 --- a/.gitignore +++ b/.gitignore @@ -3,9 +3,11 @@ # Byte-compiled _pycache__/ +.cache/ # Python egg metadata, regenerated from source files by setuptools. /*.egg-info +.eggs/ # PyPI distribution artifacts. build/ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..8f20ac24e --- /dev/null +++ b/.travis.yml @@ -0,0 +1,16 @@ +language: python +python: + - "2.7" + - "3.6" +before_install: + - sudo apt-get update -qq + - sudo apt-get install -qq libhdf5-dev +install: + - pip install tensorflow + - pip install .[tests] +script: + - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/utils/trainer_utils_test.py --ignore=tensor2tensor/problems_test.py + - pytest tensor2tensor/utils/registry_test.py + - pytest tensor2tensor/utils/trainer_utils_test.py +git: + depth: 3 \ No newline at end of file diff --git a/setup.py b/setup.py index 119eeea7e..a84f772b6 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ ], install_requires=[ 'bz2file', + 'future', 'numpy', 'requests', 'sympy', @@ -35,9 +36,8 @@ extras_require={ 'tensorflow': ['tensorflow>=1.3.0'], 'tensorflow_gpu': ['tensorflow-gpu>=1.3.0'], + 'tests': ['pytest', 'h5py', 'mock'], }, - tests_require=['nose'], - test_suite='nose.collector', classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', diff --git a/tensor2tensor/data_generators/algorithmic_math.py b/tensor2tensor/data_generators/algorithmic_math.py index e061ceb0b..93c8ad117 100644 --- a/tensor2tensor/data_generators/algorithmic_math.py +++ b/tensor2tensor/data_generators/algorithmic_math.py @@ -278,8 +278,7 @@ def generate_algebra_inverse_sample(vlist, ops, solve_ops, min_depth, left_str = str(left) right_str = str(right) target = str(algebra_inverse_solve(left, right, var, solve_ops)) - sample = var + ":" + left_str + "=" + right_str - + sample = "%s:%s=%s" % (var, left_str, right_str) return sample, target @@ -410,8 +409,8 @@ def math_dataset_init(alphabet_size=26, digits=None, functions=None): "/r": lambda l, r, to: (r, ExprNode(l, to, ops["/"])), } alphabet = ( - [six.int2byte(ord("a") + c) - for c in range(26)] + [six.int2byte(ord("A") + c) for c in range(26)]) + [six.int2byte(ord("a") + c).decode("utf-8") for c in range(26)] + + [six.int2byte(ord("A") + c).decode("utf-8") for c in range(26)]) if alphabet_size > 52: raise ValueError( "alphabet_size cannot be greater than 52. Got %s." % alphabet_size) diff --git a/tensor2tensor/data_generators/dna_encoder.py b/tensor2tensor/data_generators/dna_encoder.py index 0f6a8d68f..f084c9cd2 100644 --- a/tensor2tensor/data_generators/dna_encoder.py +++ b/tensor2tensor/data_generators/dna_encoder.py @@ -107,6 +107,7 @@ class DelimitedDNAEncoder(DNAEncoder): def __init__(self, delimiter=",", **kwargs): self._delimiter = delimiter + self._delimiter_key = tuple(self._delimiter) super(DelimitedDNAEncoder, self).__init__(**kwargs) @property @@ -114,11 +115,11 @@ def delimiter(self): return self._delimiter def _tokens(self): - return super(DelimitedDNAEncoder, self)._tokens() + [self.delimiter] + return super(DelimitedDNAEncoder, self)._tokens() + [self._delimiter_key] def encode(self, delimited_string): ids = [] for s in delimited_string.split(self.delimiter): ids.extend(super(DelimitedDNAEncoder, self).encode(s)) - ids.append(self._tokens_to_ids[self.delimiter]) + ids.append(self._tokens_to_ids[self._delimiter_key]) return ids[:-1] diff --git a/tensor2tensor/data_generators/generator_utils_test.py b/tensor2tensor/data_generators/generator_utils_test.py index 144507e6b..f058428fb 100644 --- a/tensor2tensor/data_generators/generator_utils_test.py +++ b/tensor2tensor/data_generators/generator_utils_test.py @@ -26,6 +26,8 @@ # Dependency imports +from builtins import bytes # pylint: disable=redefined-builtin + from tensor2tensor.data_generators import generator_utils import tensorflow as tf @@ -84,13 +86,13 @@ def testGunzipFile(self): # Create a test zip file and unzip it. with gzip.open(tmp_file_path + ".gz", "wb") as gz_file: - gz_file.write("test line") + gz_file.write(bytes("test line", "utf-8")) generator_utils.gunzip_file(tmp_file_path + ".gz", tmp_file_path + ".txt") # Check that the unzipped result is as expected. lines = [] for line in io.open(tmp_file_path + ".txt", "rb"): - lines.append(line.strip()) + lines.append(line.decode("utf-8").strip()) self.assertEqual(len(lines), 1) self.assertEqual(lines[0], "test line") diff --git a/tensor2tensor/data_generators/text_encoder_test.py b/tensor2tensor/data_generators/text_encoder_test.py index c13078808..b55a51bf4 100644 --- a/tensor2tensor/data_generators/text_encoder_test.py +++ b/tensor2tensor/data_generators/text_encoder_test.py @@ -27,6 +27,7 @@ # Dependency imports import mock +import six from tensor2tensor.data_generators import text_encoder import tensorflow as tf @@ -36,8 +37,10 @@ class NativeToUnicodeTest(tf.test.TestCase): def test_native_to_unicode(self): s = r"foo bar" - self.assertIsInstance(text_encoder.native_to_unicode(s), unicode) - self.assertEqual(text_encoder.native_to_unicode(s), u"foo bar") + s_unicode = text_encoder.native_to_unicode(s) + if six.PY2: + self.assertIsInstance(s_unicode, unicode) + self.assertEqual(s_unicode, u"foo bar") class EscapeUnescapeTokenTest(tf.test.TestCase): diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 264c11cf6..bd9ff896d 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -902,7 +902,7 @@ def multiscale_conv_sum(inputs, output_size, dilation_rates_and_kernel_sizes, results, counter = [], -1 for dilation_rate, kernel_size in dilation_rates_and_kernel_sizes: counter += 1 - if dilation_rate > 1: + if dilation_rate[0] > 1: pooled = pool(inputs, kernel_size, pooling_type, padding) else: pooled = inputs diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 9def9f481..8502e0a8b 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -286,8 +286,8 @@ def custom_grad_fn(inputs, variables, ys, grad_ys): # idxs. f_var_grads.reverse() g_var_grads.reverse() - for idxs, grads in zip(f_vars_idxs, f_var_grads) + zip( - g_vars_idxs, g_var_grads): + for idxs, grads in list(zip(f_vars_idxs, f_var_grads)) + list(zip( + g_vars_idxs, g_var_grads)): for i, grad in zip(idxs, grads): variable_grads[i] = grad From ff231f08a259c02e70216b220e84207d422a4af3 Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Sun, 10 Sep 2017 15:36:38 -0700 Subject: [PATCH 0381/4095] Added an assert in common_attention. PiperOrigin-RevId: 168170097 --- tensor2tensor/layers/common_attention.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 42ae089cd..6f7c9fa23 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -1080,6 +1080,7 @@ def multihead_attention_2d(query_antecedent, x = local_attention_2d( q, k, v, query_shape=query_shape, memory_flange=memory_flange) else: + assert attention_type == "masked_local_attention_2d" x = masked_local_attention_2d(q, k, v, query_shape=query_shape, memory_flange=memory_flange) x = combine_heads_2d(x) From 31ac28f6dee3a9625a38219a6932d58ba0d21752 Mon Sep 17 00:00:00 2001 From: vfdev-5 <vfdev.5@gmail.com> Date: Mon, 11 Sep 2017 21:36:45 +0200 Subject: [PATCH 0382/4095] * Fix ipynb format --- .../TransformerVisualization.ipynb | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index d927c3eb4..bf0a269d0 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -15,7 +15,9 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from __future__ import absolute_import\n", @@ -34,7 +36,9 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -72,7 +76,9 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -105,6 +111,7 @@ "cell_type": "code", "execution_count": 4, "metadata": { + "collapsed": false, "scrolled": true }, "outputs": [ @@ -178,7 +185,9 @@ { "cell_type": "code", "execution_count": 6, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -193,13 +202,15 @@ ], "source": [ "spec = utils.model_builder.model_fn(MODEL, features, tf.estimator.ModeKeys.EVAL, hparams, problem_names=[PROBLEM])\n", - "predictions_dict = spec.predictions", + "predictions_dict = spec.predictions" ] }, { "cell_type": "code", "execution_count": 7, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -216,7 +227,7 @@ "source": [ "with tf.variable_scope(tf.get_variable_scope(), reuse=True):\n", " spec = utils.model_builder.model_fn(MODEL, features, tf.estimator.ModeKeys.PREDICT, hparams, problem_names=[PROBLEM])\n", - " beam_out = spec.predictions['outputs']", + " beam_out = spec.predictions['outputs']" ] }, { @@ -229,7 +240,9 @@ { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -309,6 +322,7 @@ "cell_type": "code", "execution_count": 10, "metadata": { + "collapsed": false, "scrolled": false }, "outputs": [ @@ -355,7 +369,9 @@ { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -394,7 +410,9 @@ { "cell_type": "code", "execution_count": 14, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -442,6 +460,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true, "scrolled": true }, "outputs": [], @@ -469,9 +488,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.12" + "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} From 6aeb3c109d66f6d7e76c04743f4109c73861836f Mon Sep 17 00:00:00 2001 From: Stuart Axelbrooke <stuart@axelbrooke.com> Date: Fri, 22 Sep 2017 01:05:05 +0900 Subject: [PATCH 0383/4095] Add missing trailing slash in README example (#301) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 445218ca7..af9778725 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ t2t-decoder \ --problems=translate_ende_wmt32k \ --model=transformer \ --hparams_set=transformer_base_single_gpu \ - --output_dir=~/t2t_train/base + --output_dir=~/t2t_train/base \ --decode_interactive ``` From c3dedef6c2ba4219046cb6f0633dbb335582da42 Mon Sep 17 00:00:00 2001 From: Stuart Axelbrooke <stuart@axelbrooke.com> Date: Fri, 22 Sep 2017 01:06:37 +0900 Subject: [PATCH 0384/4095] Fix key deletion during key iteration in Python 3 (#304) Deletion of dict keys while iterating over said dict is a no-no in Python 3, and RuntimeErrors. This simply consumes the dict key iterator into a list before iterative deletion. --- tensor2tensor/utils/model_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index bda22f4ee..7c4172743 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -482,6 +482,6 @@ def _learning_rate_decay(hparams, num_worker_replicas=1, num_train_steps=1): def _del_dict_nones(d): - for k in d.keys(): + for k in list(d.keys()): if d[k] is None: del d[k] From 1b6905f48793717b5e117257e1c1807a4dc642ea Mon Sep 17 00:00:00 2001 From: Jerry Liu <twairball@yahoo.com> Date: Fri, 22 Sep 2017 00:11:17 +0800 Subject: [PATCH 0385/4095] fix #252 encoded vocab filepath for wmt17.zh-en (#293) --- tensor2tensor/data_generators/wmt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 8d6cdae6f..befb9ac7f 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -510,9 +510,9 @@ def target_space_id(self): def feature_encoders(self, data_dir): vocab_size = self.targeted_vocab_size source_vocab_filename = os.path.join(data_dir, - "vocab.zh.%d" % vocab_size) + "vocab.zhen-zh.%d" % vocab_size) target_vocab_filename = os.path.join(data_dir, - "vocab.en.%d" % vocab_size) + "vocab.zhen-en.%d" % vocab_size) source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) return { From a852994a3bf336fb90f45950fc0a6b71260e111c Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Mon, 11 Sep 2017 13:18:05 -0700 Subject: [PATCH 0386/4095] Attention moe can mix attention layer types PiperOrigin-RevId: 168274573 --- tensor2tensor/models/attention_lm_moe.py | 66 +++++++++++++++++++----- 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index adbb871b5..3afe77fc0 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -60,6 +60,13 @@ def get_choices(): ] +LAYER_SYMBOLS = { + "h": AttentionType.MULTIHEAD, # multi-Head + "e": AttentionType.LOCAL_EXPERTS, # Experts + "m": AttentionType.MEMORY_EFFICIENT, # Memory +} + + @registry.register_model class AttentionLmMoe(t2t_model.T2TModel): """Attention net. See file docstring.""" @@ -133,11 +140,20 @@ def print_shape(x, suffix, debug=False): assert hparams.batch_size >= hparams.max_length - for layer in xrange(hparams.num_hidden_layers): + num_hidden_layers = ( + len(hparams.attention_layers) or hparams.num_hidden_layers) + for layer in xrange(num_hidden_layers): with tf.variable_scope("layer_%d" % layer): + + # Use the layer type defined in attention_layers + if hparams.attention_layers: + attention_type = LAYER_SYMBOLS[hparams.attention_layers[layer]] + else: + attention_type = hparams.attention_type + with tf.variable_scope( - "attention_{}".format(hparams.attention_type)): - if hparams.attention_type == AttentionType.MULTIHEAD: + "attention_{}".format(attention_type)): + if attention_type == AttentionType.MULTIHEAD: y = dp( common_attention.multihead_attention, preprocess(x), @@ -151,7 +167,7 @@ def print_shape(x, suffix, debug=False): attention_type=("local_mask_right" if hparams.attention_local else "dot_product"), name="decoder_self_attention") - elif hparams.attention_type == AttentionType.MEMORY_EFFICIENT: + elif attention_type == AttentionType.MEMORY_EFFICIENT: assert hparams.layer_preprocess_sequence == "n" y = dp( common_attention.multihead_self_attention_memory_efficient, @@ -159,7 +175,7 @@ def print_shape(x, suffix, debug=False): decoder_self_attention_bias, hparams.num_heads, name="decoder_self_attention") - elif hparams.attention_type == AttentionType.LOCAL_EXPERTS: + elif attention_type == AttentionType.LOCAL_EXPERTS: y, loss = dp( common_attention.local_expert_attention, preprocess(x), @@ -350,6 +366,10 @@ def attention_lm_moe_base(): hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("moe_layers", "2") # comma separated list of layer numbers # moe params. local attention moe. + # If attention_layers is set, the num_hidden_layers parameter will be ignored + # and each caracter of the string will correspond to one attention + # layer type + hparams.add_hparam("attention_layers", "") hparams.add_hparam("attention_type", AttentionType.MULTIHEAD) hparams.add_hparam("attention_local", int(False)) hparams.add_hparam("attention_moe_k", 2) @@ -370,14 +390,24 @@ def attention_lm_moe_base(): @registry.register_hparams -def attention_lm_moe_base_ae(): - """Base model with attention expert.""" +def attention_lm_moe_base_long_seq(): + """Hyper parameters specifics for long sequence generation.""" hparams = attention_lm_moe_base() - hparams.attention_type = AttentionType.LOCAL_EXPERTS - hparams.use_sepconv = int(True) + hparams.max_length = 0 # max_length == batch_size hparams.eval_drop_long_sequences = int(True) hparams.min_length_bucket = 256 # Avoid cyclic problems for big batches + hparams.use_sepconv = int(True) + + return hparams + + +@registry.register_hparams +def attention_lm_moe_base_ae(): + """Base model with attention expert.""" + hparams = attention_lm_moe_base_long_seq() + hparams.attention_type = AttentionType.LOCAL_EXPERTS + hparams.learning_rate = 0.05 hparams.learning_rate_warmup_steps = 10000 # According to noam, ("n", "da") seems better for harder-to-learn models @@ -389,12 +419,20 @@ def attention_lm_moe_base_ae(): @registry.register_hparams def attention_lm_moe_base_local(): """Base model with attention expert.""" - hparams = attention_lm_moe_base() + hparams = attention_lm_moe_base_long_seq() hparams.attention_local = int(True) - hparams.use_sepconv = int(True) - hparams.max_length = 0 # max_length == batch_size - hparams.eval_drop_long_sequences = int(True) - hparams.min_length_bucket = 256 # Avoid cyclic problems for big batches + return hparams + + +@registry.register_hparams +def attention_lm_moe_base_hybrid(): + """Base model with attention expert.""" + hparams = attention_lm_moe_base_long_seq() + hparams.attention_layers = "hehe" # Alternate local/expert + hparams.attention_local = int(True) + + # hparams.layer_preprocess_sequence = "n" + # hparams.layer_postprocess_sequence = "da" return hparams From 017f83a91da0a4f481834fa0eac44d446774acfe Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Mon, 11 Sep 2017 14:16:09 -0700 Subject: [PATCH 0387/4095] Bug fixes in masked_local_attention_2d and local_attention_2d. We needed to scatter the representations after attention back into the right positions. Added test2dGatherAndScatter, which tests for invertibility of 2d gather and scatter functions. PiperOrigin-RevId: 168283655 --- tensor2tensor/layers/common_attention.py | 49 ++++++++++++------- tensor2tensor/layers/common_attention_test.py | 20 ++++++++ 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 6f7c9fa23..3b89ef1bc 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -650,16 +650,13 @@ def local_attention_2d(q, """ with tf.variable_scope( name, default_name="local_self_attention_2d", values=[q, k, v]): + q_shape = q.get_shape().as_list() v_shape = tf.shape(v) - depth_v = tf.shape(v)[4] - batch_size = tf.shape(q)[0] - num_heads = tf.shape(q)[1] - original_length = tf.shape(q)[2] * tf.shape(q)[3] q = pad_to_multiple_2d(q, query_shape) k = pad_to_multiple_2d(k, query_shape) v = pad_to_multiple_2d(v, query_shape) - + padded_q_shape = tf.shape(q) # Setting up k and v values paddings = [[0, 0], [0, 0], [memory_flange[0], memory_flange[1]], [memory_flange[0], memory_flange[1]], [0, 0]] @@ -684,12 +681,13 @@ def local_attention_2d(q, attention = tf.nn.softmax(logits + attention_bias) output = tf.matmul(attention, v_new) - - output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) - # Remove the padding if introduced - output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) - # [batch, heads, h, w, depth_v] - return tf.reshape(output, v_shape) + # putting the representations back in the right place + output = scatter_blocks_2d(output, q_indices, padded_q_shape) + # Remove the padding if introduced + output = tf.slice(output, [0, 0, 0, 0, 0], + [-1, -1, v_shape[2], v_shape[3], -1]) + output.set_shape(q_shape) + return output def pad_to_multiple_2d(x, block_shape): @@ -726,6 +724,19 @@ def gather_blocks_2d(x, indices): return tf.transpose(x_new, [2, 3, 0, 1, 4]) +def scatter_blocks_2d(x, indices, shape): + """scatters blocks from x into shape with indices.""" + x_shape = tf.shape(x) + # [length, batch, heads, dim] + x_t = tf.transpose(tf.reshape(x, [x_shape[0], x_shape[1], -1, x_shape[-1]]), + [2, 0, 1, 3]) + x_t_shape = tf.shape(x_t) + indices = tf.reshape(indices, [-1, 1]) + scattered_x = tf.scatter_nd(indices, x_t, x_t_shape) + scattered_x = tf.transpose(scattered_x, [1, 2, 0, 3]) + return tf.reshape(scattered_x, shape) + + def gather_indices_2d(x, block_shape, block_stride): """Getting gather indices.""" # making an identity matrix kernel @@ -769,11 +780,8 @@ def masked_local_attention_2d(q, """ with tf.variable_scope( name, default_name="local_masked_self_attention_2d", values=[q, k, v]): + q_shape = q.get_shape().as_list() v_shape = tf.shape(v) - depth_v = tf.shape(v)[4] - batch_size = tf.shape(q)[0] - num_heads = tf.shape(q)[1] - original_length = tf.shape(q)[2] * tf.shape(q)[3] def make_mask(query_shape, memory_flange): """creates a mask. @@ -808,6 +816,7 @@ def make_mask(query_shape, memory_flange): # 0. is visible location, 1.0 is masked. return 1. - final_mask q = pad_to_multiple_2d(q, query_shape) + padded_q_shape = tf.shape(q) k = pad_to_multiple_2d(k, query_shape) v = pad_to_multiple_2d(v, query_shape) # Setting up k and v values. Padding top, left, and right @@ -838,11 +847,13 @@ def make_mask(query_shape, memory_flange): tf.to_float(tf.logical_or(attention_mask, padding_mask)) *-1e9) attention = tf.nn.softmax(logits + attention_bias) output = tf.matmul(attention, v_new) - output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) + # putting the representations back in the right place + output = scatter_blocks_2d(output, q_indices, padded_q_shape) # Remove the padding if introduced - output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) - # [batch, heads, h, w, depth_v] - return tf.reshape(output, v_shape) + output = tf.slice(output, [0, 0, 0, 0, 0], + [-1, -1, v_shape[2], v_shape[3], -1]) + output.set_shape(q_shape) + return output def compute_qkv(query_antecedent, memory_antecedent, total_key_depth, diff --git a/tensor2tensor/layers/common_attention_test.py b/tensor2tensor/layers/common_attention_test.py index d8f6f2b39..644b27a98 100644 --- a/tensor2tensor/layers/common_attention_test.py +++ b/tensor2tensor/layers/common_attention_test.py @@ -162,5 +162,25 @@ def testMultiheadSelfAttentionMemoryEfficient(self): self.assertAllClose(dnorm_bias, dnorm_bias_f) self.assertAllClose(dx, dx_f) + def test2dGatherAndScatter(self): + """2d gather and scatter invertibility test.""" + batch_size = 2 + num_heads = 2 + height = 4 + width = 6 + depth = 8 + query_shape = (2, 3) + x = np.random.rand(batch_size, num_heads, height, width, depth) + with self.test_session() as session: + x_indices = common_attention.gather_indices_2d( + x, query_shape, query_shape) + gathered_x = common_attention.gather_blocks_2d(x, x_indices) + x_shape = tf.constant([batch_size, num_heads, height, width, depth]) + scattered_x = common_attention.scatter_blocks_2d( + gathered_x, x_indices, x_shape) + session.run(tf.global_variables_initializer()) + res = session.run(scattered_x) + self.assertAllClose(x, res) + if __name__ == "__main__": tf.test.main() From b5db405a4dc09bb72241dceebac9806cefa348e4 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Mon, 11 Sep 2017 15:06:11 -0700 Subject: [PATCH 0388/4095] Fix the pad_remover for attention expert when hybrid attention layers. Now only applied for the attention expert. PiperOrigin-RevId: 168292028 --- tensor2tensor/models/attention_lm_moe.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 3afe77fc0..abdd68c8b 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -105,8 +105,7 @@ def _diet_expert(x): expert_fn = expert_utils.ffn_expert_fn( hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) - if (hparams.attention_type == AttentionType.LOCAL_EXPERTS - and not hparams.use_inputs): + if not hparams.use_inputs: # As preprocess and postprocess are called with batch of size one (all # batches concatenated), we just make sure that batch_norm is not use ( # should not either way) @@ -135,8 +134,6 @@ def print_shape(x, suffix, debug=False): batch_coordinate = dp_remove_pad(batch_coordinate) x = dp(print_shape, x, "in") - x = dp_remove_pad(x) - x = dp(print_shape, x, "in_flat") assert hparams.batch_size >= hparams.max_length @@ -176,9 +173,11 @@ def print_shape(x, suffix, debug=False): hparams.num_heads, name="decoder_self_attention") elif attention_type == AttentionType.LOCAL_EXPERTS: + x_in = preprocess(x) + x_in = dp_remove_pad(x_in) y, loss = dp( common_attention.local_expert_attention, - preprocess(x), + x_in, k=hparams.attention_moe_k, loss_coef=hparams.attention_load_balance, attention_num_experts=hparams.attention_num_experts, @@ -188,6 +187,7 @@ def print_shape(x, suffix, debug=False): split_batch=bool(hparams.attention_split_batch), attention_kq_size=hparams.attention_kq_size, attention_v_size=hparams.attention_v_size) + y = dp_restore_pad(y) # TODO(avaswani, epot, noam): Do we need to divide by num shards ? extra_loss += tf.add_n(loss) / dp.n else: @@ -214,15 +214,8 @@ def print_shape(x, suffix, debug=False): x, hparams.filter_size) else: - x_in = preprocess(x) additional_conv_params = dict() if hparams.use_sepconv: - # Restore padding so sequences don't attend to each others - # restore_pad will apply a reshape like x_ref, to restore the - # original shape. Here this works because the last dimension is - # constant between the output of attention and the original input - # but it shouldn't necessarily be the case. - x_in = dp_restore_pad(x_in) additional_conv_params = dict( padding="LEFT", # Parameters copied from the transformer model @@ -231,19 +224,15 @@ def print_shape(x, suffix, debug=False): ) y = dp( common_layers.conv_hidden_relu, - x_in, + preprocess(x), hparams.filter_size, hparams.hidden_size, dropout=hparams.relu_dropout, **additional_conv_params ) - if hparams.use_sepconv: - y = dp_remove_pad(y) x = postprocess(x, y) x = preprocess(x) - x = dp_restore_pad(x) - decoder_output = dp(tf.expand_dims, x, 2) return decoder_output, extra_loss From 15682d535244ca33983d8933df9725d459e02d4f Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Mon, 11 Sep 2017 16:23:33 -0700 Subject: [PATCH 0389/4095] Added a new model "aligned" for aligned sequence problems without autoregression/masking. PiperOrigin-RevId: 168302680 --- tensor2tensor/models/__init__.py | 1 + tensor2tensor/models/aligned.py | 256 +++++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+) create mode 100644 tensor2tensor/models/aligned.py diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index acebef809..f5fafe706 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -23,6 +23,7 @@ # pylint: disable=unused-import from tensor2tensor.layers import modalities +from tensor2tensor.models import aligned from tensor2tensor.models import attention_lm from tensor2tensor.models import attention_lm_moe from tensor2tensor.models import bluenet diff --git a/tensor2tensor/models/aligned.py b/tensor2tensor/models/aligned.py new file mode 100644 index 000000000..9cadc0cae --- /dev/null +++ b/tensor2tensor/models/aligned.py @@ -0,0 +1,256 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Single stack of transformations with no masking. + +Produces output aligned with inputs. + +Configurable using hyperparameters to use some combination of convolutions, +attention, mixtures of experts, etc. + +A good problem for this model is languagemodel_wiki_scramble1k50 . +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers +from tensor2tensor.utils import diet +from tensor2tensor.utils import expert_utils +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +ModeKeys = tf.estimator.ModeKeys # pylint: disable=invalid-name + + +def _should_preprocess(layer_type): + return layer_type not in ["timing", "pos_emb"] + + +def _should_postprocess(layer_type): + return layer_type not in ["timing", "pos_emb"] + + +@registry.register_model +class Aligned(t2t_model.T2TModel): + """Attention net. See file docstring.""" + + def model_fn_body_sharded(self, sharded_features): + # Remove dropout if not training + hparams = self._hparams + dp = self._data_parallelism + x = dp(tf.squeeze, sharded_features["inputs"], 2) + def preprocess(x): + return dp(common_layers.layer_preprocess, x, hparams) + def postprocess(x, y): + return dp(common_layers.layer_postprocess, x, y, hparams) + x = dp(tf.nn.dropout, x, 1.0 - hparams.layer_prepostprocess_dropout) + extra_loss = 0.0 + ffn_hidden_sizes = [int(s) for s in hparams.ffn_hidden_sizes.split(",")] + moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] + if hparams.diet_experts: + hsize, = moe_hidden_sizes + + def _diet_expert(x): + return diet.diet_expert(x, hsize, diet.diet_adam_optimizer_params()) + + expert_fn = _diet_expert + else: + expert_fn = expert_utils.ffn_expert_fn( + hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) + + batch_coordinate = dp(get_batch_coordinate, x) + + assert hparams.batch_size >= hparams.max_length + + layers = hparams.layers.strip(",").split(",") + for layer_num, layer_type in enumerate(layers): + with tf.variable_scope("%s_%d" % (layer_type, layer_num)): + if _should_preprocess(layer_type): + x = preprocess(x) + if layer_type == "timing": + y = dp(common_attention.add_timing_signal_nd, x) + elif layer_type == "pos_emb": + y = dp(common_attention.add_positional_embedding_nd, + x, hparams.max_length, name="pos_emb") + elif layer_type == "att": + # multihead attention + y = dp( + common_attention.multihead_attention, + x, + None, + None, # bias + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + attention_type=("local_unmasked" if hparams.attention_local + else "dot_product"), + name="decoder_self_attention") + elif layer_type == "local_expert_attention": + y, loss = dp( + common_attention.local_expert_attention, + x, + k=hparams.attention_moe_k, + loss_coef=hparams.attention_load_balance, + attention_num_experts=hparams.attention_num_experts, + train=hparams.mode == ModeKeys.TRAIN, + batch_coordinate=batch_coordinate, + mask_right=False, + split_batch=bool(hparams.attention_split_batch), + attention_kq_size=hparams.attention_kq_size, + attention_v_size=hparams.attention_v_size) + # TODO(avaswani, epot, noam): Do we need to divide by num shards ? + extra_loss += tf.add_n(loss) / dp.n + elif layer_type == "moe": + y, loss = expert_utils.distributed_moe( + dp, + self._ps_devices, + x, + hparams.mode == ModeKeys.TRAIN, + input_size=hparams.hidden_size, + expert_fn=expert_fn, + num_experts=hparams.moe_num_experts, + k=hparams.moe_k, + loss_coef=hparams.moe_loss_coef) + extra_loss += loss + elif layer_type == "ffn": + y = dp( + expert_utils.ffn_expert_fn( + hparams.hidden_size, + ffn_hidden_sizes, + hparams.hidden_size), + dp(expert_utils.flatten_all_but_last, x)) + y = dp(expert_utils.reshape_like, y, x) + elif layer_type == "conv": + y = dp( + common_layers.conv1d, + x, + hparams.hidden_size, + hparams.kernel_height, + activation=tf.nn.relu, + padding="SAME", + ) + else: + assert False, "unknown sublayer %s" % layer_type + if _should_postprocess(layer_type): + x = postprocess(x, y) + else: + x = y + x = preprocess(x) + + decoder_output = dp(tf.expand_dims, x, 2) + return decoder_output, extra_loss + + +def get_batch_coordinate(x): + """Return a flat int32 tensor of shape [1, batch_size*length, 1].""" + # Compute the batch coordinate before flattening all batches + batch_coordinate = tf.expand_dims( + common_attention.coordinate_tensor(tf.shape(x)[:-1], axis=0), axis=-1) + return batch_coordinate + + +@registry.register_hparams +def aligned_base(): + """Set of hyperparameters. + + Returns: + a hparams object + """ + hparams = common_hparams.basic_params1() + hparams.hidden_size = 512 + hparams.batch_size = 5000 + hparams.max_length = 1024 + hparams.dropout = 0.0 + hparams.layer_prepostprocess_dropout = 0.0 + hparams.label_smoothing = 0.0 + hparams.clip_grad_norm = 0. # i.e. no gradient clipping + hparams.optimizer_adam_epsilon = 1e-9 + hparams.learning_rate_decay_scheme = "noam" + hparams.learning_rate = 0.1 + hparams.learning_rate_warmup_steps = 2000 + hparams.initializer_gain = 1.0 + hparams.initializer = "uniform_unit_scaling" + hparams.weight_decay = 0.0 + hparams.optimizer_adam_beta1 = 0.9 + hparams.optimizer_adam_beta2 = 0.98 + hparams.shared_embedding_and_softmax_weights = int(False) + hparams.add_hparam("ffn_hidden_sizes", "2048") # Add new ones like this. + hparams.moe_num_experts = 32 + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + hparams.add_hparam("layers", "timing," + "att,ffn," * 4) + + # attention-related flags + hparams.add_hparam("num_heads", 8) + hparams.add_hparam("attention_key_channels", 0) + hparams.add_hparam("attention_value_channels", 0) + # All hyperparameters ending in "dropout" are automatically set to 0.0 + # when not in training mode. + hparams.add_hparam("attention_dropout", 0.0) + hparams.add_hparam("pos", "timing") # timing, none + # moe params. local attention moe. + hparams.add_hparam("attention_local", int(False)) + hparams.add_hparam("attention_moe_k", 2) + hparams.add_hparam("attention_num_experts", 16) + hparams.add_hparam("attention_split_batch", int(False)) + # Key, query and value dimensions for the attention + hparams.add_hparam("attention_kq_size", 128) + hparams.add_hparam("attention_v_size", 256) + # Loss coef for load balancing + hparams.add_hparam("attention_load_balance", 2e-2) + hparams.add_hparam("diet_experts", int(False)) + hparams.add_hparam("memory_efficient_ffn", int(False)) + # if True, we learn a non-autoregressive model from "inputs" to "targets". + # if False, we learn an autoregressive model to generate "targets" + return hparams + + +@registry.register_hparams +def aligned_with_conv(): + hparams = aligned_base() + hparams.layers = "timing," + "conv,att,ffn," * 4 + return hparams + + +@registry.register_hparams +def aligned_local(): + hparams = aligned_base() + hparams.attention_local = int(True) + return hparams + + +@registry.register_hparams +def aligned_pos_emb(): + hparams = aligned_base() + hparams.layers = "pos_emb," + "att,ffn," * 4 + return hparams + + +@registry.register_hparams +def aligned_moe(): + hparams = aligned_base() + hparams.layers = "timing," + "att,moe," * 4 + return hparams From a7c70874e0e545dd6d890c1122e4005e2b65ccf3 Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Tue, 12 Sep 2017 17:12:25 -0700 Subject: [PATCH 0390/4095] Added tests for 2-d local attention. Refactoring to use dot_product_attention in local_1d and 2d attention functions. Adding a flag for image summaries in dot_product_attention because we need to figure out the best way to get image summaries in 2d functions. PiperOrigin-RevId: 168472353 --- tensor2tensor/layers/common_attention.py | 101 +++++++++--------- tensor2tensor/layers/common_attention_test.py | 65 ++++++++++- 2 files changed, 117 insertions(+), 49 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 3b89ef1bc..fdba48b01 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -416,7 +416,8 @@ def dot_product_attention(q, bias, dropout_rate=0.0, image_shapes=None, - name=None): + name=None, + make_image_summary=True): """dot-product attention. Args: @@ -428,6 +429,7 @@ def dot_product_attention(q, image_shapes: optional tuple of integer scalars. see comments for attention_image_summary() name: an optional string + make_image_summary: True if you want an image summary. Returns: A Tensor. @@ -443,7 +445,8 @@ def dot_product_attention(q, weights = tf.nn.dropout(weights, 1.0 - dropout_rate) if (not tf.get_variable_scope().reuse and # Summaries don't work well within tf.while_loop() - "/while/" not in tf.contrib.framework.get_name_scope()): + "/while/" not in tf.contrib.framework.get_name_scope() and + make_image_summary): attention_image_summary(weights, image_shapes) return tf.matmul(weights, v) @@ -616,11 +619,9 @@ def pad_l_and_r(x, pad_length): v_new = tf.gather(v_t, gather_indices) v_new = tf.transpose(v_new, [2, 3, 0, 1, 4]) - logits = tf.matmul(q, k_new, transpose_b=True) - - attention = tf.nn.softmax(logits + attention_bias) - output = tf.matmul(attention, v_new) - + output = dot_product_attention( + q, k_new, v_new, attention_bias, dropout_rate=0., name="local_1d", + make_image_summary=False) output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) # Remove the padding if introduced output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) @@ -677,10 +678,9 @@ def local_attention_2d(q, attention_bias = tf.expand_dims( tf.to_float(embedding_to_padding(k_new)) * -1e9, axis=-2) - logits = tf.matmul(q_new, k_new, transpose_b=True) - - attention = tf.nn.softmax(logits + attention_bias) - output = tf.matmul(attention, v_new) + output = dot_product_attention(q_new, k_new, v_new, attention_bias, + dropout_rate=0., name="local_2d", + make_image_summary=False) # putting the representations back in the right place output = scatter_blocks_2d(output, q_indices, padded_q_shape) # Remove the padding if introduced @@ -756,6 +756,42 @@ def gather_indices_2d(x, block_shape, block_stride): return tf.cast(indices, tf.int32) +def make_2d_block_raster_mask(query_shape, memory_flange): + """creates a mask for 2d block raster scany. + + The query mask can look to the left, top left, top, and top right, but + not to the right. Inside the query, we have the standard raster scan + masking. + Args: + query_shape: A tuple of ints (query_height, query_width) + memory_flange: A tuple of ints + (memory_flange_height, memory_flange_width) + + Returns: + A tensor of shape query_size, memory_size + """ + # mask inside the query block + query_triangle = tf.matrix_band_part( + tf.ones([np.prod(query_shape), np.prod(query_shape)]), -1, 0) + split_query_masks = tf.split(query_triangle, query_shape[0], axis=1) + # adding mask for left and right + mask_pieces = [ + tf.concat( + [tf.ones([np.prod(query_shape), memory_flange[1]]), + split_query_masks[i], + tf.zeros([np.prod(query_shape), memory_flange[1]]) + ], axis=1) for i in range(query_shape[0])] + # adding mask for top + final_mask = tf.concat( + [tf.ones( + [np.prod(query_shape), + (query_shape[1]+2*memory_flange[1])*memory_flange[0]]), + tf.concat(mask_pieces, axis=1) + ], axis=1) + # 0. is visible location, 1.0 is masked. + return 1. - final_mask + + def masked_local_attention_2d(q, k, v, @@ -782,39 +818,7 @@ def masked_local_attention_2d(q, name, default_name="local_masked_self_attention_2d", values=[q, k, v]): q_shape = q.get_shape().as_list() v_shape = tf.shape(v) - def make_mask(query_shape, memory_flange): - """creates a mask. - - The query mask can look to the left, top left, top, and top right, but - not the right. Inside the query, we have the standard raster scan - masking. - Args: - query_shape: A tuple of ints (query_height, query_width) - memory_flange: A tuple of ints - (memory_flange_height, memory_flange_width) - - Returns: - A tensor of shape query_size, memory_size - """ - - query_triangle = tf.matrix_band_part( - tf.ones([np.prod(query_shape), np.prod(query_shape)]), -1, 0) - split_query_masks = tf.split(query_triangle, query_shape[0], axis=1) - mask_pieces = [ - tf.concat( - [tf.ones([np.prod(query_shape), memory_flange[1]]), - split_query_masks[i], - tf.zeros([np.prod(query_shape), memory_flange[1]]) - ], axis=1) for i in range(query_shape[0])] - - final_mask = tf.concat( - [tf.ones( - [np.prod(query_shape), - (query_shape[1]+2*memory_flange[1])*memory_flange[0]]), - tf.concat(mask_pieces, axis=1) - ], axis=1) - # 0. is visible location, 1.0 is masked. - return 1. - final_mask + q = pad_to_multiple_2d(q, query_shape) padded_q_shape = tf.shape(q) k = pad_to_multiple_2d(k, query_shape) @@ -833,20 +837,21 @@ def make_mask(query_shape, memory_flange): k_and_v_indices = gather_indices_2d(k, memory_shape, query_shape) k_new = gather_blocks_2d(k, k_and_v_indices) v_new = gather_blocks_2d(v, k_and_v_indices) - logits = tf.matmul(q_new, k_new, transpose_b=True) # Combining the mask for padding and visible region attention_mask_shape = [np.prod(query_shape), (query_shape[0]+memory_flange[0])* (query_shape[1]+2*memory_flange[1])] - attention_mask = tf.cast(make_mask(query_shape, memory_flange), tf.bool) + attention_mask = tf.cast( + make_2d_block_raster_mask(query_shape, memory_flange), tf.bool) # reshaping attention mask to have same dims as logits attention_mask = tf.reshape(attention_mask, [1, 1, 1]+attention_mask_shape) padding_mask = tf.expand_dims( tf.cast(embedding_to_padding(k_new), tf.bool), axis=-2) attention_bias = ( tf.to_float(tf.logical_or(attention_mask, padding_mask)) *-1e9) - attention = tf.nn.softmax(logits + attention_bias) - output = tf.matmul(attention, v_new) + output = dot_product_attention(q_new, k_new, v_new, attention_bias, + dropout_rate=0., name="masked_local_2d", + make_image_summary=False) # putting the representations back in the right place output = scatter_blocks_2d(output, q_indices, padded_q_shape) # Remove the padding if introduced diff --git a/tensor2tensor/layers/common_attention_test.py b/tensor2tensor/layers/common_attention_test.py index 644b27a98..7823936fa 100644 --- a/tensor2tensor/layers/common_attention_test.py +++ b/tensor2tensor/layers/common_attention_test.py @@ -162,7 +162,7 @@ def testMultiheadSelfAttentionMemoryEfficient(self): self.assertAllClose(dnorm_bias, dnorm_bias_f) self.assertAllClose(dx, dx_f) - def test2dGatherAndScatter(self): + def test2dGatherAndScatterInvertibility(self): """2d gather and scatter invertibility test.""" batch_size = 2 num_heads = 2 @@ -182,5 +182,68 @@ def test2dGatherAndScatter(self): res = session.run(scattered_x) self.assertAllClose(x, res) + def test2dBlockRasterScanMask(self): + """Testing the 2d block raster scan mask.""" + query_shape = (2, 3) + memory_flange = (2, 1) + with self.test_session() as session: + mask = common_attention.make_2d_block_raster_mask( + query_shape, memory_flange) + res = session.run(mask) + correct_mask = np.array( + [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, + 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, + 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 1.0, 0.0, 0.0, 1.0, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 1.0, 0.0, 0.0, 0.0, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 1.0, 0.0, 0.0, 0.0, 0.0, 1.0]]) + self.assertAllClose(correct_mask, res) + + def test2dGather(self): + """Testing 2d index gather and block gather functions.""" + batch_size = 2 + num_heads = 2 + height = 4 + width = 6 + depth = 8 + query_shape = (2, 3) + x = np.random.rand(batch_size, num_heads, height, width, depth) + y = np.reshape(x, (batch_size, num_heads, -1, depth)) + correct_indices = [[0, 1, 2, 6, 7, 8], + [3, 4, 5, 9, 10, 11], + [12, 13, 14, 18, 19, 20], + [15, 16, 17, 21, 22, 23]] + correct_gathered_x = [[[y[0, 0, correct_indices[0]], + y[0, 0, correct_indices[1]], + y[0, 0, correct_indices[2]], + y[0, 0, correct_indices[3]]], + [y[0, 1, correct_indices[0]], + y[0, 1, correct_indices[1]], + y[0, 1, correct_indices[2]], + y[0, 1, correct_indices[3]]]], + [[y[1, 0, correct_indices[0]], + y[1, 0, correct_indices[1]], + y[1, 0, correct_indices[2]], + y[1, 0, correct_indices[3]]], + [y[1, 1, correct_indices[0]], + y[1, 1, correct_indices[1]], + y[1, 1, correct_indices[2]], + y[1, 1, correct_indices[3]]]]] + + with self.test_session() as session: + x_indices = common_attention.gather_indices_2d( + x, query_shape, query_shape) + gathered_x = common_attention.gather_blocks_2d(x, x_indices) + x_indices, gathered_x = session.run([x_indices, gathered_x]) + self.assertAllEqual(correct_indices, x_indices) + self.assertAllClose(correct_gathered_x, gathered_x) + + if __name__ == "__main__": tf.test.main() From 4f0737502cbb3d0ce8bdf311f088f539c7101a59 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Wed, 13 Sep 2017 11:36:40 -0700 Subject: [PATCH 0391/4095] use the right value for shape PiperOrigin-RevId: 168570496 --- tensor2tensor/layers/common_attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index fdba48b01..1da33479b 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -482,7 +482,7 @@ def masked_local_attention_1d( # If (length < 2 * block_length), then we use only one block. block_length = tf.where(tf.less(length, block_length * 2), length, block_length) - depth_k = tf.shape(q)[3] + depth_k = tf.shape(k)[3] depth_v = tf.shape(v)[3] original_length = length padding_size = tf.mod(-length, block_length) From 802b95fcb5e23562d46e2efec6e1b7769c9d674e Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 13 Sep 2017 12:33:42 -0700 Subject: [PATCH 0392/4095] Separate out encoding a decoding steps. PiperOrigin-RevId: 168579149 --- tensor2tensor/models/transformer.py | 109 +++++++++++++++++++++++----- 1 file changed, 90 insertions(+), 19 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index a2e76dd13..4ee6746a1 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -41,34 +41,105 @@ class Transformer(t2t_model.T2TModel): """Attention net. See file docstring.""" + def encode(self, inputs, target_space, hparams): + """Encode transformer inputs. + + Args: + inputs: Transformer inputs [batch_size, input_length, hidden_dim] + target_space: scalar, target space ID. + hparams: hyperparmeters for model. + + Returns: + Tuple of: + encoder_output: Encoder representation. + [batch_size, input_length, hidden_dim] + encoder_decoder_attention_bias: Bias and mask weights for + encodre-decoder attention. [batch_size, input_length] + """ + inputs = common_layers.flatten4d3d(inputs) + + encoder_input, self_attention_bias, encoder_decoder_attention_bias = ( + transformer_prepare_encoder(inputs, target_space, hparams)) + + encoder_input = tf.nn.dropout( + encoder_input, 1.0 - hparams.layer_prepostprocess_dropout) + + encoder_output = transformer_encoder( + encoder_input, + self_attention_bias, + hparams) + + return encoder_output, encoder_decoder_attention_bias + + def decode( + self, + decoder_input, + encoder_output, + encoder_decoder_attention_bias, + decoder_self_attention_bias, + hparams): + """Decode Transformer outputs from encoder representation. + + Args: + decoder_input: inputs to bottom of the model. + [batch_size, decoder_length, hidden_dim] + encoder_output: Encoder representation. + [batch_size, input_length, hidden_dim] + encoder_decoder_attention_bias: Bias and mask weights for + encoder-decoder attention. [batch_size, input_length] + decoder_self_attention_bias: Bias and mask weights for decoder + self-attention. [batch_size, decoder_length] + hparams: hyperparmeters for model. + + Returns: + Final decoder representaiton. [batch_size, decoder_length, hidden_dim] + """ + decoder_input = tf.nn.dropout(decoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + + decoder_output = transformer_decoder( + decoder_input, + encoder_output, + decoder_self_attention_bias, + encoder_decoder_attention_bias, + hparams) + + # Expand since t2t expects 4d tensors. + return tf.expand_dims(decoder_output, axis=2) + def model_fn_body(self, features): + """Transformet main model_fn. + + Args: + features: Map of features to the model. Should contain the following: + "inputs": Transformer inputs [batch_size, input_length, hidden_dim] + "tragets": Target decoder outputs. + [batch_size, decoder_length, hidden_dim] + "target_space_id" + + Returns: + Final decoder representaiton. [batch_size, decoder_length, hidden_dim] + """ hparams = self._hparams - targets = features["targets"] + inputs = features["inputs"] + target_space = features["target_space_id"] + encoder_output, encoder_decoder_attention_bias = self.encode( + inputs, target_space, hparams) - inputs = common_layers.flatten4d3d(inputs) + targets = features["targets"] targets = common_layers.flatten4d3d(targets) - (encoder_input, encoder_self_attention_bias, - encoder_decoder_attention_bias) = transformer_prepare_encoder( - inputs, target_space, hparams) - (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder( + decoder_input, decoder_self_attention_bias = transformer_prepare_decoder( targets, hparams) - encoder_input = tf.nn.dropout(encoder_input, - 1.0 - hparams.layer_prepostprocess_dropout) - decoder_input = tf.nn.dropout(decoder_input, - 1.0 - hparams.layer_prepostprocess_dropout) - encoder_output = transformer_encoder(encoder_input, - encoder_self_attention_bias, hparams) - - decoder_output = transformer_decoder( - decoder_input, encoder_output, decoder_self_attention_bias, - encoder_decoder_attention_bias, hparams) - decoder_output = tf.expand_dims(decoder_output, 2) - - return decoder_output + return self.decode( + decoder_input, + encoder_output, + encoder_decoder_attention_bias, + decoder_self_attention_bias, + hparams) @registry.register_model From 466ce80f09cbdec86fca93e74a1dd5c286713f06 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 13 Sep 2017 12:34:40 -0700 Subject: [PATCH 0393/4095] Split out timing signal function. PiperOrigin-RevId: 168579271 --- tensor2tensor/layers/common_attention.py | 47 +++++++++++++++++++++--- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 1da33479b..840131c6a 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -37,8 +37,9 @@ _expert_count = 0 -def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): - """Adds a bunch of sinusoids of different frequencies to a Tensor. +def get_timing_signal_1d( + length, channels, min_timescale=1.0, max_timescale=1.0e4): + """Gets a bunch of sinusoids of different frequencies. Each channel of the input Tensor is incremented by a sinusoid of a different frequency and phase. @@ -58,15 +59,15 @@ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): the channels dimension. Args: - x: a Tensor with shape [batch, length, channels] + length: scalar, length of timing signal sequence. + channels: scalar, size of timing embeddings to create. The number of + different timescales is equal to channels / 2. min_timescale: a float max_timescale: a float Returns: - a Tensor the same shape as x. + a Tensor of timing signals [1, length, channels] """ - length = tf.shape(x)[1] - channels = tf.shape(x)[2] position = tf.to_float(tf.range(length)) num_timescales = channels // 2 log_timescale_increment = ( @@ -78,6 +79,40 @@ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1) signal = tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]]) signal = tf.reshape(signal, [1, length, channels]) + return signal + + +def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): + """Adds a bunch of sinusoids of different frequencies to a Tensor. + + Each channel of the input Tensor is incremented by a sinusoid of a different + frequency and phase. + + This allows attention to learn to use absolute and relative positions. + Timing signals should be added to some precursors of both the query and the + memory inputs to attention. + + The use of relative position is possible because sin(x+y) and cos(x+y) can be + experessed in terms of y, sin(x) and cos(x). + + In particular, we use a geometric sequence of timescales starting with + min_timescale and ending with max_timescale. The number of different + timescales is equal to channels / 2. For each timescale, we + generate the two sinusoidal signals sin(timestep/timescale) and + cos(timestep/timescale). All of these sinusoids are concatenated in + the channels dimension. + + Args: + x: a Tensor with shape [batch, length, channels] + min_timescale: a float + max_timescale: a float + + Returns: + a Tensor the same shape as x. + """ + length = tf.shape(x)[1] + channels = tf.shape(x)[2] + signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale) return x + signal From 79ba4a8b98752cf0d5cbed6718f2ef6cdcfd1374 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 13 Sep 2017 14:53:40 -0700 Subject: [PATCH 0394/4095] Adding has_inputs property to Problem. PiperOrigin-RevId: 168599850 --- tensor2tensor/data_generators/problem.py | 15 ++++++++------- tensor2tensor/utils/input_fn_builder.py | 16 ++++++++++------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 4aa4862ef..cb8b47aee 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -17,20 +17,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - import collections import os import random - # Dependency imports - import six - from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder from tensor2tensor.utils import metrics from tensor2tensor.utils import registry - import tensorflow as tf @@ -385,6 +380,10 @@ def _preprocess(example): return dataset + @property + def has_inputs(self): + return "inputs" in self.get_feature_encoders() + @property def feature_info(self): """Retrieve dict<feature name, FeatureInfo>. @@ -404,7 +403,8 @@ def feature_info(self): input_mods = hp.input_modality target_mod = hp.target_modality vocabs = hp.vocabulary - in_id = hp.input_space_id + if self.has_inputs: + in_id = hp.input_space_id out_id = hp.target_space_id features = collections.defaultdict(FeatureInfo) @@ -422,7 +422,8 @@ def feature_info(self): for name, encoder in six.iteritems(vocabs): features[name].encoder = encoder - features["inputs"].space_id = in_id + if self.has_inputs: + features["inputs"].space_id = in_id features["targets"].space_id = out_id self._feature_info = features diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index cfa782e8d..5a63a8bd1 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -127,16 +127,18 @@ def input_fn(): feature_map["problem_choice"] = problem_choice # Set shapes so the ranks are clear. - feature_map["inputs"].set_shape([None, None, None, None]) + if problem_instance.has_inputs: + feature_map["inputs"].set_shape([None, None, None, None]) + feature_map["input_space_id"].set_shape([]) feature_map["targets"].set_shape([None, None, None, None]) feature_map["problem_choice"].set_shape([]) - feature_map["input_space_id"].set_shape([]) feature_map["target_space_id"].set_shape([]) if mode == tf.estimator.ModeKeys.PREDICT: feature_map["infer_targets"] = feature_map["targets"] # Forced shape obfuscation is necessary for inference. - feature_map["inputs"]._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access + if problem_instance.has_inputs: + feature_map["inputs"]._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access feature_map["targets"]._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access # This is because of a bug in the Estimator that short-circuits prediction @@ -238,11 +240,13 @@ def features_for_problem(problem_instance, feature_map["targets"] = feature_map["inputs"] # Ensure inputs and targets are proper rank. - while len(feature_map["inputs"].get_shape()) != 4: - feature_map["inputs"] = tf.expand_dims(feature_map["inputs"], axis=-1) + if problem_instance.has_inputs: + while len(feature_map["inputs"].get_shape()) != 4: + feature_map["inputs"] = tf.expand_dims(feature_map["inputs"], axis=-1) while len(feature_map["targets"].get_shape()) != 4: feature_map["targets"] = tf.expand_dims(feature_map["targets"], axis=-1) - feature_map["input_space_id"] = tf.constant(p_hparams.input_space_id) + if problem_instance.has_inputs: + feature_map["input_space_id"] = tf.constant(p_hparams.input_space_id) feature_map["target_space_id"] = tf.constant(p_hparams.target_space_id) return feature_map From 7035ffe8a1711476d137a5c6e0af85c70c718df7 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 13 Sep 2017 15:04:34 -0700 Subject: [PATCH 0395/4095] Allowing explicit timing positions to be used, by adding function add_timing_signal_1d_given_position in common_attention.py that takes timing positions (as a tensor of shape [batch, length]). PiperOrigin-RevId: 168601518 --- tensor2tensor/layers/common_attention.py | 27 ++++++++++++++++++++++++ tensor2tensor/utils/t2t_model.py | 3 +++ 2 files changed, 30 insertions(+) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 840131c6a..daefb56c5 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -116,6 +116,33 @@ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): return x + signal +def add_timing_signal_1d_given_position(x, position, min_timescale=1.0, + max_timescale=1.0e4): + """Adds sinusoids of diff frequencies to a Tensor, with timing position given. + + Args: + x: a Tensor with shape [batch, length, channels] + position: a Tensor with shape [batch, length] + min_timescale: a float + max_timescale: a float + + Returns: + a Tensor the same shape as x. + """ + channels = tf.shape(x)[2] + num_timescales = channels // 2 + log_timescale_increment = ( + math.log(float(max_timescale) / float(min_timescale)) / + (tf.to_float(num_timescales) - 1)) + inv_timescales = min_timescale * tf.exp( + tf.to_float(tf.range(num_timescales)) * -log_timescale_increment) + scaled_time = (tf.expand_dims(tf.to_float(position), 2) * + tf.expand_dims(tf.expand_dims(inv_timescales, 0), 0)) + signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=2) + signal = tf.pad(signal, [[0, 0], [0, 0], [0, tf.mod(channels, 2)]]) + return x + signal + + def add_timing_signal_nd(x, min_timescale=1.0, max_timescale=1.0e4): """Adds a bunch of sinusoids of different frequencies to a Tensor. diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 32627f7e3..916de50b7 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -464,6 +464,9 @@ def model_fn(self, features, skip=False, last_position_only=False): transformed_features["targets"] = target_modality.targets_bottom_sharded( sharded_features["targets"], dp) + # Allows later access to pre-embedding raw targets. + transformed_features["raw_targets"] = sharded_features["targets"] + # Construct the model body. with tf.variable_scope("body", reuse=self._problem_idx > 0): if skip: From 213859956b00db43925b54a7ff938fa034885959 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Thu, 14 Sep 2017 15:40:13 -0700 Subject: [PATCH 0396/4095] fix off-by-one num_samples bug in decode_from_dataset PiperOrigin-RevId: 168757167 --- tensor2tensor/utils/decoding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index d84fd740b..a27ff72df 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -176,7 +176,7 @@ def decode_from_dataset(estimator, target_file.write(str(decoded_target) + "\n") if (decode_hp.num_samples >= 0 and - num_predictions >= decode_hp.num_samples): + (num_predictions + 1) >= decode_hp.num_samples): break if decode_to_file: From 32375386d8b2abd8a8d619f482e92454e2afdca8 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Thu, 14 Sep 2017 16:36:54 -0700 Subject: [PATCH 0397/4095] Use decode_hparams.batch_size when decoding from dataset PiperOrigin-RevId: 168765336 --- tensor2tensor/utils/decoding.py | 10 ++++++++-- tensor2tensor/utils/input_fn_builder.py | 23 ++++++++++++++++------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index a27ff72df..fc5f22c1a 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -47,7 +47,7 @@ def decode_hparams(overrides=""): save_images=False, problem_idx=0, extra_length=50, - batch_size=32, + batch_size=0, beam_size=4, alpha=0.6, return_beams=False, @@ -113,7 +113,8 @@ def decode_from_dataset(estimator, hparams=hparams, data_file_patterns=infer_problems_data, num_datashards=devices.data_parallelism().n, - fixed_problem=problem_idx) + fixed_problem=problem_idx, + batch_size=decode_hp.batch_size) # Get the predictions as an iterable predictions = estimator.predict(infer_input_fn) @@ -188,6 +189,11 @@ def decode_from_dataset(estimator, def decode_from_file(estimator, filename, decode_hp, decode_to_file=None): """Compute predictions on entries in filename and write them out.""" + if not decode_hp.batch_size: + decode_hp.batch_size = 32 + tf.logging.info( + "decode_hp.batch_size not specified; default=%d" % decode_hp.batch_size) + hparams = estimator.params problem_id = decode_hp.problem_idx inputs_vocab = hparams.problems[problem_id].vocabulary["inputs"] diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index 5a63a8bd1..c9dde1a14 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -34,7 +34,8 @@ def build_input_fn(mode, num_datashards=None, fixed_problem=None, worker_replicas=None, - worker_id=None): + worker_id=None, + batch_size=None): """Provides input to the graph, either from disk or via a placeholder. This function produces an input function that will feed data into @@ -61,6 +62,7 @@ def build_input_fn(mode, setting with hparams.problem_choice == distributed. worker_id: int, id of this worker replica. Used in multiproblem setting with hparams.problem_choice == distributed. + batch_size: int, if provided, will use a fixed batch size. Returns: A function that returns a dictionary of features and the target labels. @@ -98,6 +100,7 @@ def input_fn(): problem_filepatterns, num_datashards, mode, + batch_size=batch_size, name="problem_%d" % problem_idx) problem_batches.append(feature_map) @@ -211,19 +214,25 @@ def features_for_problem(problem_instance, data_filepatterns, num_datashards, mode, + batch_size=None, name="problem_inputs"): """Feature map for Problem.""" with tf.name_scope(name): with tf.device("/cpu:0"): # Input reading on CPU capacity = (p_hparams.max_expected_batch_size_per_shard * num_datashards) + batching_scheme = data_reader.hparams_to_batching_scheme( + hparams, + shard_multiplier=num_datashards, + drop_long_sequences=(mode == tf.estimator.ModeKeys.TRAIN or + hparams.eval_drop_long_sequences), + length_multiplier=(p_hparams.batch_size_multiplier)) + if batch_size: + # If batch_size is fixed, use a single input bucket + batching_scheme["batch_sizes"] = [batch_size] + batching_scheme["boundaries"] = [] feature_map = data_reader.input_pipeline( problem_instance, data_filepatterns, capacity, mode, hparams, - data_reader.hparams_to_batching_scheme( - hparams, - shard_multiplier=num_datashards, - drop_long_sequences=(mode == tf.estimator.ModeKeys.TRAIN or - hparams.eval_drop_long_sequences), - length_multiplier=(p_hparams.batch_size_multiplier))) + batching_scheme) # Reverse inputs and targets features if the problem was reversed. if problem_instance is not None: From e6e4263680be61d7d495c9e27e1617f453a683d7 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 14 Sep 2017 19:16:27 -0700 Subject: [PATCH 0398/4095] Add wiki_scramble_128 dataset. PiperOrigin-RevId: 168782469 --- tensor2tensor/data_generators/wiki.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 6f6c97686..396d120c7 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -223,6 +223,19 @@ def generator(self, data_dir, tmp_dir, _): yield {"inputs": inputs, "targets": targets} +@registry.register_problem +class LanguagemodelWikiScramble128(LanguagemodelWikiScramble): + """Sequence length 128, 50% scrambed.""" + + @property + def sequence_length(self): + return 128 + + @property + def scramble_fraction(self): + return 0.5 + + @registry.register_problem class LanguagemodelWikiScramble1k50(LanguagemodelWikiScramble): """Sequence length 1024, 50% scrambed.""" From 6cb0bc8bea37cdbc31cc6260d9727c577bd3a278 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 15 Sep 2017 12:00:26 -0700 Subject: [PATCH 0399/4095] Add ability to average the last N checkpoints, without needing to specify individual checkpoints. PiperOrigin-RevId: 168867603 --- tensor2tensor/utils/avg_checkpoints.py | 38 ++++++++++++++++++++------ 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/utils/avg_checkpoints.py b/tensor2tensor/utils/avg_checkpoints.py index 77acd4353..4d1c56eda 100644 --- a/tensor2tensor/utils/avg_checkpoints.py +++ b/tensor2tensor/utils/avg_checkpoints.py @@ -18,6 +18,8 @@ from __future__ import division from __future__ import print_function +import os + # Dependency imports import numpy as np @@ -30,6 +32,9 @@ flags.DEFINE_string("checkpoints", "", "Comma-separated list of checkpoints to average.") +flags.DEFINE_integer("num_last_checkpoints", 0, + "Averages the last N saved checkpoints." + " If the checkpoints flag is set, this is ignored.") flags.DEFINE_string("prefix", "", "Prefix (e.g., directory) to append to each checkpoint.") flags.DEFINE_string("output_path", "/tmp/averaged.ckpt", @@ -42,17 +47,32 @@ def checkpoint_exists(path): def main(_): - # Get the checkpoints list from flags and run some basic checks. - checkpoints = [c.strip() for c in FLAGS.checkpoints.split(",")] - checkpoints = [c for c in checkpoints if c] - if not checkpoints: - raise ValueError("No checkpoints provided for averaging.") - if FLAGS.prefix: - checkpoints = [FLAGS.prefix + c for c in checkpoints] + if FLAGS.checkpoints: + # Get the checkpoints list from flags and run some basic checks. + checkpoints = [c.strip() for c in FLAGS.checkpoints.split(",")] + checkpoints = [c for c in checkpoints if c] + if not checkpoints: + raise ValueError("No checkpoints provided for averaging.") + if FLAGS.prefix: + checkpoints = [FLAGS.prefix + c for c in checkpoints] + else: + assert FLAGS.num_last_checkpoints >= 1, "Must average at least one model" + assert FLAGS.prefix, ("Prefix must be provided when averaging last" + " N checkpoints") + checkpoint_state = tf.train.get_checkpoint_state( + os.path.dirname(FLAGS.prefix)) + # Checkpoints are ordered from oldest to newest. + checkpoints = checkpoint_state.all_model_checkpoint_paths[ + -FLAGS.num_last_checkpoints:] + checkpoints = [c for c in checkpoints if checkpoint_exists(c)] if not checkpoints: - raise ValueError( - "None of the provided checkpoints exist. %s" % FLAGS.checkpoints) + if FLAGS.checkpoints: + raise ValueError( + "None of the provided checkpoints exist. %s" % FLAGS.checkpoints) + else: + raise ValueError("Could not find checkpoints at %s" % + os.path.dirname(FLAGS.prefix)) # Read variables from all checkpoints and average them. tf.logging.info("Reading variables and averaging checkpoints:") From be19196ded9f907098bf4747a138d632e0a9736b Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Fri, 15 Sep 2017 14:30:54 -0700 Subject: [PATCH 0400/4095] Working on a model for cnn_dailymail summarization task. Make greedy inference and beam search work in prepend mode. After this change, inference in prepend mode requires batch size 1, since padding is not properly ignored. PiperOrigin-RevId: 168889211 --- tensor2tensor/data_generators/inspect.py | 4 ++-- tensor2tensor/data_generators/problem.py | 12 +++++++----- tensor2tensor/models/transformer.py | 8 ++++++++ tensor2tensor/utils/data_reader.py | 3 +-- tensor2tensor/utils/t2t_model.py | 22 +++++++++++++++++++--- 5 files changed, 37 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/data_generators/inspect.py b/tensor2tensor/data_generators/inspect.py index 848b74a2d..c84f00606 100644 --- a/tensor2tensor/data_generators/inspect.py +++ b/tensor2tensor/data_generators/inspect.py @@ -67,9 +67,9 @@ def main(_): inputs = [int(i) for i in x.features.feature["inputs"].int64_list.value] targets = [int(i) for i in x.features.feature["targets"].int64_list.value] if FLAGS.print_inputs: - print(encoder.decode(inputs) if encoder else inputs) + print("INPUTS:\n" + encoder.decode(inputs) if encoder else inputs) if FLAGS.print_targets: - print(encoder.decode(targets) if encoder else targets) + print("TARGETS:\n" + encoder.decode(targets) if encoder else targets) total_input_tokens += len(inputs) total_target_tokens += len(targets) total_sequences += 1 diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index cb8b47aee..a006d5627 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -102,15 +102,18 @@ def default_model_hparams(): data_dir=None) -def preprocess_examples_common(examples, hparams): +def preprocess_examples_common(examples, hparams, mode): """Preprocessing steps common to all models.""" if hparams.max_input_seq_length > 0: examples["inputs"] = examples["inputs"][:hparams.max_input_seq_length] if hparams.max_target_seq_length > 0: examples["targets"] = examples["targets"][:hparams.max_target_seq_length] if hparams.prepend_mode != "none": - examples["targets"] = tf.concat( - [examples["inputs"], [0], examples["targets"]], 0) + if mode == tf.estimator.ModeKeys.PREDICT: + examples["partial_targets"] = tf.concat([examples["inputs"], [0]], 0) + else: + examples["targets"] = tf.concat( + [examples["inputs"], [0], examples["targets"]], 0) return examples @@ -196,8 +199,7 @@ def example_reading_spec(self): return (data_fields, data_items_to_decoders) def preprocess_examples(self, examples, mode, hparams): - del mode - return preprocess_examples_common(examples, hparams) + return preprocess_examples_common(examples, hparams, mode) def eval_metrics(self): return [ diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 4ee6746a1..7d52824fa 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -725,6 +725,14 @@ def transformer_parameter_attention_b(): return hparams +@registry.register_hparams +def transformer_prepend(): + hparams = transformer_base() + hparams.prepend_mode = "prepend_inputs_masked_attention" + hparams.max_length = 0 + return hparams + + @registry.register_ranged_hparams("transformer_base") def transformer_base_range(rhp): """Small range of hyperparameters.""" diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 834e631ac..d94e85e39 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -288,7 +288,7 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, def _preprocess(example, problem, data_file_pattern, hparams, mode): """Preprocessing for example.""" if problem is None: - example = preprocess_examples_common(example, hparams) + example = preprocess_examples_common(example, hparams, mode) example = preprocessing(example, data_file_pattern) else: example = problem.preprocess_examples(example, mode, hparams) @@ -384,7 +384,6 @@ def padded_batch(dataset, batch_size, padded_shapes=None): def _bucket_boundaries(max_length, min_length=8, length_bucket_step=1.1): """A default set of length-bucket boundaries.""" - assert min_length <= max_length assert length_bucket_step > 1.0 x = min_length boundaries = [] diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 916de50b7..812e5aee3 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -228,10 +228,19 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, samples: an integer `Tensor`. Top samples from the beam search """ + batch_size = tf.shape(features["inputs"])[0] + batch_size = tf.Print(batch_size, [batch_size], "beam_decode batch_size=") + def symbols_to_logits_fn(ids): """Go from ids to logits.""" ids = tf.expand_dims(tf.expand_dims(ids, axis=2), axis=3) ids = tf.pad(ids[:, 1:], [[0, 0], [0, 1], [0, 0], [0, 0]]) + if "partial_targets" in features: + pt = features["partial_targets"] + pt_length = tf.shape(pt)[1] + pt = tf.tile(pt, [1, beam_size]) + pt = tf.reshape(pt, [batch_size * beam_size, pt_length, 1, 1]) + ids = tf.concat([pt, ids], axis=1) features["targets"] = ids self._coverage = None @@ -247,7 +256,6 @@ def symbols_to_logits_fn(ids): logits = logits[:, current_output_position, :, :] return tf.squeeze(logits, axis=[1, 2]) - batch_size = tf.shape(features["inputs"])[0] initial_ids = tf.zeros([batch_size], dtype=tf.int32) inputs_old = features["inputs"] @@ -263,7 +271,9 @@ def symbols_to_logits_fn(ids): target_modality = self._hparams.problems[self._problem_idx].target_modality vocab_size = target_modality.top_dimensionality # Setting decode length to input length + decode_length - decode_length = tf.shape(features["inputs"])[1] + tf.constant(decode_length) + decode_length = tf.constant(decode_length) + if "partial_targets" not in features: + decode_length += tf.shape(features["inputs"])[1] ids, scores = beam_search.beam_search(symbols_to_logits_fn, initial_ids, beam_size, decode_length, vocab_size, alpha) @@ -333,7 +343,9 @@ def infer_step(recent_output, recent_logits, unused_loss): # Create an initial output tensor. This will be passed # to the infer_step, which adds one timestep at every iteration. if "partial_targets" in features: - initial_output = tf.convert_to_tensor(features["partial_targets"]) + initial_output = tf.to_int64(tf.expand_dims( + tf.expand_dims(features["partial_targets"], 2), 3)) + batch_size = tf.shape(initial_output)[0] else: batch_size = tf.shape(features["inputs"])[0] initial_output = tf.zeros((batch_size, 0, 1, 1), dtype=tf.int64) @@ -366,6 +378,10 @@ def infer_step(recent_output, recent_logits, unused_loss): if inputs_old is not None: # Restore to not confuse Estimator. features["inputs"] = inputs_old losses = {"training": loss} + if "partial_targets" in features: + partial_target_length = tf.shape(features["partial_targets"])[1] + result = tf.slice( + result, [0, partial_target_length, 0, 0], [-1, -1, -1, -1]) return result, logits, losses def sample(self, features, last_position_only=False): From 6970dea82e2605e21054e3eccb4888a7bda9535e Mon Sep 17 00:00:00 2001 From: Manoj Kumar <mechcoder@google.com> Date: Mon, 18 Sep 2017 11:14:48 -0700 Subject: [PATCH 0401/4095] Change ptb data generator to encode end of sentences with <EOS> tags during PiperOrigin-RevId: 169116643 --- tensor2tensor/data_generators/ptb.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/data_generators/ptb.py b/tensor2tensor/data_generators/ptb.py index 893c2b77c..31bc83c0a 100644 --- a/tensor2tensor/data_generators/ptb.py +++ b/tensor2tensor/data_generators/ptb.py @@ -42,9 +42,9 @@ def _read_words(filename): """Reads words from a file.""" with tf.gfile.GFile(filename, "r") as f: if sys.version_info[0] >= 3: - return f.read().replace("\n", " ").split() + return f.read().replace("\n", " %s " % EOS).split() else: - return f.read().decode("utf-8").replace("\n", " ").split() + return f.read().decode("utf-8").replace("\n", " %s " % EOS).split() def _build_vocab(filename, vocab_path, vocab_size): @@ -151,7 +151,7 @@ def generator(self, data_dir, tmp_dir, train): def _generator(self, filename, encoder): with tf.gfile.GFile(filename, "r") as f: for line in f: - line = " ".join(line.replace("\n", EOS).split()) + line = " ".join(line.replace("\n", " %s " % EOS).split()) tok = encoder.encode(line) if tok: yield {"inputs": [0], "targets": tok} From 3aa13683e30b15633b0c358f9d888f702cff0c3f Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 18 Sep 2017 13:19:02 -0700 Subject: [PATCH 0402/4095] Rename ambiguous function names. PiperOrigin-RevId: 169135518 --- tensor2tensor/layers/common_layers.py | 6 +++--- tensor2tensor/layers/common_layers_test.py | 2 +- tensor2tensor/models/attention_lm.py | 2 +- tensor2tensor/models/attention_lm_moe.py | 2 +- tensor2tensor/models/bytenet.py | 2 +- tensor2tensor/models/lstm.py | 4 ++-- tensor2tensor/models/multimodel.py | 2 +- tensor2tensor/models/slicenet.py | 2 +- tensor2tensor/models/transformer.py | 2 +- tensor2tensor/models/transformer_vae.py | 2 +- 10 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index bd9ff896d..6554e0d31 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -209,7 +209,7 @@ def embedding(x, vocab_size, dense_size, name=None, reuse=None, multiplier=1.0): return tf.reshape(emb_x, [shape[0], shape[1], shape[2], static_shape[4]]) -def shift_left(x, pad_value=None): +def shift_right(x, pad_value=None): """Shift the second dimension of x right by one.""" if pad_value is None: shifted_targets = tf.pad(x, [[0, 0], [1, 0], [0, 0], [0, 0]])[:, :-1, :, :] @@ -218,7 +218,7 @@ def shift_left(x, pad_value=None): return shifted_targets -def shift_left_3d(x, pad_value=None): +def shift_right_3d(x, pad_value=None): """Shift the second dimension of x right by one.""" if pad_value is None: shifted_targets = tf.pad(x, [[0, 0], [1, 0], [0, 0]])[:, :-1, :] @@ -815,7 +815,7 @@ def decompress_seqcnn(x, # Flatten x and embedded targets. Flat targets are factor* larger on axis=1. flat_x = tf.reshape(x, [-1, 1, 1, hidden_size]) flat_targets = tf.reshape(targets_emb, [-1, factor, 1, hidden_size]) - shifted_targets = shift_left(flat_targets) + shifted_targets = shift_right(flat_targets) # Run a SeqCNN large-batch to produce factor outputs out of every target. flat_x += tf.zeros_like(shifted_targets) # Broadcast on axis=1. flat_outputs = conv_block( diff --git a/tensor2tensor/layers/common_layers_test.py b/tensor2tensor/layers/common_layers_test.py index d11f8ce2c..ee07c48d3 100644 --- a/tensor2tensor/layers/common_layers_test.py +++ b/tensor2tensor/layers/common_layers_test.py @@ -281,7 +281,7 @@ def testShiftLeft(self): expected = np.zeros((5, 7, 1, 11)) expected[:, 1, :] = np.ones_like(expected[:, 1, :]) with self.test_session() as session: - a = common_layers.shift_left(tf.constant(x1, dtype=tf.float32)) + a = common_layers.shift_right(tf.constant(x1, dtype=tf.float32)) actual = session.run(a) self.assertAllEqual(actual, expected) diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 3302f45be..696057233 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -79,7 +79,7 @@ def attention_lm_prepare_decoder(targets, hparams): else: decoder_self_attention_bias = ( common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) - decoder_input = common_layers.shift_left_3d(targets) + decoder_input = common_layers.shift_right_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) return (decoder_input, decoder_self_attention_bias) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index abdd68c8b..42a9fbabf 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -262,7 +262,7 @@ def attention_lm_moe_prepare_decoder(targets, hparams): common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) # TODO(epot): The padding remover should take into account that the input is # shifted. - decoder_input = common_layers.shift_left_3d(targets) + decoder_input = common_layers.shift_right_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) return (decoder_input, decoder_self_attention_bias, pad_remover) diff --git a/tensor2tensor/models/bytenet.py b/tensor2tensor/models/bytenet.py index e4537ef3f..5af0c4435 100644 --- a/tensor2tensor/models/bytenet.py +++ b/tensor2tensor/models/bytenet.py @@ -66,7 +66,7 @@ def bytenet_internal(inputs, targets, hparams): final_encoder = residual_dilated_conv(inputs, hparams.num_block_repeat, "SAME", "encoder", hparams) - shifted_targets = common_layers.shift_left(targets) + shifted_targets = common_layers.shift_right(targets) kernel = (hparams.kernel_height, hparams.kernel_width) decoder_start = common_layers.conv_block( tf.concat([final_encoder, shifted_targets], axis=3), diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index d1c3101b4..20475a5a9 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -221,7 +221,7 @@ def lstm_seq2seq_internal(inputs, targets, hparams, train): _, final_encoder_state = lstm( tf.reverse(inputs, axis=[1]), hparams, train, "encoder") # LSTM decoder. - shifted_targets = common_layers.shift_left(targets) + shifted_targets = common_layers.shift_right(targets) decoder_outputs, _ = lstm( common_layers.flatten4d3d(shifted_targets), hparams, @@ -240,7 +240,7 @@ def lstm_seq2seq_internal_attention(inputs, targets, hparams, train): encoder_outputs, final_encoder_state = lstm( tf.reverse(inputs, axis=[1]), hparams, train, "encoder") # LSTM decoder with attention - shifted_targets = common_layers.shift_left(targets) + shifted_targets = common_layers.shift_right(targets) decoder_outputs, _ = lstm_attention_decoder( common_layers.flatten4d3d(shifted_targets), hparams, train, "decoder", final_encoder_state, encoder_outputs) diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index 5df8fcd3c..a4c82d942 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -99,7 +99,7 @@ def prepare_decoder(targets, target_space_emb): common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) target_space_emb = tf.reshape(target_space_emb, [1, 1, -1]) target_space_emb = tf.tile(target_space_emb, [tf.shape(targets)[0], 1, 1]) - decoder_input = common_layers.shift_left_3d( + decoder_input = common_layers.shift_right_3d( targets, pad_value=target_space_emb) decoder_input = common_attention.add_timing_signal_1d(decoder_input) return (decoder_input, decoder_self_attention_bias) diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index 6b07dc640..5377fd97e 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -198,7 +198,7 @@ def norm_fn(x, name): similarity_loss = 0.0 # Use attention from each target to look at input and retrieve. - targets_shifted = common_layers.shift_left( + targets_shifted = common_layers.shift_right( targets_flat, pad_value=target_space_emb) if hparams.attention_type == "none": targets_with_attention = tf.zeros_like(targets_shifted) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 7d52824fa..9e5fdacc6 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -238,7 +238,7 @@ def transformer_prepare_decoder(targets, hparams): if hparams.proximity_bias: decoder_self_attention_bias += common_attention.attention_bias_proximal( tf.shape(targets)[1]) - decoder_input = common_layers.shift_left_3d(targets) + decoder_input = common_layers.shift_right_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) return (decoder_input, decoder_self_attention_bias) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index e3279495a..86950d6b7 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -187,7 +187,7 @@ def encode(x, x_space, hparams, name): def decode(cond_vec, cond_add, gold, c, ed, hparams): """Transformer decoder.""" drop_gold = tf.nn.dropout(gold, 1.0 - hparams.layer_prepostprocess_dropout) - decoder_input = common_layers.shift_left(drop_gold, pad_value=cond_vec) + decoder_input = common_layers.shift_right(drop_gold, pad_value=cond_vec) if cond_add is not None: decoder_input += cond_add decoder_input = tf.squeeze(decoder_input, axis=2) From 558fe96923d5d56622a8d596a5441069c85cc72e Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 18 Sep 2017 14:31:05 -0700 Subject: [PATCH 0403/4095] Move the final layer_preprocess in the encoder and decoder in to the variable scopes so that they don't share parameters. PiperOrigin-RevId: 169147528 --- tensor2tensor/models/transformer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 9e5fdacc6..855e0fa55 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -282,10 +282,10 @@ def transformer_encoder(encoder_input, y = transformer_ffn_layer( common_layers.layer_preprocess(x, hparams), hparams, pad_remover) x = common_layers.layer_postprocess(x, y, hparams) - # if normalization is done in layer_preprocess, then it shuold also be done - # on the output, since the output can grow very large, being the sum of - # a whole stack of unnormalized layer outputs. - return common_layers.layer_preprocess(x, hparams) + # if normalization is done in layer_preprocess, then it shuold also be done + # on the output, since the output can grow very large, being the sum of + # a whole stack of unnormalized layer outputs. + return common_layers.layer_preprocess(x, hparams) def transformer_decoder(decoder_input, @@ -336,10 +336,10 @@ def transformer_decoder(decoder_input, y = transformer_ffn_layer( common_layers.layer_preprocess(x, hparams), hparams) x = common_layers.layer_postprocess(x, y, hparams) - # if normalization is done in layer_preprocess, then it shuold also be done - # on the output, since the output can grow very large, being the sum of - # a whole stack of unnormalized layer outputs. - return common_layers.layer_preprocess(x, hparams) + # if normalization is done in layer_preprocess, then it shuold also be done + # on the output, since the output can grow very large, being the sum of + # a whole stack of unnormalized layer outputs. + return common_layers.layer_preprocess(x, hparams) def transformer_ffn_layer(x, hparams, pad_remover=None): From 1e712d3a0bd9c20d31d128f060339951f9e56d1a Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Mon, 18 Sep 2017 16:07:01 -0700 Subject: [PATCH 0404/4095] More experiments with "aligned" model and wiki_scramble dataset. PiperOrigin-RevId: 169162566 --- tensor2tensor/data_generators/wiki.py | 6 + tensor2tensor/layers/common_attention.py | 30 +++- tensor2tensor/models/aligned.py | 220 +++++++++++++++++++++-- 3 files changed, 238 insertions(+), 18 deletions(-) diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 396d120c7..30a16817b 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -31,6 +31,7 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import metrics from tensor2tensor.utils import registry import tensorflow as tf @@ -222,6 +223,11 @@ def generator(self, data_dir, tmp_dir, _): inputs = self.scramble(targets) yield {"inputs": inputs, "targets": targets} + def eval_metrics(self): + return [ + metrics.Metrics.ACC, metrics.Metrics.NEG_LOG_PERPLEXITY + ] + @registry.register_problem class LanguagemodelWikiScramble128(LanguagemodelWikiScramble): diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index daefb56c5..9b4235cc3 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -251,18 +251,42 @@ def embedding_to_padding(emb): return tf.to_float(tf.equal(emb_sum, 0.0)) +def attention_bias_local(length, max_backward, max_forward): + """Create an bias tensor to be added to attention logits. + + A position may attend to positions at most max_distance from it, + forward and backwards. + + This does not actually save any computation. + + Args: + length: an integer Scalar. + max_backward: an int64 Scalar - maximum distance backward to attend. + negative values indicate unlimited. + max_forward: an int64 Scalar - maximum distance forward to attend. + negative values indicate unlimited. + + Returns: + a `Tensor` with shape [1, 1, length, length]. + """ + band = tf.matrix_band_part( + tf.ones([length, length]), max_backward, max_forward) + ret = -1e9 * (1.0 - band) + return tf.reshape(ret, [1, 1, length, length]) + + def attention_bias_lower_triangle(length): """Create an bias tensor to be added to attention logits. + Allows a query to attend to all positions up to and including its own. + Args: length: a Scalar. Returns: a `Tensor` with shape [1, 1, length, length]. """ - lower_triangle = tf.matrix_band_part(tf.ones([length, length]), -1, 0) - ret = -1e9 * (1.0 - lower_triangle) - return tf.reshape(ret, [1, 1, length, length]) + return attention_bias_local(length, -1, 0) def attention_bias_ignore_padding(memory_padding): diff --git a/tensor2tensor/models/aligned.py b/tensor2tensor/models/aligned.py index 9cadc0cae..90100c842 100644 --- a/tensor2tensor/models/aligned.py +++ b/tensor2tensor/models/aligned.py @@ -44,7 +44,8 @@ def _should_preprocess(layer_type): - return layer_type not in ["timing", "pos_emb"] + return layer_type not in [ + "timing", "pos_emb", "att_memory_efficient"] def _should_postprocess(layer_type): @@ -81,8 +82,6 @@ def _diet_expert(x): batch_coordinate = dp(get_batch_coordinate, x) - assert hparams.batch_size >= hparams.max_length - layers = hparams.layers.strip(",").split(",") for layer_num, layer_type in enumerate(layers): with tf.variable_scope("%s_%d" % (layer_type, layer_num)): @@ -94,7 +93,25 @@ def _diet_expert(x): y = dp(common_attention.add_positional_embedding_nd, x, hparams.max_length, name="pos_emb") elif layer_type == "att": - # multihead attention + y = dp( + common_attention.multihead_attention, + x, + None, + None, # bias + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout) + elif layer_type == "att_memory_efficient": + assert hparams.layer_preprocess_sequence == "n" + zero_bias = tf.zeros([1, 1, 1, 1]) + y = dp( + common_attention.multihead_self_attention_memory_efficient, + x, + zero_bias, + hparams.num_heads) + elif layer_type == "att_local": y = dp( common_attention.multihead_attention, x, @@ -105,10 +122,29 @@ def _diet_expert(x): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - attention_type=("local_unmasked" if hparams.attention_local - else "dot_product"), - name="decoder_self_attention") - elif layer_type == "local_expert_attention": + attention_type="local_unmasked", + block_length=hparams.local_attention_window, + block_width=hparams.local_attention_window) + elif layer_type == "att_pseudolocal": + # This is an inefficient implementation of local attention, for the + # purpose of testing model quality. + def _pseudolocal_bias(x): + return common_attention.attention_bias_local( + tf.shape(x)[1], + hparams.local_attention_window, + hparams.local_attention_window) + pseudolocal_bias = dp(_pseudolocal_bias, x) + y = dp( + common_attention.multihead_attention, + x, + None, + pseudolocal_bias, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout) + elif layer_type == "att_local_expert": y, loss = dp( common_attention.local_expert_attention, x, @@ -176,6 +212,10 @@ def get_batch_coordinate(x): def aligned_base(): """Set of hyperparameters. + languagemodel_wiki_scramble1k50, 1gpu, 7k steps (10min): log(ppl)_eval = 2.60 + 12.0 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.00 + Returns: a hparams object """ @@ -183,6 +223,7 @@ def aligned_base(): hparams.hidden_size = 512 hparams.batch_size = 5000 hparams.max_length = 1024 + hparams.min_length_bucket = 1024 hparams.dropout = 0.0 hparams.layer_prepostprocess_dropout = 0.0 hparams.label_smoothing = 0.0 @@ -196,12 +237,12 @@ def aligned_base(): hparams.weight_decay = 0.0 hparams.optimizer_adam_beta1 = 0.9 hparams.optimizer_adam_beta2 = 0.98 - hparams.shared_embedding_and_softmax_weights = int(False) + hparams.shared_embedding_and_softmax_weights = int(True) hparams.add_hparam("ffn_hidden_sizes", "2048") # Add new ones like this. hparams.moe_num_experts = 32 hparams.layer_preprocess_sequence = "n" hparams.layer_postprocess_sequence = "da" - hparams.add_hparam("layers", "timing," + "att,ffn," * 4) + hparams.add_hparam("layers", "timing," + "conv,att,ffn," * 2) # attention-related flags hparams.add_hparam("num_heads", 8) @@ -223,34 +264,183 @@ def aligned_base(): hparams.add_hparam("attention_load_balance", 2e-2) hparams.add_hparam("diet_experts", int(False)) hparams.add_hparam("memory_efficient_ffn", int(False)) + hparams.add_hparam("local_attention_window", 128) # if True, we learn a non-autoregressive model from "inputs" to "targets". # if False, we learn an autoregressive model to generate "targets" return hparams @registry.register_hparams -def aligned_with_conv(): +def aligned_memory_efficient(): + """Use multihead_self_attention_memory_efficient. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.59 + 8.7 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.02 + + Returns: + a hparams object + """ hparams = aligned_base() - hparams.layers = "timing," + "conv,att,ffn," * 4 + hparams.layers = "timing," + "conv,att_memory_efficient,ffn," * 2 + return hparams + + +@registry.register_hparams +def aligned_local_expert(): + """Use local_expert_attention. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.72 + 10.2 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.27 + + Returns: + a hparams object + """ + hparams = aligned_base() + hparams.layers = "timing," + "conv,att_local_expert,ffn," * 2 return hparams @registry.register_hparams def aligned_local(): + """Use local attention code. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.57 + 12.8 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.08 + + Returns: + a hparams object + """ + hparams = aligned_base() + hparams.layers = "timing," + "conv,att_local,ffn," * 2 + return hparams + + +@registry.register_hparams +def aligned_local_1k(): + """Use local attention code, attend to full sequence. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.57 + 7.5 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.00 + + Returns: + a hparams object + """ + hparams = aligned_local() + hparams.local_attention_window = 1024 + return hparams + + +@registry.register_hparams +def aligned_pseudolocal(): + """Use a bias to simulate local attention. attention radius 128. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.57 + 12.0 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.06 + + Returns: + a hparams object + """ + hparams = aligned_base() + hparams.layers = "timing," + "conv,att_pseudolocal,ffn," * 2 + return hparams + + +@registry.register_hparams +def aligned_pseudolocal_256(): + """Use a bias to simulate local attention. attentio radius 256. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.56 + 12.0 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.05 + + Returns: + a hparams object + """ + hparams = aligned_pseudolocal() + hparams.local_attention_window = 256 + return hparams + + +@registry.register_hparams +def aligned_no_timing(): + """No timing signal. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.75 + 12.3 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.39 + + Returns: + a hparams object + """ hparams = aligned_base() - hparams.attention_local = int(True) + hparams.layers = "conv,att,ffn," * 2 + return hparams + + +@registry.register_hparams +def aligned_no_att(): + """No attention at all. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.89 + 20.8 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.70 + + Returns: + a hparams object + """ + hparams = aligned_base() + hparams.layers = "conv,ffn," * 2 return hparams @registry.register_hparams def aligned_pos_emb(): + """positional embedding insead of timing signal. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.67 + 12.1 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.00 + + Returns: + a hparams object + """ hparams = aligned_base() - hparams.layers = "pos_emb," + "att,ffn," * 4 + hparams.layers = "pos_emb," + "conv,att,ffn," * 2 return hparams @registry.register_hparams def aligned_moe(): + """mixture of experts instead of ffn. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.62 + 6.7 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 1.94 + + Returns: + a hparams object + """ + hparams = aligned_base() + hparams.layers = "timing," + "conv,att,moe," * 2 + return hparams + + +@registry.register_hparams +def aligned_8k(): + """version for languagemodel_wiki_scramble8k50. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.93 + 1.5 steps/sec on P100 + + Returns: + a hparams object + """ hparams = aligned_base() - hparams.layers = "timing," + "att,moe," * 4 + hparams.max_length = 8192 + hparams.batch_size = 8192 return hparams From 1c7d365dd37a5873017b9529e9fa6fba9c1a6e50 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 18 Sep 2017 16:34:12 -0700 Subject: [PATCH 0405/4095] Initial version of fast decoding for transformer models. PiperOrigin-RevId: 169166125 --- tensor2tensor/layers/common_attention.py | 33 ++++- tensor2tensor/models/transformer.py | 177 ++++++++++++++++++++++- tensor2tensor/models/transformer_test.py | 43 +++++- tensor2tensor/utils/t2t_model.py | 19 ++- 4 files changed, 259 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 9b4235cc3..582f8e9b3 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -1064,6 +1064,7 @@ def multihead_attention(query_antecedent, kv_filter_width=1, q_padding="VALID", kv_padding="VALID", + cache=None, name=None): """Multihead scaled-dot-product attention with input/output transformations. @@ -1087,11 +1088,28 @@ def multihead_attention(query_antecedent, to be. q_padding: One of "VALID", "SAME" or "LEFT". Default is VALID: No padding. kv_padding: One of "VALID", "SAME" or "LEFT". Default is VALID: No padding. - + cache: dict, containing Tensors which are the results of previous + attentions, used for fast decoding. Expects the dict to contrain two + keys; 'k' and 'v', for the initial call the values for these keys should + be empty Tensors of the appropriate shape. + 'k' [batch_size, 0, key_channels] + 'v' [batch_size, 0, value_channels] name: an optional string + Caching: + WARNING: For decoder self-attention, i.e. when memory_antecedent == None, + the caching assumes that the bias contains future masking. + + The caching works by saving all the previous key and value values so that + you are able to send just the last query location to this attention + function. I.e. if the cache dict is provided it assumes the query is of the + shape [batch_size, 1, hiddem_dim] rather than the full memory. + Returns: - A Tensor. + The result of the attention transformation. The output shape is + [batch_size, length_q, hidden_dim] + unless the cache dict is provided in which case only the last memory + position is calculated and the output shape is [batch_size, 1, hidden_dim] Raises: ValueError: if the key depth or value depth are not divisible by the @@ -1111,6 +1129,17 @@ def multihead_attention(query_antecedent, total_value_depth, q_filter_width, kv_filter_width, q_padding, kv_padding) + if cache is not None: + if attention_type != "dot_product": + raise NotImplementedError( + "Caching is not guaranteed to work with attention types other than" + " dot_product.") + if bias is None: + raise ValueError("Bias required for caching. See function docstring " + "for details.") + k = cache["k"] = tf.concat([cache["k"], k], axis=1) + v = cache["v"] = tf.concat([cache["v"], v], axis=1) + q = split_heads(q, num_heads) k = split_heads(k, num_heads) v = split_heads(v, num_heads) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 855e0fa55..918fc8645 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -77,7 +77,8 @@ def decode( encoder_output, encoder_decoder_attention_bias, decoder_self_attention_bias, - hparams): + hparams, + cache=None): """Decode Transformer outputs from encoder representation. Args: @@ -90,6 +91,8 @@ def decode( decoder_self_attention_bias: Bias and mask weights for decoder self-attention. [batch_size, decoder_length] hparams: hyperparmeters for model. + cache: dict, containing tensors which are the results of previous + attentions, used for fast decoding. Returns: Final decoder representaiton. [batch_size, decoder_length, hidden_dim] @@ -102,7 +105,8 @@ def decode( encoder_output, decoder_self_attention_bias, encoder_decoder_attention_bias, - hparams) + hparams, + cache=cache) # Expand since t2t expects 4d tensors. return tf.expand_dims(decoder_output, axis=2) @@ -141,6 +145,152 @@ def model_fn_body(self, features): decoder_self_attention_bias, hparams) + # TODO(llion): Enable fast inference once it's been fully tested. + def x_greedy_infer( + self, features, decode_length, last_position_only=True): + """Fast version of greedy decoding. + + Args: + features: an map of string to `Tensor` + decode_length: an integer. How many additional timesteps to decode. + last_position_only: MUST be true for fast decoding! + + Returns: + samples: [batch_size, input_length + decode_length] + logits: Not returned + losses: Not returned + + Raises: + ValueError: If last_position_only if False + NotImplementedError: If there are multiple data shards. + """ + if not last_position_only: + raise ValueError("Fast decoding only deals with the last positions!") + if self._num_datashards != 1: + raise NotImplementedError("Fast decoding only supports a single shard.") + dp = self._data_parallelism + hparams = self._hparams + + inputs = features["inputs"] + batch_size = tf.shape(inputs)[0] + # TODO(llion): Support class modality + decode_length = tf.shape(inputs)[1] + decode_length + + # TODO(llion): Clean up this reshaping logic. + inputs = tf.expand_dims(inputs, axis=1) + if len(inputs.shape) < 5: + inputs = tf.expand_dims(inputs, axis=4) + s = tf.shape(inputs) + inputs = tf.reshape(inputs, [s[0] * s[1], s[2], s[3], s[4]]) + # _shard_features called to ensure that the variable names match + inputs = self._shard_features({"inputs": inputs})["inputs"] + input_modality = self._problem_hparams.input_modality["inputs"] + with tf.variable_scope(input_modality.name): + inputs = input_modality.bottom_sharded(inputs, dp) + with tf.variable_scope("body"): + encoder_output, encoder_decoder_attention_bias = dp( + self.encode, inputs, features["target_space_id"], hparams) + + if hparams.pos == "timing": + timing_signal = common_attention.get_timing_signal_1d( + decode_length + 1, hparams.hidden_size) + + target_modality = self._problem_hparams.target_modality + + def preprocess_targets(targets, i): + """Performs preprocessing steps on the targets to prepare for the decoder. + + This includes: + - Embedding the ids. + - Flattening to 3D tensor. + - Optionally adding timing signals. + + Args: + targets: inputs ids to the decoder. [batch_size, 1] + i: scalar, Step number of the decoding loop. + + Returns: + Processed targets [batch_size, 1, hidden_dim] + """ + # _shard_features called to ensure that the variable names match + targets = self._shard_features({"targets": targets})["targets"] + with tf.variable_scope(target_modality.name): + targets = target_modality.targets_bottom_sharded(targets, dp)[0] + targets = common_layers.flatten4d3d(targets) + + # TODO(llion): Explain! Is this even needed? + targets = tf.cond( + tf.equal(i, 0), + lambda: tf.zeros_like(targets), + lambda: targets) + + if hparams.pos == "timing": + targets += timing_signal[:, i:i+1] + return targets + + decoder_self_attention_bias = ( + common_attention.attention_bias_lower_triangle(decode_length)) + if hparams.proximity_bias: + decoder_self_attention_bias += common_attention.attention_bias_proximal( + decode_length) + + def symbols_to_logits_fn(ids, i, cache): + """Go from ids to logits for next symbol.""" + targets = tf.expand_dims(tf.expand_dims(ids, axis=2), axis=3) + targets = preprocess_targets(targets, i) + + bias = decoder_self_attention_bias[:, :, i:i+1, :i+1] + + with tf.variable_scope("body"): + body_outputs = self._data_parallelism( + self.decode, + targets, + encoder_output[0], + encoder_decoder_attention_bias[0], + bias, + hparams, + cache) + + with tf.variable_scope(target_modality.name): + logits = target_modality.top_sharded(body_outputs, None, dp)[0] + + return tf.squeeze(logits, axis=[1, 2, 3]) + + def inner_loop(i, next_id, decoded_ids, cache): + logits = symbols_to_logits_fn(next_id, i, cache) + next_id = tf.expand_dims(tf.argmax(logits, axis=-1), axis=1) + decoded_ids = tf.concat([decoded_ids, next_id], axis=1) + return i+1, next_id, decoded_ids, cache + + key_channels = hparams.attention_key_channels or hparams.hidden_size + value_channels = hparams.attention_value_channels or hparams.hidden_size + num_layers = hparams.num_decoder_layers or hparams.num_hidden_layers + + cache = { + "layer_%d" % layer: { + "k": tf.zeros([batch_size, 0, key_channels]), + "v": tf.zeros([batch_size, 0, value_channels]), + } for layer in range(num_layers) + } + decoded_ids = tf.zeros([batch_size, 0], dtype=tf.int64) + next_id = tf.zeros([batch_size, 1], dtype=tf.int64) + _, _, decoded_ids, _ = tf.while_loop( + # TODO(llion): Early stopping. + lambda i, *_: tf.less(i, decode_length), + inner_loop, + [tf.constant(0), next_id, decoded_ids, cache], + shape_invariants=[ + tf.TensorShape([]), + tf.TensorShape([None, None]), + tf.TensorShape([None, None]), + {"layer_%d" % layer: { + "k": tf.TensorShape([None, None, key_channels]), + "v": tf.TensorShape([None, None, value_channels]), + } for layer in range(num_layers)} + ]) + + return decoded_ids, None, None + @registry.register_model class TransformerEncoder(t2t_model.T2TModel): @@ -293,6 +443,7 @@ def transformer_decoder(decoder_input, decoder_self_attention_bias, encoder_decoder_attention_bias, hparams, + cache=None, name="decoder"): """A stack of transformer layers. @@ -304,6 +455,8 @@ def transformer_decoder(decoder_input, encoder_decoder_attention_bias: bias Tensor for encoder-decoder attention (see common_attention.attention_bias()) hparams: hyperparameters for model + cache: dict, containing tensors which are the results of previous + attentions, used for fast decoding. name: a string Returns: @@ -313,20 +466,28 @@ def transformer_decoder(decoder_input, with tf.variable_scope(name): for layer in xrange(hparams.num_decoder_layers or hparams.num_hidden_layers): - with tf.variable_scope("layer_%d" % layer): + layer_name = "layer_%d" % layer + layer_cache = cache[layer_name] if cache is not None else None + with tf.variable_scope(layer_name): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( - common_layers.layer_preprocess( - x, hparams), None, decoder_self_attention_bias, + common_layers.layer_preprocess(x, hparams), + None, + decoder_self_attention_bias, hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + cache=layer_cache) x = common_layers.layer_postprocess(x, y, hparams) if encoder_output is not None: with tf.variable_scope("encdec_attention"): + # TODO(llion): Add caching. y = common_attention.multihead_attention( - common_layers.layer_preprocess( - x, hparams), encoder_output, encoder_decoder_attention_bias, + common_layers.layer_preprocess(x, hparams), + encoder_output, + encoder_decoder_attention_bias, hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, hparams.hidden_size, hparams.num_heads, diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 9e450a670..77e17a494 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -32,16 +32,22 @@ BATCH_SIZE = 3 INPUT_LENGTH = 5 TARGET_LENGTH = 7 -VOCAB_SIZE = 9 +VOCAB_SIZE = 10 class TransformerTest(tf.test.TestCase): - def getModel(self): + def getModel(self, mode=tf.estimator.ModeKeys.TRAIN): hparams = transformer.transformer_small() + hparams.hidden_size = 8 + hparams.filter_size = 32 + hparams.num_heads = 1 + hparams.layer_prepostprocess_dropout = 0.0 + p_hparams = problem_hparams.test_problem_hparams( hparams, VOCAB_SIZE, VOCAB_SIZE) hparams.problems = [p_hparams] + inputs = -1 + np.random.random_integers( VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1)) targets = -1 + np.random.random_integers( @@ -64,6 +70,39 @@ def testTransformer(self): res = session.run(logits) self.assertEqual(res.shape, (BATCH_SIZE, TARGET_LENGTH, 1, 1, VOCAB_SIZE)) + def testGreedyVsFast(self): + model, features = self.getModel() + + decode_length = 2 + + out_logits, _ = model.model_fn(features) + out_logits = tf.squeeze(out_logits[0], axis=[2, 3]) + loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]), + labels=tf.reshape(features["targets"], [-1])) + loss = tf.reduce_mean(loss) + apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss) + + with self.test_session(): + tf.global_variables_initializer().run() + for _ in range(100): + apply_grad.run() + + model, _ = self.getModel(tf.estimator.ModeKeys.PREDICT) + + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + greedy_result, _, _ = model._slow_greedy_infer( + features, decode_length, last_position_only=True) + greedy_result = tf.squeeze(greedy_result, axis=[2, 3]) + + fast_result, _, _ = model.x_greedy_infer(features, decode_length) + + with self.test_session(): + greedy_res = greedy_result.eval() + fast_res = fast_result.eval() + + self.assertEqual(fast_res.shape, (BATCH_SIZE, INPUT_LENGTH + decode_length)) + self.assertAllClose(greedy_res, fast_res) if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 812e5aee3..6d38a5ba8 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -292,7 +292,24 @@ def symbols_to_logits_fn(ids): return {"outputs": ids[:, :top_beams, 1:], "scores": scores} return ids[:, :top_beams, 1:] - def _greedy_infer(self, features, decode_length, last_position_only): + def _greedy_infer(self, features, decode_length, last_position_only): + """A greedy inference method. + + Models should ideally implement a more efficient version of this function. + + Args: + features: an map of string to `Tensor` + decode_length: an integer. How many additional timesteps to decode. + last_position_only: a boolean, speed-up by computing last position only. + + Returns: + samples: an integer `Tensor`. + logits: `Tensor` of shape [batch_size, time, 1, 1, vocab_size]. + losses: a dictionary: {loss-name (string): floating point `Scalar`} + """ + return self._slow_greedy_infer(features, decode_length, last_position_only) + + def _slow_greedy_infer(self, features, decode_length, last_position_only): """A slow greedy inference method. Quadratic time in decode_length. From aa40c4b373ee76900497b1c93f122fc49317e776 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 18 Sep 2017 20:18:50 -0700 Subject: [PATCH 0406/4095] Update experiment function signature to (run_config, hparams) PiperOrigin-RevId: 169187769 --- tensor2tensor/bin/t2t-trainer | 7 ++ tensor2tensor/utils/model_builder.py | 17 +---- tensor2tensor/utils/trainer_utils.py | 93 ++++++++++++----------- tensor2tensor/utils/trainer_utils_test.py | 23 ++++-- 4 files changed, 75 insertions(+), 65 deletions(-) diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 7c7b48932..5defbb465 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -43,6 +43,7 @@ import tensorflow as tf flags = tf.flags FLAGS = flags.FLAGS +# See trainer_utils.py for additional command-line flags. flags.DEFINE_string("t2t_usr_dir", "", "Path to a Python module that will be imported. The " "__init__.py file should include the necessary imports. " @@ -53,6 +54,12 @@ flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", "Temporary storage directory.") flags.DEFINE_bool("generate_data", False, "Generate data before training?") +flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") +flags.DEFINE_string("output_dir", "", "Base output directory for run.") +flags.DEFINE_string("master", "", "Address of TensorFlow master.") +flags.DEFINE_string("schedule", "local_run", + "Method of tf.contrib.learn.Experiment to run.") + def main(_): tf.logging.set_verbosity(tf.logging.INFO) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 7c4172743..a0d362035 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -50,9 +50,7 @@ def model_fn(model, worker_id=0, worker_replicas=1, eval_run_autoregressive=False, - decode_hparams=None, - autotune=False, - objective=None): + decode_hparams=None): """Builds the model for all modes. * TRAIN: Constructs loss and train_op @@ -72,8 +70,6 @@ def model_fn(model, worker_replicas: int, number of workers. eval_run_autoregressive: bool, whether to run evaluation autoregressively. decode_hparams: HParams for decode settings. Used when mode == PREDICT. - autotune: bool, whether this model is being used for autotuning. - objective: str, the objective if autotune==True. Returns: tf.estimator.EstimatorSpec @@ -193,8 +189,6 @@ def nth_model(n): if mode == tf.estimator.ModeKeys.EVAL: eval_metrics_fns = metrics.create_evaluation_metrics( zip(problem_names, hparams.problem_instances), hparams) - _check_autotune_metrics( - eval_metrics_fns, autotune=autotune, objective=objective) eval_metrics = {} for metric_name, metric_fn in six.iteritems(eval_metrics_fns): @@ -391,15 +385,6 @@ def _exp_decay_after(step, rate, from_which_step): name="exponential_decay_step_cond") -def _check_autotune_metrics(metrics_dict, autotune=False, objective=None): - if not autotune: - return - - if objective not in metrics_dict: - raise ValueError("Tuning objective %s not among evaluation metrics %s" % - (objective, metrics_dict.keys())) - - def _log_variable_sizes(var_list, tag): """Log the sizes and shapes of variables, and the total size. diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 8ed7fb678..f2bb62c1f 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -45,7 +45,6 @@ "If True, logs the contents of the registry and exits.") flags.DEFINE_bool("tfdbg", False, "If True, use the TF debugger CLI on train/eval.") -flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("model", "", "Which model to use.") flags.DEFINE_string("hparams_set", "", "Which parameters to use.") flags.DEFINE_string("hparams_range", "", "Parameters range.") @@ -61,7 +60,6 @@ flags.DEFINE_string("data_dir", "/tmp/data", "Directory with training data.") flags.DEFINE_integer("train_steps", 250000, "The number of steps to run training for.") -flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") flags.DEFINE_bool("eval_run_autoregressive", False, "Run eval autoregressively where we condition on previous" "generated output instead of the actual target.") @@ -80,9 +78,6 @@ "Whether to log device placement.") # Distributed training flags -flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_string("schedule", "local_run", - "Method of tf.contrib.learn.Experiment to run.") flags.DEFINE_integer("local_eval_frequency", 2000, "Run evaluation every this steps during local training.") flags.DEFINE_bool("locally_shard_to_cpu", False, @@ -91,7 +86,7 @@ flags.DEFINE_bool("daisy_chain_variables", True, "copy variables around in a daisy chain") flags.DEFINE_bool("sync", False, "Sync compute on PS.") -flags.DEFINE_string("worker_job", "/job:worker", "name of worker job") +flags.DEFINE_string("worker_job", "/job:localhost", "name of worker job") flags.DEFINE_integer("worker_gpu", 1, "How many GPUs to use.") flags.DEFINE_integer("worker_replicas", 1, "How many workers to use.") flags.DEFINE_integer("worker_id", 0, "Which worker task are we.") @@ -113,29 +108,26 @@ def make_experiment_fn(data_dir, model_name, train_steps, eval_steps): """Returns experiment_fn for learn_runner. Wraps create_experiment.""" - def experiment_fn(output_dir): + def experiment_fn(run_config, hparams): return create_experiment( - output_dir=output_dir, - data_dir=data_dir, + data_dir, model_name=model_name, train_steps=train_steps, - eval_steps=eval_steps) + eval_steps=eval_steps, + hparams=hparams, + run_config=run_config) return experiment_fn -def create_experiment(output_dir, data_dir, model_name, train_steps, - eval_steps): +def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, + run_config): """Create Experiment.""" - hparams = create_hparams( - FLAGS.hparams_set, FLAGS.problems, data_dir, passed_hparams=FLAGS.hparams) - if FLAGS.worker_id == 0 and FLAGS.schedule in ["local_run", "train"]: - save_metadata(output_dir, hparams) estimator, input_fns = create_experiment_components( - hparams=hparams, - output_dir=output_dir, data_dir=data_dir, - model_name=model_name) + model_name=model_name, + hparams=hparams, + run_config=run_config) train_monitors = [] eval_hooks = [] if FLAGS.tfdbg: @@ -153,9 +145,12 @@ def create_experiment(output_dir, data_dir, model_name, train_steps, eval_hooks=eval_hooks) -def create_experiment_components(hparams, output_dir, data_dir, model_name): +def create_experiment_components(data_dir, model_name, hparams, run_config): """Constructs and returns Estimator and train/eval input functions.""" - tf.logging.info("Creating experiment, storing model files in %s", output_dir) + tf.logging.info("Creating experiment, storing model files in %s", + run_config.model_dir) + + hparams = add_problem_hparams(hparams, FLAGS.problems) num_datashards = devices.data_parallelism().n train_input_fn = input_fn_builder.build_input_fn( @@ -176,11 +171,6 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): worker_replicas=FLAGS.worker_replicas, worker_id=FLAGS.worker_id) - autotune = False - objective = None - if hasattr(FLAGS, "autotune"): - autotune = FLAGS.autotune - objective = FLAGS.objective model_fn = model_builder.build_model_fn( model_name, problem_names=FLAGS.problems.split("-"), @@ -188,20 +178,13 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name): worker_id=FLAGS.worker_id, worker_replicas=FLAGS.worker_replicas, eval_run_autoregressive=FLAGS.eval_run_autoregressive, - decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), - autotune=autotune, - objective=objective) + decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams)) + estimator = tf.estimator.Estimator( model_fn=model_fn, - model_dir=output_dir, + model_dir=run_config.model_dir, params=hparams, - config=tf.contrib.learn.RunConfig( - master=FLAGS.master, - gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction, - session_config=session_config(), - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, - save_checkpoints_secs=FLAGS.save_checkpoints_secs)) + config=run_config) return estimator, { tf.estimator.ModeKeys.TRAIN: train_input_fn, @@ -279,7 +262,7 @@ def save_metadata(output_dir, hparams): f.write(hparams.to_json()) -def create_hparams(params_id, problems, data_dir, passed_hparams=None): +def create_hparams(params_id, data_dir, passed_hparams=None): """Returns hyperparameters, including any flag value overrides. If the hparams FLAG is set, then it will use any values specified in @@ -288,7 +271,6 @@ def create_hparams(params_id, problems, data_dir, passed_hparams=None): Args: params_id: which set of parameters to choose (must be in _PARAMS above). - problems: the string with problem names to get problem_hparams from. data_dir: the directory containing the training data. passed_hparams: command-line overrides for some hparams. @@ -301,7 +283,22 @@ def create_hparams(params_id, problems, data_dir, passed_hparams=None): if passed_hparams: hparams = hparams.parse(passed_hparams) - return add_problem_hparams(hparams, problems) + return hparams + + +def create_run_config(output_dir): + """Create a RunConfig object.""" + + run_config = tf.contrib.learn.RunConfig( + model_dir=output_dir, + master=FLAGS.master, + gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction, + session_config=session_config(), + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, + save_checkpoints_secs=FLAGS.save_checkpoints_secs) + + return run_config def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): @@ -327,9 +324,17 @@ def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): train_steps=train_steps, eval_steps=eval_steps) + # Create hparams and run_config + run_config = create_run_config(output_dir) + hparams = create_hparams( + FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams) + if FLAGS.worker_id == 0 and schedule in ["local_run", "train"]: + save_metadata(output_dir, hparams) + if schedule == "local_run": # Run the local demo. - exp = exp_fn(output_dir) + + exp = exp_fn(run_config, hparams) if exp.train_steps > 0 and exp.eval_steps > 0: tf.logging.info("Performing local training and evaluation.") exp.train_and_evaluate() @@ -341,8 +346,10 @@ def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): exp.evaluate(delay_secs=0) else: # Perform distributed training/evaluation. - learn_runner.run( - experiment_fn=exp_fn, schedule=schedule, output_dir=output_dir) + learn_runner.run(experiment_fn=exp_fn, + schedule=schedule, + run_config=run_config, + hparams=hparams) def validate_flags(): diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 6045dd2e0..1a971ac0c 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -33,8 +33,14 @@ import tensorflow as tf +flags = tf.flags FLAGS = tf.flags.FLAGS +flags.DEFINE_string("schedule", "local_run", "") +flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") +flags.DEFINE_string("master", "", "Address of TensorFlow master.") +flags.DEFINE_string("output_dir", "", "Base output directory for run.") + @registry.register_problem class TinyAlgo(algorithmic.AlgorithmicIdentityBinary40): @@ -84,13 +90,17 @@ def testHParamsImported(self): def testSingleStep(self): model_name = "transformer" - FLAGS.hparams_set = "transformer_test" + FLAGS.worker_job = "/job:localhost" + data_dir = TrainerUtilsTest.data_dir + hparams = trainer_utils.create_hparams("transformer_test", data_dir) exp = trainer_utils.create_experiment( - output_dir=tf.test.get_temp_dir(), - data_dir=TrainerUtilsTest.data_dir, + data_dir=data_dir, model_name=model_name, train_steps=1, - eval_steps=1) + eval_steps=1, + hparams=hparams, + run_config=trainer_utils.create_run_config( + output_dir=tf.test.get_temp_dir())) exp.test() def testSingleEvalStepRawSession(self): @@ -100,12 +110,13 @@ def testSingleEvalStepRawSession(self): model_name = "transformer" FLAGS.hparams_set = "transformer_test" FLAGS.problems = "tiny_algo" + FLAGS.worker_job = "/job:localhost" data_dir = "/tmp" # Used only when a vocab file or such like is needed. # Create the problem object, hparams, placeholders, features dict. encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir) - hparams = trainer_utils.create_hparams(FLAGS.hparams_set, FLAGS.problems, - data_dir) + hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir) + hparams = trainer_utils.add_problem_hparams(hparams, FLAGS.problems) inputs_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1]) # Make it 4D. # In INFER mode targets can be None. From aec87db8df301d33b7a84722e72de19832963bd7 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 19 Sep 2017 10:50:54 -0700 Subject: [PATCH 0407/4095] [tf.contrib.data] Standardize transformation functions for use with `Dataset.apply()`. PiperOrigin-RevId: 169264919 --- tensor2tensor/utils/data_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index d94e85e39..acf4ae026 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -367,8 +367,8 @@ def batching_fn(bucket_id, grouped_dataset): if hasattr(dataset, "apply"): # If the Dataset supports dynamic window size, use it. dataset = dataset.apply( - tf.contrib.data.group_by_window, - args=(example_to_bucket_id, batching_fn, None, window_size_fn)) + tf.contrib.data.group_by_window( + example_to_bucket_id, batching_fn, None, window_size_fn)) else: dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, window_size) From 0b8573c7d89f55f6e9d8c3ca7d7d7640293e09e9 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 19 Sep 2017 12:15:37 -0700 Subject: [PATCH 0408/4095] @recompute_grad decorator PiperOrigin-RevId: 169279745 --- tensor2tensor/layers/rev_block.py | 47 ++++++++++++++++++++++++-- tensor2tensor/layers/rev_block_test.py | 26 ++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 8502e0a8b..3dff92c5c 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -18,11 +18,15 @@ From [The Reversible Residual Network: Backpropagation Without Storing Activations](https://arxiv.org/abs/1707.04585). + +Also contains the @recompute_grad decorator, which recomputes the forward +function on the backwards pass. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools import re # Dependency imports @@ -286,8 +290,8 @@ def custom_grad_fn(inputs, variables, ys, grad_ys): # idxs. f_var_grads.reverse() g_var_grads.reverse() - for idxs, grads in list(zip(f_vars_idxs, f_var_grads)) + list(zip( - g_vars_idxs, g_var_grads)): + for idxs, grads in list(zip(f_vars_idxs, f_var_grads)) + list( + zip(g_vars_idxs, g_var_grads)): for i, grad in zip(idxs, grads): variable_grads[i] = grad @@ -316,3 +320,42 @@ def forward(x1, x2, *side_inputs): gate_outputs=is_training) return forward(x1, x2, *(f_side_input + g_side_input)) + + +def recompute_grad(fn): + """Decorator that recomputes the function on the backwards pass. + + Args: + fn: a function that takes Tensors (all as positional arguments) and returns + a tuple of Tensors. + + Returns: + A wrapped fn that is identical to fn when called, but its activations will + be discarded and recomputed on the backwards pass (i.e. on a call to + tf.gradients). + """ + + @functools.wraps(fn) + def wrapped(*args): + return _recompute_grad(fn, args) + + return wrapped + + +def _recompute_grad(fn, args): + """See recompute_grad.""" + + def grad_fn(inputs, variables, outputs, output_grads): + del outputs + # recompute outputs + outputs = fn(*inputs) + grads = tf.gradients(outputs, inputs + variables, output_grads) + grad_inputs = grads[:len(inputs)] + grad_vars = grads[len(inputs):] + return grad_inputs, grad_vars + + @common_layers.fn_with_custom_grad(grad_fn) + def fn_with_recompute(*args): + return fn(*args) + + return fn_with_recompute(*args) diff --git a/tensor2tensor/layers/rev_block_test.py b/tensor2tensor/layers/rev_block_test.py index 5aecc8ea3..3e5f7c932 100644 --- a/tensor2tensor/layers/rev_block_test.py +++ b/tensor2tensor/layers/rev_block_test.py @@ -137,5 +137,31 @@ def f(x): self._testRevBlock(x=x, f=f) +class RecomputeTest(tf.test.TestCase): + + def testRecompute(self): + + @rev_block.recompute_grad + def fn_recompute(x, y): + return x + y, x**y + + def fn(x, y): + return x + y, x**y + + x = tf.ones((3, 3)) + y = tf.ones((3, 3)) + out1 = tf.reduce_sum(fn_recompute(x, y)) + out2 = tf.reduce_sum(fn(x, y)) + + grad1 = tf.gradients(out1, [x, y]) + grad2 = tf.gradients(out2, [x, y]) + + with self.test_session() as sess: + outs = sess.run([out1, out2, grad1, grad2]) + self.assertAllClose(outs[0], outs[1]) + for g1, g2 in zip(outs[2], outs[3]): + self.assertAllClose(g1, g2) + + if __name__ == "__main__": tf.test.main() From 77e91f6c3414ae0135eb710d4862aca07fc26c9d Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 19 Sep 2017 14:11:00 -0700 Subject: [PATCH 0409/4095] Register `lstm_seq2seq` hparams. PiperOrigin-RevId: 169297690 --- tensor2tensor/models/lstm.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 20475a5a9..f336bd6b4 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -266,13 +266,20 @@ def model_fn_body(self, features): @registry.register_hparams -def lstm_attention(): - """hparams for LSTM with attention.""" +def lstm_seq2seq(): + """hparams for LSTM.""" hparams = common_hparams.basic_params1() hparams.batch_size = 1024 hparams.hidden_size = 128 hparams.num_hidden_layers = 2 hparams.initializer = "uniform_unit_scaling" + return hparams + + +@registry.register_hparams +def lstm_attention(): + """hparams for LSTM with attention.""" + hparams = lstm_seq2seq() # Attention hparams.add_hparam("attn_vec_size", hparams.hidden_size) From 12126bd1306a0a7876e617ce664e610ec1e1b22a Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Tue, 19 Sep 2017 14:18:36 -0700 Subject: [PATCH 0410/4095] Add flag to profile ops/memory PiperOrigin-RevId: 169299088 --- tensor2tensor/utils/trainer_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index f2bb62c1f..50cfcc5d0 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -35,6 +35,7 @@ from tensor2tensor.utils import registry import tensorflow as tf +from tensorflow.contrib.hooks.python.training.profiler_hook import ProfilerHook from tensorflow.contrib.learn.python.learn import learn_runner from tensorflow.python import debug @@ -45,6 +46,8 @@ "If True, logs the contents of the registry and exits.") flags.DEFINE_bool("tfdbg", False, "If True, use the TF debugger CLI on train/eval.") +flags.DEFINE_bool("dbgprofile", False, + "If True, record the timeline for chrome://tracing/.") flags.DEFINE_string("model", "", "Which model to use.") flags.DEFINE_string("hparams_set", "", "Which parameters to use.") flags.DEFINE_string("hparams_range", "", "Parameters range.") @@ -134,6 +137,15 @@ def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, hook = debug.LocalCLIDebugHook() train_monitors.append(hook) eval_hooks.append(hook) + if FLAGS.dbgprofile: + # Recorded traces can be visualized with chrome://tracing/ + # The memory/tensor lifetime is also profiled + train_monitors.append(ProfilerHook( + save_steps=10, + output_dir=run_config.model_dir, + show_dataflow=True, + show_memory=True, + )) return tf.contrib.learn.Experiment( estimator=estimator, train_input_fn=input_fns[tf.estimator.ModeKeys.TRAIN], From 9d63460a32a6abe44f4caf4a7112a8f3708d2263 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 19 Sep 2017 14:23:31 -0700 Subject: [PATCH 0411/4095] Enable fast decoding. PiperOrigin-RevId: 169299895 --- tensor2tensor/models/transformer.py | 5 ++--- tensor2tensor/models/transformer_test.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 918fc8645..9fe0bc5f7 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -145,8 +145,7 @@ def model_fn_body(self, features): decoder_self_attention_bias, hparams) - # TODO(llion): Enable fast inference once it's been fully tested. - def x_greedy_infer( + def _greedy_infer( self, features, decode_length, last_position_only=True): """Fast version of greedy decoding. @@ -242,7 +241,7 @@ def symbols_to_logits_fn(ids, i, cache): bias = decoder_self_attention_bias[:, :, i:i+1, :i+1] with tf.variable_scope("body"): - body_outputs = self._data_parallelism( + body_outputs = dp( self.decode, targets, encoder_output[0], diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 77e17a494..04c527ac1 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -95,7 +95,7 @@ def testGreedyVsFast(self): features, decode_length, last_position_only=True) greedy_result = tf.squeeze(greedy_result, axis=[2, 3]) - fast_result, _, _ = model.x_greedy_infer(features, decode_length) + fast_result, _, _ = model._greedy_infer(features, decode_length) with self.test_session(): greedy_res = greedy_result.eval() From bc191b54e0e3977ef9016384e2ec4920e660e70c Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Tue, 19 Sep 2017 15:39:34 -0700 Subject: [PATCH 0412/4095] Fix formatting in identity output PiperOrigin-RevId: 169312588 --- tensor2tensor/utils/decoding.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index fc5f22c1a..664935c94 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -74,14 +74,18 @@ def log_decode_results(inputs, (problem_name, prediction_idx)) show_and_save_image(inputs / 255., save_path) elif inputs_vocab: - decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) + if identity_output: + decoded_inputs = " ".join(map(str, inputs.flatten())) + else: + decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) + tf.logging.info("Inference results INPUT: %s" % decoded_inputs) decoded_targets = None if identity_output: - decoded_outputs = "".join(map(str, outputs.flatten())) + decoded_outputs = " ".join(map(str, outputs.flatten())) if targets is not None: - decoded_targets = "".join(map(str, targets.flatten())) + decoded_targets = " ".join(map(str, targets.flatten())) else: decoded_outputs = "".join( map(str, targets_vocab.decode(_save_until_eos(outputs.flatten())))) From f07b59f398f8112ea1ff99f120c6d94babb74905 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 19 Sep 2017 21:01:28 -0700 Subject: [PATCH 0413/4095] Fix output shape of TransformerEncoder PiperOrigin-RevId: 169345762 --- tensor2tensor/models/transformer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 9fe0bc5f7..b4f083eca 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -309,6 +309,7 @@ def model_fn_body(self, features): 1.0 - hparams.layer_prepostprocess_dropout) encoder_output = transformer_encoder(encoder_input, encoder_self_attention_bias, hparams) + encoder_output = tf.expand_dims(encoder_output, 2) return encoder_output From 21b3b55fa60cefab78581b8c536ad089b2315f46 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 19 Sep 2017 21:22:36 -0700 Subject: [PATCH 0414/4095] SavedModel export and decoding fixes PiperOrigin-RevId: 169347220 --- tensor2tensor/bin/t2t-decoder | 13 +++-- tensor2tensor/utils/data_reader.py | 67 ++++++++++++++++++++--- tensor2tensor/utils/model_builder.py | 12 +++- tensor2tensor/utils/trainer_utils.py | 29 ++++++++-- tensor2tensor/utils/trainer_utils_test.py | 3 +- 5 files changed, 104 insertions(+), 20 deletions(-) diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index 8da8ae5a2..5b5b09555 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -46,6 +46,7 @@ import tensorflow as tf flags = tf.flags FLAGS = flags.FLAGS +flags.DEFINE_string("output_dir", "", "Training directory to load from.") flags.DEFINE_string("decode_from_file", None, "Path to decode file") flags.DEFINE_string("decode_to_file", None, "Path prefix to inference output file") @@ -58,6 +59,8 @@ flags.DEFINE_string("t2t_usr_dir", "", "The imported files should contain registrations, " "e.g. @registry.register_model calls, that will then be " "available to the t2t-decoder.") +flags.DEFINE_string("master", "", "Address of TensorFlow master.") +flags.DEFINE_string("schedule", "local_run", "Must be local_run for decoding.") def main(_): @@ -65,16 +68,18 @@ def main(_): usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) trainer_utils.log_registry() trainer_utils.validate_flags() + assert FLAGS.schedule == "local_run" data_dir = os.path.expanduser(FLAGS.data_dir) output_dir = os.path.expanduser(FLAGS.output_dir) hparams = trainer_utils.create_hparams( - FLAGS.hparams_set, FLAGS.problems, data_dir, passed_hparams=FLAGS.hparams) + FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams) + hparams = trainer_utils.add_problem_hparams(hparams, FLAGS.problems) estimator, _ = trainer_utils.create_experiment_components( - hparams=hparams, - output_dir=output_dir, data_dir=data_dir, - model_name=FLAGS.model) + model_name=FLAGS.model, + hparams=hparams, + run_config=trainer_utils.create_run_config(output_dir)) decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) decode_hp.add_hparam("shards", FLAGS.decode_shards) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index acf4ae026..4b0541d31 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -156,13 +156,30 @@ def cast_int64_to_int32(features): return f -def feature_placeholders(data_fields): - feature_map = {} - for (field, tp) in data_fields: - if not field.startswith("targets"): - feature_map[field] = tf.placeholder( - dtype=tp, shape=[None] * 4, name=field) - return feature_map +def feature_placeholders(data_fields, data_items_to_decoders): + """Construct Placeholders and run decoders.""" + example = {} + for field, config in data_fields.items(): + if isinstance(config, tf.VarLenFeature): + shape = [None] + else: + shape = config.shape + + example[field] = tf.placeholder(dtype=config.dtype, shape=shape, name=field) + + # Decode + if data_items_to_decoders is None: + data_items_to_decoders = { + field: tf.contrib.slim.tfexample_decoder.Tensor(field) + for field in data_fields + } + + decoded_example = {} + for field, decoder in data_items_to_decoders.items(): + keys_to_tensors = {key: example[key] for key in decoder.keys} + decoded_example[field] = decoder.tensors_to_item(keys_to_tensors) + + return decoded_example def default_example_reading_spec(data_file_pattern): @@ -216,7 +233,7 @@ def read_examples(problem, if data_file_pattern is None: # Create placeholders for input, rather than reading data from disk. - return feature_placeholders(data_fields) + return feature_placeholders(data_fields, data_items_to_decoders) is_training = mode == tf.estimator.ModeKeys.TRAIN dataset = examples_reader( @@ -520,3 +537,37 @@ def get_data_filepatterns(problems, data_dir, mode): else: datasets.append("%s-dev*" % path) return datasets + + +def serving_input_fn(problem, hparams): + """Input fn for serving, starting from Placeholders.""" + data_fields, data_items_to_decoders = problem.example_reading_spec() + + # Feature placeholders that mimic what's on disk + example = feature_placeholders(data_fields, data_items_to_decoders) + + # Preprocess + example = problem.preprocess_examples(example, tf.estimator.ModeKeys.PREDICT, + hparams) + example = cast_int64_to_int32(example) + + # 4-D inputs and space ids + constants = {} + constants["target_space_id"] = tf.constant( + problem.get_hparams().target_space_id) + constants["problem_choice"] = tf.constant(0) + if problem.has_inputs: + while len(example["inputs"].get_shape()) != 4: + example["inputs"] = tf.expand_dims(example["inputs"], axis=-1) + constants["input_space_id"] = tf.constant( + problem.get_hparams().input_space_id) + example.pop("targets") + else: + while len(example["targets"].get_shape()) != 4: + example["targets"] = tf.expand_dims(example["targets"], axis=-1) + + features = constants + features.update(example) + + return tf.estimator.export.ServingInputReceiver( + features=features, receiver_tensors=example) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index a0d362035..4a4717bd4 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -182,7 +182,17 @@ def nth_model(n): "problem_choice": batched_problem_choice, } _del_dict_nones(predictions) - return tf.estimator.EstimatorSpec(mode, predictions=predictions) + + export_out = {"outputs": predictions["outputs"]} + if "scores" in predictions: + export_out["scores"] = predictions["scores"] + + return tf.estimator.EstimatorSpec( + mode, + predictions=predictions, + export_outputs={ + "output": tf.estimator.export.PredictOutput(export_out) + }) total_loss, logits = model_output diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 50cfcc5d0..cec1b444d 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -46,6 +46,8 @@ "If True, logs the contents of the registry and exits.") flags.DEFINE_bool("tfdbg", False, "If True, use the TF debugger CLI on train/eval.") +flags.DEFINE_bool("export_saved_model", False, + "Whether to export a SavedModel for serving.") flags.DEFINE_bool("dbgprofile", False, "If True, record the timeline for chrome://tracing/.") flags.DEFINE_string("model", "", "Which model to use.") @@ -131,6 +133,7 @@ def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, model_name=model_name, hparams=hparams, run_config=run_config) + train_monitors = [] eval_hooks = [] if FLAGS.tfdbg: @@ -146,6 +149,15 @@ def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, show_dataflow=True, show_memory=True, )) + + optional_kwargs = {} + if FLAGS.export_saved_model: + assert len(hparams.problem_instances) == 1 + problem = hparams.problem_instances[0] + optional_kwargs["export_strategies"] = [ + make_export_strategy(problem, hparams) + ] + return tf.contrib.learn.Experiment( estimator=estimator, train_input_fn=input_fns[tf.estimator.ModeKeys.TRAIN], @@ -154,7 +166,13 @@ def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, eval_steps=eval_steps, min_eval_frequency=FLAGS.local_eval_frequency, train_monitors=train_monitors, - eval_hooks=eval_hooks) + eval_hooks=eval_hooks, + **optional_kwargs) + + +def make_export_strategy(problem, hparams): + return tf.contrib.learn.make_export_strategy( + lambda: data_reader.serving_input_fn(problem, hparams), as_text=True) def create_experiment_components(data_dir, model_name, hparams, run_config): @@ -358,10 +376,11 @@ def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): exp.evaluate(delay_secs=0) else: # Perform distributed training/evaluation. - learn_runner.run(experiment_fn=exp_fn, - schedule=schedule, - run_config=run_config, - hparams=hparams) + learn_runner.run( + experiment_fn=exp_fn, + schedule=schedule, + run_config=run_config, + hparams=hparams) def validate_flags(): diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 1a971ac0c..5e9e31031 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -90,9 +90,9 @@ def testHParamsImported(self): def testSingleStep(self): model_name = "transformer" - FLAGS.worker_job = "/job:localhost" data_dir = TrainerUtilsTest.data_dir hparams = trainer_utils.create_hparams("transformer_test", data_dir) + hparams = trainer_utils.add_problem_hparams(hparams, FLAGS.problems) exp = trainer_utils.create_experiment( data_dir=data_dir, model_name=model_name, @@ -110,7 +110,6 @@ def testSingleEvalStepRawSession(self): model_name = "transformer" FLAGS.hparams_set = "transformer_test" FLAGS.problems = "tiny_algo" - FLAGS.worker_job = "/job:localhost" data_dir = "/tmp" # Used only when a vocab file or such like is needed. # Create the problem object, hparams, placeholders, features dict. From 620d6a541478d73b93db33f6c75c5d837523f8d0 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 19 Sep 2017 23:06:42 -0700 Subject: [PATCH 0415/4095] Add Travis build shield to README PiperOrigin-RevId: 169354689 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index af9778725..e37db796d 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](http welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) +[![Travis](https://img.shields.io/travis/tensorflow/tensor2tensor.svg)]() [T2T](https://github.com/tensorflow/tensor2tensor) is a modular and extensible library and binaries for supervised learning with TensorFlow and with support From 4280f4402ff68213e2d04b502f4b404a8fb0acfb Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 20 Sep 2017 10:26:31 -0700 Subject: [PATCH 0416/4095] Rm all refs to local_run in favor of train_and_evaluate PiperOrigin-RevId: 169412526 --- tensor2tensor/bin/t2t-decoder | 5 +-- tensor2tensor/bin/t2t-trainer | 2 +- tensor2tensor/utils/devices.py | 2 +- tensor2tensor/utils/trainer_utils.py | 38 +++++++---------------- tensor2tensor/utils/trainer_utils_test.py | 2 +- 5 files changed, 18 insertions(+), 31 deletions(-) diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index 5b5b09555..d2fe41f2f 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -60,7 +60,8 @@ flags.DEFINE_string("t2t_usr_dir", "", "e.g. @registry.register_model calls, that will then be " "available to the t2t-decoder.") flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_string("schedule", "local_run", "Must be local_run for decoding.") +flags.DEFINE_string("schedule", "train_and_evaluate", + "Must be train_and_evaluate for decoding.") def main(_): @@ -68,7 +69,7 @@ def main(_): usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) trainer_utils.log_registry() trainer_utils.validate_flags() - assert FLAGS.schedule == "local_run" + assert FLAGS.schedule == "train_and_evaluate" data_dir = os.path.expanduser(FLAGS.data_dir) output_dir = os.path.expanduser(FLAGS.output_dir) diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 5defbb465..c986522f3 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -57,7 +57,7 @@ flags.DEFINE_bool("generate_data", False, "Generate data before training?") flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_string("schedule", "local_run", +flags.DEFINE_string("schedule", "train_and_evaluate", "Method of tf.contrib.learn.Experiment to run.") diff --git a/tensor2tensor/utils/devices.py b/tensor2tensor/utils/devices.py index d04b73563..d532b6d5f 100644 --- a/tensor2tensor/utils/devices.py +++ b/tensor2tensor/utils/devices.py @@ -109,7 +109,7 @@ def _replica_device_setter(worker_device): ps_tasks=FLAGS.ps_replicas, ps_device=FLAGS.ps_job + "/GPU:0" if FLAGS.ps_gpu > 0 else FLAGS.ps_job) - if FLAGS.schedule == "local_run": + if FLAGS.schedule == "train_and_evaluate": assert not FLAGS.sync datashard_devices = ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] if FLAGS.locally_shard_to_cpu or FLAGS.worker_gpu < 1: diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index cec1b444d..69d981f7c 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -334,11 +334,6 @@ def create_run_config(output_dir): def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): """Runs an Estimator locally or distributed. - This function chooses one of two paths to execute: - - 1. Running locally if schedule=="local_run". - 3. Distributed training/evaluation otherwise. - Args: data_dir: The directory the data can be found in. model: The name of the model to use. @@ -358,29 +353,15 @@ def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): run_config = create_run_config(output_dir) hparams = create_hparams( FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams) - if FLAGS.worker_id == 0 and schedule in ["local_run", "train"]: + + if is_chief(): save_metadata(output_dir, hparams) - if schedule == "local_run": - # Run the local demo. - - exp = exp_fn(run_config, hparams) - if exp.train_steps > 0 and exp.eval_steps > 0: - tf.logging.info("Performing local training and evaluation.") - exp.train_and_evaluate() - elif exp.train_steps > 0: - tf.logging.info("Performing local training.") - exp.train() - elif exp.eval_steps > 0: - tf.logging.info("Performing local evaluation.") - exp.evaluate(delay_secs=0) - else: - # Perform distributed training/evaluation. - learn_runner.run( - experiment_fn=exp_fn, - schedule=schedule, - run_config=run_config, - hparams=hparams) + learn_runner.run( + experiment_fn=exp_fn, + schedule=schedule, + run_config=run_config, + hparams=hparams) def validate_flags(): @@ -398,6 +379,11 @@ def validate_flags(): "Using default output_dir=%s.", FLAGS.output_dir) +def is_chief(): + schedules = ["train", "train_and_evaluate"] + return FLAGS.worker_id == 0 and FLAGS.schedule in schedules + + def session_config(): """The TensorFlow Session config to use.""" graph_options = tf.GraphOptions(optimizer_options=tf.OptimizerOptions( diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 5e9e31031..16a8149f4 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -36,7 +36,7 @@ flags = tf.flags FLAGS = tf.flags.FLAGS -flags.DEFINE_string("schedule", "local_run", "") +flags.DEFINE_string("schedule", "train_and_evaluate", "") flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") flags.DEFINE_string("master", "", "Address of TensorFlow master.") flags.DEFINE_string("output_dir", "", "Base output directory for run.") From 0841742e0b88640999312ecd23a454a49bc04412 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 20 Sep 2017 17:20:28 -0700 Subject: [PATCH 0417/4095] Support class modality in fast decoding. PiperOrigin-RevId: 169476287 --- tensor2tensor/models/transformer.py | 9 +++++---- tensor2tensor/utils/t2t_model.py | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index b4f083eca..7d4ce27be 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -172,8 +172,11 @@ def _greedy_infer( inputs = features["inputs"] batch_size = tf.shape(inputs)[0] - # TODO(llion): Support class modality - decode_length = tf.shape(inputs)[1] + decode_length + target_modality = self._problem_hparams.target_modality + if t2t_model.is_class_modality(target_modality): + decode_length = 1 + else: + decode_length = tf.shape(inputs)[1] + decode_length # TODO(llion): Clean up this reshaping logic. inputs = tf.expand_dims(inputs, axis=1) @@ -194,8 +197,6 @@ def _greedy_infer( timing_signal = common_attention.get_timing_signal_1d( decode_length + 1, hparams.hidden_size) - target_modality = self._problem_hparams.target_modality - def preprocess_targets(targets, i): """Performs preprocessing steps on the targets to prepare for the decoder. diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 6d38a5ba8..3fc110ebf 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -44,7 +44,7 @@ def fn_with_timing(*args, **kwargs): return fn_with_timing -def _is_class_modality(mod): +def is_class_modality(mod): # TODO(lukaszkaiser): should be based on type, like CLASS_LABEL, not string. prefix = "class_label_modality_" if len(mod.name) < len(prefix): @@ -198,7 +198,7 @@ def infer(self, # generated sequences, than to see the most likely sequence repeatedly. beam_size = 1 self._hparams.sampling_method = "random" - if _is_class_modality( + if is_class_modality( self._hparams.problems[self._problem_idx].target_modality): beam_size = 1 # No use to run beam-search for a single class. if beam_size == 1: @@ -371,7 +371,7 @@ def infer_step(recent_output, recent_logits, unused_loss): initial_output = tf.slice(initial_output, [0, 0, 0, 0], tf.shape(initial_output)) target_modality = self._hparams.problems[self._problem_idx].target_modality - if _is_class_modality(target_modality): + if is_class_modality(target_modality): decode_length = 1 else: decode_length = tf.shape(features["inputs"])[1] + decode_length From 09f1f17d1f673a57cbf69dbb65176a34be426f6b Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 20 Sep 2017 18:18:33 -0700 Subject: [PATCH 0418/4095] Minimally port remaining problems to Problem class PiperOrigin-RevId: 169482949 --- tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/problem.py | 5 +- .../data_generators/problem_hparams.py | 510 ++++++------------ .../data_generators/problem_hparams_test.py | 50 -- tensor2tensor/models/bluenet_test.py | 3 +- tensor2tensor/models/bytenet_test.py | 3 +- tensor2tensor/models/lstm_test.py | 6 +- tensor2tensor/models/neural_gpu_test.py | 2 +- .../models/transformer_revnet_test.py | 3 +- tensor2tensor/models/transformer_test.py | 3 +- tensor2tensor/models/xception_test.py | 3 +- tensor2tensor/utils/beam_search.py | 1 - tensor2tensor/utils/data_reader.py | 81 +-- tensor2tensor/utils/data_reader_test.py | 2 +- tensor2tensor/utils/trainer_utils.py | 25 +- 15 files changed, 198 insertions(+), 500 deletions(-) delete mode 100644 tensor2tensor/data_generators/problem_hparams_test.py diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 52354704d..5877b541e 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -29,6 +29,7 @@ from tensor2tensor.data_generators import image from tensor2tensor.data_generators import imdb from tensor2tensor.data_generators import lm1b +from tensor2tensor.data_generators import problem_hparams from tensor2tensor.data_generators import ptb from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wiki diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index a006d5627..4ada1d212 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -257,10 +257,9 @@ def get_hparams(self, model_hparams=None): if self._hparams is not None: return self._hparams - assert model_hparams is not None - if self._encoders is None: - self.get_feature_encoders(model_hparams.data_dir) + data_dir = (model_hparams and model_hparams.data_dir) or None + self.get_feature_encoders(data_dir) hp = _default_hparams() ret = self.hparams(hp, model_hparams) diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 147fc7538..88212b0db 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -24,345 +24,185 @@ # Dependency imports +from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder from tensor2tensor.layers import modalities # pylint: disable=unused-import from tensor2tensor.utils import registry import tensorflow as tf - -def problem_hparams(problem_name, model_hparams): - """Generate problem hyperparameters based on problem name. - - Args: - problem_name: a string - model_hparams: a tf.contrib.training.HParams - - Returns: - a tf.contrib.training.HParams - """ - base_name, was_reversed, was_copy = parse_problem_name(problem_name) - p = _lookup_problem_hparams_fn(base_name)(model_hparams) - if was_reversed: - _reverse_problem_hparams(p) - if was_copy: - _copy_problem_hparams(p) - return p - - -def parse_problem_name(problem_name): - """Determines if problem_name specifies a copy and/or reversal. - - Args: - problem_name: A string containing a single problem name from FLAGS.problems. - - Returns: - base_name: A string with the base problem name. - was_reversed: A boolean. - was_copy: A boolean. - """ - # Recursively strip tags until we reach a base name. - if problem_name.endswith("_rev"): - base, _, was_copy = parse_problem_name(problem_name[:-4]) - return base, True, was_copy - elif problem_name.endswith("_copy"): - base, was_reversed, _ = parse_problem_name(problem_name[:-5]) - return base, was_reversed, True - return problem_name, False, False - - -def _lookup_problem_hparams_fn(name): - if name not in PROBLEM_HPARAMS_MAP: - map_str = "* " + "\n* ".join(sorted(PROBLEM_HPARAMS_MAP.keys())) - error_msg = "%s not in the supported set of problems:\n%s" % (name, map_str) - raise LookupError(error_msg) - return PROBLEM_HPARAMS_MAP.get(name) - - -def _copy_problem_hparams(p_hparams): - """Use input modality, vocab, and space id for target.""" - p = p_hparams - # Duplicate input modality. - p.target_modality = p.input_modality["inputs"] - # Duplicate input vocabulary. - p.vocabulary["targets"] = p.vocabulary["inputs"] - # Duplicate input space ids. - p.target_space_id = p.input_space_id - # Mark that p was reversed. - p.was_copy = True - - -def _reverse_problem_hparams(p_hparams): - """Swap input/output modalities, vocab, and space ids.""" - p = p_hparams - - # Swap modalities. - input_modality = p.input_modality["inputs"] - target_modality = p.target_modality - p.input_modality["inputs"] = target_modality - p.target_modality = input_modality - - # Swap vocabularies. - input_vocabulary = p.vocabulary["inputs"] - target_vocabulary = p.vocabulary["targets"] - p.vocabulary["inputs"] = target_vocabulary - p.vocabulary["targets"] = input_vocabulary - - # Swap input/target space ids. - input_space_id = p.input_space_id - target_space_id = p.target_space_id - p.input_space_id = target_space_id - p.target_space_id = input_space_id - - # Mark that p was reversed. - p.was_reversed = True - - -def default_problem_hparams(): - """A set of basic model hyperparameters.""" - return tf.contrib.training.HParams( - # Use this parameter to get comparable perplexity numbers with different - # tokenizations. This value should be set to the ratio of the number of - # tokens in the test set according to the tokeization used to the number - # of tokens in the test set in the "official" tokenization. For example, - # if we are using a word-piece based model and we want to compute - # per-word perplexity, then we set loss_multiplier to the number of - # wordpieces per word in the test set. - loss_multiplier=1.0, - - # Use this parameter to allow for larger sequences in the batch. Without - # the use of this parameter, the size of the inner two dimensions will be - # used to judge the sequence length. - batch_size_multiplier=1, - - # To make queues of the right capacity, it's good to know the maximal - # expected batch size, as it can vary a lot. It only affects performance - # of input readers and memory use. The defaults should be safe and fast, - # but decrease if your reader uses a lot of memory and increase if slow. - max_expected_batch_size_per_shard=64, - - # Modalities used to map from input features to a space compatible with - # chosen model architecture. One modality spec (which is a 2-tuple, - # (modality_full_name, vocab_size)) per feature key. modality_full_name is - # a string type:name, e.g. class_label:2d. Leaving off the name uses the - # default modality for that type (e.g. class_label == - # class_label:default). - input_modality={}, - - # Modality used to map from hidden representation to the target space. - # Specified as a modality spec, a 2-tuple described above. - target_modality=None, - - # Identifiers used to tell the model which input/target space will be - # expected. For example, it can tell that we expect French as characters - # as output, or Spanish as sound. An integer with the following semantics: - # 0: Generic / unknown output space (default) - # 1: Image labels - # 2: English characters - # 3: English tokens - # 4: English bpe tokens - # 5: French characters - # 6: French tokens - # 7: German characters - # 8: German tokens - # 9: German bpe tokens - # 10: Digit cipher lexicon 0 - # 11: Digit cipher lexicon 1 - # 12: Audio waveform domain - # 13: Audio spectral domain - # 14: Parse characters - # 15: Parse tokens - # 16: Chinese tokens - # 17: Icelandic characters - # 18: Icelandic tokens - # 19: Icelandic parse tokens - # 20: Macedonian tokens - # 21: Czech tokens - # 22: Czech characters - # Add more above if needed. - input_space_id=0, - target_space_id=0, - - # Vocabulary per feature key. - # a vocabulary converts to/from human-readable strings. - # E.g. {"inputs": text_encoder.ByteTextEncoder(), - # "targets": text_encoder.SubwordTextEncoder("vocab_filename.txt")} - vocabulary={ - "inputs": text_encoder.TextEncoder(), - "targets": text_encoder.TextEncoder() - }, - - # This is a marker to keep track if the problem was reversed or copied. - # Only set automatically, do not override the default. - # - # These tags can be combined in order to perform copies of the input or - # the targets. For instance `problem_copy` will copy the inputs, but - # `problem_rev_copy` will copy the targets. - was_reversed=False, - was_copy=False,) - - -def test_problem_hparams(unused_model_hparams, input_vocab_size, - target_vocab_size): +# TODO(rsepassi): Merge these problems with their data generators. Currenlty +# they only implement the hparams. + + +class AudioTimitProblem(problem.Problem): + """Base class for TIMIT problems.""" + + def example_reading_spec(self): + data_fields = { + "inputs": tf.VarLenFeature(tf.int64), + "audio/sample_count": tf.FixedLenFeature((), tf.int64), + "audio/sample_width": tf.FixedLenFeature((), tf.int64), + "targets": tf.VarLenFeature(tf.int64), + } + return data_fields, None + + def preprocess_examples(self, examples, mode, hparams): + examples = super(AudioTimitProblem, self).preprocess_examples( + examples, mode, hparams) + # Reshape audio to proper shape + sample_count = tf.to_int32(examples.pop("audio/sample_count")) + sample_width = tf.to_int32(examples.pop("audio/sample_width")) + channel_count = 1 + examples["inputs"] = tf.reshape(examples["inputs"], + [sample_count, sample_width, channel_count]) + return examples + + +@registry.register_problem +class AudioTimitCharactersTune(AudioTimitProblem): + """TIMIT to characters.""" + + def feature_encoders(self, _): + return { + "inputs": text_encoder.TextEncoder(), + "targets": text_encoder.ByteTextEncoder(), + } + + def hparams(self, defaults, model_hparams): + hp = defaults + hp.input_modality = { + "inputs": (registry.Modalities.AUDIO, None), + } + hp.target_modality = (registry.Modalities.SYMBOL, 256) + + +@registry.register_problem +class AudioTimitTokens8kTune(AudioTimitProblem): + """TIMIT to tokens.""" + + @property + def target_vocab_size(self): + return 2**13 # 8192 + + def feature_encoders(self, data_dir): + vocab_filename = os.path.join(data_dir, + "vocab.endefr.%d" % self.target_vocab_size) + subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) + return { + "inputs": text_encoder.TextEncoder(), + "targets": subtokenizer, + } + + def hparams(self, defaults, model_hparams): + hp = defaults + hp.input_modality = { + "inputs": (registry.Modalities.AUDIO, None), + } + hp.target_modality = (registry.Modalities.SYMBOL, + self.get_feature_encoders()["targets"].vocab_size) + hp.batch_size_multiplier = 256 + hp.loss_multiplier = 2.0 + hp.input_space_id = 13 + hp.target_space_id = 3 + + +@registry.register_problem +class AudioTimitTokens8kTest(AudioTimitTokens8kTune): + """TIMIT to tokens.""" + pass + + +@registry.register_problem +class ParsingEnglishPtb8k(problem.Problem): + """Parsing.""" + + @property + def target_vocab_size(self): + return 2**13 # 8192 + + def feature_encoders(self, data_dir): + vocab_filename = os.path.join(data_dir, + "vocab.endefr.%d" % self.target_vocab_size) + subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) + return { + "inputs": subtokenizer, + "targets": subtokenizer, + } + + def hparams(self, defaults, model_hparams): + hp = defaults + hp.input_modality = { + "inputs": (registry.Modalities.SYMBOL, + self.get_feature_encoders()["inputs"].vocab_size), + } + hp.target_modality = (registry.Modalities.SYMBOL, + self.get_feature_encoders()["targets"].vocab_size) + hp.batch_size_multiplier = 256 + hp.loss_multiplier = 2.0 + hp.input_space_id = 3 + hp.target_space_id = 15 + + +@registry.register_problem +class ParsingEnglishPtb16k(problem.Problem): + """Parsing.""" + + @property + def vocab_prefix(self): + return "wsj" + + @property + def inputs_target_vocab_size(self): + return 2**9 # 512 + + @property + def targets_target_vocab_size(self): + return 2**14 # 16384 + + def feature_encoders(self, data_dir): + source_vocab_filename = os.path.join( + data_dir, + self.vocab_prefix + "_source.vocab.%d" % self.inputs_target_vocab_size) + target_vocab_filename = os.path.join( + data_dir, + self.vocab_prefix + "_target.vocab.%d" % self.targets_target_vocab_size) + source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_subtokenizer, + "targets": target_subtokenizer, + } + + def hparams(self, defaults, model_hparams): + hp = defaults + hp.input_modality = { + "inputs": (registry.Modalities.SYMBOL, + self.get_feature_encoders()["inputs"].vocab_size), + } + hp.target_modality = (registry.Modalities.SYMBOL, + self.get_feature_encoders()["targets"].vocab_size) + hp.input_space_id = 3 + hp.target_space_id = 15 + + +class TestProblem(problem.Problem): + """Test problem.""" + + def __init__(self, input_vocab_size, target_vocab_size): + super(TestProblem, self).__init__(False, False) + self.input_vocab_size = input_vocab_size + self.target_vocab_size = target_vocab_size + + def hparams(self, defaults, model_hparams): + hp = defaults + hp.input_modality = { + "inputs": (registry.Modalities.SYMBOL, self.input_vocab_size) + } + hp.target_modality = (registry.Modalities.SYMBOL, self.target_vocab_size) + + +def test_problem_hparams(input_vocab_size=None, target_vocab_size=None): """Problem hparams for testing model bodies.""" - p = default_problem_hparams() - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, input_vocab_size)} - p.target_modality = (registry.Modalities.SYMBOL, target_vocab_size) - p.vocabulary = { - "inputs": text_encoder.TextEncoder(), - "targets": text_encoder.TextEncoder() - } - return p - - -def audio_timit_characters(unused_model_hparams): - """English audio transcription benchmark.""" - p = default_problem_hparams() - p.input_modality = { - "inputs": (registry.Modalities.AUDIO, None), - } - p.target_modality = (registry.Modalities.SYMBOL, 256) - p.vocabulary = { - "inputs": text_encoder.TextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - p.batch_size_multiplier = 256 - p.loss_multiplier = 2.0 - p.input_space_id = 12 - p.target_space_id = 2 - return p - - -def audio_timit_tokens(model_hparams, wrong_vocab_size): - """English audio transcription benchmark. - - Args: - model_hparams: a tf.contrib.training.HParams - wrong_vocab_size: a number used in the filename indicating the approximate - vocabulary size. This is not to be confused with the actual vocabulary - size. - Returns: - a tf.contrib.training.HParams - """ - p = default_problem_hparams() - # This vocab file must be present within the data directory. - vocab_filename = os.path.join(model_hparams.data_dir, - "vocab.endefr.%d" % wrong_vocab_size) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.AUDIO, None), - } - p.target_modality = (registry.Modalities.SYMBOL, subtokenizer.vocab_size) - p.vocabulary = { - "inputs": text_encoder.TextEncoder(), - "targets": subtokenizer, - } - p.batch_size_multiplier = 256 - p.loss_multiplier = 2.0 - p.input_space_id = 13 - p.target_space_id = 3 - return p - - -def wmt_parsing_characters(model_hparams): - """English to parse tree translation benchmark.""" - del model_hparams # Unused. - p = default_problem_hparams() - p.input_modality = {"inputs": (registry.Modalities.SYMBOL, 256)} - p.target_modality = (registry.Modalities.SYMBOL, 256) - p.vocabulary = { - "inputs": text_encoder.ByteTextEncoder(), - "targets": text_encoder.ByteTextEncoder(), - } - p.loss_multiplier = 2.0 - p.input_space_id = 2 - p.target_space_id = 14 - return p - - -def wmt_parsing_tokens(model_hparams, wrong_vocab_size): - """English to parse tree translation benchmark. - - Args: - model_hparams: a tf.contrib.training.HParams - wrong_vocab_size: a number used in the filename indicating the approximate - vocabulary size. This is not to be confused with the actual vocabulary - size. - Returns: - a tf.contrib.training.HParams - """ - p = default_problem_hparams() - # This vocab file must be present within the data directory. - vocab_filename = os.path.join(model_hparams.data_dir, - "vocab.endefr.%d" % wrong_vocab_size) - subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.SYMBOL, subtokenizer.vocab_size) - } - p.target_modality = (registry.Modalities.SYMBOL, subtokenizer.vocab_size) - p.vocabulary = { - "inputs": subtokenizer, - "targets": subtokenizer, - } - p.input_space_id = 3 - p.target_space_id = 15 - return p - - -def wsj_parsing_tokens(model_hparams, prefix, wrong_source_vocab_size, - wrong_target_vocab_size): - """English to parse tree translation benchmark. - - Args: - model_hparams: a tf.contrib.training.HParams - prefix: name to use as prefix for vocabulary files. - wrong_source_vocab_size: a number used in the filename indicating the - approximate vocabulary size. This is not to be confused with the actual - vocabulary size. - wrong_target_vocab_size: a number used in the filename indicating the - approximate target vocabulary size. This is not to be confused with the - actual target vocabulary size. - Returns: - a tf.contrib.training.HParams - """ - p = default_problem_hparams() - # This vocab file must be present within the data directory. - source_vocab_filename = os.path.join( - model_hparams.data_dir, - prefix + "_source.vocab.%d" % wrong_source_vocab_size) - target_vocab_filename = os.path.join( - model_hparams.data_dir, - prefix + "_target.vocab.%d" % wrong_target_vocab_size) - source_subtokenizer = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_subtokenizer = text_encoder.SubwordTextEncoder(target_vocab_filename) - p.input_modality = { - "inputs": (registry.Modalities.SYMBOL, source_subtokenizer.vocab_size) - } - p.target_modality = (registry.Modalities.SYMBOL, - target_subtokenizer.vocab_size) - p.vocabulary = { - "inputs": source_subtokenizer, - "targets": target_subtokenizer, - } - p.input_space_id = 3 - p.target_space_id = 15 - return p - - -# Dictionary of named hyperparameter settings for various problems. -# This is only accessed through the problem_hparams function below. -PROBLEM_HPARAMS_MAP = { - "audio_timit_characters_tune": - audio_timit_characters, - "audio_timit_characters_test": - audio_timit_characters, - "audio_timit_tokens_8k_tune": - lambda p: audio_timit_tokens(p, 2**13), - "audio_timit_tokens_8k_test": - lambda p: audio_timit_tokens(p, 2**13), - "parsing_english_ptb8k": - lambda p: wmt_parsing_tokens(p, 2**13), - "parsing_english_ptb16k": - lambda p: wsj_parsing_tokens( # pylint: disable=g-long-lambda - p, "wsj", 2**14, 2**9), -} + p = TestProblem(input_vocab_size, target_vocab_size) + return p.get_hparams() diff --git a/tensor2tensor/data_generators/problem_hparams_test.py b/tensor2tensor/data_generators/problem_hparams_test.py deleted file mode 100644 index df92919ef..000000000 --- a/tensor2tensor/data_generators/problem_hparams_test.py +++ /dev/null @@ -1,50 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for tensor2tensor.problem_hparams.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -from tensor2tensor.data_generators import problem_hparams - -import tensorflow as tf - - -class ProblemHparamsTest(tf.test.TestCase): - - def testParseProblemName(self): - problem_name = "base" - self.assertEqual( - problem_hparams.parse_problem_name(problem_name), ("base", False, - False)) - problem_name = "base_rev" - self.assertEqual( - problem_hparams.parse_problem_name(problem_name), ("base", True, False)) - problem_name = "base_copy" - self.assertEqual( - problem_hparams.parse_problem_name(problem_name), ("base", False, True)) - problem_name = "base_copy_rev" - self.assertEqual( - problem_hparams.parse_problem_name(problem_name), ("base", True, True)) - problem_name = "base_rev_copy" - self.assertEqual( - problem_hparams.parse_problem_name(problem_name), ("base", True, True)) - - -if __name__ == "__main__": - tf.test.main() diff --git a/tensor2tensor/models/bluenet_test.py b/tensor2tensor/models/bluenet_test.py index d559fd953..daf87529e 100644 --- a/tensor2tensor/models/bluenet_test.py +++ b/tensor2tensor/models/bluenet_test.py @@ -36,8 +36,7 @@ def testBlueNet(self): x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 1, 1, 1)) hparams = bluenet.bluenet_tiny() - p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, - vocab_size) + p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size) with self.test_session() as session: tf.train.get_or_create_global_step() features = { diff --git a/tensor2tensor/models/bytenet_test.py b/tensor2tensor/models/bytenet_test.py index 56f421153..f96d3b999 100644 --- a/tensor2tensor/models/bytenet_test.py +++ b/tensor2tensor/models/bytenet_test.py @@ -36,8 +36,7 @@ def testByteNet(self): x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1)) hparams = bytenet.bytenet_base() - p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, - vocab_size) + p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size) with self.test_session() as session: features = { "inputs": tf.constant(x, dtype=tf.int32), diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index c1190d016..0d4bc6d80 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -37,8 +37,7 @@ def testLSTMSeq2Seq(self): x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1)) hparams = common_hparams.basic_params1() - p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, - vocab_size) + p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size) with self.test_session() as session: features = { "inputs": tf.constant(x, dtype=tf.int32), @@ -58,8 +57,7 @@ def testLSTMSeq2SeqAttention(self): y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1)) hparams = lstm.lstm_attention() - p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, - vocab_size) + p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size) x = tf.constant(x, dtype=tf.int32) x._shape = tf.TensorShape([None, None, 1, 1]) diff --git a/tensor2tensor/models/neural_gpu_test.py b/tensor2tensor/models/neural_gpu_test.py index 164623699..75149ddd5 100644 --- a/tensor2tensor/models/neural_gpu_test.py +++ b/tensor2tensor/models/neural_gpu_test.py @@ -39,7 +39,7 @@ def testNeuralGPU(self): target_length = input_length input_vocab_size = 9 target_vocab_size = 11 - p_hparams = problem_hparams.test_problem_hparams(hparams, input_vocab_size, + p_hparams = problem_hparams.test_problem_hparams(input_vocab_size, target_vocab_size) inputs = -1 + np.random.random_integers( input_vocab_size, size=(batch_size, input_length, 1, 1)) diff --git a/tensor2tensor/models/transformer_revnet_test.py b/tensor2tensor/models/transformer_revnet_test.py index f9bc8cfb2..f61b88b5b 100644 --- a/tensor2tensor/models/transformer_revnet_test.py +++ b/tensor2tensor/models/transformer_revnet_test.py @@ -46,8 +46,7 @@ def testTransformer(self): target_length = 7 vocab_size = 9 hparams = transformer_revnet_test() - p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, - vocab_size) + p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size) hparams.problems = [p_hparams] inputs = -1 + np.random.random_integers( vocab_size, size=(batch_size, input_length, 1, 1)) diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 04c527ac1..22848b249 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -44,8 +44,7 @@ def getModel(self, mode=tf.estimator.ModeKeys.TRAIN): hparams.num_heads = 1 hparams.layer_prepostprocess_dropout = 0.0 - p_hparams = problem_hparams.test_problem_hparams( - hparams, VOCAB_SIZE, VOCAB_SIZE) + p_hparams = problem_hparams.test_problem_hparams(VOCAB_SIZE, VOCAB_SIZE) hparams.problems = [p_hparams] inputs = -1 + np.random.random_integers( diff --git a/tensor2tensor/models/xception_test.py b/tensor2tensor/models/xception_test.py index eb4c6db20..9114fb781 100644 --- a/tensor2tensor/models/xception_test.py +++ b/tensor2tensor/models/xception_test.py @@ -36,8 +36,7 @@ def testXception(self): x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 1, 1, 1)) hparams = xception.xception_tiny() - p_hparams = problem_hparams.test_problem_hparams(hparams, vocab_size, - vocab_size) + p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size) with self.test_session() as session: features = { "inputs": tf.constant(x, dtype=tf.int32), diff --git a/tensor2tensor/utils/beam_search.py b/tensor2tensor/utils/beam_search.py index be6c28559..c5e8eb85e 100644 --- a/tensor2tensor/utils/beam_search.py +++ b/tensor2tensor/utils/beam_search.py @@ -107,7 +107,6 @@ def beam_search(symbols_to_logits_fn, eos_id=EOS_ID): """Beam search with length penalties. - Uses an interface specific to the sequence cnn models; Requires a function that can take the currently decoded sybmols and return the logits for the next symbol. The implementation is inspired by https://arxiv.org/abs/1609.08144. diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 4b0541d31..08e01ccfb 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -29,8 +29,6 @@ from six.moves import xrange # pylint: disable=redefined-builtin from six.moves import zip # pylint: disable=redefined-builtin -from tensor2tensor.data_generators import problem_hparams -from tensor2tensor.data_generators.problem import preprocess_examples_common from tensor2tensor.utils import registry import tensorflow as tf @@ -128,25 +126,6 @@ def decode_record(record): return dataset -def preprocessing(examples, data_file_pattern): - """Preprocessing of examples.""" - # This function is for obsolete problems only, as we're porting them - # all to the Problem class and its preprocess_examples method. Don't add. - if "audio" in data_file_pattern: - # Reshape audio to proper shape - sample_count = tf.to_int32(examples.pop("audio/sample_count")) - sample_width = tf.to_int32(examples.pop("audio/sample_width")) - channel_count = 1 - examples["inputs"] = tf.reshape(examples["inputs"], - [sample_count, sample_width, channel_count]) - if "wsj" in data_file_pattern: - examples["inputs"] = tf.bitcast(examples["inputs"], tf.int32) - elif "a2q_20161229" in data_file_pattern: - # we forgot the EOS when we preprocessed this data. - examples["targets"] = tf.concat([examples["targets"], [1]], 0) - return examples - - def cast_int64_to_int32(features): f = {} for k, v in six.iteritems(features): @@ -182,54 +161,12 @@ def feature_placeholders(data_fields, data_items_to_decoders): return decoded_example -def default_example_reading_spec(data_file_pattern): - """Example reading spec for problem_hparams problems.""" - # This function is for problems that have yet to be ported to the new Problem - # API. Do not add here. - data_items_to_decoders = None - # Read from image TFRecords if the file has "image" in its name. - if data_file_pattern and "image" in data_file_pattern: - label_key = "image/class/label" - data_fields = { - "image/encoded": tf.FixedLenFeature((), tf.string), - "image/format": tf.FixedLenFeature((), tf.string), - label_key: tf.VarLenFeature(tf.int64) - } - data_items_to_decoders = { - "inputs": - tf.contrib.slim.tfexample_decoder.Image( - image_key="image/encoded", - format_key="image/format", - channels=1 if "mnist" in data_file_pattern else 3), - "targets": - tf.contrib.slim.tfexample_decoder.Tensor(label_key), - } - elif data_file_pattern and "audio" in data_file_pattern: - data_type = tf.int64 if "timit" in data_file_pattern else tf.float32 - data_fields = { - "inputs": tf.VarLenFeature(data_type), - "audio/sample_count": tf.FixedLenFeature((), tf.int64), - "audio/sample_width": tf.FixedLenFeature((), tf.int64), - "targets": tf.VarLenFeature(tf.int64), - } - else: - data_fields = { - "inputs": tf.VarLenFeature(tf.int64), - "targets": tf.VarLenFeature(tf.int64) - } - return data_fields, data_items_to_decoders - - def read_examples(problem, data_file_pattern, capacity, mode=tf.estimator.ModeKeys.TRAIN): """Create Dataset of Example for problem and data_file_pattern.""" - if problem is None: - data_fields, data_items_to_decoders = default_example_reading_spec( - data_file_pattern) - else: - data_fields, data_items_to_decoders = problem.example_reading_spec() + data_fields, data_items_to_decoders = problem.example_reading_spec() if data_file_pattern is None: # Create placeholders for input, rather than reading data from disk. @@ -272,7 +209,7 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, # reading, parsing, and preprocessing. Use Problem.dataset instead. dataset = read_examples(problem, data_file_pattern, capacity, mode=mode) dataset = dataset.map( - lambda ex: _preprocess(ex, problem, data_file_pattern, hparams, mode), + lambda ex: _preprocess(ex, problem, hparams, mode), num_threads=num_threads) dataset = dataset.filter( lambda ex: example_valid_size(ex, batching_scheme["max_length"])) @@ -302,14 +239,9 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, return batched_examples -def _preprocess(example, problem, data_file_pattern, hparams, mode): +def _preprocess(example, problem, hparams, mode): """Preprocessing for example.""" - if problem is None: - example = preprocess_examples_common(example, hparams, mode) - example = preprocessing(example, data_file_pattern) - else: - example = problem.preprocess_examples(example, mode, hparams) - + example = problem.preprocess_examples(example, mode, hparams) # We do not want int64s as they are not supported on GPUs. example = cast_int64_to_int32(example) @@ -527,10 +459,7 @@ def get_data_filepatterns(problems, data_dir, mode): """Return the location of a dataset for a given mode.""" datasets = [] for problem in problems.split("-"): - try: - problem = registry.problem(problem).dataset_filename() - except ValueError: - problem, _, _ = problem_hparams.parse_problem_name(problem) + problem = registry.problem(problem).dataset_filename() path = os.path.join(data_dir, problem) if mode == tf.estimator.ModeKeys.TRAIN: datasets.append("%s-train*" % path) diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index f03ce6da2..ff01cf07f 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -143,7 +143,7 @@ def testTrainEvalBehavior(self): def testPreprocess(self): dataset = data_reader.read_examples(self.problem, self.filepatterns[0], 32) examples = dataset.make_one_shot_iterator().get_next() - examples = data_reader._preprocess(examples, self.problem, None, None, None) + examples = data_reader._preprocess(examples, self.problem, None, None) with tf.train.MonitoredSession() as sess: ex_val = sess.run(examples) # problem.preprocess_examples has been run diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 69d981f7c..09c86ca09 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -26,7 +26,6 @@ from tensor2tensor import models # pylint: disable=unused-import from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import -from tensor2tensor.data_generators import problem_hparams from tensor2tensor.utils import data_reader from tensor2tensor.utils import decoding from tensor2tensor.utils import devices @@ -236,24 +235,12 @@ def add_problem_hparams(hparams, problems): try: problem = registry.problem(problem_name) except LookupError: - problem = None - - if problem is None: - try: - p_hparams = problem_hparams.problem_hparams(problem_name, hparams) - except LookupError: - # The problem is not in the set of registered Problems nor in the old - # set of problem_hparams. - all_problem_names = sorted( - list(problem_hparams.PROBLEM_HPARAMS_MAP) + - registry.list_problems()) - error_lines = [ - "%s not in the set of supported problems:" % problem_name - ] + all_problem_names - error_msg = "\n * ".join(error_lines) - raise LookupError(error_msg) - else: - p_hparams = problem.get_hparams(hparams) + all_problem_names = sorted(registry.list_problems()) + error_lines = ["%s not in the set of supported problems:" % problem_name + ] + all_problem_names + error_msg = "\n * ".join(error_lines) + raise LookupError(error_msg) + p_hparams = problem.get_hparams(hparams) hparams.problem_instances.append(problem) hparams.problems.append(p_hparams) From f191c7864623dc8d130b916411f9d9f866997cc4 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 21 Sep 2017 09:37:39 -0700 Subject: [PATCH 0419/4095] Correct README for decoding PiperOrigin-RevId: 169554635 --- README.md | 3 +- docs/index.md | 9 +- docs/walkthrough.md | 182 +++++++++++++++++++++++++-- tensor2tensor/data_generators/wmt.py | 34 ++--- tensor2tensor/utils/decoding.py | 3 +- 5 files changed, 197 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index e37db796d..6ef815f4e 100644 --- a/README.md +++ b/README.md @@ -124,8 +124,7 @@ t2t-decoder \ --model=$MODEL \ --hparams_set=$HPARAMS \ --output_dir=$TRAIN_DIR \ - --decode_beam_size=$BEAM_SIZE \ - --decode_alpha=$ALPHA \ + --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \ --decode_from_file=$DECODE_FILE cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes diff --git a/docs/index.md b/docs/index.md index 9394809b3..3eb7f1c61 100644 --- a/docs/index.md +++ b/docs/index.md @@ -24,11 +24,6 @@ documentation, from basic tutorials to full code documentation. ## Deep Dive -* [Life of an Example](example_life.md): how all parts of T2T are connected and work together +* [Life of an Example](example_life.md): how all parts of T2T are connected and + work together * [Distributed Training](distributed_training.md) - -## Code documentation - -See our -[README](https://github.com/tensorflow/tensor2tensor/blob/master/README.md) -for now, code docs coming. diff --git a/docs/walkthrough.md b/docs/walkthrough.md index 57d7a03f4..6ef815f4e 100644 --- a/docs/walkthrough.md +++ b/docs/walkthrough.md @@ -1,4 +1,4 @@ -# T2T Install and Run Walkthrough +# T2T: Tensor2Tensor Transformers [![PyPI version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) @@ -8,6 +8,26 @@ Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](http welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) +[![Travis](https://img.shields.io/travis/tensorflow/tensor2tensor.svg)]() + +[T2T](https://github.com/tensorflow/tensor2tensor) is a modular and extensible +library and binaries for supervised learning with TensorFlow and with support +for sequence tasks. It is actively used and maintained by researchers and +engineers within the Google Brain team. You can read more about Tensor2Tensor in +the recent [Google Research Blog post introducing +it](https://research.googleblog.com/2017/06/accelerating-deep-learning-research.html). + +We're eager to collaborate with you on extending T2T, so please feel +free to [open an issue on +GitHub](https://github.com/tensorflow/tensor2tensor/issues) or +send along a pull request to add your dataset or model. +See [our contribution +doc](CONTRIBUTING.md) for details and our [open +issues](https://github.com/tensorflow/tensor2tensor/issues). +You can chat with us and other users on +[Gitter](https://gitter.im/tensor2tensor/Lobby) and please join our +[Google Group](https://groups.google.com/forum/#!forum/tensor2tensor) to keep up +with T2T announcements. Here is a one-command version that installs tensor2tensor, downloads the data, trains an English-German translation model, and evaluates it: @@ -29,10 +49,28 @@ t2t-decoder \ --problems=translate_ende_wmt32k \ --model=transformer \ --hparams_set=transformer_base_single_gpu \ - --output_dir=~/t2t_train/base + --output_dir=~/t2t_train/base \ --decode_interactive ``` +See the [Walkthrough](#walkthrough) below for more details on each step. + +### Contents + +* [Walkthrough](#walkthrough) +* [Installation](#installation) +* [Features](#features) +* [T2T Overview](#t2t-overview) + * [Datasets](#datasets) + * [Problems and Modalities](#problems-and-modalities) + * [Models](#models) + * [Hyperparameter Sets](#hyperparameter-sets) + * [Trainer](#trainer) +* [Adding your own components](#adding-your-own-components) +* [Adding a dataset](#adding-a-dataset) + +--- + ## Walkthrough Here's a walkthrough training a good English-to-German translation @@ -80,16 +118,13 @@ echo "Goodbye world" >> $DECODE_FILE BEAM_SIZE=4 ALPHA=0.6 -t2t-trainer \ +t2t-decoder \ --data_dir=$DATA_DIR \ --problems=$PROBLEM \ --model=$MODEL \ --hparams_set=$HPARAMS \ --output_dir=$TRAIN_DIR \ - --train_steps=0 \ - --eval_steps=0 \ - --decode_beam_size=$BEAM_SIZE \ - --decode_alpha=$ALPHA \ + --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \ --decode_from_file=$DECODE_FILE cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes @@ -127,3 +162,136 @@ python -c "from tensor2tensor.models.transformer import Transformer" ``` --- + +## Features + +* Many state of the art and baseline models are built-in and new models can be + added easily (open an issue or pull request!). +* Many datasets across modalities - text, audio, image - available for + generation and use, and new ones can be added easily (open an issue or pull + request for public datasets!). +* Models can be used with any dataset and input mode (or even multiple); all + modality-specific processing (e.g. embedding lookups for text tokens) is done + with `Modality` objects, which are specified per-feature in the dataset/task + specification. +* Support for multi-GPU machines and synchronous (1 master, many workers) and + asynchronous (independent workers synchronizing through a parameter server) + [distributed training](https://github.com/tensorflow/tensor2tensor/tree/master/docs/distributed_training.md). +* Easily swap amongst datasets and models by command-line flag with the data + generation script `t2t-datagen` and the training script `t2t-trainer`. + +--- + +## T2T overview + +### Datasets + +**Datasets** are all standardized on `TFRecord` files with `tensorflow.Example` +protocol buffers. All datasets are registered and generated with the +[data +generator](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t-datagen) +and many common sequence datasets are already available for generation and use. + +### Problems and Modalities + +**Problems** define training-time hyperparameters for the dataset and task, +mainly by setting input and output **modalities** (e.g. symbol, image, audio, +label) and vocabularies, if applicable. All problems are defined either in +[`problem_hparams.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem_hparams.py) +or are registered with `@registry.register_problem` (run `t2t-datagen` to see +the list of all available problems). +**Modalities**, defined in +[`modality.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/modality.py), +abstract away the input and output data types so that **models** may deal with +modality-independent tensors. + +### Models + +**`T2TModel`s** define the core tensor-to-tensor transformation, independent of +input/output modality or task. Models take dense tensors in and produce dense +tensors that may then be transformed in a final step by a **modality** depending +on the task (e.g. fed through a final linear transform to produce logits for a +softmax over classes). All models are imported in the +[`models` subpackage](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/models/__init__.py), +inherit from `T2TModel` - defined in +[`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py) - +and are registered with +[`@registry.register_model`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/registry.py). + +### Hyperparameter Sets + +**Hyperparameter sets** are defined and registered in code with +[`@registry.register_hparams`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/registry.py) +and are encoded in +[`tf.contrib.training.HParams`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/training/python/training/hparam.py) +objects. The `HParams` are available to both the problem specification and the +model. A basic set of hyperparameters are defined in +[`common_hparams.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/layers/common_hparams.py) +and hyperparameter set functions can compose other hyperparameter set functions. + +### Trainer + +The **trainer** binary is the main entrypoint for training, evaluation, and +inference. Users can easily switch between problems, models, and hyperparameter +sets by using the `--model`, `--problems`, and `--hparams_set` flags. Specific +hyperparameters can be overridden with the `--hparams` flag. `--schedule` and +related flags control local and distributed training/evaluation +([distributed training documentation](https://github.com/tensorflow/tensor2tensor/tree/master/docs/distributed_training.md)). + +--- + +## Adding your own components + +T2T's components are registered using a central registration mechanism that +enables easily adding new ones and easily swapping amongst them by command-line +flag. You can add your own components without editing the T2T codebase by +specifying the `--t2t_usr_dir` flag in `t2t-trainer`. + +You can do so for models, hyperparameter sets, modalities, and problems. Please +do submit a pull request if your component might be useful to others. + +Here's an example with a new hyperparameter set: + +```python +# In ~/usr/t2t_usr/my_registrations.py + +from tensor2tensor.models import transformer +from tensor2tensor.utils import registry + +@registry.register_hparams +def transformer_my_very_own_hparams_set(): + hparams = transformer.transformer_base() + hparams.hidden_size = 1024 + ... +``` + +```python +# In ~/usr/t2t_usr/__init__.py +from . import my_registrations +``` + +``` +t2t-trainer --t2t_usr_dir=~/usr/t2t_usr --registry_help +``` + +You'll see under the registered HParams your +`transformer_my_very_own_hparams_set`, which you can directly use on the command +line with the `--hparams_set` flag. + +`t2t-datagen` also supports the `--t2t_usr_dir` flag for `Problem` +registrations. + +## Adding a dataset + +To add a new dataset, subclass +[`Problem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py) +and register it with `@registry.register_problem`. See +[`TranslateEndeWmt8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +for an example. + +Also see the [data generators +README](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/README.md). + +--- + +*Note: This is not an official Google product.* diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index befb9ac7f..cde0bc9ac 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -34,7 +34,6 @@ FLAGS = tf.flags.FLAGS - # End-of-sentence marker. EOS = text_encoder.EOS_ID @@ -186,7 +185,6 @@ def bi_vocabs_token_generator(source_path, # Data-set URLs. - _ENDE_TRAIN_DATASETS = [ [ "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long @@ -287,7 +285,6 @@ def bi_vocabs_token_generator(source_path, ], ] - # Generators. @@ -333,8 +330,8 @@ def generator(self, data_dir, tmp_dir, train): with tf.gfile.GFile(token_path, mode="a") as f: f.write("UNK\n") # Add UNK to the vocab. token_vocab = text_encoder.TokenTextEncoder(token_path, replace_oov="UNK") - return token_generator(train_path + ".en", train_path + ".de", - token_vocab, EOS) + return token_generator(train_path + ".en", train_path + ".de", token_vocab, + EOS) @property def input_space_id(self): @@ -360,7 +357,7 @@ def _preprocess_sgm(line, is_sgm): line = line.strip() if line.startswith("<seg") and line.endswith("</seg>"): i = line.index(">") - return line[i+1:-6] # Strip first <seg ...> and last </seg>. + return line[i + 1:-6] # Strip first <seg ...> and last </seg>. def _compile_data(tmp_dir, datasets, filename): @@ -479,18 +476,24 @@ def targeted_vocab_size(self): def num_shards(self): return 10 # This is a small dataset. + @property + def source_vocab_name(self): + return "vocab.zhen-zh.%d" % self.targeted_vocab_size + + @property + def target_vocab_name(self): + return "vocab.zhen-en.%d" % self.targeted_vocab_size + def generator(self, data_dir, tmp_dir, train): - source_vocab_size = self.targeted_vocab_size - target_vocab_size = self.targeted_vocab_size datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] source_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.zhen-zh.%d" % source_vocab_size, - source_vocab_size, source_datasets) + data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, + source_datasets) target_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.zhen-en.%d" % target_vocab_size, - target_vocab_size, target_datasets) + data_dir, tmp_dir, self.target_vocab_name, self.targeted_vocab_size, + target_datasets) tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) # We generate English->X data by convention, to train reverse translation @@ -508,11 +511,8 @@ def target_space_id(self): return problem.SpaceID.EN_TOK def feature_encoders(self, data_dir): - vocab_size = self.targeted_vocab_size - source_vocab_filename = os.path.join(data_dir, - "vocab.zhen-zh.%d" % vocab_size) - target_vocab_filename = os.path.join(data_dir, - "vocab.zhen-en.%d" % vocab_size) + source_vocab_filename = os.path.join(data_dir, self.source_vocab_name) + target_vocab_filename = os.path.join(data_dir, self.target_vocab_name) source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) return { diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 664935c94..a08947202 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -138,6 +138,7 @@ def decode_from_dataset(estimator, inputs_vocab = problem_hparams.vocabulary.get("inputs", None) targets_vocab = problem_hparams.vocabulary["targets"] for num_predictions, prediction in enumerate(predictions): + num_predictions += 1 inputs = prediction["inputs"] targets = prediction["targets"] outputs = prediction["outputs"] @@ -181,7 +182,7 @@ def decode_from_dataset(estimator, target_file.write(str(decoded_target) + "\n") if (decode_hp.num_samples >= 0 and - (num_predictions + 1) >= decode_hp.num_samples): + num_predictions >= decode_hp.num_samples): break if decode_to_file: From c996878113b6d283253aef7de4f266484e4b50f6 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 21 Sep 2017 10:53:17 -0700 Subject: [PATCH 0420/4095] Reproduces a bug with the SubwordTextEncoder in a test. PiperOrigin-RevId: 169566059 --- .../data_generators/text_encoder_test.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tensor2tensor/data_generators/text_encoder_test.py b/tensor2tensor/data_generators/text_encoder_test.py index b55a51bf4..0351d0d2f 100644 --- a/tensor2tensor/data_generators/text_encoder_test.py +++ b/tensor2tensor/data_generators/text_encoder_test.py @@ -107,6 +107,13 @@ def test_reserved_tokens_in_corpus(self): class SubwordTextEncoderTest(tf.test.TestCase): + @classmethod + def setUpClass(cls): + """Make sure the test dir exists and is empty.""" + cls.test_temp_dir = os.path.join(tf.test.get_temp_dir(), "encoder_test") + shutil.rmtree(cls.test_temp_dir, ignore_errors=True) + os.mkdir(cls.test_temp_dir) + def test_encode_decode(self): corpus = ( "This is a corpus of text that provides a bunch of tokens from which " @@ -216,6 +223,28 @@ def test_load_from_file(self): encoder._load_from_file_object(vocab) self.assertEqual(encoder._all_subtoken_strings, correct_vocab) + def test_reserved_token_chars_not_in_alphabet(self): + corpus = "dog" + token_counts = collections.Counter(corpus.split(" ")) + encoder1 = text_encoder.SubwordTextEncoder.build_to_target_size( + 100, token_counts, 2, 100) + filename = os.path.join(self.test_temp_dir, "out.voc") + encoder1.store_to_file(filename) + encoder2 = text_encoder.SubwordTextEncoder(filename=filename) + + for t in text_encoder.RESERVED_TOKENS: + for c in t: + # Verify that encoder1 can encode all reserved token chars. + encoder1.encode(c) + + # TODO(seabass): Implement the fix so that we can remove this assertion. + with self.assertRaises(AssertionError): + for t in text_encoder.RESERVED_TOKENS: + for c in t: + # Verify that encoder2 fails to encode the characters (i.e. + # reproduce the bug). + encoder2.encode(c) + if __name__ == "__main__": tf.test.main() From 8ee83501f149d38b11ef800a00e8f16bb7c661d5 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 21 Sep 2017 15:14:14 -0700 Subject: [PATCH 0421/4095] v1.2.3 PiperOrigin-RevId: 169607663 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a84f772b6..331abb78e 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.2.2', + version='1.2.3', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From e892dc3cc5a5ef2e6fde5b6569281ac4abc7fa24 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 21 Sep 2017 17:20:44 -0700 Subject: [PATCH 0422/4095] Update example_life.md PiperOrigin-RevId: 169625024 --- docs/example_life.md | 195 ++++++++++++++++-- .../data_generators/cnn_dailymail.py | 2 +- tensor2tensor/data_generators/desc2code.py | 3 +- .../data_generators/gene_expression.py | 10 +- .../data_generators/generator_utils.py | 18 +- tensor2tensor/data_generators/image.py | 64 +++--- tensor2tensor/data_generators/imdb.py | 2 +- tensor2tensor/data_generators/problem.py | 26 +-- .../data_generators/problem_hparams.py | 16 +- tensor2tensor/data_generators/wiki.py | 4 +- tensor2tensor/layers/common_hparams.py | 7 +- tensor2tensor/utils/data_reader.py | 10 +- tensor2tensor/utils/data_reader_test.py | 8 +- 13 files changed, 263 insertions(+), 102 deletions(-) diff --git a/docs/example_life.md b/docs/example_life.md index 2983f5077..f3b18a817 100644 --- a/docs/example_life.md +++ b/docs/example_life.md @@ -9,26 +9,189 @@ welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CO [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) -This document show how a training example passes through the T2T pipeline, -and how all its parts are connected to work together. +This doc explains how a training example flows through T2T, from data generation +to training, evaluation, and decoding. It points out the various hooks available +in the `Problem` and `T2TModel` classes and gives an overview of the T2T code +(key functions, files, hyperparameters, etc.). -## The Life of an Example +Some key files and their functions: -A training example passes the following stages in T2T: -* raw input (text from command line or file) -* encoded input after [Problem.feature_encoder](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L173) function `encode` is usually a sparse tensor, e.g., a vector of `tf.int32`s -* batched input after [data input pipeline](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/data_reader.py#L242) where the inputs, after [Problem.preprocess_examples](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L188) are grouped by their length and made into batches. -* dense input after being processed by a [Modality](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/modality.py#L30) function `bottom`. -* dense output after [T2T.model_fn_body](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/t2t_model.py#L542) -* back to sparse output through [Modality](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/modality.py#L30) function `top`. -* if decoding, back through [Problem.feature_encoder](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L173) function `decode` to display on the screen. +* [`trainer_utils.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/trainer_utils.py): + Constructs and runs all the main components of the system (the `Problem`, + the `HParams`, the `Estimator`, the `Experiment`, the `input_fn`s and + `model_fn`). +* [`common_hparams.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/layers/common_hparams.py): + `basic_params1` serves as the base for all model hyperparameters. Registered + model hparams functions always start with this default set of + hyperparameters. +* [`problem.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py): + Every dataset in T2T subclasses `Problem`. +* [`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py): + Every model in T2T subclasses `T2TModel`. -We go into these phases step by step below. +## Data Generation -## Feature Encoders +The `t2t-datagen` binary is the entrypoint for data generation. It simply looks +up the `Problem` specified by `--problem` and calls +`Problem.generate_data(data_dir, tmp_dir)`. -TODO: describe [Problem.feature_encoder](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/problem.py#L173) which is a dict of encoders that have `encode` and `decode` functions. +All `Problem`s are expected to generate 2 sharded `TFRecords` files - 1 for +training and 1 for evaluation - with `tensorflow.Example` protocol buffers. The +expected names of the files are given by `Problem.{training, dev}_filepaths`. +Typically, the features in the `Example` will be `"inputs"` and `"targets"`; +however, some tasks have a different on-disk representation that is converted to +`"inputs"` and `"targets"` online in the input pipeline (e.g. image features are +typically stored with features `"image/encoded"` and `"image/format"` and the +decoding happens in the input pipeline). -## Modalities +For tasks that require a vocabulary, this is also the point at which the +vocabulary is generated and all examples are encoded. -TODO: describe [Modality](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/modality.py#L30) which has `bottom` and `top` but also sharded versions and one for targets. +There are several utility functions in +[`generator_utils`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/generator_utils.py) +that are commonly used by `Problem`s to generate data. Several are highlighted +below: + +* `generate_dataset_and_shuffle`: given 2 generators, 1 for training and 1 for + eval, yielding dictionaries of `<feature name, list< int or float or + string >>`, will produce sharded and shuffled `TFRecords` files with + `tensorflow.Example` protos. +* `maybe_download`: downloads a file at a URL to the given directory and + filename (see `maybe_download_from_drive` if the URL points to Google + Drive). +* `get_or_generate_vocab_inner`: given a target vocabulary size and a + generator that yields lines or tokens from the dataset, will build a + `SubwordTextEncoder` along with a backing vocabulary file that can be used + to map input strings to lists of ids. + [`SubwordTextEncoder`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/text_encoder.py) + uses word pieces and its encoding is fully invertible. + +## Data Input Pipeline + +Once the data is produced on disk, training, evaluation, and inference (if +decoding from the dataset) consume it by way of T2T input pipeline. This section +will give an overview of that pipeline with specific attention to the various +hooks in the `Problem` class and the model's `HParams` object (typically +registered in the model's file and specified by the `--hparams_set` flag). + +The entire input pipeline is implemented with the new `tf.data.Dataset` API +(previously `tf.contrib.data.Dataset`). + +The key function in the codebase for the input pipeline is +[`data_reader.input_pipeline`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/data_reader.py). +The full input function is built in +[`input_fn_builder.build_input_fn`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/input_fn_builder.py) +(which calls `data_reader.input_pipeline`). + +### Reading and decoding data + +`Problem.dataset_filename` specifies the prefix of the files on disk (they will +be suffixed with `-train` or `-dev` as well as their sharding). + +The features read from the files and their decoding is specified by +`Problem.example_reading_spec`, which returns 2 items: + +1. Dict mapping from on-disk feature name to on-disk types (`VarLenFeature` or + `FixedLenFeature`. +2. Dict mapping output feature name to decoder. This return value is optional + and is only needed for tasks whose features may require additional decoding + (e.g. images). You can find the available decoders in + `tf.contrib.slim.tfexample_decoder`. + +At this point in the input pipeline, the example is a `dict<feature name, +Tensor>`. + +### Preprocessing + +The read `Example` now runs through `Problem.preprocess_example`, which by +default runs +[`problem.preprocess_example_common`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py), +which may truncate the inputs/targets or prepend to targets, governed by some +hyperparameters. + +### Batching + +Examples are bucketed by sequence length and then batched out of those buckets. +This significantly improves performance over a naive batching scheme for +variable length sequences because each example in a batch must be padded to +match the example with the maximum length in the batch. + +There are several hyperparameters that affect how examples are batched together: + +* `hp.batch_size`: this is the approximate total number of tokens in the batch + (i.e. for a sequence problem, long sequences will have smaller actual batch + size and short sequences will have a larger actual batch size in order to + generally have an equal number of tokens in the batch). +* `hp.max_length`: sequences with length longer than this will be dropped + during training (and also during eval if `hp.eval_drop_long_sequences` is + `True`). If not set, the maximum length of examples is set to + `hp.batch_size`. +* `hp.batch_size_multiplier`: multiplier for the maximum length +* `hp.min_length_bucket`: example length for the smallest bucket (i.e. the + smallest bucket will bucket examples up to this length). +* `hp.length_bucket_step`: controls how spaced out the length buckets are. + +## Building the Model + +At this point, the input features typically have `"inputs"` and `"targets"`, +each of which is a batched 4-D Tensor (e.g. of shape `[batch_size, +sequence_length, 1, 1]` for text input or `[batch_size, height, width, 3]` for +image input). + +A `T2TModel` is composed of transforms of the input features by `Modality`s, +then the body of the model, then transforms of the model output to predictions +by a `Modality`, and then a loss (during training). + +The `Modality` types for the various input features and for the target are +specified in `Problem.hparams`. A `Modality` is a feature adapter that enables +models to be agnostic to input/output spaces. You can see the various +`Modality`s in +[`modalities.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/layers/modalities.py). + +The sketch structure of a T2T model is as follows: + +```python +features = {...} # output from the input pipeline +input_modaly = ... # specified in Problem.hparams +target_modality = ... # specified in Problem.hparams + +transformed_features = {} +transformed_features["inputs"] = input_modality.bottom( + features["inputs"]) +transformed_features["targets"] = target_modality.targets_bottom( + features["targets"]) # for autoregressive models + +body_outputs = model.model_fn_body(transformed_features) + +predictions = target_modality.top(body_outputs, features["targets"]) +loss = target_modality.loss(predictions, features["targets"]) +``` + +Most `T2TModel`s only override `model_fn_body`. + +## Training, Eval, Inference modes + +Both the input function and model functions take a mode in the form of a +`tf.estimator.ModeKeys`, which allows the functions to behave differently in +different modes. + +In training, the model function constructs an optimizer and minimizes the loss. + +In evaluation, the model function constructs the evaluation metrics specified by +`Problem.eval_metrics`. + +In inference, the model function outputs predictions. + +## `Estimator` and `Experiment` + +With the input function and model functions constructed, the actual training +loop and related services (checkpointing, summaries, continuous evaluation, +etc.) are all handled by `Estimator` and `Experiment` objects, constructed in +[`trainer_utils.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/trainer_utils.py). + +## Decoding + +* [`decoding.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/decoding.py) + +TODO(rsepassi): Explain decoding (interactive, from file, and from dataset) and +`Problem.feature_encoders`. diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index 93e846a0b..2f8e9cf30 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -129,7 +129,7 @@ def use_train_shards_for_dev(self): def generator(self, data_dir, tmp_dir, _): encoder = generator_utils.get_or_generate_vocab_inner( data_dir, self.vocab_file, self.targeted_vocab_size, - lambda: story_generator(tmp_dir)) + story_generator(tmp_dir)) for story in story_generator(tmp_dir): summary, rest = _story_summary_split(story) encoded_summary = encoder.encode(summary) + [EOS] diff --git a/tensor2tensor/data_generators/desc2code.py b/tensor2tensor/data_generators/desc2code.py index 1e26b000c..174bd8107 100644 --- a/tensor2tensor/data_generators/desc2code.py +++ b/tensor2tensor/data_generators/desc2code.py @@ -195,8 +195,7 @@ def generator_target(): data_dir=data_dir, vocab_filename=self.vocab_target_filename, vocab_size=self.target_vocab_size, - generator_fn=generator_target, - ) + generator=generator_target(),) # Yield the training and testing samples eos_list = [EOS] diff --git a/tensor2tensor/data_generators/gene_expression.py b/tensor2tensor/data_generators/gene_expression.py index 43d5a6702..477e04017 100644 --- a/tensor2tensor/data_generators/gene_expression.py +++ b/tensor2tensor/data_generators/gene_expression.py @@ -159,17 +159,17 @@ def example_reading_spec(self): data_items_to_decoders = None return (data_fields, data_items_to_decoders) - def preprocess_examples(self, examples, mode, unused_hparams): + def preprocess_example(self, example, mode, unused_hparams): del mode # Reshape targets to contain num_output_predictions per output timestep - examples["targets"] = tf.reshape(examples["targets"], - [-1, 1, self.num_output_predictions]) + example["targets"] = tf.reshape(example["targets"], + [-1, 1, self.num_output_predictions]) # Slice off EOS - not needed, and messes up the GeneExpressionConv model # which expects the input length to be a multiple of the target length. - examples["inputs"] = examples["inputs"][:-1] + example["inputs"] = example["inputs"][:-1] - return examples + return example def eval_metrics(self): return [metrics.Metrics.LOG_POISSON, metrics.Metrics.R2] diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 3e1086d37..f22e84794 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -300,7 +300,7 @@ def gunzip_file(gz_path, new_path): def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, - generator_fn): + generator): """Inner implementation for vocab generators. Args: @@ -308,7 +308,7 @@ def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, then do not save the vocab even if it doesn't exist. vocab_filename: relative filename where vocab file is stored vocab_size: target size of the vocabulary constructed by SubwordTextEncoder - generator_fn: a generator that produces tokens from the vocabulary + generator: a generator that produces tokens from the vocabulary Returns: A SubwordTextEncoder vocabulary object. @@ -325,7 +325,7 @@ def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, tf.logging.info("Generating vocab file: %s", vocab_filepath) token_counts = defaultdict(int) - for item in generator_fn(): + for item in generator: for tok in tokenizer.encode(text_encoder.native_to_unicode(item)): token_counts[tok] += 1 @@ -382,8 +382,8 @@ def generate(): file_byte_budget -= len(line) yield line - return get_or_generate_vocab_inner( - data_dir, vocab_filename, vocab_size, generator_fn=generate) + return get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, + generate()) def get_or_generate_tabbed_vocab(data_dir, tmp_dir, source_filename, @@ -416,8 +416,8 @@ def generate(): part = parts[index].strip() yield part - return get_or_generate_vocab_inner( - data_dir, vocab_filename, vocab_size, generator_fn=generate) + return get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, + generate()) def get_or_generate_txt_vocab(data_dir, vocab_filename, vocab_size, @@ -434,8 +434,8 @@ def generate(): for line in source_file: yield line.strip() - return get_or_generate_vocab_inner( - data_dir, vocab_filename, vocab_size, generator_fn=generate) + return get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, + generate()) def read_records(filename): diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 64b9d8639..084ef330a 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -91,19 +91,19 @@ class ImageCeleba(ImageProblem): "Wearing_Hat Wearing_Lipstick Wearing_Necklace Wearing_Necktie Young" ).split() - def preprocess_examples(self, examples, unused_mode, unused_hparams): + def preprocess_example(self, example, unused_mode, unused_hparams): def resize(img, size): return tf.to_int64( tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) - inputs = examples["inputs"] + inputs = example["inputs"] # Remove boundaries in CelebA images. Remove 40 pixels each side # vertically and 20 pixels each side horizontally. inputs = tf.image.crop_to_bounding_box(inputs, 40, 20, 218 - 80, 178 - 40) - examples["inputs"] = resize(inputs, 8) - examples["targets"] = resize(inputs, 32) - return examples + example["inputs"] = resize(inputs, 8) + example["targets"] = resize(inputs, 32) + return example def hparams(self, defaults, unused_model_hparams): p = defaults @@ -301,7 +301,7 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): self.dev_filepaths(data_dir, self.dev_shards, shuffled=False)) -def imagenet_preprocess_examples(examples, mode): +def imagenet_preprocess_example(example, mode): """Preprocessing used for Imagenet and similar problems.""" def preprocess(img): @@ -312,15 +312,15 @@ def preprocess(img): def resize(img): return tf.to_int64(tf.image.resize_images(img, [299, 299])) - inputs = tf.cast(examples["inputs"], tf.int64) + inputs = tf.cast(example["inputs"], tf.int64) if mode == tf.estimator.ModeKeys.TRAIN: - examples["inputs"] = tf.cond( # Preprocess 90% of the time. + example["inputs"] = tf.cond( # Preprocess 90% of the time. tf.less(tf.random_uniform([]), 0.9), lambda img=inputs: preprocess(img), lambda img=inputs: resize(img)) else: - examples["inputs"] = resize(inputs) - return examples + example["inputs"] = resize(inputs) + return example @registry.register_problem @@ -341,8 +341,8 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): "instructions at https://github.com/tensorflow/models/blob/master" "/inception/README.md#getting-started") - def preprocess_examples(self, examples, mode, _): - return imagenet_preprocess_examples(examples, mode) + def preprocess_example(self, example, mode, _): + return imagenet_preprocess_example(example, mode) @registry.register_problem @@ -366,17 +366,17 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): "instructions at https://github.com/tensorflow/models/blob/master" "/inception/README.md#getting-started") - def preprocess_examples(self, examples, mode, unused_hparams): + def preprocess_example(self, example, mode, unused_hparams): # Just resize with area. if self._was_reversed: - examples["inputs"] = tf.to_int64( - tf.image.resize_images(examples["inputs"], [32, 32], + example["inputs"] = tf.to_int64( + tf.image.resize_images(example["inputs"], [32, 32], tf.image.ResizeMethod.AREA)) else: - examples = imagenet_preprocess_examples(examples, mode) - examples["inputs"] = tf.to_int64( - tf.image.resize_images(examples["inputs"], [32, 32])) - return examples + example = imagenet_preprocess_example(example, mode) + example["inputs"] = tf.to_int64( + tf.image.resize_images(example["inputs"], [32, 32])) + return example @registry.register_problem @@ -386,17 +386,17 @@ class Img2imgImagenet(ImageProblem): def dataset_filename(self): return "image_imagenet" # Reuse Imagenet data. - def preprocess_examples(self, examples, unused_mode, unused_hparams): + def preprocess_example(self, example, unused_mode, unused_hparams): def resize(img, size): return tf.to_int64( tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) - inputs = examples["inputs"] + inputs = example["inputs"] # For Img2Img resize input and output images as desired. - examples["inputs"] = resize(inputs, 8) - examples["targets"] = resize(inputs, 32) - return examples + example["inputs"] = resize(inputs, 8) + example["targets"] = resize(inputs, 32) + return example def hparams(self, defaults, unused_model_hparams): p = defaults @@ -623,11 +623,11 @@ def class_labels(self): "ship", "truck" ] - def preprocess_examples(self, examples, mode, unused_hparams): + def preprocess_example(self, example, mode, unused_hparams): if mode == tf.estimator.ModeKeys.TRAIN: - examples["inputs"] = common_layers.cifar_image_augmentation( - examples["inputs"]) - return examples + example["inputs"] = common_layers.cifar_image_augmentation( + example["inputs"]) + return example def generator(self, data_dir, tmp_dir, is_training): if is_training: @@ -649,8 +649,8 @@ def generator(self, data_dir, tmp_dir, is_training): @registry.register_problem class ImageCifar10Plain(ImageCifar10): - def preprocess_examples(self, examples, mode, unused_hparams): - return examples + def preprocess_example(self, example, mode, unused_hparams): + return example # URLs and filenames for MSCOCO data. @@ -827,8 +827,8 @@ def train_shards(self): def dev_shards(self): return 10 - def preprocess_examples(self, examples, mode, _): - return imagenet_preprocess_examples(examples, mode) + def preprocess_example(self, example, mode, _): + return imagenet_preprocess_example(example, mode) def generator(self, data_dir, tmp_dir, is_training): if is_training: diff --git a/tensor2tensor/data_generators/imdb.py b/tensor2tensor/data_generators/imdb.py index d7eadcd1d..95d728b1e 100644 --- a/tensor2tensor/data_generators/imdb.py +++ b/tensor2tensor/data_generators/imdb.py @@ -79,7 +79,7 @@ def generator(self, data_dir, tmp_dir, train): # Generate vocab encoder = generator_utils.get_or_generate_vocab_inner( data_dir, self.vocab_file, self.targeted_vocab_size, - lambda: self.doc_generator(imdb_dir, "train")) + self.doc_generator(imdb_dir, "train")) # Generate examples dataset = "train" if train else "test" diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 4ada1d212..37eee64ab 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -102,19 +102,19 @@ def default_model_hparams(): data_dir=None) -def preprocess_examples_common(examples, hparams, mode): +def preprocess_example_common(example, hparams, mode): """Preprocessing steps common to all models.""" if hparams.max_input_seq_length > 0: - examples["inputs"] = examples["inputs"][:hparams.max_input_seq_length] + example["inputs"] = example["inputs"][:hparams.max_input_seq_length] if hparams.max_target_seq_length > 0: - examples["targets"] = examples["targets"][:hparams.max_target_seq_length] + example["targets"] = example["targets"][:hparams.max_target_seq_length] if hparams.prepend_mode != "none": if mode == tf.estimator.ModeKeys.PREDICT: - examples["partial_targets"] = tf.concat([examples["inputs"], [0]], 0) + example["partial_targets"] = tf.concat([example["inputs"], [0]], 0) else: - examples["targets"] = tf.concat( - [examples["inputs"], [0], examples["targets"]], 0) - return examples + example["targets"] = tf.concat( + [example["inputs"], [0], example["targets"]], 0) + return example class Problem(object): @@ -154,7 +154,7 @@ class Problem(object): * example_reading_spec - Specify the names and types of the features on disk. - Specify tf.contrib.slim.tfexample_decoder - * preprocess_examples(examples, mode) + * preprocess_example(example, mode) - Preprocess the example feature dict from feature name to Tensor or SparseTensor. - Used in training, eval, and inference (specified by mode). @@ -198,8 +198,8 @@ def example_reading_spec(self): data_items_to_decoders = None return (data_fields, data_items_to_decoders) - def preprocess_examples(self, examples, mode, hparams): - return preprocess_examples_common(examples, hparams, mode) + def preprocess_example(self, example, mode, hparams): + return preprocess_example_common(example, hparams, mode) def eval_metrics(self): return [ @@ -310,10 +310,10 @@ def dataset(self, shuffle_files: whether to shuffle input files. Default behavior (i.e. when shuffle_files=None) is to shuffle if mode == TRAIN. hparams: tf.contrib.training.HParams; hparams to be passed to - Problem.preprocess_examples and Problem.hparams. If None, will use a + Problem.preprocess_example and Problem.hparams. If None, will use a default set that is a no-op. preprocess: bool, whether to map the Dataset through - Problem.preprocess_examples. + Problem.preprocess_example. Returns: Dataset containing dict<feature name, Tensor>. @@ -366,7 +366,7 @@ def decode_record(record): return dict(zip(decode_items, decoded)) def _preprocess(example): - example = self.preprocess_examples(example, mode, hparams) + example = self.preprocess_example(example, mode, hparams) self.maybe_reverse_features(example) self.maybe_copy_features(example) return example diff --git a/tensor2tensor/data_generators/problem_hparams.py b/tensor2tensor/data_generators/problem_hparams.py index 88212b0db..576a27a79 100644 --- a/tensor2tensor/data_generators/problem_hparams.py +++ b/tensor2tensor/data_generators/problem_hparams.py @@ -47,16 +47,16 @@ def example_reading_spec(self): } return data_fields, None - def preprocess_examples(self, examples, mode, hparams): - examples = super(AudioTimitProblem, self).preprocess_examples( - examples, mode, hparams) + def preprocess_example(self, example, mode, hparams): + example = super(AudioTimitProblem, self).preprocess_example( + example, mode, hparams) # Reshape audio to proper shape - sample_count = tf.to_int32(examples.pop("audio/sample_count")) - sample_width = tf.to_int32(examples.pop("audio/sample_width")) + sample_count = tf.to_int32(example.pop("audio/sample_count")) + sample_width = tf.to_int32(example.pop("audio/sample_width")) channel_count = 1 - examples["inputs"] = tf.reshape(examples["inputs"], - [sample_count, sample_width, channel_count]) - return examples + example["inputs"] = tf.reshape(example["inputs"], + [sample_count, sample_width, channel_count]) + return example @registry.register_problem diff --git a/tensor2tensor/data_generators/wiki.py b/tensor2tensor/data_generators/wiki.py index 30a16817b..a1380c27f 100644 --- a/tensor2tensor/data_generators/wiki.py +++ b/tensor2tensor/data_generators/wiki.py @@ -127,7 +127,7 @@ def use_train_shards_for_dev(self): def generator(self, data_dir, tmp_dir, _): encoder = generator_utils.get_or_generate_vocab_inner( data_dir, self.vocab_file, self.targeted_vocab_size, - lambda: page_generator(tmp_dir, max_docs=10000)) + page_generator(tmp_dir, max_docs=10000)) for page in page_generator(tmp_dir): title = _page_title(page) encoded = encoder.encode(page) + [EOS] @@ -210,7 +210,7 @@ def scramble(self, seq): def generator(self, data_dir, tmp_dir, _): encoder = generator_utils.get_or_generate_vocab_inner( data_dir, self.vocab_file, self.targeted_vocab_size, - lambda: page_generator(tmp_dir, max_docs=1000)) + page_generator(tmp_dir, max_docs=1000)) case_num = 0 for page in page_generator(tmp_dir): encoded = encoder.encode(page) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 2e33c9e94..deae14ddc 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -126,13 +126,13 @@ def basic_params1(): # The maximum length of "input" sequence. # Sequences longer than this value will be truncated. 0 or negative values # mean there is no maximum or truncation. - # You can change this behavior by overridding preprocess_examples() method + # You can change this behavior by overridding preprocess_example() method # in your problem class. max_input_seq_length=0, # The maximum length of "target" sequence. # Sequences longer than this value will be truncated. 0 or negative values # mean there is no maximum or truncation. - # You can change this behavior by overridding preprocess_examples() method + # You can change this behavior by overridding preprocess_example() method # in your problem class. max_target_seq_length=0, # This flag allows us to optionally treat a seq-to-seq problem @@ -152,8 +152,7 @@ def basic_params1(): # position in the inputs portion can see the # entire inputs portion. This removes the challenge of # autoregressively predicting the inputs portion. - prepend_mode="none", - ) + prepend_mode="none",) class RangedHParams(object): diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 08e01ccfb..e88d208ac 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -241,7 +241,7 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, def _preprocess(example, problem, hparams, mode): """Preprocessing for example.""" - example = problem.preprocess_examples(example, mode, hparams) + example = problem.preprocess_example(example, mode, hparams) # We do not want int64s as they are not supported on GPUs. example = cast_int64_to_int32(example) @@ -316,8 +316,8 @@ def batching_fn(bucket_id, grouped_dataset): if hasattr(dataset, "apply"): # If the Dataset supports dynamic window size, use it. dataset = dataset.apply( - tf.contrib.data.group_by_window( - example_to_bucket_id, batching_fn, None, window_size_fn)) + tf.contrib.data.group_by_window(example_to_bucket_id, batching_fn, + None, window_size_fn)) else: dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, window_size) @@ -476,8 +476,8 @@ def serving_input_fn(problem, hparams): example = feature_placeholders(data_fields, data_items_to_decoders) # Preprocess - example = problem.preprocess_examples(example, tf.estimator.ModeKeys.PREDICT, - hparams) + example = problem.preprocess_example(example, tf.estimator.ModeKeys.PREDICT, + hparams) example = cast_int64_to_int32(example) # 4-D inputs and space ids diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index ff01cf07f..4f4d7530d 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -62,9 +62,9 @@ def example_reading_spec(self): data_items_to_decoders = None return (data_fields, data_items_to_decoders) - def preprocess_examples(self, examples, unused_mode, unused_hparams): - examples["new_field"] = tf.constant([42.42]) - return examples + def preprocess_example(self, example, unused_mode, unused_hparams): + example["new_field"] = tf.constant([42.42]) + return example def generate_test_data(problem, tmp_dir): @@ -146,7 +146,7 @@ def testPreprocess(self): examples = data_reader._preprocess(examples, self.problem, None, None) with tf.train.MonitoredSession() as sess: ex_val = sess.run(examples) - # problem.preprocess_examples has been run + # problem.preprocess_example has been run self.assertAllClose([42.42], ex_val["new_field"]) # int64 has been cast to int32 From 6237729d291d0fd7e2d4a4dfbfc6edcac6b756c4 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 21 Sep 2017 17:59:22 -0700 Subject: [PATCH 0423/4095] Fix travis shield link PiperOrigin-RevId: 169629386 --- README.md | 2 +- docs/walkthrough.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6ef815f4e..0e97770ba 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](http welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) -[![Travis](https://img.shields.io/travis/tensorflow/tensor2tensor.svg)]() +[![Travis](https://img.shields.io/travis/tensorflow/tensor2tensor.svg)](https://travis-ci.org/tensorflow/tensor2tensor) [T2T](https://github.com/tensorflow/tensor2tensor) is a modular and extensible library and binaries for supervised learning with TensorFlow and with support diff --git a/docs/walkthrough.md b/docs/walkthrough.md index 6ef815f4e..0e97770ba 100644 --- a/docs/walkthrough.md +++ b/docs/walkthrough.md @@ -8,7 +8,7 @@ Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](http welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) [![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) -[![Travis](https://img.shields.io/travis/tensorflow/tensor2tensor.svg)]() +[![Travis](https://img.shields.io/travis/tensorflow/tensor2tensor.svg)](https://travis-ci.org/tensorflow/tensor2tensor) [T2T](https://github.com/tensorflow/tensor2tensor) is a modular and extensible library and binaries for supervised learning with TensorFlow and with support From 76706efe22b8fc384ed462d9b648ed148cb7f527 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 21 Sep 2017 18:31:35 -0700 Subject: [PATCH 0424/4095] Make output of fn in @recompute_grad a list to avoid trying to concat tuple and list PiperOrigin-RevId: 169632380 --- tensor2tensor/layers/rev_block.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 3dff92c5c..8d1206ee8 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -348,7 +348,7 @@ def _recompute_grad(fn, args): def grad_fn(inputs, variables, outputs, output_grads): del outputs # recompute outputs - outputs = fn(*inputs) + outputs = list(fn(*inputs)) grads = tf.gradients(outputs, inputs + variables, output_grads) grad_inputs = grads[:len(inputs)] grad_vars = grads[len(inputs):] From eb5d4cb6efd238a0f30ef8b885a5873093307405 Mon Sep 17 00:00:00 2001 From: Stefan Schweter <stefan@schweter.it> Date: Tue, 26 Sep 2017 22:50:12 +0200 Subject: [PATCH 0425/4095] model_builder: fix log message for diet variables --- tensor2tensor/utils/model_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 4a4717bd4..6e0b32b13 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -288,7 +288,7 @@ def nth_model(n): diet_vars = [ v for v in tf.global_variables() if v.dtype == dtypes.float16_ref ] - _log_variable_sizes(diet_vars, "Diet Varaibles") + _log_variable_sizes(diet_vars, "Diet Variables") # Optimize total_loss = tf.identity(total_loss, name="total_loss") From 0c904e33d4edfe85e6e8b710a1ff215881fcc2ea Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 21 Sep 2017 22:43:19 -0700 Subject: [PATCH 0426/4095] Adding minimal changes that permit deeper introspection of the beam search PiperOrigin-RevId: 169648596 --- tensor2tensor/data_generators/text_encoder.py | 37 +++++++++++++++- tensor2tensor/utils/beam_search.py | 43 ++++++++++++++++--- 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 97ab88402..557a62d13 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -110,13 +110,28 @@ def decode(self, ids): Returns: s: human-readable string. """ + return " ".join(self.decode_list(ids)) + + def decode_list(self, ids): + """Transform a sequence of int ids into a their string versions. + + This method supports transforming individual input/output ids to their + string versions so that sequence to/from text conversions can be visualized + in a human readable format. + + Args: + ids: list of integers to be converted. + + Returns: + strs: list of human-readable string. + """ decoded_ids = [] for id_ in ids: if 0 <= id_ < self._num_reserved_ids: decoded_ids.append(RESERVED_TOKENS[int(id_)]) else: decoded_ids.append(id_ - self._num_reserved_ids) - return " ".join([str(d) for d in decoded_ids]) + return [str(d) for d in decoded_ids] @property def vocab_size(self): @@ -149,6 +164,18 @@ def decode(self, ids): # Python3: join byte arrays and then decode string return b"".join(decoded_ids).decode("utf-8", "replace") + def decode_list(self, ids): + numres = self._num_reserved_ids + decoded_ids = [] + int2byte = six.int2byte + for id_ in ids: + if 0 <= id_ < numres: + decoded_ids.append(RESERVED_TOKENS_BYTES[int(id_)]) + else: + decoded_ids.append(int2byte(id_ - numres)) + # Python3: join byte arrays and then decode string + return decoded_ids + @property def vocab_size(self): return 2**8 + self._num_reserved_ids @@ -229,8 +256,11 @@ def encode(self, sentence): return ret[::-1] if self._reverse else ret def decode(self, ids): + return " ".join(self.decode_list(ids)) + + def decode_list(self, ids): seq = reversed(ids) if self._reverse else ids - return " ".join([self._safe_id_to_token(i) for i in seq]) + return [self._safe_id_to_token(i) for i in seq] @property def vocab_size(self): @@ -415,6 +445,9 @@ def decode(self, subtokens): return unicode_to_native( tokenizer.decode(self._subtoken_ids_to_tokens(subtokens))) + def decode_list(self, subtokens): + return [self._subtoken_id_to_subtoken_string(s) for s in subtokens] + @property def vocab_size(self): """The subtoken vocabulary size.""" diff --git a/tensor2tensor/utils/beam_search.py b/tensor2tensor/utils/beam_search.py index c5e8eb85e..9c26579af 100644 --- a/tensor2tensor/utils/beam_search.py +++ b/tensor2tensor/utils/beam_search.py @@ -51,13 +51,19 @@ def compute_batch_indices(batch_size, beam_size): def compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags, - beam_size, batch_size): + beam_size, batch_size, prefix="default"): """Given sequences and scores, will gather the top k=beam size sequences. This function is used to grow alive, and finished. It takes sequences, scores, and flags, and returns the top k from sequences, scores_to_gather, and flags based on the values in scores. + This method permits easy introspection using tfdbg. It adds three named ops + that are prefixed by `prefix`: + - _topk_seq: the tensor for topk_seq returned by this method. + - _topk_flags: the tensor for topk_finished_flags returned by this method. + - _topk_scores: the tensor for tokp_gathered_scores returned by this method. + Args: sequences: Tensor of sequences that we need to gather from. [batch_size, beam_size, seq_length] @@ -72,6 +78,7 @@ def compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags, EOS or not beam_size: int batch_size: int + prefix: string that will prefix unique names for the ops run. Returns: Tuple of (topk_seq [batch_size, beam_size, decode_length], @@ -91,10 +98,15 @@ def compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags, # last dimension contains the i,j gathering coordinates. top_coordinates = tf.stack([batch_pos, topk_indexes], axis=2) - # Gather up the highest scoring sequences - topk_seq = tf.gather_nd(sequences, top_coordinates) - topk_flags = tf.gather_nd(flags, top_coordinates) - topk_gathered_scores = tf.gather_nd(scores_to_gather, top_coordinates) + # Gather up the highest scoring sequences. For each operation added, give it + # a concrete name to simplify observing these operations with tfdbg. Clients + # can capture these tensors by watching these node names. + topk_seq = tf.gather_nd( + sequences, top_coordinates, name=(prefix + "_topk_seq")) + topk_flags = tf.gather_nd( + flags, top_coordinates, name=(prefix + "_topk_flags")) + topk_gathered_scores = tf.gather_nd( + scores_to_gather, top_coordinates, name=(prefix + "_topk_scores")) return topk_seq, topk_gathered_scores, topk_flags @@ -111,6 +123,22 @@ def beam_search(symbols_to_logits_fn, the logits for the next symbol. The implementation is inspired by https://arxiv.org/abs/1609.08144. + When running, the beam search steps can be visualized by using tfdbg to watch + the operations generating the output ids for each beam step. These operations + have the pattern: + (alive|finished)_topk_(seq,scores) + + Operations marked `alive` represent the new beam sequences that will be + processed in the next step. Operations marked `finished` represent the + completed beam sequences, which may be padded with 0s if no beams finished. + + Operations marked `seq` store the full beam sequence for the time step. + Operations marked `scores` store the sequence's final log scores. + + The beam search steps will be processed sequentially in order, so when + capturing observed from these operations, tensors, clients can make + assumptions about which step is being recorded. + Args: symbols_to_logits_fn: Interface to the model, to provide logits. Shoud take [batch_size, decoded_ids] and return [batch_size, vocab_size] @@ -184,7 +212,7 @@ def grow_finished(finished_seq, finished_scores, finished_flags, curr_seq, curr_finished_flags = tf.concat([finished_flags, curr_finished], axis=1) return compute_topk_scores_and_seq( curr_finished_seq, curr_finished_scores, curr_finished_scores, - curr_finished_flags, beam_size, batch_size) + curr_finished_flags, beam_size, batch_size, "grow_finished") def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished): """Given sequences and scores, will gather the top k=beam size sequences. @@ -207,7 +235,8 @@ def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished): # values curr_scores += tf.to_float(curr_finished) * -INF return compute_topk_scores_and_seq(curr_seq, curr_scores, curr_log_probs, - curr_finished, beam_size, batch_size) + curr_finished, beam_size, batch_size, + "grow_alive") def grow_topk(i, alive_seq, alive_log_probs): r"""Inner beam seach loop. From 0587533001777de2bddf32bd57d63ca5418e1a5e Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 22 Sep 2017 12:22:03 -0700 Subject: [PATCH 0427/4095] Add option to use relative position embeddings as part of self-attention. PiperOrigin-RevId: 169721943 --- tensor2tensor/layers/common_attention.py | 124 +++++++++++++++++- tensor2tensor/layers/common_attention_test.py | 14 ++ tensor2tensor/models/transformer.py | 45 ++++++- tensor2tensor/models/transformer_test.py | 19 ++- 4 files changed, 192 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 582f8e9b3..2b193b37a 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -537,6 +537,121 @@ def dot_product_attention(q, return tf.matmul(weights, v) +def _generate_relative_positions_matrix(length, max_relative_position): + """Generates matrix of relative positions between inputs.""" + range_vec = tf.range(length) + range_mat = tf.reshape(tf.tile(range_vec, [length]), [length, length]) + distance_mat = range_mat - tf.transpose(range_mat) + distance_mat_clipped = tf.clip_by_value(distance_mat, -max_relative_position, + max_relative_position) + # Shift values to be >= 0. Each integer still uniquely identifies a relative + # position difference. + final_mat = distance_mat_clipped + max_relative_position + return final_mat + + +def _generate_relative_positions_embeddings(heads, length, depth, + max_relative_position, name): + """Generates tensor of size [heads, length, length, depth].""" + with tf.variable_scope(name): + relative_positions_matrix = _generate_relative_positions_matrix( + length, max_relative_position) + vocab_size = max_relative_position * 2 + 1 + # Generates embedding for each relative position of dimension heads * depth. + embeddings_table = tf.get_variable("embeddings", + [vocab_size, heads * depth]) + embeddings = tf.gather(embeddings_table, relative_positions_matrix) + # Split embeddings per head. + embeddings = tf.reshape(embeddings, [length, length, heads, depth]) + # Transpose to shape [heads, length, length, depth]. + embeddings = tf.transpose(embeddings, [2, 0, 1, 3]) + return embeddings + + +def _relative_attention_inner(x, y, z, transpose): + """Relative position-aware dot-product attention inner calculation. + + This batches matrix multiply calculations to avoid unnecessary broadcasting. + + Args: + x: Tensor with shape [batch_size, heads, length, length or depth]. + y: Tensor with shape [batch_size, heads, length, depth]. + z: Tensor with shape [heads, length, length, depth]. + transpose: Whether to tranpose inner matrices of y and z. Should be true if + last dimension of x is depth, not length. + + Returns: + A Tensor with shape [batch_size, heads, length, a]. + """ + xy_matmul = tf.matmul(x, y, transpose_b=transpose) + x_t = tf.transpose(x, [1, 2, 0, 3]) + x_tz_matmul = tf.matmul(x_t, z, transpose_b=transpose) + x_tz_matmul_t = tf.transpose(x_tz_matmul, [2, 0, 1, 3]) + return xy_matmul + x_tz_matmul_t + + +def dot_product_attention_relative(q, + k, + v, + bias, + max_relative_position, + dropout_rate=0.0, + image_shapes=None, + name=None): + """Calculate relative position-aware dot-product self-attention. + + The attention calculation is augmented with learned representations for the + relative position between each element in q and each element in k and v. + + Args: + q: a Tensor with shape [batch, heads, length, depth]. + k: a Tensor with shape [batch, heads, length, depth]. + v: a Tensor with shape [batch, heads, length, depth]. + bias: bias Tensor. + max_relative_position: an integer specifying the maxmimum distance between + inputs that unique position embeddings should be learned for. + dropout_rate: a floating point number. + image_shapes: optional tuple of integer scalars. + name: an optional string. + + Returns: + A Tensor. + + Raises: + ValueError: if max_relative_position is not > 0. + """ + if not max_relative_position: + raise ValueError("Max relative position (%s) should be > 0 when using " + "relative self attention." % (max_relative_position)) + with tf.variable_scope( + name, default_name="dot_product_attention_relative", values=[q, k, v]): + + # This calculation only works for self attention. + # q, k and v must therefore have the same shape. + q.get_shape().assert_is_compatible_with(k.get_shape()) + q.get_shape().assert_is_compatible_with(v.get_shape()) + + # Use separate embeddings suitable for keys and values. + heads = q.get_shape().as_list()[1] + depth = q.get_shape().as_list()[3] + length = tf.shape(q)[2] + relations_keys = _generate_relative_positions_embeddings( + heads, length, depth, max_relative_position, "relative_positions_keys") + relations_values = _generate_relative_positions_embeddings( + heads, length, depth, max_relative_position, + "relative_positions_values") + + # Compute self attention considering the relative position embeddings. + logits = _relative_attention_inner(q, k, relations_keys, True) + if bias is not None: + logits += bias + weights = tf.nn.softmax(logits, name="attention_weights") + weights = tf.nn.dropout(weights, 1.0 - dropout_rate) + if not tf.get_variable_scope().reuse: + attention_image_summary(weights, image_shapes) + return _relative_attention_inner(weights, v, relations_values, False) + + def masked_local_attention_1d( q, k, v, block_length=128, name=None): """Attention to the source position and a neigborhood to the left of it. @@ -769,7 +884,7 @@ def local_attention_2d(q, make_image_summary=False) # putting the representations back in the right place output = scatter_blocks_2d(output, q_indices, padded_q_shape) - # Remove the padding if introduced + # Remove the padding if introduced output = tf.slice(output, [0, 0, 0, 0, 0], [-1, -1, v_shape[2], v_shape[3], -1]) output.set_shape(q_shape) @@ -1056,6 +1171,7 @@ def multihead_attention(query_antecedent, output_depth, num_heads, dropout_rate, + max_relative_position=None, image_shapes=None, attention_type="dot_product", block_length=128, @@ -1077,6 +1193,9 @@ def multihead_attention(query_antecedent, output_depth: an integer num_heads: an integer dividing total_key_depth and total_value_depth dropout_rate: a floating point number + max_relative_position: Maximum distance between inputs to generate + unique relation embeddings for. Only relevant + when using dot_product_relative attention. image_shapes: optional tuple of integer scalars. see comments for attention_image_summary() attention_type: a string, either "dot_product" or "local_mask_right" or @@ -1147,6 +1266,9 @@ def multihead_attention(query_antecedent, q *= key_depth_per_head**-0.5 if attention_type == "dot_product": x = dot_product_attention(q, k, v, bias, dropout_rate, image_shapes) + elif attention_type == "dot_product_relative": + x = dot_product_attention_relative(q, k, v, bias, max_relative_position, + dropout_rate, image_shapes) elif attention_type == "local_mask_right": x = masked_local_attention_1d(q, k, v, block_length=block_length) else: diff --git a/tensor2tensor/layers/common_attention_test.py b/tensor2tensor/layers/common_attention_test.py index 7823936fa..ef67b0d8e 100644 --- a/tensor2tensor/layers/common_attention_test.py +++ b/tensor2tensor/layers/common_attention_test.py @@ -244,6 +244,20 @@ def test2dGather(self): self.assertAllEqual(correct_indices, x_indices) self.assertAllClose(correct_gathered_x, gathered_x) + def testDotProductAttentionRelative(self): + x = np.random.rand(5, 7, 12, 32) + y = np.random.rand(5, 7, 12, 32) + with self.test_session() as session: + a = common_attention.dot_product_attention_relative( + tf.constant(x, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + tf.constant(y, dtype=tf.float32), + None, + max_relative_position=3) + session.run(tf.global_variables_initializer()) + res = session.run(a) + self.assertEqual(res.shape, (5, 7, 12, 32)) + if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 7d4ce27be..e0f619805 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -423,11 +423,16 @@ def transformer_encoder(encoder_input, with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( - common_layers.layer_preprocess( - x, hparams), None, encoder_self_attention_bias, + common_layers.layer_preprocess(x, hparams), + None, + encoder_self_attention_bias, hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + attention_type=hparams.self_attention_type, + max_relative_position=hparams.max_relative_position) x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): y = transformer_ffn_layer( @@ -480,6 +485,8 @@ def transformer_decoder(decoder_input, hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, + attention_type=hparams.self_attention_type, + max_relative_position=hparams.max_relative_position, cache=layer_cache) x = common_layers.layer_postprocess(x, y, hparams) if encoder_output is not None: @@ -599,7 +606,8 @@ def transformer_base(): hparams.add_hparam("nbr_decoder_problems", 1) hparams.add_hparam("proximity_bias", int(False)) hparams.add_hparam("use_pad_remover", int(True)) - + hparams.add_hparam("self_attention_type", "dot_product") + hparams.add_hparam("max_relative_position", 0) return hparams @@ -908,3 +916,32 @@ def transformer_base_range(rhp): rhp.set_float("optimizer_adam_beta1", 0.85, 0.95) rhp.set_float("optimizer_adam_beta2", 0.97, 0.99) rhp.set_float("weight_decay", 0.0, 2.0) + + +@registry.register_hparams +def transformer_relative(): + """Use relative position embeddings instead of absolute position encodings.""" + hparams = transformer_base() + hparams.pos = None + hparams.self_attention_type = "dot_product_relative" + hparams.max_relative_position = 20 + return hparams + + +@registry.register_hparams +def transformer_relative_tiny(): + hparams = transformer_relative() + hparams.num_hidden_layers = 2 + hparams.hidden_size = 128 + hparams.filter_size = 512 + hparams.num_heads = 4 + return hparams + + +@registry.register_hparams +def transformer_relative_big(): + hparams = transformer_big() + hparams.pos = None + hparams.self_attention_type = "dot_product_relative" + hparams.max_relative_position = 20 + return hparams diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 22848b249..e77138eaf 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -37,8 +37,7 @@ class TransformerTest(tf.test.TestCase): - def getModel(self, mode=tf.estimator.ModeKeys.TRAIN): - hparams = transformer.transformer_small() + def getModel(self, hparams, mode=tf.estimator.ModeKeys.TRAIN): hparams.hidden_size = 8 hparams.filter_size = 32 hparams.num_heads = 1 @@ -61,7 +60,16 @@ def getModel(self, mode=tf.estimator.ModeKeys.TRAIN): hparams, tf.estimator.ModeKeys.PREDICT, p_hparams), features def testTransformer(self): - model, features = self.getModel() + model, features = self.getModel(transformer.transformer_small()) + shadred_logits, _ = model.model_fn(features) + logits = tf.concat(shadred_logits, 0) + with self.test_session() as session: + session.run(tf.global_variables_initializer()) + res = session.run(logits) + self.assertEqual(res.shape, (BATCH_SIZE, TARGET_LENGTH, 1, 1, VOCAB_SIZE)) + + def testTransformerRelative(self): + model, features = self.getModel(transformer.transformer_relative_tiny()) shadred_logits, _ = model.model_fn(features) logits = tf.concat(shadred_logits, 0) with self.test_session() as session: @@ -70,7 +78,7 @@ def testTransformer(self): self.assertEqual(res.shape, (BATCH_SIZE, TARGET_LENGTH, 1, 1, VOCAB_SIZE)) def testGreedyVsFast(self): - model, features = self.getModel() + model, features = self.getModel(transformer.transformer_small()) decode_length = 2 @@ -87,7 +95,8 @@ def testGreedyVsFast(self): for _ in range(100): apply_grad.run() - model, _ = self.getModel(tf.estimator.ModeKeys.PREDICT) + model, _ = self.getModel(transformer.transformer_small(), + mode=tf.estimator.ModeKeys.PREDICT) with tf.variable_scope(tf.get_variable_scope(), reuse=True): greedy_result, _, _ = model._slow_greedy_infer( From 1951ac728f212199d9e960ccdbf6c6bd5384d518 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Sun, 24 Sep 2017 14:35:42 -0700 Subject: [PATCH 0428/4095] For expert attention, allow to split each position into multiple positions with smaller dimensionality; better @add_scope decorator; new attention expert hparams_set. PiperOrigin-RevId: 169848292 --- tensor2tensor/models/attention_lm_moe.py | 161 ++++++++++++++++++++++- tensor2tensor/utils/expert_utils.py | 17 ++- 2 files changed, 172 insertions(+), 6 deletions(-) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 42a9fbabf..96017f721 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -122,6 +122,27 @@ def _diet_expert(x): dp_remove_pad = lambda x: x dp_restore_pad = lambda x: x + if hparams.attention_exp_factor != 0: + tf.logging.info("Expand/compress tokens before sending them to experts") + dp_expand_bc = lambda x: dp( # pylint: disable=g-long-lambda + expand_batch_coordinates, + x, + hparams.attention_exp_factor) + dp_expand_x = lambda x: dp( # pylint: disable=g-long-lambda + deconv_elems_1d, + x, + hparams.attention_exp_factor, + hparams.attention_exp_inputdim) + dp_compress_x = lambda x, l: dp( # pylint: disable=g-long-lambda + conv_elems_1d, + x, + hparams.attention_exp_factor, + l) + else: + dp_expand_bc = lambda x: x + dp_expand_x = lambda x: x + dp_compress_x = lambda x, l: x + def print_shape(x, suffix, debug=False): # To help debugging, print the input/output shapes at inference and eval # Inference for long sequences can take a long time, so that's help to @@ -130,8 +151,10 @@ def print_shape(x, suffix, debug=False): return x return tf.Print(x, [tf.shape(x)], "shape_x_{}".format(suffix)) - batch_coordinate = dp(get_batch_coordinate, x) - batch_coordinate = dp_remove_pad(batch_coordinate) + with tf.name_scope("batch_coordinate_preprocess"): + batch_coordinate = dp(get_batch_coordinate, x) + batch_coordinate = dp_remove_pad(batch_coordinate) + batch_coordinate = dp_expand_bc(batch_coordinate) x = dp(print_shape, x, "in") @@ -175,6 +198,7 @@ def print_shape(x, suffix, debug=False): elif attention_type == AttentionType.LOCAL_EXPERTS: x_in = preprocess(x) x_in = dp_remove_pad(x_in) + x_in = dp_expand_x(x_in) y, loss = dp( common_attention.local_expert_attention, x_in, @@ -187,6 +211,7 @@ def print_shape(x, suffix, debug=False): split_batch=bool(hparams.attention_split_batch), attention_kq_size=hparams.attention_kq_size, attention_v_size=hparams.attention_v_size) + y = dp_compress_x(y, x[0].get_shape().as_list()[-1]) y = dp_restore_pad(y) # TODO(avaswani, epot, noam): Do we need to divide by num shards ? extra_loss += tf.add_n(loss) / dp.n @@ -276,6 +301,87 @@ def get_batch_coordinate(x): return batch_coordinate +@expert_utils.add_var_scope() +def deconv_elems_1d(x, factor, out_depth): + """Increase the length and change the dimensionality. + + Expand/project each positions of dim depth of the input into + factor*tokens of dim out_depth + + Args: + x (tf.Tensor): shape [batch_size, length, depth] + factor (int): Multiplicative factor of each tokens. + out_depth (int): Output depth + + Returns: + tf.Tensor: shape [batch_size, length*factor, out_depth] + """ + x = tf.expand_dims(x, 1) # [batch_size, 1, length, depth] + x = tf.layers.conv2d_transpose( + inputs=x, + filters=out_depth, + kernel_size=(1, factor), + strides=(1, factor), + padding="valid", + data_format="channels_last", + ) # [batch_size, 1, length*factor, out_depth] + x = tf.squeeze(x, 1) # [batch_size, 1, length, depth] + return x + + +@expert_utils.add_var_scope() +def conv_elems_1d(x, factor, out_depth): + """Decrease the length and change the dimensionality. + + Merge/restore/compress factors positions of dim depth of the input into + a single position of dim out_depth. + This is basically just a strided convolution without overlapp + between each strides. + The original length has to be divided by factor. + + Args: + x (tf.Tensor): shape [batch_size, length, depth] + factor (int): Length compression factor. + out_depth (int): Output depth + + Returns: + tf.Tensor: shape [batch_size, length//factor, out_depth] + """ + with tf.control_dependencies( # Dynamic assertion + [tf.assert_equal(tf.shape(x)[1] % factor, 0)]): + x = tf.expand_dims(x, 1) # [batch_size, 1, length, depth] + x = tf.layers.conv2d( + inputs=x, + filters=out_depth, + kernel_size=(1, factor), + strides=(1, factor), + padding="valid", + data_format="channels_last", + ) # [batch_size, 1, length//factor, out_depth] + x = tf.squeeze(x, 1) # [batch_size, 1, length, depth] + return x + + +def expand_batch_coordinates(bc, length_factor): + """Duplicate elements of bc by length_factor. + + Args: + bc (tf.Tensor): int32 tensor of shape [1, length, 1] + length_factor (int): + + Returns: + tf.Tensor: of shape [1, length*length_factor, 1] where every elements has + been duplicated length_factor times. + """ + assert bc.get_shape().as_list() == [1, None, 1] + # bc has shape [1, length, 1] + bc *= tf.constant([[1] * length_factor]) + # bc has shape [1, length, length_factor] + bc = tf.reshape(bc, [1, -1, 1]) + # bc has shape [1, length*length_factor] + return bc + + def remove_pad(x, pad_remover, mode): """Remove padding by concatenating all dimension into one. @@ -364,6 +470,12 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_moe_k", 2) hparams.add_hparam("attention_num_experts", 16) hparams.add_hparam("attention_split_batch", int(False)) + # If attention_exp_factor is set, each input to local_expert_attention (of + # dimensionality hidden size) is projected into attention_exp_factor smaller + # inputs, each of dimensionality attention_exp_inputdim. (otherwise + # attention_exp_inputdim is ignored) + hparams.add_hparam("attention_exp_factor", 0) + hparams.add_hparam("attention_exp_inputdim", 128) # Key, query and value dimensions for the attention hparams.add_hparam("attention_kq_size", 128) hparams.add_hparam("attention_v_size", 256) @@ -425,6 +537,51 @@ def attention_lm_moe_base_hybrid(): return hparams +@registry.register_hparams +def attention_lm_hybrid_v2(): + hparams = attention_lm_moe_base_long_seq() + hparams.attention_layers = "hheh" # Alternate local/expert + hparams.attention_local = int(True) + hparams.attention_moe_k = 6 + + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + return hparams + + +@registry.register_hparams +def attention_lm_ae_extended(): + """Experiment with the exp_factor params.""" + hparams = attention_lm_moe_base_long_seq() + hparams.attention_layers = "eeee" + hparams.attention_local = int(True) + # hparams.factored_logits=1 # Necessary when the number of expert grow bigger + hparams.attention_moe_k = 2 + hparams.attention_exp_factor = 4 + # hparams.attention_exp_inputdim = 128 + + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + return hparams + + +@registry.register_hparams +def attention_lm_moe_base_memeff(): + """Base model with attention expert.""" + hparams = attention_lm_moe_base_long_seq() + hparams.use_sepconv = int(False) + + hparams.diet_experts = int(True) + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + hparams.layer_prepostprocess_dropout = 0.0 + hparams.memory_efficient_ffn = True + hparams.attention_type = AttentionType.MEMORY_EFFICIENT + hparams.num_heads = 8 + hparams.factored_logits = int(True) + return hparams + + @registry.register_hparams def attention_lm_moe_small(): """Cheap model for single-gpu training. diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 8865b9271..495c3fb50 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -61,11 +61,16 @@ def convert_gradient_to_tensor(x): return x -def add_name_scope(scope): - """Return a decorator which add a TF name scope to a function. +def add_scope(scope=None, scope_fn=None): + """Return a decorator which add a TF name/variable scope to a function. + + Note that the function returned by the decorator accept an additional 'name' + parameter, which can overwritte the name scope given when the function is + created. Args: - scope (str): name of the name scope + scope (str): name of the scope. If None, the function name is used. + scope_fn (fct): Either tf.name_scope or tf.variable_scope Returns: fct: the add_scope decorator @@ -74,13 +79,17 @@ def decorator(f): @functools.wraps(f) def decorated(*args, **kwargs): - with tf.name_scope(scope): + name = kwargs.pop("name", None) # Python 2 hack for keyword only args + with scope_fn(name or scope or f.__name__): return f(*args, **kwargs) return decorated return decorator +add_var_scope = functools.partial(add_scope, scope_fn=tf.variable_scope) +add_name_scope = functools.partial(add_scope, scope_fn=tf.name_scope) + class Parallelism(object): """Helper class for creating sets of parallel function calls. From e976fe3b06717e9e4bb4c40699d3dbd1fa41ec19 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Mon, 25 Sep 2017 11:46:20 -0700 Subject: [PATCH 0429/4095] Add hparam for the number of attention heads inside the experts PiperOrigin-RevId: 169938486 --- tensor2tensor/layers/common_attention.py | 4 +++- tensor2tensor/models/attention_lm_moe.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 2b193b37a..785010afd 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -1499,6 +1499,7 @@ def self_attention_expert( batch_coordinate, mask_right=True, split_batch=False, + attention_num_head=1, attention_kq_size=None, attention_v_size=None, ): @@ -1515,6 +1516,7 @@ def self_attention_expert( split_batch (bool): If True, each sequence of the batch is processed individually on a loop. If False, the sequences are processed all at once and a mask is applied to isolate the sequences from each others + attention_num_head (int): number of attention heads attention_kq_size (int): dimension used for the attention key, and query attention_v_size (int): dimension used for the attention value @@ -1592,7 +1594,7 @@ def mask_and_call_attention(x): total_key_depth=attention_kq_size, total_value_depth=attention_v_size, output_depth=depth, - num_heads=1, + num_heads=attention_num_head, dropout_rate=0.0) if split_batch: diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 96017f721..0c114f948 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -209,6 +209,7 @@ def print_shape(x, suffix, debug=False): batch_coordinate=batch_coordinate, mask_right=not hparams.use_inputs, split_batch=bool(hparams.attention_split_batch), + attention_num_head=hparams.attention_num_head, attention_kq_size=hparams.attention_kq_size, attention_v_size=hparams.attention_v_size) y = dp_compress_x(y, x[0].get_shape().as_list()[-1]) @@ -468,6 +469,7 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_type", AttentionType.MULTIHEAD) hparams.add_hparam("attention_local", int(False)) hparams.add_hparam("attention_moe_k", 2) + hparams.add_hparam("attention_num_head", 1) hparams.add_hparam("attention_num_experts", 16) hparams.add_hparam("attention_split_batch", int(False)) # If attention_exp_factor is set, each input to local_expert_attention (of From f1b75861d8c9927fbc13643a6d58b60d2f3d08b0 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 25 Sep 2017 17:20:18 -0700 Subject: [PATCH 0430/4095] Fixes an encoder issue with SubwordTextEncoders created from file. PiperOrigin-RevId: 169986292 --- tensor2tensor/data_generators/text_encoder.py | 18 +++++++++++++++++- .../data_generators/text_encoder_test.py | 13 ++++--------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 557a62d13..64eef14fe 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -25,6 +25,7 @@ from __future__ import print_function import collections +from itertools import chain import re # Dependency imports @@ -602,8 +603,23 @@ def build_from_token_counts(self, min_count: an integer - discard subtokens with lower counts. num_iterations: an integer. how many iterations of refinement. num_reserved_ids: an integer. how many ids to reserve for special tokens. + + Raises: + ValueError: if reserved is not 0 or len(RESERVED_TOKENS). In this case, it + is not clear what the space is being reserved for, or when it will be + filled in. """ - self._init_alphabet_from_tokens(six.iterkeys(token_counts)) + # Initialize the alphabet. Note, this must include reserved tokens or it can + # result in encoding failures. + if num_reserved_ids == NUM_RESERVED_TOKENS: + alphabet_tokens = chain(six.iterkeys(token_counts), + [native_to_unicode(t) for t in RESERVED_TOKENS]) + elif num_reserved_ids == 0: + alphabet_tokens = six.iterkeys(token_counts) + else: + raise ValueError("Unexpected value for reserved. What is being reserved?") + + self._init_alphabet_from_tokens(alphabet_tokens) # Bootstrap the initial list of subtokens with the characters from the # alphabet plus the escaping characters. diff --git a/tensor2tensor/data_generators/text_encoder_test.py b/tensor2tensor/data_generators/text_encoder_test.py index 0351d0d2f..6578d873a 100644 --- a/tensor2tensor/data_generators/text_encoder_test.py +++ b/tensor2tensor/data_generators/text_encoder_test.py @@ -232,18 +232,13 @@ def test_reserved_token_chars_not_in_alphabet(self): encoder1.store_to_file(filename) encoder2 = text_encoder.SubwordTextEncoder(filename=filename) + self.assertEqual(encoder1._alphabet, encoder2._alphabet) + for t in text_encoder.RESERVED_TOKENS: for c in t: - # Verify that encoder1 can encode all reserved token chars. + # Verify that encoders can encode all reserved token chars. encoder1.encode(c) - - # TODO(seabass): Implement the fix so that we can remove this assertion. - with self.assertRaises(AssertionError): - for t in text_encoder.RESERVED_TOKENS: - for c in t: - # Verify that encoder2 fails to encode the characters (i.e. - # reproduce the bug). - encoder2.encode(c) + encoder2.encode(c) if __name__ == "__main__": From 767fea1a5d732b005d13ad0ff8d7f7081bf80fee Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 25 Sep 2017 19:18:14 -0700 Subject: [PATCH 0431/4095] Change LM1B has_inputs to False PiperOrigin-RevId: 169996843 --- tensor2tensor/data_generators/lm1b.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index d45e4fe1e..75c6c17a4 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -152,7 +152,7 @@ def is_character_level(self): @property def has_inputs(self): - return True + return False @property def input_space_id(self): From 1993e6b237c7ca8293441a994a7630d829cd0aaf Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 26 Sep 2017 11:10:21 -0700 Subject: [PATCH 0432/4095] Fix vocab file name for LM1B PiperOrigin-RevId: 170079010 --- tensor2tensor/data_generators/lm1b.py | 40 ++++++++++++++------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index 75c6c17a4..da6dd92af 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -36,7 +36,6 @@ import tensorflow as tf - # End-of-sentence marker (should correspond to the position of EOS in the # RESERVED_TOKENS list in text_encoder.py) EOS = 1 @@ -59,9 +58,10 @@ def _original_vocab(tmp_dir): vocab_filepath = os.path.join(tmp_dir, vocab_filename) if not os.path.exists(vocab_filepath): generator_utils.maybe_download(tmp_dir, vocab_filename, vocab_url) - return set( - [text_encoder.native_to_unicode(l.strip()) for l in - tf.gfile.Open(vocab_filepath)]) + return set([ + text_encoder.native_to_unicode(l.strip()) + for l in tf.gfile.Open(vocab_filepath) + ]) def _replace_oov(original_vocab, line): @@ -81,19 +81,19 @@ def _replace_oov(original_vocab, line): def _train_data_filenames(tmp_dir): - return [os.path.join( - tmp_dir, - "1-billion-word-language-modeling-benchmark-r13output", - "training-monolingual.tokenized.shuffled", - "news.en-%05d-of-00100" % i) for i in xrange(1, 100)] + return [ + os.path.join(tmp_dir, + "1-billion-word-language-modeling-benchmark-r13output", + "training-monolingual.tokenized.shuffled", + "news.en-%05d-of-00100" % i) for i in xrange(1, 100) + ] def _dev_data_filename(tmp_dir): - return os.path.join( - tmp_dir, - "1-billion-word-language-modeling-benchmark-r13output", - "heldout-monolingual.tokenized.shuffled", - "news.en.heldout-00000-of-00050") + return os.path.join(tmp_dir, + "1-billion-word-language-modeling-benchmark-r13output", + "heldout-monolingual.tokenized.shuffled", + "news.en.heldout-00000-of-00050") def _maybe_download_corpus(tmp_dir): @@ -112,15 +112,17 @@ def _maybe_download_corpus(tmp_dir): corpus_tar.extractall(tmp_dir) -def _get_or_build_subword_text_encoder(tmp_dir): +def _get_or_build_subword_text_encoder(tmp_dir, vocab_name): """Builds a SubwordTextEncoder based on the corpus. Args: tmp_dir: directory containing dataset. + vocab_name: name of vocab file. + Returns: a SubwordTextEncoder. """ - filepath = os.path.join(tmp_dir, "lm1b_32k.subword_text_encoder") + filepath = os.path.join(tmp_dir, vocab_name) if tf.gfile.Exists(filepath): return text_encoder.SubwordTextEncoder(filepath) _maybe_download_corpus(tmp_dir) @@ -197,12 +199,12 @@ def generator(self, tmp_dir, train, characters=False): """ _maybe_download_corpus(tmp_dir) original_vocab = _original_vocab(tmp_dir) - files = (_train_data_filenames(tmp_dir) if train - else [_dev_data_filename(tmp_dir)]) + files = (_train_data_filenames(tmp_dir) + if train else [_dev_data_filename(tmp_dir)]) if characters: encoder = text_encoder.ByteTextEncoder() else: - encoder = _get_or_build_subword_text_encoder(tmp_dir) + encoder = _get_or_build_subword_text_encoder(tmp_dir, self.vocab_file) for filepath in files: tf.logging.info("filepath = %s", filepath) for line in tf.gfile.Open(filepath): From 41b7c709f5d4724b12c96e1e8daa5984d94bd4cb Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 26 Sep 2017 11:59:14 -0700 Subject: [PATCH 0433/4095] Support using "-test" data for EVAL/PREDICT with --eval_use_test_set flag PiperOrigin-RevId: 170087475 --- tensor2tensor/bin/t2t-decoder | 10 +++++++--- tensor2tensor/utils/data_reader.py | 5 ++++- tensor2tensor/utils/decoding.py | 13 +++++++------ tensor2tensor/utils/trainer_utils.py | 17 ++++++++++------- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index d2fe41f2f..6915c0400 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -90,9 +90,13 @@ def main(_): decoding.decode_from_file(estimator, FLAGS.decode_from_file, decode_hp, FLAGS.decode_to_file) else: - decoding.decode_from_dataset(estimator, - FLAGS.problems.split("-"), decode_hp, - FLAGS.decode_to_file) + decoding.decode_from_dataset( + estimator, + FLAGS.problems.split("-"), + decode_hp, + decode_to_file=FLAGS.decode_to_file, + dataset="test" + if FLAGS.eval_use_test_set else tf.estimator.ModeKeys.PREDICT) if __name__ == "__main__": diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index e88d208ac..31ea13c49 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -464,7 +464,10 @@ def get_data_filepatterns(problems, data_dir, mode): if mode == tf.estimator.ModeKeys.TRAIN: datasets.append("%s-train*" % path) else: - datasets.append("%s-dev*" % path) + if mode == "test": + datasets.append("%s-test*" % path) + else: + datasets.append("%s-dev*" % path) return datasets diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index a08947202..e8d8e17d3 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -102,7 +102,8 @@ def log_decode_results(inputs, def decode_from_dataset(estimator, problem_names, decode_hp, - decode_to_file=None): + decode_to_file=None, + dataset=tf.estimator.ModeKeys.PREDICT): tf.logging.info("Performing local inference from dataset for %s.", str(problem_names)) hparams = estimator.params @@ -110,7 +111,7 @@ def decode_from_dataset(estimator, for problem_idx, problem_name in enumerate(problem_names): # Build the inference input function infer_problems_data = data_reader.get_data_filepatterns( - problem_name, hparams.data_dir, tf.estimator.ModeKeys.PREDICT) + problem_name, hparams.data_dir, dataset) infer_input_fn = input_fn_builder.build_input_fn( mode=tf.estimator.ModeKeys.PREDICT, @@ -544,8 +545,8 @@ def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring x = tf.tile(x, tf.to_int32([num_samples, 1, 1, 1])) p_hparams = hparams.problems[problem_choice] - return (tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id), x) + return (tf.constant(p_hparams.input_space_id), tf.constant( + p_hparams.target_space_id), x) input_space_id, target_space_id, x = input_fn_builder.cond_on_index( input_fn, feature_map["problem_choice"], len(hparams.problems) - 1) @@ -580,8 +581,8 @@ def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring # Add a third empty dimension dimension x = tf.expand_dims(x, axis=[2]) x = tf.to_int32(x) - return (tf.constant(p_hparams.input_space_id), - tf.constant(p_hparams.target_space_id), x) + return (tf.constant(p_hparams.input_space_id), tf.constant( + p_hparams.target_space_id), x) input_space_id, target_space_id, x = input_fn_builder.cond_on_index( input_fn, feature_map["problem_choice"], len(hparams.problems) - 1) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 09c86ca09..1157bfb2f 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -67,6 +67,8 @@ flags.DEFINE_bool("eval_run_autoregressive", False, "Run eval autoregressively where we condition on previous" "generated output instead of the actual target.") +flags.DEFINE_bool("eval_use_test_set", False, + "Whether to use the '-test' data for EVAL (and PREDICT).") flags.DEFINE_integer("keep_checkpoint_max", 20, "How many recent checkpoints to keep.") flags.DEFINE_bool("experimental_optimize_placement", False, @@ -142,12 +144,12 @@ def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, if FLAGS.dbgprofile: # Recorded traces can be visualized with chrome://tracing/ # The memory/tensor lifetime is also profiled - train_monitors.append(ProfilerHook( - save_steps=10, - output_dir=run_config.model_dir, - show_dataflow=True, - show_memory=True, - )) + train_monitors.append( + ProfilerHook( + save_steps=10, + output_dir=run_config.model_dir, + show_dataflow=True, + show_memory=True,)) optional_kwargs = {} if FLAGS.export_saved_model: @@ -194,7 +196,8 @@ def create_experiment_components(data_dir, model_name, hparams, run_config): eval_input_fn = input_fn_builder.build_input_fn( mode=tf.estimator.ModeKeys.EVAL, hparams=hparams, - data_file_patterns=get_data_filepatterns(data_dir, + data_file_patterns=get_data_filepatterns(data_dir, "test" + if FLAGS.eval_use_test_set else tf.estimator.ModeKeys.EVAL), num_datashards=num_datashards, worker_replicas=FLAGS.worker_replicas, From c6710dd27754df18552cc9e845aca8c56fe88576 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 26 Sep 2017 17:08:47 -0700 Subject: [PATCH 0434/4095] input_pipeline uses Problem.dataset PiperOrigin-RevId: 170133030 --- tensor2tensor/bin/t2t-decoder | 3 +- tensor2tensor/data_generators/problem.py | 38 ++-- tensor2tensor/utils/data_reader.py | 174 +++--------------- tensor2tensor/utils/data_reader_test.py | 100 ++++------ tensor2tensor/utils/decoding.py | 11 +- tensor2tensor/utils/input_fn_builder.py | 30 +-- tensor2tensor/utils/trainer_utils.py | 14 +- .../TransformerVisualization.ipynb | 4 +- 8 files changed, 104 insertions(+), 270 deletions(-) diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index 6915c0400..dce12c23c 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -95,8 +95,7 @@ def main(_): FLAGS.problems.split("-"), decode_hp, decode_to_file=FLAGS.decode_to_file, - dataset="test" - if FLAGS.eval_use_test_set else tf.estimator.ModeKeys.PREDICT) + dataset_split="test" if FLAGS.eval_use_test_set else None) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 37eee64ab..d7870fac2 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -232,6 +232,20 @@ def test_filepaths(self, data_dir, num_shards, shuffled): return generator_utils.test_data_filenames(file_basename, data_dir, num_shards) + def filepattern(self, data_dir, mode): + """Get filepattern for data files for mode.""" + path = os.path.join(data_dir, self.dataset_filename()) + + if mode == tf.estimator.ModeKeys.TRAIN: + suffix = "train" + elif mode == tf.estimator.ModeKeys.EVAL: + suffix = "dev" + else: + assert mode == "test" + suffix = "test" + + return "%s-%s*" % (path, suffix) + def __init__(self, was_reversed=False, was_copy=False): """Create a Problem. @@ -297,7 +311,8 @@ def dataset(self, output_buffer_size=None, shuffle_files=None, hparams=None, - preprocess=True): + preprocess=True, + dataset_split=None): """Build a Dataset for this problem. Args: @@ -314,10 +329,13 @@ def dataset(self, default set that is a no-op. preprocess: bool, whether to map the Dataset through Problem.preprocess_example. + dataset_split: tf.estimator.ModeKeys + ["test"], which split to read data + from (TRAIN:"-train", EVAL:"-dev", "test":"-test"). Defaults to mode. Returns: Dataset containing dict<feature name, Tensor>. """ + dataset_split = dataset_split or mode assert data_dir if hparams is None: @@ -330,20 +348,6 @@ def dataset(self, # Construct the Problem's hparams so that items within it are accessible _ = self.get_hparams(hparams) - base_filename = self.dataset_filename() - path = os.path.join(data_dir, base_filename) - - # TODO(rsepassi): handle ModeKeys.PREDICT with placeholders - is_training = mode == tf.estimator.ModeKeys.TRAIN - if is_training: - suffix = "train" - elif mode == tf.estimator.ModeKeys.EVAL: - suffix = "dev" - else: - assert mode == "test" - suffix = "test" - - filepattern = "%s-%s*" % (path, suffix) data_fields, data_items_to_decoders = self.example_reading_spec() if data_items_to_decoders is None: data_items_to_decoders = { @@ -351,7 +355,9 @@ def dataset(self, for field in data_fields } - data_files = tf.contrib.slim.parallel_reader.get_data_files(filepattern) + is_training = mode == tf.estimator.ModeKeys.TRAIN + data_files = tf.contrib.slim.parallel_reader.get_data_files( + [self.filepattern(data_dir, dataset_split)]) if shuffle_files or shuffle_files is None and is_training: random.shuffle(data_files) dataset = tf.contrib.data.TFRecordDataset(data_files) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 31ea13c49..cfe37c379 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -18,114 +18,16 @@ from __future__ import division from __future__ import print_function -import os -import random - # Dependency imports import numpy as np import six from six.moves import xrange # pylint: disable=redefined-builtin -from six.moves import zip # pylint: disable=redefined-builtin - -from tensor2tensor.utils import registry import tensorflow as tf -def examples_reader(data_sources, - data_fields_to_features, - training, - capacity=32, - data_items_to_decoders=None, - data_items_to_decode=None): - """Reads Examples from data_sources and decodes to Tensors. - - The dictionary data_fields_to_features for an image dataset can be: - - data_fields_to_features = { - 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), - 'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'), - 'image/class/label': tf.FixedLenFeature( - [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)), - } - - and for a simple algorithmic dataset with variable-length data it is: - - data_fields_to_features = { - 'inputs': tf.VarLenFeature(tf.int64), - 'targets': tf.VarLenFeature(tf.int64), - } - - The data_items_to_decoders dictionary argument can be left as None if there - is no decoding to be performed. But, e.g. for images, it should be set so that - the images are decoded from the features, e.g., for MNIST: - - data_items_to_decoders = { - 'image': tfexample_decoder.Image( - image_key = 'image/encoded', - format_key = 'image/format', - shape=[28, 28], - channels=1), - 'label': tfexample_decoder.Tensor('image/class/label'), - } - - These arguments are compatible with the use of tf.contrib.slim.data module, - see there for more documentation. - - Args: - data_sources: a list or tuple of sources from which the data will be read, - for example [/path/to/train@128, /path/to/train2*, /tmp/.../train3*] - data_fields_to_features: a dictionary from data fields in the data sources - to features, such as tf.VarLenFeature(tf.int64), see above for examples. - training: a Boolean, whether to read for training or evaluation. - capacity: integer, buffer capacity; set to 2 * max_batch_size or more. - data_items_to_decoders: a dictionary mapping data items (that will be - in the returned result) to decoders that will decode them using features - defined in data_fields_to_features; see above for examples. By default - (if this is None), we grab the tensor from every feature. - data_items_to_decode: a subset of data items that will be decoded; - by default (if this is None), we decode all items. - - Returns: - A tf.contrib.data.Dataset of dict<feature name, Tensor> - """ - - def decode_record(record): - """Serialized Example to dict of <feature name, Tensor>.""" - example_serialized = record - item_decoders = data_items_to_decoders - if item_decoders is None: - item_decoders = { - field: tf.contrib.slim.tfexample_decoder.Tensor(field) - for field in data_fields_to_features - } - - decoder = tf.contrib.slim.tfexample_decoder.TFExampleDecoder( - data_fields_to_features, item_decoders) - - decode_items = data_items_to_decode - if decode_items is None: - decode_items = list(item_decoders) - - decoded = decoder.decode(example_serialized, items=decode_items) - return dict(zip(decode_items, decoded)) - - with tf.name_scope("examples_in"): - data_files = tf.contrib.slim.parallel_reader.get_data_files(data_sources) - if training: - random.shuffle(data_files) - dataset = tf.contrib.data.TFRecordDataset(data_files) - num_threads = min(4 if training else 1, len(data_files)) - dataset = dataset.map(decode_record, num_threads=num_threads) - if training: - dataset = dataset.shuffle(capacity) - # Loop inifinitely if training, just once otherwise - dataset = dataset.repeat(None if training else 1) - return dataset - - def cast_int64_to_int32(features): f = {} for k, v in six.iteritems(features): @@ -161,34 +63,18 @@ def feature_placeholders(data_fields, data_items_to_decoders): return decoded_example -def read_examples(problem, - data_file_pattern, - capacity, - mode=tf.estimator.ModeKeys.TRAIN): - """Create Dataset of Example for problem and data_file_pattern.""" - data_fields, data_items_to_decoders = problem.example_reading_spec() - - if data_file_pattern is None: - # Create placeholders for input, rather than reading data from disk. - return feature_placeholders(data_fields, data_items_to_decoders) - - is_training = mode == tf.estimator.ModeKeys.TRAIN - dataset = examples_reader( - [data_file_pattern], - data_fields, - training=is_training, - capacity=capacity, - data_items_to_decoders=data_items_to_decoders) - return dataset - - -def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, - batching_scheme): +def input_pipeline(problem, + data_dir, + capacity, + mode, + hparams, + batching_scheme, + dataset_split=None): """Input pipeline, returns a dictionary of batched and padded tensors. Args: problem: Problem instance for which to build the input pipeline. - data_file_pattern: file pattern for input files. + data_dir: directory with input data. capacity: int, data pipeline buffer capacity. mode: tf.estimator.ModeKeys entry. hparams: an HParams object. @@ -197,6 +83,8 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, used for bucketing; see bucket_by_sequence_length for more details. "batch_sizes": a list of batch sizes corresponding to the buckets "max_length": an integer. We drop sequences which are longer. + dataset_split: tf.estimator.ModeKeys + ["test"], which split of the dataset + to use. Defaults to mode. Returns: dict <feature name, batched and padded Tensor> @@ -205,14 +93,19 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, num_threads = 4 if is_training else 1 with tf.name_scope("input_pipeline"): - # TODO(rsepassi): Once all problems use the Problem class, rm example - # reading, parsing, and preprocessing. Use Problem.dataset instead. - dataset = read_examples(problem, data_file_pattern, capacity, mode=mode) - dataset = dataset.map( - lambda ex: _preprocess(ex, problem, hparams, mode), - num_threads=num_threads) + dataset = problem.dataset( + mode, + data_dir=data_dir, + num_threads=num_threads, + output_buffer_size=capacity, + hparams=hparams, + dataset_split=dataset_split) + dataset = dataset.map(cast_int64_to_int32, num_threads=num_threads) dataset = dataset.filter( lambda ex: example_valid_size(ex, batching_scheme["max_length"])) + if is_training: + dataset = dataset.shuffle(capacity) + dataset = dataset.repeat(None) bucket_id_fn = _example_length if len(batching_scheme["boundaries"]) == 1: @@ -239,15 +132,6 @@ def input_pipeline(problem, data_file_pattern, capacity, mode, hparams, return batched_examples -def _preprocess(example, problem, hparams, mode): - """Preprocessing for example.""" - example = problem.preprocess_example(example, mode, hparams) - # We do not want int64s as they are not supported on GPUs. - example = cast_int64_to_int32(example) - - return example - - def _example_length(example): length = 0 # Length of the example is the maximum length of the feature lengths @@ -455,22 +339,6 @@ def constant_batching_scheme(constant_batch_size_in_sequences): } -def get_data_filepatterns(problems, data_dir, mode): - """Return the location of a dataset for a given mode.""" - datasets = [] - for problem in problems.split("-"): - problem = registry.problem(problem).dataset_filename() - path = os.path.join(data_dir, problem) - if mode == tf.estimator.ModeKeys.TRAIN: - datasets.append("%s-train*" % path) - else: - if mode == "test": - datasets.append("%s-test*" % path) - else: - datasets.append("%s-dev*" % path) - return datasets - - def serving_input_fn(problem, hparams): """Input fn for serving, starting from Placeholders.""" data_fields, data_items_to_decoders = problem.example_reading_spec() diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index 4f4d7530d..0dccfaedf 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -69,10 +69,7 @@ def preprocess_example(self, example, unused_mode, unused_hparams): def generate_test_data(problem, tmp_dir): problem.generate_data(tmp_dir, tmp_dir) - filepatterns = data_reader.get_data_filepatterns( - problem.name, tmp_dir, tf.estimator.ModeKeys.TRAIN) - assert tf.gfile.Glob(filepatterns[0]) - return filepatterns + return [problem.filepattern(tmp_dir, tf.estimator.ModeKeys.TRAIN)] class DataReaderTest(tf.test.TestCase): @@ -81,7 +78,8 @@ class DataReaderTest(tf.test.TestCase): def setUpClass(cls): tf.set_random_seed(1) cls.problem = registry.problem("test_problem") - cls.filepatterns = generate_test_data(cls.problem, tempfile.gettempdir()) + cls.data_dir = tempfile.gettempdir() + cls.filepatterns = generate_test_data(cls.problem, cls.data_dir) @classmethod def tearDownClass(cls): @@ -92,7 +90,8 @@ def tearDownClass(cls): os.remove(f) def testBasicExampleReading(self): - dataset = data_reader.read_examples(self.problem, self.filepatterns[0], 32) + dataset = self.problem.dataset( + tf.estimator.ModeKeys.TRAIN, data_dir=self.data_dir) examples = dataset.make_one_shot_iterator().get_next() with tf.train.MonitoredSession() as sess: # Check that there are multiple examples that have the right fields of the @@ -107,56 +106,19 @@ def testBasicExampleReading(self): for field in [inputs, targets, floats]: self.assertGreater(len(field), 0) - def testTrainEvalBehavior(self): - train_dataset = data_reader.read_examples(self.problem, - self.filepatterns[0], 16) - train_examples = train_dataset.make_one_shot_iterator().get_next() - eval_dataset = data_reader.read_examples( - self.problem, - self.filepatterns[0], - 16, - mode=tf.estimator.ModeKeys.EVAL) - eval_examples = eval_dataset.make_one_shot_iterator().get_next() - - eval_idxs = [] - with tf.train.MonitoredSession() as sess: - # Train should be shuffled and run through infinitely - for i in xrange(30): - self.assertNotEqual(i, sess.run(train_examples)["inputs"][0]) - - # Eval should not be shuffled and only run through once - for i in xrange(30): - self.assertEqual(i, sess.run(eval_examples)["inputs"][0]) - eval_idxs.append(i) - - with self.assertRaises(tf.errors.OutOfRangeError): - sess.run(eval_examples) - # Should never run because above line should error - eval_idxs.append(30) - - # Ensuring that the above exception handler actually ran and we didn't - # exit the MonitoredSession context. - eval_idxs.append(-1) - - self.assertAllEqual(list(range(30)) + [-1], eval_idxs) - def testPreprocess(self): - dataset = data_reader.read_examples(self.problem, self.filepatterns[0], 32) + dataset = self.problem.dataset( + tf.estimator.ModeKeys.TRAIN, data_dir=self.data_dir) examples = dataset.make_one_shot_iterator().get_next() - examples = data_reader._preprocess(examples, self.problem, None, None) with tf.train.MonitoredSession() as sess: ex_val = sess.run(examples) # problem.preprocess_example has been run self.assertAllClose([42.42], ex_val["new_field"]) - # int64 has been cast to int32 - self.assertEqual(np.int32, ex_val["inputs"].dtype) - self.assertEqual(np.int32, ex_val["targets"].dtype) - self.assertEqual(np.float32, ex_val["floats"].dtype) - def testLengthFilter(self): max_len = 15 - dataset = data_reader.read_examples(self.problem, self.filepatterns[0], 32) + dataset = self.problem.dataset( + tf.estimator.ModeKeys.TRAIN, data_dir=self.data_dir) dataset = dataset.filter( lambda ex: data_reader.example_valid_size(ex, max_len)) examples = dataset.make_one_shot_iterator().get_next() @@ -169,26 +131,34 @@ def testLengthFilter(self): def testBatchingSchemeMaxLength(self): scheme = data_reader._batching_scheme( - batch_size=20, max_length=None, - min_length_bucket=8, length_bucket_step=1.1, + batch_size=20, + max_length=None, + min_length_bucket=8, + length_bucket_step=1.1, drop_long_sequences=False) self.assertGreater(scheme["max_length"], 10000) scheme = data_reader._batching_scheme( - batch_size=20, max_length=None, - min_length_bucket=8, length_bucket_step=1.1, + batch_size=20, + max_length=None, + min_length_bucket=8, + length_bucket_step=1.1, drop_long_sequences=True) self.assertEqual(scheme["max_length"], 20) scheme = data_reader._batching_scheme( - batch_size=20, max_length=15, - min_length_bucket=8, length_bucket_step=1.1, + batch_size=20, + max_length=15, + min_length_bucket=8, + length_bucket_step=1.1, drop_long_sequences=True) self.assertEqual(scheme["max_length"], 15) scheme = data_reader._batching_scheme( - batch_size=20, max_length=15, - min_length_bucket=8, length_bucket_step=1.1, + batch_size=20, + max_length=15, + min_length_bucket=8, + length_bucket_step=1.1, drop_long_sequences=False) self.assertGreater(scheme["max_length"], 10000) @@ -201,12 +171,14 @@ def testBatchingSchemeBuckets(self): boundaries, batch_sizes = scheme["boundaries"], scheme["batch_sizes"] self.assertEqual(len(boundaries), len(batch_sizes) - 1) expected_boundaries = [ - 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28, - 30, 33, 36, 39, 42, 46, 50, 55, 60, 66, 72, 79, 86, 94, 103, 113, 124] + 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28, 30, + 33, 36, 39, 42, 46, 50, 55, 60, 66, 72, 79, 86, 94, 103, 113, 124 + ] self.assertEqual(expected_boundaries, boundaries) expected_batch_sizes = [ - 16, 12, 12, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 3, 3, 3, - 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1] + 16, 12, 12, 8, 8, 8, 8, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, + 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 + ] self.assertEqual(expected_batch_sizes, batch_sizes) scheme = data_reader._batching_scheme( @@ -239,14 +211,10 @@ def example_len(ex): batch_sizes = [10, 8, 4, 2] window_size = 40 - dataset = data_reader.read_examples( - self.problem, - self.filepatterns[0], - 32, - mode=tf.estimator.ModeKeys.EVAL) + dataset = self.problem.dataset( + tf.estimator.ModeKeys.TRAIN, data_dir=self.data_dir) dataset = data_reader.bucket_by_sequence_length( - dataset, example_len, - boundaries, batch_sizes, window_size) + dataset, example_len, boundaries, batch_sizes, window_size) batch = dataset.make_one_shot_iterator().get_next() input_vals = [] diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index e8d8e17d3..c11fdef34 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -29,7 +29,6 @@ from six.moves import input # pylint: disable=redefined-builtin from tensor2tensor.data_generators import text_encoder -from tensor2tensor.utils import data_reader from tensor2tensor.utils import devices from tensor2tensor.utils import input_fn_builder import tensorflow as tf @@ -103,23 +102,21 @@ def decode_from_dataset(estimator, problem_names, decode_hp, decode_to_file=None, - dataset=tf.estimator.ModeKeys.PREDICT): + dataset_split=None): tf.logging.info("Performing local inference from dataset for %s.", str(problem_names)) hparams = estimator.params for problem_idx, problem_name in enumerate(problem_names): # Build the inference input function - infer_problems_data = data_reader.get_data_filepatterns( - problem_name, hparams.data_dir, dataset) - infer_input_fn = input_fn_builder.build_input_fn( mode=tf.estimator.ModeKeys.PREDICT, hparams=hparams, - data_file_patterns=infer_problems_data, + data_dir=hparams.data_dir, num_datashards=devices.data_parallelism().n, fixed_problem=problem_idx, - batch_size=decode_hp.batch_size) + batch_size=decode_hp.batch_size, + dataset_split=dataset_split) # Get the predictions as an iterable predictions = estimator.predict(infer_input_fn) diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index c9dde1a14..258213889 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -30,12 +30,13 @@ def build_input_fn(mode, hparams, - data_file_patterns=None, + data_dir=None, num_datashards=None, fixed_problem=None, worker_replicas=None, worker_id=None, - batch_size=None): + batch_size=None, + dataset_split=None): """Provides input to the graph, either from disk or via a placeholder. This function produces an input function that will feed data into @@ -50,11 +51,7 @@ def build_input_fn(mode, Args: mode: The execution mode, as defined in tf.estimator.ModeKeys. hparams: HParams object. - data_file_patterns: The list of file patterns to use to read in data. Set to - `None` if you want to create a placeholder for the input data. The - `problems` flag is a list of problem names joined by the `-` character. - The flag's string is then split along the `-` and each problem gets its - own example queue. + data_dir: directory with input data. num_datashards: An integer. fixed_problem: An integer indicating the problem to fetch data for, or None if the input is to be randomly selected. @@ -63,6 +60,8 @@ def build_input_fn(mode, worker_id: int, id of this worker replica. Used in multiproblem setting with hparams.problem_choice == distributed. batch_size: int, if provided, will use a fixed batch size. + dataset_split: tf.estimator.ModeKeys + ["test"], which split of the dataset + to use. Defaults to mode. Returns: A function that returns a dictionary of features and the target labels. @@ -91,16 +90,15 @@ def input_fn(): continue problem_instance = hparams.problem_instances[problem_idx] p_hparams = hparams.problems[problem_idx] - problem_filepatterns = (data_file_patterns and - data_file_patterns[problem_idx]) feature_map = features_for_problem( problem_instance, p_hparams, hparams, - problem_filepatterns, + data_dir, num_datashards, mode, batch_size=batch_size, + dataset_split=dataset_split, name="problem_%d" % problem_idx) problem_batches.append(feature_map) @@ -211,10 +209,11 @@ def create_threads(self, sess, coord=None, daemon=False, start=False): def features_for_problem(problem_instance, p_hparams, hparams, - data_filepatterns, + data_dir, num_datashards, mode, batch_size=None, + dataset_split=None, name="problem_inputs"): """Feature map for Problem.""" with tf.name_scope(name): @@ -231,8 +230,13 @@ def features_for_problem(problem_instance, batching_scheme["batch_sizes"] = [batch_size] batching_scheme["boundaries"] = [] feature_map = data_reader.input_pipeline( - problem_instance, data_filepatterns, capacity, mode, hparams, - batching_scheme) + problem_instance, + data_dir, + capacity, + mode, + hparams, + batching_scheme, + dataset_split=dataset_split) # Reverse inputs and targets features if the problem was reversed. if problem_instance is not None: diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 1157bfb2f..0355ffcbf 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -187,8 +187,7 @@ def create_experiment_components(data_dir, model_name, hparams, run_config): train_input_fn = input_fn_builder.build_input_fn( mode=tf.estimator.ModeKeys.TRAIN, hparams=hparams, - data_file_patterns=get_data_filepatterns(data_dir, - tf.estimator.ModeKeys.TRAIN), + data_dir=data_dir, num_datashards=num_datashards, worker_replicas=FLAGS.worker_replicas, worker_id=FLAGS.worker_id) @@ -196,12 +195,11 @@ def create_experiment_components(data_dir, model_name, hparams, run_config): eval_input_fn = input_fn_builder.build_input_fn( mode=tf.estimator.ModeKeys.EVAL, hparams=hparams, - data_file_patterns=get_data_filepatterns(data_dir, "test" - if FLAGS.eval_use_test_set else - tf.estimator.ModeKeys.EVAL), + data_dir=data_dir, num_datashards=num_datashards, worker_replicas=FLAGS.worker_replicas, - worker_id=FLAGS.worker_id) + worker_id=FLAGS.worker_id, + dataset_split="test" if FLAGS.eval_use_test_set else None) model_fn = model_builder.build_model_fn( model_name, @@ -396,7 +394,3 @@ def session_config(): gpu_options=gpu_options, log_device_placement=FLAGS.log_device_placement) return config - - -def get_data_filepatterns(data_dir, mode): - return data_reader.get_data_filepatterns(FLAGS.problems, data_dir, mode) diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index bf0a269d0..ca26edac1 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -133,12 +133,10 @@ "\n", "num_datashards = utils.devices.data_parallelism().n\n", "\n", - "problems_data = utils.get_data_filepatterns(\n", - " DATA_DIR, tf.estimator.ModeKeys.EVAL)\n", "input_fn = utils.input_fn_builder.build_input_fn(\n", " mode=tf.estimator.ModeKeys.EVAL,\n", " hparams=hparams,\n", - " data_file_patterns=problems_data,\n", + " data_dir=DATA_DIR,\n", " num_datashards=num_datashards)\n", "\n", "inputs, target = input_fn()\n", From 9e6d9dac8eceaca9c9bc2bbfee80d3bc600cbf17 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 27 Sep 2017 11:13:46 -0700 Subject: [PATCH 0435/4095] Add PICKLED_PYTHON SpaceID PiperOrigin-RevId: 170223947 --- tensor2tensor/data_generators/problem.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index d7870fac2..8e587163a 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -92,6 +92,8 @@ class SpaceID(object): CPP_TOK = 28 # Strokes STROKES = 29 + # Pickled Python + PICKLED_PYTHON = 30 def default_model_hparams(): @@ -537,6 +539,7 @@ class Text2TextProblem(Problem): @property def is_character_level(self): + """Whether the inputs and targets are sequences of characters.""" raise NotImplementedError() @property @@ -544,7 +547,18 @@ def targeted_vocab_size(self): raise NotImplementedError() # Not needed if self.is_character_level. def generator(self, data_dir, tmp_dir, is_training): - """Generator for the training and evaluation data.""" + """Generator for the training and evaluation data. + + Args: + data_dir: The directory in which to assets, e.g. the vocab file. + tmp_dir: A scratch directory (if needed). + is_training: A boolean indicating if we should generate training data + (True) or dev set data (False). + + Yields: + dicts with keys "inputs" and "targets", with values being lists of token + ids. + """ raise NotImplementedError() @property From fc2d30680f65646a5f60323cd9688cbee4bf0d50 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Wed, 27 Sep 2017 11:32:19 -0700 Subject: [PATCH 0436/4095] Add attention experts which use a kq based dispatcher PiperOrigin-RevId: 170227737 --- tensor2tensor/layers/common_attention.py | 172 ++++++++++++++++++++--- tensor2tensor/models/attention_lm_moe.py | 48 ++++++- 2 files changed, 201 insertions(+), 19 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 785010afd..84289b31d 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -17,14 +17,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from functools import partial +import functools import math # Dependency imports import numpy as np +from six.moves import range # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin +from six.moves import zip # pylint: disable=redefined-builtin from tensor2tensor.layers import common_layers from tensor2tensor.utils import expert_utils @@ -365,6 +367,30 @@ def attention_bias_proximal(length): return tf.expand_dims(tf.expand_dims(-tf.log(1 + tf.abs(diff)), 0), 0) +@expert_utils.add_name_scope() +def attention_bias_coordinates(batch_coordinate): + """Generate a mask to prevent the batch to attend to each others. + + Args: + batch_coordinate (tf.Tensor): int32 of shape [length, 1] containing the + coordinates of the batches + + Returns: + tf.Tensor: float32 mask of shape [length, length] containing either 0 or + -infinity (-1e9) + """ + batch_coord_float = tf.squeeze(batch_coordinate, 1) + # Convert to float first because of b/25387198 + batch_coord_float = tf.to_float(batch_coord_float) + bc_v = tf.expand_dims(batch_coord_float, 1) + bc_h = tf.expand_dims(batch_coord_float, 0) + bias_batch = bc_v - bc_h # Broadcast to create [length, length] mask + # Theshold non zeros to 1.0 + bias_batch = tf.minimum(1.0, tf.abs(bias_batch)) + bias_batch *= -1e9 # Set non zeros to -infinity + return bias_batch + + def split_last_dimension(x, n): """Reshape x so that the last dimension becomes two dimensions. @@ -1181,7 +1207,8 @@ def multihead_attention(query_antecedent, q_padding="VALID", kv_padding="VALID", cache=None, - name=None): + name=None, + **kwargs): """Multihead scaled-dot-product attention with input/output transformations. Args: @@ -1198,8 +1225,9 @@ def multihead_attention(query_antecedent, when using dot_product_relative attention. image_shapes: optional tuple of integer scalars. see comments for attention_image_summary() - attention_type: a string, either "dot_product" or "local_mask_right" or - "local_unmasked" + attention_type: a string, either "dot_product", "local_mask_right", + "local_unmasked" or any attention function with the + signature (q, k, v, **kwargs) block_length: an integer - relevant for "local_mask_right" block_width: an integer - relevant for "local_unmasked" q_filter_width: An integer specifying how wide you want the query to be. @@ -1214,6 +1242,7 @@ def multihead_attention(query_antecedent, 'k' [batch_size, 0, key_channels] 'v' [batch_size, 0, value_channels] name: an optional string + **kwargs (dict): Params for the attention function Caching: WARNING: For decoder self-attention, i.e. when memory_antecedent == None, @@ -1264,7 +1293,9 @@ def multihead_attention(query_antecedent, v = split_heads(v, num_heads) key_depth_per_head = total_key_depth // num_heads q *= key_depth_per_head**-0.5 - if attention_type == "dot_product": + if callable(attention_type): # Generic way to extend multihead_attention + x = attention_type(q, k, v, **kwargs) + elif attention_type == "dot_product": x = dot_product_attention(q, k, v, bias, dropout_rate, image_shapes) elif attention_type == "dot_product_relative": x = dot_product_attention_relative(q, k, v, bias, max_relative_position, @@ -1553,16 +1584,7 @@ def length_not_null(x, batch_coordinate): """Branch of the graph only evaluated when length isn't null.""" # Mask between the sequences (not used if map_ids is used) - with tf.name_scope("expert_mask"): - batch_coord_float = tf.squeeze(batch_coordinate, 1) - # Convert to float first because of b/25387198 - batch_coord_float = tf.to_float(batch_coord_float) - bc_v = tf.expand_dims(batch_coord_float, 1) - bc_h = tf.expand_dims(batch_coord_float, 0) - bias_batch = bc_v - bc_h # Broadcast to create [length, length] mask - # Theshold non zeros to 1.0 - bias_batch = tf.minimum(1.0, tf.abs(bias_batch)) - bias_batch *= -1e9 # Set non zeros to -infinity + bias_batch = attention_bias_coordinates(batch_coordinate) def add_or_set_if(prev_bias, new_bias, condition): """Add the bias together while concidering the None case.""" @@ -1581,11 +1603,11 @@ def mask_and_call_attention(x): bias_past = tf.reshape( attention_bias_lower_triangle(length), [length, length]) # bias has shape [length, length] - bias_past = tf.reshape(bias_past, [1, 1, length, length]) bias = None bias = add_or_set_if(bias, bias_past, mask_right) bias = add_or_set_if(bias, bias_batch, not split_batch) + bias = tf.reshape(bias, [1, 1, length, length]) return multihead_attention( x, @@ -1658,7 +1680,7 @@ def local_expert_attention( return expert_utils.local_moe( x, train, - partial(self_attention_expert, **kwargs), + functools.partial(self_attention_expert, **kwargs), attention_num_experts, k=k, loss_coef=loss_coef, @@ -1668,6 +1690,118 @@ def local_expert_attention( ) +@expert_utils.add_name_scope() +def sparse_dot_product_attention(q, k, v, bc, loss_proxy, experts_params): + """Sparse multihead self attention. + + Perform an approximation of the full multihead attention by dispatching + the tokens using their keys/values. Thus the attention matrix are only + computed each times on a subset of the tokens. + + Notes: + * The function don't perform scaling here (multihead_attention does + the /sqrt(depth)). + * The padding should have been removed (so batch size should be 1 but length + contains the elements from all different batches) + * Right now, only self attention is supported so length_q and length_kv + should be identical and the function will add triangular mask. + * The bias is added inside this function to prevent attention to the future. + + Args: + q (tf.Tensor): Queries of shape [1, heads, length_q, depth_k] + k (tf.Tensor): Keys of shape [1, heads, length_q, depth_k] + v (tf.Tensor): Values of shape [1, heads, length_kv, depth_v] + bc (tf.Tensor): Batch coordinates of shape [1, length_q, 1] + loss_proxy (CacheValue): Object containing the expert loss + experts_params (dict): Additional params for the local expert + + Returns: + tf.Tensor: Approximation of Softmax(Q.K) * V, of shape + [1, heads, length_q, depth_v] + """ + + assert q.get_shape().as_list()[0] == 1 + assert k.get_shape().as_list()[0] == 1 + assert v.get_shape().as_list()[0] == 1 + + @expert_utils.add_name_scope() + def unpack_heads(x): + # Flatten the batch. squeeze works because batch_size = 1 (otherwise could + # use tf.transpose and flatten after unpacking) + x = tf.squeeze(x, axis=0) + list_x = tf.unstack(x) + return list_x # list[tf.Tensor(shape=[batch * length, depth])] + + bc = tf.squeeze(bc, axis=0) + list_q = unpack_heads(q) + list_k = unpack_heads(k) + list_v = unpack_heads(v) + + @expert_utils.add_name_scope() + def expert_dot_product(x, q, k, v, bc): + """Perform dot product on a subset of the sequence. + + Args: + x (tf.Tensor): Unused but forwarded by local_moe + q (tf.Tensor): Queries of shape [length_expert, depth_k] + k (tf.Tensor): Queries of shape [length_expert, depth_k] + v (tf.Tensor): Queries of shape [length_expert, depth_v] + bc (tf.Tensor): Batch coordinates of shape [length_expert, 1] + + Returns: + tf.Tensor: dot product attention output ([length_expert, depth_v]) + """ + length = tf.shape(x)[0] + + # Mask between the sequences + bias_batch = attention_bias_coordinates(bc) + # Mask to prevent sequences of attenting to the future + bias_past = tf.reshape( + attention_bias_lower_triangle(length), [length, length]) + bias = bias_batch + bias_past # bias has shape [length, length] + bias = tf.reshape(bias, [1, 1, length, length]) + + # Restore batch and head dimension + q, k, v = [tf.expand_dims(tf.expand_dims(t, 0), 0) for t in (q, k, v)] + # Softmax(Q.K)*V + v_out = dot_product_attention(q, k, v, bias=bias) + # Remove batch and head dimension + v_out = tf.squeeze(v_out, axis=0) + v_out = tf.squeeze(v_out, axis=0) + return v_out + + list_v_out = [] + for q, k, v in zip(list_q, list_k, list_v): + # Each head get its own dispatcher + + # TODO(epot): Choose which dispatcher use here on the k/q pair (either + # noisy_top_k_gating or Locality-sensitive hashing) + + # Concatenate along the depth axis + x = tf.concat([q, k], axis=-1) # Works because q and k lengths are the same + + # Compute the attention on the sparse tokens + v_out, loss = expert_utils.local_moe( + x=x, + expert_fn=expert_dot_product, + additional_dispatch_params=dict( + q=q, + k=k, + v=v, + bc=bc + ), + **experts_params + ) + list_v_out.append(v_out) + # Hack: Forward the loss by by-passing multihead_attention + loss_proxy.value += loss + + # Restore original shape as expected by multihead_attention + v_out = tf.stack(list_v_out) # Merge heads + v_out = tf.expand_dims(v_out, axis=0) + return v_out + + def scaled_dot_product_attention_simple(q, k, v, bias, name=None): """scaled dot-product attention. One head. One spatial dimension. @@ -1813,3 +1947,7 @@ def forward_fn(x, wqkv, wo, attention_bias, norm_scale, norm_bias): y = forward_fn(x, wqkv, wo, bias, norm_scale, norm_bias) y.set_shape(x.get_shape()) return y + + +multihead_attention_sparse_dot_prod = functools.partial( + multihead_attention, attention_type=sparse_dot_product_attention) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 0c114f948..ef04e7fa7 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -50,6 +50,7 @@ class AttentionType(object): LOCAL_EXPERTS = "local_experts" GLOBAL_MOE = "global_experts" MEMORY_EFFICIENT = "memory_efficient" + SPARSE_MULTIHEAD = "sparse_multihead" @staticmethod def get_choices(): @@ -57,6 +58,7 @@ def get_choices(): AttentionType.MULTIHEAD, AttentionType.LOCAL_EXPERTS, AttentionType.MEMORY_EFFICIENT, + AttentionType.SPARSE_MULTIHEAD, ] @@ -64,6 +66,7 @@ def get_choices(): "h": AttentionType.MULTIHEAD, # multi-Head "e": AttentionType.LOCAL_EXPERTS, # Experts "m": AttentionType.MEMORY_EFFICIENT, # Memory + "s": AttentionType.SPARSE_MULTIHEAD, # Sparse } @@ -187,6 +190,35 @@ def print_shape(x, suffix, debug=False): attention_type=("local_mask_right" if hparams.attention_local else "dot_product"), name="decoder_self_attention") + elif attention_type == AttentionType.SPARSE_MULTIHEAD: + x_in = preprocess(x) + x_in = dp_remove_pad(x_in) + # loss_proxies will be dispatched by dp + loss_proxies = [CacheValue(0.0) for _ in range(dp.n)] + y = dp( + common_attention.multihead_attention_sparse_dot_prod, + x_in, + None, + None, # Bias is computed inside + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + + # Additional parameters + bc=batch_coordinate, + loss_proxy=loss_proxies, # Contains the additional expert loss + experts_params=dict( + train=hparams.mode == ModeKeys.TRAIN, + num_experts=hparams.attention_num_experts, + k=hparams.attention_moe_k, + ), + ) + y = dp_restore_pad(y) + + # TODO(avaswani, epot, noam): Do we need to divide by num shards ? + extra_loss += tf.add_n([l.value for l in loss_proxies]) / dp.n elif attention_type == AttentionType.MEMORY_EFFICIENT: assert hparams.layer_preprocess_sequence == "n" y = dp( @@ -278,6 +310,9 @@ def attention_lm_moe_prepare_decoder(targets, hparams): """ targets_pad_mask = common_attention.embedding_to_padding(targets) with tf.name_scope("pad_remover"): + # Because of the shift_right, the <eos> token will be concidered as + # padding. In practice, it doesn't really matter, due to the triangular + # mask, this token should never be attended. pad_remover = expert_utils.PadRemover(targets_pad_mask) if hparams.prepend_mode == "prepend_inputs_full_attention": @@ -286,8 +321,6 @@ def attention_lm_moe_prepare_decoder(targets, hparams): else: decoder_self_attention_bias = ( common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) - # TODO(epot): The padding remover should take into account that the input is - # shifted. decoder_input = common_layers.shift_right_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) @@ -418,6 +451,17 @@ def restore_pad(x, ref_x, pad_remover, mode): return x +class CacheValue(object): + """Class allowing to share variable between functions. + + Avoid having the function to return the variables as it the object can be + passed and shared by reference. + """ + + def __init__(self, value): + self.value = value + + @registry.register_hparams def attention_lm_moe_base(): """Set of hyperparameters. From 80998844b4523c5a7673e7f5a6a22a81ab99e588 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 27 Sep 2017 15:01:41 -0700 Subject: [PATCH 0437/4095] Fix lm1b data generator PiperOrigin-RevId: 170257440 --- tensor2tensor/data_generators/lm1b.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index da6dd92af..d3bcec527 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -112,19 +112,18 @@ def _maybe_download_corpus(tmp_dir): corpus_tar.extractall(tmp_dir) -def _get_or_build_subword_text_encoder(tmp_dir, vocab_name): +def _get_or_build_subword_text_encoder(tmp_dir, vocab_filepath): """Builds a SubwordTextEncoder based on the corpus. Args: tmp_dir: directory containing dataset. - vocab_name: name of vocab file. + vocab_filepath: path to store (or load) vocab. Returns: a SubwordTextEncoder. """ - filepath = os.path.join(tmp_dir, vocab_name) - if tf.gfile.Exists(filepath): - return text_encoder.SubwordTextEncoder(filepath) + if tf.gfile.Exists(vocab_filepath): + return text_encoder.SubwordTextEncoder(vocab_filepath) _maybe_download_corpus(tmp_dir) original_vocab = _original_vocab(tmp_dir) token_counts = defaultdict(int) @@ -140,7 +139,7 @@ def _get_or_build_subword_text_encoder(tmp_dir, vocab_name): break ret = text_encoder.SubwordTextEncoder() ret.build_from_token_counts(token_counts, min_count=5) - ret.store_to_file(filepath) + ret.store_to_file(vocab_filepath) return ret @@ -186,13 +185,13 @@ def targeted_vocab_size(self): def use_train_shards_for_dev(self): return True - def generator(self, tmp_dir, train, characters=False): + def generator(self, data_dir, tmp_dir, is_training): """Generator for lm1b sentences. Args: - tmp_dir: a string. - train: a boolean. - characters: a boolean + data_dir: data dir. + tmp_dir: tmp dir. + is_training: a boolean. Yields: A dictionary {"inputs": [0], "targets": [<subword ids>]} @@ -200,11 +199,12 @@ def generator(self, tmp_dir, train, characters=False): _maybe_download_corpus(tmp_dir) original_vocab = _original_vocab(tmp_dir) files = (_train_data_filenames(tmp_dir) - if train else [_dev_data_filename(tmp_dir)]) - if characters: + if is_training else [_dev_data_filename(tmp_dir)]) + if self.is_character_level: encoder = text_encoder.ByteTextEncoder() else: - encoder = _get_or_build_subword_text_encoder(tmp_dir, self.vocab_file) + vocab_filepath = os.path.join(data_dir, self.vocab_file) + encoder = _get_or_build_subword_text_encoder(tmp_dir, vocab_filepath) for filepath in files: tf.logging.info("filepath = %s", filepath) for line in tf.gfile.Open(filepath): From f0938a399d5f7568d3c890759b76732e53b41206 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Wed, 27 Sep 2017 15:13:33 -0700 Subject: [PATCH 0438/4095] multihead_attention can return additional value PiperOrigin-RevId: 170259587 --- tensor2tensor/layers/common_attention.py | 18 +++++++++++++----- tensor2tensor/models/attention_lm_moe.py | 18 ++---------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 84289b31d..6d43ab3ab 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -1258,6 +1258,8 @@ def multihead_attention(query_antecedent, [batch_size, length_q, hidden_dim] unless the cache dict is provided in which case only the last memory position is calculated and the output shape is [batch_size, 1, hidden_dim] + Optionnaly return an additional loss parameters (ex: load balance loss for + the experts) returned by the attention_type function Raises: ValueError: if the key depth or value depth are not divisible by the @@ -1293,8 +1295,12 @@ def multihead_attention(query_antecedent, v = split_heads(v, num_heads) key_depth_per_head = total_key_depth // num_heads q *= key_depth_per_head**-0.5 + + additional_returned_value = None if callable(attention_type): # Generic way to extend multihead_attention x = attention_type(q, k, v, **kwargs) + if isinstance(x, tuple): + x, additional_returned_value = x # Unpack elif attention_type == "dot_product": x = dot_product_attention(q, k, v, bias, dropout_rate, image_shapes) elif attention_type == "dot_product_relative": @@ -1308,6 +1314,9 @@ def multihead_attention(query_antecedent, q, k, v, block_length=block_length, filter_width=block_width) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") + + if additional_returned_value is not None: + return x, additional_returned_value return x @@ -1691,7 +1700,7 @@ def local_expert_attention( @expert_utils.add_name_scope() -def sparse_dot_product_attention(q, k, v, bc, loss_proxy, experts_params): +def sparse_dot_product_attention(q, k, v, bc, experts_params): """Sparse multihead self attention. Perform an approximation of the full multihead attention by dispatching @@ -1712,7 +1721,6 @@ def sparse_dot_product_attention(q, k, v, bc, loss_proxy, experts_params): k (tf.Tensor): Keys of shape [1, heads, length_q, depth_k] v (tf.Tensor): Values of shape [1, heads, length_kv, depth_v] bc (tf.Tensor): Batch coordinates of shape [1, length_q, 1] - loss_proxy (CacheValue): Object containing the expert loss experts_params (dict): Additional params for the local expert Returns: @@ -1771,6 +1779,7 @@ def expert_dot_product(x, q, k, v, bc): return v_out list_v_out = [] + total_loss = 0.0 for q, k, v in zip(list_q, list_k, list_v): # Each head get its own dispatcher @@ -1793,13 +1802,12 @@ def expert_dot_product(x, q, k, v, bc): **experts_params ) list_v_out.append(v_out) - # Hack: Forward the loss by by-passing multihead_attention - loss_proxy.value += loss + total_loss += loss # Restore original shape as expected by multihead_attention v_out = tf.stack(list_v_out) # Merge heads v_out = tf.expand_dims(v_out, axis=0) - return v_out + return v_out, total_loss / len(list_v_out) def scaled_dot_product_attention_simple(q, k, v, bias, name=None): diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index ef04e7fa7..3a5b73a3e 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -193,9 +193,7 @@ def print_shape(x, suffix, debug=False): elif attention_type == AttentionType.SPARSE_MULTIHEAD: x_in = preprocess(x) x_in = dp_remove_pad(x_in) - # loss_proxies will be dispatched by dp - loss_proxies = [CacheValue(0.0) for _ in range(dp.n)] - y = dp( + y, loss_experts = dp( common_attention.multihead_attention_sparse_dot_prod, x_in, None, @@ -208,7 +206,6 @@ def print_shape(x, suffix, debug=False): # Additional parameters bc=batch_coordinate, - loss_proxy=loss_proxies, # Contains the additional expert loss experts_params=dict( train=hparams.mode == ModeKeys.TRAIN, num_experts=hparams.attention_num_experts, @@ -218,7 +215,7 @@ def print_shape(x, suffix, debug=False): y = dp_restore_pad(y) # TODO(avaswani, epot, noam): Do we need to divide by num shards ? - extra_loss += tf.add_n([l.value for l in loss_proxies]) / dp.n + extra_loss += tf.add_n(loss_experts) / dp.n elif attention_type == AttentionType.MEMORY_EFFICIENT: assert hparams.layer_preprocess_sequence == "n" y = dp( @@ -451,17 +448,6 @@ def restore_pad(x, ref_x, pad_remover, mode): return x -class CacheValue(object): - """Class allowing to share variable between functions. - - Avoid having the function to return the variables as it the object can be - passed and shared by reference. - """ - - def __init__(self, value): - self.value = value - - @registry.register_hparams def attention_lm_moe_base(): """Set of hyperparameters. From ba98d3b43fce1bad4ebb291d7614e6d23ab8ef91 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 27 Sep 2017 18:04:54 -0700 Subject: [PATCH 0439/4095] Call old slow decoding when fetching logits. PiperOrigin-RevId: 170281924 --- tensor2tensor/utils/t2t_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 3fc110ebf..72e2ea602 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -162,7 +162,7 @@ def eval_autoregressive(self, losses: a dictionary: {loss-name (string): floating point `Scalar`}. Contains a single key "training". """ - _, logits, losses = self._greedy_infer( + _, logits, losses = self._slow_greedy_infer( features, decode_length=decode_length, last_position_only=last_position_only) From 705e96ba665fcec9db6b9890e3701a7da09a616a Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 28 Sep 2017 09:49:15 -0700 Subject: [PATCH 0440/4095] Adds dummy all_problems_test to tensor2tensor PiperOrigin-RevId: 170356180 --- .../data_generators/all_problems_test.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 tensor2tensor/data_generators/all_problems_test.py diff --git a/tensor2tensor/data_generators/all_problems_test.py b/tensor2tensor/data_generators/all_problems_test.py new file mode 100644 index 000000000..de84a0bf3 --- /dev/null +++ b/tensor2tensor/data_generators/all_problems_test.py @@ -0,0 +1,36 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Tensor2Tensor's all_problems.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports +from tensor2tensor.data_generators import all_problems + +import tensorflow as tf + + +class AllProblemsTest(tf.test.TestCase): + + def testImport(self): + """Make sure that importing all_problems doesn't break.""" + self.assertIsNotNone(all_problems) + + +if __name__ == '__main__': + tf.test.main() From b5c0201b0d0b5243e118e0054a0610f78fb546bd Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 28 Sep 2017 11:40:00 -0700 Subject: [PATCH 0441/4095] Add an option to use simple fixed batch scheme for training by turning on hparams.use_fixed_batch_size PiperOrigin-RevId: 170374297 --- tensor2tensor/layers/common_hparams.py | 3 +++ tensor2tensor/utils/input_fn_builder.py | 2 ++ tensor2tensor/utils/trainer_utils.py | 6 ++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index deae14ddc..d3ebfdffe 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -33,6 +33,9 @@ def basic_params1(): """A set of basic hyperparameters.""" return tf.contrib.training.HParams( batch_size=4096, # in tokens per batch per gpu + # Fixed batch size turns off bucketing during training mode + # and uses batch_size as minibatch size (use small batch_size<=32) + use_fixed_batch_size=int(False), num_hidden_layers=4, kernel_height=3, kernel_width=1, diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index 258213889..06a35f589 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -229,6 +229,8 @@ def features_for_problem(problem_instance, # If batch_size is fixed, use a single input bucket batching_scheme["batch_sizes"] = [batch_size] batching_scheme["boundaries"] = [] + # Log new batching scheme if updated + tf.logging.info("Updated batching_scheme = %s", batching_scheme) feature_map = data_reader.input_pipeline( problem_instance, data_dir, diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 0355ffcbf..a3260d3ae 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -182,7 +182,8 @@ def create_experiment_components(data_dir, model_name, hparams, run_config): run_config.model_dir) hparams = add_problem_hparams(hparams, FLAGS.problems) - + # hparams batch_size is used as minibatch size instead of tokens in batch + batch_size = (hparams.use_fixed_batch_size and hparams.batch_size) or None num_datashards = devices.data_parallelism().n train_input_fn = input_fn_builder.build_input_fn( mode=tf.estimator.ModeKeys.TRAIN, @@ -190,7 +191,8 @@ def create_experiment_components(data_dir, model_name, hparams, run_config): data_dir=data_dir, num_datashards=num_datashards, worker_replicas=FLAGS.worker_replicas, - worker_id=FLAGS.worker_id) + worker_id=FLAGS.worker_id, + batch_size=batch_size) eval_input_fn = input_fn_builder.build_input_fn( mode=tf.estimator.ModeKeys.EVAL, From 8c78b620370cf3b51098b2844e243893fc3275ec Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 28 Sep 2017 15:04:04 -0700 Subject: [PATCH 0442/4095] Default name in layer_prepostprocess PiperOrigin-RevId: 170403616 --- tensor2tensor/bin/t2t-decoder | 2 +- tensor2tensor/layers/common_layers.py | 18 +++++++++++++----- tensor2tensor/utils/metrics.py | 20 +++++--------------- tensor2tensor/utils/model_builder.py | 6 +++--- tensor2tensor/utils/trainer_utils.py | 5 ++--- tensor2tensor/utils/trainer_utils_test.py | 4 ++-- 6 files changed, 26 insertions(+), 29 deletions(-) diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index dce12c23c..ff143f5d4 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -75,7 +75,7 @@ def main(_): hparams = trainer_utils.create_hparams( FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams) - hparams = trainer_utils.add_problem_hparams(hparams, FLAGS.problems) + trainer_utils.add_problem_hparams(hparams, FLAGS.problems) estimator, _ = trainer_utils.create_experiment_components( data_dir=data_dir, model_name=FLAGS.model, diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 6554e0d31..1923a9e24 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -498,8 +498,15 @@ def apply_norm(x, norm_type, depth, epsilon): "'noam', 'none'.") -def layer_prepostprocess(previous_value, x, sequence, dropout_rate, norm_type, - depth, epsilon, name): +def layer_prepostprocess(previous_value, + x, + sequence, + dropout_rate, + norm_type, + depth, + epsilon, + default_name, + name=None): """Apply a sequence of functions to the input or output of a layer. The sequence is specified as a string which may contain the following @@ -519,12 +526,13 @@ def layer_prepostprocess(previous_value, x, sequence, dropout_rate, norm_type, norm_type: a string (see apply_norm()) depth: an integer (size of last dimension of x). epsilon: a float (parameter for normalization) + default_name: a string name: a string Returns: a Tensor """ - with tf.variable_scope(name): + with tf.variable_scope(name, default_name=default_name): if sequence == "none": return x for c in sequence: @@ -569,7 +577,7 @@ def layer_preprocess(layer_input, hparams): norm_type=hparams.norm_type, depth=hparams.hidden_size, epsilon=hparams.norm_epsilon, - name="layer_prepostprocess") + default_name="layer_prepostprocess") def layer_postprocess(layer_input, layer_output, hparams): @@ -602,7 +610,7 @@ def layer_postprocess(layer_input, layer_output, hparams): norm_type=hparams.norm_type, depth=hparams.hidden_size, epsilon=hparams.norm_epsilon, - name="layer_postprocess") + default_name="layer_postprocess") def conv_block_internal(conv_fn, diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 2f469cbf0..56ac17f38 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -234,21 +234,11 @@ def wrapped_metric_fn(): eval_metrics = dict() for problem_idx, (problem_name, problem_instance) in enumerate(problems): - if problem_instance is None: - # For problems in problem_hparams - metrics = [ - Metrics.ACC, Metrics.ACC_TOP5, Metrics.ACC_PER_SEQ, - Metrics.NEG_LOG_PERPLEXITY - ] - if "wmt" in problem_name: - metrics.append(Metrics.APPROX_BLEU) - else: - # For registered Problems - metrics = problem_instance.eval_metrics() - if not all([m in METRICS_FNS for m in metrics]): - raise ValueError("Unrecognized metric. Problem %s specified metrics " - "%s. Recognized metrics are %s." % - (problem_name, metrics, METRICS_FNS.keys())) + metrics = problem_instance.eval_metrics() + if not all([m in METRICS_FNS for m in metrics]): + raise ValueError("Unrecognized metric. Problem %s specified metrics " + "%s. Recognized metrics are %s." % + (problem_name, metrics, METRICS_FNS.keys())) class_output = "image" in problem_name and "coco" not in problem_name real_output = "gene_expression" in problem_name diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 6e0b32b13..370104907 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -213,7 +213,7 @@ def nth_model(n): assert mode == tf.estimator.ModeKeys.TRAIN # Set learning rate - learning_rate = hparams.learning_rate * _learning_rate_decay( + learning_rate = hparams.learning_rate * learning_rate_decay( hparams, num_worker_replicas=worker_replicas, num_train_steps=train_steps) learning_rate /= math.sqrt(float(worker_replicas)) @@ -429,11 +429,11 @@ def _get_variable_initializer(hparams): raise ValueError("Unrecognized initializer: %s" % hparams.initializer) -def _learning_rate_decay(hparams, num_worker_replicas=1, num_train_steps=1): +def learning_rate_decay(hparams, num_worker_replicas=1, num_train_steps=1): """Inverse-decay learning rate until warmup_steps, then decay.""" warmup_steps = tf.to_float( hparams.learning_rate_warmup_steps * num_worker_replicas) - step = tf.to_float(tf.contrib.framework.get_global_step()) + step = tf.to_float(tf.train.get_or_create_global_step()) if hparams.learning_rate_decay_scheme == "noam": return 5000.0 * hparams.hidden_size**-0.5 * tf.minimum( (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index a3260d3ae..3bb422c39 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -181,7 +181,8 @@ def create_experiment_components(data_dir, model_name, hparams, run_config): tf.logging.info("Creating experiment, storing model files in %s", run_config.model_dir) - hparams = add_problem_hparams(hparams, FLAGS.problems) + add_problem_hparams(hparams, FLAGS.problems) + # hparams batch_size is used as minibatch size instead of tokens in batch batch_size = (hparams.use_fixed_batch_size and hparams.batch_size) or None num_datashards = devices.data_parallelism().n @@ -248,8 +249,6 @@ def add_problem_hparams(hparams, problems): hparams.problem_instances.append(problem) hparams.problems.append(p_hparams) - return hparams - def save_metadata(output_dir, hparams): """Saves FLAGS and hparams to output_dir.""" diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index 16a8149f4..d8dee3986 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -92,7 +92,7 @@ def testSingleStep(self): model_name = "transformer" data_dir = TrainerUtilsTest.data_dir hparams = trainer_utils.create_hparams("transformer_test", data_dir) - hparams = trainer_utils.add_problem_hparams(hparams, FLAGS.problems) + trainer_utils.add_problem_hparams(hparams, FLAGS.problems) exp = trainer_utils.create_experiment( data_dir=data_dir, model_name=model_name, @@ -115,7 +115,7 @@ def testSingleEvalStepRawSession(self): # Create the problem object, hparams, placeholders, features dict. encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir) hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir) - hparams = trainer_utils.add_problem_hparams(hparams, FLAGS.problems) + trainer_utils.add_problem_hparams(hparams, FLAGS.problems) inputs_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1]) # Make it 4D. # In INFER mode targets can be None. From fb858cb1616f69be07c9550814a00d8ebf333556 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 28 Sep 2017 15:29:00 -0700 Subject: [PATCH 0443/4095] Move tpu_trainer to open-source PiperOrigin-RevId: 170407556 --- tensor2tensor/tpu/tpu_trainer.py | 72 ++++++ tensor2tensor/tpu/tpu_trainer_lib.py | 295 ++++++++++++++++++++++ tensor2tensor/tpu/tpu_trainer_lib_test.py | 68 +++++ 3 files changed, 435 insertions(+) create mode 100644 tensor2tensor/tpu/tpu_trainer.py create mode 100644 tensor2tensor/tpu/tpu_trainer_lib.py create mode 100644 tensor2tensor/tpu/tpu_trainer_lib_test.py diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py new file mode 100644 index 000000000..2c6292405 --- /dev/null +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -0,0 +1,72 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Train on TPU. + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor import models # pylint: disable=unused-import +from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import +from tensor2tensor.tpu import tpu_trainer_lib as lib +from tensor2tensor.utils import trainer_utils + +import tensorflow as tf + +flags = tf.flags +FLAGS = flags.FLAGS + +flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") +flags.DEFINE_string("output_dir", "", "Base output directory for run.") +flags.DEFINE_string("master", "", "Address of TensorFlow master.") +flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") + + +def main(unused_argv): + tf.logging.set_verbosity(tf.logging.INFO) + tf.set_random_seed(123) + + assert len(FLAGS.problems.split("-")) == 1 + + hparams = trainer_utils.create_hparams( + FLAGS.hparams_set, FLAGS.data_dir, passed_hparams=FLAGS.hparams) + trainer_utils.add_problem_hparams(hparams, FLAGS.problems) + + problem = hparams.problem_instances[0] + + model_fn = lib.get_model_fn(FLAGS.model, hparams) + input_fn = lib.get_input_fn(FLAGS.data_dir, problem, hparams) + + estimator = lib.make_estimator( + model_fn=model_fn, + output_dir=FLAGS.output_dir, + master=FLAGS.master, + num_shards=FLAGS.tpu_num_shards, + batch_size=hparams.batch_size_per_shard * FLAGS.tpu_num_shards, + log_device_placement=FLAGS.log_device_placement) + estimator.train( + lambda params: input_fn(tf.estimator.ModeKeys.TRAIN, params), + steps=FLAGS.train_steps) + estimator.evaluate( + lambda params: input_fn(tf.estimator.ModeKeys.EVAL, params), + steps=FLAGS.eval_steps) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py new file mode 100644 index 000000000..c6bba9d41 --- /dev/null +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -0,0 +1,295 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Library for training on TPU. See tpu_trainer.py.""" + +# TODO(rsepassi): +# * Fix EVAL (breaks when loading from checkpoint) +# * Support all decoders +# * Share more code with Problem.dataset and input_pipeline +# * Support PREDICT + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import math + +# Dependency imports + +from tensor2tensor.layers import common_layers +from tensor2tensor.models import transformer +from tensor2tensor.utils import data_reader +from tensor2tensor.utils import metrics +from tensor2tensor.utils import model_builder +from tensor2tensor.utils import registry + +import tensorflow as tf + + +def get_input_fn(data_dir, problem, hparams): + """Get basic T2T input fn.""" + + def input_fn(mode, params): + """Input fn.""" + is_training = mode == tf.estimator.ModeKeys.TRAIN + num_threads = 4 if is_training else 1 + batch_size = params["batch_size"] + + data_file_patterns = [problem.filepattern(data_dir, mode)] + + batching_scheme = { + "boundaries": [], + "batch_sizes": [batch_size], + "max_length": hparams.max_length, + "window_size": batch_size, + "padded_shapes": { + "inputs": [hparams.max_length], + "targets": [hparams.max_length], + }, + } + + def decode_record(record): + """Serialized Example to dict of <feature name, Tensor>.""" + data_fields, _ = problem.example_reading_spec() + decoded = tf.parse_single_example(record, features=data_fields) + decoded["inputs"] = decoded["inputs"].values + decoded["targets"] = decoded["targets"].values + return decoded + + data_files = tf.contrib.slim.parallel_reader.get_data_files( + data_file_patterns) + dataset = tf.contrib.data.TFRecordDataset(data_files) + dataset = dataset.map(decode_record, num_threads=num_threads) + + def _preprocess(example, problem, hparams, mode): + example = problem.preprocess_example(example, mode, hparams) + # We do not want int64s as they are not supported on TPUs. + example = data_reader.cast_int64_to_int32(example) + return example + + dataset = dataset.map( + lambda ex: _preprocess(ex, problem, hparams, mode), + num_threads=num_threads) + + def _valid_size(example): + return data_reader.example_valid_size(example, + batching_scheme["max_length"]) + + dataset = dataset.filter(_valid_size) + if is_training: + dataset = dataset.shuffle(100) + dataset = dataset.repeat(None) + dataset = data_reader.padded_batch(dataset, + batching_scheme["batch_sizes"][0], + batching_scheme["padded_shapes"]) + dataset.prefetch(1) + + train_features = dataset.make_one_shot_iterator().get_next() + + inputs = train_features["inputs"] + targets = train_features["targets"] + + # Ensure inputs and targets are proper rank. + while len(inputs.get_shape()) != 4: + inputs = tf.expand_dims(inputs, axis=-1) + while len(targets.get_shape()) != 4: + targets = tf.expand_dims(targets, axis=-1) + + inputs_shape = inputs.get_shape().as_list() + inputs_shape[0] = batch_size + inputs.set_shape(inputs_shape) + targets_shape = targets.get_shape().as_list() + targets_shape[0] = batch_size + targets.set_shape(targets_shape) + + train_features["inputs"] = inputs + train_features["targets"] = targets + + return train_features, targets + + return input_fn + + +def get_model_fn(model, hp, use_tpu=True): + """Get simple T2T model fn.""" + + def model_fn(features, labels, mode, params, config): + """Model fn.""" + del params + hparams = copy.deepcopy(hp) + problem_hp = hparams.problems[0] + orig_features = features + + # Instantiate model and retrieve modalities + model_class = registry.model(model)(hparams, mode, problem_hp) + input_modality = problem_hp.input_modality["inputs"] + target_modality = problem_hp.target_modality + + # Model construction + features = { + "inputs": input_modality.bottom(features["inputs"]), + "targets": target_modality.targets_bottom(features["targets"]), + "problem_choice": tf.constant(0), + "input_space_id": tf.constant(problem_hp.input_space_id), + "target_space_id": tf.constant(problem_hp.target_space_id) + } + outputs = model_class.model_fn_body(features) + logits = target_modality.top(outputs, labels) + + # Loss + loss_num, loss_den = target_modality.loss(logits, labels) + loss = loss_num / tf.maximum(1.0, loss_den) + + if mode == tf.estimator.ModeKeys.EVAL: + problem = hp.problem_instances[0] + eval_metrics_fn = create_eval_metrics_fn(problem) + return tf.contrib.tpu.TPUEstimatorSpec( + mode, + eval_metrics=(eval_metrics_fn, [logits, orig_features["targets"]]), + loss=loss) + + assert mode == tf.estimator.ModeKeys.TRAIN + + # Learning rate + num_shards = config.tpu_config.num_shards + lr = hparams.learning_rate * model_builder.learning_rate_decay( + hparams, num_worker_replicas=num_shards) + lr /= math.sqrt(float(num_shards)) + + # Optimizer + opt_name = hparams.optimizer + if opt_name == "Momentum": + opt = tf.train.MomentumOptimizer( + lr, momentum=hparams.optimizer_momentum_momentum) + else: + if hparams.optimizer not in ["RMSProp", "SGD"]: + tf.logging.warn( + "Only Momentum, RMSProp, and SGD are known to work on TPU.") + opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[opt_name](lr) + + if use_tpu: + opt = tf.contrib.tpu.CrossShardOptimizer(opt) + + # Optimize + gradients = opt.compute_gradients(loss, tf.trainable_variables()) + if hparams.clip_grad_norm: + gradients = _clip_gradients_by_norm(gradients, hparams.clip_grad_norm) + train_op = opt.apply_gradients( + gradients, global_step=tf.train.get_or_create_global_step()) + with tf.control_dependencies([train_op]): + train_op = tf.identity(loss) + + _remove_summaries() + return tf.contrib.tpu.TPUEstimatorSpec(mode, loss=loss, train_op=train_op) + + return model_fn + + +def create_eval_metrics_fn(problem): + """Create the metrics_fn that TPUEstimatorSpec expects.""" + + def make_metric_fn(metric_fn): + + def wrapped_metric_fn(logits, labels): + num, den = metric_fn( + logits, labels, weights_fn=common_layers.weights_nonzero) + return tf.metrics.mean(num, den) + + return wrapped_metric_fn + + metric_fns = [] + eval_metrics = problem.eval_metrics() + for metric in eval_metrics: + name = "metrics-%s/%s" % (problem.name, metric) + metric_fns.append((name, make_metric_fn(metrics.METRICS_FNS[metric]))) + + def all_metrics_fn(logits, labels): + metrics_dict = {} + + for name, fn in metric_fns: + metrics_dict[name] = fn(logits, labels) + + return metrics_dict + + return all_metrics_fn + + +def _remove_summaries(): + g = tf.get_default_graph() + key = tf.GraphKeys.SUMMARIES + del g.get_collection_ref(key)[:] + assert not g.get_collection(key) + + +def _clip_gradients_by_norm(grads_and_vars, clip_gradients): + """Clips gradients by global norm.""" + gradients, variables = zip(*grads_and_vars) + clipped_gradients, _ = tf.clip_by_global_norm(gradients, clip_gradients) + return list(zip(clipped_gradients, variables)) + + +def make_estimator(model_fn, + output_dir, + master="", + batch_size=16, + iterations_per_loop=100, + num_shards=8, + per_host_input_for_training=True, + use_tpu=True, + log_device_placement=False, + save_checkpoints_steps=1000): + """Make TPUEstimator.""" + tpu_config = tf.contrib.tpu.TPUConfig( + iterations_per_loop=iterations_per_loop, + num_shards=num_shards, + per_host_input_for_training=per_host_input_for_training) + session_config = tf.ConfigProto( + allow_soft_placement=True, log_device_placement=log_device_placement) + run_config = tf.contrib.tpu.RunConfig( + session_config=session_config, + save_summary_steps=0, + save_checkpoints_steps=save_checkpoints_steps, + tpu_config=tpu_config, + master=master) + + return tf.contrib.tpu.TPUEstimator( + model_fn=model_fn, + use_tpu=use_tpu, + model_dir=output_dir, + config=run_config, + train_batch_size=batch_size, + eval_batch_size=batch_size * 2) + + +@registry.register_hparams +def transformer_tpu(): + """HParams for Transformer model on TPU.""" + hp = transformer.transformer_base() + hp.use_pad_remover = int(False) # where op not supported + + # Inputs + hp.add_hparam("batch_size_per_shard", 24) + # Each example in the batch will be of (padded) length hp.max_length + hp.max_length = 64 + + hp.optimizer = "Momentum" # can be SGD, Momentum, RMSProp + hp.norm_type = "none" # seem to get nans with layer norm + hp.clip_grad_norm = 2. + hp.norm_epsilon = 1e-3 + hp.layer_preprocess_sequence = "n" + hp.layer_postprocess_sequence = "da" + return hp diff --git a/tensor2tensor/tpu/tpu_trainer_lib_test.py b/tensor2tensor/tpu/tpu_trainer_lib_test.py new file mode 100644 index 000000000..bbcf4ae89 --- /dev/null +++ b/tensor2tensor/tpu/tpu_trainer_lib_test.py @@ -0,0 +1,68 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for tpu_trainer_lib.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.tpu import tpu_trainer_lib as lib +from tensor2tensor.utils import trainer_utils +from tensor2tensor.utils import trainer_utils_test + +import tensorflow as tf + + +class TpuTrainerTest(tf.test.TestCase): + + @classmethod + def setUpClass(cls): + trainer_utils_test.TrainerUtilsTest.setUpClass() + + def testSmoke(self): + data_dir = trainer_utils_test.TrainerUtilsTest.data_dir + problem_name = "tiny_algo" + model_name = "transformer" + hparams_set = "transformer_tpu" + + hparams = trainer_utils.create_hparams(hparams_set, data_dir) + trainer_utils.add_problem_hparams(hparams, problem_name) + problem = hparams.problem_instances[0] + + model_fn = lib.get_model_fn(model_name, hparams, use_tpu=False) + input_fn = lib.get_input_fn(data_dir, problem, hparams) + + params = {"batch_size": 16} + config = tf.contrib.tpu.RunConfig( + tpu_config=tf.contrib.tpu.TPUConfig(num_shards=2)) + features, targets = input_fn(tf.estimator.ModeKeys.TRAIN, params) + with tf.variable_scope("training"): + spec = model_fn(features, targets, tf.estimator.ModeKeys.TRAIN, params, + config) + + self.assertTrue(spec.loss is not None) + self.assertTrue(spec.train_op is not None) + + with tf.variable_scope("eval"): + spec = model_fn(features, targets, tf.estimator.ModeKeys.EVAL, params, + config) + self.assertTrue(spec.eval_metrics is not None) + + +if __name__ == "__main__": + tf.test.main() From 3950b4027ac5d582fa70fbce9e720c8e4f34bb80 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 28 Sep 2017 16:27:00 -0700 Subject: [PATCH 0444/4095] Fix Problem.filepattern to include PREDICT PiperOrigin-RevId: 170415717 --- tensor2tensor/data_generators/problem.py | 18 +++++++- tensor2tensor/utils/model_builder.py | 2 +- .../TransformerVisualization.ipynb | 43 ++++++------------- 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 8e587163a..aee71922b 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -235,12 +235,26 @@ def test_filepaths(self, data_dir, num_shards, shuffled): num_shards) def filepattern(self, data_dir, mode): - """Get filepattern for data files for mode.""" + """Get filepattern for data files for mode. + + Matches mode to a suffix. + * TRAIN: train + * EVAL: dev + * PREDICT: dev + * test: test + + Args: + data_dir: str, data directory. + mode: tf.estimator.ModeKeys or "test". + + Returns: + filepattern str + """ path = os.path.join(data_dir, self.dataset_filename()) if mode == tf.estimator.ModeKeys.TRAIN: suffix = "train" - elif mode == tf.estimator.ModeKeys.EVAL: + elif mode in [tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT]: suffix = "dev" else: assert mode == "test" diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 370104907..e9b233d34 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -288,7 +288,7 @@ def nth_model(n): diet_vars = [ v for v in tf.global_variables() if v.dtype == dtypes.float16_ref ] - _log_variable_sizes(diet_vars, "Diet Variables") + _log_variable_sizes(diet_vars, "Diet Varaibles") # Optimize total_loss = tf.identity(total_loss, name="total_loss") diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index ca26edac1..96e919b63 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -15,9 +15,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from __future__ import absolute_import\n", @@ -36,9 +34,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -76,9 +72,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -111,7 +105,6 @@ "cell_type": "code", "execution_count": 4, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -183,9 +176,7 @@ { "cell_type": "code", "execution_count": 6, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -200,15 +191,13 @@ ], "source": [ "spec = utils.model_builder.model_fn(MODEL, features, tf.estimator.ModeKeys.EVAL, hparams, problem_names=[PROBLEM])\n", - "predictions_dict = spec.predictions" + "predictions_dict = spec.predictions", ] }, { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -225,7 +214,7 @@ "source": [ "with tf.variable_scope(tf.get_variable_scope(), reuse=True):\n", " spec = utils.model_builder.model_fn(MODEL, features, tf.estimator.ModeKeys.PREDICT, hparams, problem_names=[PROBLEM])\n", - " beam_out = spec.predictions['outputs']" + " beam_out = spec.predictions['outputs']", ] }, { @@ -238,9 +227,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -320,7 +307,6 @@ "cell_type": "code", "execution_count": 10, "metadata": { - "collapsed": false, "scrolled": false }, "outputs": [ @@ -367,9 +353,7 @@ { "cell_type": "code", "execution_count": 12, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -408,9 +392,7 @@ { "cell_type": "code", "execution_count": 14, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -458,7 +440,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, "scrolled": true }, "outputs": [], @@ -486,9 +467,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.13" + "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file From d3ececf3b39a1caaa9d9127ef357646a71d6dace Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 28 Sep 2017 16:31:32 -0700 Subject: [PATCH 0445/4095] merge PRs PiperOrigin-RevId: 170416256 --- tensor2tensor/utils/model_builder.py | 2 +- .../TransformerVisualization.ipynb | 40 +++++++++++++++---- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index e9b233d34..370104907 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -288,7 +288,7 @@ def nth_model(n): diet_vars = [ v for v in tf.global_variables() if v.dtype == dtypes.float16_ref ] - _log_variable_sizes(diet_vars, "Diet Varaibles") + _log_variable_sizes(diet_vars, "Diet Variables") # Optimize total_loss = tf.identity(total_loss, name="total_loss") diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index 96e919b63..326f3f5c3 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -15,7 +15,9 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "from __future__ import absolute_import\n", @@ -34,7 +36,9 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -71,8 +75,13 @@ }, { "cell_type": "code", + "metadata": { + "collapsed": false + }, "execution_count": 3, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -105,6 +114,7 @@ "cell_type": "code", "execution_count": 4, "metadata": { + "collapsed": false "scrolled": true }, "outputs": [ @@ -176,6 +186,9 @@ { "cell_type": "code", "execution_count": 6, + "metadata": { + "collapsed": false + }, "metadata": {}, "outputs": [ { @@ -191,12 +204,15 @@ ], "source": [ "spec = utils.model_builder.model_fn(MODEL, features, tf.estimator.ModeKeys.EVAL, hparams, problem_names=[PROBLEM])\n", - "predictions_dict = spec.predictions", + "predictions_dict = spec.predictions" ] }, { "cell_type": "code", "execution_count": 7, + "metadata": { + "collapsed": false + }, "metadata": {}, "outputs": [ { @@ -214,7 +230,7 @@ "source": [ "with tf.variable_scope(tf.get_variable_scope(), reuse=True):\n", " spec = utils.model_builder.model_fn(MODEL, features, tf.estimator.ModeKeys.PREDICT, hparams, problem_names=[PROBLEM])\n", - " beam_out = spec.predictions['outputs']", + " beam_out = spec.predictions['outputs']" ] }, { @@ -227,6 +243,9 @@ { "cell_type": "code", "execution_count": 8, + "metadata": { + "collapsed": false + }, "metadata": {}, "outputs": [ { @@ -307,6 +326,7 @@ "cell_type": "code", "execution_count": 10, "metadata": { + "collapsed": false "scrolled": false }, "outputs": [ @@ -353,7 +373,9 @@ { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -392,6 +414,9 @@ { "cell_type": "code", "execution_count": 14, + "metadata": { + "collapsed": false + }, "metadata": {}, "outputs": [ { @@ -440,6 +465,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "collapsed": true "scrolled": true }, "outputs": [], @@ -467,7 +493,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.12" + "version": "2.7.13" } }, "nbformat": 4, From 84319a23e57e0335928644275eaa4c757c5cdc84 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 28 Sep 2017 16:35:53 -0700 Subject: [PATCH 0446/4095] v1.2.4 PiperOrigin-RevId: 170416778 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 331abb78e..d097b91d6 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.2.3', + version='1.2.4', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From d79ee370d9d1395ee9b8bd40aa0da182658f37ae Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 28 Sep 2017 16:37:35 -0700 Subject: [PATCH 0447/4095] Reference ProfilerHook directly (to solve issue #324). PiperOrigin-RevId: 170416993 --- tensor2tensor/utils/trainer_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 3bb422c39..30a079af3 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -34,7 +34,6 @@ from tensor2tensor.utils import registry import tensorflow as tf -from tensorflow.contrib.hooks.python.training.profiler_hook import ProfilerHook from tensorflow.contrib.learn.python.learn import learn_runner from tensorflow.python import debug @@ -145,7 +144,7 @@ def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, # Recorded traces can be visualized with chrome://tracing/ # The memory/tensor lifetime is also profiled train_monitors.append( - ProfilerHook( + tf.contrib.hooks.ProfilerHook( save_steps=10, output_dir=run_config.model_dir, show_dataflow=True, From 4991d65292c5d5271d6bef249b5b9f9bb958dbb5 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 28 Sep 2017 18:17:34 -0700 Subject: [PATCH 0448/4095] Remove duplicate problem copy/reversal PiperOrigin-RevId: 170428089 --- tensor2tensor/data_generators/image.py | 1 + tensor2tensor/utils/input_fn_builder.py | 14 -------------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 084ef330a..5b41c4e19 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -650,6 +650,7 @@ def generator(self, data_dir, tmp_dir, is_training): class ImageCifar10Plain(ImageCifar10): def preprocess_example(self, example, mode, unused_hparams): + example["inputs"] = tf.to_int64(example["inputs"]) return example diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index 06a35f589..32b88e58d 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -240,20 +240,6 @@ def features_for_problem(problem_instance, batching_scheme, dataset_split=dataset_split) - # Reverse inputs and targets features if the problem was reversed. - if problem_instance is not None: - problem_instance.maybe_reverse_features(feature_map) - problem_instance.maybe_copy_features(feature_map) - else: - if p_hparams.was_reversed: - inputs = feature_map["inputs"] - targets = feature_map["targets"] - feature_map["inputs"] = targets - feature_map["targets"] = inputs - # Use the inputs as the targets if the problem is a copy problem. - if p_hparams.was_copy: - feature_map["targets"] = feature_map["inputs"] - # Ensure inputs and targets are proper rank. if problem_instance.has_inputs: while len(feature_map["inputs"].get_shape()) != 4: From 7c9319b5763e51b2610fb5c363725f4f8beff8e5 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 28 Sep 2017 19:38:35 -0700 Subject: [PATCH 0449/4095] Play with VAE and transformer. PiperOrigin-RevId: 170434131 --- tensor2tensor/models/transformer_vae.py | 46 ++++++++++++++++++++----- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 86950d6b7..feb18d44d 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -100,13 +100,22 @@ def dae(x, hparams, name): # Gumbel-softmax sample. gumbel_samples = gumbel_sample(tf.shape(m)) steps = hparams.kl_warmup_steps - gumbel_samples *= common_layers.inverse_exp_decay(steps) * 0.1 + gumbel_samples *= common_layers.inverse_exp_decay(steps // 5) * 0.5 temperature = 1.2 - common_layers.inverse_lin_decay(steps) s = tf.nn.softmax((logsm + gumbel_samples) / temperature) m = tf.nn.softmax(m) kl = - tf.reduce_max(logsm, axis=-1) tf.summary.histogram("max-log", tf.reshape(kl, [-1])) - return m, s, tf.reduce_mean(kl) + # Calculate the argmax and construct hot vectors. + maxvec = tf.reshape(tf.argmax(m, axis=-1), [-1]) + maxvhot = tf.stop_gradient(tf.one_hot(maxvec, hparams.v_size)) + # Add losses that prevent too few being used. + distrib = tf.reshape(logsm, [-1, hparams.v_size]) * maxvhot + d_mean = tf.reduce_mean(distrib, axis=[0], keep_dims=True) + d_variance = tf.reduce_mean(tf.square(distrib - d_mean), axis=[0]) + d_dev = - tf.reduce_mean(d_variance) + ret = s # If we want just hot, do tf.reshape(maxvhot, tf.shape(s)) + return m, ret, d_dev * 5.0 + tf.reduce_mean(kl) * 0.002 def vae(x, hparams, name): @@ -140,7 +149,7 @@ def kmeans(x, means, hparams, name): x_means_hot = nearest(x, means, hparams) x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) kl = tf.reduce_sum(tf.square(x - x_means), axis=-1) - return x_means_hot, tf.reduce_mean(kl) * 10.0 + return x_means_hot, tf.reduce_mean(kl) # * 10.0 def compress(x, c, is_2d, hparams, name): @@ -217,10 +226,15 @@ def ae_compress(x, is_2d, hparams, name, reuse=None): # Convolve and ReLu to get state. cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") - cur = tf.nn.l2_normalize(cur, dim=3) + # To put a standard VAE use the line below. + # cur, vae_kl, _, _ = vae(cur, hparams, "kmeans_vae") + cur = mix(tf.nn.l2_normalize(cur, dim=3), cur, + hparams.startup_steps // 3, mode="exp", simple=True) cur_n = hparams.kmeans_lr_factor * cur cur_n += (1.0 - hparams.kmeans_lr_factor) * tf.stop_gradient(cur) means = tf.get_variable("z_to_dense", [hparams.v_size, hparams.hidden_size]) + # To use Gumbel-Softmax use the line below instead. + # _, hot, loss = dae(cur, hparams, "dae") hot, loss = kmeans(cur_n, means, hparams, name="kmeans") # We need a linear layer to undo the l2-normalization. cur = tf.layers.dense(cur, hparams.hidden_size, name="unnormalize") @@ -244,7 +258,12 @@ def ae_decompress(z, ae, x, is_2d, hparams, name, reuse=None): # Leak at the beginning to help train. z = mix(z, ae, hparams.startup_steps) prob_z = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.8 - prob_z = prob_z if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 + prob_z = prob_z if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN else 1.0 + # Gradients flow to ae while the value is z. + z = tf.stop_gradient(z) + ae - tf.stop_gradient(ae) + # Leak during training to keep the full dense autoencoder. + prob_z = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.6 + prob_z = prob_z if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN else 1.0 z = tf.cond(tf.less(tf.random_uniform([]), prob_z), lambda: z, lambda: ae) @@ -260,10 +279,11 @@ def ae_decompress(z, ae, x, is_2d, hparams, name, reuse=None): d = decompress_step(d, None, hparams, i > 0, is_2d, "decompress_%d" % j) # Autoregressive part. - if not is_2d: # Currently we don't do it autoregressively for 2d problems. + if hparams.decode_autoregressive: k = 2**(hparams.num_compress_steps * (2 if is_2d else 1)) - z_batch = tf.reshape(z, [-1, 1, 1, hparams.hidden_size]) x_batch = tf.reshape(x, [-1, k, 1, hparams.hidden_size]) + x_batch = tf.stop_gradient(x_batch) + z_batch = tf.reshape(z, [-1, 1, 1, hparams.hidden_size]) d_batch = tf.reshape(d, [-1, k, 1, hparams.hidden_size]) dec_batch = decode(z_batch, d_batch, x_batch, None, None, hparams) else: # For non-autoregressive. @@ -299,6 +319,7 @@ def ae_transformer_internal(inputs, targets, target_space, hparams): # Compress context and run autoregressive decoder on emb-hot. emb_flat = tf.expand_dims(common_layers.flatten4d3d(emb), axis=2) + emb_flat = tf.stop_gradient(emb_flat) dec_c = decode(None, None, emb_flat, inputs, ed, hparams) dec_c = tf.reshape(dec_c, tf.shape(emb)) c_z = tf.layers.dense(dec_c, hparams.v_size, name="mask_context") @@ -310,7 +331,8 @@ def ae_transformer_internal(inputs, targets, target_space, hparams): # Decompress, pass for ae loss. z = ae_decompress(emb, ae, targets, hparams.is_2d, hparams, "ae") - kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.8)) + kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.8), + min_value=0.0001) reconstruct_loss *= common_layers.inverse_exp_decay(hparams.startup_steps) losses = {"kl": kl, "reconstruction": reconstruct_loss} return z, losses @@ -376,16 +398,22 @@ def transformer_ae_small(): hparams.add_hparam("kmeans_lr_factor", 0.002) hparams.add_hparam("z_dropout", 0.1) hparams.add_hparam("is_2d", 0) + hparams.add_hparam("decode_autoregressive", 1) return hparams @registry.register_hparams def transformer_ae_cifar(): + """Hyperparameters for CIFAR-10 experiments.""" hparams = transformer_ae_small() + hparams.hidden_size = 384 + hparams.z_size = 256 hparams.batch_size = 1024 * 16 hparams.num_compress_steps = 2 hparams.v_size = 1024 * 16 - hparams.startup_steps = 120000 + hparams.kl_warmup_steps = 350000 + hparams.startup_steps = 30000 + hparams.kmeans_lr_factor = 0.0 hparams.is_2d = 1 return hparams From 1f2aed6821bc818ac75a8a6dd34621d06cfaf008 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 28 Sep 2017 22:58:39 -0700 Subject: [PATCH 0450/4095] First version of "Grouped Attention" PiperOrigin-RevId: 170444672 --- tensor2tensor/layers/common_attention.py | 234 +++++++++++++++++++++++ tensor2tensor/models/aligned.py | 62 +++++- tensor2tensor/utils/expert_utils.py | 15 +- 3 files changed, 305 insertions(+), 6 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 6d43ab3ab..956d3fcb8 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -428,6 +428,23 @@ def combine_last_two_dimensions(x): return ret +def combine_first_two_dimensions(x): + """Reshape x so that the first two dimension become one. + + Args: + x: a Tensor with shape [a, b, ...] + + Returns: + a Tensor with shape [ab, ...] + """ + ret = tf.reshape(x, tf.concat([[-1], tf.shape(x)[2:]], 0)) + old_shape = x.get_shape().dims + a, b = old_shape[:2] + new_shape = [a * b if a and b else None] + old_shape[2:] + ret.set_shape(new_shape) + return ret + + def split_heads(x, num_heads): """Split channels (dimension 3) into multiple heads (becomes dimension 1). @@ -522,6 +539,223 @@ def attention_image_summary(attn, image_shapes=None): tf.summary.image("attention", image, max_outputs=1) +def grouped_attention_single(num_groups, q, kv, q_gates, m_gates): + """Compute grouped attention for one batch and one head. + + q is a Tensor of queries, and kv is Tensor of keys and values + (concatenated in dimension 1). + + q_gates and m_gates are float32 Tensors containing zeros and ones. + The ones indicate which positions belong to which groups. A + key-value pair can be in zero or more groups. Each query is in one + group. A query can only pay attention to key-value pairs which are + in its group. + + In addition to the usual output, we return two additional Tensors: + q_total and m_total. + + For query position i belonging to group g, q_total[i, g] contains + log(sum(exp(q_i dot k_j))) for all keys k_j in group g. + + For memory position j belonging to group g, m_total[j, g] contains + the sum of the attention weights over all queries and that memory position. + + q_total and m_total contain zeros in positions where the + corresponding query/memory does not belong to the corresponding + group. + + Args: + num_groups: an integer + q: Tensor with shape [length_q, depth_qk] + kv: Tensor with shape [length_kv, depth_qk + depth_v] + q_gates: Tensor with shape [length_q, num_groups] + m_gates: Tensor with shape [length_kv, num_groups] + + Returns: + o: Tensor with shape [length_q, depth_v] + q_total: Tensor with shape [length_q, num_groups] + m_total: Tensor with shape [length_kv, num_groups] + """ + q_dispatcher = expert_utils.SparseDispatcher(num_groups, q_gates) + m_dispatcher = expert_utils.SparseDispatcher(num_groups, m_gates) + q_length_coordinate = q_dispatcher.expert_to_batch_indices() + m_length_coordinate = m_dispatcher.expert_to_batch_indices() + dispatched_q = q_dispatcher.dispatch(q) + dispatched_kv = m_dispatcher.dispatch(kv) + length_q = tf.shape(q)[0] + length_kv = tf.shape(kv)[0] + depth_qk = tf.shape(q)[1] + depth_v = tf.shape(kv)[1] - depth_qk + o = [] + q_totals = [] + m_totals = [] + for e in xrange(num_groups): + k, v = tf.split(dispatched_kv[e], [depth_qk, depth_v], axis=1) + logits = tf.matmul(dispatched_q[e], k, transpose_b=True) + log_weights = tf.nn.log_softmax(logits) + weights = tf.exp(log_weights) + o.append(tf.matmul(weights, v)) + # For each query, this is the log of the sum of the unnormalized weights. + q_total = tf.reshape(logits[:, :1] - log_weights[:, :1], [-1]) + q_totals.append(tf.unsorted_segment_sum( + q_total, q_length_coordinate[e], length_q)) + epsilon = 1e-3 + m_total = tf.log(tf.reduce_sum(tf.stop_gradient(weights), axis=0) + epsilon) + m_totals.append( + tf.unsorted_segment_sum(m_total, m_length_coordinate[e], length_kv)) + o = q_dispatcher.combine(o, multiply_by_gates=False) + q_total = tf.stack(q_totals, axis=1) + m_total = tf.stack(m_totals, axis=1) + return o, q_total, m_total + + +def grouped_attention_multihead(query_antecedent, + memory_antecedent, + total_key_depth, + total_value_depth, + output_depth, + num_heads, + num_groups, + threshold=0.3, + name=None, + make_image_summary=True): + """Dot-product attention with sparsity. + + Args: + query_antecedent: a Tensor with shape [batch, length_q, channels] + memory_antecedent: a Tensor with shape [batch, length_m, channels] + total_key_depth: an integer + total_value_depth: an integer + output_depth: an integer + num_heads: an integer dividing total_key_depth and total_value_depth + num_groups: an integer + threshold: a floating point number + name: an optional string + make_image_summary: a boolean + + Returns: + A Tensor with shape [batch, length_q, output_depth] + + Raises: + ValueError: if the key depth or value depth are not divisible by the + number of attention heads. + """ + batch = tf.shape(query_antecedent)[0] + length_q = tf.shape(query_antecedent)[1] + length_kv = tf.shape(memory_antecedent)[1] + + if total_key_depth % num_heads != 0: + raise ValueError("Key depth (%d) must be divisible by the number of " + "attention heads (%d)." % (total_key_depth, num_heads)) + depth_qk = total_key_depth // num_heads + if total_value_depth % num_heads != 0: + raise ValueError("Value depth (%d) must be divisible by the number of " + "attention heads (%d)." % (total_value_depth, num_heads)) + depth_v = total_value_depth // num_heads + with tf.variable_scope( + name, + default_name="multihead_attention_sparse", + values=[query_antecedent, memory_antecedent]): + q = common_layers.conv1d( + query_antecedent, total_key_depth, 1, name="q_transform") + kv = common_layers.conv1d( + memory_antecedent, total_key_depth + total_value_depth, + 1, name="kv_transform") + q = split_heads(q, num_heads) + kv = split_heads(kv, num_heads) + # Make predictions about q_total and m_total. + # These are used to determine group inclusion. + # We will train these by auxiliary losses. We use stop_gradient here + # to keep these losses from back-propagating to the rest of the model. + q_pred = common_layers.conv1d( + tf.stop_gradient(query_antecedent), num_heads * num_groups, 1, + name="q_pred") + q_pred = split_heads(q_pred, num_heads) + m_pred = common_layers.conv1d(tf.stop_gradient( + memory_antecedent), num_heads * num_groups, 1, name="m_pred") + m_pred = split_heads(m_pred, num_heads) + q *= depth_qk**-0.5 + # q, kv, q_pred, m_pred are all [batch, heads, length_[q/m], ?] + # now reshape them all to [batch * heads, length, ?] + q = combine_first_two_dimensions(q) + kv = combine_first_two_dimensions(kv) + q_pred = combine_first_two_dimensions(q_pred) + m_pred = combine_first_two_dimensions(m_pred) + q_group = tf.argmax(q_pred, axis=2) + q_gates = tf.one_hot(q_group, num_groups, axis=-1) + m_gates = tf.to_float(tf.greater(m_pred, math.log(threshold))) + # include first memory position in all groups, to avoid zero-sized tensors. + # TODO(noam): do we need to do this for queries too? + m_gates = tf.maximum( + m_gates, tf.reshape(tf.one_hot([0], length_kv), [1, length_kv, 1])) + q_group_size = tf.reduce_sum(q_gates, 1) + m_group_size = tf.reduce_sum(m_gates, 1) + + # compute the output + o, q_total, m_total = tf.map_fn( + lambda args: grouped_attention_single(num_groups, *args), + (q, kv, q_gates, m_gates), + dtype=(tf.float32, tf.float32, tf.float32), + parallel_iterations=1) + + # compute auxiliary losses to train the predictions + q_loss = tf.nn.l2_loss((q_total - q_pred) * q_gates) + q_loss /= tf.to_float(batch * length_q) + m_loss = tf.nn.l2_loss((m_total - m_pred) * m_gates) + m_loss /= tf.to_float(batch * length_kv) + # We would like the query groups to be equal sized. The group + # size is discrete, so we need some trick here. We add a loss + # proportional to the product of the group size and the + # predictions for that group. This encourages the predictions to + # decrease for groups that are too big. + q_group_deviation = (q_group_size - tf.reduce_mean( + q_group_size, axis=1, keep_dims=True)) / tf.to_float(length_kv) + q_pred_mean = tf.reduce_mean(q_pred, axis=1) + q_pred_mean -= tf.reduce_mean(q_pred_mean, axis=1, keep_dims=True) + q_balance_loss = ( + tf.reduce_sum(q_pred_mean * q_group_deviation) / tf.to_float(batch)) + extra_loss_multiplier = 1e-3 + extra_loss = (q_loss + m_loss + q_balance_loss) * extra_loss_multiplier + + # Show a bunch of summaries. + if (not tf.get_variable_scope().reuse and + # Summaries don't work well within tf.while_loop() + "/while/" not in tf.contrib.framework.get_name_scope() and + make_image_summary): + tf.summary.histogram("q_group_size", q_group_size) + tf.summary.histogram("m_group_size", m_group_size) + tf.summary.scalar("q_loss", q_loss) + tf.summary.scalar("m_loss", m_loss) + tf.summary.scalar("q_balance_loss", q_balance_loss) + density = ( + tf.reduce_sum(tf.to_float(m_group_size) * tf.to_float(q_group_size)) / + tf.to_float(batch * num_heads * length_q * length_kv)) + tf.summary.scalar("density", density) + if make_image_summary: + # We recompute the attention for the first example, in an inefficient + # way - masking. This lets us show pretty pictures. + # [num_heads, length_q, group] + q_gates_0 = q_gates[:num_heads, :, :] + # [num_heads, length_kv, group] + m_gates_0 = m_gates[:num_heads, :, :] + mask = tf.matmul(q_gates_0, m_gates_0, transpose_b=True) + q_0 = q[:num_heads, :, :] + k_0 = kv[:num_heads, :, :depth_qk] + att_0 = tf.nn.softmax(tf.matmul(q_0, k_0, transpose_b=True)) + hdr = tf.pow(att_0, 0.2) # for high-dynamic-range + mask_channel = mask * tf.maximum(hdr, 0.3) + image = tf.stack([hdr, mask_channel, mask_channel], axis=3) + tf.summary.image("att", image, max_outputs=num_heads) + mask_coverage = tf.reduce_sum(mask * att_0) / ( + tf.to_float(length_q) * num_heads) + tf.summary.scalar("coverage", mask_coverage) + + o = tf.reshape(o, [batch, num_heads, length_q, depth_v]) + o = combine_heads(o) + o = common_layers.conv1d(o, output_depth, 1, name="output_transform") + return o, extra_loss + + def dot_product_attention(q, k, v, diff --git a/tensor2tensor/models/aligned.py b/tensor2tensor/models/aligned.py index 90100c842..abfecbaed 100644 --- a/tensor2tensor/models/aligned.py +++ b/tensor2tensor/models/aligned.py @@ -103,6 +103,27 @@ def _diet_expert(x): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) + elif layer_type == "att_grouped": + y, loss = dp( + common_attention.grouped_attention_multihead, + x, + x, + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + num_groups=hparams.attention_num_groups, + make_image_summary=hparams.attention_image_summary, + ) + extra_loss += tf.add_n(loss) / dp.n + elif layer_type == "att_memory_efficient": + assert hparams.layer_preprocess_sequence == "n" + zero_bias = tf.zeros([1, 1, 1, 1]) + y = dp( + common_attention.multihead_self_attention_memory_efficient, + x, + zero_bias, + hparams.num_heads) elif layer_type == "att_memory_efficient": assert hparams.layer_preprocess_sequence == "n" zero_bias = tf.zeros([1, 1, 1, 1]) @@ -222,7 +243,7 @@ def aligned_base(): hparams = common_hparams.basic_params1() hparams.hidden_size = 512 hparams.batch_size = 5000 - hparams.max_length = 1024 + hparams.max_length = 0 hparams.min_length_bucket = 1024 hparams.dropout = 0.0 hparams.layer_prepostprocess_dropout = 0.0 @@ -265,8 +286,8 @@ def aligned_base(): hparams.add_hparam("diet_experts", int(False)) hparams.add_hparam("memory_efficient_ffn", int(False)) hparams.add_hparam("local_attention_window", 128) - # if True, we learn a non-autoregressive model from "inputs" to "targets". - # if False, we learn an autoregressive model to generate "targets" + hparams.add_hparam("attention_num_groups", 8) + hparams.add_hparam("attention_image_summary", int(True)) return hparams @@ -302,6 +323,23 @@ def aligned_local_expert(): return hparams +@registry.register_hparams +def aligned_grouped(): + """Use local_expert_attention. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.62 + 2.7 steps/sec on P100 + (some problem with map_fn - need to tune this) + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.02 + + Returns: + a hparams object + """ + hparams = aligned_base() + hparams.layers = "timing," + "conv,att_grouped,ffn," * 2 + return hparams + + @registry.register_hparams def aligned_local(): """Use local attention code. @@ -441,6 +479,22 @@ def aligned_8k(): a hparams object """ hparams = aligned_base() - hparams.max_length = 8192 hparams.batch_size = 8192 return hparams + + +@registry.register_hparams +def aligned_8k_grouped(): + """version for languagemodel_wiki_scramble8k50. + + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.93 + 3.3 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.18 + + Returns: + a hparams object + """ + hparams = aligned_grouped() + hparams.batch_size = 8192 + hparams.attention_image_summary = int(False) + return hparams diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 495c3fb50..eb513d0e8 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -690,7 +690,7 @@ def dispatch(self, inp): `[expert_batch_size_i, <extra_input_dims>]`. """ inp = tf.gather(inp, self._batch_index) - return tf.split(inp, self._part_sizes_tensor, 0) + return tf.split(inp, self._part_sizes_tensor, 0, num=self._num_experts) def combine(self, expert_out, multiply_by_gates=True): """Sum together the expert output, weighted by the gates. @@ -723,7 +723,18 @@ def expert_to_gates(self): a list of `num_experts` one-dimensional `Tensor`s with type `tf.float32` and shapes `[expert_batch_size_i]` """ - return tf.split(self._nonzero_gates, self._part_sizes_tensor, 0) + return tf.split( + self._nonzero_gates, self._part_sizes_tensor, 0, num=self._num_experts) + + def expert_to_batch_indices(self): + """Batch indices corresponding to the examples in the per-expert `Tensor`s. + + Returns: + a list of `num_experts` one-dimensional `Tensor`s with type `tf.int64` + and shapes `[expert_batch_size_i]` + """ + return tf.split( + self._batch_index, self._part_sizes_tensor, 0, num=self._num_experts) @property def part_sizes(self): From f61901923fea4b0e7b0b1b2dbe8ff8253dd62ac8 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 29 Sep 2017 11:40:33 -0700 Subject: [PATCH 0451/4095] Corrections to VAE to get back previous runs. PiperOrigin-RevId: 170510732 --- tensor2tensor/models/transformer_vae.py | 60 ++++++++++++++++++++----- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index feb18d44d..d2b1bf631 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -26,6 +26,7 @@ from tensor2tensor.layers import common_attention from tensor2tensor.layers import common_layers from tensor2tensor.models import transformer +from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -87,6 +88,28 @@ def decompress_step(source, c, hparams, first_relu, is_2d, name): return tf.reshape(thicker, [shape[0], shape[1] * 2, 1, hparams.hidden_size]) +def top_k_softmax(x, k): + """Calculate softmax(x), select top-k and rescale to sum to 1.""" + x = tf.nn.softmax(x) + top_x, _ = tf.nn.top_k(x, k=k+1) + min_top = tf.reduce_min(top_x, axis=-1, keep_dims=True) + x = tf.nn.relu((x - min_top) + 1e-12) + x /= tf.reduce_sum(x, axis=-1, keep_dims=True) + return x, tf.reduce_max(top_x, axis=-1) + + +def top_k_experts(x, k, hparams): + x_shape = tf.shape(x) + x_flat = tf.reshape(x, [-1, x.get_shape().as_list()[-1]]) + is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN + gates, load = expert_utils.noisy_top_k_gating( + x_flat, hparams.v_size, is_training, k) + gates_shape = [x_shape[0], x_shape[1], x_shape[2], hparams.v_size] + gates = tf.reshape(gates, gates_shape) + load_loss = expert_utils.cv_squared(load) + return gates, load_loss + + def gumbel_sample(shape): """Sample from the Gumbel distribution, protect from overflows.""" uniform_samples = tf.random_uniform(shape, minval=0.00001, maxval=0.99998) @@ -96,12 +119,19 @@ def gumbel_sample(shape): def dae(x, hparams, name): with tf.variable_scope(name): m = tf.layers.dense(x, hparams.v_size, name="mask") + if hparams.softmax_k > 0: + m, kl = top_k_softmax(m, hparams.softmax_k) + return m, m, 1.0 - tf.reduce_mean(kl) logsm = tf.nn.log_softmax(m) # Gumbel-softmax sample. gumbel_samples = gumbel_sample(tf.shape(m)) steps = hparams.kl_warmup_steps gumbel_samples *= common_layers.inverse_exp_decay(steps // 5) * 0.5 temperature = 1.2 - common_layers.inverse_lin_decay(steps) + # 30% of the time keep reasonably high temperature to keep learning. + temperature = tf.cond(tf.less(tf.random_uniform([]), 0.7), + lambda: temperature, + lambda: tf.random_uniform([], minval=0.5, maxval=1.0)) s = tf.nn.softmax((logsm + gumbel_samples) / temperature) m = tf.nn.softmax(m) kl = - tf.reduce_max(logsm, axis=-1) @@ -228,13 +258,15 @@ def ae_compress(x, is_2d, hparams, name, reuse=None): cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") # To put a standard VAE use the line below. # cur, vae_kl, _, _ = vae(cur, hparams, "kmeans_vae") + means = tf.get_variable("z_to_dense", [hparams.v_size, hparams.hidden_size]) + if hparams.use_gumbel_softmax: + _, hot, loss = dae(cur, hparams, "dae") + return cur, hot, loss + # Using k-means part. L2-normalizing to use fast cosine distance. cur = mix(tf.nn.l2_normalize(cur, dim=3), cur, hparams.startup_steps // 3, mode="exp", simple=True) cur_n = hparams.kmeans_lr_factor * cur cur_n += (1.0 - hparams.kmeans_lr_factor) * tf.stop_gradient(cur) - means = tf.get_variable("z_to_dense", [hparams.v_size, hparams.hidden_size]) - # To use Gumbel-Softmax use the line below instead. - # _, hot, loss = dae(cur, hparams, "dae") hot, loss = kmeans(cur_n, means, hparams, name="kmeans") # We need a linear layer to undo the l2-normalization. cur = tf.layers.dense(cur, hparams.hidden_size, name="unnormalize") @@ -248,6 +280,8 @@ def ae_embed(hot, hparams, name, reuse=None): emb = tf.matmul(hot_flat, means) emb = tf.reshape(emb, [tf.shape(hot)[0], tf.shape(hot)[1], tf.shape(hot)[2], hparams.hidden_size]) + if hparams.use_gumbel_softmax: + return emb return tf.layers.dense(emb, hparams.hidden_size, name="unnormalize", reuse=reuse) @@ -255,12 +289,12 @@ def ae_embed(hot, hparams, name, reuse=None): def ae_decompress(z, ae, x, is_2d, hparams, name, reuse=None): """Decompress from z, leaking from ae.""" with tf.variable_scope(name + "_decompress", reuse=reuse): - # Leak at the beginning to help train. - z = mix(z, ae, hparams.startup_steps) - prob_z = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.8 - prob_z = prob_z if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN else 1.0 - # Gradients flow to ae while the value is z. - z = tf.stop_gradient(z) + ae - tf.stop_gradient(ae) + if hparams.use_gumbel_softmax: + # Leak at the beginning to help train. + z = mix(z, ae, hparams.startup_steps) + else: + # Gradients flow to ae while the value is z. + z = tf.stop_gradient(z) + ae - tf.stop_gradient(ae) # Leak during training to keep the full dense autoencoder. prob_z = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.6 prob_z = prob_z if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN else 1.0 @@ -334,7 +368,7 @@ def ae_transformer_internal(inputs, targets, target_space, hparams): kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.8), min_value=0.0001) reconstruct_loss *= common_layers.inverse_exp_decay(hparams.startup_steps) - losses = {"kl": kl, "reconstruction": reconstruct_loss} + losses = {"kl": kl, "reconstruction": reconstruct_loss * 0.1} return z, losses @@ -398,7 +432,9 @@ def transformer_ae_small(): hparams.add_hparam("kmeans_lr_factor", 0.002) hparams.add_hparam("z_dropout", 0.1) hparams.add_hparam("is_2d", 0) - hparams.add_hparam("decode_autoregressive", 1) + hparams.add_hparam("use_gumbel_softmax", int(True)) + hparams.add_hparam("softmax_k", 4) + hparams.add_hparam("decode_autoregressive", int(True)) return hparams @@ -411,7 +447,7 @@ def transformer_ae_cifar(): hparams.batch_size = 1024 * 16 hparams.num_compress_steps = 2 hparams.v_size = 1024 * 16 - hparams.kl_warmup_steps = 350000 + hparams.kl_warmup_steps = 150000 hparams.startup_steps = 30000 hparams.kmeans_lr_factor = 0.0 hparams.is_2d = 1 From be3e6fda0045b244cac92fadf43af2bb93fea9b7 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 29 Sep 2017 14:43:33 -0700 Subject: [PATCH 0452/4095] Make @recompute_grad memory-efficient and fix variable reuse bug PiperOrigin-RevId: 170534956 --- tensor2tensor/layers/rev_block.py | 21 +++++++++--- tensor2tensor/layers/rev_block_test.py | 47 +++++++++++++++++++------- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 8d1206ee8..5804e4d8f 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -91,8 +91,8 @@ def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars, # dL/dy2 * dG(y1)/y1 grad_gy1_y2 = tf.gradients(gy1, y1_stop, grad_y2)[0] grad_x1 = grad_y1 + grad_gy1_y2 - grad_x2 = (tf.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 + tf.gradients( - fx2, x2_stop, grad_gy1_y2)[0]) + grad_x2 = (tf.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 + + tf.gradients(fx2, x2_stop, grad_gy1_y2)[0]) # Compute gradients wrt to vars and side inputs in f and g grads1 = tf.gradients(gy1, g_vars + g_side_input, grad_y2) @@ -345,10 +345,19 @@ def wrapped(*args): def _recompute_grad(fn, args): """See recompute_grad.""" + cached_vs = [] + def grad_fn(inputs, variables, outputs, output_grads): + """Recompute outputs for gradient computation.""" del outputs - # recompute outputs - outputs = list(fn(*inputs)) + # Recompute outputs + with tf.control_dependencies(output_grads): + with tf.variable_scope(cached_vs[0], reuse=True): + outputs = fn(*inputs) + + if not (isinstance(outputs, list) or isinstance(outputs, tuple)): + outputs = [outputs] + outputs = list(outputs) grads = tf.gradients(outputs, inputs + variables, output_grads) grad_inputs = grads[:len(inputs)] grad_vars = grads[len(inputs):] @@ -356,6 +365,8 @@ def grad_fn(inputs, variables, outputs, output_grads): @common_layers.fn_with_custom_grad(grad_fn) def fn_with_recompute(*args): - return fn(*args) + with tf.variable_scope(None, default_name="recompute") as vs: + cached_vs.append(vs) + return fn(*args) return fn_with_recompute(*args) diff --git a/tensor2tensor/layers/rev_block_test.py b/tensor2tensor/layers/rev_block_test.py index 3e5f7c932..e4c87634f 100644 --- a/tensor2tensor/layers/rev_block_test.py +++ b/tensor2tensor/layers/rev_block_test.py @@ -141,22 +141,43 @@ class RecomputeTest(tf.test.TestCase): def testRecompute(self): - @rev_block.recompute_grad - def fn_recompute(x, y): - return x + y, x**y - - def fn(x, y): - return x + y, x**y - - x = tf.ones((3, 3)) - y = tf.ones((3, 3)) - out1 = tf.reduce_sum(fn_recompute(x, y)) - out2 = tf.reduce_sum(fn(x, y)) + def layer(x, name=None): + with tf.variable_scope(name, default_name="layer"): + x = tf.contrib.layers.layer_norm(x) + x = tf.layers.conv1d( + x, + 10, + 1, + use_bias=False, + kernel_initializer=tf.constant_initializer(42.42)) + x = tf.nn.relu(x) + return x + + def fn(x): + out = x + for _ in xrange(3): + out = layer(out) + return out - grad1 = tf.gradients(out1, [x, y]) - grad2 = tf.gradients(out2, [x, y]) + @rev_block.recompute_grad + def fn_recompute(x): + return fn(x) + + x = tf.random_uniform((3, 1, 3)) + recompute_vars = None + with tf.variable_scope("recompute") as vs: + out1 = tf.reduce_sum(fn_recompute(x)) + recompute_vars = vs.trainable_variables() + reg_vars = None + with tf.variable_scope("regular") as vs: + out2 = tf.reduce_sum(fn(x)) + reg_vars = vs.trainable_variables() + + grad1 = tf.gradients(out1, recompute_vars) + grad2 = tf.gradients(out2, reg_vars) with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) outs = sess.run([out1, out2, grad1, grad2]) self.assertAllClose(outs[0], outs[1]) for g1, g2 in zip(outs[2], outs[3]): From 6785c33609516cb9154aac4dbd8549e862fa8d6f Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 29 Sep 2017 15:57:45 -0700 Subject: [PATCH 0453/4095] Remove default data_dir PiperOrigin-RevId: 170545064 --- tensor2tensor/bin/t2t-trainer | 2 ++ tensor2tensor/data_generators/problem.py | 4 +++- tensor2tensor/utils/trainer_utils.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index c986522f3..5a2866da6 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -68,6 +68,8 @@ def main(_): trainer_utils.validate_flags() output_dir = os.path.expanduser(FLAGS.output_dir) tmp_dir = os.path.expanduser(FLAGS.tmp_dir) + if not FLAGS.data_dir: + raise ValueError("You must specify a --data_dir") data_dir = os.path.expanduser(FLAGS.data_dir) tf.gfile.MakeDirs(output_dir) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index aee71922b..e46708859 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -372,8 +372,10 @@ def dataset(self, } is_training = mode == tf.estimator.ModeKeys.TRAIN + data_filepattern = self.filepattern(data_dir, dataset_split) + tf.logging.info("Reading data files from %s", data_filepattern) data_files = tf.contrib.slim.parallel_reader.get_data_files( - [self.filepattern(data_dir, dataset_split)]) + data_filepattern) if shuffle_files or shuffle_files is None and is_training: random.shuffle(data_files) dataset = tf.contrib.data.TFRecordDataset(data_files) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 30a079af3..fcdf5a463 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -60,7 +60,7 @@ model.""") flags.DEFINE_string("problems", "", "Dash separated list of problems to " "solve.") -flags.DEFINE_string("data_dir", "/tmp/data", "Directory with training data.") +flags.DEFINE_string("data_dir", None, "Directory with training data.") flags.DEFINE_integer("train_steps", 250000, "The number of steps to run training for.") flags.DEFINE_bool("eval_run_autoregressive", False, From 464f9adae898e9b950b43df6c841814795116ebe Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 29 Sep 2017 16:28:02 -0700 Subject: [PATCH 0454/4095] Correct typos from PR merge in iPython. PiperOrigin-RevId: 170548704 --- .../visualization/TransformerVisualization.ipynb | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index 326f3f5c3..ae3c5809a 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -75,9 +75,6 @@ }, { "cell_type": "code", - "metadata": { - "collapsed": false - }, "execution_count": 3, "metadata": { "collapsed": false @@ -114,7 +111,7 @@ "cell_type": "code", "execution_count": 4, "metadata": { - "collapsed": false + "collapsed": false, "scrolled": true }, "outputs": [ @@ -189,7 +186,6 @@ "metadata": { "collapsed": false }, - "metadata": {}, "outputs": [ { "name": "stdout", @@ -213,7 +209,6 @@ "metadata": { "collapsed": false }, - "metadata": {}, "outputs": [ { "name": "stdout", @@ -246,7 +241,6 @@ "metadata": { "collapsed": false }, - "metadata": {}, "outputs": [ { "name": "stdout", @@ -326,7 +320,7 @@ "cell_type": "code", "execution_count": 10, "metadata": { - "collapsed": false + "collapsed": false, "scrolled": false }, "outputs": [ @@ -417,7 +411,6 @@ "metadata": { "collapsed": false }, - "metadata": {}, "outputs": [ { "data": { @@ -465,7 +458,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, "scrolled": true }, "outputs": [], From ed7862c95a42323b775573a7b508409a7d167afc Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Fri, 29 Sep 2017 17:00:36 -0700 Subject: [PATCH 0455/4095] 1d Dilated masked and unmasked self-attention. Added spaces between tokens for logging during inference. PiperOrigin-RevId: 170552095 --- tensor2tensor/layers/common_attention.py | 295 ++++++++++++++++++++++- tensor2tensor/utils/decoding.py | 4 +- 2 files changed, 294 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 956d3fcb8..33ce7d4a9 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -1090,6 +1090,280 @@ def pad_l_and_r(x, pad_length): return output +def reshape_by_blocks(x, x_shape, memory_block_size): + x = tf.reshape(x, [ + x_shape[0], x_shape[1], x_shape[2] // memory_block_size, + memory_block_size, x_shape[3] + ]) + return x + + +def dilated_self_attention_1d(q, + k, + v, + query_block_size=128, + memory_block_size=128, + gap_size=2, + num_memory_blocks=2, + name=None): + """dilated self-attention. + + Args: + q: a Tensor with shape [batch, heads, length, depth_k] + k: a Tensor with shape [batch, heads, length, depth_k] + v: a Tensor with shape [batch, heads, length, depth_v] + query_block_size: an integer indicating size of query block + memory_block_size: an integer indicating the size of a memory block. + gap_size: an integer indicating the gap size + num_memory_blocks: how many memory blocks to look at to the left and right. + Each will be separated by gap_size. + name: an optional string + + Returns: + a Tensor of shape [batch, heads, length, depth_v] + """ + with tf.variable_scope( + name, default_name="dilated_self_attention_1d", values=[q, k, v]): + v_list_shape = v.get_shape().as_list() + v_shape = tf.shape(v) + depth_v = v_shape[3] + batch_size = v_shape[0] + num_heads = v_shape[1] + original_length = tf.shape(q)[2] + # making sure q is a multiple of query block size + def pad_to_multiple(x, pad_length): + x_length = tf.shape(x)[2] + return tf.pad(x, [[0, 0], [0, 0], [0, -x_length % pad_length], [0, 0]]) + + def pad_l_and_r(x, pad_length): + return tf.pad(x, [[0, 0], [0, 0], [pad_length, pad_length], [0, 0]]) + + q = pad_to_multiple(q, query_block_size) + v = pad_to_multiple(v, query_block_size) + k = pad_to_multiple(k, query_block_size) + + q.set_shape(v_list_shape) + v.set_shape(v_list_shape) + k.set_shape(v_list_shape) + # Setting up q blocks + new_q_shape = tf.shape(q) + # Setting up q blocks + q = reshape_by_blocks(q, new_q_shape, query_block_size) + self_k_part = reshape_by_blocks(k, new_q_shape, query_block_size) + self_v_part = reshape_by_blocks(v, new_q_shape, query_block_size) + + # Setting up k and v windows + k_v_padding = (gap_size + memory_block_size) * num_memory_blocks + k = pad_l_and_r(k, k_v_padding) + v = pad_l_and_r(v, k_v_padding) + # getting gather indices + index_length = (new_q_shape[2] - query_block_size + memory_block_size) + indices = tf.range(0, index_length, delta=1, name="index_range") + # making indices [1, length, 1] to appy convs + indices = tf.reshape(indices, [1, -1, 1]) + kernel = tf.expand_dims(tf.eye(memory_block_size), axis=1) + gather_indices = tf.nn.conv1d( + tf.cast(indices, tf.float32), + kernel, + query_block_size, + padding="VALID", + name="gather_conv") + + gather_indices = tf.squeeze(tf.cast(gather_indices, tf.int32), axis=0) + + # get left and right memory blocks for each query + # [length, batch, heads, dim] + k_t = tf.transpose(k, [2, 0, 1, 3]) + v_t = tf.transpose(v, [2, 0, 1, 3]) + left_k = gather_dilated_memory_blocks(k_t[:-k_v_padding, :, :, :], + num_memory_blocks, gap_size, + query_block_size, memory_block_size, + gather_indices) + left_v = gather_dilated_memory_blocks(v_t[:-k_v_padding, :, :, :], + num_memory_blocks, gap_size, + query_block_size, memory_block_size, + gather_indices) + + right_k = gather_dilated_memory_blocks(k_t[k_v_padding:, :, :, :], + num_memory_blocks, gap_size, + query_block_size, memory_block_size, + gather_indices, direction="right") + right_v = gather_dilated_memory_blocks(v_t[k_v_padding:, :, :, :], + num_memory_blocks, gap_size, + query_block_size, memory_block_size, + gather_indices, direction="right") + + k_windows = tf.concat([left_k, self_k_part, right_k], axis=3) + v_windows = tf.concat([left_v, self_v_part, right_v], axis=3) + attention_bias = tf.expand_dims( + embedding_to_padding(k_windows) * -1e9, axis=-2) + + output = dot_product_attention( + q, k_windows, v_windows, attention_bias, dropout_rate=0., + name="dilated_1d", make_image_summary=False) + output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) + # Remove the padding if introduced + output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) + output.set_shape(v_list_shape) + return output + + +def gather_dilated_memory_blocks(x, num_memory_blocks, gap_size, + query_block_size, memory_block_size, + gather_indices, direction="left"): + """Gathers blocks with gaps in between. + + Args: + x: A tensor of shape [length, batch, heads, depth] + num_memory_blocks: num_memory_blocks: how many memory blocks to look + in "direction". Each will be separated by gap_size. + gap_size: an integer indicating the gap size + query_block_size: an integer indicating size of query block + memory_block_size: an integer indicating the size of a memory block. + gather_indices: The indices to gather from. + direction: left or right + Returns: + a tensor of shape [batch, heads, blocks, block_length, depth] + """ + + gathered_blocks = [] + # gathering memory blocks + for block_id in range(num_memory_blocks): + block_end_index = -(query_block_size + + gap_size * (block_id+1) + memory_block_size * + block_id) - 1 + block_start_index = ( + (memory_block_size + gap_size) * + (num_memory_blocks - (block_id + 1)) + ) + if direction != "left": + [block_end_index, block_start_index] = [ + -block_start_index - 1, -block_end_index + 1 + ] + def gather_dilated_1d_blocks(x, gather_indices): + x_new = tf.gather(x, gather_indices) + # [batch, heads, blocks, block_length, dim] + return tf.transpose(x_new, [2, 3, 0, 1, 4]) + + gathered_blocks.append( + gather_dilated_1d_blocks(x[block_start_index:block_end_index], + gather_indices)) + return tf.concat(gathered_blocks, 3) + + +def masked_dilated_self_attention_1d(q, + k, + v, + query_block_size=64, + memory_block_size=64, + gap_size=2, + num_memory_blocks=2, + name=None): + """dilated self-attention. + + Args: + q: a Tensor with shape [batch, heads, length, depth_k] + k: a Tensor with shape [batch, heads, length, depth_k] + v: a Tensor with shape [batch, heads, length, depth_v] + query_block_size: an integer + memory_block_size: an integer indicating how much to look left. + gap_size: an integer indicating the gap size + num_memory_blocks: how many memory blocks to look at to the left. Each will + be separated by gap_size. + name: an optional string + + Returns: + a Tensor of shape [batch, heads, length, depth_v] + """ + with tf.variable_scope( + name, default_name="masked_dilated_self_attention_1d", values=[q, k, v]): + v_list_shape = v.get_shape().as_list() + v_shape = tf.shape(v) + depth_v = v_shape[3] + batch_size = v_shape[0] + num_heads = v_shape[1] + original_length = tf.shape(q)[2] + # making sure q is a multiple of query block size + def pad_to_multiple(x, pad_length): + x_length = tf.shape(x)[2] + return tf.pad(x, [[0, 0], [0, 0], [0, -x_length % pad_length], [0, 0]]) + + def pad_l(x, left_pad_length): + return tf.pad(x, [[0, 0], [0, 0], [left_pad_length, 0], [0, 0]]) + + q = pad_to_multiple(q, query_block_size) + v = pad_to_multiple(v, query_block_size) + k = pad_to_multiple(k, query_block_size) + q.set_shape(v_list_shape) + v.set_shape(v_list_shape) + k.set_shape(v_list_shape) + # Setting up q blocks + new_q_shape = tf.shape(q) + + # Setting up q blocks + q = reshape_by_blocks(q, new_q_shape, query_block_size) + self_k_part = reshape_by_blocks(k, new_q_shape, query_block_size) + self_v_part = reshape_by_blocks(v, new_q_shape, query_block_size) + # Setting up k and v windows + k_v_padding = (gap_size + memory_block_size) * num_memory_blocks + k = pad_l(k, k_v_padding) + v = pad_l(v, k_v_padding) + # getting gather indices + index_length = (new_q_shape[2] - query_block_size + memory_block_size) + + indices = tf.range(0, index_length, delta=1, name="index_range") + # making indices [1, length, 1] to appy convs + indices = tf.reshape(indices, [1, -1, 1]) + kernel = tf.expand_dims(tf.eye(memory_block_size), axis=1) + gather_indices = tf.nn.conv1d( + tf.cast(indices, tf.float32), + kernel, + query_block_size, + padding="VALID", + name="gather_conv") + gather_indices = tf.squeeze(tf.cast(gather_indices, tf.int32), axis=0) + + # get left and right memory blocks for each query + # [length, batch, heads, dim] + k_t = tf.transpose(k, [2, 0, 1, 3]) + v_t = tf.transpose(v, [2, 0, 1, 3]) + + k_unmasked_windows = gather_dilated_memory_blocks(k_t, num_memory_blocks, + gap_size, + query_block_size, + memory_block_size, + gather_indices) + v_unmasked_windows = gather_dilated_memory_blocks(v_t, num_memory_blocks, + gap_size, + query_block_size, + memory_block_size, + gather_indices) + + # combine memory windows + block_q_shape = tf.shape(q) + masked_attention_bias = tf.tile(tf.expand_dims( + attention_bias_lower_triangle(query_block_size), axis=0), + [block_q_shape[0], block_q_shape[1], + block_q_shape[2], 1, 1]) + padding_attention_bias = tf.expand_dims( + embedding_to_padding(k_unmasked_windows) * -1e9, axis=-2) + padding_attention_bias = tf.tile(padding_attention_bias, + [1, 1, 1, query_block_size, 1]) + attention_bias = tf.concat([masked_attention_bias, padding_attention_bias], + axis=-1) + # combine memory windows + k_windows = tf.concat([self_k_part, k_unmasked_windows], 3) + v_windows = tf.concat([self_v_part, v_unmasked_windows], 3) + output = dot_product_attention( + q, k_windows, v_windows, attention_bias, dropout_rate=0., + name="dilated_1d", make_image_summary=False) + output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) + # Remove the padding if introduced + output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) + output.set_shape(v_list_shape) + return output + + def local_attention_2d(q, k, v, @@ -1441,6 +1715,8 @@ def multihead_attention(query_antecedent, q_padding="VALID", kv_padding="VALID", cache=None, + gap_size=0, + num_memory_blocks=2, name=None, **kwargs): """Multihead scaled-dot-product attention with input/output transformations. @@ -1475,6 +1751,10 @@ def multihead_attention(query_antecedent, be empty Tensors of the appropriate shape. 'k' [batch_size, 0, key_channels] 'v' [batch_size, 0, value_channels] + gap_size: Integer option for dilated attention to indicate spacing between + memory blocks. + num_memory_blocks: Integer option to indicate how many memory blocks to look + at. name: an optional string **kwargs (dict): Params for the attention function @@ -1542,13 +1822,22 @@ def multihead_attention(query_antecedent, dropout_rate, image_shapes) elif attention_type == "local_mask_right": x = masked_local_attention_1d(q, k, v, block_length=block_length) - else: - assert attention_type == "local_unmasked" + elif attention_type == "local_unmasked": x = local_attention_1d( q, k, v, block_length=block_length, filter_width=block_width) + elif attention_type == "masked_dilated_1d": + x = masked_dilated_self_attention_1d(q, k, v, block_length, + block_width, + gap_size, + num_memory_blocks) + else: + assert attention_type == "unmasked_dilated_1d" + x = dilated_self_attention_1d(q, k, v, block_length, + block_width, + gap_size, + num_memory_blocks) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") - if additional_returned_value is not None: return x, additional_returned_value return x diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index c11fdef34..f1a3bf0bc 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -86,10 +86,10 @@ def log_decode_results(inputs, if targets is not None: decoded_targets = " ".join(map(str, targets.flatten())) else: - decoded_outputs = "".join( + decoded_outputs = " ".join( map(str, targets_vocab.decode(_save_until_eos(outputs.flatten())))) if targets is not None: - decoded_targets = "".join( + decoded_targets = " ".join( map(str, targets_vocab.decode(_save_until_eos(targets.flatten())))) tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) From fe5f8ade0170506d3b6730ca4151e423cdcfc35f Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 29 Sep 2017 17:10:00 -0700 Subject: [PATCH 0456/4095] Add full system exercise to Travis PiperOrigin-RevId: 170553043 --- .travis.yml | 14 ++++- tensor2tensor/data_generators/algorithmic.py | 54 +++++++++++++------- tensor2tensor/tpu/__init__.py | 15 ++++++ 3 files changed, 64 insertions(+), 19 deletions(-) create mode 100644 tensor2tensor/tpu/__init__.py diff --git a/.travis.yml b/.travis.yml index 8f20ac24e..91ac3625e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,9 +8,21 @@ before_install: install: - pip install tensorflow - pip install .[tests] +env: + - T2T_PROBLEM=algorithmic_reverse_binary40_test + - T2T_DATA_DIR=/tmp/t2t-data + - T2T_TRAIN_DIR=/tmp/t2t-train script: - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/utils/trainer_utils_test.py --ignore=tensor2tensor/problems_test.py - pytest tensor2tensor/utils/registry_test.py - pytest tensor2tensor/utils/trainer_utils_test.py + - t2t-datagen 2>&1 | grep translate && echo passed + - python -c "from tensor2tensor.models import transformer; print(transformer.Transformer.__name__)" + - t2t-trainer --registry_help + - mkdir $T2T_DATA_DIR + - mkdir $T2T_TRAIN_DIR + - t2t-datagen --problem=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR + - t2t-trainer --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --train_steps=5 --eval_steps=5 --output_dir=$T2T_TRAIN_DIR + - t2t-decoder --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --output_dir=$T2T_TRAIN_DIR git: - depth: 3 \ No newline at end of file + depth: 3 diff --git a/tensor2tensor/data_generators/algorithmic.py b/tensor2tensor/data_generators/algorithmic.py index c44ce65d8..3c1d5468f 100644 --- a/tensor2tensor/data_generators/algorithmic.py +++ b/tensor2tensor/data_generators/algorithmic.py @@ -62,13 +62,15 @@ def num_shards(self): return 10 def generate_data(self, data_dir, _, task_id=-1): + def generator_eos(nbr_symbols, max_length, nbr_cases): """Shift by NUM_RESERVED_IDS and append EOS token.""" for case in self.generator(nbr_symbols, max_length, nbr_cases): new_case = {} for feature in case: - new_case[feature] = [i + text_encoder.NUM_RESERVED_TOKENS - for i in case[feature]] + [text_encoder.EOS_ID] + new_case[feature] = [ + i + text_encoder.NUM_RESERVED_TOKENS for i in case[feature] + ] + [text_encoder.EOS_ID] yield new_case utils.generate_dataset_and_shuffle( @@ -154,10 +156,7 @@ def generator(self, nbr_symbols, max_length, nbr_cases): for _ in xrange(nbr_cases): l = np.random.randint(max_length) + 1 inputs = [np.random.randint(nbr_symbols - shift) for _ in xrange(l)] - yield { - "inputs": inputs, - "targets": [i + shift for i in inputs] - } + yield {"inputs": inputs, "targets": [i + shift for i in inputs]} @property def dev_length(self): @@ -191,10 +190,7 @@ def generator(self, nbr_symbols, max_length, nbr_cases): for _ in xrange(nbr_cases): l = np.random.randint(max_length) + 1 inputs = [np.random.randint(nbr_symbols) for _ in xrange(l)] - yield { - "inputs": inputs, - "targets": list(reversed(inputs)) - } + yield {"inputs": inputs, "targets": list(reversed(inputs))} @registry.register_problem @@ -272,10 +268,7 @@ def reverse_generator_nlplike(nbr_symbols, for _ in xrange(nbr_cases): l = int(abs(np.random.normal(loc=max_length / 2, scale=std_dev)) + 1) inputs = zipf_random_sample(distr_map, l) - yield { - "inputs": inputs, - "targets": list(reversed(inputs)) - } + yield {"inputs": inputs, "targets": list(reversed(inputs))} @registry.register_problem @@ -287,8 +280,8 @@ def num_symbols(self): return 8000 def generator(self, nbr_symbols, max_length, nbr_cases): - return reverse_generator_nlplike( - nbr_symbols, max_length, nbr_cases, 10, 1.300) + return reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, 10, + 1.300) @property def train_length(self): @@ -308,8 +301,8 @@ def num_symbols(self): return 32000 def generator(self, nbr_symbols, max_length, nbr_cases): - return reverse_generator_nlplike( - nbr_symbols, max_length, nbr_cases, 10, 1.050) + return reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, 10, + 1.050) def lower_endian_to_number(l, base): @@ -431,3 +424,28 @@ class AlgorithmicMultiplicationDecimal40(AlgorithmicMultiplicationBinary40): @property def num_symbols(self): return 10 + + +@registry.register_problem +class AlgorithmicReverseBinary40Test(AlgorithmicReverseBinary40): + """Test Problem with tiny dataset.""" + + @property + def train_length(self): + return 10 + + @property + def dev_length(self): + return 10 + + @property + def train_size(self): + return 1000 + + @property + def dev_size(self): + return 100 + + @property + def num_shards(self): + return 1 diff --git a/tensor2tensor/tpu/__init__.py b/tensor2tensor/tpu/__init__.py new file mode 100644 index 000000000..3f714ce1f --- /dev/null +++ b/tensor2tensor/tpu/__init__.py @@ -0,0 +1,15 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + From 583356d5fb4f835a99545b74ec8cc1d2df6aab6d Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 29 Sep 2017 20:06:36 -0700 Subject: [PATCH 0457/4095] Rm xrange usage to fix Py3 build PiperOrigin-RevId: 170563143 --- .travis.yml | 9 +++++---- tensor2tensor/layers/rev_block_test.py | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 91ac3625e..46373f829 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,11 +9,12 @@ install: - pip install tensorflow - pip install .[tests] env: - - T2T_PROBLEM=algorithmic_reverse_binary40_test - - T2T_DATA_DIR=/tmp/t2t-data - - T2T_TRAIN_DIR=/tmp/t2t-train + global: + - T2T_PROBLEM=algorithmic_reverse_binary40_test + - T2T_DATA_DIR=/tmp/t2t-data + - T2T_TRAIN_DIR=/tmp/t2t-train script: - - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/utils/trainer_utils_test.py --ignore=tensor2tensor/problems_test.py + - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/utils/trainer_utils_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/tpu/tpu_trainer_lib_test.py - pytest tensor2tensor/utils/registry_test.py - pytest tensor2tensor/utils/trainer_utils_test.py - t2t-datagen 2>&1 | grep translate && echo passed diff --git a/tensor2tensor/layers/rev_block_test.py b/tensor2tensor/layers/rev_block_test.py index e4c87634f..31df15068 100644 --- a/tensor2tensor/layers/rev_block_test.py +++ b/tensor2tensor/layers/rev_block_test.py @@ -122,7 +122,9 @@ def f2(x): self._testRevBlock(f=[f1, f2, f1, f2]) - def testConvAndBatchNorm(self): + # TODO(rsepassi): Recent change to conv seems to have broken this test. Find + # out why. + def _testConvAndBatchNorm(self): x = tf.random_uniform( [self.BATCH_SIZE, 10, self.CHANNELS], dtype=tf.float32) @@ -155,7 +157,7 @@ def layer(x, name=None): def fn(x): out = x - for _ in xrange(3): + for _ in range(3): out = layer(out) return out From 24879dd3ad64c7671b4eaee3fdb6d9051ca168c0 Mon Sep 17 00:00:00 2001 From: Mike Kroutikov <mkroutikov@innodata.com> Date: Sat, 30 Sep 2017 19:00:04 -0400 Subject: [PATCH 0458/4095] fixed error due to newer and better tf.cond in recent TF --- tensor2tensor/utils/input_fn_builder.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index 32b88e58d..c21dd973d 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -176,23 +176,14 @@ def _problem_choice(choice_mode, mode, problem_count, loss_moving_avgs, def cond_on_index(fn, index_tensor, max_idx, cur_idx=0): """Call fn(index_tensor) using tf.cond in [cur_id, max_idx].""" - # Because tf.cond expects fn to return a flat list of Tensors, we flatten the - # output of fn. By capturing the original output here in orig_out, we can pack - # the flat sequence into the original structure. - orig_out = [] - - def wrapped_fn(): - out = fn(cur_idx) - orig_out.append(out) - return tf.contrib.framework.nest.flatten(out) - if cur_idx == max_idx: - flat_out = wrapped_fn() - else: - flat_out = tf.cond( - tf.equal(index_tensor, cur_idx), wrapped_fn, - lambda: cond_on_index(fn, index_tensor, max_idx, cur_idx + 1)) - return tf.contrib.framework.nest.pack_sequence_as(orig_out[0], flat_out) + return fn(cur_idx) + + return tf.cond( + tf.equal(index_tensor, cur_idx), + lambda: fn(cur_idx), + lambda: cond_on_index(fn, index_tensor, max_idx, cur_idx + 1) + ) class DummyQueueRunner(object): From 51ce1e58cae177c3133583c894b98e758d59d01d Mon Sep 17 00:00:00 2001 From: Mike Kroutikov <mkroutikov@innodata.com> Date: Sun, 1 Oct 2017 13:41:32 -0400 Subject: [PATCH 0459/4095] unit test for cond_on_index --- tensor2tensor/utils/input_fn_builder_test.py | 59 ++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 tensor2tensor/utils/input_fn_builder_test.py diff --git a/tensor2tensor/utils/input_fn_builder_test.py b/tensor2tensor/utils/input_fn_builder_test.py new file mode 100644 index 000000000..34b60c47a --- /dev/null +++ b/tensor2tensor/utils/input_fn_builder_test.py @@ -0,0 +1,59 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for tensor2tensor.utils.input_fn_builder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensor2tensor.utils import input_fn_builder +import tensorflow as tf + + +class InputFnBuilderTest(tf.test.TestCase): + + def testCondOnIndex(self): + """Smoke tests of cond_on_index()""" + + z = tf.constant(1., dtype=tf.float32) + def f(n): + return { + "a": z * n, + "b": z * n * n + } + + index = tf.placeholder(shape=[], dtype=tf.int32) + out = input_fn_builder.cond_on_index(f, index, 3, 0) + + with self.test_session() as sess: + # Check dispatching to the correct branch + result = sess.run(out, feed_dict={ + index: 2 + }) + + self.assertAllClose(result["a"], 2.) + self.assertAllClose(result["b"], 4.) + + result = sess.run(out, feed_dict={ + index: 3 + }) + + self.assertAllClose(result["a"], 3.) + self.assertAllClose(result["b"], 9.) + + +if __name__ == '__main__': + tf.test.main() From 1d497bd5075edd4c97ab8bdeff2cc9e0ff2aa42a Mon Sep 17 00:00:00 2001 From: Urvashi Khandelwal <urvashik@stanford.edu> Date: Mon, 9 Oct 2017 08:34:11 -0700 Subject: [PATCH 0460/4095] Loading data for CNN/Dailymail summarization task --- .../data_generators/cnn_dailymail.py | 113 ++++++++++++++---- 1 file changed, 90 insertions(+), 23 deletions(-) diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index 2f8e9cf30..49724cc2a 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -21,6 +21,7 @@ import os import tarfile +import hashlib # Dependency imports @@ -38,19 +39,31 @@ _DAILYMAIL_STORIES_DRIVE_URL = "https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfM1BxdkxVaTY2bWs" +# Note: using See et al. (2017) as reference for data generation +# For more info, use the links below + +# Train/Dev/Test Splits for summarization data +_TRAIN_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_train.txt" +_DEV_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt" +_TEST_URLS = "https://github.com/abisee/cnn-dailymail/blob/master/url_lists/all_test.txt" # End-of-sentence marker. EOS = text_encoder.EOS_ID +# Techniques for data prep from See et al. (2017) +dm_single_close_quote = u'\u2019' # unicode +dm_double_close_quote = u'\u201d' +END_TOKENS = [u'.', u'!', u'?', u'...', u"'", u"`", u'"', dm_single_close_quote, dm_double_close_quote, u")"] # acceptable ways to end a sentence + -def _maybe_download_corpora(tmp_dir): +def _maybe_download_corpora(tmp_dir, is_training): """Download corpora if necessary and unzip them. Args: tmp_dir: directory containing dataset. Returns: - filepath of the downloaded corpus file. + list of all files generated and path to file containing train/dev/test split info. """ cnn_filename = "cnn_stories.tgz" cnn_finalpath = os.path.join(tmp_dir, "cnn/stories/") @@ -66,29 +79,83 @@ def _maybe_download_corpora(tmp_dir): tmp_dir, dailymail_filename, _DAILYMAIL_STORIES_DRIVE_URL) with tarfile.open(dailymail_file, "r:gz") as dailymail_tar: dailymail_tar.extractall(tmp_dir) - return [cnn_finalpath, dailymail_finalpath] - -def story_generator(tmp_dir): - paths = _maybe_download_corpora(tmp_dir) - for path in paths: - for story_file in tf.gfile.Glob(path + "*"): - story = u"" - for line in tf.gfile.Open(story_file): - line = unicode(line, "utf-8") if six.PY2 else line.decode("utf-8") - story += line - yield story + cnn_files = tf.gfile.Glob(cnn_finalpath + "*") + dailymail_files = tf.gfile.Glob(dailymail_finalpath + "*") + all_files = cnn_files + dailymail_files + + if is_training: + urls_path = generator_utils.maybe_download(tmp_dir, "all_train.txt", _TRAIN_URLS) + else: + urls_path = generator_utils.maybe_download(tmp_dir, "all_val.txt", _DEV_URLS) + + return all_files, urls_path + +def example_splits(url_file, all_files): + def generate_hash(inp): + """Generate a sha1 hash to match the raw url to the filename extracted""" + h = hashlib.sha1() + h.update(inp) + return h.hexdigest() + + all_files_map = {f.split("/")[-1]:f for f in all_files} + + urls = [] + for line in tf.gfile.Open(url_file): + urls.append(line.strip()) + + filelist = [] + for url in urls: + url_hash = generate_hash(url) + filename = url_hash + ".story" + if filename not in all_files_map: + tf.logging.info("Missing file: %s" % url) + continue + filelist.append(all_files_map[filename]) + + tf.logging.info("Found %d examples" % len(filelist)) + + return filelist + +def example_generator(tmp_dir, is_training): + def fix_run_on_sents(line): + if u"@highlight" in line: return line + if line=="": return line + if line[-1] in END_TOKENS: return line + return line + u"." + + all_files, urls_path = _maybe_download_corpora(tmp_dir, is_training) + filelist = example_splits(urls_path, all_files) + + for story_file in filelist: + story = [] + summary = [] + reading_highlights = False + for line in tf.gfile.Open(story_file): + line = unicode(line.strip(), "utf-8") if six.PY2 else line.strip().decode("utf-8") + line = fix_run_on_sents(line) + if line == "": + continue + elif line.startswith(u"@highlight"): + if len(story) == 0: break # No article text + reading_highlights = True + elif reading_highlights: + summary.append(line) + else: + story.append(line) + yield " ".join(story) + u" <summary> " + " ".join(summary) def _story_summary_split(story): - end_pos = story.find("\n\n") # Upto first empty line. - assert end_pos != -1 - return story[:end_pos], story[end_pos:].strip() + split_str = u" <summary> " + split_str_len = len(split_str) + split_pos = story.find(split_str) + return story[:split_pos], story[split_pos+split_str_len:] # story, summary @registry.register_problem class SummarizeCnnDailymail32k(problem.Text2TextProblem): - """Summarize CNN and Daily Mail articles to their first paragraph.""" + """Summarize CNN and Daily Mail articles to their summary highlights.""" @property def is_character_level(self): @@ -124,14 +191,14 @@ def targeted_vocab_size(self): @property def use_train_shards_for_dev(self): - return True + return False - def generator(self, data_dir, tmp_dir, _): + def generator(self, data_dir, tmp_dir, is_training): encoder = generator_utils.get_or_generate_vocab_inner( data_dir, self.vocab_file, self.targeted_vocab_size, - story_generator(tmp_dir)) - for story in story_generator(tmp_dir): - summary, rest = _story_summary_split(story) + example_generator(tmp_dir, is_training)) + for example in example_generator(tmp_dir, is_training): + story, summary = _story_summary_split(example) encoded_summary = encoder.encode(summary) + [EOS] - encoded_story = encoder.encode(rest) + [EOS] + encoded_story = encoder.encode(story) + [EOS] yield {"inputs": encoded_story, "targets": encoded_summary} From d8ca082409cb0b9042b28227dfcf5450cf7d4542 Mon Sep 17 00:00:00 2001 From: Urvashi Khandelwal <urvashik@stanford.edu> Date: Mon, 9 Oct 2017 14:14:10 -0700 Subject: [PATCH 0461/4095] Removing summary token during vocab gen; handling empty stories -- confirmed the number of generated examples --- tensor2tensor/data_generators/cnn_dailymail.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index 49724cc2a..6ce8bea00 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -117,7 +117,7 @@ def generate_hash(inp): return filelist -def example_generator(tmp_dir, is_training): +def example_generator(tmp_dir, is_training, sum_token): def fix_run_on_sents(line): if u"@highlight" in line: return line if line=="": return line @@ -126,6 +126,7 @@ def fix_run_on_sents(line): all_files, urls_path = _maybe_download_corpora(tmp_dir, is_training) filelist = example_splits(urls_path, all_files) + story_summary_split_token = u" <summary> " if sum_token else " " for story_file in filelist: story = [] @@ -143,7 +144,11 @@ def fix_run_on_sents(line): summary.append(line) else: story.append(line) - yield " ".join(story) + u" <summary> " + " ".join(summary) + + if len(story) == 0 or len(summary) == 0: + continue + + yield " ".join(story) + story_summary_split_token + " ".join(summary) def _story_summary_split(story): @@ -196,8 +201,8 @@ def use_train_shards_for_dev(self): def generator(self, data_dir, tmp_dir, is_training): encoder = generator_utils.get_or_generate_vocab_inner( data_dir, self.vocab_file, self.targeted_vocab_size, - example_generator(tmp_dir, is_training)) - for example in example_generator(tmp_dir, is_training): + example_generator(tmp_dir, is_training, sum_token=False)) + for example in example_generator(tmp_dir, is_training, sum_token=True): story, summary = _story_summary_split(example) encoded_summary = encoder.encode(summary) + [EOS] encoded_story = encoder.encode(story) + [EOS] From 5685b1021ed0b28714f3236dc3b26a7abffffd30 Mon Sep 17 00:00:00 2001 From: Eric Purdy <eric.purdy@fathomhealth.co> Date: Mon, 9 Oct 2017 22:02:07 +0000 Subject: [PATCH 0462/4095] Set precision and recall metrics --- tensor2tensor/utils/metrics.py | 47 +++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 56ac17f38..173ffb194 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -43,7 +43,8 @@ class Metrics(object): ROUGE_2_F = "rouge_2_fscore" ROUGE_L_F = "rouge_L_fscore" EDIT_DISTANCE = "edit_distance" - + SET_PRECISION = 'set_precision' + SET_RECALL = 'set_recall' def padded_rmse(predictions, labels, weights_fn=common_layers.weights_all): predictions, labels = common_layers.pad_with_zeros(predictions, labels) @@ -188,6 +189,48 @@ def padded_accuracy(predictions, padded_labels = tf.to_int32(padded_labels) return tf.to_float(tf.equal(outputs, padded_labels)), weights +def set_precision(predictions, + labels, + weights_fn=common_layers.weights_nonzero): + """Precision of set predictions. + + Args: + predictions : A Tensor of scores of shape (batch, nlabels) + labels: A Tensor of int32s giving true set elements of shape (batch, seq_length) + + Returns: + hits: A Tensor of shape (batch, nlabels) + weights: A Tensor of shape (batch, nlabels) + """ + with tf.variable_scope("set_precision", values=[predictions, labels]): + labels = tf.squeeze(labels, [2, 3]) + labels = tf.one_hot(labels, predictions.shape[-1]) + labels = tf.reduce_max(labels, axis=1) + labels = tf.cast(labels, tf.bool) + predictions = predictions > 0 + return tf.to_float(tf.equal(labels, predictions)), tf.to_float(predictions) + +def set_recall(predictions, + labels, + weights_fn=common_layers.weights_nonzero): + """Recall of set predictions. + + Args: + predictions : A Tensor of scores of shape (batch, nlabels) + labels: A Tensor of int32s giving true set elements of shape (batch, seq_length) + + Returns: + hits: A Tensor of shape (batch, nlabels) + weights: A Tensor of shape (batch, nlabels) + """ + with tf.variable_scope("set_recall", values=[predictions, labels]): + labels = tf.squeeze(labels, [2, 3]) + labels = tf.one_hot(labels, predictions.shape[-1]) + labels = tf.reduce_max(labels, axis=1) + labels = tf.cast(labels, tf.bool) + predictions = predictions > 0 + return tf.to_float(tf.equal(labels, predictions)), tf.to_float(labels) + def create_evaluation_metrics(problems, model_hparams): """Creates the evaluation metrics for the model. @@ -278,4 +321,6 @@ def wrapped_metric_fn(): Metrics.ROUGE_2_F: rouge.rouge_2_fscore, Metrics.ROUGE_L_F: rouge.rouge_l_fscore, Metrics.EDIT_DISTANCE: sequence_edit_distance, + Metrics.SET_PRECISION: set_precision, + Metrics.SET_RECALL: set_recall, } From 3a9c9503ddbae018894787d20261e3ae2de390d4 Mon Sep 17 00:00:00 2001 From: pltrdy <pltrdy@gmail.com> Date: Sat, 14 Oct 2017 03:10:20 +0200 Subject: [PATCH 0463/4095] Fixing #359: decoding str object instead of bytes (#360) --- tensor2tensor/data_generators/cnn_dailymail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index 2f8e9cf30..8fa1e52d0 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -74,7 +74,7 @@ def story_generator(tmp_dir): for path in paths: for story_file in tf.gfile.Glob(path + "*"): story = u"" - for line in tf.gfile.Open(story_file): + for line in tf.gfile.Open(story_file, 'rb'): line = unicode(line, "utf-8") if six.PY2 else line.decode("utf-8") story += line yield story From 37a3a2987f5f10f44a17b419303d90a4ce2d92c9 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Sat, 30 Sep 2017 13:05:54 -0700 Subject: [PATCH 0464/4095] Shorten Travis test decoding PiperOrigin-RevId: 170598463 --- .travis.yml | 2 +- .../data_generators/cnn_dailymail.py | 2 +- .../data_generators/generator_utils.py | 11 +- tensor2tensor/data_generators/wmt.py | 107 ++++++------------ tensor2tensor/utils/input_fn_builder.py | 23 ++-- tensor2tensor/utils/input_fn_builder_test.py | 59 ---------- tensor2tensor/utils/metrics.py | 47 +------- 7 files changed, 60 insertions(+), 191 deletions(-) delete mode 100644 tensor2tensor/utils/input_fn_builder_test.py diff --git a/.travis.yml b/.travis.yml index 46373f829..370682401 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,6 @@ script: - mkdir $T2T_TRAIN_DIR - t2t-datagen --problem=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR - t2t-trainer --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --train_steps=5 --eval_steps=5 --output_dir=$T2T_TRAIN_DIR - - t2t-decoder --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --output_dir=$T2T_TRAIN_DIR + - t2t-decoder --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --output_dir=$T2T_TRAIN_DIR --decode_hparams='num_samples=10' git: depth: 3 diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index 8fa1e52d0..2f8e9cf30 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -74,7 +74,7 @@ def story_generator(tmp_dir): for path in paths: for story_file in tf.gfile.Glob(path + "*"): story = u"" - for line in tf.gfile.Open(story_file, 'rb'): + for line in tf.gfile.Open(story_file): line = unicode(line, "utf-8") if six.PY2 else line.decode("utf-8") story += line yield story diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index acd121868..f22e84794 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -350,18 +350,19 @@ def generate(): for source in sources: url = source[0] filename = os.path.basename(url) + read_type = "r:gz" if "tgz" in filename else "r" + compressed_file = maybe_download(tmp_dir, filename, url) + with tarfile.open(compressed_file, read_type) as corpus_tar: + corpus_tar.extractall(tmp_dir) + for lang_file in source[1]: tf.logging.info("Reading file: %s" % lang_file) filepath = os.path.join(tmp_dir, lang_file) - if not tf.gfile.Exists(filepath): - read_type = "r:gz" if filename.endswith("tgz") else "r" - with tarfile.open(compressed_file, read_type) as corpus_tar: - corpus_tar.extractall(tmp_dir) # For some datasets a second extraction is necessary. - if lang_file.endswith(".gz"): + if ".gz" in lang_file: new_filepath = os.path.join(tmp_dir, lang_file[:-3]) if tf.gfile.Exists(new_filepath): tf.logging.info( diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index f1b2b7dee..cde0bc9ac 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -19,9 +19,7 @@ from __future__ import division from __future__ import print_function -import glob import os -import stat import tarfile # Dependency imports @@ -266,10 +264,6 @@ def bi_vocabs_token_generator(source_path, # English-Czech datasets _ENCS_TRAIN_DATASETS = [ - [ - "https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-1458/data-plaintext-format.tar", - ('tsv', 3, 2, 'data.plaintext-format/*train.gz') - ], [ "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long ("training/news-commentary-v12.cs-en.en", @@ -375,64 +369,38 @@ def _compile_data(tmp_dir, datasets, filename): url = dataset[0] compressed_filename = os.path.basename(url) compressed_filepath = os.path.join(tmp_dir, compressed_filename) - generator_utils.maybe_download(tmp_dir, compressed_filename, url) - if dataset[1][0] == 'tsv': - _, src_column, trg_column, glob_pattern = dataset[1] - filenames = glob.glob(os.path.join(tmp_dir, glob_pattern)) - if not filenames: - mode = "r:gz" if compressed_filepath.endswith("gz") else "r" # *.tgz *.tar.gz - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - filenames = glob.glob(os.path.join(tmp_dir, glob_pattern)) - for tsv_filename in filenames: - if tsv_filename.endswith(".gz"): - new_filename = tsv_filename.strip(".gz") - try: - generator_utils.gunzip_file(tsv_filename, new_filename) - except PermissionError: - tsvdir = os.path.dirname(tsv_filename) - os.chmod(tsvdir, os.stat(tsvdir).st_mode | stat.S_IWRITE) - generator_utils.gunzip_file(tsv_filename, new_filename) - tsv_filename = new_filename - with tf.gfile.GFile(tsv_filename, mode="r") as tsv_file: - for line in tsv_file: - if line and "\t" in line: - parts = line.split("\t") - source, target = parts[src_column], parts[trg_column] - lang1_resfile.write(source.strip() + "\n") - lang2_resfile.write(target.strip() + "\n") - else: - lang1_filename, lang2_filename = dataset[1] - lang1_filepath = os.path.join(tmp_dir, lang1_filename) - lang2_filepath = os.path.join(tmp_dir, lang2_filename) - is_sgm = (lang1_filename.endswith("sgm") and - lang2_filename.endswith("sgm")) - - if not (os.path.exists(lang1_filepath) and - os.path.exists(lang2_filepath)): - # For .tar.gz and .tgz files, we read compressed. - mode = "r:gz" if compressed_filepath.endswith("gz") else "r" - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - if lang1_filepath.endswith(".gz"): - new_filepath = lang1_filepath.strip(".gz") - generator_utils.gunzip_file(lang1_filepath, new_filepath) - lang1_filepath = new_filepath - if lang2_filepath.endswith(".gz"): - new_filepath = lang2_filepath.strip(".gz") - generator_utils.gunzip_file(lang2_filepath, new_filepath) - lang2_filepath = new_filepath - with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: - with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: + lang1_filename, lang2_filename = dataset[1] + lang1_filepath = os.path.join(tmp_dir, lang1_filename) + lang2_filepath = os.path.join(tmp_dir, lang2_filename) + is_sgm = (lang1_filename.endswith("sgm") and + lang2_filename.endswith("sgm")) + + generator_utils.maybe_download(tmp_dir, compressed_filename, url) + if not (os.path.exists(lang1_filepath) and + os.path.exists(lang2_filepath)): + # For .tar.gz and .tgz files, we read compressed. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + if lang1_filepath.endswith(".gz"): + new_filepath = lang1_filepath.strip(".gz") + generator_utils.gunzip_file(lang1_filepath, new_filepath) + lang1_filepath = new_filepath + if lang2_filepath.endswith(".gz"): + new_filepath = lang2_filepath.strip(".gz") + generator_utils.gunzip_file(lang2_filepath, new_filepath) + lang2_filepath = new_filepath + with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: + with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: + line1, line2 = lang1_file.readline(), lang2_file.readline() + while line1 or line2: + line1res = _preprocess_sgm(line1, is_sgm) + line2res = _preprocess_sgm(line2, is_sgm) + if line1res or line2res: + lang1_resfile.write(line1res.strip() + "\n") + lang2_resfile.write(line2res.strip() + "\n") line1, line2 = lang1_file.readline(), lang2_file.readline() - while line1 or line2: - line1res = _preprocess_sgm(line1, is_sgm) - line2res = _preprocess_sgm(line2, is_sgm) - if line1res or line2res: - lang1_resfile.write(line1res.strip() + "\n") - lang2_resfile.write(line2res.strip() + "\n") - line1, line2 = lang1_file.readline(), lang2_file.readline() return filename @@ -662,18 +630,13 @@ def vocab_name(self): def generator(self, data_dir, tmp_dir, train): datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + source_datasets + target_datasets) tag = "train" if train else "dev" data_path = _compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) - vocab_datasets = [] - # CzEng contains 100 gz files with tab-separated columns, so let's expect - # it is the first dataset in datasets and use the newly created *.lang{1,2} files instead. - if datasets[0][0].endswith("data-plaintext-format.tar"): - vocab_datasets.append([datasets[0][0], - ["wmt_encs_tok_%s.lang1" % tag, "wmt_encs_tok_%s.lang2" % tag]]) - datasets = datasets[1:] - vocab_datasets += [[item[0], [item[1][0], item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, vocab_datasets) return token_generator(data_path + ".lang1", data_path + ".lang2", symbolizer_vocab, EOS) diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index c21dd973d..32b88e58d 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -176,14 +176,23 @@ def _problem_choice(choice_mode, mode, problem_count, loss_moving_avgs, def cond_on_index(fn, index_tensor, max_idx, cur_idx=0): """Call fn(index_tensor) using tf.cond in [cur_id, max_idx].""" - if cur_idx == max_idx: - return fn(cur_idx) + # Because tf.cond expects fn to return a flat list of Tensors, we flatten the + # output of fn. By capturing the original output here in orig_out, we can pack + # the flat sequence into the original structure. + orig_out = [] + + def wrapped_fn(): + out = fn(cur_idx) + orig_out.append(out) + return tf.contrib.framework.nest.flatten(out) - return tf.cond( - tf.equal(index_tensor, cur_idx), - lambda: fn(cur_idx), - lambda: cond_on_index(fn, index_tensor, max_idx, cur_idx + 1) - ) + if cur_idx == max_idx: + flat_out = wrapped_fn() + else: + flat_out = tf.cond( + tf.equal(index_tensor, cur_idx), wrapped_fn, + lambda: cond_on_index(fn, index_tensor, max_idx, cur_idx + 1)) + return tf.contrib.framework.nest.pack_sequence_as(orig_out[0], flat_out) class DummyQueueRunner(object): diff --git a/tensor2tensor/utils/input_fn_builder_test.py b/tensor2tensor/utils/input_fn_builder_test.py deleted file mode 100644 index 34b60c47a..000000000 --- a/tensor2tensor/utils/input_fn_builder_test.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for tensor2tensor.utils.input_fn_builder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensor2tensor.utils import input_fn_builder -import tensorflow as tf - - -class InputFnBuilderTest(tf.test.TestCase): - - def testCondOnIndex(self): - """Smoke tests of cond_on_index()""" - - z = tf.constant(1., dtype=tf.float32) - def f(n): - return { - "a": z * n, - "b": z * n * n - } - - index = tf.placeholder(shape=[], dtype=tf.int32) - out = input_fn_builder.cond_on_index(f, index, 3, 0) - - with self.test_session() as sess: - # Check dispatching to the correct branch - result = sess.run(out, feed_dict={ - index: 2 - }) - - self.assertAllClose(result["a"], 2.) - self.assertAllClose(result["b"], 4.) - - result = sess.run(out, feed_dict={ - index: 3 - }) - - self.assertAllClose(result["a"], 3.) - self.assertAllClose(result["b"], 9.) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 173ffb194..56ac17f38 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -43,8 +43,7 @@ class Metrics(object): ROUGE_2_F = "rouge_2_fscore" ROUGE_L_F = "rouge_L_fscore" EDIT_DISTANCE = "edit_distance" - SET_PRECISION = 'set_precision' - SET_RECALL = 'set_recall' + def padded_rmse(predictions, labels, weights_fn=common_layers.weights_all): predictions, labels = common_layers.pad_with_zeros(predictions, labels) @@ -189,48 +188,6 @@ def padded_accuracy(predictions, padded_labels = tf.to_int32(padded_labels) return tf.to_float(tf.equal(outputs, padded_labels)), weights -def set_precision(predictions, - labels, - weights_fn=common_layers.weights_nonzero): - """Precision of set predictions. - - Args: - predictions : A Tensor of scores of shape (batch, nlabels) - labels: A Tensor of int32s giving true set elements of shape (batch, seq_length) - - Returns: - hits: A Tensor of shape (batch, nlabels) - weights: A Tensor of shape (batch, nlabels) - """ - with tf.variable_scope("set_precision", values=[predictions, labels]): - labels = tf.squeeze(labels, [2, 3]) - labels = tf.one_hot(labels, predictions.shape[-1]) - labels = tf.reduce_max(labels, axis=1) - labels = tf.cast(labels, tf.bool) - predictions = predictions > 0 - return tf.to_float(tf.equal(labels, predictions)), tf.to_float(predictions) - -def set_recall(predictions, - labels, - weights_fn=common_layers.weights_nonzero): - """Recall of set predictions. - - Args: - predictions : A Tensor of scores of shape (batch, nlabels) - labels: A Tensor of int32s giving true set elements of shape (batch, seq_length) - - Returns: - hits: A Tensor of shape (batch, nlabels) - weights: A Tensor of shape (batch, nlabels) - """ - with tf.variable_scope("set_recall", values=[predictions, labels]): - labels = tf.squeeze(labels, [2, 3]) - labels = tf.one_hot(labels, predictions.shape[-1]) - labels = tf.reduce_max(labels, axis=1) - labels = tf.cast(labels, tf.bool) - predictions = predictions > 0 - return tf.to_float(tf.equal(labels, predictions)), tf.to_float(labels) - def create_evaluation_metrics(problems, model_hparams): """Creates the evaluation metrics for the model. @@ -321,6 +278,4 @@ def wrapped_metric_fn(): Metrics.ROUGE_2_F: rouge.rouge_2_fscore, Metrics.ROUGE_L_F: rouge.rouge_l_fscore, Metrics.EDIT_DISTANCE: sequence_edit_distance, - Metrics.SET_PRECISION: set_precision, - Metrics.SET_RECALL: set_recall, } From ffac0408e5196f7a994ed42e5d116ba60922fc93 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Sun, 1 Oct 2017 15:18:25 -0700 Subject: [PATCH 0465/4095] Rm uses of kwarg maxsplit in str.split to maintain Py2/3 compatibility PiperOrigin-RevId: 170647819 --- tensor2tensor/data_generators/generator_utils.py | 2 +- tensor2tensor/data_generators/wmt.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index f22e84794..1de27c5d2 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -412,7 +412,7 @@ def generate(): for line in source_file: line = line.strip() if line and "\t" in line: - parts = line.split("\t", maxsplit=1) + parts = line.split("\t", 1) part = parts[index].strip() yield part diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index cde0bc9ac..3d496cb5d 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -113,7 +113,7 @@ def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): with tf.gfile.GFile(source_path, mode="r") as source_file: for line in source_file: if line and "\t" in line: - parts = line.split("\t", maxsplit=1) + parts = line.split("\t", 1) source, target = parts[0].strip(), parts[1].strip() source_ints = source_vocab.encode(source) + eos_list target_ints = target_vocab.encode(target) + eos_list From 90fd09b28229a1bf8ee84a6a96fe1f8d44a6aa30 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 2 Oct 2017 15:52:02 -0700 Subject: [PATCH 0466/4095] internal merge PiperOrigin-RevId: 170767217 --- .../data_generators/generator_utils.py | 13 ++- tensor2tensor/data_generators/wmt.py | 103 ++++++++++++------ tensor2tensor/utils/input_fn_builder.py | 24 ++-- tensor2tensor/utils/input_fn_builder_test.py | 61 +++++++++++ 4 files changed, 143 insertions(+), 58 deletions(-) create mode 100644 tensor2tensor/utils/input_fn_builder_test.py diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 1de27c5d2..c8fe03564 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -350,19 +350,20 @@ def generate(): for source in sources: url = source[0] filename = os.path.basename(url) - read_type = "r:gz" if "tgz" in filename else "r" - compressed_file = maybe_download(tmp_dir, filename, url) - with tarfile.open(compressed_file, read_type) as corpus_tar: - corpus_tar.extractall(tmp_dir) - for lang_file in source[1]: tf.logging.info("Reading file: %s" % lang_file) filepath = os.path.join(tmp_dir, lang_file) + # Extract from tar if needed. + if not tf.gfile.Exists(filepath): + read_type = "r:gz" if filename.endswith("tgz") else "r" + with tarfile.open(compressed_file, read_type) as corpus_tar: + corpus_tar.extractall(tmp_dir) + # For some datasets a second extraction is necessary. - if ".gz" in lang_file: + if lang_file.endswith(".gz"): new_filepath = os.path.join(tmp_dir, lang_file[:-3]) if tf.gfile.Exists(new_filepath): tf.logging.info( diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 3d496cb5d..89cc7bd41 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -264,6 +264,11 @@ def bi_vocabs_token_generator(source_path, # English-Czech datasets _ENCS_TRAIN_DATASETS = [ + [ + ("https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/" + "11234/1-1458/data-plaintext-format.tar"), + ("tsv", 3, 2, "data.plaintext-format/*train.gz") + ], [ "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long ("training/news-commentary-v12.cs-en.en", @@ -370,37 +375,58 @@ def _compile_data(tmp_dir, datasets, filename): compressed_filename = os.path.basename(url) compressed_filepath = os.path.join(tmp_dir, compressed_filename) - lang1_filename, lang2_filename = dataset[1] - lang1_filepath = os.path.join(tmp_dir, lang1_filename) - lang2_filepath = os.path.join(tmp_dir, lang2_filename) - is_sgm = (lang1_filename.endswith("sgm") and - lang2_filename.endswith("sgm")) - - generator_utils.maybe_download(tmp_dir, compressed_filename, url) - if not (os.path.exists(lang1_filepath) and - os.path.exists(lang2_filepath)): - # For .tar.gz and .tgz files, we read compressed. - mode = "r:gz" if compressed_filepath.endswith("gz") else "r" - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - if lang1_filepath.endswith(".gz"): - new_filepath = lang1_filepath.strip(".gz") - generator_utils.gunzip_file(lang1_filepath, new_filepath) - lang1_filepath = new_filepath - if lang2_filepath.endswith(".gz"): - new_filepath = lang2_filepath.strip(".gz") - generator_utils.gunzip_file(lang2_filepath, new_filepath) - lang2_filepath = new_filepath - with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: - with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: - line1, line2 = lang1_file.readline(), lang2_file.readline() - while line1 or line2: - line1res = _preprocess_sgm(line1, is_sgm) - line2res = _preprocess_sgm(line2, is_sgm) - if line1res or line2res: - lang1_resfile.write(line1res.strip() + "\n") - lang2_resfile.write(line2res.strip() + "\n") + if dataset[1][0] == "tsv": + _, src_column, trg_column, glob_pattern = dataset[1] + filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) + if not filenames: + # Capture *.tgz and *.tar.gz too. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) + for tsv_filename in filenames: + if tsv_filename.endswith(".gz"): + new_filename = tsv_filename.strip(".gz") + generator_utils.gunzip_file(tsv_filename, new_filename) + tsv_filename = new_filename + with tf.gfile.GFile(tsv_filename, mode="r") as tsv_file: + for line in tsv_file: + if line and "\t" in line: + parts = line.split("\t") + source, target = parts[src_column], parts[trg_column] + lang1_resfile.write(source.strip() + "\n") + lang2_resfile.write(target.strip() + "\n") + else: + lang1_filename, lang2_filename = dataset[1] + lang1_filepath = os.path.join(tmp_dir, lang1_filename) + lang2_filepath = os.path.join(tmp_dir, lang2_filename) + is_sgm = (lang1_filename.endswith("sgm") and + lang2_filename.endswith("sgm")) + + if not (os.path.exists(lang1_filepath) and + os.path.exists(lang2_filepath)): + # For .tar.gz and .tgz files, we read compressed. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + if lang1_filepath.endswith(".gz"): + new_filepath = lang1_filepath.strip(".gz") + generator_utils.gunzip_file(lang1_filepath, new_filepath) + lang1_filepath = new_filepath + if lang2_filepath.endswith(".gz"): + new_filepath = lang2_filepath.strip(".gz") + generator_utils.gunzip_file(lang2_filepath, new_filepath) + lang2_filepath = new_filepath + with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: + with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: line1, line2 = lang1_file.readline(), lang2_file.readline() + while line1 or line2: + line1res = _preprocess_sgm(line1, is_sgm) + line2res = _preprocess_sgm(line2, is_sgm) + if line1res or line2res: + lang1_resfile.write(line1res.strip() + "\n") + lang2_resfile.write(line2res.strip() + "\n") + line1, line2 = lang1_file.readline(), lang2_file.readline() return filename @@ -630,13 +656,20 @@ def vocab_name(self): def generator(self, data_dir, tmp_dir, train): datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - source_datasets + target_datasets) tag = "train" if train else "dev" + vocab_datasets = [] data_path = _compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) + # CzEng contains 100 gz files with tab-separated columns, so let's expect + # it is the first dataset in datasets and use the newly created *.lang{1,2} + # files for vocab construction. + if datasets[0][0].endswith("data-plaintext-format.tar"): + vocab_datasets.append([datasets[0][0], ["wmt_encs_tok_%s.lang1" % tag, + "wmt_encs_tok_%s.lang2" % tag]]) + datasets = datasets[1:] + vocab_datasets += [[item[0], [item[1][0], item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + vocab_datasets) return token_generator(data_path + ".lang1", data_path + ".lang2", symbolizer_vocab, EOS) diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index 32b88e58d..f4a3098ad 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -175,24 +175,14 @@ def _problem_choice(choice_mode, mode, problem_count, loss_moving_avgs, def cond_on_index(fn, index_tensor, max_idx, cur_idx=0): """Call fn(index_tensor) using tf.cond in [cur_id, max_idx].""" - - # Because tf.cond expects fn to return a flat list of Tensors, we flatten the - # output of fn. By capturing the original output here in orig_out, we can pack - # the flat sequence into the original structure. - orig_out = [] - - def wrapped_fn(): - out = fn(cur_idx) - orig_out.append(out) - return tf.contrib.framework.nest.flatten(out) - if cur_idx == max_idx: - flat_out = wrapped_fn() - else: - flat_out = tf.cond( - tf.equal(index_tensor, cur_idx), wrapped_fn, - lambda: cond_on_index(fn, index_tensor, max_idx, cur_idx + 1)) - return tf.contrib.framework.nest.pack_sequence_as(orig_out[0], flat_out) + return fn(cur_idx) + + return tf.cond( + tf.equal(index_tensor, cur_idx), + lambda: fn(cur_idx), + lambda: cond_on_index(fn, index_tensor, max_idx, cur_idx + 1) + ) class DummyQueueRunner(object): diff --git a/tensor2tensor/utils/input_fn_builder_test.py b/tensor2tensor/utils/input_fn_builder_test.py new file mode 100644 index 000000000..ec2e6147e --- /dev/null +++ b/tensor2tensor/utils/input_fn_builder_test.py @@ -0,0 +1,61 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for tensor2tensor.utils.input_fn_builder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.utils import input_fn_builder +import tensorflow as tf + + +class InputFnBuilderTest(tf.test.TestCase): + + def testCondOnIndex(self): + """Smoke tests of cond_on_index().""" + + z = tf.constant(1., dtype=tf.float32) + def f(n): + return { + "a": z * n, + "b": z * n * n + } + + index = tf.placeholder(shape=[], dtype=tf.int32) + out = input_fn_builder.cond_on_index(f, index, 3, 0) + + with self.test_session() as sess: + # Check dispatching to the correct branch + result = sess.run(out, feed_dict={ + index: 2 + }) + + self.assertAllClose(result["a"], 2.) + self.assertAllClose(result["b"], 4.) + + result = sess.run(out, feed_dict={ + index: 3 + }) + + self.assertAllClose(result["a"], 3.) + self.assertAllClose(result["b"], 9.) + + +if __name__ == "__main__": + tf.test.main() From 6c4c82132ebf3a88b373b66ba1c71b8176ae2b73 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 2 Oct 2017 17:45:07 -0700 Subject: [PATCH 0467/4095] make metric name clear PiperOrigin-RevId: 170783412 --- tensor2tensor/utils/metrics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 56ac17f38..872c9f141 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -256,7 +256,10 @@ def wrapped_metric_fn(): metric_fn = METRICS_FNS[metric] problem_metric_fn = make_problem_specific_metric_fn( metric_fn, problem_idx, weights_fn) - eval_metrics["metrics-%s/%s" % (problem_name, metric)] = problem_metric_fn + + metric_name = "metrics-%s/%s" % (problem_name, metric) + + eval_metrics[metric_name] = problem_metric_fn return eval_metrics From 93019005fc2fdbb64808bd6ade6f90ff59ae9323 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Tue, 3 Oct 2017 09:14:58 -0700 Subject: [PATCH 0468/4095] First version of LSH attention PiperOrigin-RevId: 170865238 --- tensor2tensor/layers/common_attention.py | 352 ++++++++++++++++++----- tensor2tensor/models/aligned.py | 32 +++ tensor2tensor/models/attention_lm_moe.py | 11 +- 3 files changed, 319 insertions(+), 76 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 33ce7d4a9..f50b75c80 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -239,6 +239,86 @@ def add_positional_embedding_nd(x, max_length, name): return x +class LshGating(object): + """Class to split key/queries into separate buckets.""" + + def __init__(self, depth, nb_hyperplanes, nb_replicat=1, trainable=False): + """Construct the gating function parameters. + + Compute the gates for a single head. + + Args: + depth (int): Dimension of the key/queries to dispatch + nb_hyperplanes (int): Nb of vectors use to split the space. Will determine + the number of buckets (2^nb_hyperplanes - 1). + nb_replicat (int): Redundancy to avoid the edge cases (to be in one bucket + the input should be in a majority) + trainable (bool): If True, a balance loss is added to force the hyperplane + to divide the key/query space evenly + """ + self.depth = depth + self.nb_hyperplanes = nb_hyperplanes + self.nb_buckets = 2**nb_hyperplanes + self.nb_replicat = nb_replicat # Unused for now + self.trainable = trainable # Unused for now + + self.dispatchers = {} + + assert self.nb_replicat == 1 # For now + + with tf.variable_scope("lsh_gating"): + # Vectors defining the hyperplanes + self.t_vectors = tf.get_variable( + "vector", + shape=(self.depth, self.nb_hyperplanes * self.nb_replicat), + dtype=tf.float32, + trainable=self.trainable, + ) + # Projection vector from the bit space to similarity score space + self.t_group = tf.constant([ + self._idx_to_bits(i) + for i in range(self.nb_buckets) + ], dtype=tf.float32, name="group") + + def _idx_to_bits(self, i): + """Convert an group index to its bit representation.""" + bits = bin(i)[2:].zfill(self.nb_hyperplanes) # Pad the bits str with 0 + return [-1.0 if b == "0" else 1.0 for b in bits] + + @expert_utils.add_name_scope("lsh_gating") + def get_gates(self, x): + """Return the bucket id of the given tensor. + + Args: + x (tf.Tensor): float32 of shape [length, depth] + + Returns: + tf.Tensor: One-hot vector int64 of shape [heads, length, nb_buckets] + containing the id of the bucket + """ + + # The balance loss don't propagate to the rest of the network + x = tf.stop_gradient(x) + # [length, depth] * [depth, nb_vectors * replicat] + x = tf.matmul(x, self.t_vectors) + # [length, nb_vector * replicat] + x = tf.sign(x) # Get on which side of the hyperplane the keys are. + + # x = tf.reshape(x, [-1, nb_replicat, nb_vector]) + # [length, replicat, nb_vector] * [nb_vector, 2^nb_vector - 1] + + x = tf.matmul(x, self.t_group, transpose_b=True) / self.nb_hyperplanes + # We get a similarity score for each of the group between [-1, 1] + # [length, (replicat,) 2^nb_vector - 1] + # Do an argmax to get the most likely group for each replicat + x = tf.argmax(x, axis=-1) + # [length(, replicat)] + # One-hot for compatibility with the sparse dispatcher + x = tf.one_hot(x, self.nb_buckets) + # TODO(epot): Use a loss to force an even distribution + return x + + def embedding_to_padding(emb): """Calculates the padding mask based on which embeddings are all zero. @@ -2223,7 +2303,147 @@ def local_expert_attention( @expert_utils.add_name_scope() -def sparse_dot_product_attention(q, k, v, bc, experts_params): +def expert_dot_product(q, k, v, info_q=None, info_k=None): + """Perform dot product on a subset of the sequence. + + Can add a mask to the attention to prevent sequences to attend to each other + and to prevent attention to the futur. + + Args: + q (tf.Tensor): Queries of shape [length_expert_q, depth_k] + k (tf.Tensor): Keys of shape [length_expert_k, depth_k] + v (tf.Tensor): Values of shape [length_expert_k, depth_v] + info_q (BatchInfo): Batch info for queries. If None, no mask is added + info_k (BatchInfo): Batch info for keys + + Returns: + tf.Tensor: dot product attention output ([length_expert_q, depth_v]) + """ + + length_q = tf.shape(q)[0] + length_k = tf.shape(k)[0] + depth_v = v.get_shape().as_list()[-1] + + bias = None + if info_q is not None or info_k is not None: + # TODO(epot): Implement more generic version of the mask computation to + # have Q/K of different lengths + raise NotImplementedError("No mask for now") + + # Restore batch and head dimension + q, k, v = [tf.expand_dims(tf.expand_dims(t, 0), 0) for t in (q, k, v)] + + def is_zero(): + zeros = tf.zeros(shape=[1, 1, length_q, depth_v], dtype=tf.float32) + zeros = tf.Print(zeros, [length_k, length_q], "length_k/length_q: ") + return zeros + + def is_not_zero(): + return dot_product_attention( + q, k, v, + bias=bias, + # No image summary to avoid "Retval[0] does not have value" (because + # inside a condition) + make_image_summary=False, + ) + + # TODO(epot): Should make sure a query gets at least one key. Because the + # different sequences of a batch are merged, it's possible that a + # query from a sequence only receive memory from another sequence, so + # with the mask, the query will perform a softmax on -infinity values. + # A hack could be to add at least one sequence of each batch on each group so + # the query can attend to at least one element. + # Softmax(Q.K)*V + v_out = tf.cond( + tf.logical_or(tf.equal(length_q, 0), tf.equal(length_k, 0)), + is_zero, + is_not_zero, + ) + + # Remove batch and head dimension + v_out = tf.squeeze(v_out, axis=0) + v_out = tf.squeeze(v_out, axis=0) + return v_out + + +@expert_utils.add_name_scope() +def dot_product_single_head(q, k, v, gates_q, gates_k, bc): # pylint: disable=unused-argument + """Perform a dot product attention on a single sequence on a single head. + + This function dispatch the q, k, v and loop over the buckets to compute the + attention dot product on each subsequences. + + Args: + q (tf.Tensor): [length_q, depth_q] + k (tf.Tensor): [length_k, depth_q] + v (tf.Tensor): [length_k, depth_v] + gates_q (tf.Tensor): One-hot vector of shape [length_q, nb_buckets] + gates_k (tf.Tensor): One-hot vector of shape [length_k, nb_buckets] + bc (BatchInfo): Contains the batch coordinates and sequence order + + Returns: + tf.Tensor: [length_q, depth_v] + """ + + nb_buckets = gates_q.get_shape().as_list()[-1] + + q_dispatcher = expert_utils.SparseDispatcher(nb_buckets, gates_q) + k_dispatcher = expert_utils.SparseDispatcher(nb_buckets, gates_k) + + # Iterate over every dispatched group + list_v_out = [] + for ( + q, + k, + v, + # TODO(epot): If the batch are merged together, should also dispatch the + # sequence positions and batch coordinates + ) in zip( + q_dispatcher.dispatch(q), + k_dispatcher.dispatch(k), + k_dispatcher.dispatch(v), + ): + list_v_out.append(expert_dot_product(q, k, v, None, None)) + + # Combine all buckets together to restore the original length + return q_dispatcher.combine(list_v_out) + + +def map_fn_switch(fn, elems, use_map_fn=True, **kwargs): + """Construct the graph with either tf.map_fn or a python for loop. + + This function is mainly for for benchmarking purpose. + + tf.map_fn is dynamic but is much slower than creating a static graph with + for loop. However, having a for loop make the graph much longer to build + and can consume too much RAM on distributed setting. + + Args: + fn (fct): same that tf.map_fn but for now can only return a single tensor + value (instead of a tuple of tensor for the general case) + elems (tuple): same that tf.map_fn + use_map_fn (bool): If True, tf.map_fn is used, if False, for _ in _: is used + instead + **kwargs: Additional tf.map_fn arguments (ignored if use_map_fn is False) + + Returns: + tf.Tensor: the output of tf.map_fn + """ + if use_map_fn: + return tf.map_fn(fn, elems, **kwargs) + else: + elems_unpacked = ( + tf.unstack(e) for e in elems + ) + out_unpacked = [ + fn(e) for e in zip(*elems_unpacked) + ] + out = tf.stack(out_unpacked) + return out + + +@expert_utils.add_name_scope() +def sparse_dot_product_attention(q, k, v, bc, use_map_fn, experts_params): """Sparse multihead self attention. Perform an approximation of the full multihead attention by dispatching @@ -2240,97 +2460,85 @@ def sparse_dot_product_attention(q, k, v, bc, experts_params): * The bias is added inside this function to prevent attention to the future. Args: - q (tf.Tensor): Queries of shape [1, heads, length_q, depth_k] - k (tf.Tensor): Keys of shape [1, heads, length_q, depth_k] - v (tf.Tensor): Values of shape [1, heads, length_kv, depth_v] + q (tf.Tensor): Queries of shape [batch, heads, length_q, depth_k] + k (tf.Tensor): Keys of shape [batch, heads, length_q, depth_k] + v (tf.Tensor): Values of shape [batch, heads, length_kv, depth_v] bc (tf.Tensor): Batch coordinates of shape [1, length_q, 1] + use_map_fn (bool): Use either tf.map_fn of python for loop to compute the + heads separately experts_params (dict): Additional params for the local expert Returns: tf.Tensor: Approximation of Softmax(Q.K) * V, of shape - [1, heads, length_q, depth_v] + [batch, heads, length_q, depth_v] """ + flattened = (q.get_shape().as_list()[0] == 1 and + k.get_shape().as_list()[0] == 1 and + v.get_shape().as_list()[0] == 1) + _, nb_heads, _, depth = q.get_shape().as_list() - assert q.get_shape().as_list()[0] == 1 - assert k.get_shape().as_list()[0] == 1 - assert v.get_shape().as_list()[0] == 1 - - @expert_utils.add_name_scope() - def unpack_heads(x): + # First case: Either constant batch size of size 1 or batch already flattened + if flattened: # Flatten the batch. squeeze works because batch_size = 1 (otherwise could # use tf.transpose and flatten after unpacking) - x = tf.squeeze(x, axis=0) - list_x = tf.unstack(x) - return list_x # list[tf.Tensor(shape=[batch * length, depth])] + q = tf.squeeze(q, axis=0) + k = tf.squeeze(k, axis=0) + v = tf.squeeze(v, axis=0) + # Second case: Flatten batch dimension + else: + batch_size = tf.shape(q)[0] + q = tf.transpose(q, perm=[1, 0, 2, 3]) + k = tf.transpose(k, perm=[1, 0, 2, 3]) + v = tf.transpose(v, perm=[1, 0, 2, 3]) + q = tf.reshape(q, [nb_heads, -1, depth]) + k = tf.reshape(k, [nb_heads, -1, depth]) + v = tf.reshape(v, [nb_heads, -1, depth]) bc = tf.squeeze(bc, axis=0) - list_q = unpack_heads(q) - list_k = unpack_heads(k) - list_v = unpack_heads(v) - - @expert_utils.add_name_scope() - def expert_dot_product(x, q, k, v, bc): - """Perform dot product on a subset of the sequence. - Args: - x (tf.Tensor): Unused but forwarded by local_moe - q (tf.Tensor): Queries of shape [length_expert, depth_k] - k (tf.Tensor): Queries of shape [length_expert, depth_k] - v (tf.Tensor): Queries of shape [length_expert, depth_v] - bc (tf.Tensor): Batch coordinates of shape [length_expert, 1] + # Unstack heads + list_q = tf.unstack(q) # list[tf.Tensor(shape=[batch * length, depth])] + list_k = tf.unstack(k) + list_v = tf.unstack(v) - Returns: - tf.Tensor: dot product attention output ([length_expert, depth_v]) - """ - length = tf.shape(x)[0] - - # Mask between the sequences - bias_batch = attention_bias_coordinates(bc) - # Mask to prevent sequences of attenting to the future - bias_past = tf.reshape( - attention_bias_lower_triangle(length), [length, length]) - bias = bias_batch + bias_past # bias has shape [length, length] - bias = tf.reshape(bias, [1, 1, length, length]) - - # Restore batch and head dimension - q, k, v = [tf.expand_dims(tf.expand_dims(t, 0), 0) for t in (q, k, v)] - # Softmax(Q.K)*V - v_out = dot_product_attention(q, k, v, bias=bias) - # Remove batch and head dimension - v_out = tf.squeeze(v_out, axis=0) - v_out = tf.squeeze(v_out, axis=0) - return v_out + list_gates_q = [] + list_gates_k = [] - list_v_out = [] total_loss = 0.0 - for q, k, v in zip(list_q, list_k, list_v): + # There might be a more optimized way to compute all heads at once + for single_q, single_k, _ in zip(list_q, list_k, list_v): # Each head get its own dispatcher - - # TODO(epot): Choose which dispatcher use here on the k/q pair (either - # noisy_top_k_gating or Locality-sensitive hashing) - - # Concatenate along the depth axis - x = tf.concat([q, k], axis=-1) # Works because q and k lengths are the same - - # Compute the attention on the sparse tokens - v_out, loss = expert_utils.local_moe( - x=x, - expert_fn=expert_dot_product, - additional_dispatch_params=dict( - q=q, - k=k, - v=v, - bc=bc - ), + lhs_gating = LshGating( + depth=single_q.get_shape().as_list()[-1], **experts_params ) - list_v_out.append(v_out) - total_loss += loss + + list_gates_q.append(lhs_gating.get_gates(single_q)) + list_gates_k.append(lhs_gating.get_gates(single_k)) + + gates_q = tf.stack(list_gates_q) + gates_k = tf.stack(list_gates_k) + + # Process each head separatly + v_out = map_fn_switch( + lambda args: dot_product_single_head(bc=bc, *args), + elems=(q, k, v, gates_q, gates_k), + dtype=(tf.float32), + parallel_iterations=2, + # back_prop=True, + # swap_memory=False, + # infer_shape=True, + # name=None + use_map_fn=use_map_fn, + ) # Restore original shape as expected by multihead_attention - v_out = tf.stack(list_v_out) # Merge heads - v_out = tf.expand_dims(v_out, axis=0) - return v_out, total_loss / len(list_v_out) + if flattened: + v_out = tf.expand_dims(v_out, axis=0) # Restore batch_size = 1 + else: + v_out = tf.reshape(v_out, [nb_heads, batch_size, -1, depth]) + v_out = tf.transpose(v_out, [1, 0, 2, 3]) + return v_out, total_loss / nb_heads def scaled_dot_product_attention_simple(q, k, v, bias, name=None): diff --git a/tensor2tensor/models/aligned.py b/tensor2tensor/models/aligned.py index abfecbaed..939799a91 100644 --- a/tensor2tensor/models/aligned.py +++ b/tensor2tensor/models/aligned.py @@ -180,6 +180,26 @@ def _pseudolocal_bias(x): attention_v_size=hparams.attention_v_size) # TODO(avaswani, epot, noam): Do we need to divide by num shards ? extra_loss += tf.add_n(loss) / dp.n + elif layer_type == "att_lsh": + y, loss = dp( + common_attention.multihead_attention_sparse_dot_prod, + x, + None, + None, # Bias is computed inside + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + + # Additional parameters + bc=batch_coordinate, + use_map_fn=False, + experts_params=dict( + nb_hyperplanes=4, + ) + ) + extra_loss += tf.add_n(loss) / dp.n elif layer_type == "moe": y, loss = expert_utils.distributed_moe( dp, @@ -468,6 +488,18 @@ def aligned_moe(): return hparams +@registry.register_hparams +def aligned_lsh(): + """Use multihead_attention_sparse_dot_prod. + + Returns: + a hparams object + """ + hparams = aligned_base() + hparams.layers = "timing," + "conv,att_lsh,ffn," * 2 + return hparams + + @registry.register_hparams def aligned_8k(): """version for languagemodel_wiki_scramble8k50. diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 3a5b73a3e..2031ec375 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -66,7 +66,7 @@ def get_choices(): "h": AttentionType.MULTIHEAD, # multi-Head "e": AttentionType.LOCAL_EXPERTS, # Experts "m": AttentionType.MEMORY_EFFICIENT, # Memory - "s": AttentionType.SPARSE_MULTIHEAD, # Sparse + "s": AttentionType.SPARSE_MULTIHEAD, # Sparse (Locality sensitive hashing) } @@ -206,10 +206,9 @@ def print_shape(x, suffix, debug=False): # Additional parameters bc=batch_coordinate, + use_map_fn=hparams.lsh_use_map_fn, experts_params=dict( - train=hparams.mode == ModeKeys.TRAIN, - num_experts=hparams.attention_num_experts, - k=hparams.attention_moe_k, + nb_hyperplanes=hparams.lsh_num_hyperplanes, ), ) y = dp_restore_pad(y) @@ -513,6 +512,10 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_v_size", 256) # Loss coef for load balancing hparams.add_hparam("attention_load_balance", 2e-2) + # Locality-sensitive hashing params + hparams.add_hparam("lsh_num_hyperplanes", 4) + hparams.add_hparam("lsh_use_map_fn", int(False)) + hparams.add_hparam("use_sepconv", int(False)) hparams.add_hparam("diet_experts", int(False)) hparams.add_hparam("memory_efficient_ffn", int(False)) From 85668e117ad0b0bd2810ffa7cf2ba6ae3cee7c43 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 3 Oct 2017 13:02:52 -0700 Subject: [PATCH 0469/4095] Add data_dir to flag validation PiperOrigin-RevId: 170900325 --- tensor2tensor/utils/trainer_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index fcdf5a463..c8cbbaec9 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -353,6 +353,7 @@ def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): def validate_flags(): + """Validate command line flags.""" if not FLAGS.model: raise ValueError("Must specify a model with --model.") if not FLAGS.problems: @@ -365,6 +366,8 @@ def validate_flags(): FLAGS.output_dir = "/tmp/tensor2tensor" tf.logging.warning("It is strongly recommended to specify --output_dir. " "Using default output_dir=%s.", FLAGS.output_dir) + if not FLAGS.data_dir: + raise ValueError("Must specify --data_dir.") def is_chief(): From 9b93d558c1e0d6446bcd6ac4c23d496bd2bd97fa Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Tue, 3 Oct 2017 19:36:39 -0700 Subject: [PATCH 0470/4095] More generic batch masking, add bias masking for the sparse lsh attention PiperOrigin-RevId: 170953554 --- tensor2tensor/layers/common_attention.py | 155 ++++++++++++------ tensor2tensor/layers/common_attention_test.py | 58 +++++++ tensor2tensor/models/aligned.py | 5 +- tensor2tensor/models/attention_lm_moe.py | 16 +- tensor2tensor/utils/expert_utils.py | 2 + 5 files changed, 184 insertions(+), 52 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index f50b75c80..3676fe447 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function +import collections import functools import math @@ -36,6 +37,11 @@ from tensorflow.python.framework import function +# Struct conatining the sequences ids and order on a batch (are send to the +# expert to allow them to compute the bias mask) +BatchInfo = collections.namedtuple( + "BatchInfo", "coordinates, order") + _expert_count = 0 @@ -448,29 +454,59 @@ def attention_bias_proximal(length): @expert_utils.add_name_scope() -def attention_bias_coordinates(batch_coordinate): +def attention_bias_batch( + batch_coordinates_q, + batch_coordinates_k=None, + condition_fn=None, +): """Generate a mask to prevent the batch to attend to each others. Args: - batch_coordinate (tf.Tensor): int32 of shape [length, 1] containing the + batch_coordinates_q (tf.Tensor): int32 of shape [length_q, 1] containing the coordinates of the batches + batch_coordinates_k (tf.Tensor): int32 of shape [length_k, 1] containing the + coordinates of the batches. If None, do self attention (q and k identical) + condition_fn (fct): A predicat function defining which type of mask build Returns: - tf.Tensor: float32 mask of shape [length, length] containing either 0 or + tf.Tensor: float32 mask of shape [length_q, length_k] containing either 0 or -infinity (-1e9) """ - batch_coord_float = tf.squeeze(batch_coordinate, 1) + if batch_coordinates_k is None: + batch_coordinates_k = batch_coordinates_q + # Convert to float first because of b/25387198 - batch_coord_float = tf.to_float(batch_coord_float) - bc_v = tf.expand_dims(batch_coord_float, 1) - bc_h = tf.expand_dims(batch_coord_float, 0) - bias_batch = bc_v - bc_h # Broadcast to create [length, length] mask + def to_float(bc): + bc = tf.squeeze(bc, 1) + bc = tf.to_float(bc) + return bc + + bc_v = tf.expand_dims(to_float(batch_coordinates_q), 1) + bc_h = tf.expand_dims(to_float(batch_coordinates_k), 0) + bias_batch = bc_h - bc_v # Broadcast to create [length_q, length_k] mask # Theshold non zeros to 1.0 - bias_batch = tf.minimum(1.0, tf.abs(bias_batch)) + bias_batch = condition_fn(bias_batch) bias_batch *= -1e9 # Set non zeros to -infinity return bias_batch +# Mask to prevent individual sequences of the same batch to attend to each other +attention_bias_coordinates = functools.partial( + attention_bias_batch, + condition_fn=lambda bias: tf.minimum(1.0, tf.abs(bias)), +) + + +# Mask similar to upper triangular mask, but allow dispatching +attention_bias_future = functools.partial( + attention_bias_batch, + # Elems can attend to themself (otherwise would use bias_batch + 1.0) + # No tf.abs to concider the order + # tf.maximum and tf.minimum to threshold the values + condition_fn=lambda bias: tf.maximum(0.0, tf.minimum(1.0, bias)), +) + + def split_last_dimension(x, n): """Reshape x so that the last dimension becomes two dimensions. @@ -2119,6 +2155,7 @@ def parameter_attention(x, return y +@expert_utils.add_name_scope() def coordinate_tensor(shape, axis): """Return a tensor with given shape containing coordinte along given axis. @@ -2130,6 +2167,8 @@ def coordinate_tensor(shape, axis): A tensor with shape shape and type tf.int32, where each elements its coordinate along the given axis. """ + if axis < 0: + axis = tf.size(shape) + axis # Convert to positive for the one_hot indice r = tf.range(shape[axis]) r_shape = tf.one_hot( @@ -2303,7 +2342,7 @@ def local_expert_attention( @expert_utils.add_name_scope() -def expert_dot_product(q, k, v, info_q=None, info_k=None): +def expert_dot_product(q, k, v, info_q, info_k): """Perform dot product on a subset of the sequence. Can add a mask to the attention to prevent sequences to attend to each other @@ -2324,11 +2363,10 @@ def expert_dot_product(q, k, v, info_q=None, info_k=None): length_k = tf.shape(k)[0] depth_v = v.get_shape().as_list()[-1] - bias = None - if info_q is not None or info_k is not None: - # TODO(epot): Implement more generic version of the mask computation to - # have Q/K of different lengths - raise NotImplementedError("No mask for now") + # Create the mask + bias = attention_bias_coordinates(info_q.coordinates, info_k.coordinates) + if info_k.order is not None: + bias += attention_bias_future(info_q.order, info_k.order) # Restore batch and head dimension q, k, v = [tf.expand_dims(tf.expand_dims(t, 0), 0) for t in (q, k, v)] @@ -2367,7 +2405,7 @@ def is_not_zero(): @expert_utils.add_name_scope() -def dot_product_single_head(q, k, v, gates_q, gates_k, bc): # pylint: disable=unused-argument +def dot_product_single_head(q, k, v, gates_q, gates_k, bi): """Perform a dot product attention on a single sequence on a single head. This function dispatch the q, k, v and loop over the buckets to compute the @@ -2379,7 +2417,7 @@ def dot_product_single_head(q, k, v, gates_q, gates_k, bc): # pylint: disable=u v (tf.Tensor): [length_k, depth_v] gates_q (tf.Tensor): One-hot vector of shape [length_q, nb_buckets] gates_k (tf.Tensor): One-hot vector of shape [length_k, nb_buckets] - bc (BatchInfo): Contains the batch coordinates and sequence order + bi (BatchInfo): Contains the batch coordinates and sequence order Returns: tf.Tensor: [length_q, depth_v] @@ -2390,20 +2428,37 @@ def dot_product_single_head(q, k, v, gates_q, gates_k, bc): # pylint: disable=u q_dispatcher = expert_utils.SparseDispatcher(nb_buckets, gates_q) k_dispatcher = expert_utils.SparseDispatcher(nb_buckets, gates_k) + def eventually_dispatch(dispatcher, value): + if value is not None: + return dispatcher.dispatch(value) + return [None] * nb_buckets + # Iterate over every dispatched group list_v_out = [] for ( q, k, v, - # TODO(epot): If the batch are merged together, should also dispatch the - # sequence positions and batch coordinates + qbc, + qbo, + kbc, + kbo, ) in zip( + # Dispatch queries, keys and values q_dispatcher.dispatch(q), k_dispatcher.dispatch(k), k_dispatcher.dispatch(v), + # Also dispatch the sequence positions and batch coordinates + eventually_dispatch(q_dispatcher, bi.coordinates), + eventually_dispatch(q_dispatcher, bi.order), + eventually_dispatch(k_dispatcher, bi.coordinates), + eventually_dispatch(k_dispatcher, bi.order), ): - list_v_out.append(expert_dot_product(q, k, v, None, None)) + list_v_out.append(expert_dot_product( + q, k, v, + info_q=BatchInfo(coordinates=qbc, order=qbo), + info_k=BatchInfo(coordinates=kbc, order=kbo) + )) # Combine all buckets together to restore the original length return q_dispatcher.combine(list_v_out) @@ -2443,7 +2498,7 @@ def map_fn_switch(fn, elems, use_map_fn=True, **kwargs): @expert_utils.add_name_scope() -def sparse_dot_product_attention(q, k, v, bc, use_map_fn, experts_params): +def sparse_dot_product_attention(q, k, v, bi, use_map_fn, experts_params): """Sparse multihead self attention. Perform an approximation of the full multihead attention by dispatching @@ -2457,13 +2512,14 @@ def sparse_dot_product_attention(q, k, v, bc, use_map_fn, experts_params): contains the elements from all different batches) * Right now, only self attention is supported so length_q and length_kv should be identical and the function will add triangular mask. - * The bias is added inside this function to prevent attention to the future. + * If bi.order is not None, The bias is added inside this function to + prevent attention to the future. Args: q (tf.Tensor): Queries of shape [batch, heads, length_q, depth_k] k (tf.Tensor): Keys of shape [batch, heads, length_q, depth_k] v (tf.Tensor): Values of shape [batch, heads, length_kv, depth_v] - bc (tf.Tensor): Batch coordinates of shape [1, length_q, 1] + bi (BatchInfo): Contains the batch coordinates and sequence order use_map_fn (bool): Use either tf.map_fn of python for loop to compute the heads separately experts_params (dict): Additional params for the local expert @@ -2472,29 +2528,32 @@ def sparse_dot_product_attention(q, k, v, bc, use_map_fn, experts_params): tf.Tensor: Approximation of Softmax(Q.K) * V, of shape [batch, heads, length_q, depth_v] """ - flattened = (q.get_shape().as_list()[0] == 1 and - k.get_shape().as_list()[0] == 1 and - v.get_shape().as_list()[0] == 1) - _, nb_heads, _, depth = q.get_shape().as_list() - - # First case: Either constant batch size of size 1 or batch already flattened - if flattened: - # Flatten the batch. squeeze works because batch_size = 1 (otherwise could - # use tf.transpose and flatten after unpacking) - q = tf.squeeze(q, axis=0) - k = tf.squeeze(k, axis=0) - v = tf.squeeze(v, axis=0) - # Second case: Flatten batch dimension - else: - batch_size = tf.shape(q)[0] - q = tf.transpose(q, perm=[1, 0, 2, 3]) - k = tf.transpose(k, perm=[1, 0, 2, 3]) - v = tf.transpose(v, perm=[1, 0, 2, 3]) - q = tf.reshape(q, [nb_heads, -1, depth]) - k = tf.reshape(k, [nb_heads, -1, depth]) - v = tf.reshape(v, [nb_heads, -1, depth]) - - bc = tf.squeeze(bc, axis=0) + batch_size, nb_heads, _, depth = q.get_shape().as_list() + batch_size = batch_size or tf.shape(q)[0] + + @expert_utils.add_name_scope() + def flatten_first_dims(x): + # Case 1: Either constant batch size of size 1 or batch already flattened + if x.get_shape().as_list()[0] == 1: + return tf.squeeze(x, axis=0) + # Case 2: Flatten batch dimension + else: + x = tf.transpose(x, perm=[1, 0, 2, 3]) + x = tf.reshape(x, [nb_heads, -1, depth]) + return x + + def flatten_batch(x): + if x is None: + return x + return expert_utils.flatten_all_but_last(x) + + q = flatten_first_dims(q) + k = flatten_first_dims(k) + v = flatten_first_dims(v) + bi = BatchInfo( + coordinates=flatten_batch(bi.coordinates), + order=flatten_batch(bi.order), + ) # Unstack heads list_q = tf.unstack(q) # list[tf.Tensor(shape=[batch * length, depth])] @@ -2521,7 +2580,7 @@ def sparse_dot_product_attention(q, k, v, bc, use_map_fn, experts_params): # Process each head separatly v_out = map_fn_switch( - lambda args: dot_product_single_head(bc=bc, *args), + lambda args: dot_product_single_head(bi=bi, *args), elems=(q, k, v, gates_q, gates_k), dtype=(tf.float32), parallel_iterations=2, @@ -2533,7 +2592,7 @@ def sparse_dot_product_attention(q, k, v, bc, use_map_fn, experts_params): ) # Restore original shape as expected by multihead_attention - if flattened: + if isinstance(batch_size, int) and batch_size == 1: v_out = tf.expand_dims(v_out, axis=0) # Restore batch_size = 1 else: v_out = tf.reshape(v_out, [nb_heads, batch_size, -1, depth]) diff --git a/tensor2tensor/layers/common_attention_test.py b/tensor2tensor/layers/common_attention_test.py index ef67b0d8e..6f4a6a37c 100644 --- a/tensor2tensor/layers/common_attention_test.py +++ b/tensor2tensor/layers/common_attention_test.py @@ -258,6 +258,64 @@ def testDotProductAttentionRelative(self): res = session.run(a) self.assertEqual(res.shape, (5, 7, 12, 32)) + def testBiasBatchCoordinates(self): + """Testing the batch cooridnates mask.""" + q = tf.constant([0, 0, 1, 1, 1, 1, 2, 2, 2], dtype=tf.int32) + q = tf.expand_dims(q, axis=-1) + + k = tf.constant([0, 0, 0, 2, 2, 3, 3, 3], dtype=tf.int32) + k = tf.expand_dims(k, axis=-1) + + ground_truth = np.array([ + [0, 0, 0, 1, 1, 1, 1, 1], # 0 + [0, 0, 0, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1], # 1 (just masked) + [1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 0, 0, 1, 1, 1], # 2 + [1, 1, 1, 0, 0, 1, 1, 1], + [1, 1, 1, 0, 0, 1, 1, 1], + ], np.float32) * -1e9 + + bias = common_attention.attention_bias_coordinates(q, k) + + with self.test_session() as session: + session.run(tf.global_variables_initializer()) + self.assertAllClose( + bias.eval(), + ground_truth, + ) + + def testBiasFuture(self): + """Testing the sequence order mask.""" + q = tf.constant([0, 1, 2, 3, 0, 1, 2, 0, 1], dtype=tf.int32) + q = tf.expand_dims(q, axis=-1) + + k = tf.constant([0, 1, 2, 3, 4, 0, 1, 2], dtype=tf.int32) + k = tf.expand_dims(k, axis=-1) + + ground_truth = np.array([ + [0, 1, 1, 1, 1, 0, 1, 1], # 0 + [0, 0, 1, 1, 1, 0, 0, 1], # 1 + [0, 0, 0, 1, 1, 0, 0, 0], # 2 + [0, 0, 0, 0, 1, 0, 0, 0], # 3 + [0, 1, 1, 1, 1, 0, 1, 1], # 0 + [0, 0, 1, 1, 1, 0, 0, 1], # 1 + [0, 0, 0, 1, 1, 0, 0, 0], # 2 + [0, 1, 1, 1, 1, 0, 1, 1], # 0 + [0, 0, 1, 1, 1, 0, 0, 1], # 1 + ], np.float32) * -1e9 + + bias = common_attention.attention_bias_future(q, k) + + with self.test_session() as session: + session.run(tf.global_variables_initializer()) + self.assertAllClose( + bias.eval(), + ground_truth, + ) + if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/models/aligned.py b/tensor2tensor/models/aligned.py index 939799a91..760b03855 100644 --- a/tensor2tensor/models/aligned.py +++ b/tensor2tensor/models/aligned.py @@ -193,7 +193,10 @@ def _pseudolocal_bias(x): hparams.attention_dropout, # Additional parameters - bc=batch_coordinate, + bi=[common_attention.BatchInfo( + coordinates=batch_coordinate[i], + order=None, # No future mask + ) for i in range(dp.n)], use_map_fn=False, experts_params=dict( nb_hyperplanes=4, diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 2031ec375..57598388b 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -158,6 +158,9 @@ def print_shape(x, suffix, debug=False): batch_coordinate = dp(get_batch_coordinate, x) batch_coordinate = dp_remove_pad(batch_coordinate) batch_coordinate = dp_expand_bc(batch_coordinate) + batch_order = dp(get_batch_coordinate, x, axis=-1) + batch_order = dp_remove_pad(batch_order) + batch_order = dp_expand_bc(batch_order) x = dp(print_shape, x, "in") @@ -205,7 +208,10 @@ def print_shape(x, suffix, debug=False): hparams.attention_dropout, # Additional parameters - bc=batch_coordinate, + bi=[common_attention.BatchInfo( + coordinates=batch_coordinate[i], + order=batch_order[i], # No future mask + ) for i in range(dp.n)], use_map_fn=hparams.lsh_use_map_fn, experts_params=dict( nb_hyperplanes=hparams.lsh_num_hyperplanes, @@ -323,11 +329,12 @@ def attention_lm_moe_prepare_decoder(targets, hparams): return (decoder_input, decoder_self_attention_bias, pad_remover) -def get_batch_coordinate(x): +@expert_utils.add_name_scope() +def get_batch_coordinate(x, axis=0): """Return a flat int32 tensor of shape [1, batch_size*length, 1].""" # Compute the batch coordinate before flattening all batches batch_coordinate = tf.expand_dims( - common_attention.coordinate_tensor(tf.shape(x)[:-1], axis=0), axis=-1) + common_attention.coordinate_tensor(tf.shape(x)[:-1], axis=axis), axis=-1) return batch_coordinate @@ -392,6 +399,7 @@ def conv_elems_1d(x, factor, out_depth): return x +@expert_utils.add_name_scope() def expand_batch_coordinates(bc, length_factor): """Duplicate elements of bc by length_factor. @@ -412,6 +420,7 @@ def expand_batch_coordinates(bc, length_factor): return bc +@expert_utils.add_name_scope() def remove_pad(x, pad_remover, mode): """Remove padding by concatenating all dimension into one. @@ -439,6 +448,7 @@ def remove_pad(x, pad_remover, mode): return x +@expert_utils.add_name_scope() def restore_pad(x, ref_x, pad_remover, mode): x = tf.squeeze(x, axis=0) if mode != ModeKeys.PREDICT: diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index eb513d0e8..87bc285d5 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -677,6 +677,7 @@ def __init__(self, num_experts, gates): tf.reshape(self._gates, [-1]), self._batch_index * num_experts + self._expert_index) + @add_name_scope() def dispatch(self, inp): """Create one input Tensor for each expert. @@ -692,6 +693,7 @@ def dispatch(self, inp): inp = tf.gather(inp, self._batch_index) return tf.split(inp, self._part_sizes_tensor, 0, num=self._num_experts) + @add_name_scope() def combine(self, expert_out, multiply_by_gates=True): """Sum together the expert output, weighted by the gates. From 6ed35a509221ab56a886f0f0b557938c2ce4d55a Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 3 Oct 2017 22:01:02 -0700 Subject: [PATCH 0471/4095] Modify grouped_attention to use constant group sizes and not require PiperOrigin-RevId: 170962314 --- tensor2tensor/layers/common_attention.py | 279 +++++++++++++---------- tensor2tensor/models/aligned.py | 54 +++-- tensor2tensor/utils/expert_utils.py | 140 ++++++++++++ tensor2tensor/utils/expert_utils_test.py | 68 ++++++ 4 files changed, 395 insertions(+), 146 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 3676fe447..2095a690b 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -655,76 +655,6 @@ def attention_image_summary(attn, image_shapes=None): tf.summary.image("attention", image, max_outputs=1) -def grouped_attention_single(num_groups, q, kv, q_gates, m_gates): - """Compute grouped attention for one batch and one head. - - q is a Tensor of queries, and kv is Tensor of keys and values - (concatenated in dimension 1). - - q_gates and m_gates are float32 Tensors containing zeros and ones. - The ones indicate which positions belong to which groups. A - key-value pair can be in zero or more groups. Each query is in one - group. A query can only pay attention to key-value pairs which are - in its group. - - In addition to the usual output, we return two additional Tensors: - q_total and m_total. - - For query position i belonging to group g, q_total[i, g] contains - log(sum(exp(q_i dot k_j))) for all keys k_j in group g. - - For memory position j belonging to group g, m_total[j, g] contains - the sum of the attention weights over all queries and that memory position. - - q_total and m_total contain zeros in positions where the - corresponding query/memory does not belong to the corresponding - group. - - Args: - num_groups: an integer - q: Tensor with shape [length_q, depth_qk] - kv: Tensor with shape [length_kv, depth_qk + depth_v] - q_gates: Tensor with shape [length_q, num_groups] - m_gates: Tensor with shape [length_kv, num_groups] - - Returns: - o: Tensor with shape [length_q, depth_v] - q_total: Tensor with shape [length_q, num_groups] - m_total: Tensor with shape [length_kv, num_groups] - """ - q_dispatcher = expert_utils.SparseDispatcher(num_groups, q_gates) - m_dispatcher = expert_utils.SparseDispatcher(num_groups, m_gates) - q_length_coordinate = q_dispatcher.expert_to_batch_indices() - m_length_coordinate = m_dispatcher.expert_to_batch_indices() - dispatched_q = q_dispatcher.dispatch(q) - dispatched_kv = m_dispatcher.dispatch(kv) - length_q = tf.shape(q)[0] - length_kv = tf.shape(kv)[0] - depth_qk = tf.shape(q)[1] - depth_v = tf.shape(kv)[1] - depth_qk - o = [] - q_totals = [] - m_totals = [] - for e in xrange(num_groups): - k, v = tf.split(dispatched_kv[e], [depth_qk, depth_v], axis=1) - logits = tf.matmul(dispatched_q[e], k, transpose_b=True) - log_weights = tf.nn.log_softmax(logits) - weights = tf.exp(log_weights) - o.append(tf.matmul(weights, v)) - # For each query, this is the log of the sum of the unnormalized weights. - q_total = tf.reshape(logits[:, :1] - log_weights[:, :1], [-1]) - q_totals.append(tf.unsorted_segment_sum( - q_total, q_length_coordinate[e], length_q)) - epsilon = 1e-3 - m_total = tf.log(tf.reduce_sum(tf.stop_gradient(weights), axis=0) + epsilon) - m_totals.append( - tf.unsorted_segment_sum(m_total, m_length_coordinate[e], length_kv)) - o = q_dispatcher.combine(o, multiply_by_gates=False) - q_total = tf.stack(q_totals, axis=1) - m_total = tf.stack(m_totals, axis=1) - return o, q_total, m_total - - def grouped_attention_multihead(query_antecedent, memory_antecedent, total_key_depth, @@ -732,10 +662,31 @@ def grouped_attention_multihead(query_antecedent, output_depth, num_heads, num_groups, - threshold=0.3, - name=None, - make_image_summary=True): - """Dot-product attention with sparsity. + memory_target_density=2.0, + multiplicative_overhead=1.25, + additive_overhead=8.0, + mask_right=False, + make_image_summary=True, + name=None): + """Multi-head dot-product attention with sparsity. + + For each attention head, the queries are partitioned into groups. + For each group, only a subset of the key-value pairs are considered. + + The choices of groups are selected based on trained predictors of + the total attention given the group inclusion. + + memory_target_density indicates the average how many groups in which + a key-value pair should participate. + + We use auxialiary losses to ensure that each group contains roughly + the same number of queries and the same number of key-value pairs. + If for a given sequence, the actual number of queries/pairs sent to + an expert exceeds this target by a factor of more than + multiplicative_overhead, then the last ones are dropped. We use + this drop-last policy to avoid bleeding information backwards, which + is necessary when using this function with autoregressive + prediction. Args: query_antecedent: a Tensor with shape [batch, length_q, channels] @@ -745,9 +696,12 @@ def grouped_attention_multihead(query_antecedent, output_depth: an integer num_heads: an integer dividing total_key_depth and total_value_depth num_groups: an integer - threshold: a floating point number - name: an optional string + memory_target_density: a floating point scalar + multiplicative_overhead: a floating point scalar + additive_overhead: a floating point scalar + mask_right: a boolean make_image_summary: a boolean + name: an optional string Returns: A Tensor with shape [batch, length_q, output_depth] @@ -783,13 +737,18 @@ def grouped_attention_multihead(query_antecedent, # These are used to determine group inclusion. # We will train these by auxiliary losses. We use stop_gradient here # to keep these losses from back-propagating to the rest of the model. + # We add biases that help balance the usage of the experts. q_pred = common_layers.conv1d( tf.stop_gradient(query_antecedent), num_heads * num_groups, 1, name="q_pred") q_pred = split_heads(q_pred, num_heads) + q_bias = tf.get_variable("q_bias", [1, num_heads, 1, num_groups]) + q_pred_biased = q_pred + q_bias m_pred = common_layers.conv1d(tf.stop_gradient( memory_antecedent), num_heads * num_groups, 1, name="m_pred") m_pred = split_heads(m_pred, num_heads) + m_bias = tf.get_variable("m_bias", [1, num_heads, 1, num_groups]) + m_pred_biased = m_pred + m_bias q *= depth_qk**-0.5 # q, kv, q_pred, m_pred are all [batch, heads, length_[q/m], ?] # now reshape them all to [batch * heads, length, ?] @@ -797,41 +756,98 @@ def grouped_attention_multihead(query_antecedent, kv = combine_first_two_dimensions(kv) q_pred = combine_first_two_dimensions(q_pred) m_pred = combine_first_two_dimensions(m_pred) - q_group = tf.argmax(q_pred, axis=2) - q_gates = tf.one_hot(q_group, num_groups, axis=-1) - m_gates = tf.to_float(tf.greater(m_pred, math.log(threshold))) - # include first memory position in all groups, to avoid zero-sized tensors. - # TODO(noam): do we need to do this for queries too? - m_gates = tf.maximum( - m_gates, tf.reshape(tf.one_hot([0], length_kv), [1, length_kv, 1])) - q_group_size = tf.reduce_sum(q_gates, 1) - m_group_size = tf.reduce_sum(m_gates, 1) - - # compute the output - o, q_total, m_total = tf.map_fn( - lambda args: grouped_attention_single(num_groups, *args), - (q, kv, q_gates, m_gates), - dtype=(tf.float32, tf.float32, tf.float32), - parallel_iterations=1) - - # compute auxiliary losses to train the predictions - q_loss = tf.nn.l2_loss((q_total - q_pred) * q_gates) + q_pred_biased = combine_first_two_dimensions(q_pred_biased) + m_pred_biased = combine_first_two_dimensions(m_pred_biased) + q_group = tf.argmax(q_pred_biased, axis=2) + q_requests = tf.one_hot(q_group, num_groups, axis=-1) + m_requests = tf.to_float(tf.greater(m_pred_biased, 0.0)) + # include first memory position in all groups, to avoid division by zero. + m_requests = tf.maximum( + m_requests, tf.reshape(tf.one_hot([0], length_kv), [1, length_kv, 1])) + q_group_size = tf.reduce_sum(q_requests, 1) + m_group_size = tf.reduce_sum(m_requests, 1) + q_group_target_size = tf.to_float(length_q) / tf.to_float(num_groups) + m_group_target_size = ( + tf.to_float(length_kv) * memory_target_density + / tf.to_float(num_groups)) + capacity_q = tf.minimum(length_q, tf.to_int32( + q_group_target_size * multiplicative_overhead + additive_overhead)) + capacity_m = tf.minimum(length_kv, tf.to_int32( + m_group_target_size * multiplicative_overhead + additive_overhead)) + q_dispatcher = expert_utils.TruncatingDispatcher(q_requests, capacity_q) + m_dispatcher = expert_utils.TruncatingDispatcher(m_requests, capacity_m) + q_gates = q_dispatcher.gates() + m_gates = m_dispatcher.gates() + dispatched_q = q_dispatcher.dispatch(q) + dispatched_kv = m_dispatcher.dispatch(kv) + # dispatched_q: [batch * num_heads, num_groups, capacity_q, depth_qk] + # dispatched_kv: + # [batch * num_heads, num_groups, capacity_m, depth_qk + depth_v] + k, v = tf.split(dispatched_kv, [depth_qk, depth_v], axis=3) + logits = tf.matmul(dispatched_q, k, transpose_b=True) + bias = tf.expand_dims((m_dispatcher.nonpadding() - 1.0) * 1e9, 2) + if mask_right: + q_coordinate = tf.to_float( + tf.expand_dims(q_dispatcher.length_coordinate(), 3)) + m_coordinate = tf.to_float( + tf.expand_dims(m_dispatcher.length_coordinate(), 2)) + bias += tf.to_float(tf.greater(m_coordinate, q_coordinate)) * -1e9 + logits += bias + log_weights = tf.nn.log_softmax(logits) + weights = tf.exp(log_weights) + # For each query, this is the log of the sum of the unnormalized weights. + q_total = tf.stop_gradient(logits[:, :, :, :1] - log_weights[:, :, :, :1]) + # For each key, this is the sum of the normalized weights. + m_total = tf.expand_dims( + tf.reduce_sum(tf.stop_gradient(weights), axis=2), -1) + o = tf.matmul(weights, v) + o = q_dispatcher.combine(o) + + o = tf.reshape(o, [batch, num_heads, length_q, depth_v]) + o = combine_heads(o) + o = common_layers.conv1d(o, output_depth, 1, name="output_transform") + + m_total = m_dispatcher.combine(m_total) + q_total = q_dispatcher.combine(q_total) + q_total = tf.squeeze(q_total, -1) + m_total = tf.squeeze(m_total, -1) + # Compute summed m predictions for all groups + m_pred_used = tf.reduce_sum(tf.exp(m_pred) * m_dispatcher.gates(), axis=2) + q_pred_used = tf.reduce_sum(q_pred * q_dispatcher.gates(), axis=2) + epsilon = 1e-3 + m_pred_used = tf.log(m_pred_used + epsilon) + m_total = tf.log(m_total + epsilon) + m_loss = tf.nn.l2_loss(m_total - m_pred_used) + q_loss = tf.nn.l2_loss( + (q_total - q_pred_used) * tf.reduce_sum(q_gates, axis=2)) + q_loss /= tf.to_float(batch * length_q) - m_loss = tf.nn.l2_loss((m_total - m_pred) * m_gates) m_loss /= tf.to_float(batch * length_kv) + # We would like the query groups to be equal sized. The group # size is discrete, so we need some trick here. We add a loss # proportional to the product of the group size and the # predictions for that group. This encourages the predictions to # decrease for groups that are too big. - q_group_deviation = (q_group_size - tf.reduce_mean( - q_group_size, axis=1, keep_dims=True)) / tf.to_float(length_kv) - q_pred_mean = tf.reduce_mean(q_pred, axis=1) - q_pred_mean -= tf.reduce_mean(q_pred_mean, axis=1, keep_dims=True) - q_balance_loss = ( - tf.reduce_sum(q_pred_mean * q_group_deviation) / tf.to_float(batch)) + q_group_deviation = (q_group_size / q_group_target_size) - 1.0 + q_balance_loss = tf.reduce_sum( + tf.reduce_mean(q_pred_biased, axis=1) * q_group_deviation + ) / tf.to_float(batch) + m_group_deviation = (m_group_size / m_group_target_size) - 1.0 + m_balance_loss = tf.reduce_sum( + tf.reduce_mean(m_pred_biased, axis=1) * m_group_deviation + ) / tf.to_float(batch) + + # The losses in this function only propagate back to variables + # defined in this function, and the losses outside of this + # function only propagate back to variables outside of this + # function. Assuming some kind of adaptive learning algorithm, + # it should not matter how much we scale the losses in this function. + # Still we scale them down a lot so that they should not show up + # much in the overall loss for the model. extra_loss_multiplier = 1e-3 - extra_loss = (q_loss + m_loss + q_balance_loss) * extra_loss_multiplier + extra_loss = q_loss + m_loss + q_balance_loss + m_balance_loss + extra_loss *= extra_loss_multiplier # Show a bunch of summaries. if (not tf.get_variable_scope().reuse and @@ -843,32 +859,45 @@ def grouped_attention_multihead(query_antecedent, tf.summary.scalar("q_loss", q_loss) tf.summary.scalar("m_loss", m_loss) tf.summary.scalar("q_balance_loss", q_balance_loss) - density = ( - tf.reduce_sum(tf.to_float(m_group_size) * tf.to_float(q_group_size)) / - tf.to_float(batch * num_heads * length_q * length_kv)) - tf.summary.scalar("density", density) + tf.summary.scalar("m_balance_loss", m_balance_loss) + tf.summary.histogram("m_pred_used", m_pred_used) + tf.summary.histogram("m_total", m_total) + tf.summary.histogram("q_pred_used", q_pred_used) + tf.summary.histogram("q_total", q_total) if make_image_summary: + # image summaries are expensive. + # So we restrict them to head_num<4, query_position<512, batch_index=0. + trunc_heads = min(4, num_heads) + trunc_length_q = tf.minimum(length_q, 512) # We recompute the attention for the first example, in an inefficient # way - masking. This lets us show pretty pictures. - # [num_heads, length_q, group] - q_gates_0 = q_gates[:num_heads, :, :] - # [num_heads, length_kv, group] - m_gates_0 = m_gates[:num_heads, :, :] - mask = tf.matmul(q_gates_0, m_gates_0, transpose_b=True) - q_0 = q[:num_heads, :, :] - k_0 = kv[:num_heads, :, :depth_qk] - att_0 = tf.nn.softmax(tf.matmul(q_0, k_0, transpose_b=True)) - hdr = tf.pow(att_0, 0.2) # for high-dynamic-range - mask_channel = mask * tf.maximum(hdr, 0.3) - image = tf.stack([hdr, mask_channel, mask_channel], axis=3) - tf.summary.image("att", image, max_outputs=num_heads) - mask_coverage = tf.reduce_sum(mask * att_0) / ( - tf.to_float(length_q) * num_heads) + # [trunc_heads, length_q, group] + q_gates_trunc = q_gates[:trunc_heads, :trunc_length_q, :] + # [trunc_heads, length_kv, group] + m_gates_trunc = m_gates[:trunc_heads, :, :] + grouping_mask = tf.matmul( + q_gates_trunc, m_gates_trunc, transpose_b=True) + q_trunc = q[:trunc_heads, :trunc_length_q, :] + k_trunc = kv[:trunc_heads, :, :depth_qk] + logits_trunc = tf.matmul(q_trunc, k_trunc, transpose_b=True) + if mask_right: + band = tf.matrix_band_part( + tf.ones([trunc_length_q, length_kv]), -1, 0) + trunc_bias = tf.expand_dims((1.0 - band) * -1e9, 0) + logits_trunc += trunc_bias + att_trunc = tf.nn.softmax(logits_trunc) + mask_coverage = tf.reduce_sum(grouping_mask * att_trunc) / ( + tf.to_float(trunc_length_q) * trunc_heads) tf.summary.scalar("coverage", mask_coverage) - - o = tf.reshape(o, [batch, num_heads, length_q, depth_v]) - o = combine_heads(o) - o = common_layers.conv1d(o, output_depth, 1, name="output_transform") + att_trunc_hdr = tf.pow(att_trunc, 0.2) # for high-dynamic-range + mask_channel = grouping_mask * tf.maximum(att_trunc_hdr, 0.3) + image = tf.stack([att_trunc_hdr, mask_channel, mask_channel], axis=3) + tf.summary.image("att", image, max_outputs=trunc_heads) + # show one group for each head. + att_per_group = tf.expand_dims(weights[:trunc_heads, 0, :, :], -1) + tf.summary.image( + "att_per_group_%d", tf.pow(att_per_group, 0.2), + max_outputs=trunc_heads) return o, extra_loss diff --git a/tensor2tensor/models/aligned.py b/tensor2tensor/models/aligned.py index 760b03855..fe9a9ef5b 100644 --- a/tensor2tensor/models/aligned.py +++ b/tensor2tensor/models/aligned.py @@ -69,6 +69,12 @@ def postprocess(x, y): extra_loss = 0.0 ffn_hidden_sizes = [int(s) for s in hparams.ffn_hidden_sizes.split(",")] moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] + if hparams.mask_right: + def _bias(x): + return common_attention.attention_bias_lower_triangle(tf.shape(x)[1]) + bias = dp(_bias, x) + else: + bias = tf.zeros([1, 1, 1, 1]) if hparams.diet_experts: hsize, = moe_hidden_sizes @@ -97,13 +103,16 @@ def _diet_expert(x): common_attention.multihead_attention, x, None, - None, # bias + bias, # bias hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) elif layer_type == "att_grouped": + multiplicative_overhead = ( + hparams.multiplicative_overhead if hparams.mode == ModeKeys.TRAIN + else hparams.multiplicative_overhead_eval) y, loss = dp( common_attention.grouped_attention_multihead, x, @@ -113,24 +122,18 @@ def _diet_expert(x): hparams.hidden_size, hparams.num_heads, num_groups=hparams.attention_num_groups, + memory_target_density=hparams.memory_target_density, + multiplicative_overhead=multiplicative_overhead, make_image_summary=hparams.attention_image_summary, + mask_right=hparams.mask_right, ) extra_loss += tf.add_n(loss) / dp.n elif layer_type == "att_memory_efficient": assert hparams.layer_preprocess_sequence == "n" - zero_bias = tf.zeros([1, 1, 1, 1]) - y = dp( - common_attention.multihead_self_attention_memory_efficient, - x, - zero_bias, - hparams.num_heads) - elif layer_type == "att_memory_efficient": - assert hparams.layer_preprocess_sequence == "n" - zero_bias = tf.zeros([1, 1, 1, 1]) y = dp( common_attention.multihead_self_attention_memory_efficient, x, - zero_bias, + bias, hparams.num_heads) elif layer_type == "att_local": y = dp( @@ -143,7 +146,9 @@ def _diet_expert(x): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - attention_type="local_unmasked", + attention_type=( + "local_mask_right" if hparams.mask_right + else "local_unmasked"), block_length=hparams.local_attention_window, block_width=hparams.local_attention_window) elif layer_type == "att_pseudolocal": @@ -153,7 +158,7 @@ def _pseudolocal_bias(x): return common_attention.attention_bias_local( tf.shape(x)[1], hparams.local_attention_window, - hparams.local_attention_window) + 0 if hparams.mask_right else hparams.local_attention_window) pseudolocal_bias = dp(_pseudolocal_bias, x) y = dp( common_attention.multihead_attention, @@ -174,7 +179,7 @@ def _pseudolocal_bias(x): attention_num_experts=hparams.attention_num_experts, train=hparams.mode == ModeKeys.TRAIN, batch_coordinate=batch_coordinate, - mask_right=False, + mask_right=hparams.mask_right, split_batch=bool(hparams.attention_split_batch), attention_kq_size=hparams.attention_kq_size, attention_v_size=hparams.attention_v_size) @@ -310,7 +315,13 @@ def aligned_base(): hparams.add_hparam("memory_efficient_ffn", int(False)) hparams.add_hparam("local_attention_window", 128) hparams.add_hparam("attention_num_groups", 8) + hparams.add_hparam("memory_target_density", 2.0) + hparams.add_hparam("multiplicative_overhead", 1.25) + hparams.add_hparam("multiplicative_overhead_eval", 2.0) hparams.add_hparam("attention_image_summary", int(True)) + # For testing right-masking. + # This is not implemented in all layers. + hparams.add_hparam("mask_right", int(False)) return hparams @@ -350,10 +361,9 @@ def aligned_local_expert(): def aligned_grouped(): """Use local_expert_attention. - languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.62 - 2.7 steps/sec on P100 - (some problem with map_fn - need to tune this) - 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.02 + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.63 + 10.2 steps/sec on P100 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.04 Returns: a hparams object @@ -522,14 +532,16 @@ def aligned_8k(): def aligned_8k_grouped(): """version for languagemodel_wiki_scramble8k50. - languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.93 + languagemodel_wiki_scramble1k50, 1gpu, 7k steps: log(ppl)_eval = 2.92 3.3 steps/sec on P100 - 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.18 + 8gpu (8x batch), 7k steps: log(ppl)_eval = 2.15 Returns: a hparams object """ hparams = aligned_grouped() hparams.batch_size = 8192 - hparams.attention_image_summary = int(False) + # hparams.attention_image_summary = int(False) + hparams.num_groups = 16 + hparams.multiplicative_overhead = 1.1 return hparams diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 87bc285d5..5005cdb50 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -1021,3 +1021,143 @@ def local_moe(x, importance = tf.reduce_sum(gates, 0) loss = loss_coef * (cv_squared(importance) + cv_squared(load)) return y, loss + + +class TruncatingDispatcher(object): + """Helper for implementing a mixture of experts. + + A TruncatingDispatcher is useful when you need to deal with + fixed-sized Tensors. As opposed to a SparseDispatcher, which + produces batches of different sizes for the different experts, the + TruncatingDispatcher always produces batches of the same given size, + and the results are returned stacked in one big tensor. + + In the case where an expert is over-capacity, the last items that + should have gone to that expert are dropped. + + Confusingly, the inputs to a TruncatingDispatcher have both a + "batch" and a "length" dimension. Not only does each expert receive + the same total number of examples, it also receives the same number + of examples for each element of "batch". This behavior is necessary + for applications such as grouped attention, where we have a batch of + sequences, and we want each sequence to be divided evenly among + experts. For simpler applications like mixture-of-experts, you can + reshape the input so that the "batch" dimension is 1, and only the + "length" dimension is used. + """ + + @add_name_scope("truncating_dispatcher") + def __init__(self, requests, expert_capacity): + """Create a TruncatingDispatcher. + + Args: + requests: a boolean `Tensor` of shape `[batch, length, num_experts]`. + Alternatively, a float or int Tensor containing zeros and ones. + expert_capacity: a Scalar - maximum number of examples per expert per + batch element. + + Returns: + a TruncatingDispatcher + """ + self._requests = tf.to_float(requests) + self._expert_capacity = expert_capacity + expert_capacity_f = tf.to_float(expert_capacity) + self._batch, self._length, self._num_experts = tf.unstack( + tf.shape(self._requests), num=3) + + # [batch, length, num_experts] + position_in_expert = tf.cumsum(self._requests, axis=1, exclusive=True) + # [batch, length, num_experts] + self._gates = self._requests * tf.to_float( + tf.less(position_in_expert, expert_capacity_f)) + batch_index = tf.reshape( + tf.to_float(tf.range(self._batch)), [self._batch, 1, 1]) + length_index = tf.reshape( + tf.to_float(tf.range(self._length)), [1, self._length, 1]) + expert_index = tf.reshape( + tf.to_float(tf.range(self._num_experts)), [1, 1, self._num_experts]) + # position in a Tensor with shape [batch * num_experts * expert_capacity] + flat_position = ( + position_in_expert + + batch_index * (tf.to_float(self._num_experts) * expert_capacity_f) + + expert_index * expert_capacity_f) + # Tensor of shape [batch * num_experts * expert_capacity]. + # each element is an integer in [0, length) + self._indices = tf.unsorted_segment_sum( + data=tf.reshape((length_index + 1.0) * self._gates, [-1]), + segment_ids=tf.to_int32(tf.reshape(flat_position, [-1])), + num_segments=self._batch * self._num_experts * expert_capacity) + self._indices = tf.reshape( + self._indices, + [self._batch, self._num_experts, expert_capacity]) + # Tensors of shape [batch, num_experts, expert_capacity]. + # each element is 0.0 or 1.0 + self._nonpadding = tf.minimum(self._indices, 1.0) + # each element is an integer in [0, length) + self._indices = tf.nn.relu(self._indices - 1.0) + # self._flat_indices is [batch, num_experts, expert_capacity], with values + # in [0, batch * length) + self._flat_indices = tf.to_int32( + self._indices + + (tf.reshape(tf.to_float(tf.range(self._batch)), [-1, 1, 1]) + * tf.to_float(self._length))) + self._indices = tf.to_int32(self._indices) + + @add_name_scope("truncating_dispatcher_dispatch") + def dispatch(self, inp): + """Send the inputs to the experts. + + Args: + inp: a `Tensor` of shape "[batch, length, depth]` + Returns: + a tensor with shape [batch, num_experts, expert_capacity, depth] + """ + inp = tf.reshape(inp, [self._batch * self._length, -1]) + # [batch, num_experts, expert_capacity, depth] + ret = tf.gather(inp, self._flat_indices) + return ret + + @add_name_scope("truncating_dispatcher_combine") + def combine(self, x): + """Return the output from the experts. + + When one example goes to multiple experts, the outputs are summed. + + Args: + x: a Tensor with shape [batch, num_experts, expert_capacity, depth] + + Returns: + a `Tensor` with shape `[batch, length, depth] + """ + depth = tf.shape(x)[-1] + x *= tf.expand_dims(self._nonpadding, -1) + ret = tf.unsorted_segment_sum( + x, self._flat_indices, num_segments=self._batch * self._length) + ret = tf.reshape(ret, [self._batch, self._length, depth]) + return ret + + def nonpadding(self): + """Which elements of a dispatched Tensor are not padding. + + Returns: + a Zero/One float tensor with shape [batch, num_experts, expert_capacity]. + """ + return self._nonpadding + + def gates(self): + """A Tensor indicating which examples go to which experts. + + Returns: + A float32 Tensor with shape [batch, length, num_experts], where each value + is 0.0 or 1.0. + """ + return self._gates + + def length_coordinate(self): + """Length coordinate of dispatched tensor. + + Returns: + a tensor with shape [batch, num_experts, expert_capacity] containing + integers in the range [0, length) + """ + return self._indices diff --git a/tensor2tensor/utils/expert_utils_test.py b/tensor2tensor/utils/expert_utils_test.py index 93af9c78c..f9abc72c1 100644 --- a/tensor2tensor/utils/expert_utils_test.py +++ b/tensor2tensor/utils/expert_utils_test.py @@ -138,6 +138,74 @@ def testPadRemover(self): 0., # pad ]) + def testTruncatingDispatcher(self): + """Check that the TruncatingDispatcher is working correctly.""" + # batch = 1 + # length = 3 + # num_experts = 2 + expert_capacity = 2 + requests = tf.constant([ + [[True, False], + [True, True], + [True, False]], + [[False, False], + [False, True], + [True, False]] + ], dtype=tf.float32) + dispatcher = expert_utils.TruncatingDispatcher(requests, expert_capacity) + x = tf.constant([ + [[3, 4], + [5, 6], + [7, 8]], + [[2, 3], + [4, 5], + [6, 7]] + ], dtype=tf.float32) + dispatched = dispatcher.dispatch(x) + dispatched_expected = [ + [[[3, 4], [5, 6]], + [[5, 6], [3, 4]]], + [[[6, 7], [2, 3]], + [[4, 5], [2, 3]]] + ] + y = [ + [[[7, 12], [11, 30]], + [[-1, 30], [9, 9]]], + [[[13, 42], [9, 9]], + [[-1, 20], [9, 9]]] + ] + combined = dispatcher.combine(y) + combined_expected = [ + [[7, 12], + [10, 60], + [0, 0]], + [[0, 0], + [-1, 20], + [13, 42]] + ] + nonpadding = dispatcher.nonpadding() + nonpadding_expected = [ + [[1, 1], + [1, 0]], + [[1, 0], + [1, 0]] + ] + gates = dispatcher.gates() + gates_expected = [ + [[1, 0], + [1, 1], + [0, 0]], + [[0, 0], + [0, 1], + [1, 0]] + ] + + with self.test_session() as sess: + self._verify_value(sess, dispatched, dispatched_expected) + self._verify_value(sess, combined, combined_expected) + self._verify_value(sess, nonpadding, nonpadding_expected) + self._verify_value(sess, gates, gates_expected) + if __name__ == '__main__': tf.test.main() From e8f1f57096966dec85e763dbad907da9c426e440 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Wed, 4 Oct 2017 11:53:23 -0700 Subject: [PATCH 0472/4095] Add cifar-10 for img2img PiperOrigin-RevId: 171041262 --- tensor2tensor/data_generators/image.py | 29 ++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 5b41c4e19..d03a65d9e 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -654,6 +654,35 @@ def preprocess_example(self, example, mode, unused_hparams): return example +@registry.register_problem +class Img2imgCifar10(ImageCifar10): + """CIFAR-10 rescaled to 8x8 for input and 32x32 for output.""" + + def dataset_filename(self): + return "image_cifar10_plain" # Reuse CIFAR-10 plain data. + + def preprocess_example(self, example, unused_mode, unused_hparams): + + def resize(img, size): + return tf.to_int64( + tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) + + inputs = example["inputs"] + # For Img2Img resize input and output images as desired. + example["inputs"] = resize(inputs, 8) + example["targets"] = resize(inputs, 32) + return example + + def hparams(self, defaults, unused_model_hparams): + p = defaults + p.input_modality = {"inputs": ("image:identity_no_pad", None)} + p.target_modality = ("image:identity_no_pad", None) + p.batch_size_multiplier = 256 + p.max_expected_batch_size_per_shard = 4 + p.input_space_id = 1 + p.target_space_id = 1 + + # URLs and filenames for MSCOCO data. _MSCOCO_ROOT_URL = "http://msvocds.blob.core.windows.net/" _MSCOCO_URLS = [ From 26dc4d63edbeada3436c5f27ff58540860dc9c3a Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 4 Oct 2017 14:02:35 -0700 Subject: [PATCH 0473/4095] Add `tpu_batch_size_per_shard` to the `HParams` PiperOrigin-RevId: 171059318 --- tensor2tensor/layers/common_hparams.py | 5 ++++- tensor2tensor/tpu/tpu_trainer.py | 2 +- tensor2tensor/tpu/tpu_trainer_lib.py | 1 - 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index d3ebfdffe..491944382 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -155,7 +155,10 @@ def basic_params1(): # position in the inputs portion can see the # entire inputs portion. This removes the challenge of # autoregressively predicting the inputs portion. - prepend_mode="none",) + prepend_mode="none", + # This is the actual batch size, *not* tokens per batch (i.e. for + # language models this is the number of sentences in the batch) + tpu_batch_size_per_shard=24,) class RangedHParams(object): diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 2c6292405..fac21f50d 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -58,7 +58,7 @@ def main(unused_argv): output_dir=FLAGS.output_dir, master=FLAGS.master, num_shards=FLAGS.tpu_num_shards, - batch_size=hparams.batch_size_per_shard * FLAGS.tpu_num_shards, + batch_size=hparams.tpu_batch_size_per_shard * FLAGS.tpu_num_shards, log_device_placement=FLAGS.log_device_placement) estimator.train( lambda params: input_fn(tf.estimator.ModeKeys.TRAIN, params), diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index c6bba9d41..f98b0488a 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -282,7 +282,6 @@ def transformer_tpu(): hp.use_pad_remover = int(False) # where op not supported # Inputs - hp.add_hparam("batch_size_per_shard", 24) # Each example in the batch will be of (padded) length hp.max_length hp.max_length = 64 From ef2fe8d3711217f347bc8620c5e06f4b6993ad52 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Wed, 4 Oct 2017 19:49:39 -0700 Subject: [PATCH 0474/4095] Batch buckets together on the lsh attention to improve performances PiperOrigin-RevId: 171100463 --- tensor2tensor/layers/common_attention.py | 166 +++++++++++++++++++++++ tensor2tensor/models/aligned.py | 8 +- 2 files changed, 173 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 2095a690b..86ee596c1 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -2629,6 +2629,169 @@ def flatten_batch(x): return v_out, total_loss / nb_heads +@expert_utils.add_name_scope() +def dot_product_batched_head(q, k, v, gates_q, gates_k): + """Perform a dot product attention on a single sequence on a single head. + + This function dispatch the q, k, v and loop over the buckets to compute the + attention dot product on each subsequences. + + Args: + q (tf.Tensor): [batch*heads, length_q, depth_q] + k (tf.Tensor): [batch*heads, length_k, depth_q] + v (tf.Tensor): [batch*heads, length_k, depth_v] + gates_q (tf.Tensor): One-hot of shape [batch*heads, length_q, nb_buckets] + gates_k (tf.Tensor): One-hot of shape [batch*heads, length_k, nb_buckets] + + Returns: + tf.Tensor: [length_q, depth_v] + """ + # Right now Q and K have same length + length = tf.shape(q)[1] + nb_buckets = tf.shape(gates_q)[-1] + capacity = length // nb_buckets * 3 # Capacity is hardcoded + capacity = tf.minimum(length, capacity) + + tf.summary.scalar("dispatch_capacity", capacity, family="lsh") + def add_summary_capacity(x, prefix): + # Monitor if capacity overflow + x = x[0, ...] # Take first batch/head + x = tf.reduce_sum(x, axis=0) + tf.summary.scalar(prefix + "_min", tf.reduce_min(x), family="lsh") + tf.summary.scalar(prefix + "_max", tf.reduce_max(x), family="lsh") + tf.summary.histogram(prefix + "capacity_distribution", x, family="lsh") + for i in range(3): # Show the first 3 buckets + tf.summary.scalar("{}_{}".format(prefix, i), x[i], family="lsh") + add_summary_capacity(gates_q, "q") + add_summary_capacity(gates_k, "k") + + q_dispatcher = expert_utils.TruncatingDispatcher(gates_q, capacity) + k_dispatcher = expert_utils.TruncatingDispatcher(gates_k, capacity) + + q = q_dispatcher.dispatch(q) + k = k_dispatcher.dispatch(k) + v = k_dispatcher.dispatch(v) + + # TODO(epot): Forward the padding bias and future + # Bias of shape [batch*heads, nb_buckets, 1, capacity] broadcasted to every + # queries + bias = tf.expand_dims((k_dispatcher.nonpadding() - 1.0) * 1e9, 2) + + # q, k, v now have shape [batch*heads, nb_bucket, capacity, depth] + # The buckets can be seen as different heads + v_out = dot_product_attention(q, k, v, bias=bias) + + # Combine all buckets together to restore the original length + return q_dispatcher.combine(v_out) + + +@expert_utils.add_name_scope() +def sparse_dot_product_attention_truncated( + q, k, v, bi, use_map_fn, experts_params): # pylint: disable=unused-argument + """Sparse multihead self attention. + + Perform an approximation of the full multihead attention by dispatching + the tokens using their keys/values. Thus the attention matrix are only + computed each times on a subset of the tokens. + + Notes: + * The function don't perform scaling here (multihead_attention does + the /sqrt(depth)). + * The padding should have been removed (so batch size should be 1 but length + contains the elements from all different batches) + * Right now, only self attention is supported so length_q and length_kv + should be identical and the function will add triangular mask. + * If bi.order is not None, The bias is added inside this function to + prevent attention to the future. + + Args: + q (tf.Tensor): Queries of shape [batch, heads, length_q, depth_k] + k (tf.Tensor): Keys of shape [batch, heads, length_q, depth_k] + v (tf.Tensor): Values of shape [batch, heads, length_kv, depth_v] + bi (BatchInfo): Contains the batch coordinates and sequence order + use_map_fn (bool): Use either tf.map_fn of python for loop to compute the + heads separately + experts_params (dict): Additional params for the local expert + + Returns: + tf.Tensor: Approximation of Softmax(Q.K) * V, of shape + [batch, heads, length_q, depth_v] + """ + # Currently depth is the same for for q and v + batch_size, nb_heads, _, depth = q.get_shape().as_list() + batch_size = batch_size or tf.shape(q)[0] + + total_loss = 0.0 + + # Each head get its own dispatcher + list_lsh = [ + LshGating( + depth=depth, + **experts_params + ) for _ in range(nb_heads) + ] + + @expert_utils.add_name_scope() + def get_gates_head(x, add_first=False): + """Return the gates for each heads of the current x. + + Args: + x (tf.Tensor): of shape [batch, heads, length, depth] + add_first (bool): if True, add the first element on each bucket + + Returns: + tf.Tensor: gates of shape [batch, heads, length, num_buckets] + """ + length = tf.shape(x)[2] + + # Invert heads/batch + x = tf.transpose(x, perm=[1, 0, 2, 3]) + x = tf.reshape(x, [nb_heads, batch_size*length, depth]) + + list_x = tf.unstack(x) # list[tf.Tensor(shape=[batch * length, depth])] + + # Unstack heads + list_gates = [] + # There might be a more optimized way to compute all heads at once + for lsh, single_x in zip(list_lsh, list_x): + # Each head get its own dispatcher + gates = lsh.get_gates(single_x) + nb_buckets = gates.get_shape().as_list()[-1] + # Reshape to [batch, length, depth] but should concider sequence + # padding in that case (also dispatch the padding) + gates = tf.reshape(gates, [batch_size, length, nb_buckets]) + list_gates.append(gates) + + gates = tf.stack(list_gates) + + # Restore original shape + gates = tf.reshape(gates, [nb_heads, batch_size, length, nb_buckets]) + gates = tf.transpose(gates, [1, 0, 2, 3]) + + # Dispatch the first element to every gates to avoid empty buckets + if add_first: + gates = tf.maximum( + gates, + tf.reshape(tf.one_hot([0], length), [1, 1, length, 1]) + ) + + return gates + + gates_q = get_gates_head(q) + gates_k = get_gates_head(k, add_first=True) + + # [batch, heads, length, depth] => [batch*heads, length, depth] + q, k, v, gates_q, gates_k = [ + combine_first_two_dimensions(t) for t in (q, k, v, gates_q, gates_k)] + + v_out = dot_product_batched_head(q, k, v, gates_q, gates_k) + + # Restore original dimension + v_out = tf.reshape(v_out, [batch_size, nb_heads, -1, depth]) + + return v_out, total_loss / nb_heads + + def scaled_dot_product_attention_simple(q, k, v, bias, name=None): """scaled dot-product attention. One head. One spatial dimension. @@ -2778,3 +2941,6 @@ def forward_fn(x, wqkv, wo, attention_bias, norm_scale, norm_bias): multihead_attention_sparse_dot_prod = functools.partial( multihead_attention, attention_type=sparse_dot_product_attention) + +multihead_attention_sparse_truncated = functools.partial( + multihead_attention, attention_type=sparse_dot_product_attention_truncated) diff --git a/tensor2tensor/models/aligned.py b/tensor2tensor/models/aligned.py index fe9a9ef5b..a0e92da94 100644 --- a/tensor2tensor/models/aligned.py +++ b/tensor2tensor/models/aligned.py @@ -186,8 +186,12 @@ def _pseudolocal_bias(x): # TODO(avaswani, epot, noam): Do we need to divide by num shards ? extra_loss += tf.add_n(loss) / dp.n elif layer_type == "att_lsh": + if hparams.lsh_truncated: + attention_fn = common_attention.multihead_attention_sparse_truncated + else: + attention_fn = common_attention.multihead_attention_sparse_dot_prod y, loss = dp( - common_attention.multihead_attention_sparse_dot_prod, + attention_fn, x, None, None, # Bias is computed inside @@ -319,6 +323,8 @@ def aligned_base(): hparams.add_hparam("multiplicative_overhead", 1.25) hparams.add_hparam("multiplicative_overhead_eval", 2.0) hparams.add_hparam("attention_image_summary", int(True)) + # LSH params + hparams.add_hparam("lsh_truncated", int(True)) # For testing right-masking. # This is not implemented in all layers. hparams.add_hparam("mask_right", int(False)) From 999105325a37a827b674582743876ae9bf1cad3c Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 5 Oct 2017 15:41:58 -0700 Subject: [PATCH 0475/4095] Switch TPU optimizer to Adam PiperOrigin-RevId: 171220272 --- tensor2tensor/tpu/tpu_trainer_lib.py | 19 ++++--------------- tensor2tensor/utils/model_builder.py | 19 ++++++++++++------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index f98b0488a..6e3c4db62 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -171,16 +171,7 @@ def model_fn(features, labels, mode, params, config): lr /= math.sqrt(float(num_shards)) # Optimizer - opt_name = hparams.optimizer - if opt_name == "Momentum": - opt = tf.train.MomentumOptimizer( - lr, momentum=hparams.optimizer_momentum_momentum) - else: - if hparams.optimizer not in ["RMSProp", "SGD"]: - tf.logging.warn( - "Only Momentum, RMSProp, and SGD are known to work on TPU.") - opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[opt_name](lr) - + opt = model_builder.ConditionalOptimizer(hparams.optimizer, lr, hparams) if use_tpu: opt = tf.contrib.tpu.CrossShardOptimizer(opt) @@ -246,7 +237,7 @@ def make_estimator(model_fn, output_dir, master="", batch_size=16, - iterations_per_loop=100, + iterations_per_loop=1000, num_shards=8, per_host_input_for_training=True, use_tpu=True, @@ -283,12 +274,10 @@ def transformer_tpu(): # Inputs # Each example in the batch will be of (padded) length hp.max_length + # Batch size per shard is governed by tpu_batch_size_per_shard hp.max_length = 64 - hp.optimizer = "Momentum" # can be SGD, Momentum, RMSProp - hp.norm_type = "none" # seem to get nans with layer norm - hp.clip_grad_norm = 2. - hp.norm_epsilon = 1e-3 + hp.optimizer = "TrueAdam" hp.layer_preprocess_sequence = "n" hp.layer_postprocess_sequence = "da" return hp diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 370104907..44a6f5208 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -292,7 +292,7 @@ def nth_model(n): # Optimize total_loss = tf.identity(total_loss, name="total_loss") - opt = _ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) + opt = ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) opt_summaries = ["learning_rate", "loss"] if hparams.summarize_grads: opt_summaries.extend(["gradients", "gradient_norm"]) @@ -350,7 +350,7 @@ def wrapping_model_fn(features, labels, mode, params): return wrapping_model_fn -class _ConditionalOptimizer(tf.train.Optimizer): +class ConditionalOptimizer(tf.train.Optimizer): """Conditional optimizer.""" def __init__(self, optimizer_name, lr, hparams): @@ -369,16 +369,21 @@ def __init__(self, optimizer_name, lr, hparams): tf.logging.info("Init YellowFin Optimizer.") self._opt = yellowfin.YellowFinOptimizer( learning_rate=lr, momentum=hparams.optimizer_momentum_momentum) + elif optimizer_name == "TrueAdam": + self._opt = tf.train.AdamOptimizer( + lr / 500.0, + beta1=hparams.optimizer_adam_beta1, + beta2=hparams.optimizer_adam_beta2, + epsilon=hparams.optimizer_adam_epsilon) else: self._opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer_name](lr) - def compute_gradients(self, loss, var_list, colocate_gradients_with_ops): - return self._opt.compute_gradients( - loss, var_list, colocate_gradients_with_ops=colocate_gradients_with_ops) + def compute_gradients(self, loss, var_list=None, **kwargs): + return self._opt.compute_gradients(loss, var_list, **kwargs) - def apply_gradients(self, gradients, global_step=None, name=None): + def apply_gradients(self, grads_and_vars, global_step=None, name=None): return self._opt.apply_gradients( - gradients, global_step=global_step, name=name) + grads_and_vars, global_step=global_step, name=name) def _sqrt_decay(step): From 7b2426725d97c11377f87210dd2d546d2d43398f Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 5 Oct 2017 17:19:33 -0700 Subject: [PATCH 0476/4095] Set eval_delay_secs=0 to speed up eval PiperOrigin-RevId: 171232440 --- tensor2tensor/utils/trainer_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index c8cbbaec9..e90e2dd10 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -167,6 +167,7 @@ def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, min_eval_frequency=FLAGS.local_eval_frequency, train_monitors=train_monitors, eval_hooks=eval_hooks, + eval_delay_secs=0, **optional_kwargs) From d5bdfcc85fa3e10a73902974f2c0944dc51f6a33 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 6 Oct 2017 14:16:50 -0700 Subject: [PATCH 0477/4095] Make n-da with small dropouts the base config. PiperOrigin-RevId: 171342727 --- tensor2tensor/models/transformer.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index e0f619805..68ce9604d 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -563,14 +563,13 @@ def transformer_ffn_layer(x, hparams, pad_remover=None): @registry.register_hparams -def transformer_base(): +def transformer_base_v1(): """Set of hyperparameters.""" hparams = common_hparams.basic_params1() hparams.norm_type = "layer" hparams.hidden_size = 512 hparams.batch_size = 4096 hparams.max_length = 256 - hparams.dropout = 0.0 hparams.clip_grad_norm = 0. # i.e. no gradient clipping hparams.optimizer_adam_epsilon = 1e-9 hparams.learning_rate_decay_scheme = "noam" @@ -611,6 +610,24 @@ def transformer_base(): return hparams +@registry.register_hparams +def transformer_base_v2(): + hparams = transformer_base_v1() + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + hparams.layer_prepostprocess_dropout = 0.1 + hparams.attention_dropout = 0.1 + hparams.relu_dropout = 0.1 + hparams.learning_rate_warmup_steps = 8000 + hparams.learning_rate = 0.2 + return hparams + + +@registry.register_hparams +def transformer_base(): + return transformer_base_v2() + + @registry.register_hparams def transformer_n_da(): """Normalize on layer input, instead of after residual connection. From dd6997edef11a51cf5f61fe04a7f3ed5a8ba854a Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Fri, 6 Oct 2017 14:32:55 -0700 Subject: [PATCH 0478/4095] fix tf.while bug when using eval_run_autoregressive. PiperOrigin-RevId: 171345135 --- tensor2tensor/utils/t2t_model.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 72e2ea602..e45aa35a7 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -332,6 +332,10 @@ def _slow_greedy_infer(self, features, decode_length, last_position_only): features["inputs"] = tf.expand_dims(features["inputs"], 2) if not self.has_input: features["partial_targets"] = tf.to_int64(features["inputs"]) + # Save the targets in a var and reassign it after the tf.while loop to avoid + # having targets being in a 'while' frame. This ensures targets when used + # in metric functions stays in the same frame as other vars. + targets_old = features["targets"] def infer_step(recent_output, recent_logits, unused_loss): """Inference step.""" @@ -394,6 +398,8 @@ def infer_step(recent_output, recent_logits, unused_loss): parallel_iterations=1) if inputs_old is not None: # Restore to not confuse Estimator. features["inputs"] = inputs_old + # Reassign targets back to the previous value. + features["targets"] = targets_old losses = {"training": loss} if "partial_targets" in features: partial_target_length = tf.shape(features["partial_targets"])[1] From ae6a879ddd05cf07e898d901f3a7ce70a3076251 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 6 Oct 2017 17:37:29 -0700 Subject: [PATCH 0479/4095] More VAE options: enable semantic hashing for bit-vectors, double-VAE. PiperOrigin-RevId: 171370468 --- tensor2tensor/models/transformer_vae.py | 150 +++++++++++++++++------- 1 file changed, 107 insertions(+), 43 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index d2b1bf631..67ec86ef5 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -129,7 +129,7 @@ def dae(x, hparams, name): gumbel_samples *= common_layers.inverse_exp_decay(steps // 5) * 0.5 temperature = 1.2 - common_layers.inverse_lin_decay(steps) # 30% of the time keep reasonably high temperature to keep learning. - temperature = tf.cond(tf.less(tf.random_uniform([]), 0.7), + temperature = tf.cond(tf.less(tf.random_uniform([]), 0.9), lambda: temperature, lambda: tf.random_uniform([], minval=0.5, maxval=1.0)) s = tf.nn.softmax((logsm + gumbel_samples) / temperature) @@ -144,22 +144,56 @@ def dae(x, hparams, name): d_mean = tf.reduce_mean(distrib, axis=[0], keep_dims=True) d_variance = tf.reduce_mean(tf.square(distrib - d_mean), axis=[0]) d_dev = - tf.reduce_mean(d_variance) - ret = s # If we want just hot, do tf.reshape(maxvhot, tf.shape(s)) + ret = s + if hparams.mode != tf.contrib.learn.ModeKeys.TRAIN: + ret = tf.reshape(maxvhot, tf.shape(s)) # Just hot on eval/infer. return m, ret, d_dev * 5.0 + tf.reduce_mean(kl) * 0.002 -def vae(x, hparams, name): +def vae(x, z_size, name): with tf.variable_scope(name): - mu = tf.layers.dense(x, hparams.z_size, name="mu") - log_sigma = tf.layers.dense(x, hparams.z_size, name="log_sigma") + mu = tf.layers.dense(x, z_size, name="mu") + log_sigma = tf.layers.dense(x, z_size, name="log_sigma") shape = tf.shape(x) - epsilon = tf.random_normal([shape[0], shape[1], 1, hparams.z_size]) + epsilon = tf.random_normal([shape[0], shape[1], 1, z_size]) z = mu + tf.exp(log_sigma / 2) * epsilon kl = 0.5 * tf.reduce_mean( tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1) return z, tf.reduce_mean(kl), mu, log_sigma +def bit_vae(x, hparams, name): + with tf.variable_scope(name): + bity = tf.layers.dense(x, hparams.z_size, name="bity") + dev = common_layers.inverse_lin_decay(hparams.startup_steps) * 1.5 + noise = tf.random_normal(tf.shape(bity), mean=0.0, stddev=dev) + y = common_layers.saturating_sigmoid(bity + noise) + tf.summary.histogram("bit", tf.reshape(y, [-1])) + def discrete_y(): + d = tf.to_float(tf.less(0.5, y)) + return tf.stop_gradient(d) + y - tf.stop_gradient(y) + y = tf.cond(tf.less(tf.train.get_global_step(), hparams.startup_steps), + lambda: y, discrete_y) + # Flatten and predict for loss. + y_flat = tf.reshape(y, [-1, hparams.z_size, 1, 1]) + hsize = hparams.hidden_size + hparams.hidden_size = hsize // 2 + emb0 = tf.get_variable("emb0", [hparams.hidden_size]) + emb1 = tf.get_variable("emb1", [hparams.hidden_size]) + emb0 = tf.reshape(emb0, [1, 1, 1, hparams.hidden_size]) + emb1 = tf.reshape(emb0, [1, 1, 1, hparams.hidden_size]) + y_emb = y_flat * emb1 + (1 - y_flat) * emb0 + y_logit = decode(None, None, y_emb, None, None, hparams, "dbit") + hparams.hidden_size = hsize + y_pred = tf.nn.log_softmax(tf.layers.dense(y_logit, 2, name="y_pred")) + y_flat = tf.reshape(y_flat, [-1]) + y_pred = tf.reshape(y_pred, [-1, 2]) + loss = - (y_flat * y_pred[:, 1] + (1 - y_flat) * y_pred[:, 0]) + # Get the final z and return. + z = tf.layers.dense(y, hparams.z_size, name="after_bit") + return z, tf.reduce_mean(loss) + + def nearest(x, means, hparams): """Find the nearest means to elements in x.""" x, means = tf.stop_gradient(x), tf.stop_gradient(means) @@ -223,18 +257,19 @@ def encode(x, x_space, hparams, name): encoder_input, encoder_self_attention_bias, hparams), ed -def decode(cond_vec, cond_add, gold, c, ed, hparams): +def decode(cond_vec, cond_add, gold, c, ed, hparams, name): """Transformer decoder.""" - drop_gold = tf.nn.dropout(gold, 1.0 - hparams.layer_prepostprocess_dropout) - decoder_input = common_layers.shift_right(drop_gold, pad_value=cond_vec) - if cond_add is not None: - decoder_input += cond_add - decoder_input = tf.squeeze(decoder_input, axis=2) - decoder_input = common_attention.add_timing_signal_1d(decoder_input) - bias = common_attention.attention_bias_lower_triangle(tf.shape(gold)[1]) - if c is not None and len(c.get_shape()) > 3: - c = tf.squeeze(c, axis=2) - return transformer.transformer_decoder(decoder_input, c, bias, ed, hparams) + with tf.variable_scope(name): + drop_gold = tf.nn.dropout(gold, 1.0 - hparams.layer_prepostprocess_dropout) + decoder_input = common_layers.shift_right(drop_gold, pad_value=cond_vec) + if cond_add is not None: + decoder_input += cond_add + decoder_input = tf.squeeze(decoder_input, axis=2) + decoder_input = common_attention.add_timing_signal_1d(decoder_input) + bias = common_attention.attention_bias_lower_triangle(tf.shape(gold)[1]) + if c is not None and len(c.get_shape()) > 3: + c = tf.squeeze(c, axis=2) + return transformer.transformer_decoder(decoder_input, c, bias, ed, hparams) def expand_batch(x, mul): @@ -256,9 +291,26 @@ def ae_compress(x, is_2d, hparams, name, reuse=None): # Convolve and ReLu to get state. cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") - # To put a standard VAE use the line below. - # cur, vae_kl, _, _ = vae(cur, hparams, "kmeans_vae") - means = tf.get_variable("z_to_dense", [hparams.v_size, hparams.hidden_size]) + means_size = hparams.z_size if hparams.do_vae else hparams.v_size + means = tf.get_variable("z_to_dense", [means_size, hparams.hidden_size]) + if hparams.do_vae: + if hparams.bit_vae: + hot, loss = bit_vae(cur, hparams, "bvae") + else: + hot, loss, _, _ = vae(cur, hparams.z_size, "vae") + # Do a second level vae with some probability. + if hparams.z_size2 > 0: + prob_z2 = common_layers.inverse_exp_decay(hparams.startup_steps*2) * 0.8 + if hparams.mode != tf.contrib.learn.ModeKeys.TRAIN: + prob_z2 = 1.0 + def vae2(): + hot2, loss2, _, _ = vae(hot, hparams.z_size2, "vae2") + ret = tf.layers.dense(hot2, hparams.z_size) + return mix(ret, hot, hparams.startup_steps * 2), loss2 + hot, loss2 = tf.cond(tf.less(tf.random_uniform([]), prob_z2), + vae2, lambda: (hot, tf.constant(0.0))) + loss += loss2 * 0.1 + return cur, hot, loss if hparams.use_gumbel_softmax: _, hot, loss = dae(cur, hparams, "dae") return cur, hot, loss @@ -275,12 +327,13 @@ def ae_compress(x, is_2d, hparams, name, reuse=None): def ae_embed(hot, hparams, name, reuse=None): with tf.variable_scope(name, reuse=reuse): - means = tf.get_variable("z_to_dense", [hparams.v_size, hparams.hidden_size]) - hot_flat = tf.reshape(hot, [-1, hparams.v_size]) + means_size = hparams.z_size if hparams.do_vae else hparams.v_size + means = tf.get_variable("z_to_dense", [means_size, hparams.hidden_size]) + hot_flat = tf.reshape(hot, [-1, means_size]) emb = tf.matmul(hot_flat, means) emb = tf.reshape(emb, [tf.shape(hot)[0], tf.shape(hot)[1], tf.shape(hot)[2], hparams.hidden_size]) - if hparams.use_gumbel_softmax: + if hparams.use_gumbel_softmax or hparams.do_vae: return emb return tf.layers.dense(emb, hparams.hidden_size, name="unnormalize", reuse=reuse) @@ -289,14 +342,14 @@ def ae_embed(hot, hparams, name, reuse=None): def ae_decompress(z, ae, x, is_2d, hparams, name, reuse=None): """Decompress from z, leaking from ae.""" with tf.variable_scope(name + "_decompress", reuse=reuse): - if hparams.use_gumbel_softmax: + if hparams.use_gumbel_softmax or hparams.do_vae: # Leak at the beginning to help train. z = mix(z, ae, hparams.startup_steps) else: # Gradients flow to ae while the value is z. z = tf.stop_gradient(z) + ae - tf.stop_gradient(ae) # Leak during training to keep the full dense autoencoder. - prob_z = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.6 + prob_z = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.8 prob_z = prob_z if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN else 1.0 z = tf.cond(tf.less(tf.random_uniform([]), prob_z), lambda: z, lambda: ae) @@ -319,7 +372,7 @@ def ae_decompress(z, ae, x, is_2d, hparams, name, reuse=None): x_batch = tf.stop_gradient(x_batch) z_batch = tf.reshape(z, [-1, 1, 1, hparams.hidden_size]) d_batch = tf.reshape(d, [-1, k, 1, hparams.hidden_size]) - dec_batch = decode(z_batch, d_batch, x_batch, None, None, hparams) + dec_batch = decode(z_batch, d_batch, x_batch, None, None, hparams, "dar") else: # For non-autoregressive. dec_batch = d z = tf.reshape(dec_batch, [-1, tf.shape(x)[1], tf.shape(x)[2], @@ -352,21 +405,25 @@ def ae_transformer_internal(inputs, targets, target_space, hparams): emb = ae_embed(hot, hparams, "ae", reuse=True) # Compress context and run autoregressive decoder on emb-hot. - emb_flat = tf.expand_dims(common_layers.flatten4d3d(emb), axis=2) - emb_flat = tf.stop_gradient(emb_flat) - dec_c = decode(None, None, emb_flat, inputs, ed, hparams) - dec_c = tf.reshape(dec_c, tf.shape(emb)) - c_z = tf.layers.dense(dec_c, hparams.v_size, name="mask_context") - reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( - labels=hot, logits=c_z) - # If not training, use the predicted z instead of the autoregressive one. - if hparams.mode == tf.estimator.ModeKeys.PREDICT: - hot = tf.one_hot(tf.argmax(c_z, axis=-1), hparams.v_size) + if hparams.do_vae: + reconstruct_loss = 0.0 + else: + emb_flat = tf.expand_dims(common_layers.flatten4d3d(emb), axis=2) + emb_flat = tf.stop_gradient(emb_flat) + dec_c = decode(None, None, emb_flat, inputs, ed, hparams, "dgold") + dec_c = tf.reshape(dec_c, tf.shape(emb)) + c_z = tf.layers.dense(dec_c, hparams.v_size, name="mask_context") + reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( + labels=hot, logits=c_z) + # If not training, use the predicted z instead of the autoregressive one. + if hparams.mode == tf.estimator.ModeKeys.PREDICT: + hot = tf.one_hot(tf.argmax(c_z, axis=-1), hparams.v_size) # Decompress, pass for ae loss. z = ae_decompress(emb, ae, targets, hparams.is_2d, hparams, "ae") - kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.8), - min_value=0.0001) + if not (hparams.use_gumbel_softmax and hparams.softmax_k > 0): + kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.8), + min_value=0.0001) reconstruct_loss *= common_layers.inverse_exp_decay(hparams.startup_steps) losses = {"kl": kl, "reconstruction": reconstruct_loss * 0.1} return z, losses @@ -425,6 +482,7 @@ def transformer_ae_small(): hparams.batch_size = 2048 hparams.learning_rate_warmup_steps = 4000 hparams.add_hparam("z_size", 128) + hparams.add_hparam("z_size2", 0) hparams.add_hparam("v_size", 1024*32) hparams.add_hparam("num_compress_steps", 4) hparams.add_hparam("kl_warmup_steps", 60000) @@ -433,8 +491,10 @@ def transformer_ae_small(): hparams.add_hparam("z_dropout", 0.1) hparams.add_hparam("is_2d", 0) hparams.add_hparam("use_gumbel_softmax", int(True)) - hparams.add_hparam("softmax_k", 4) + hparams.add_hparam("softmax_k", 0) hparams.add_hparam("decode_autoregressive", int(True)) + hparams.add_hparam("do_vae", int(True)) + hparams.add_hparam("bit_vae", int(True)) return hparams @@ -442,15 +502,19 @@ def transformer_ae_small(): def transformer_ae_cifar(): """Hyperparameters for CIFAR-10 experiments.""" hparams = transformer_ae_small() - hparams.hidden_size = 384 - hparams.z_size = 256 - hparams.batch_size = 1024 * 16 + hparams.hidden_size = 256 + hparams.filter_size = 512 + hparams.z_size = 256 # 64 + hparams.z_size2 = 0 # 16 + hparams.batch_size = 1024 * 4 hparams.num_compress_steps = 2 hparams.v_size = 1024 * 16 hparams.kl_warmup_steps = 150000 - hparams.startup_steps = 30000 + hparams.startup_steps = 20000 hparams.kmeans_lr_factor = 0.0 hparams.is_2d = 1 + hparams.learning_rate_warmup_steps = 8000 + hparams.learning_rate = 0.2 return hparams From dcb29d42af837393a7b362604461c39b11de4196 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Mon, 9 Oct 2017 14:37:15 -0700 Subject: [PATCH 0480/4095] Compressed multihead attention PiperOrigin-RevId: 171588391 --- tensor2tensor/layers/common_attention.py | 120 +++++++++++++++++++++++ tensor2tensor/models/attention_lm_moe.py | 103 +++++++------------ 2 files changed, 157 insertions(+), 66 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 86ee596c1..d973cf3a6 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -2792,6 +2792,126 @@ def get_gates_head(x, add_first=False): return v_out, total_loss / nb_heads +@expert_utils.add_var_scope() +def deconv_elems_1d(x, factor, out_depth=None): + """Increase the length and change the dimensionality. + + Expand/project each positions of dim depth of the input into + factor*tokens of dim out_depth + + Args: + x (tf.Tensor): shape [batch_size, length, depth] + factor (int): Multiplicative factor of each tokens. + out_depth (int): Output depth (if None, keep depth constant) + + Returns: + tf.Tensor: shape [batch_size, length*factor, out_depth] + """ + out_depth = out_depth or x.get_shape().as_list()[-1] + x = tf.expand_dims(x, 1) # [batch_size, 1, length, depth] + x = tf.layers.conv2d_transpose( + inputs=x, + filters=out_depth, + kernel_size=(1, factor), + strides=(1, factor), + padding="valid", + data_format="channels_last", + ) # [batch_size, 1, length*factor, out_depth] + x = tf.squeeze(x, 1) # [batch_size, length*factor, depth] + return x + + +@expert_utils.add_var_scope() +def conv_elems_1d(x, factor, out_depth=None): + """Decrease the length and change the dimensionality. + + Merge/restore/compress factors positions of dim depth of the input into + a single position of dim out_depth. + This is basically just a strided convolution without overlapp + between each strides. + The original length has to be divided by factor. + + Args: + x (tf.Tensor): shape [batch_size, length, depth] + factor (int): Length compression factor. + out_depth (int): Output depth + + Returns: + tf.Tensor: shape [batch_size, length//factor, out_depth] + """ + out_depth = out_depth or x.get_shape().as_list()[-1] + # with tf.control_dependencies( # Dynamic assertion + # [tf.assert_equal(tf.shape(x)[1] % factor, 0)]): + x = tf.expand_dims(x, 1) # [batch_size, 1, length, depth] + x = tf.layers.conv2d( + inputs=x, + filters=out_depth, + kernel_size=(1, factor), + strides=(1, factor), + padding="valid", + data_format="channels_last", + ) # [batch_size, 1, length//factor, out_depth] + x = tf.squeeze(x, 1) # [batch_size, length//factor, depth] + return x + + +@expert_utils.add_var_scope() +def multihead_self_attention_reduced(x, factor, multihead_params): + """Reduce the length dimension by compressing with conv. + + Args: + x (tf.Tensor): float32 of shape [batch, length, depth] + factor (int): compression factor for the memory sequence + multihead_params (dict): parameters for multihead attention + + Returns: + (tf.Tensor): float32 of shape [batch, length, depth] + """ + depth = x.get_shape().as_list()[-1] + + # Could try to have some overlapp between the blocks but that would + # create conv artifacts, would make it difficult to not attend to the future + # withing one group and the padding should be handled specially. + + # With valid padding, the last block won't be computed (not attended anyway) + memory_x = conv_elems_1d(x, factor) + memory_x = tf.concat( + # Add the first elem to make it attendable by everyone (otherwise the + # first block cannot attend to anything) + [x[:, :1, :], memory_x], + axis=1, + ) + + # Construct the bias + @expert_utils.add_name_scope() + def construct_bias_vectors(t, axis): + length = tf.to_float(tf.shape(t)[1]) + length_coordinates = tf.range(length, dtype=tf.float32) + length_coordinates = tf.expand_dims(length_coordinates, axis=axis) + # [1, length_k] or [length_q, 1] + return length_coordinates + + bias = tf.to_float(tf.greater( + # Because we add the first elem to the memory block and it can be attended + # by anyone,we don't need to add +1 anymore to prevent self attention + # Use * factor to make sure the last tokens of a block cannot attend the + # block + construct_bias_vectors(memory_x, 0) * factor, + # +epsilon to avoid float equality + construct_bias_vectors(x, 1) + 1e-3, + )) * -1e9 + bias = tf.expand_dims(bias, axis=0) + bias = tf.expand_dims(bias, axis=0) # [1, 1, length_k, length_q] + + return multihead_attention( + query_antecedent=x, + memory_antecedent=memory_x, + bias=bias, + output_depth=depth, + **multihead_params + ) + + def scaled_dot_product_attention_simple(q, k, v, bias, name=None): """scaled dot-product attention. One head. One spatial dimension. diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 57598388b..0b3a83cc3 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -51,6 +51,8 @@ class AttentionType(object): GLOBAL_MOE = "global_experts" MEMORY_EFFICIENT = "memory_efficient" SPARSE_MULTIHEAD = "sparse_multihead" + MULTIHEAD_REDUCED = "multihead_reduced" + MULTIHEAD_FULL = "multihead_full" @staticmethod def get_choices(): @@ -59,6 +61,8 @@ def get_choices(): AttentionType.LOCAL_EXPERTS, AttentionType.MEMORY_EFFICIENT, AttentionType.SPARSE_MULTIHEAD, + AttentionType.MULTIHEAD_REDUCED, + AttentionType.MULTIHEAD_FULL, ] @@ -67,6 +71,8 @@ def get_choices(): "e": AttentionType.LOCAL_EXPERTS, # Experts "m": AttentionType.MEMORY_EFFICIENT, # Memory "s": AttentionType.SPARSE_MULTIHEAD, # Sparse (Locality sensitive hashing) + "r": AttentionType.MULTIHEAD_REDUCED, # Reduced + "f": AttentionType.MULTIHEAD_FULL, # Force using full attention } @@ -132,12 +138,12 @@ def _diet_expert(x): x, hparams.attention_exp_factor) dp_expand_x = lambda x: dp( # pylint: disable=g-long-lambda - deconv_elems_1d, + common_attention.deconv_elems_1d, x, hparams.attention_exp_factor, hparams.attention_exp_inputdim) dp_compress_x = lambda x, l: dp( # pylint: disable=g-long-lambda - conv_elems_1d, + common_attention.conv_elems_1d, x, hparams.attention_exp_factor, l) @@ -179,7 +185,13 @@ def print_shape(x, suffix, debug=False): with tf.variable_scope( "attention_{}".format(attention_type)): - if attention_type == AttentionType.MULTIHEAD: + if attention_type in [ + AttentionType.MULTIHEAD, AttentionType.MULTIHEAD_FULL]: + attention_dot_type = ( + "local_mask_right" if hparams.attention_local else + "dot_product") + if attention_type == AttentionType.MULTIHEAD_FULL: + attention_dot_type = "dot_product" y = dp( common_attention.multihead_attention, preprocess(x), @@ -190,8 +202,7 @@ def print_shape(x, suffix, debug=False): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - attention_type=("local_mask_right" if hparams.attention_local - else "dot_product"), + attention_type=attention_dot_type, name="decoder_self_attention") elif attention_type == AttentionType.SPARSE_MULTIHEAD: x_in = preprocess(x) @@ -229,6 +240,19 @@ def print_shape(x, suffix, debug=False): decoder_self_attention_bias, hparams.num_heads, name="decoder_self_attention") + elif attention_type == AttentionType.MULTIHEAD_REDUCED: + y = dp( + common_attention.multihead_self_attention_reduced, + preprocess(x), + factor=hparams.attention_red_factor, + multihead_params=dict( + total_key_depth= + hparams.attention_key_channels or hparams.hidden_size, + total_value_depth= + hparams.attention_value_channels or hparams.hidden_size, + num_heads=hparams.num_heads, + dropout_rate=hparams.attention_dropout, + )) elif attention_type == AttentionType.LOCAL_EXPERTS: x_in = preprocess(x) x_in = dp_remove_pad(x_in) @@ -338,67 +362,6 @@ def get_batch_coordinate(x, axis=0): return batch_coordinate -@expert_utils.add_var_scope() -def deconv_elems_1d(x, factor, out_depth): - """Increase the length and change the dimensionality. - - Expand/project each positions of dim depth of the input into - factor*tokens of dim out_depth - - Args: - x (tf.Tensor): shape [batch_size, length, depth] - factor (int): Multiplicative factor of each tokens. - out_depth (int): Output depth - - Returns: - tf.Tensor: shape [batch_size, length*factor, out_depth] - """ - x = tf.expand_dims(x, 1) # [batch_size, 1, length, depth] - x = tf.layers.conv2d_transpose( - inputs=x, - filters=out_depth, - kernel_size=(1, factor), - strides=(1, factor), - padding="valid", - data_format="channels_last", - ) # [batch_size, 1, length*factor, out_depth] - x = tf.squeeze(x, 1) # [batch_size, 1, length, depth] - return x - - -@expert_utils.add_var_scope() -def conv_elems_1d(x, factor, out_depth): - """Decrease the length and change the dimensionality. - - Merge/restore/compress factors positions of dim depth of the input into - a single position of dim out_depth. - This is basically just a strided convolution without overlapp - between each strides. - The original length has to be divided by factor. - - Args: - x (tf.Tensor): shape [batch_size, length, depth] - factor (int): Length compression factor. - out_depth (int): Output depth - - Returns: - tf.Tensor: shape [batch_size, length//factor, out_depth] - """ - with tf.control_dependencies( # Dynamic assertion - [tf.assert_equal(tf.shape(x)[1] % factor, 0)]): - x = tf.expand_dims(x, 1) # [batch_size, 1, length, depth] - x = tf.layers.conv2d( - inputs=x, - filters=out_depth, - kernel_size=(1, factor), - strides=(1, factor), - padding="valid", - data_format="channels_last", - ) # [batch_size, 1, length//factor, out_depth] - x = tf.squeeze(x, 1) # [batch_size, 1, length, depth] - return x - - @expert_utils.add_name_scope() def expand_batch_coordinates(bc, length_factor): """Duplicate elements of bc by length_factor. @@ -511,6 +474,7 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_num_head", 1) hparams.add_hparam("attention_num_experts", 16) hparams.add_hparam("attention_split_batch", int(False)) + hparams.add_hparam("attention_red_factor", 3) # If attention_exp_factor is set, each input to local_expert_attention (of # dimensionality hidden size) is projected into attention_exp_factor smaller # inputs, each of dimensionality attention_exp_inputdim. (otherwise @@ -594,6 +558,13 @@ def attention_lm_hybrid_v2(): return hparams +@registry.register_hparams +def attention_lm_16k(): + hparams = attention_lm_hybrid_v2() + hparams.batch_size = 16384 + return hparams + + @registry.register_hparams def attention_lm_ae_extended(): """Experiment with the exp_factor params.""" From 427a4e55e49f34f435c079d1c0b5efa8035a0e87 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Mon, 9 Oct 2017 16:28:12 -0700 Subject: [PATCH 0481/4095] Add future and padding bias for LSH mask PiperOrigin-RevId: 171604241 --- tensor2tensor/layers/common_attention.py | 45 +++++++++++++++++------- tensor2tensor/models/attention_lm_moe.py | 30 ++++++++++++++++ 2 files changed, 62 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index d973cf3a6..f1251790d 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -2630,7 +2630,7 @@ def flatten_batch(x): @expert_utils.add_name_scope() -def dot_product_batched_head(q, k, v, gates_q, gates_k): +def dot_product_batched_head(q, k, v, gates_q, gates_k, mask_right=False): """Perform a dot product attention on a single sequence on a single head. This function dispatch the q, k, v and loop over the buckets to compute the @@ -2642,17 +2642,25 @@ def dot_product_batched_head(q, k, v, gates_q, gates_k): v (tf.Tensor): [batch*heads, length_k, depth_v] gates_q (tf.Tensor): One-hot of shape [batch*heads, length_q, nb_buckets] gates_k (tf.Tensor): One-hot of shape [batch*heads, length_k, nb_buckets] + mask_right (bool): Add a bias to prevent attention to the future Returns: tf.Tensor: [length_q, depth_v] """ - # Right now Q and K have same length - length = tf.shape(q)[1] nb_buckets = tf.shape(gates_q)[-1] - capacity = length // nb_buckets * 3 # Capacity is hardcoded - capacity = tf.minimum(length, capacity) - tf.summary.scalar("dispatch_capacity", capacity, family="lsh") + @expert_utils.add_name_scope() + def get_dispatcher(gates): + length = tf.shape(gates)[1] + # Count the number of ones per batch (and keep the max value) + nb_elems_to_dispatch = tf.reduce_sum(gates, axis=[1, 2]) + nb_elems_to_dispatch = tf.reduce_max(nb_elems_to_dispatch) + nb_elems_to_dispatch = tf.to_int32(nb_elems_to_dispatch) + capacity = nb_elems_to_dispatch // nb_buckets * 2 # Capacity is hardcoded + capacity = tf.minimum(length, capacity) + tf.summary.scalar("dispatch_capacity", capacity, family="lsh") + return expert_utils.TruncatingDispatcher(gates, capacity) + def add_summary_capacity(x, prefix): # Monitor if capacity overflow x = x[0, ...] # Take first batch/head @@ -2665,17 +2673,23 @@ def add_summary_capacity(x, prefix): add_summary_capacity(gates_q, "q") add_summary_capacity(gates_k, "k") - q_dispatcher = expert_utils.TruncatingDispatcher(gates_q, capacity) - k_dispatcher = expert_utils.TruncatingDispatcher(gates_k, capacity) + q_dispatcher = get_dispatcher(gates_q) + k_dispatcher = get_dispatcher(gates_k) q = q_dispatcher.dispatch(q) k = k_dispatcher.dispatch(k) v = k_dispatcher.dispatch(v) - # TODO(epot): Forward the padding bias and future # Bias of shape [batch*heads, nb_buckets, 1, capacity] broadcasted to every # queries bias = tf.expand_dims((k_dispatcher.nonpadding() - 1.0) * 1e9, 2) + if mask_right: + q_coordinate = tf.to_float( + tf.expand_dims(q_dispatcher.length_coordinate(), 3)) + k_coordinate = tf.to_float( + tf.expand_dims(k_dispatcher.length_coordinate(), 2)) + bias += tf.to_float(tf.greater(k_coordinate, q_coordinate)) * -1e9 + # The sequence padding is not masked but is ignored on the next layers # q, k, v now have shape [batch*heads, nb_bucket, capacity, depth] # The buckets can be seen as different heads @@ -2687,7 +2701,12 @@ def add_summary_capacity(x, prefix): @expert_utils.add_name_scope() def sparse_dot_product_attention_truncated( - q, k, v, bi, use_map_fn, experts_params): # pylint: disable=unused-argument + q, k, v, + bi, # Unused + experts_params, + use_map_fn=False, # Unused + mask_right=False, +): # pylint: disable=unused-argument """Sparse multihead self attention. Perform an approximation of the full multihead attention by dispatching @@ -2709,10 +2728,10 @@ def sparse_dot_product_attention_truncated( k (tf.Tensor): Keys of shape [batch, heads, length_q, depth_k] v (tf.Tensor): Values of shape [batch, heads, length_kv, depth_v] bi (BatchInfo): Contains the batch coordinates and sequence order + experts_params (dict): Additional params for the local expert use_map_fn (bool): Use either tf.map_fn of python for loop to compute the heads separately - experts_params (dict): Additional params for the local expert - + mask_right (bool): Returns: tf.Tensor: Approximation of Softmax(Q.K) * V, of shape [batch, heads, length_q, depth_v] @@ -2784,7 +2803,7 @@ def get_gates_head(x, add_first=False): q, k, v, gates_q, gates_k = [ combine_first_two_dimensions(t) for t in (q, k, v, gates_q, gates_k)] - v_out = dot_product_batched_head(q, k, v, gates_q, gates_k) + v_out = dot_product_batched_head(q, k, v, gates_q, gates_k, mask_right) # Restore original dimension v_out = tf.reshape(v_out, [batch_size, nb_heads, -1, depth]) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 0b3a83cc3..f24d969af 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -46,11 +46,13 @@ class AttentionType(object): + """Enum of the attention layers types.""" MULTIHEAD = "multihead" LOCAL_EXPERTS = "local_experts" GLOBAL_MOE = "global_experts" MEMORY_EFFICIENT = "memory_efficient" SPARSE_MULTIHEAD = "sparse_multihead" + SPARSE_MULTIHEAD_TRUNCATED = "sparse_multihead_truncated" MULTIHEAD_REDUCED = "multihead_reduced" MULTIHEAD_FULL = "multihead_full" @@ -61,6 +63,7 @@ def get_choices(): AttentionType.LOCAL_EXPERTS, AttentionType.MEMORY_EFFICIENT, AttentionType.SPARSE_MULTIHEAD, + AttentionType.SPARSE_MULTIHEAD_TRUNCATED, AttentionType.MULTIHEAD_REDUCED, AttentionType.MULTIHEAD_FULL, ] @@ -71,6 +74,7 @@ def get_choices(): "e": AttentionType.LOCAL_EXPERTS, # Experts "m": AttentionType.MEMORY_EFFICIENT, # Memory "s": AttentionType.SPARSE_MULTIHEAD, # Sparse (Locality sensitive hashing) + "t": AttentionType.SPARSE_MULTIHEAD_TRUNCATED, # Using TruncatedDispatcher "r": AttentionType.MULTIHEAD_REDUCED, # Reduced "f": AttentionType.MULTIHEAD_FULL, # Force using full attention } @@ -230,6 +234,32 @@ def print_shape(x, suffix, debug=False): ) y = dp_restore_pad(y) + # TODO(avaswani, epot, noam): Do we need to divide by num shards ? + extra_loss += tf.add_n(loss_experts) / dp.n + elif attention_type == AttentionType.SPARSE_MULTIHEAD_TRUNCATED: + x_in = preprocess(x) + y, loss_experts = dp( + common_attention.multihead_attention_sparse_truncated, + x_in, + None, + None, # Bias is computed inside + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout, + + # Additional parameters + bi=[common_attention.BatchInfo( + coordinates=batch_coordinate[i], + order=batch_order[i], # No future mask + ) for i in range(dp.n)], + mask_right=True, + experts_params=dict( + nb_hyperplanes=hparams.lsh_num_hyperplanes, + ), + ) + # TODO(avaswani, epot, noam): Do we need to divide by num shards ? extra_loss += tf.add_n(loss_experts) / dp.n elif attention_type == AttentionType.MEMORY_EFFICIENT: From a524aba5d1932f4fddd22c4e95cda82efe1ebb04 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Tue, 10 Oct 2017 10:17:11 -0700 Subject: [PATCH 0482/4095] Add flag to specify the minimum input length. PiperOrigin-RevId: 171696081 --- tensor2tensor/layers/common_hparams.py | 3 +++ tensor2tensor/tpu/tpu_trainer_lib.py | 2 ++ tensor2tensor/utils/data_reader.py | 32 +++++++++++++++++++++---- tensor2tensor/utils/data_reader_test.py | 2 +- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 491944382..a701cf4fa 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -93,6 +93,9 @@ def basic_params1(): # epsilon parameter to normalization function norm_epsilon=1e-6, symbol_modality_num_shards=16, + # During training, we drop sequences whose inputs and targets are shorter + # than min_length + min_length=0, # During training, we drop sequences whose inputs or targets are longer # than max_length. # If max_length==0, we use hparams.batch_size instead. diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 6e3c4db62..2466b99fb 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -54,6 +54,7 @@ def input_fn(mode, params): batching_scheme = { "boundaries": [], "batch_sizes": [batch_size], + "min_length": hparams.min_length, "max_length": hparams.max_length, "window_size": batch_size, "padded_shapes": { @@ -87,6 +88,7 @@ def _preprocess(example, problem, hparams, mode): def _valid_size(example): return data_reader.example_valid_size(example, + batching_scheme["min_length"], batching_scheme["max_length"]) dataset = dataset.filter(_valid_size) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index cfe37c379..83f66b985 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -18,6 +18,8 @@ from __future__ import division from __future__ import print_function +import functools + # Dependency imports import numpy as np @@ -82,6 +84,7 @@ def input_pipeline(problem, "boundaries": a list of integers for the boundaries that will be used for bucketing; see bucket_by_sequence_length for more details. "batch_sizes": a list of batch sizes corresponding to the buckets + "min_length": an integer. We drop sequences which are shorter. "max_length": an integer. We drop sequences which are longer. dataset_split: tf.estimator.ModeKeys + ["test"], which split of the dataset to use. Defaults to mode. @@ -102,7 +105,11 @@ def input_pipeline(problem, dataset_split=dataset_split) dataset = dataset.map(cast_int64_to_int32, num_threads=num_threads) dataset = dataset.filter( - lambda ex: example_valid_size(ex, batching_scheme["max_length"])) + functools.partial( + example_valid_size, + min_length=batching_scheme["min_length"], + max_length=batching_scheme["max_length"], + )) if is_training: dataset = dataset.shuffle(capacity) dataset = dataset.repeat(None) @@ -143,8 +150,12 @@ def _example_length(example): return length -def example_valid_size(example, max_length): - return tf.less_equal(_example_length(example), max_length) +def example_valid_size(example, min_length, max_length): + length = _example_length(example) + return tf.logical_and( + length >= min_length, + length <= max_length, + ) def bucket_by_sequence_length(dataset, @@ -232,7 +243,8 @@ def _batching_scheme(batch_size, length_bucket_step, drop_long_sequences=False, shard_multiplier=1, - length_multiplier=1): + length_multiplier=1, + min_length=0): """A batching scheme based on model hyperparameters. Every batch containins a number of sequences divisible by `shard_multiplier`. @@ -251,18 +263,26 @@ def _batching_scheme(batch_size, across datashards. length_multiplier: an integer multiplier that is used to increase the batch sizes and sequence length tolerance. + min_length: int, sequences shorter than this will be skipped. Returns: A dictionary with parameters that can be passed to input_pipeline: * boundaries: list of bucket boundaries * batch_sizes: list of batch sizes for each length bucket * max_length: int, maximum length of an example + + Raises: + ValueError: If min_length > max_length """ max_length = max_length or batch_size + if max_length < min_length: + raise ValueError("max_length must be greater or equal to min_length") + boundaries = _bucket_boundaries(max_length, min_length_bucket, length_bucket_step) boundaries = [boundary * length_multiplier for boundary in boundaries] max_length *= length_multiplier + batch_sizes = [ max(1, batch_size // length) for length in boundaries + [max_length] ] @@ -293,9 +313,11 @@ def _batching_scheme(batch_size, # number of batches per window. max_batches_per_window = window_size // min(batch_sizes) shuffle_queue_size = max_batches_per_window * 3 + ret = { "boundaries": boundaries, "batch_sizes": batch_sizes, + "min_length": min_length, "max_length": (max_length if drop_long_sequences else 10**9), "shuffle_queue_size": shuffle_queue_size, "window_size": window_size, @@ -311,6 +333,7 @@ def hparams_to_batching_scheme(hparams, """Wrapper around _batching_scheme with hparams.""" return _batching_scheme( batch_size=hparams.batch_size, + min_length=hparams.min_length, max_length=hparams.max_length, min_length_bucket=hparams.min_length_bucket, length_bucket_step=hparams.length_bucket_step, @@ -333,6 +356,7 @@ def constant_batching_scheme(constant_batch_size_in_sequences): return { "boundaries": boundaries, "batch_sizes": batch_sizes, + "min_length": 0, "max_length": 10**9, "shuffle_queue_size": None, "window_size": constant_batch_size_in_sequences, diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index 0dccfaedf..bf2aa872e 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -120,7 +120,7 @@ def testLengthFilter(self): dataset = self.problem.dataset( tf.estimator.ModeKeys.TRAIN, data_dir=self.data_dir) dataset = dataset.filter( - lambda ex: data_reader.example_valid_size(ex, max_len)) + lambda ex: data_reader.example_valid_size(ex, 0, max_len)) examples = dataset.make_one_shot_iterator().get_next() with tf.train.MonitoredSession() as sess: ex_lens = [] From 7833026c3e39310caf3dd88f21e68ad8c9b1194f Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 10 Oct 2017 11:02:18 -0700 Subject: [PATCH 0483/4095] Fix doc typos. PiperOrigin-RevId: 171703251 --- tensor2tensor/models/transformer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 68ce9604d..baa85829c 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -95,7 +95,7 @@ def decode( attentions, used for fast decoding. Returns: - Final decoder representaiton. [batch_size, decoder_length, hidden_dim] + Final decoder representation. [batch_size, decoder_length, hidden_dim] """ decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) @@ -112,7 +112,7 @@ def decode( return tf.expand_dims(decoder_output, axis=2) def model_fn_body(self, features): - """Transformet main model_fn. + """Transformer main model_fn. Args: features: Map of features to the model. Should contain the following: @@ -122,7 +122,7 @@ def model_fn_body(self, features): "target_space_id" Returns: - Final decoder representaiton. [batch_size, decoder_length, hidden_dim] + Final decoder representation. [batch_size, decoder_length, hidden_dim] """ hparams = self._hparams From f2d6295c2c3550ee8061c704d37a305431225a9b Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 10 Oct 2017 11:03:18 -0700 Subject: [PATCH 0484/4095] Add iterations_per_loop flag to tpu_trainer PiperOrigin-RevId: 171703463 --- tensor2tensor/tpu/tpu_trainer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index fac21f50d..a156d11a2 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -36,6 +36,8 @@ flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("master", "", "Address of TensorFlow master.") flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") +flags.DEFINE_integer("iterations_per_loop", 1000, + "Number of iterations in a TPU training loop.") def main(unused_argv): @@ -59,7 +61,8 @@ def main(unused_argv): master=FLAGS.master, num_shards=FLAGS.tpu_num_shards, batch_size=hparams.tpu_batch_size_per_shard * FLAGS.tpu_num_shards, - log_device_placement=FLAGS.log_device_placement) + log_device_placement=FLAGS.log_device_placement, + iterations_per_loop=FLAGS.iterations_per_loop) estimator.train( lambda params: input_fn(tf.estimator.ModeKeys.TRAIN, params), steps=FLAGS.train_steps) From e4fc73a204efdb74f6c59e12b28dae716197bc37 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 10 Oct 2017 11:22:26 -0700 Subject: [PATCH 0485/4095] Update TPU hparams to reflect new base config PiperOrigin-RevId: 171707208 --- tensor2tensor/tpu/tpu_trainer_lib.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 2466b99fb..6f3c0130e 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -273,13 +273,12 @@ def transformer_tpu(): """HParams for Transformer model on TPU.""" hp = transformer.transformer_base() hp.use_pad_remover = int(False) # where op not supported + hp.optimizer = "TrueAdam" + hp.learning_rate = 0.4 # Inputs # Each example in the batch will be of (padded) length hp.max_length - # Batch size per shard is governed by tpu_batch_size_per_shard hp.max_length = 64 + hp.tpu_batch_size_per_shard = 20 - hp.optimizer = "TrueAdam" - hp.layer_preprocess_sequence = "n" - hp.layer_postprocess_sequence = "da" return hp From f59464304e02f8dd1b754d9edc892de7a983fce8 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Wed, 11 Oct 2017 10:17:53 -0700 Subject: [PATCH 0486/4095] Compress the memory length using attention PiperOrigin-RevId: 171838308 --- tensor2tensor/layers/common_attention.py | 100 ++++++++++++++++++++++- tensor2tensor/models/attention_lm_moe.py | 4 + 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index f1251790d..792241632 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -2875,16 +2875,103 @@ def conv_elems_1d(x, factor, out_depth=None): @expert_utils.add_var_scope() -def multihead_self_attention_reduced(x, factor, multihead_params): +def local_reduction_attention(x, block_length, multihead_params): + """Reduce the length dimension using self attention. + + Args: + x (tf.Tensor): float32 of shape [batch, length, depth] + block_length (int): Block length for local attention (Compression factor) + multihead_params (dict): parameters for multihead attention + + Returns: + tf.Tensor: Compressed tensor of shape [batch, length // factor, depth] + """ + @expert_utils.add_name_scope() + def dot_product_self_local_attention_flattened(q, k, v): + """Strided block local self-attention. + + No overlapp between the blocks. + + Args: + q (tf.Tensor): shape [batch, heads, length, depth_k] + k (tf.Tensor): shape [batch, heads, length, depth_k] + v (tf.Tensor): shape [batch, heads, length, depth_v] + + Returns: + tf.Tensor: shape [batch, heads, length, depth_v] + """ + _, num_head, _, depth = q.get_shape().as_list() + + # Extract the blocks + def pad_and_reshape(x): + """Split the length dim into [num_block, block_length].""" + length_x = tf.shape(x)[2] + # Add some padding, but won't matter as the last block will never be + # attended by the query (after compression) + x = tf.pad(x, [ + [0, 0], + [0, 0], + [0, -length_x % block_length], + [0, 0] + ]) + x = tf.reshape(x, [ + tf.shape(x)[0], # Batch + num_head, # Head + tf.shape(x)[2] // block_length, # Num blocks + block_length, # Block length + depth, # Depth + ]) + return x + + q, k, v = [pad_and_reshape(t) for t in (q, k, v)] + + # Perform attention on the flattened dot product + logits = tf.matmul(q, k, transpose_b=True) + logits = tf.reshape(logits, [ + tf.shape(logits)[0], # Batch + num_head, # Head + tf.shape(logits)[2], # Num blocks + block_length**2, # Flatten last dimension + ]) + weights = tf.nn.softmax(logits) + weights = tf.reshape(weights, [ + tf.shape(weights)[0], # Batch + num_head, # Head + tf.shape(weights)[2], # Num blocks + block_length, + block_length, # Restore the block length dimension + ]) + weights = tf.reduce_sum(weights, axis=3, keep_dims=True) # Compress block + v_out = tf.matmul(weights, v) # [1, block_length] @ [block_length, depth] + v_out = tf.squeeze(v_out, axis=3) + return v_out + + return multihead_attention( + x, + None, + bias=None, + output_depth=x.get_shape().as_list()[-1], + attention_type=dot_product_self_local_attention_flattened, + **multihead_params + ) + + +@expert_utils.add_var_scope() +def multihead_self_attention_reduced( + x, factor, reduction_type, multihead_params): """Reduce the length dimension by compressing with conv. Args: x (tf.Tensor): float32 of shape [batch, length, depth] factor (int): compression factor for the memory sequence + reduction_type (str): type of compression multihead_params (dict): parameters for multihead attention Returns: (tf.Tensor): float32 of shape [batch, length, depth] + + Raises: + ValueError: If reduction_type invalid """ depth = x.get_shape().as_list()[-1] @@ -2892,8 +2979,15 @@ def multihead_self_attention_reduced(x, factor, multihead_params): # create conv artifacts, would make it difficult to not attend to the future # withing one group and the padding should be handled specially. - # With valid padding, the last block won't be computed (not attended anyway) - memory_x = conv_elems_1d(x, factor) + # Reduce the memory dimension + if reduction_type == "attention": + memory_x = local_reduction_attention(x, factor, multihead_params) + elif reduction_type == "conv": + # With valid padding, the last block won't be computed (not attended anyway) + memory_x = conv_elems_1d(x, factor) + else: + raise ValueError("Unknown reduction type {}".format(reduction_type)) + memory_x = tf.concat( # Add the first elem to make it attendable by everyone (otherwise the # first block cannot attend to anything) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index f24d969af..85c7c9d49 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -207,6 +207,7 @@ def print_shape(x, suffix, debug=False): hparams.num_heads, hparams.attention_dropout, attention_type=attention_dot_type, + block_length=hparams.attention_block_length, name="decoder_self_attention") elif attention_type == AttentionType.SPARSE_MULTIHEAD: x_in = preprocess(x) @@ -275,6 +276,7 @@ def print_shape(x, suffix, debug=False): common_attention.multihead_self_attention_reduced, preprocess(x), factor=hparams.attention_red_factor, + reduction_type=hparams.attention_reduction_type, multihead_params=dict( total_key_depth= hparams.attention_key_channels or hparams.hidden_size, @@ -505,6 +507,8 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_num_experts", 16) hparams.add_hparam("attention_split_batch", int(False)) hparams.add_hparam("attention_red_factor", 3) + hparams.add_hparam("attention_block_length", 128) + hparams.add_hparam("attention_reduction_type", "conv") # If attention_exp_factor is set, each input to local_expert_attention (of # dimensionality hidden size) is projected into attention_exp_factor smaller # inputs, each of dimensionality attention_exp_inputdim. (otherwise From 317ecf3e616a65f2023c850304861f0e167de682 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 11 Oct 2017 13:40:50 -0700 Subject: [PATCH 0487/4095] Add support for decoder states in beam search. PiperOrigin-RevId: 171868043 --- tensor2tensor/utils/beam_search.py | 111 +++++++++++++++++------- tensor2tensor/utils/beam_search_test.py | 95 +++++++++++++++++++- 2 files changed, 174 insertions(+), 32 deletions(-) diff --git a/tensor2tensor/utils/beam_search.py b/tensor2tensor/utils/beam_search.py index 9c26579af..1dd2f87b1 100644 --- a/tensor2tensor/utils/beam_search.py +++ b/tensor2tensor/utils/beam_search.py @@ -22,12 +22,31 @@ # Dependency imports import tensorflow as tf +from tensorflow.python.util import nest + # Assuming EOS_ID is 1 EOS_ID = 1 # Default value for INF INF = 1. * 1e7 +def expand_to_beam_size(tensor, beam_size): + """Tiles a given tensor by beam_size. + + Args: + tensor: tensor to tile [batch_size, ...] + beam_size: How much to tile the tensor by. + + Returns: + Tiled tensor [batch_size, beam_size, ...] + """ + tensor = tf.expand_dims(tensor, axis=1) + tile_dims = [1] * tensor.shape.ndims + tile_dims[1] = beam_size + + return tf.tile(tensor, tile_dims) + + def log_prob_from_logits(logits): return logits - tf.reduce_logsumexp(logits, axis=2, keep_dims=True) @@ -51,7 +70,8 @@ def compute_batch_indices(batch_size, beam_size): def compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags, - beam_size, batch_size, prefix="default"): + beam_size, batch_size, prefix="default", + states_to_gather=None): """Given sequences and scores, will gather the top k=beam size sequences. This function is used to grow alive, and finished. It takes sequences, @@ -79,6 +99,7 @@ def compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags, beam_size: int batch_size: int prefix: string that will prefix unique names for the ops run. + states_to_gather: dict (possibly nested) of decoding states. Returns: Tuple of (topk_seq [batch_size, beam_size, decode_length], @@ -101,13 +122,17 @@ def compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags, # Gather up the highest scoring sequences. For each operation added, give it # a concrete name to simplify observing these operations with tfdbg. Clients # can capture these tensors by watching these node names. - topk_seq = tf.gather_nd( - sequences, top_coordinates, name=(prefix + "_topk_seq")) - topk_flags = tf.gather_nd( - flags, top_coordinates, name=(prefix + "_topk_flags")) - topk_gathered_scores = tf.gather_nd( - scores_to_gather, top_coordinates, name=(prefix + "_topk_scores")) - return topk_seq, topk_gathered_scores, topk_flags + def gather(tensor, name): + return tf.gather_nd(tensor, top_coordinates, name=(prefix + name)) + topk_seq = gather(sequences, "_topk_seq") + topk_flags = gather(flags, "_topk_flags") + topk_gathered_scores = gather(scores_to_gather, "_topk_scores") + if states_to_gather: + topk_gathered_states = nest.map_structure( + lambda state: gather(state, "_topk_states"), states_to_gather) + else: + topk_gathered_states = states_to_gather + return topk_seq, topk_gathered_scores, topk_flags, topk_gathered_states def beam_search(symbols_to_logits_fn, @@ -116,6 +141,7 @@ def beam_search(symbols_to_logits_fn, decode_length, vocab_size, alpha, + states=None, eos_id=EOS_ID): """Beam search with length penalties. @@ -150,6 +176,7 @@ def beam_search(symbols_to_logits_fn, vocab_size: Size of the vocab, must equal the size of the logits returned by symbols_to_logits_fn alpha: alpha for length penalty. + states: dict (possibly nested) of decoding states. eos_id: ID for end of sentence. Returns: Tuple of @@ -163,9 +190,14 @@ def beam_search(symbols_to_logits_fn, # Expand to beam_size (batch_size, beam_size) alive_log_probs = tf.tile(initial_log_probs, [batch_size, 1]) - # Expand each batch to beam_size - alive_seq = tf.tile(tf.expand_dims(initial_ids, 1), [1, beam_size]) - alive_seq = tf.expand_dims(alive_seq, 2) # (batch_size, beam_size, 1) + # Expand each batch and state to beam_size + alive_seq = expand_to_beam_size(initial_ids, beam_size) + alive_seq = tf.expand_dims(alive_seq, axis=2) # (batch_size, beam_size, 1) + if states: + states = nest.map_structure( + lambda state: expand_to_beam_size(state, beam_size), states) + else: + states = {} # Finished will keep track of all the sequences that have finished so far # Finished log probs will be negative infinity in the beginning @@ -214,7 +246,7 @@ def grow_finished(finished_seq, finished_scores, finished_flags, curr_seq, curr_finished_seq, curr_finished_scores, curr_finished_scores, curr_finished_flags, beam_size, batch_size, "grow_finished") - def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished): + def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished, states): """Given sequences and scores, will gather the top k=beam size sequences. Args: @@ -225,6 +257,7 @@ def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished): [batch_size, beam_size] curr_finished: Finished flags for each of these sequences. [batch_size, beam_size] + states: dict (possibly nested) of decoding states. Returns: Tuple of (Topk sequences based on scores, @@ -236,9 +269,9 @@ def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished): curr_scores += tf.to_float(curr_finished) * -INF return compute_topk_scores_and_seq(curr_seq, curr_scores, curr_log_probs, curr_finished, beam_size, batch_size, - "grow_alive") + "grow_alive", states) - def grow_topk(i, alive_seq, alive_log_probs): + def grow_topk(i, alive_seq, alive_log_probs, states): r"""Inner beam seach loop. This function takes the current alive sequences, and grows them to topk @@ -255,19 +288,29 @@ def grow_topk(i, alive_seq, alive_log_probs): i: loop index alive_seq: Topk sequences decoded so far [batch_size, beam_size, i+1] alive_log_probs: probabilities of these sequences. [batch_size, beam_size] + states: dict (possibly nested) of decoding states. Returns: Tuple of (Topk sequences extended by the next word, The log probs of these sequences, The scores with length penalty of these sequences, - Flags indicating which of these sequences have finished decoding) + Flags indicating which of these sequences have finished decoding, + dict of transformed decoding states) """ # Get the logits for all the possible next symbols flat_ids = tf.reshape(alive_seq, [batch_size * beam_size, -1]) # (batch_size * beam_size, decoded_length) - flat_logits = symbols_to_logits_fn(flat_ids) - logits = tf.reshape(flat_logits, (batch_size, beam_size, -1)) + if states: + flat_states = nest.map_structure( + lambda state: tf.reshape(state, [batch_size * beam_size, -1]), states) + flat_logits, flat_states = symbols_to_logits_fn(flat_ids, flat_states) + states = nest.map_structure( + lambda state: tf.reshape(state, [batch_size, beam_size, -1]), + flat_states) + else: + flat_logits = symbols_to_logits_fn(flat_ids) + logits = tf.reshape(flat_logits, [batch_size, beam_size, -1]) # Convert logits to normalized log probs candidate_log_probs = log_prob_from_logits(logits) @@ -305,16 +348,19 @@ def grow_topk(i, alive_seq, alive_log_probs): # Gather up the most probable 2*beams both for the ids and finished_in_alive # bools topk_seq = tf.gather_nd(alive_seq, topk_coordinates) + if states: + states = nest.map_structure( + lambda state: tf.gather_nd(state, topk_coordinates), states) # Append the most probable alive topk_seq = tf.concat([topk_seq, tf.expand_dims(topk_ids, axis=2)], axis=2) topk_finished = tf.equal(topk_ids, eos_id) - return topk_seq, topk_log_probs, topk_scores, topk_finished + return topk_seq, topk_log_probs, topk_scores, topk_finished, states def inner_loop(i, alive_seq, alive_log_probs, finished_seq, finished_scores, - finished_flags): + finished_flags, states): """Inner beam seach loop. There are three groups of tensors, alive, finished, and topk. @@ -346,6 +392,7 @@ def inner_loop(i, alive_seq, alive_log_probs, finished_seq, finished_scores, [batch_size, beam_size] finished_flags: finished bools for each of these sequences. [batch_size, beam_size] + states: dict (possibly nested) of decoding states. Returns: Tuple of @@ -354,26 +401,27 @@ def inner_loop(i, alive_seq, alive_log_probs, finished_seq, finished_scores, Log probs of the alive sequences, New finished sequences, Scores of the new finished sequences, - Flags inidicating which sequence in finished as reached EOS) + Flags inidicating which sequence in finished as reached EOS, + dict of final decoding states) """ # Each inner loop, we carry out three steps: # 1. Get the current topk items. # 2. Extract the ones that have finished and haven't finished # 3. Recompute the contents of finished based on scores. - topk_seq, topk_log_probs, topk_scores, topk_finished = grow_topk( - i, alive_seq, alive_log_probs) - alive_seq, alive_log_probs, _ = grow_alive(topk_seq, topk_scores, - topk_log_probs, topk_finished) - finished_seq, finished_scores, finished_flags = grow_finished( + topk_seq, topk_log_probs, topk_scores, topk_finished, states = grow_topk( + i, alive_seq, alive_log_probs, states) + alive_seq, alive_log_probs, _, states = grow_alive( + topk_seq, topk_scores, topk_log_probs, topk_finished, states) + finished_seq, finished_scores, finished_flags, _ = grow_finished( finished_seq, finished_scores, finished_flags, topk_seq, topk_scores, topk_finished) return (i + 1, alive_seq, alive_log_probs, finished_seq, finished_scores, - finished_flags) + finished_flags, states) def _is_finished(i, unused_alive_seq, alive_log_probs, unused_finished_seq, - finished_scores, finished_in_finished): + finished_scores, finished_in_finished, unused_states): """Checking termination condition. We terminate when we decoded up to decode_length or the lowest scoring item @@ -416,11 +464,11 @@ def _is_finished(i, unused_alive_seq, alive_log_probs, unused_finished_seq, tf.less(i, decode_length), tf.logical_not(bound_is_met)) (_, alive_seq, alive_log_probs, finished_seq, finished_scores, - finished_flags) = tf.while_loop( + finished_flags, _) = tf.while_loop( _is_finished, inner_loop, [ tf.constant(0), alive_seq, alive_log_probs, finished_seq, - finished_scores, finished_flags + finished_scores, finished_flags, states ], shape_invariants=[ tf.TensorShape([]), @@ -428,7 +476,10 @@ def _is_finished(i, unused_alive_seq, alive_log_probs, unused_finished_seq, alive_log_probs.get_shape(), tf.TensorShape([None, None, None]), finished_scores.get_shape(), - finished_flags.get_shape() + finished_flags.get_shape(), + nest.map_structure( + lambda tensor: tf.TensorShape([None] * tensor.shape.ndims), + states), ], parallel_iterations=1, back_prop=False) diff --git a/tensor2tensor/utils/beam_search_test.py b/tensor2tensor/utils/beam_search_test.py index 5223989ea..f96094416 100644 --- a/tensor2tensor/utils/beam_search_test.py +++ b/tensor2tensor/utils/beam_search_test.py @@ -61,8 +61,9 @@ def testComputeTopkScoresAndSeq(self): flags = tf.constant([[True, False, False, True], [False, False, False, True]]) - topk_seq, topk_scores, topk_flags = beam_search.compute_topk_scores_and_seq( - sequences, scores, scores, flags, beam_size, batch_size) + topk_seq, topk_scores, topk_flags, _ = ( + beam_search.compute_topk_scores_and_seq( + sequences, scores, scores, flags, beam_size, batch_size)) with self.test_session(): topk_seq = topk_seq.eval() @@ -277,6 +278,96 @@ def symbols_to_logits(ids): ]], scores) self.assertAllEqual([[[0, 2, 0, 1], [0, 2, 1, 0]]], ids) + def testStates(self): + batch_size = 1 + beam_size = 1 + vocab_size = 2 + decode_length = 3 + + initial_ids = tf.constant([0] * batch_size) # GO + probabilities = tf.constant([[[0.7, 0.3]], [[0.4, 0.6]], [[0.5, 0.5]]]) + + expected_states = tf.constant([[[0.]], [[1.]]]) + + def symbols_to_logits(ids, states): + pos = tf.shape(ids)[1] - 1 + # We have to assert the values of state inline here since we can't fetch + # them out of the loop! + with tf.control_dependencies( + [tf.assert_equal(states["state"], expected_states[pos])]): + logits = tf.to_float(tf.log(probabilities[pos, :])) + + states["state"] += 1 + return logits, states + + states = { + "state": tf.zeros((batch_size, 1)), + } + + final_ids, _ = beam_search.beam_search( + symbols_to_logits, + initial_ids, + beam_size, + decode_length, + vocab_size, + 0.0, + eos_id=1, + states=states) + + with self.test_session() as sess: + # Catch and fail so that the testing framework doesn't think it's an error + try: + sess.run(final_ids) + except tf.errors.InvalidArgumentError, e: + raise AssertionError(e.message) + + def testStateBeamTwo(self): + batch_size = 1 + beam_size = 2 + vocab_size = 3 + decode_length = 3 + + initial_ids = tf.constant([0] * batch_size) # GO + probabilities = tf.constant([[[0.1, 0.1, 0.8], [0.1, 0.1, 0.8]], + [[0.4, 0.5, 0.1], [0.2, 0.4, 0.4]], + [[0.05, 0.9, 0.05], [0.4, 0.4, 0.2]]]) + + # The top beam is always selected so we should see the top beam's state + # at each position, which is the one thats getting 3 added to it each step. + expected_states = tf.constant([[[0.], [0.]], [[3.], [3.]], [[6.], [6.]]]) + + def symbols_to_logits(ids, states): + pos = tf.shape(ids)[1] - 1 + + # We have to assert the values of state inline here since we can't fetch + # them out of the loop! + with tf.control_dependencies( + [tf.assert_equal(states["state"], expected_states[pos])]): + logits = tf.to_float(tf.log(probabilities[pos, :])) + + states["state"] += tf.constant([[3.], [7.]]) + return logits, states + + states = { + "state": tf.zeros((batch_size, 1)), + } + + final_ids, _ = beam_search.beam_search( + symbols_to_logits, + initial_ids, + beam_size, + decode_length, + vocab_size, + 0.0, + eos_id=1, + states=states) + + with self.test_session() as sess: + # Catch and fail so that the testing framework doesn't think it's an error + try: + sess.run(final_ids) + except tf.errors.InvalidArgumentError, e: + raise AssertionError(e.message) if __name__ == "__main__": tf.test.main() From 4cc82df6313657d25d4ffc089ede7c1cb8b152ae Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Wed, 11 Oct 2017 18:04:05 -0700 Subject: [PATCH 0488/4095] Fix crash when decode_interactive PiperOrigin-RevId: 171903063 --- tensor2tensor/utils/t2t_model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index e45aa35a7..c3430be37 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -335,7 +335,7 @@ def _slow_greedy_infer(self, features, decode_length, last_position_only): # Save the targets in a var and reassign it after the tf.while loop to avoid # having targets being in a 'while' frame. This ensures targets when used # in metric functions stays in the same frame as other vars. - targets_old = features["targets"] + targets_old = features.get("targets", None) def infer_step(recent_output, recent_logits, unused_loss): """Inference step.""" @@ -399,7 +399,8 @@ def infer_step(recent_output, recent_logits, unused_loss): if inputs_old is not None: # Restore to not confuse Estimator. features["inputs"] = inputs_old # Reassign targets back to the previous value. - features["targets"] = targets_old + if targets_old is not None: + features["targets"] = targets_old losses = {"training": loss} if "partial_targets" in features: partial_target_length = tf.shape(features["partial_targets"])[1] From 4c0a023e8fa066a136722a39dad5c061757cf135 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 11 Oct 2017 18:16:50 -0700 Subject: [PATCH 0489/4095] modify underlying_variable_ref to be compatible with TPU PiperOrigin-RevId: 171904164 --- tensor2tensor/layers/common_layers.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 1923a9e24..1b52a6ea7 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -1697,7 +1697,10 @@ def body(): def underlying_variable_ref(t): - """Find the underlying variable ref, ignoring Identity ops. + """Find the underlying variable ref. + + Traverses through Identity, ReadVariableOp, and Enter ops. + Stops when op type has Variable or VarHandle in name. Args: t: a Tensor @@ -1705,9 +1708,11 @@ def underlying_variable_ref(t): Returns: a Tensor that is a variable ref, or None on error. """ - while t.op.type == "Identity": + while t.op.type in ["Identity", "ReadVariableOp", "Enter"]: t = t.op.inputs[0] - if "Variable" in t.op.type: + + op_type = t.op.type + if "Variable" in op_type or "VarHandle" in op_type: return t else: return None From d3faf909caa627028b05be8110049a5eee06daba Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Wed, 11 Oct 2017 19:43:50 -0700 Subject: [PATCH 0490/4095] Fix formatting issue when decoding the results PiperOrigin-RevId: 171910224 --- tensor2tensor/utils/decoding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index f1a3bf0bc..c11fdef34 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -86,10 +86,10 @@ def log_decode_results(inputs, if targets is not None: decoded_targets = " ".join(map(str, targets.flatten())) else: - decoded_outputs = " ".join( + decoded_outputs = "".join( map(str, targets_vocab.decode(_save_until_eos(outputs.flatten())))) if targets is not None: - decoded_targets = " ".join( + decoded_targets = "".join( map(str, targets_vocab.decode(_save_until_eos(targets.flatten())))) tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) From 9aa3326429d812815d299cbf46490a0cd6abead7 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 12 Oct 2017 11:09:59 -0700 Subject: [PATCH 0491/4095] Updates to TPU eval (now restores and compiles, but TPU system does not initialize properly) PiperOrigin-RevId: 171983646 --- tensor2tensor/tpu/tpu_trainer.py | 14 ++--- tensor2tensor/tpu/tpu_trainer_lib.py | 77 +++++++++++++++++++--------- 2 files changed, 61 insertions(+), 30 deletions(-) diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index a156d11a2..8cda597d4 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -63,12 +63,14 @@ def main(unused_argv): batch_size=hparams.tpu_batch_size_per_shard * FLAGS.tpu_num_shards, log_device_placement=FLAGS.log_device_placement, iterations_per_loop=FLAGS.iterations_per_loop) - estimator.train( - lambda params: input_fn(tf.estimator.ModeKeys.TRAIN, params), - steps=FLAGS.train_steps) - estimator.evaluate( - lambda params: input_fn(tf.estimator.ModeKeys.EVAL, params), - steps=FLAGS.eval_steps) + if FLAGS.train_steps: + estimator.train( + lambda params: input_fn(tf.estimator.ModeKeys.TRAIN, params), + steps=FLAGS.train_steps) + if FLAGS.eval_steps: + estimator.evaluate( + lambda params: input_fn(tf.estimator.ModeKeys.EVAL, params), + steps=FLAGS.eval_steps) if __name__ == "__main__": diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 6f3c0130e..c514da2ad 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -13,13 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Library for training on TPU. See tpu_trainer.py.""" +"""Library for training on TPU. See tpu_trainer.py. -# TODO(rsepassi): -# * Fix EVAL (breaks when loading from checkpoint) -# * Support all decoders -# * Share more code with Problem.dataset and input_pipeline -# * Support PREDICT +Currently only supports training and evaluation for text-to-text problems. +""" from __future__ import absolute_import from __future__ import division @@ -38,6 +35,7 @@ from tensor2tensor.utils import registry import tensorflow as tf +from tensorflow.python.util import nest def get_input_fn(data_dir, problem, hparams): @@ -49,8 +47,6 @@ def input_fn(mode, params): num_threads = 4 if is_training else 1 batch_size = params["batch_size"] - data_file_patterns = [problem.filepattern(data_dir, mode)] - batching_scheme = { "boundaries": [], "batch_sizes": [batch_size], @@ -72,9 +68,9 @@ def decode_record(record): return decoded data_files = tf.contrib.slim.parallel_reader.get_data_files( - data_file_patterns) - dataset = tf.contrib.data.TFRecordDataset(data_files) - dataset = dataset.map(decode_record, num_threads=num_threads) + problem.filepattern(data_dir, mode)) + dataset = tf.data.TFRecordDataset(data_files) + dataset = dataset.map(decode_record, num_parallel_calls=num_threads) def _preprocess(example, problem, hparams, mode): example = problem.preprocess_example(example, mode, hparams) @@ -84,20 +80,25 @@ def _preprocess(example, problem, hparams, mode): dataset = dataset.map( lambda ex: _preprocess(ex, problem, hparams, mode), - num_threads=num_threads) + num_parallel_calls=num_threads) def _valid_size(example): - return data_reader.example_valid_size(example, - batching_scheme["min_length"], - batching_scheme["max_length"]) + return data_reader.example_valid_size( + example, batching_scheme["min_length"], batching_scheme["max_length"]) dataset = dataset.filter(_valid_size) if is_training: dataset = dataset.shuffle(100) - dataset = dataset.repeat(None) + # TODO(rsepassi): In eval mode, should not repeat + dataset = dataset.repeat(None) dataset = data_reader.padded_batch(dataset, batching_scheme["batch_sizes"][0], batching_scheme["padded_shapes"]) + + if not is_training: + dataset = dataset.map( + lambda f: pad_batch(f, batch_size), num_parallel_calls=num_threads) + dataset.prefetch(1) train_features = dataset.make_one_shot_iterator().get_next() @@ -111,13 +112,6 @@ def _valid_size(example): while len(targets.get_shape()) != 4: targets = tf.expand_dims(targets, axis=-1) - inputs_shape = inputs.get_shape().as_list() - inputs_shape[0] = batch_size - inputs.set_shape(inputs_shape) - targets_shape = targets.get_shape().as_list() - targets_shape[0] = batch_size - targets.set_shape(targets_shape) - train_features["inputs"] = inputs train_features["targets"] = targets @@ -126,6 +120,23 @@ def _valid_size(example): return input_fn +def pad_batch(features, batch_size): + """Pad each feature in features to batch_size on dim 0.""" + ts = [] + for t in nest.flatten(features): + before_pads = [0] * t.get_shape().ndims + after_pads = [0] * t.get_shape().ndims + batch_pad = tf.convert_to_tensor(batch_size) - tf.shape(t)[0] + after_pads[0] = batch_pad + pads = list(zip(before_pads, after_pads)) + old_shape = t.get_shape().as_list() + old_shape[0] = batch_size + t = tf.pad(t, pads) + t.set_shape(old_shape) + ts.append(t) + return nest.pack_sequence_as(features, ts) + + def get_model_fn(model, hp, use_tpu=True): """Get simple T2T model fn.""" @@ -152,6 +163,11 @@ def model_fn(features, labels, mode, params, config): outputs = model_class.model_fn_body(features) logits = target_modality.top(outputs, labels) + # Ensure the length is known statically + shape = [None] * logits.get_shape().ndims + shape[1] = hparams.max_length + logits.set_shape(logits.get_shape().merge_with(shape)) + # Loss loss_num, loss_den = target_modality.loss(logits, labels) loss = loss_num / tf.maximum(1.0, loss_den) @@ -159,6 +175,7 @@ def model_fn(features, labels, mode, params, config): if mode == tf.estimator.ModeKeys.EVAL: problem = hp.problem_instances[0] eval_metrics_fn = create_eval_metrics_fn(problem) + _remove_summaries() return tf.contrib.tpu.TPUEstimatorSpec( mode, eval_metrics=(eval_metrics_fn, [logits, orig_features["targets"]]), @@ -192,6 +209,13 @@ def model_fn(features, labels, mode, params, config): return model_fn +TPU_METRIC_BLACKLIST = set([ + metrics.Metrics.APPROX_BLEU, + metrics.Metrics.ROUGE_2_F, + metrics.Metrics.ROUGE_L_F, +]) + + def create_eval_metrics_fn(problem): """Create the metrics_fn that TPUEstimatorSpec expects.""" @@ -206,7 +230,11 @@ def wrapped_metric_fn(logits, labels): metric_fns = [] eval_metrics = problem.eval_metrics() + for metric in eval_metrics: + if metric in TPU_METRIC_BLACKLIST: + tf.logging.warn("Skipping eval metric %s in TPU_METRIC_BLACKLIST", metric) + continue name = "metrics-%s/%s" % (problem.name, metric) metric_fns.append((name, make_metric_fn(metrics.METRICS_FNS[metric]))) @@ -257,7 +285,8 @@ def make_estimator(model_fn, save_summary_steps=0, save_checkpoints_steps=save_checkpoints_steps, tpu_config=tpu_config, - master=master) + master=master, + evaluation_master=master) return tf.contrib.tpu.TPUEstimator( model_fn=model_fn, From dc190ec8bbf79ffa5a6bcc8e6ab04b02b076c510 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 12 Oct 2017 13:51:02 -0700 Subject: [PATCH 0492/4095] internal merege PiperOrigin-RevId: 172006716 --- tensor2tensor/utils/metrics.py | 52 ++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 872c9f141..b4d82d97d 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -43,6 +43,8 @@ class Metrics(object): ROUGE_2_F = "rouge_2_fscore" ROUGE_L_F = "rouge_L_fscore" EDIT_DISTANCE = "edit_distance" + SET_PRECISION = "set_precision" + SET_RECALL = "set_recall" def padded_rmse(predictions, labels, weights_fn=common_layers.weights_all): @@ -189,6 +191,54 @@ def padded_accuracy(predictions, return tf.to_float(tf.equal(outputs, padded_labels)), weights +def set_precision(predictions, + labels, + weights_fn=common_layers.weights_nonzero): + """Precision of set predictions. + + Args: + predictions : A Tensor of scores of shape [batch, nlabels]. + labels: A Tensor of int32s giving true set elements, + of shape [batch, seq_length]. + weights_fn: A function to weight the elements. + + Returns: + hits: A Tensor of shape [batch, nlabels]. + weights: A Tensor of shape [batch, nlabels]. + """ + with tf.variable_scope("set_precision", values=[predictions, labels]): + labels = tf.squeeze(labels, [2, 3]) + weights = weights_fn(labels) + labels = tf.one_hot(labels, predictions.shape[-1]) + labels = tf.reduce_max(labels, axis=1) + labels = tf.cast(labels, tf.bool) + return tf.to_float(tf.equal(labels, predictions)), weights + + +def set_recall(predictions, + labels, + weights_fn=common_layers.weights_nonzero): + """Recall of set predictions. + + Args: + predictions : A Tensor of scores of shape [batch, nlabels]. + labels: A Tensor of int32s giving true set elements, + of shape [batch, seq_length]. + weights_fn: A function to weight the elements. + + Returns: + hits: A Tensor of shape [batch, nlabels]. + weights: A Tensor of shape [batch, nlabels]. + """ + with tf.variable_scope("set_recall", values=[predictions, labels]): + labels = tf.squeeze(labels, [2, 3]) + weights = weights_fn(labels) + labels = tf.one_hot(labels, predictions.shape[-1]) + labels = tf.reduce_max(labels, axis=1) + labels = tf.cast(labels, tf.bool) + return tf.to_float(tf.equal(labels, predictions)), weights + + def create_evaluation_metrics(problems, model_hparams): """Creates the evaluation metrics for the model. @@ -281,4 +331,6 @@ def wrapped_metric_fn(): Metrics.ROUGE_2_F: rouge.rouge_2_fscore, Metrics.ROUGE_L_F: rouge.rouge_l_fscore, Metrics.EDIT_DISTANCE: sequence_edit_distance, + Metrics.SET_PRECISION: set_precision, + Metrics.SET_RECALL: set_recall, } From ee922bd7a90ea16417b09b0df9638d2a1ba2a22e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 12 Oct 2017 14:55:05 -0700 Subject: [PATCH 0493/4095] Remove added var scopes in @recompute_grad and @fn_with_custom_grad PiperOrigin-RevId: 172016510 --- tensor2tensor/layers/common_layers.py | 14 +++++++------- tensor2tensor/layers/rev_block.py | 5 ++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 1b52a6ea7..08fd2f56b 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -1943,13 +1943,13 @@ def _fn_with_custom_grad(fn, inputs, grad_fn, use_global_vars=False): Returns: fn(*inputs) """ - with tf.variable_scope(None, default_name="fn_with_custom_grad") as vs: - inputs = list(inputs) - outputs = fn(*inputs) - if use_global_vars: - train_vars = list(vs.global_variables()) - else: - train_vars = list(vs.trainable_variables()) + vs = tf.get_variable_scope() + get_vars_fn = (vs.global_variables if use_global_vars else + vs.trainable_variables) + len_before_vars = len(get_vars_fn()) + inputs = list(inputs) + outputs = fn(*inputs) + train_vars = get_vars_fn()[len_before_vars:] if grad_fn is None: return outputs diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 5804e4d8f..1eb988c4c 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -365,8 +365,7 @@ def grad_fn(inputs, variables, outputs, output_grads): @common_layers.fn_with_custom_grad(grad_fn) def fn_with_recompute(*args): - with tf.variable_scope(None, default_name="recompute") as vs: - cached_vs.append(vs) - return fn(*args) + cached_vs.append(tf.get_variable_scope()) + return fn(*args) return fn_with_recompute(*args) From 39fd769cc83e538dd4f32cafaddc1d5287a69f24 Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Thu, 12 Oct 2017 16:42:07 -0700 Subject: [PATCH 0494/4095] Add sampling with temperature and cifar10 8 by 8 dataset. PiperOrigin-RevId: 172031867 --- tensor2tensor/data_generators/image.py | 42 +++++++++++++++----------- tensor2tensor/layers/common_hparams.py | 1 + tensor2tensor/utils/t2t_model.py | 8 +++-- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index d03a65d9e..df497019a 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -42,6 +42,12 @@ import tensorflow as tf +def resize_by_area(img, size): + """image resize function used by quite a few image problems.""" + return tf.to_int64( + tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) + + class ImageProblem(problem.Problem): def example_reading_spec(self, label_key=None): @@ -93,16 +99,12 @@ class ImageCeleba(ImageProblem): def preprocess_example(self, example, unused_mode, unused_hparams): - def resize(img, size): - return tf.to_int64( - tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) - inputs = example["inputs"] # Remove boundaries in CelebA images. Remove 40 pixels each side # vertically and 20 pixels each side horizontally. inputs = tf.image.crop_to_bounding_box(inputs, 40, 20, 218 - 80, 178 - 40) - example["inputs"] = resize(inputs, 8) - example["targets"] = resize(inputs, 32) + example["inputs"] = resize_by_area(inputs, 8) + example["targets"] = resize_by_area(inputs, 32) return example def hparams(self, defaults, unused_model_hparams): @@ -388,14 +390,10 @@ def dataset_filename(self): def preprocess_example(self, example, unused_mode, unused_hparams): - def resize(img, size): - return tf.to_int64( - tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) - inputs = example["inputs"] # For Img2Img resize input and output images as desired. - example["inputs"] = resize(inputs, 8) - example["targets"] = resize(inputs, 32) + example["inputs"] = resize_by_area(inputs, 8) + example["targets"] = resize_by_area(inputs, 32) return example def hparams(self, defaults, unused_model_hparams): @@ -654,6 +652,18 @@ def preprocess_example(self, example, mode, unused_hparams): return example +@registry.register_problem +class ImageCifar10Plain8(ImageCifar10): + """CIFAR-10 rescaled to 8x8 for output: Conditional image generation.""" + + def dataset_filename(self): + return "image_cifar10_plain" # Reuse CIFAR-10 plain data. + + def preprocess_example(self, example, mode, unused_hparams): + example["inputs"] = resize_by_area(example["inputs"], 8) + return example + + @registry.register_problem class Img2imgCifar10(ImageCifar10): """CIFAR-10 rescaled to 8x8 for input and 32x32 for output.""" @@ -663,14 +673,10 @@ def dataset_filename(self): def preprocess_example(self, example, unused_mode, unused_hparams): - def resize(img, size): - return tf.to_int64( - tf.image.resize_images(img, [size, size], tf.image.ResizeMethod.AREA)) - inputs = example["inputs"] # For Img2Img resize input and output images as desired. - example["inputs"] = resize(inputs, 8) - example["targets"] = resize(inputs, 32) + example["inputs"] = resize_by_area(inputs, 8) + example["targets"] = resize_by_area(inputs, 32) return example def hparams(self, defaults, unused_model_hparams): diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index a701cf4fa..4aacf2492 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -62,6 +62,7 @@ def basic_params1(): learning_rate_cosine_cycle_steps=250000, learning_rate=0.1, sampling_method="argmax", # "argmax" or "random" + sampling_temp=1.0, # temperature for sampling problem_choice="adaptive", # "uniform", "adaptive", "distributed" # expand the logits a piece at a time - saves memory. factored_logits=int(False), diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index c3430be37..04c7dcfc4 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -427,15 +427,17 @@ def sample(self, features, last_position_only=False): else: assert self._hparams.sampling_method == "random" - def _multinomial_squeeze(logits): - reshaped_logits = tf.reshape(logits, [-1, tf.shape(logits)[-1]]) + def _multinomial_squeeze(logits, temperature=1.0): + reshaped_logits = ( + tf.reshape(logits, [-1, tf.shape(logits)[-1]])/temperature) choices = tf.multinomial(reshaped_logits, 1) choices = tf.reshape(choices, tf.shape(logits)[:logits.get_shape().ndims - 1]) return choices sharded_samples = self._data_parallelism(_multinomial_squeeze, - sharded_logits) + sharded_logits, + self._hparams.sampling_temp) return tf.concat(sharded_samples, 0), sharded_logits, losses def _shard_features(self, features): # pylint: disable=missing-docstring From 43dbf4c3b1ca76f0b16c4b4b1e41f5ca45d777a6 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 12 Oct 2017 17:43:05 -0700 Subject: [PATCH 0495/4095] First (simple) version of scheduled sampling. PiperOrigin-RevId: 172038992 --- tensor2tensor/data_generators/wmt.py | 2 + tensor2tensor/layers/common_hparams.py | 13 ++++++ tensor2tensor/utils/t2t_model.py | 58 ++++++++++++++++++++++++-- 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py index 89cc7bd41..61716d012 100644 --- a/tensor2tensor/data_generators/wmt.py +++ b/tensor2tensor/data_generators/wmt.py @@ -375,6 +375,8 @@ def _compile_data(tmp_dir, datasets, filename): compressed_filename = os.path.basename(url) compressed_filepath = os.path.join(tmp_dir, compressed_filename) + generator_utils.maybe_download(tmp_dir, compressed_filename, url) + if dataset[1][0] == "tsv": _, src_column, trg_column, glob_pattern = dataset[1] filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 4aacf2492..d2d8bb2e5 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -160,6 +160,19 @@ def basic_params1(): # entire inputs portion. This removes the challenge of # autoregressively predicting the inputs portion. prepend_mode="none", + # Scheduled sampling is interesting for auto-regressive models. + # It runs an additional step using the generated output as autoregressive + # targets, which can improve the models inference results later. The + # parameter scheduled_sampling_prob determines with what probability + # will such additional step be run. It's turned off (0.0) by default. + # This probability will exponentially warm up for the number of + # steps determined by scheduled_sampling_warmup_steps. + # The tensor used for the second step will consist of outputs from + # the first step mixed with gold truth, with the proportion of gold + # determined by scheduled_sampling_gold_mixin_prob. + scheduled_sampling_prob=0.0, + scheduled_sampling_warmup_steps=50000, + scheduled_sampling_gold_mixin_prob=0.5, # This is the actual batch size, *not* tokens per batch (i.e. for # language models this is the number of sentences in the batch) tpu_batch_size_per_shard=24,) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 04c7dcfc4..c54b38f3f 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -26,6 +26,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin +from tensor2tensor.layers import common_layers from tensor2tensor.utils import beam_search from tensor2tensor.utils import expert_utils as eu from tensor2tensor.utils import registry @@ -523,9 +524,9 @@ def model_fn(self, features, skip=False, last_position_only=False): with tf.variable_scope(target_modality.name, reuse=target_reuse): if not last_position_only: sharded_logits = target_modality.top_sharded( - body_outputs, sharded_features["targets"], self._data_parallelism) + body_outputs, sharded_features["targets"], dp) training_loss = target_modality.loss_sharded( - sharded_logits, sharded_features["targets"], self._data_parallelism) + sharded_logits, sharded_features["targets"], dp) training_loss *= self._problem_hparams.loss_multiplier else: @@ -543,9 +544,60 @@ def model_fn(self, features, skip=False, last_position_only=False): last_position_targets, self._data_parallelism) training_loss = None + losses["training"] = training_loss + + # Scheduled sampling. + do_scheduled_sampling = ( # Only do it if training and set for it. + self._hparams.scheduled_sampling_prob > 0.0 and + self._hparams.mode == tf.estimator.ModeKeys.TRAIN and + not skip) + if do_scheduled_sampling: + + def sample(x): + """Multinomial sampling from a n-dimensional tensor.""" + vocab_size = target_modality.top_dimensionality + samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]), 1) + reshaped_samples = tf.reshape(samples, tf.shape(x)[:-1]) + return tf.to_int32(reshaped_samples) + + def mix_gold_sampled(gold_targets, sampled_targets): + return tf.where( + tf.less(tf.random_uniform(tf.shape(sampled_targets)), + self._hparams.scheduled_sampling_gold_mixin_prob), + gold_targets, sampled_targets) + + def sampled_results(): + """Generate scheduled sampling results.""" + sampled_targets = dp(sample, sharded_logits) + new_targets = dp(mix_gold_sampled, + sharded_features["targets"], sampled_targets) + new_features = transformed_features + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + with tf.variable_scope(target_modality.name): + new_features["targets"] = target_modality.targets_bottom_sharded( + new_targets, dp) + with tf.variable_scope("body"): + body_outputs, losses = self.model_fn_body_sharded(new_features) + if not isinstance(losses, dict): # If it's a single extra loss. + losses = {"extra": losses} + with tf.variable_scope(target_modality.name): + new_sharded_logits = target_modality.top_sharded( + body_outputs, sharded_features["targets"], dp) + training_loss = target_modality.loss_sharded( + sharded_logits, sharded_features["targets"], dp) + training_loss *= self._problem_hparams.loss_multiplier + losses["training"] = training_loss + return new_sharded_logits, losses + # Run the above conditionally. + prob = self._hparams.scheduled_sampling_prob + prob *= common_layers.inverse_exp_decay( + self._hparams.scheduled_sampling_warmup_steps, min_value=0.001) + sharded_logits, losses = tf.cond( + tf.less(tf.random_uniform([]), prob), + sampled_results, + lambda: (sharded_logits, losses)) tf.logging.info("This model_fn took %.3f sec." % (time.time() - start_time)) - losses["training"] = training_loss return sharded_logits, losses def model_fn_body_sharded(self, sharded_features): From 545ec342ed816fc4524dedd545380e0160a84720 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 13 Oct 2017 12:02:13 -0700 Subject: [PATCH 0496/4095] Add support for custom record delimiter in decoding PiperOrigin-RevId: 172128016 --- tensor2tensor/utils/decoding.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index c11fdef34..5dac0dd5f 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -52,7 +52,8 @@ def decode_hparams(overrides=""): return_beams=False, max_input_size=-1, identity_output=False, - num_samples=-1) + num_samples=-1, + delimiter="\n") hp = hp.parse(overrides) return hp @@ -176,8 +177,8 @@ def decode_from_dataset(estimator, # Write out predictions if decode_to_file passed if decode_to_file: for decoded_output, decoded_target in decoded_outputs: - output_file.write(str(decoded_output) + "\n") - target_file.write(str(decoded_target) + "\n") + output_file.write(str(decoded_output) + decode_hp.delimiter) + target_file.write(str(decoded_target) + decode_hp.delimiter) if (decode_hp.num_samples >= 0 and num_predictions >= decode_hp.num_samples): @@ -203,7 +204,8 @@ def decode_from_file(estimator, filename, decode_hp, decode_to_file=None): targets_vocab = hparams.problems[problem_id].vocabulary["targets"] problem_name = FLAGS.problems.split("-")[problem_id] tf.logging.info("Performing decoding from a file.") - sorted_inputs, sorted_keys = _get_sorted_inputs(filename, decode_hp.shards) + sorted_inputs, sorted_keys = _get_sorted_inputs(filename, decode_hp.shards, + decode_hp.delimiter) num_decode_batches = (len(sorted_inputs) - 1) // decode_hp.batch_size + 1 def input_fn(): @@ -251,7 +253,7 @@ def input_fn(): tf.logging.info("Writing decodes into %s" % decode_filename) outfile = tf.gfile.Open(decode_filename, "w") for index in range(len(sorted_inputs)): - outfile.write("%s\n" % (decodes[sorted_keys[index]])) + outfile.write("%s%s" % (decodes[sorted_keys[index]], decode_hp.delimiter)) def _decode_filename(base_filename, problem_name, decode_hp): @@ -472,13 +474,14 @@ def show_and_save_image(img, save_path): plt.savefig(save_path) -def _get_sorted_inputs(filename, num_shards=1): +def _get_sorted_inputs(filename, num_shards=1, delimiter="\n"): """Returning inputs sorted according to length. Args: filename: path to file with inputs, 1 per line. num_shards: number of input shards. If > 1, will read from file filename.XX, where XX is FLAGS.worker_id. + delimiter: str, delimits records in the file. Returns: a sorted list of inputs @@ -490,8 +493,12 @@ def _get_sorted_inputs(filename, num_shards=1): decode_filename = filename + ("%.2d" % FLAGS.worker_id) else: decode_filename = filename - inputs = [line.strip() for line in tf.gfile.Open(decode_filename)] - input_lens = [(i, len(line.strip().split())) for i, line in enumerate(inputs)] + + with tf.gfile.Open(decode_filename) as f: + text = f.read() + records = text.split(delimiter) + inputs = [record.strip() for record in records] + input_lens = [(i, len(line.split())) for i, line in enumerate(inputs)] sorted_input_lens = sorted(input_lens, key=operator.itemgetter(1)) # We'll need the keys to rearrange the inputs back into their original order sorted_keys = {} @@ -553,8 +560,8 @@ def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring feature_map["problem_choice"]) features["input_space_id"] = input_space_id features["target_space_id"] = target_space_id - features["decode_length"] = (IMAGE_DECODE_LENGTH - if input_is_image else inputs[1]) + features["decode_length"] = ( + IMAGE_DECODE_LENGTH if input_is_image else inputs[1]) features["inputs"] = x return features @@ -588,7 +595,7 @@ def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring features["problem_choice"] = feature_map["problem_choice"] features["input_space_id"] = input_space_id features["target_space_id"] = target_space_id - features["decode_length"] = (IMAGE_DECODE_LENGTH - if input_is_image else tf.shape(x)[1] + 50) + features["decode_length"] = ( + IMAGE_DECODE_LENGTH if input_is_image else tf.shape(x)[1] + 50) features["inputs"] = x return features From d58af0c4b983f9f51899ed95ff2ad5dea85e7436 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 13 Oct 2017 17:28:17 -0700 Subject: [PATCH 0497/4095] v1.2.5 PiperOrigin-RevId: 172167687 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d097b91d6..5b6f4690e 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.2.4', + version='1.2.5', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From fa9ad63965e5c6ec528c5cf3ff91c47e44a4e3d9 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 13 Oct 2017 18:38:11 -0700 Subject: [PATCH 0498/4095] Py3 fixes PiperOrigin-RevId: 172172540 --- tensor2tensor/data_generators/cnn_dailymail.py | 2 +- tensor2tensor/utils/beam_search_test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index 2f8e9cf30..09c1645a1 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -74,7 +74,7 @@ def story_generator(tmp_dir): for path in paths: for story_file in tf.gfile.Glob(path + "*"): story = u"" - for line in tf.gfile.Open(story_file): + for line in tf.gfile.Open(story_file, "rb"): line = unicode(line, "utf-8") if six.PY2 else line.decode("utf-8") story += line yield story diff --git a/tensor2tensor/utils/beam_search_test.py b/tensor2tensor/utils/beam_search_test.py index f96094416..fc15eb3bc 100644 --- a/tensor2tensor/utils/beam_search_test.py +++ b/tensor2tensor/utils/beam_search_test.py @@ -318,7 +318,7 @@ def symbols_to_logits(ids, states): # Catch and fail so that the testing framework doesn't think it's an error try: sess.run(final_ids) - except tf.errors.InvalidArgumentError, e: + except tf.errors.InvalidArgumentError as e: raise AssertionError(e.message) def testStateBeamTwo(self): @@ -366,7 +366,7 @@ def symbols_to_logits(ids, states): # Catch and fail so that the testing framework doesn't think it's an error try: sess.run(final_ids) - except tf.errors.InvalidArgumentError, e: + except tf.errors.InvalidArgumentError as e: raise AssertionError(e.message) if __name__ == "__main__": From a18541a38f2f50550a8ebd95bedbac76ee487776 Mon Sep 17 00:00:00 2001 From: pltrdy <pltrdy@gmail.com> Date: Mon, 16 Oct 2017 21:15:34 +0200 Subject: [PATCH 0499/4095] fixing encoding issues on cnn/dailymail (#1) --- tensor2tensor/data_generators/cnn_dailymail.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index e4f997f41..c0f6756a5 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -102,7 +102,7 @@ def generate_hash(inp): urls = [] for line in tf.gfile.Open(url_file): - urls.append(line.strip()) + urls.append(line.strip().encode('utf-8')) filelist = [] for url in urls: @@ -132,7 +132,7 @@ def fix_run_on_sents(line): story = [] summary = [] reading_highlights = False - for line in tf.gfile.Open(story_file): + for line in tf.gfile.Open(story_file, "rb"): line = unicode(line.strip(), "utf-8") if six.PY2 else line.strip().decode("utf-8") line = fix_run_on_sents(line) if line == "": From eacde9d9f5b4dede91bb95d4c38083bc70824b30 Mon Sep 17 00:00:00 2001 From: Vincent Nguyen <vince62s@yahoo.com> Date: Thu, 19 Oct 2017 14:19:22 +0200 Subject: [PATCH 0500/4095] Rename wmt.py to translate.py split language pairs for clarity dissociate ende / enfr, make them independant --- tensor2tensor/bin/t2t-datagen | 0 tensor2tensor/bin/t2t-decoder | 0 tensor2tensor/bin/t2t-make-tf-configs | 0 tensor2tensor/bin/t2t-trainer | 0 tensor2tensor/data_generators/all_problems.py | 7 +- .../data_generators/generator_utils.py | 41 +- tensor2tensor/data_generators/ice_parsing.py | 2 +- tensor2tensor/data_generators/translate.py | 262 +++++++ .../data_generators/translate_encs.py | 133 ++++ .../data_generators/translate_ende.py | 184 +++++ .../data_generators/translate_enfr.py | 146 ++++ .../data_generators/translate_enmk.py | 91 +++ .../data_generators/translate_enzh.py | 107 +++ .../{wmt_test.py => translate_test.py} | 4 +- tensor2tensor/data_generators/wmt.py | 718 ------------------ 15 files changed, 934 insertions(+), 761 deletions(-) mode change 100644 => 100755 tensor2tensor/bin/t2t-datagen mode change 100644 => 100755 tensor2tensor/bin/t2t-decoder mode change 100644 => 100755 tensor2tensor/bin/t2t-make-tf-configs mode change 100644 => 100755 tensor2tensor/bin/t2t-trainer create mode 100644 tensor2tensor/data_generators/translate.py create mode 100644 tensor2tensor/data_generators/translate_encs.py create mode 100644 tensor2tensor/data_generators/translate_ende.py create mode 100644 tensor2tensor/data_generators/translate_enfr.py create mode 100644 tensor2tensor/data_generators/translate_enmk.py create mode 100644 tensor2tensor/data_generators/translate_enzh.py rename tensor2tensor/data_generators/{wmt_test.py => translate_test.py} (96%) delete mode 100644 tensor2tensor/data_generators/wmt.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100644 new mode 100755 diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder old mode 100644 new mode 100755 diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs old mode 100644 new mode 100755 diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100644 new mode 100755 diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 5877b541e..1a65c628a 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -33,7 +33,12 @@ from tensor2tensor.data_generators import ptb from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wiki -from tensor2tensor.data_generators import wmt +from tensor2tensor.data_generators import translate +from tensor2tensor.data_generators import translate_enfr +from tensor2tensor.data_generators import translate_ende +from tensor2tensor.data_generators import translate_encs +from tensor2tensor.data_generators import translate_enzh +from tensor2tensor.data_generators import translate_enmk from tensor2tensor.data_generators import wsj_parsing diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index c8fe03564..e5a0bbb6d 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -263,42 +263,6 @@ def gunzip_file(gz_path, new_path): for line in gz_file: new_file.write(line) - -# TODO(aidangomez): en-fr tasks are significantly over-represented below -_DATA_FILE_URLS = [ - # German-English - [ - "http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long - [ - "training-parallel-nc-v11/news-commentary-v11.de-en.en", - "training-parallel-nc-v11/news-commentary-v11.de-en.de" - ] - ], - # German-English & French-English - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", [ - "commoncrawl.de-en.en", "commoncrawl.de-en.de", - "commoncrawl.fr-en.en", "commoncrawl.fr-en.fr" - ] - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", [ - "training/europarl-v7.de-en.en", "training/europarl-v7.de-en.de", - "training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr" - ] - ], - # French-English - [ - "http://www.statmt.org/wmt10/training-giga-fren.tar", - ["giga-fren.release2.fixed.en.gz", "giga-fren.release2.fixed.fr.gz"] - ], - [ - "http://www.statmt.org/wmt13/training-parallel-un.tgz", - ["un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr"] - ], -] - - def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, generator): """Inner implementation for vocab generators. @@ -341,9 +305,8 @@ def get_or_generate_vocab(data_dir, tmp_dir, vocab_filename, vocab_size, - sources=None): - """Generate a vocabulary from the datasets in sources (_DATA_FILE_URLS).""" - sources = sources or _DATA_FILE_URLS + sources): + """Generate a vocabulary from the datasets in sources.""" def generate(): tf.logging.info("Generating vocab from: %s", str(sources)) diff --git a/tensor2tensor/data_generators/ice_parsing.py b/tensor2tensor/data_generators/ice_parsing.py index 2aa261cd4..99586ef83 100644 --- a/tensor2tensor/data_generators/ice_parsing.py +++ b/tensor2tensor/data_generators/ice_parsing.py @@ -32,7 +32,7 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators.wmt import tabbed_generator +from tensor2tensor.data_generators.translate import tabbed_generator from tensor2tensor.utils import registry diff --git a/tensor2tensor/data_generators/translate.py b/tensor2tensor/data_generators/translate.py new file mode 100644 index 000000000..1de25bc47 --- /dev/null +++ b/tensor2tensor/data_generators/translate.py @@ -0,0 +1,262 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + + +class TranslateProblem(problem.Text2TextProblem): + """Base class for translation problems.""" + + @property + def is_character_level(self): + return False + + @property + def num_shards(self): + return 100 + + @property + def use_subword_tokenizer(self): + return True + + +# Generic generators used later for multiple problems. + + +def character_generator(source_path, target_path, character_vocab, eos=None): + """Generator for sequence-to-sequence tasks that just uses characters. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are characters from the source lines converted to integers, + and targets are characters from the target lines, also converted to integers. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + character_vocab: a TextEncoder to encode the characters. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from characters in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = character_vocab.encode(source.strip()) + eos_list + target_ints = character_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + + +def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): + r"""Generator for sequence-to-sequence tasks using tabbed files. + + Tokens are derived from text files where each line contains both + a source and a target string. The two strings are separated by a tab + character ('\t'). It yields dictionaries of "inputs" and "targets" where + inputs are characters from the source lines converted to integers, and + targets are characters from the target lines, also converted to integers. + + Args: + source_path: path to the file with source and target sentences. + source_vocab: a SubwordTextEncoder to encode the source string. + target_vocab: a SubwordTextEncoder to encode the target string. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from characters in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + for line in source_file: + if line and "\t" in line: + parts = line.split("\t", 1) + source, target = parts[0].strip(), parts[1].strip() + source_ints = source_vocab.encode(source) + eos_list + target_ints = target_vocab.encode(target) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + + +def token_generator(source_path, target_path, token_vocab, eos=None): + """Generator for sequence-to-sequence tasks that uses tokens. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are token ids from the " "-split source (and target, resp.) lines + converted to integers using the token_map. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + token_vocab: text_encoder.TextEncoder object. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from tokens in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = token_vocab.encode(source.strip()) + eos_list + target_ints = token_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + + +def bi_vocabs_token_generator(source_path, + target_path, + source_token_vocab, + target_token_vocab, + eos=None): + """Generator for sequence-to-sequence tasks that uses tokens. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are token ids from the " "-split source (and target, resp.) lines + converted to integers using the token_map. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + source_token_vocab: text_encoder.TextEncoder object. + target_token_vocab: text_encoder.TextEncoder object. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from tokens in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = source_token_vocab.encode(source.strip()) + eos_list + target_ints = target_token_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + +def _preprocess_sgm(line, is_sgm): + """Preprocessing to strip tags in SGM files.""" + if not is_sgm: + return line + # In SGM files, remove <srcset ...>, <p>, <doc ...> lines. + if line.startswith("<srcset") or line.startswith("</srcset"): + return "" + if line.startswith("<doc") or line.startswith("</doc"): + return "" + if line.startswith("<p>") or line.startswith("</p>"): + return "" + # Strip <seg> tags. + line = line.strip() + if line.startswith("<seg") and line.endswith("</seg>"): + i = line.index(">") + return line[i + 1:-6] # Strip first <seg ...> and last </seg>. + +def _compile_data(tmp_dir, datasets, filename): + """Concatenate all `datasets` and save to `filename`.""" + filename = os.path.join(tmp_dir, filename) + with tf.gfile.GFile(filename + ".lang1", mode="w") as lang1_resfile: + with tf.gfile.GFile(filename + ".lang2", mode="w") as lang2_resfile: + for dataset in datasets: + url = dataset[0] + compressed_filename = os.path.basename(url) + compressed_filepath = os.path.join(tmp_dir, compressed_filename) + + generator_utils.maybe_download(tmp_dir, compressed_filename, url) + + if dataset[1][0] == "tsv": + _, src_column, trg_column, glob_pattern = dataset[1] + filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) + if not filenames: + # Capture *.tgz and *.tar.gz too. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) + for tsv_filename in filenames: + if tsv_filename.endswith(".gz"): + new_filename = tsv_filename.strip(".gz") + generator_utils.gunzip_file(tsv_filename, new_filename) + tsv_filename = new_filename + with tf.gfile.GFile(tsv_filename, mode="r") as tsv_file: + for line in tsv_file: + if line and "\t" in line: + parts = line.split("\t") + source, target = parts[src_column], parts[trg_column] + lang1_resfile.write(source.strip() + "\n") + lang2_resfile.write(target.strip() + "\n") + else: + lang1_filename, lang2_filename = dataset[1] + lang1_filepath = os.path.join(tmp_dir, lang1_filename) + lang2_filepath = os.path.join(tmp_dir, lang2_filename) + is_sgm = (lang1_filename.endswith("sgm") and + lang2_filename.endswith("sgm")) + + if not (os.path.exists(lang1_filepath) and + os.path.exists(lang2_filepath)): + # For .tar.gz and .tgz files, we read compressed. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + if lang1_filepath.endswith(".gz"): + new_filepath = lang1_filepath.strip(".gz") + generator_utils.gunzip_file(lang1_filepath, new_filepath) + lang1_filepath = new_filepath + if lang2_filepath.endswith(".gz"): + new_filepath = lang2_filepath.strip(".gz") + generator_utils.gunzip_file(lang2_filepath, new_filepath) + lang2_filepath = new_filepath + with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: + with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: + line1, line2 = lang1_file.readline(), lang2_file.readline() + while line1 or line2: + line1res = _preprocess_sgm(line1, is_sgm) + line2res = _preprocess_sgm(line2, is_sgm) + if line1res or line2res: + lang1_resfile.write(line1res.strip() + "\n") + lang2_resfile.write(line2res.strip() + "\n") + line1, line2 = lang1_file.readline(), lang2_file.readline() + + return filename + + diff --git a/tensor2tensor/data_generators/translate_encs.py b/tensor2tensor/data_generators/translate_encs.py new file mode 100644 index 000000000..118fdca23 --- /dev/null +++ b/tensor2tensor/data_generators/translate_encs.py @@ -0,0 +1,133 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import translate +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +_ENCS_TRAIN_DATASETS = [ + [ + ("https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/" + "11234/1-1458/data-plaintext-format.tar"), + ("tsv", 3, 2, "data.plaintext-format/*train.gz") + ], + [ + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long + ("training/news-commentary-v12.cs-en.en", + "training/news-commentary-v12.cs-en.cs") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + ("commoncrawl.cs-en.en", "commoncrawl.cs-en.cs") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + ("training/europarl-v7.cs-en.en", "training/europarl-v7.cs-en.cs") + ], +] +_ENCS_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newstest2013.en", "dev/newstest2013.cs") + ], +] + + +@registry.register_problem +class TranslateEncsWmt32k(TranslateProblem): + """Problem spec for WMT English-Czech translation.""" + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + @property + def vocab_name(self): + return "vocab.encs" + + def generator(self, data_dir, tmp_dir, train): + datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + tag = "train" if train else "dev" + vocab_datasets = [] + data_path = translate._compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) + # CzEng contains 100 gz files with tab-separated columns, so let's expect + # it is the first dataset in datasets and use the newly created *.lang{1,2} + # files for vocab construction. + if datasets[0][0].endswith("data-plaintext-format.tar"): + vocab_datasets.append([datasets[0][0], ["wmt_encs_tok_%s.lang1" % tag, + "wmt_encs_tok_%s.lang2" % tag]]) + datasets = datasets[1:] + vocab_datasets += [[item[0], [item[1][0], item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + vocab_datasets) + return translate.token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.CS_TOK + + +@registry.register_problem +class TranslateEncsWmtCharacters(TranslateProblem): + """Problem spec for WMT En-Cs character-based translation.""" + + @property + def is_character_level(self): + return True + + def generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + tag = "train" if train else "dev" + data_path = translate._compile_data(tmp_dir, datasets, "wmt_encs_chr_%s" % tag) + return translate.character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.CS_CHR + + diff --git a/tensor2tensor/data_generators/translate_ende.py b/tensor2tensor/data_generators/translate_ende.py new file mode 100644 index 000000000..17b30d8c5 --- /dev/null +++ b/tensor2tensor/data_generators/translate_ende.py @@ -0,0 +1,184 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import translate +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +_ENDE_TRAIN_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long + ("training/news-commentary-v12.de-en.en", + "training/news-commentary-v12.de-en.de") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + ("commoncrawl.de-en.en", "commoncrawl.de-en.de") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + ("training/europarl-v7.de-en.en", "training/europarl-v7.de-en.de") + ], +] +_ENDE_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newstest2013.en", "dev/newstest2013.de") + ], +] + + +def _get_wmt_ende_bpe_dataset(directory, filename): + """Extract the WMT en-de corpus `filename` to directory unless it's there.""" + train_path = os.path.join(directory, filename) + if not (tf.gfile.Exists(train_path + ".de") and + tf.gfile.Exists(train_path + ".en")): + url = ("https://drive.google.com/uc?export=download&id=" + "0B_bZck-ksdkpM25jRUN2X2UxMm8") + corpus_file = generator_utils.maybe_download_from_drive( + directory, "wmt16_en_de.tar.gz", url) + with tarfile.open(corpus_file, "r:gz") as corpus_tar: + corpus_tar.extractall(directory) + return train_path + + +@registry.register_problem +class TranslateEndeWmtBpe32k(TranslateProblem): + """Problem spec for WMT En-De translation, BPE version.""" + + @property + def targeted_vocab_size(self): + return 32000 + + @property + def vocab_name(self): + return "vocab.bpe" + + def feature_encoders(self, data_dir): + vocab_filename = os.path.join(data_dir, self.vocab_file) + encoder = text_encoder.TokenTextEncoder(vocab_filename, replace_oov="UNK") + return {"inputs": encoder, "targets": encoder} + + def generator(self, data_dir, tmp_dir, train): + """Instance of token generator for the WMT en->de task, training set.""" + dataset_path = ("train.tok.clean.bpe.32000" + if train else "newstest2013.tok.bpe.32000") + train_path = _get_wmt_ende_bpe_dataset(tmp_dir, dataset_path) + token_tmp_path = os.path.join(tmp_dir, self.vocab_file) + token_path = os.path.join(data_dir, self.vocab_file) + tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) + with tf.gfile.GFile(token_path, mode="a") as f: + f.write("UNK\n") # Add UNK to the vocab. + token_vocab = text_encoder.TokenTextEncoder(token_path, replace_oov="UNK") + return translate.token_generator(train_path + ".en", train_path + ".de", token_vocab, + EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_BPE_TOK + + @property + def target_space_id(self): + return problem.SpaceID.DE_BPE_TOK + + + +@registry.register_problem +class TranslateEndeWmt8k(TranslateProblem): + """Problem spec for WMT En-De translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def vocab_name(self): + return "vocab.ende" + + def generator(self, data_dir, tmp_dir, train): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, _ENDE_TRAIN_DATASETS) + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = translate._compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) + return translate.token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.DE_TOK + + +@registry.register_problem +class TranslateEndeWmt32k(TranslateEndeWmt8k): + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + +@registry.register_problem +class TranslateEndeWmtCharacters(TranslateProblem): + """Problem spec for WMT En-De translation.""" + + @property + def is_character_level(self): + return True + + @property + def vocab_name(self): + return "vocab.ende" + + def generator(self, _, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = translate._compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) + return translate.character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.DE_CHR + diff --git a/tensor2tensor/data_generators/translate_enfr.py b/tensor2tensor/data_generators/translate_enfr.py new file mode 100644 index 000000000..2ce983dd1 --- /dev/null +++ b/tensor2tensor/data_generators/translate_enfr.py @@ -0,0 +1,146 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import translate +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +_ENFR_TRAIN_DATASETS = [ + [ + "https://s3.amazonaws.com/opennmt-trainingdata/baseline-1M-enfr.tgz", + ("baseline-1M-enfr/baseline-1M_train.en", "baseline-1M-enfr/baseline-1M_train.en") + ], +# [ +# "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", +# ("commoncrawl.fr-en.en", "commoncrawl.fr-en.fr") +# ], +# [ +# "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", +# ("training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr") +# ], +# [ +# "http://www.statmt.org/wmt14/training-parallel-nc-v9.tgz", +# ("training/news-commentary-v9.fr-en.en", +# "training/news-commentary-v9.fr-en.fr") +# ], +# [ +# "http://www.statmt.org/wmt10/training-giga-fren.tar", +# ("giga-fren.release2.fixed.en.gz", "giga-fren.release2.fixed.fr.gz") +# ], +# [ +# "http://www.statmt.org/wmt13/training-parallel-un.tgz", +# ("un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr") +# ], +] +_ENFR_TEST_DATASETS = [ + [ + "https://s3.amazonaws.com/opennmt-trainingdata/baseline-1M-enfr.tgz", + ("baseline-1M-enfr/baseline-1M_valid.en", "baseline-1M-enfr/baseline-1M_valid.fr") + ], +# [ +# "http://data.statmt.org/wmt17/translation-task/dev.tgz", +# ("dev/newstest2013.en", "dev/newstest2013.fr") +# ], +] + +@registry.register_problem +class TranslateEnfrWmt8k(translate.TranslateProblem): + """Problem spec for WMT En-Fr translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def vocab_name(self): + return "vocab.enfr" + + def generator(self, data_dir, tmp_dir, train): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, _ENFR_TRAIN_DATASETS) + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = translate._compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) + return translate.token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.FR_TOK + + +@registry.register_problem +class TranslateEnfrWmt32k(TranslateEnfrWmt8k): + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + +@registry.register_problem +class TranslateEnfrWmtCharacters(translate.TranslateProblem): + """Problem spec for WMT En-Fr translation.""" + + @property + def is_character_level(self): + return True + + @property + def vocab_name(self): + return "vocab.enfr" + + def generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = translate._compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) + return translate.character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.FR_CHR + + + diff --git a/tensor2tensor/data_generators/translate_enmk.py b/tensor2tensor/data_generators/translate_enmk.py new file mode 100644 index 000000000..8cf13a2bb --- /dev/null +++ b/tensor2tensor/data_generators/translate_enmk.py @@ -0,0 +1,91 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import translate +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +# For Macedonian-English the SETimes corpus +# from http://nlp.ffzg.hr/resources/corpora/setimes/ is used. +# The original dataset has 207,777 parallel sentences. +# For training the first 205,777 sentences are used. +_MKEN_TRAIN_DATASETS = [[ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long + ("train.mk", "train.en") +]] + +# For development 1000 parallel sentences are used. +_MKEN_TEST_DATASETS = [[ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.dev.tgz", # pylint: disable=line-too-long + ("dev.mk", "dev.en") +]] + +@registry.register_problem +class TranslateEnmkSetimes32k(TranslateProblem): + """Problem spec for SETimes Mk-En translation.""" + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + @property + def vocab_name(self): + return "vocab.mken" + + def generator(self, data_dir, tmp_dir, train): + datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + source_datasets + target_datasets) + tag = "train" if train else "dev" + data_path = translate._compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) + # We generate English->X data by convention, to train reverse translation + # just add the "_rev" suffix to the problem name, e.g., like this. + # --problems=translate_enmk_setimes32k_rev + return translate.token_generator(data_path + ".lang2", data_path + ".lang1", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.MK_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py new file mode 100644 index 000000000..f4e68bd95 --- /dev/null +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -0,0 +1,107 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import translate +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +_ZHEN_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" + "training-parallel-nc-v12.tgz"), + ("training/news-commentary-v12.zh-en.zh", + "training/news-commentary-v12.zh-en.en")]] + +_ZHEN_TEST_DATASETS = [[ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newsdev2017-zhen-src.zh.sgm", "dev/newsdev2017-zhen-ref.en.sgm") +]] + +@registry.register_problem +class TranslateEnzhWmt8k(TranslateProblem): + """Problem spec for WMT Zh-En translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def num_shards(self): + return 10 # This is a small dataset. + + @property + def source_vocab_name(self): + return "vocab.zhen-zh.%d" % self.targeted_vocab_size + + @property + def target_vocab_name(self): + return "vocab.zhen-en.%d" % self.targeted_vocab_size + + def generator(self, data_dir, tmp_dir, train): + datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] + source_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, + source_datasets) + target_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.target_vocab_name, self.targeted_vocab_size, + target_datasets) + tag = "train" if train else "dev" + data_path = translate._compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) + # We generate English->X data by convention, to train reverse translation + # just add the "_rev" suffix to the problem name, e.g., like this. + # --problems=translate_enzh_wmt8k_rev + return translate.bi_vocabs_token_generator(data_path + ".lang2", data_path + ".lang1", + source_vocab, target_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.ZH_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + def feature_encoders(self, data_dir): + source_vocab_filename = os.path.join(data_dir, self.source_vocab_name) + target_vocab_filename = os.path.join(data_dir, self.target_vocab_name) + source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_token, + "targets": target_token, + } + + diff --git a/tensor2tensor/data_generators/wmt_test.py b/tensor2tensor/data_generators/translate_test.py similarity index 96% rename from tensor2tensor/data_generators/wmt_test.py rename to tensor2tensor/data_generators/translate_test.py index 441ceef59..f082c1a85 100644 --- a/tensor2tensor/data_generators/wmt_test.py +++ b/tensor2tensor/data_generators/translate_test.py @@ -27,7 +27,7 @@ import six from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import wmt +from tensor2tensor.data_generators import translate import tensorflow as tf @@ -52,7 +52,7 @@ def testCharacterGenerator(self): # Call character generator on the generated files. results_src, results_tgt = [], [] character_vocab = text_encoder.ByteTextEncoder() - for dictionary in wmt.character_generator( + for dictionary in translate.character_generator( tmp_file_path + ".src", tmp_file_path + ".tgt", character_vocab): self.assertEqual(sorted(list(dictionary)), ["inputs", "targets"]) results_src.append(dictionary["inputs"]) diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py deleted file mode 100644 index 61716d012..000000000 --- a/tensor2tensor/data_generators/wmt.py +++ /dev/null @@ -1,718 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Data generators for translation data-sets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tarfile - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import problem -from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import registry - -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - -# End-of-sentence marker. -EOS = text_encoder.EOS_ID - - -class TranslateProblem(problem.Text2TextProblem): - """Base class for translation problems.""" - - @property - def is_character_level(self): - return False - - @property - def num_shards(self): - return 100 - - @property - def vocab_name(self): - return "vocab.endefr" - - @property - def use_subword_tokenizer(self): - return True - - -# Generic generators used later for multiple problems. - - -def character_generator(source_path, target_path, character_vocab, eos=None): - """Generator for sequence-to-sequence tasks that just uses characters. - - This generator assumes the files at source_path and target_path have - the same number of lines and yields dictionaries of "inputs" and "targets" - where inputs are characters from the source lines converted to integers, - and targets are characters from the target lines, also converted to integers. - - Args: - source_path: path to the file with source sentences. - target_path: path to the file with target sentences. - character_vocab: a TextEncoder to encode the characters. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from characters in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target: - source_ints = character_vocab.encode(source.strip()) + eos_list - target_ints = character_vocab.encode(target.strip()) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - source, target = source_file.readline(), target_file.readline() - - -def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): - r"""Generator for sequence-to-sequence tasks using tabbed files. - - Tokens are derived from text files where each line contains both - a source and a target string. The two strings are separated by a tab - character ('\t'). It yields dictionaries of "inputs" and "targets" where - inputs are characters from the source lines converted to integers, and - targets are characters from the target lines, also converted to integers. - - Args: - source_path: path to the file with source and target sentences. - source_vocab: a SubwordTextEncoder to encode the source string. - target_vocab: a SubwordTextEncoder to encode the target string. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from characters in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - for line in source_file: - if line and "\t" in line: - parts = line.split("\t", 1) - source, target = parts[0].strip(), parts[1].strip() - source_ints = source_vocab.encode(source) + eos_list - target_ints = target_vocab.encode(target) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - - -def token_generator(source_path, target_path, token_vocab, eos=None): - """Generator for sequence-to-sequence tasks that uses tokens. - - This generator assumes the files at source_path and target_path have - the same number of lines and yields dictionaries of "inputs" and "targets" - where inputs are token ids from the " "-split source (and target, resp.) lines - converted to integers using the token_map. - - Args: - source_path: path to the file with source sentences. - target_path: path to the file with target sentences. - token_vocab: text_encoder.TextEncoder object. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from tokens in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target: - source_ints = token_vocab.encode(source.strip()) + eos_list - target_ints = token_vocab.encode(target.strip()) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - source, target = source_file.readline(), target_file.readline() - - -def bi_vocabs_token_generator(source_path, - target_path, - source_token_vocab, - target_token_vocab, - eos=None): - """Generator for sequence-to-sequence tasks that uses tokens. - - This generator assumes the files at source_path and target_path have - the same number of lines and yields dictionaries of "inputs" and "targets" - where inputs are token ids from the " "-split source (and target, resp.) lines - converted to integers using the token_map. - - Args: - source_path: path to the file with source sentences. - target_path: path to the file with target sentences. - source_token_vocab: text_encoder.TextEncoder object. - target_token_vocab: text_encoder.TextEncoder object. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from tokens in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target: - source_ints = source_token_vocab.encode(source.strip()) + eos_list - target_ints = target_token_vocab.encode(target.strip()) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - source, target = source_file.readline(), target_file.readline() - - -# Data-set URLs. - -_ENDE_TRAIN_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long - ("training/news-commentary-v12.de-en.en", - "training/news-commentary-v12.de-en.de") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", - ("commoncrawl.de-en.en", "commoncrawl.de-en.de") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", - ("training/europarl-v7.de-en.en", "training/europarl-v7.de-en.de") - ], -] -_ENDE_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newstest2013.en", "dev/newstest2013.de") - ], -] - -_ENFR_TRAIN_DATASETS = [ - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", - ("commoncrawl.fr-en.en", "commoncrawl.fr-en.fr") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", - ("training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr") - ], - [ - "http://www.statmt.org/wmt14/training-parallel-nc-v9.tgz", - ("training/news-commentary-v9.fr-en.en", - "training/news-commentary-v9.fr-en.fr") - ], - [ - "http://www.statmt.org/wmt10/training-giga-fren.tar", - ("giga-fren.release2.fixed.en.gz", "giga-fren.release2.fixed.fr.gz") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-un.tgz", - ("un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr") - ], -] -_ENFR_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newstest2013.en", "dev/newstest2013.fr") - ], -] - -_ZHEN_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" - "training-parallel-nc-v12.tgz"), - ("training/news-commentary-v12.zh-en.zh", - "training/news-commentary-v12.zh-en.en")]] - -_ZHEN_TEST_DATASETS = [[ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.zh.sgm", "dev/newsdev2017-zhen-ref.en.sgm") -]] - -# For Macedonian-English the SETimes corpus -# from http://nlp.ffzg.hr/resources/corpora/setimes/ is used. -# The original dataset has 207,777 parallel sentences. -# For training the first 205,777 sentences are used. -_MKEN_TRAIN_DATASETS = [[ - "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long - ("train.mk", "train.en") -]] - -# For development 1000 parallel sentences are used. -_MKEN_TEST_DATASETS = [[ - "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.dev.tgz", # pylint: disable=line-too-long - ("dev.mk", "dev.en") -]] - -# English-Czech datasets -_ENCS_TRAIN_DATASETS = [ - [ - ("https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/" - "11234/1-1458/data-plaintext-format.tar"), - ("tsv", 3, 2, "data.plaintext-format/*train.gz") - ], - [ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long - ("training/news-commentary-v12.cs-en.en", - "training/news-commentary-v12.cs-en.cs") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", - ("commoncrawl.cs-en.en", "commoncrawl.cs-en.cs") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", - ("training/europarl-v7.cs-en.en", "training/europarl-v7.cs-en.cs") - ], -] -_ENCS_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newstest2013.en", "dev/newstest2013.cs") - ], -] - -# Generators. - - -def _get_wmt_ende_bpe_dataset(directory, filename): - """Extract the WMT en-de corpus `filename` to directory unless it's there.""" - train_path = os.path.join(directory, filename) - if not (tf.gfile.Exists(train_path + ".de") and - tf.gfile.Exists(train_path + ".en")): - url = ("https://drive.google.com/uc?export=download&id=" - "0B_bZck-ksdkpM25jRUN2X2UxMm8") - corpus_file = generator_utils.maybe_download_from_drive( - directory, "wmt16_en_de.tar.gz", url) - with tarfile.open(corpus_file, "r:gz") as corpus_tar: - corpus_tar.extractall(directory) - return train_path - - -@registry.register_problem -class TranslateEndeWmtBpe32k(TranslateProblem): - """Problem spec for WMT En-De translation, BPE version.""" - - @property - def targeted_vocab_size(self): - return 32000 - - @property - def vocab_name(self): - return "vocab.bpe" - - def feature_encoders(self, data_dir): - vocab_filename = os.path.join(data_dir, self.vocab_file) - encoder = text_encoder.TokenTextEncoder(vocab_filename, replace_oov="UNK") - return {"inputs": encoder, "targets": encoder} - - def generator(self, data_dir, tmp_dir, train): - """Instance of token generator for the WMT en->de task, training set.""" - dataset_path = ("train.tok.clean.bpe.32000" - if train else "newstest2013.tok.bpe.32000") - train_path = _get_wmt_ende_bpe_dataset(tmp_dir, dataset_path) - token_tmp_path = os.path.join(tmp_dir, self.vocab_file) - token_path = os.path.join(data_dir, self.vocab_file) - tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) - with tf.gfile.GFile(token_path, mode="a") as f: - f.write("UNK\n") # Add UNK to the vocab. - token_vocab = text_encoder.TokenTextEncoder(token_path, replace_oov="UNK") - return token_generator(train_path + ".en", train_path + ".de", token_vocab, - EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_BPE_TOK - - @property - def target_space_id(self): - return problem.SpaceID.DE_BPE_TOK - - -def _preprocess_sgm(line, is_sgm): - """Preprocessing to strip tags in SGM files.""" - if not is_sgm: - return line - # In SGM files, remove <srcset ...>, <p>, <doc ...> lines. - if line.startswith("<srcset") or line.startswith("</srcset"): - return "" - if line.startswith("<doc") or line.startswith("</doc"): - return "" - if line.startswith("<p>") or line.startswith("</p>"): - return "" - # Strip <seg> tags. - line = line.strip() - if line.startswith("<seg") and line.endswith("</seg>"): - i = line.index(">") - return line[i + 1:-6] # Strip first <seg ...> and last </seg>. - - -def _compile_data(tmp_dir, datasets, filename): - """Concatenate all `datasets` and save to `filename`.""" - filename = os.path.join(tmp_dir, filename) - with tf.gfile.GFile(filename + ".lang1", mode="w") as lang1_resfile: - with tf.gfile.GFile(filename + ".lang2", mode="w") as lang2_resfile: - for dataset in datasets: - url = dataset[0] - compressed_filename = os.path.basename(url) - compressed_filepath = os.path.join(tmp_dir, compressed_filename) - - generator_utils.maybe_download(tmp_dir, compressed_filename, url) - - if dataset[1][0] == "tsv": - _, src_column, trg_column, glob_pattern = dataset[1] - filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) - if not filenames: - # Capture *.tgz and *.tar.gz too. - mode = "r:gz" if compressed_filepath.endswith("gz") else "r" - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) - for tsv_filename in filenames: - if tsv_filename.endswith(".gz"): - new_filename = tsv_filename.strip(".gz") - generator_utils.gunzip_file(tsv_filename, new_filename) - tsv_filename = new_filename - with tf.gfile.GFile(tsv_filename, mode="r") as tsv_file: - for line in tsv_file: - if line and "\t" in line: - parts = line.split("\t") - source, target = parts[src_column], parts[trg_column] - lang1_resfile.write(source.strip() + "\n") - lang2_resfile.write(target.strip() + "\n") - else: - lang1_filename, lang2_filename = dataset[1] - lang1_filepath = os.path.join(tmp_dir, lang1_filename) - lang2_filepath = os.path.join(tmp_dir, lang2_filename) - is_sgm = (lang1_filename.endswith("sgm") and - lang2_filename.endswith("sgm")) - - if not (os.path.exists(lang1_filepath) and - os.path.exists(lang2_filepath)): - # For .tar.gz and .tgz files, we read compressed. - mode = "r:gz" if compressed_filepath.endswith("gz") else "r" - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - if lang1_filepath.endswith(".gz"): - new_filepath = lang1_filepath.strip(".gz") - generator_utils.gunzip_file(lang1_filepath, new_filepath) - lang1_filepath = new_filepath - if lang2_filepath.endswith(".gz"): - new_filepath = lang2_filepath.strip(".gz") - generator_utils.gunzip_file(lang2_filepath, new_filepath) - lang2_filepath = new_filepath - with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: - with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: - line1, line2 = lang1_file.readline(), lang2_file.readline() - while line1 or line2: - line1res = _preprocess_sgm(line1, is_sgm) - line2res = _preprocess_sgm(line2, is_sgm) - if line1res or line2res: - lang1_resfile.write(line1res.strip() + "\n") - lang2_resfile.write(line2res.strip() + "\n") - line1, line2 = lang1_file.readline(), lang2_file.readline() - - return filename - - -@registry.register_problem -class TranslateEndeWmt8k(TranslateProblem): - """Problem spec for WMT En-De translation.""" - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - def generator(self, data_dir, tmp_dir, train): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.DE_TOK - - -@registry.register_problem -class TranslateEndeWmt32k(TranslateEndeWmt8k): - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - -@registry.register_problem -class TranslateEndeWmtCharacters(TranslateProblem): - """Problem spec for WMT En-De translation.""" - - @property - def is_character_level(self): - return True - - def generator(self, _, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def target_space_id(self): - return problem.SpaceID.DE_CHR - - -@registry.register_problem -class TranslateEnzhWmt8k(TranslateProblem): - """Problem spec for WMT Zh-En translation.""" - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - @property - def num_shards(self): - return 10 # This is a small dataset. - - @property - def source_vocab_name(self): - return "vocab.zhen-zh.%d" % self.targeted_vocab_size - - @property - def target_vocab_name(self): - return "vocab.zhen-en.%d" % self.targeted_vocab_size - - def generator(self, data_dir, tmp_dir, train): - datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] - source_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, - source_datasets) - target_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.target_vocab_name, self.targeted_vocab_size, - target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) - # We generate English->X data by convention, to train reverse translation - # just add the "_rev" suffix to the problem name, e.g., like this. - # --problems=translate_enzh_wmt8k_rev - return bi_vocabs_token_generator(data_path + ".lang2", data_path + ".lang1", - source_vocab, target_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.ZH_TOK - - @property - def target_space_id(self): - return problem.SpaceID.EN_TOK - - def feature_encoders(self, data_dir): - source_vocab_filename = os.path.join(data_dir, self.source_vocab_name) - target_vocab_filename = os.path.join(data_dir, self.target_vocab_name) - source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) - return { - "inputs": source_token, - "targets": target_token, - } - - -@registry.register_problem -class TranslateEnfrWmt8k(TranslateProblem): - """Problem spec for WMT En-Fr translation.""" - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - def generator(self, data_dir, tmp_dir, train): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.FR_TOK - - -@registry.register_problem -class TranslateEnfrWmt32k(TranslateEnfrWmt8k): - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - -@registry.register_problem -class TranslateEnfrWmtCharacters(TranslateProblem): - """Problem spec for WMT En-Fr translation.""" - - @property - def is_character_level(self): - return True - - def generator(self, data_dir, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def target_space_id(self): - return problem.SpaceID.FR_CHR - - -@registry.register_problem -class TranslateEnmkSetimes32k(TranslateProblem): - """Problem spec for SETimes Mk-En translation.""" - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - @property - def vocab_name(self): - return "vocab.mken" - - def generator(self, data_dir, tmp_dir, train): - datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - source_datasets + target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) - # We generate English->X data by convention, to train reverse translation - # just add the "_rev" suffix to the problem name, e.g., like this. - # --problems=translate_enmk_setimes32k_rev - return token_generator(data_path + ".lang2", data_path + ".lang1", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.MK_TOK - - @property - def target_space_id(self): - return problem.SpaceID.EN_TOK - - -@registry.register_problem -class TranslateEncsWmt32k(TranslateProblem): - """Problem spec for WMT English-Czech translation.""" - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - @property - def vocab_name(self): - return "vocab.encs" - - def generator(self, data_dir, tmp_dir, train): - datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS - tag = "train" if train else "dev" - vocab_datasets = [] - data_path = _compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) - # CzEng contains 100 gz files with tab-separated columns, so let's expect - # it is the first dataset in datasets and use the newly created *.lang{1,2} - # files for vocab construction. - if datasets[0][0].endswith("data-plaintext-format.tar"): - vocab_datasets.append([datasets[0][0], ["wmt_encs_tok_%s.lang1" % tag, - "wmt_encs_tok_%s.lang2" % tag]]) - datasets = datasets[1:] - vocab_datasets += [[item[0], [item[1][0], item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - vocab_datasets) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.CS_TOK - - -@registry.register_problem -class TranslateEncsWmtCharacters(TranslateProblem): - """Problem spec for WMT En-Cs character-based translation.""" - - @property - def is_character_level(self): - return True - - def generator(self, data_dir, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_encs_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def target_space_id(self): - return problem.SpaceID.CS_CHR - - -def parsing_token_generator(data_dir, tmp_dir, train, vocab_size): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) - filename = "%s_%s.trees" % (FLAGS.parsing_path, "train" if train else "dev") - tree_filepath = os.path.join(tmp_dir, filename) - return wsj_parsing.token_generator(tree_filepath, symbolizer_vocab, - symbolizer_vocab, EOS) From dd08f9d7c214029208da3632fbd421c589fa8adf Mon Sep 17 00:00:00 2001 From: Vincent Nguyen <vince62s@yahoo.com> Date: Thu, 19 Oct 2017 14:35:00 +0200 Subject: [PATCH 0501/4095] fix warning --- tensor2tensor/data_generators/translate_encs.py | 4 ++-- tensor2tensor/data_generators/translate_ende.py | 6 +++--- tensor2tensor/data_generators/translate_enmk.py | 2 +- tensor2tensor/data_generators/translate_enzh.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/data_generators/translate_encs.py b/tensor2tensor/data_generators/translate_encs.py index 118fdca23..211d27413 100644 --- a/tensor2tensor/data_generators/translate_encs.py +++ b/tensor2tensor/data_generators/translate_encs.py @@ -67,7 +67,7 @@ @registry.register_problem -class TranslateEncsWmt32k(TranslateProblem): +class TranslateEncsWmt32k(translate.TranslateProblem): """Problem spec for WMT English-Czech translation.""" @property @@ -107,7 +107,7 @@ def target_space_id(self): @registry.register_problem -class TranslateEncsWmtCharacters(TranslateProblem): +class TranslateEncsWmtCharacters(translate.TranslateProblem): """Problem spec for WMT En-Cs character-based translation.""" @property diff --git a/tensor2tensor/data_generators/translate_ende.py b/tensor2tensor/data_generators/translate_ende.py index 17b30d8c5..01fe77b85 100644 --- a/tensor2tensor/data_generators/translate_ende.py +++ b/tensor2tensor/data_generators/translate_ende.py @@ -76,7 +76,7 @@ def _get_wmt_ende_bpe_dataset(directory, filename): @registry.register_problem -class TranslateEndeWmtBpe32k(TranslateProblem): +class TranslateEndeWmtBpe32k(translate.TranslateProblem): """Problem spec for WMT En-De translation, BPE version.""" @property @@ -117,7 +117,7 @@ def target_space_id(self): @registry.register_problem -class TranslateEndeWmt8k(TranslateProblem): +class TranslateEndeWmt8k(translate.TranslateProblem): """Problem spec for WMT En-De translation.""" @property @@ -155,7 +155,7 @@ def targeted_vocab_size(self): @registry.register_problem -class TranslateEndeWmtCharacters(TranslateProblem): +class TranslateEndeWmtCharacters(translate.TranslateProblem): """Problem spec for WMT En-De translation.""" @property diff --git a/tensor2tensor/data_generators/translate_enmk.py b/tensor2tensor/data_generators/translate_enmk.py index 8cf13a2bb..f6c934121 100644 --- a/tensor2tensor/data_generators/translate_enmk.py +++ b/tensor2tensor/data_generators/translate_enmk.py @@ -54,7 +54,7 @@ ]] @registry.register_problem -class TranslateEnmkSetimes32k(TranslateProblem): +class TranslateEnmkSetimes32k(translate.TranslateProblem): """Problem spec for SETimes Mk-En translation.""" @property diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index f4e68bd95..d1b7f7c20 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -49,7 +49,7 @@ ]] @registry.register_problem -class TranslateEnzhWmt8k(TranslateProblem): +class TranslateEnzhWmt8k(translate.TranslateProblem): """Problem spec for WMT Zh-En translation.""" @property From fc351443b39c90887aaf49b6059dd7d04cadc1fa Mon Sep 17 00:00:00 2001 From: Vincent Nguyen <vince62s@yahoo.com> Date: Thu, 19 Oct 2017 14:51:08 +0200 Subject: [PATCH 0502/4095] another warning fix --- tensor2tensor/bin/t2t-datagen | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index cb6253524..b3016c994 100755 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -43,7 +43,7 @@ from tensor2tensor.data_generators import all_problems # pylint: disable=unused from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import snli -from tensor2tensor.data_generators import wmt +from tensor2tensor.data_generators import translate from tensor2tensor.data_generators import wsj_parsing from tensor2tensor.utils import registry from tensor2tensor.utils import usr_dir From 9f59a502f6e7a9490292f9c34ac7565408403bad Mon Sep 17 00:00:00 2001 From: Vincent Nguyen <vince62s@yahoo.com> Date: Sun, 22 Oct 2017 22:25:34 +0200 Subject: [PATCH 0503/4095] adjust vocab with random lines --- .../data_generators/generator_utils.py | 18 ++++++++++++------ .../data_generators/translate_enfr.py | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index e5a0bbb6d..3be4b2a6d 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -338,13 +338,19 @@ def generate(): # Use Tokenizer to count the word occurrences. with tf.gfile.GFile(filepath, mode="r") as source_file: - file_byte_budget = 3.5e5 if filepath.endswith("en") else 7e5 + file_byte_budget = 1e6 if filepath.endswith("en") else 1e6 + counter = 0 + countermax = int(source_file.size() / 1e6) for line in source_file: - if file_byte_budget <= 0: - break - line = line.strip() - file_byte_budget -= len(line) - yield line + if counter < countermax: + counter += 1 + else: + if file_byte_budget <= 0: + break + line = line.strip() + file_byte_budget -= len(line) + counter = 0 + yield line return get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, generate()) diff --git a/tensor2tensor/data_generators/translate_enfr.py b/tensor2tensor/data_generators/translate_enfr.py index 2ce983dd1..01e4e8f82 100644 --- a/tensor2tensor/data_generators/translate_enfr.py +++ b/tensor2tensor/data_generators/translate_enfr.py @@ -41,7 +41,7 @@ _ENFR_TRAIN_DATASETS = [ [ "https://s3.amazonaws.com/opennmt-trainingdata/baseline-1M-enfr.tgz", - ("baseline-1M-enfr/baseline-1M_train.en", "baseline-1M-enfr/baseline-1M_train.en") + ("baseline-1M-enfr/baseline-1M_train.en", "baseline-1M-enfr/baseline-1M_train.fr") ], # [ # "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", From 7130ce4700c07fb3208db80fd97f820fd9643c0b Mon Sep 17 00:00:00 2001 From: Urvashi Khandelwal <urvashik@stanford.edu> Date: Sun, 22 Oct 2017 16:49:27 -0700 Subject: [PATCH 0504/4095] Rouge evaluation script using pyrouge --- tensor2tensor/utils/get_rouge.py | 90 ++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 tensor2tensor/utils/get_rouge.py diff --git a/tensor2tensor/utils/get_rouge.py b/tensor2tensor/utils/get_rouge.py new file mode 100644 index 000000000..ac029f86d --- /dev/null +++ b/tensor2tensor/utils/get_rouge.py @@ -0,0 +1,90 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Computing rouge scores using pyrouge.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import logging +import shutil +from tempfile import mkdtemp +from pprint import pprint + +# Dependency imports +from pyrouge import Rouge155 + +import numpy as np +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +tf.flags.DEFINE_string("decodes_filename", None, "File containing model generated summaries tokenized") +tf.flags.DEFINE_string("targets_filename", None, "File containing model target summaries tokenized") + +def write_to_file(filename, data): + # TODO: ensure the output format (chars split by spaces) was as intended + data = "".join(data[::2]) + data = ".\n".join(data.split(". ")) + with open(filename, "w") as fp: + fp.write(data) + +def prep_data(decode_dir, target_dir): + with open(FLAGS.decodes_filename, "rb") as fdecodes, open(FLAGS.targets_filename, "rb") as ftargets: + for i, (d, t) in enumerate(zip(fdecodes, ftargets)): + write_to_file(os.path.join(decode_dir, "rouge.%06d.txt" % (i+1)), d) + write_to_file(os.path.join(target_dir, "rouge.A.%06d.txt" % (i+1)), t) + + if (i+1 % 1000) == 0: + print("Written %d examples to file" % i) + +def main(_): + rouge = Rouge155() + rouge.log.setLevel(logging.ERROR) + rouge.system_filename_pattern = "rouge.(\d+).txt" + rouge.model_filename_pattern = "rouge.[A-Z].#ID#.txt" + + tf.logging.set_verbosity(tf.logging.INFO) + + tmpdir = mkdtemp() + tf.logging.info("tmpdir: %s" % tmpdir) + # system = decodes + system_dir = os.path.join(tmpdir, 'system') + # model = gold + model_dir = os.path.join(tmpdir, 'model') + os.mkdir(system_dir) + os.mkdir(model_dir) + + rouge.system_dir = system_dir + rouge.model_dir = model_dir + + prep_data(rouge.system_dir, rouge.model_dir) + + rouge_scores = rouge.convert_and_evaluate() + rouge_scores = rouge.output_to_dict(rouge_scores) + for prefix in ["rouge_1", "rouge_2", "rouge_l"]: + for suffix in ["f_score", "precision", "recall"]: + key = "_".join([prefix, suffix]) + tf.logging.info("%s: %.4f" % (key, rouge_scores[key])) + + # clean up after pyrouge + shutil.rmtree(tmpdir) + shutil.rmtree(rouge._config_dir) + shutil.rmtree(os.path.split(rouge._system_dir)[0]) + +if __name__=='__main__': + tf.app.run() From b43f83324ec1e9bc025d63539b63e17bcd9aa2c2 Mon Sep 17 00:00:00 2001 From: Vincent Nguyen <vince62s@yahoo.com> Date: Tue, 24 Oct 2017 08:50:41 +0200 Subject: [PATCH 0505/4095] fix --- tensor2tensor/data_generators/generator_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 3be4b2a6d..984694e47 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -338,9 +338,9 @@ def generate(): # Use Tokenizer to count the word occurrences. with tf.gfile.GFile(filepath, mode="r") as source_file: - file_byte_budget = 1e6 if filepath.endswith("en") else 1e6 + file_byte_budget = 1e6 counter = 0 - countermax = int(source_file.size() / 1e6) + countermax = int(source_file.size() / file_byte_budget / 2) for line in source_file: if counter < countermax: counter += 1 From 349c6ee4cf2c4799e852068147d52b40c147934a Mon Sep 17 00:00:00 2001 From: Kollol Das <kolloldas@gmail.com> Date: Wed, 25 Oct 2017 22:52:03 +0530 Subject: [PATCH 0506/4095] Update attention model to use tf.contrib.seq2seq.AttentionWrapper. Also Fix NaN loss issue --- tensor2tensor/models/lstm.py | 203 ++++++++++------------------------- 1 file changed, 56 insertions(+), 147 deletions(-) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index f336bd6b4..00bb5ed9c 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -31,144 +31,6 @@ import tensorflow as tf from tensorflow.python.util import nest -# Track Tuple of state and attention values -AttentionTuple = collections.namedtuple("AttentionTuple", ("state", - "attention")) - - -class ExternalAttentionCellWrapper(tf.contrib.rnn.RNNCell): - """Wrapper for external attention states for an encoder-decoder setup.""" - - def __init__(self, - cell, - attn_states, - attn_vec_size=None, - input_size=None, - state_is_tuple=True, - reuse=None): - """Create a cell with attention. - - Args: - cell: an RNNCell, an attention is added to it. - attn_states: External attention states typically the encoder output in the - form [batch_size, time steps, hidden size] - attn_vec_size: integer, the number of convolutional features calculated - on attention state and a size of the hidden layer built from - base cell state. Equal attn_size to by default. - input_size: integer, the size of a hidden linear layer, - built from inputs and attention. Derived from the input tensor - by default. - state_is_tuple: If True, accepted and returned states are n-tuples, where - `n = len(cells)`. Must be set to True else will raise an exception - concatenated along the column axis. - reuse: (optional) Python boolean describing whether to reuse variables - in an existing scope. If not `True`, and the existing scope already has - the given variables, an error is raised. - Raises: - TypeError: if cell is not an RNNCell. - ValueError: if the flag `state_is_tuple` is `False` or if shape of - `attn_states` is not 3 or if innermost dimension (hidden size) is None. - """ - super(ExternalAttentionCellWrapper, self).__init__(_reuse=reuse) - if not state_is_tuple: - raise ValueError("Only tuple state is supported") - - self._cell = cell - self._input_size = input_size - - # Validate attn_states shape. - attn_shape = attn_states.get_shape() - if not attn_shape or len(attn_shape) != 3: - raise ValueError("attn_shape must be rank 3") - - self._attn_states = attn_states - self._attn_size = attn_shape[2].value - if self._attn_size is None: - raise ValueError("Hidden size of attn_states cannot be None") - - self._attn_vec_size = attn_vec_size - if self._attn_vec_size is None: - self._attn_vec_size = self._attn_size - - self._reuse = reuse - - @property - def state_size(self): - return AttentionTuple(self._cell.state_size, self._attn_size) - - @property - def output_size(self): - return self._attn_size - - def combine_state(self, previous_state): - """Combines previous state (from encoder) with internal attention values. - - You must use this function to derive the initial state passed into - this cell as it expects a named tuple (AttentionTuple). - - Args: - previous_state: State from another block that will be fed into this cell; - Must have same structure as the state of the cell wrapped by this. - Returns: - Combined state (AttentionTuple). - """ - batch_size = self._attn_states.get_shape()[0].value - if batch_size is None: - batch_size = tf.shape(self._attn_states)[0] - zeroed_state = self.zero_state(batch_size, self._attn_states.dtype) - return AttentionTuple(previous_state, zeroed_state.attention) - - def call(self, inputs, state): - """Long short-term memory cell with attention (LSTMA).""" - - if not isinstance(state, AttentionTuple): - raise TypeError("State must be of type AttentionTuple") - - state, attns = state - attn_states = self._attn_states - attn_length = attn_states.get_shape()[1].value - if attn_length is None: - attn_length = tf.shape(attn_states)[1] - - input_size = self._input_size - if input_size is None: - input_size = inputs.get_shape().as_list()[1] - if attns is not None: - inputs = tf.layers.dense(tf.concat([inputs, attns], axis=1), input_size) - lstm_output, new_state = self._cell(inputs, state) - - new_state_cat = tf.concat(nest.flatten(new_state), 1) - new_attns = self._attention(new_state_cat, attn_states, attn_length) - - with tf.variable_scope("attn_output_projection"): - output = tf.layers.dense( - tf.concat([lstm_output, new_attns], axis=1), self._attn_size) - - new_state = AttentionTuple(new_state, new_attns) - - return output, new_state - - def _attention(self, query, attn_states, attn_length): - conv2d = tf.nn.conv2d - reduce_sum = tf.reduce_sum - softmax = tf.nn.softmax - tanh = tf.tanh - - with tf.variable_scope("attention"): - k = tf.get_variable("attn_w", - [1, 1, self._attn_size, self._attn_vec_size]) - v = tf.get_variable("attn_v", [self._attn_vec_size, 1]) - hidden = tf.reshape(attn_states, [-1, attn_length, 1, self._attn_size]) - hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME") - y = tf.layers.dense(query, self._attn_vec_size) - y = tf.reshape(y, [-1, 1, 1, self._attn_vec_size]) - s = reduce_sum(v * tanh(hidden_features + y), [2, 3]) - a = softmax(s) - d = reduce_sum(tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2]) - new_attns = tf.reshape(d, [-1, self._attn_size]) - - return new_attns - def lstm(inputs, hparams, train, name, initial_state=None): """Run LSTM cell on inputs, assuming they are [batch x time x size].""" @@ -189,7 +51,7 @@ def dropout_lstm_cell(): def lstm_attention_decoder(inputs, hparams, train, name, initial_state, - attn_states): + encoder_outputs): """Run LSTM cell with attention on inputs of shape [batch x time x size].""" def dropout_lstm_cell(): @@ -198,11 +60,23 @@ def dropout_lstm_cell(): input_keep_prob=1.0 - hparams.dropout * tf.to_float(train)) layers = [dropout_lstm_cell() for _ in range(hparams.num_hidden_layers)] - cell = ExternalAttentionCellWrapper( + AttentionMechanism = (tf.contrib.seq2seq.LuongAttention if hparams.attention_mechanism == "luong" + else tf.contrib.seq2seq.BahdanauAttention) + attention_mechanism = AttentionMechanism(hparams.hidden_size, encoder_outputs) + + cell = tf.contrib.seq2seq.AttentionWrapper( tf.nn.rnn_cell.MultiRNNCell(layers), - attn_states, - attn_vec_size=hparams.attn_vec_size) - initial_state = cell.combine_state(initial_state) + [attention_mechanism]*hparams.num_heads, + attention_layer_size=[hparams.attention_layer_size]*hparams.num_heads, + output_attention=(hparams.output_attention==1)) + + + batch_size = inputs.get_shape()[0].value + if batch_size is None: + batch_size = tf.shape(inputs)[0] + + initial_state = cell.zero_state(batch_size, tf.float32).clone(cell_state=initial_state) + with tf.variable_scope(name): return tf.nn.dynamic_rnn( cell, @@ -273,14 +147,49 @@ def lstm_seq2seq(): hparams.hidden_size = 128 hparams.num_hidden_layers = 2 hparams.initializer = "uniform_unit_scaling" + hparams.initializer_gain = 1.0 + hparams.weight_decay = 0.0 + + return hparams + +def lstm_attention_base(): + """ Base attention params. """ + hparams = lstm_seq2seq() + hparams.add_hparam("attention_layer_size", hparams.hidden_size) + hparams.add_hparam("output_attention", int(True)) + hparams.add_hparam("num_heads", 1) + return hparams + + +@registry.register_hparams +def lstm_bahdanau_attention(): + """hparams for LSTM with bahdanau attention.""" + hparams = lstm_attention_base() + hparams.add_hparam("attention_mechanism", "bahdanau") return hparams +@registry.register_hparams +def lstm_luong_attention(): + """hparams for LSTM with luong attention.""" + hparams = lstm_attention_base() + hparams.add_hparam("attention_mechanism", "luong") + return hparams @registry.register_hparams def lstm_attention(): - """hparams for LSTM with attention.""" - hparams = lstm_seq2seq() + """ For backwards compatibility, Defaults to bahdanau """ + return lstm_bahdanau_attention() - # Attention - hparams.add_hparam("attn_vec_size", hparams.hidden_size) +@registry.register_hparams +def lstm_bahdanau_attention_multi(): + """ Multi-head Luong attention """ + hparams = lstm_bahdanau_attention() + hparams.num_heads = 4 return hparams + +@registry.register_hparams +def lstm_luong_attention_multi(): + """ Multi-head Luong attention """ + hparams = lstm_luong_attention() + hparams.num_heads = 4 + return hparams \ No newline at end of file From f67483ec3bcfde144aef07f6a2daf85ebbb3a839 Mon Sep 17 00:00:00 2001 From: Kollol Das <kolloldas@gmail.com> Date: Thu, 26 Oct 2017 00:09:59 +0530 Subject: [PATCH 0507/4095] Project outputs to hidden size for multi-head attention --- tensor2tensor/models/lstm.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 00bb5ed9c..2f5475276 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -78,12 +78,18 @@ def dropout_lstm_cell(): initial_state = cell.zero_state(batch_size, tf.float32).clone(cell_state=initial_state) with tf.variable_scope(name): - return tf.nn.dynamic_rnn( + output, state = tf.nn.dynamic_rnn( cell, inputs, initial_state=initial_state, dtype=tf.float32, time_major=False) + + # For multi-head attention project output back to hidden size + if hparams.output_attention == 1 and hparams.num_heads > 1: + output = tf.layers.dense(output, hparams.hidden_size) + + return output, state def lstm_seq2seq_internal(inputs, targets, hparams, train): From 41e0bfbdfbb2be94114811092bd2a52afc988e24 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 17 Oct 2017 16:00:30 -0700 Subject: [PATCH 0508/4095] Code style improvements in CNN/DailyMail generator. PiperOrigin-RevId: 172529629 --- tensor2tensor/bin/t2t-datagen | 2 +- tensor2tensor/bin/t2t-decoder | 0 tensor2tensor/bin/t2t-make-tf-configs | 0 tensor2tensor/bin/t2t-trainer | 0 tensor2tensor/data_generators/all_problems.py | 7 +- .../data_generators/cnn_dailymail.py | 85 ++- .../data_generators/generator_utils.py | 59 +- tensor2tensor/data_generators/ice_parsing.py | 2 +- tensor2tensor/data_generators/translate.py | 262 ------- .../data_generators/translate_encs.py | 133 ---- .../data_generators/translate_ende.py | 184 ----- .../data_generators/translate_enfr.py | 146 ---- .../data_generators/translate_enmk.py | 91 --- .../data_generators/translate_enzh.py | 107 --- tensor2tensor/data_generators/wmt.py | 718 ++++++++++++++++++ .../{translate_test.py => wmt_test.py} | 4 +- 16 files changed, 821 insertions(+), 979 deletions(-) mode change 100755 => 100644 tensor2tensor/bin/t2t-datagen mode change 100755 => 100644 tensor2tensor/bin/t2t-decoder mode change 100755 => 100644 tensor2tensor/bin/t2t-make-tf-configs mode change 100755 => 100644 tensor2tensor/bin/t2t-trainer delete mode 100644 tensor2tensor/data_generators/translate.py delete mode 100644 tensor2tensor/data_generators/translate_encs.py delete mode 100644 tensor2tensor/data_generators/translate_ende.py delete mode 100644 tensor2tensor/data_generators/translate_enfr.py delete mode 100644 tensor2tensor/data_generators/translate_enmk.py delete mode 100644 tensor2tensor/data_generators/translate_enzh.py create mode 100644 tensor2tensor/data_generators/wmt.py rename tensor2tensor/data_generators/{translate_test.py => wmt_test.py} (96%) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100755 new mode 100644 index b3016c994..cb6253524 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -43,7 +43,7 @@ from tensor2tensor.data_generators import all_problems # pylint: disable=unused from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import snli -from tensor2tensor.data_generators import translate +from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing from tensor2tensor.utils import registry from tensor2tensor.utils import usr_dir diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder old mode 100755 new mode 100644 diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs old mode 100755 new mode 100644 diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100755 new mode 100644 diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 1a65c628a..5877b541e 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -33,12 +33,7 @@ from tensor2tensor.data_generators import ptb from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wiki -from tensor2tensor.data_generators import translate -from tensor2tensor.data_generators import translate_enfr -from tensor2tensor.data_generators import translate_ende -from tensor2tensor.data_generators import translate_encs -from tensor2tensor.data_generators import translate_enzh -from tensor2tensor.data_generators import translate_enmk +from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index c0f6756a5..239d1af99 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -19,9 +19,9 @@ from __future__ import division from __future__ import print_function +import hashlib import os import tarfile -import hashlib # Dependency imports @@ -39,6 +39,7 @@ _DAILYMAIL_STORIES_DRIVE_URL = "https://drive.google.com/uc?export=download&id=0BwmD_VLjROrfM1BxdkxVaTY2bWs" + # Note: using See et al. (2017) as reference for data generation # For more info, use the links below @@ -47,13 +48,17 @@ _DEV_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt" _TEST_URLS = "https://github.com/abisee/cnn-dailymail/blob/master/url_lists/all_test.txt" + # End-of-sentence marker. EOS = text_encoder.EOS_ID + # Techniques for data prep from See et al. (2017) -dm_single_close_quote = u'\u2019' # unicode -dm_double_close_quote = u'\u201d' -END_TOKENS = [u'.', u'!', u'?', u'...', u"'", u"`", u'"', dm_single_close_quote, dm_double_close_quote, u")"] # acceptable ways to end a sentence +dm_single_close_quote = u"\u2019" # unicode +dm_double_close_quote = u"\u201d" +# Acceptable ways to end a sentence. +END_TOKENS = [u".", u"!", u"?", u"...", u"'", u"`", u"\"", + dm_single_close_quote, dm_double_close_quote, u")"] def _maybe_download_corpora(tmp_dir, is_training): @@ -61,9 +66,11 @@ def _maybe_download_corpora(tmp_dir, is_training): Args: tmp_dir: directory containing dataset. + is_training: whether we're in training mode or not. Returns: - list of all files generated and path to file containing train/dev/test split info. + List of all files generated and path to file containing + train/dev/test split info. """ cnn_filename = "cnn_stories.tgz" cnn_finalpath = os.path.join(tmp_dir, "cnn/stories/") @@ -85,43 +92,52 @@ def _maybe_download_corpora(tmp_dir, is_training): all_files = cnn_files + dailymail_files if is_training: - urls_path = generator_utils.maybe_download(tmp_dir, "all_train.txt", _TRAIN_URLS) + urls_path = generator_utils.maybe_download( + tmp_dir, "all_train.txt", _TRAIN_URLS) else: - urls_path = generator_utils.maybe_download(tmp_dir, "all_val.txt", _DEV_URLS) + urls_path = generator_utils.maybe_download( + tmp_dir, "all_val.txt", _DEV_URLS) return all_files, urls_path + def example_splits(url_file, all_files): + """Generate splits of the data.""" def generate_hash(inp): - """Generate a sha1 hash to match the raw url to the filename extracted""" - h = hashlib.sha1() - h.update(inp) - return h.hexdigest() + """Generate a sha1 hash to match the raw url to the filename extracted.""" + h = hashlib.sha1() + h.update(inp) + return h.hexdigest() - all_files_map = {f.split("/")[-1]:f for f in all_files} + all_files_map = {f.split("/")[-1]: f for f in all_files} urls = [] for line in tf.gfile.Open(url_file): - urls.append(line.strip().encode('utf-8')) + urls.append(line.strip().encode("utf-8")) filelist = [] for url in urls: - url_hash = generate_hash(url) - filename = url_hash + ".story" - if filename not in all_files_map: - tf.logging.info("Missing file: %s" % url) - continue - filelist.append(all_files_map[filename]) + url_hash = generate_hash(url) + filename = url_hash + ".story" + if filename not in all_files_map: + tf.logging.info("Missing file: %s" % url) + continue + filelist.append(all_files_map[filename]) tf.logging.info("Found %d examples" % len(filelist)) return filelist + def example_generator(tmp_dir, is_training, sum_token): + """Generate examples.""" def fix_run_on_sents(line): - if u"@highlight" in line: return line - if line=="": return line - if line[-1] in END_TOKENS: return line + if u"@highlight" in line: + return line + if not line: + return line + if line[-1] in END_TOKENS: + return line return line + u"." all_files, urls_path = _maybe_download_corpora(tmp_dir, is_training) @@ -133,28 +149,33 @@ def fix_run_on_sents(line): summary = [] reading_highlights = False for line in tf.gfile.Open(story_file, "rb"): - line = unicode(line.strip(), "utf-8") if six.PY2 else line.strip().decode("utf-8") + if six.PY2: + line = unicode(line.strip(), "utf-8") + else: + line = line.strip().decode("utf-8") line = fix_run_on_sents(line) - if line == "": - continue + if not line: + continue elif line.startswith(u"@highlight"): - if len(story) == 0: break # No article text - reading_highlights = True + if not story: + break # No article text. + reading_highlights = True elif reading_highlights: - summary.append(line) + summary.append(line) else: - story.append(line) + story.append(line) - if len(story) == 0 or len(summary) == 0: - continue + if (not story) or not summary: + continue yield " ".join(story) + story_summary_split_token + " ".join(summary) + def _story_summary_split(story): split_str = u" <summary> " split_str_len = len(split_str) split_pos = story.find(split_str) - return story[:split_pos], story[split_pos+split_str_len:] # story, summary + return story[:split_pos], story[split_pos+split_str_len:] # story, summary @registry.register_problem diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 984694e47..c8fe03564 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -263,6 +263,42 @@ def gunzip_file(gz_path, new_path): for line in gz_file: new_file.write(line) + +# TODO(aidangomez): en-fr tasks are significantly over-represented below +_DATA_FILE_URLS = [ + # German-English + [ + "http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long + [ + "training-parallel-nc-v11/news-commentary-v11.de-en.en", + "training-parallel-nc-v11/news-commentary-v11.de-en.de" + ] + ], + # German-English & French-English + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", [ + "commoncrawl.de-en.en", "commoncrawl.de-en.de", + "commoncrawl.fr-en.en", "commoncrawl.fr-en.fr" + ] + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", [ + "training/europarl-v7.de-en.en", "training/europarl-v7.de-en.de", + "training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr" + ] + ], + # French-English + [ + "http://www.statmt.org/wmt10/training-giga-fren.tar", + ["giga-fren.release2.fixed.en.gz", "giga-fren.release2.fixed.fr.gz"] + ], + [ + "http://www.statmt.org/wmt13/training-parallel-un.tgz", + ["un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr"] + ], +] + + def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, generator): """Inner implementation for vocab generators. @@ -305,8 +341,9 @@ def get_or_generate_vocab(data_dir, tmp_dir, vocab_filename, vocab_size, - sources): - """Generate a vocabulary from the datasets in sources.""" + sources=None): + """Generate a vocabulary from the datasets in sources (_DATA_FILE_URLS).""" + sources = sources or _DATA_FILE_URLS def generate(): tf.logging.info("Generating vocab from: %s", str(sources)) @@ -338,19 +375,13 @@ def generate(): # Use Tokenizer to count the word occurrences. with tf.gfile.GFile(filepath, mode="r") as source_file: - file_byte_budget = 1e6 - counter = 0 - countermax = int(source_file.size() / file_byte_budget / 2) + file_byte_budget = 3.5e5 if filepath.endswith("en") else 7e5 for line in source_file: - if counter < countermax: - counter += 1 - else: - if file_byte_budget <= 0: - break - line = line.strip() - file_byte_budget -= len(line) - counter = 0 - yield line + if file_byte_budget <= 0: + break + line = line.strip() + file_byte_budget -= len(line) + yield line return get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, generate()) diff --git a/tensor2tensor/data_generators/ice_parsing.py b/tensor2tensor/data_generators/ice_parsing.py index 99586ef83..2aa261cd4 100644 --- a/tensor2tensor/data_generators/ice_parsing.py +++ b/tensor2tensor/data_generators/ice_parsing.py @@ -32,7 +32,7 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators.translate import tabbed_generator +from tensor2tensor.data_generators.wmt import tabbed_generator from tensor2tensor.utils import registry diff --git a/tensor2tensor/data_generators/translate.py b/tensor2tensor/data_generators/translate.py deleted file mode 100644 index 1de25bc47..000000000 --- a/tensor2tensor/data_generators/translate.py +++ /dev/null @@ -1,262 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Data generators for translation data-sets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tarfile - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import problem -from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import registry - -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - - -class TranslateProblem(problem.Text2TextProblem): - """Base class for translation problems.""" - - @property - def is_character_level(self): - return False - - @property - def num_shards(self): - return 100 - - @property - def use_subword_tokenizer(self): - return True - - -# Generic generators used later for multiple problems. - - -def character_generator(source_path, target_path, character_vocab, eos=None): - """Generator for sequence-to-sequence tasks that just uses characters. - - This generator assumes the files at source_path and target_path have - the same number of lines and yields dictionaries of "inputs" and "targets" - where inputs are characters from the source lines converted to integers, - and targets are characters from the target lines, also converted to integers. - - Args: - source_path: path to the file with source sentences. - target_path: path to the file with target sentences. - character_vocab: a TextEncoder to encode the characters. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from characters in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target: - source_ints = character_vocab.encode(source.strip()) + eos_list - target_ints = character_vocab.encode(target.strip()) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - source, target = source_file.readline(), target_file.readline() - - -def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): - r"""Generator for sequence-to-sequence tasks using tabbed files. - - Tokens are derived from text files where each line contains both - a source and a target string. The two strings are separated by a tab - character ('\t'). It yields dictionaries of "inputs" and "targets" where - inputs are characters from the source lines converted to integers, and - targets are characters from the target lines, also converted to integers. - - Args: - source_path: path to the file with source and target sentences. - source_vocab: a SubwordTextEncoder to encode the source string. - target_vocab: a SubwordTextEncoder to encode the target string. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from characters in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - for line in source_file: - if line and "\t" in line: - parts = line.split("\t", 1) - source, target = parts[0].strip(), parts[1].strip() - source_ints = source_vocab.encode(source) + eos_list - target_ints = target_vocab.encode(target) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - - -def token_generator(source_path, target_path, token_vocab, eos=None): - """Generator for sequence-to-sequence tasks that uses tokens. - - This generator assumes the files at source_path and target_path have - the same number of lines and yields dictionaries of "inputs" and "targets" - where inputs are token ids from the " "-split source (and target, resp.) lines - converted to integers using the token_map. - - Args: - source_path: path to the file with source sentences. - target_path: path to the file with target sentences. - token_vocab: text_encoder.TextEncoder object. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from tokens in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target: - source_ints = token_vocab.encode(source.strip()) + eos_list - target_ints = token_vocab.encode(target.strip()) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - source, target = source_file.readline(), target_file.readline() - - -def bi_vocabs_token_generator(source_path, - target_path, - source_token_vocab, - target_token_vocab, - eos=None): - """Generator for sequence-to-sequence tasks that uses tokens. - - This generator assumes the files at source_path and target_path have - the same number of lines and yields dictionaries of "inputs" and "targets" - where inputs are token ids from the " "-split source (and target, resp.) lines - converted to integers using the token_map. - - Args: - source_path: path to the file with source sentences. - target_path: path to the file with target sentences. - source_token_vocab: text_encoder.TextEncoder object. - target_token_vocab: text_encoder.TextEncoder object. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from tokens in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target: - source_ints = source_token_vocab.encode(source.strip()) + eos_list - target_ints = target_token_vocab.encode(target.strip()) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - source, target = source_file.readline(), target_file.readline() - -def _preprocess_sgm(line, is_sgm): - """Preprocessing to strip tags in SGM files.""" - if not is_sgm: - return line - # In SGM files, remove <srcset ...>, <p>, <doc ...> lines. - if line.startswith("<srcset") or line.startswith("</srcset"): - return "" - if line.startswith("<doc") or line.startswith("</doc"): - return "" - if line.startswith("<p>") or line.startswith("</p>"): - return "" - # Strip <seg> tags. - line = line.strip() - if line.startswith("<seg") and line.endswith("</seg>"): - i = line.index(">") - return line[i + 1:-6] # Strip first <seg ...> and last </seg>. - -def _compile_data(tmp_dir, datasets, filename): - """Concatenate all `datasets` and save to `filename`.""" - filename = os.path.join(tmp_dir, filename) - with tf.gfile.GFile(filename + ".lang1", mode="w") as lang1_resfile: - with tf.gfile.GFile(filename + ".lang2", mode="w") as lang2_resfile: - for dataset in datasets: - url = dataset[0] - compressed_filename = os.path.basename(url) - compressed_filepath = os.path.join(tmp_dir, compressed_filename) - - generator_utils.maybe_download(tmp_dir, compressed_filename, url) - - if dataset[1][0] == "tsv": - _, src_column, trg_column, glob_pattern = dataset[1] - filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) - if not filenames: - # Capture *.tgz and *.tar.gz too. - mode = "r:gz" if compressed_filepath.endswith("gz") else "r" - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) - for tsv_filename in filenames: - if tsv_filename.endswith(".gz"): - new_filename = tsv_filename.strip(".gz") - generator_utils.gunzip_file(tsv_filename, new_filename) - tsv_filename = new_filename - with tf.gfile.GFile(tsv_filename, mode="r") as tsv_file: - for line in tsv_file: - if line and "\t" in line: - parts = line.split("\t") - source, target = parts[src_column], parts[trg_column] - lang1_resfile.write(source.strip() + "\n") - lang2_resfile.write(target.strip() + "\n") - else: - lang1_filename, lang2_filename = dataset[1] - lang1_filepath = os.path.join(tmp_dir, lang1_filename) - lang2_filepath = os.path.join(tmp_dir, lang2_filename) - is_sgm = (lang1_filename.endswith("sgm") and - lang2_filename.endswith("sgm")) - - if not (os.path.exists(lang1_filepath) and - os.path.exists(lang2_filepath)): - # For .tar.gz and .tgz files, we read compressed. - mode = "r:gz" if compressed_filepath.endswith("gz") else "r" - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - if lang1_filepath.endswith(".gz"): - new_filepath = lang1_filepath.strip(".gz") - generator_utils.gunzip_file(lang1_filepath, new_filepath) - lang1_filepath = new_filepath - if lang2_filepath.endswith(".gz"): - new_filepath = lang2_filepath.strip(".gz") - generator_utils.gunzip_file(lang2_filepath, new_filepath) - lang2_filepath = new_filepath - with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: - with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: - line1, line2 = lang1_file.readline(), lang2_file.readline() - while line1 or line2: - line1res = _preprocess_sgm(line1, is_sgm) - line2res = _preprocess_sgm(line2, is_sgm) - if line1res or line2res: - lang1_resfile.write(line1res.strip() + "\n") - lang2_resfile.write(line2res.strip() + "\n") - line1, line2 = lang1_file.readline(), lang2_file.readline() - - return filename - - diff --git a/tensor2tensor/data_generators/translate_encs.py b/tensor2tensor/data_generators/translate_encs.py deleted file mode 100644 index 211d27413..000000000 --- a/tensor2tensor/data_generators/translate_encs.py +++ /dev/null @@ -1,133 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Data generators for translation data-sets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tarfile - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import translate -from tensor2tensor.data_generators import problem -from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import registry - -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - -# End-of-sentence marker. -EOS = text_encoder.EOS_ID - -_ENCS_TRAIN_DATASETS = [ - [ - ("https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/" - "11234/1-1458/data-plaintext-format.tar"), - ("tsv", 3, 2, "data.plaintext-format/*train.gz") - ], - [ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long - ("training/news-commentary-v12.cs-en.en", - "training/news-commentary-v12.cs-en.cs") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", - ("commoncrawl.cs-en.en", "commoncrawl.cs-en.cs") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", - ("training/europarl-v7.cs-en.en", "training/europarl-v7.cs-en.cs") - ], -] -_ENCS_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newstest2013.en", "dev/newstest2013.cs") - ], -] - - -@registry.register_problem -class TranslateEncsWmt32k(translate.TranslateProblem): - """Problem spec for WMT English-Czech translation.""" - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - @property - def vocab_name(self): - return "vocab.encs" - - def generator(self, data_dir, tmp_dir, train): - datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS - tag = "train" if train else "dev" - vocab_datasets = [] - data_path = translate._compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) - # CzEng contains 100 gz files with tab-separated columns, so let's expect - # it is the first dataset in datasets and use the newly created *.lang{1,2} - # files for vocab construction. - if datasets[0][0].endswith("data-plaintext-format.tar"): - vocab_datasets.append([datasets[0][0], ["wmt_encs_tok_%s.lang1" % tag, - "wmt_encs_tok_%s.lang2" % tag]]) - datasets = datasets[1:] - vocab_datasets += [[item[0], [item[1][0], item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - vocab_datasets) - return translate.token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.CS_TOK - - -@registry.register_problem -class TranslateEncsWmtCharacters(translate.TranslateProblem): - """Problem spec for WMT En-Cs character-based translation.""" - - @property - def is_character_level(self): - return True - - def generator(self, data_dir, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS - tag = "train" if train else "dev" - data_path = translate._compile_data(tmp_dir, datasets, "wmt_encs_chr_%s" % tag) - return translate.character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def target_space_id(self): - return problem.SpaceID.CS_CHR - - diff --git a/tensor2tensor/data_generators/translate_ende.py b/tensor2tensor/data_generators/translate_ende.py deleted file mode 100644 index 01fe77b85..000000000 --- a/tensor2tensor/data_generators/translate_ende.py +++ /dev/null @@ -1,184 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Data generators for translation data-sets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tarfile - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import translate -from tensor2tensor.data_generators import problem -from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import registry - -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - -# End-of-sentence marker. -EOS = text_encoder.EOS_ID - -_ENDE_TRAIN_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long - ("training/news-commentary-v12.de-en.en", - "training/news-commentary-v12.de-en.de") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", - ("commoncrawl.de-en.en", "commoncrawl.de-en.de") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", - ("training/europarl-v7.de-en.en", "training/europarl-v7.de-en.de") - ], -] -_ENDE_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newstest2013.en", "dev/newstest2013.de") - ], -] - - -def _get_wmt_ende_bpe_dataset(directory, filename): - """Extract the WMT en-de corpus `filename` to directory unless it's there.""" - train_path = os.path.join(directory, filename) - if not (tf.gfile.Exists(train_path + ".de") and - tf.gfile.Exists(train_path + ".en")): - url = ("https://drive.google.com/uc?export=download&id=" - "0B_bZck-ksdkpM25jRUN2X2UxMm8") - corpus_file = generator_utils.maybe_download_from_drive( - directory, "wmt16_en_de.tar.gz", url) - with tarfile.open(corpus_file, "r:gz") as corpus_tar: - corpus_tar.extractall(directory) - return train_path - - -@registry.register_problem -class TranslateEndeWmtBpe32k(translate.TranslateProblem): - """Problem spec for WMT En-De translation, BPE version.""" - - @property - def targeted_vocab_size(self): - return 32000 - - @property - def vocab_name(self): - return "vocab.bpe" - - def feature_encoders(self, data_dir): - vocab_filename = os.path.join(data_dir, self.vocab_file) - encoder = text_encoder.TokenTextEncoder(vocab_filename, replace_oov="UNK") - return {"inputs": encoder, "targets": encoder} - - def generator(self, data_dir, tmp_dir, train): - """Instance of token generator for the WMT en->de task, training set.""" - dataset_path = ("train.tok.clean.bpe.32000" - if train else "newstest2013.tok.bpe.32000") - train_path = _get_wmt_ende_bpe_dataset(tmp_dir, dataset_path) - token_tmp_path = os.path.join(tmp_dir, self.vocab_file) - token_path = os.path.join(data_dir, self.vocab_file) - tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) - with tf.gfile.GFile(token_path, mode="a") as f: - f.write("UNK\n") # Add UNK to the vocab. - token_vocab = text_encoder.TokenTextEncoder(token_path, replace_oov="UNK") - return translate.token_generator(train_path + ".en", train_path + ".de", token_vocab, - EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_BPE_TOK - - @property - def target_space_id(self): - return problem.SpaceID.DE_BPE_TOK - - - -@registry.register_problem -class TranslateEndeWmt8k(translate.TranslateProblem): - """Problem spec for WMT En-De translation.""" - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - @property - def vocab_name(self): - return "vocab.ende" - - def generator(self, data_dir, tmp_dir, train): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, _ENDE_TRAIN_DATASETS) - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = translate._compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) - return translate.token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.DE_TOK - - -@registry.register_problem -class TranslateEndeWmt32k(TranslateEndeWmt8k): - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - -@registry.register_problem -class TranslateEndeWmtCharacters(translate.TranslateProblem): - """Problem spec for WMT En-De translation.""" - - @property - def is_character_level(self): - return True - - @property - def vocab_name(self): - return "vocab.ende" - - def generator(self, _, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = translate._compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) - return translate.character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def target_space_id(self): - return problem.SpaceID.DE_CHR - diff --git a/tensor2tensor/data_generators/translate_enfr.py b/tensor2tensor/data_generators/translate_enfr.py deleted file mode 100644 index 01e4e8f82..000000000 --- a/tensor2tensor/data_generators/translate_enfr.py +++ /dev/null @@ -1,146 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Data generators for translation data-sets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tarfile - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import translate -from tensor2tensor.data_generators import problem -from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import registry - -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - -# End-of-sentence marker. -EOS = text_encoder.EOS_ID - -_ENFR_TRAIN_DATASETS = [ - [ - "https://s3.amazonaws.com/opennmt-trainingdata/baseline-1M-enfr.tgz", - ("baseline-1M-enfr/baseline-1M_train.en", "baseline-1M-enfr/baseline-1M_train.fr") - ], -# [ -# "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", -# ("commoncrawl.fr-en.en", "commoncrawl.fr-en.fr") -# ], -# [ -# "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", -# ("training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr") -# ], -# [ -# "http://www.statmt.org/wmt14/training-parallel-nc-v9.tgz", -# ("training/news-commentary-v9.fr-en.en", -# "training/news-commentary-v9.fr-en.fr") -# ], -# [ -# "http://www.statmt.org/wmt10/training-giga-fren.tar", -# ("giga-fren.release2.fixed.en.gz", "giga-fren.release2.fixed.fr.gz") -# ], -# [ -# "http://www.statmt.org/wmt13/training-parallel-un.tgz", -# ("un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr") -# ], -] -_ENFR_TEST_DATASETS = [ - [ - "https://s3.amazonaws.com/opennmt-trainingdata/baseline-1M-enfr.tgz", - ("baseline-1M-enfr/baseline-1M_valid.en", "baseline-1M-enfr/baseline-1M_valid.fr") - ], -# [ -# "http://data.statmt.org/wmt17/translation-task/dev.tgz", -# ("dev/newstest2013.en", "dev/newstest2013.fr") -# ], -] - -@registry.register_problem -class TranslateEnfrWmt8k(translate.TranslateProblem): - """Problem spec for WMT En-Fr translation.""" - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - @property - def vocab_name(self): - return "vocab.enfr" - - def generator(self, data_dir, tmp_dir, train): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, _ENFR_TRAIN_DATASETS) - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = translate._compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) - return translate.token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.FR_TOK - - -@registry.register_problem -class TranslateEnfrWmt32k(TranslateEnfrWmt8k): - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - -@registry.register_problem -class TranslateEnfrWmtCharacters(translate.TranslateProblem): - """Problem spec for WMT En-Fr translation.""" - - @property - def is_character_level(self): - return True - - @property - def vocab_name(self): - return "vocab.enfr" - - def generator(self, data_dir, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = translate._compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) - return translate.character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def target_space_id(self): - return problem.SpaceID.FR_CHR - - - diff --git a/tensor2tensor/data_generators/translate_enmk.py b/tensor2tensor/data_generators/translate_enmk.py deleted file mode 100644 index f6c934121..000000000 --- a/tensor2tensor/data_generators/translate_enmk.py +++ /dev/null @@ -1,91 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Data generators for translation data-sets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tarfile - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import translate -from tensor2tensor.data_generators import problem -from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import registry - -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - -# End-of-sentence marker. -EOS = text_encoder.EOS_ID - -# For Macedonian-English the SETimes corpus -# from http://nlp.ffzg.hr/resources/corpora/setimes/ is used. -# The original dataset has 207,777 parallel sentences. -# For training the first 205,777 sentences are used. -_MKEN_TRAIN_DATASETS = [[ - "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long - ("train.mk", "train.en") -]] - -# For development 1000 parallel sentences are used. -_MKEN_TEST_DATASETS = [[ - "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.dev.tgz", # pylint: disable=line-too-long - ("dev.mk", "dev.en") -]] - -@registry.register_problem -class TranslateEnmkSetimes32k(translate.TranslateProblem): - """Problem spec for SETimes Mk-En translation.""" - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - @property - def vocab_name(self): - return "vocab.mken" - - def generator(self, data_dir, tmp_dir, train): - datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - source_datasets + target_datasets) - tag = "train" if train else "dev" - data_path = translate._compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) - # We generate English->X data by convention, to train reverse translation - # just add the "_rev" suffix to the problem name, e.g., like this. - # --problems=translate_enmk_setimes32k_rev - return translate.token_generator(data_path + ".lang2", data_path + ".lang1", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.MK_TOK - - @property - def target_space_id(self): - return problem.SpaceID.EN_TOK - - diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py deleted file mode 100644 index d1b7f7c20..000000000 --- a/tensor2tensor/data_generators/translate_enzh.py +++ /dev/null @@ -1,107 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Data generators for translation data-sets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tarfile - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import translate -from tensor2tensor.data_generators import problem -from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import registry - -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - -# End-of-sentence marker. -EOS = text_encoder.EOS_ID - -_ZHEN_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" - "training-parallel-nc-v12.tgz"), - ("training/news-commentary-v12.zh-en.zh", - "training/news-commentary-v12.zh-en.en")]] - -_ZHEN_TEST_DATASETS = [[ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.zh.sgm", "dev/newsdev2017-zhen-ref.en.sgm") -]] - -@registry.register_problem -class TranslateEnzhWmt8k(translate.TranslateProblem): - """Problem spec for WMT Zh-En translation.""" - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - @property - def num_shards(self): - return 10 # This is a small dataset. - - @property - def source_vocab_name(self): - return "vocab.zhen-zh.%d" % self.targeted_vocab_size - - @property - def target_vocab_name(self): - return "vocab.zhen-en.%d" % self.targeted_vocab_size - - def generator(self, data_dir, tmp_dir, train): - datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] - source_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, - source_datasets) - target_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.target_vocab_name, self.targeted_vocab_size, - target_datasets) - tag = "train" if train else "dev" - data_path = translate._compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) - # We generate English->X data by convention, to train reverse translation - # just add the "_rev" suffix to the problem name, e.g., like this. - # --problems=translate_enzh_wmt8k_rev - return translate.bi_vocabs_token_generator(data_path + ".lang2", data_path + ".lang1", - source_vocab, target_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.ZH_TOK - - @property - def target_space_id(self): - return problem.SpaceID.EN_TOK - - def feature_encoders(self, data_dir): - source_vocab_filename = os.path.join(data_dir, self.source_vocab_name) - target_vocab_filename = os.path.join(data_dir, self.target_vocab_name) - source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) - return { - "inputs": source_token, - "targets": target_token, - } - - diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py new file mode 100644 index 000000000..61716d012 --- /dev/null +++ b/tensor2tensor/data_generators/wmt.py @@ -0,0 +1,718 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + + +class TranslateProblem(problem.Text2TextProblem): + """Base class for translation problems.""" + + @property + def is_character_level(self): + return False + + @property + def num_shards(self): + return 100 + + @property + def vocab_name(self): + return "vocab.endefr" + + @property + def use_subword_tokenizer(self): + return True + + +# Generic generators used later for multiple problems. + + +def character_generator(source_path, target_path, character_vocab, eos=None): + """Generator for sequence-to-sequence tasks that just uses characters. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are characters from the source lines converted to integers, + and targets are characters from the target lines, also converted to integers. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + character_vocab: a TextEncoder to encode the characters. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from characters in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = character_vocab.encode(source.strip()) + eos_list + target_ints = character_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + + +def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): + r"""Generator for sequence-to-sequence tasks using tabbed files. + + Tokens are derived from text files where each line contains both + a source and a target string. The two strings are separated by a tab + character ('\t'). It yields dictionaries of "inputs" and "targets" where + inputs are characters from the source lines converted to integers, and + targets are characters from the target lines, also converted to integers. + + Args: + source_path: path to the file with source and target sentences. + source_vocab: a SubwordTextEncoder to encode the source string. + target_vocab: a SubwordTextEncoder to encode the target string. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from characters in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + for line in source_file: + if line and "\t" in line: + parts = line.split("\t", 1) + source, target = parts[0].strip(), parts[1].strip() + source_ints = source_vocab.encode(source) + eos_list + target_ints = target_vocab.encode(target) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + + +def token_generator(source_path, target_path, token_vocab, eos=None): + """Generator for sequence-to-sequence tasks that uses tokens. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are token ids from the " "-split source (and target, resp.) lines + converted to integers using the token_map. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + token_vocab: text_encoder.TextEncoder object. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from tokens in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = token_vocab.encode(source.strip()) + eos_list + target_ints = token_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + + +def bi_vocabs_token_generator(source_path, + target_path, + source_token_vocab, + target_token_vocab, + eos=None): + """Generator for sequence-to-sequence tasks that uses tokens. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are token ids from the " "-split source (and target, resp.) lines + converted to integers using the token_map. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + source_token_vocab: text_encoder.TextEncoder object. + target_token_vocab: text_encoder.TextEncoder object. + eos: integer to append at the end of each sequence (default: None). + + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from tokens in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = source_token_vocab.encode(source.strip()) + eos_list + target_ints = target_token_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + + +# Data-set URLs. + +_ENDE_TRAIN_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long + ("training/news-commentary-v12.de-en.en", + "training/news-commentary-v12.de-en.de") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + ("commoncrawl.de-en.en", "commoncrawl.de-en.de") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + ("training/europarl-v7.de-en.en", "training/europarl-v7.de-en.de") + ], +] +_ENDE_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newstest2013.en", "dev/newstest2013.de") + ], +] + +_ENFR_TRAIN_DATASETS = [ + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + ("commoncrawl.fr-en.en", "commoncrawl.fr-en.fr") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + ("training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr") + ], + [ + "http://www.statmt.org/wmt14/training-parallel-nc-v9.tgz", + ("training/news-commentary-v9.fr-en.en", + "training/news-commentary-v9.fr-en.fr") + ], + [ + "http://www.statmt.org/wmt10/training-giga-fren.tar", + ("giga-fren.release2.fixed.en.gz", "giga-fren.release2.fixed.fr.gz") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-un.tgz", + ("un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr") + ], +] +_ENFR_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newstest2013.en", "dev/newstest2013.fr") + ], +] + +_ZHEN_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" + "training-parallel-nc-v12.tgz"), + ("training/news-commentary-v12.zh-en.zh", + "training/news-commentary-v12.zh-en.en")]] + +_ZHEN_TEST_DATASETS = [[ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newsdev2017-zhen-src.zh.sgm", "dev/newsdev2017-zhen-ref.en.sgm") +]] + +# For Macedonian-English the SETimes corpus +# from http://nlp.ffzg.hr/resources/corpora/setimes/ is used. +# The original dataset has 207,777 parallel sentences. +# For training the first 205,777 sentences are used. +_MKEN_TRAIN_DATASETS = [[ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long + ("train.mk", "train.en") +]] + +# For development 1000 parallel sentences are used. +_MKEN_TEST_DATASETS = [[ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.dev.tgz", # pylint: disable=line-too-long + ("dev.mk", "dev.en") +]] + +# English-Czech datasets +_ENCS_TRAIN_DATASETS = [ + [ + ("https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/" + "11234/1-1458/data-plaintext-format.tar"), + ("tsv", 3, 2, "data.plaintext-format/*train.gz") + ], + [ + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long + ("training/news-commentary-v12.cs-en.en", + "training/news-commentary-v12.cs-en.cs") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + ("commoncrawl.cs-en.en", "commoncrawl.cs-en.cs") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + ("training/europarl-v7.cs-en.en", "training/europarl-v7.cs-en.cs") + ], +] +_ENCS_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newstest2013.en", "dev/newstest2013.cs") + ], +] + +# Generators. + + +def _get_wmt_ende_bpe_dataset(directory, filename): + """Extract the WMT en-de corpus `filename` to directory unless it's there.""" + train_path = os.path.join(directory, filename) + if not (tf.gfile.Exists(train_path + ".de") and + tf.gfile.Exists(train_path + ".en")): + url = ("https://drive.google.com/uc?export=download&id=" + "0B_bZck-ksdkpM25jRUN2X2UxMm8") + corpus_file = generator_utils.maybe_download_from_drive( + directory, "wmt16_en_de.tar.gz", url) + with tarfile.open(corpus_file, "r:gz") as corpus_tar: + corpus_tar.extractall(directory) + return train_path + + +@registry.register_problem +class TranslateEndeWmtBpe32k(TranslateProblem): + """Problem spec for WMT En-De translation, BPE version.""" + + @property + def targeted_vocab_size(self): + return 32000 + + @property + def vocab_name(self): + return "vocab.bpe" + + def feature_encoders(self, data_dir): + vocab_filename = os.path.join(data_dir, self.vocab_file) + encoder = text_encoder.TokenTextEncoder(vocab_filename, replace_oov="UNK") + return {"inputs": encoder, "targets": encoder} + + def generator(self, data_dir, tmp_dir, train): + """Instance of token generator for the WMT en->de task, training set.""" + dataset_path = ("train.tok.clean.bpe.32000" + if train else "newstest2013.tok.bpe.32000") + train_path = _get_wmt_ende_bpe_dataset(tmp_dir, dataset_path) + token_tmp_path = os.path.join(tmp_dir, self.vocab_file) + token_path = os.path.join(data_dir, self.vocab_file) + tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) + with tf.gfile.GFile(token_path, mode="a") as f: + f.write("UNK\n") # Add UNK to the vocab. + token_vocab = text_encoder.TokenTextEncoder(token_path, replace_oov="UNK") + return token_generator(train_path + ".en", train_path + ".de", token_vocab, + EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_BPE_TOK + + @property + def target_space_id(self): + return problem.SpaceID.DE_BPE_TOK + + +def _preprocess_sgm(line, is_sgm): + """Preprocessing to strip tags in SGM files.""" + if not is_sgm: + return line + # In SGM files, remove <srcset ...>, <p>, <doc ...> lines. + if line.startswith("<srcset") or line.startswith("</srcset"): + return "" + if line.startswith("<doc") or line.startswith("</doc"): + return "" + if line.startswith("<p>") or line.startswith("</p>"): + return "" + # Strip <seg> tags. + line = line.strip() + if line.startswith("<seg") and line.endswith("</seg>"): + i = line.index(">") + return line[i + 1:-6] # Strip first <seg ...> and last </seg>. + + +def _compile_data(tmp_dir, datasets, filename): + """Concatenate all `datasets` and save to `filename`.""" + filename = os.path.join(tmp_dir, filename) + with tf.gfile.GFile(filename + ".lang1", mode="w") as lang1_resfile: + with tf.gfile.GFile(filename + ".lang2", mode="w") as lang2_resfile: + for dataset in datasets: + url = dataset[0] + compressed_filename = os.path.basename(url) + compressed_filepath = os.path.join(tmp_dir, compressed_filename) + + generator_utils.maybe_download(tmp_dir, compressed_filename, url) + + if dataset[1][0] == "tsv": + _, src_column, trg_column, glob_pattern = dataset[1] + filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) + if not filenames: + # Capture *.tgz and *.tar.gz too. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) + for tsv_filename in filenames: + if tsv_filename.endswith(".gz"): + new_filename = tsv_filename.strip(".gz") + generator_utils.gunzip_file(tsv_filename, new_filename) + tsv_filename = new_filename + with tf.gfile.GFile(tsv_filename, mode="r") as tsv_file: + for line in tsv_file: + if line and "\t" in line: + parts = line.split("\t") + source, target = parts[src_column], parts[trg_column] + lang1_resfile.write(source.strip() + "\n") + lang2_resfile.write(target.strip() + "\n") + else: + lang1_filename, lang2_filename = dataset[1] + lang1_filepath = os.path.join(tmp_dir, lang1_filename) + lang2_filepath = os.path.join(tmp_dir, lang2_filename) + is_sgm = (lang1_filename.endswith("sgm") and + lang2_filename.endswith("sgm")) + + if not (os.path.exists(lang1_filepath) and + os.path.exists(lang2_filepath)): + # For .tar.gz and .tgz files, we read compressed. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + if lang1_filepath.endswith(".gz"): + new_filepath = lang1_filepath.strip(".gz") + generator_utils.gunzip_file(lang1_filepath, new_filepath) + lang1_filepath = new_filepath + if lang2_filepath.endswith(".gz"): + new_filepath = lang2_filepath.strip(".gz") + generator_utils.gunzip_file(lang2_filepath, new_filepath) + lang2_filepath = new_filepath + with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: + with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: + line1, line2 = lang1_file.readline(), lang2_file.readline() + while line1 or line2: + line1res = _preprocess_sgm(line1, is_sgm) + line2res = _preprocess_sgm(line2, is_sgm) + if line1res or line2res: + lang1_resfile.write(line1res.strip() + "\n") + lang2_resfile.write(line2res.strip() + "\n") + line1, line2 = lang1_file.readline(), lang2_file.readline() + + return filename + + +@registry.register_problem +class TranslateEndeWmt8k(TranslateProblem): + """Problem spec for WMT En-De translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + def generator(self, data_dir, tmp_dir, train): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.DE_TOK + + +@registry.register_problem +class TranslateEndeWmt32k(TranslateEndeWmt8k): + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + +@registry.register_problem +class TranslateEndeWmtCharacters(TranslateProblem): + """Problem spec for WMT En-De translation.""" + + @property + def is_character_level(self): + return True + + def generator(self, _, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.DE_CHR + + +@registry.register_problem +class TranslateEnzhWmt8k(TranslateProblem): + """Problem spec for WMT Zh-En translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def num_shards(self): + return 10 # This is a small dataset. + + @property + def source_vocab_name(self): + return "vocab.zhen-zh.%d" % self.targeted_vocab_size + + @property + def target_vocab_name(self): + return "vocab.zhen-en.%d" % self.targeted_vocab_size + + def generator(self, data_dir, tmp_dir, train): + datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] + source_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, + source_datasets) + target_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.target_vocab_name, self.targeted_vocab_size, + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) + # We generate English->X data by convention, to train reverse translation + # just add the "_rev" suffix to the problem name, e.g., like this. + # --problems=translate_enzh_wmt8k_rev + return bi_vocabs_token_generator(data_path + ".lang2", data_path + ".lang1", + source_vocab, target_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.ZH_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + def feature_encoders(self, data_dir): + source_vocab_filename = os.path.join(data_dir, self.source_vocab_name) + target_vocab_filename = os.path.join(data_dir, self.target_vocab_name) + source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_token, + "targets": target_token, + } + + +@registry.register_problem +class TranslateEnfrWmt8k(TranslateProblem): + """Problem spec for WMT En-Fr translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + def generator(self, data_dir, tmp_dir, train): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.FR_TOK + + +@registry.register_problem +class TranslateEnfrWmt32k(TranslateEnfrWmt8k): + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + +@registry.register_problem +class TranslateEnfrWmtCharacters(TranslateProblem): + """Problem spec for WMT En-Fr translation.""" + + @property + def is_character_level(self): + return True + + def generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.FR_CHR + + +@registry.register_problem +class TranslateEnmkSetimes32k(TranslateProblem): + """Problem spec for SETimes Mk-En translation.""" + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + @property + def vocab_name(self): + return "vocab.mken" + + def generator(self, data_dir, tmp_dir, train): + datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + source_datasets + target_datasets) + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) + # We generate English->X data by convention, to train reverse translation + # just add the "_rev" suffix to the problem name, e.g., like this. + # --problems=translate_enmk_setimes32k_rev + return token_generator(data_path + ".lang2", data_path + ".lang1", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.MK_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + +@registry.register_problem +class TranslateEncsWmt32k(TranslateProblem): + """Problem spec for WMT English-Czech translation.""" + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + @property + def vocab_name(self): + return "vocab.encs" + + def generator(self, data_dir, tmp_dir, train): + datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + tag = "train" if train else "dev" + vocab_datasets = [] + data_path = _compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) + # CzEng contains 100 gz files with tab-separated columns, so let's expect + # it is the first dataset in datasets and use the newly created *.lang{1,2} + # files for vocab construction. + if datasets[0][0].endswith("data-plaintext-format.tar"): + vocab_datasets.append([datasets[0][0], ["wmt_encs_tok_%s.lang1" % tag, + "wmt_encs_tok_%s.lang2" % tag]]) + datasets = datasets[1:] + vocab_datasets += [[item[0], [item[1][0], item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + vocab_datasets) + return token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.CS_TOK + + +@registry.register_problem +class TranslateEncsWmtCharacters(TranslateProblem): + """Problem spec for WMT En-Cs character-based translation.""" + + @property + def is_character_level(self): + return True + + def generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + tag = "train" if train else "dev" + data_path = _compile_data(tmp_dir, datasets, "wmt_encs_chr_%s" % tag) + return character_generator(data_path + ".lang1", data_path + ".lang2", + character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.CS_CHR + + +def parsing_token_generator(data_dir, tmp_dir, train, vocab_size): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) + filename = "%s_%s.trees" % (FLAGS.parsing_path, "train" if train else "dev") + tree_filepath = os.path.join(tmp_dir, filename) + return wsj_parsing.token_generator(tree_filepath, symbolizer_vocab, + symbolizer_vocab, EOS) diff --git a/tensor2tensor/data_generators/translate_test.py b/tensor2tensor/data_generators/wmt_test.py similarity index 96% rename from tensor2tensor/data_generators/translate_test.py rename to tensor2tensor/data_generators/wmt_test.py index f082c1a85..441ceef59 100644 --- a/tensor2tensor/data_generators/translate_test.py +++ b/tensor2tensor/data_generators/wmt_test.py @@ -27,7 +27,7 @@ import six from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import translate +from tensor2tensor.data_generators import wmt import tensorflow as tf @@ -52,7 +52,7 @@ def testCharacterGenerator(self): # Call character generator on the generated files. results_src, results_tgt = [], [] character_vocab = text_encoder.ByteTextEncoder() - for dictionary in translate.character_generator( + for dictionary in wmt.character_generator( tmp_file_path + ".src", tmp_file_path + ".tgt", character_vocab): self.assertEqual(sorted(list(dictionary)), ["inputs", "targets"]) results_src.append(dictionary["inputs"]) From 3f1a3f04f05cc1f20ce791352040f243c6739ffc Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Wed, 18 Oct 2017 12:18:05 -0700 Subject: [PATCH 0509/4095] Added a cache to improve performance of SubwordTextEncoder. PiperOrigin-RevId: 172637289 --- tensor2tensor/data_generators/text_encoder.py | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 64eef14fe..8982c3aab 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -464,9 +464,24 @@ def _tokens_to_subtoken_ids(self, tokens): """ ret = [] for token in tokens: - ret.extend( - self._escaped_token_to_subtoken_ids( - _escape_token(token, self._alphabet))) + ret.extend(self._token_to_subtoken_ids(token)) + return ret + + def _token_to_subtoken_ids(self, token): + """Converts token to a list of subtoken ids. + + Args: + token: a string. + Returns: + a list of integers in the range [0, vocab_size) + """ + cache_location = hash(token) % self._cache_size + cache_key, cache_value = self._cache[cache_location] + if cache_key == token: + return cache_value + ret = self._escaped_token_to_subtoken_ids( + _escape_token(token, self._alphabet)) + self._cache[cache_location] = (token, ret) return ret def _subtoken_ids_to_tokens(self, subtokens): @@ -717,6 +732,9 @@ def _init_subtokens_from_list(self, subtoken_strings, reserved=0): s: i + reserved for i, s in enumerate(subtoken_strings) if s } + # Initialize the cache to empty. + self._cache_size = 2 ** 20 + self._cache = [(None, None)] * self._cache_size def _init_alphabet_from_tokens(self, tokens): """Initialize alphabet from an iterable of token or subtoken strings.""" From 68023da36b75f1a117e4b88233d6b981e26dd8fb Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 18 Oct 2017 13:27:08 -0700 Subject: [PATCH 0510/4095] A MultiNLI classification problem for Tensor2tensor. PiperOrigin-RevId: 172646522 --- tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/multinli.py | 178 ++++++++++++++++++ 2 files changed, 179 insertions(+) create mode 100644 tensor2tensor/data_generators/multinli.py diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 5877b541e..97aaa7d1e 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -29,6 +29,7 @@ from tensor2tensor.data_generators import image from tensor2tensor.data_generators import imdb from tensor2tensor.data_generators import lm1b +from tensor2tensor.data_generators import multinli from tensor2tensor.data_generators import problem_hparams from tensor2tensor.data_generators import ptb from tensor2tensor.data_generators import snli diff --git a/tensor2tensor/data_generators/multinli.py b/tensor2tensor/data_generators/multinli.py new file mode 100644 index 000000000..acd3a2c58 --- /dev/null +++ b/tensor2tensor/data_generators/multinli.py @@ -0,0 +1,178 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for MultiNLI (https://www.nyu.edu/projects/bowman/multinli/). +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import os +import zipfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import metrics +from tensor2tensor.utils import registry + +import tensorflow as tf + +EOS = text_encoder.EOS_ID + + +class MultinliProblem(problem.Problem): + """Base class for MultiNLI classification problems.""" + + _ZIP = 'multinli_1.0.zip' + _URL = 'https://www.nyu.edu/projects/bowman/multinli/' + _ZIP + _LABEL_DICT = {'contradiction': 0, + 'entailment': 1, + 'neutral': 2} + _LABELS = {'contradiction', 'entailment', 'neutral'} + + @property + def num_shards(self): + return 10 + + @property + def vocab_file(self): + if self._matched: + return 'multinli_matched.vocab' + else: + return 'multinli_mismatched.vocab' + + @property + def targeted_vocab_size(self): + return 2**14 + + @property + def _matched(self): + raise NotImplementedError() + + @property + def _train_file(self): + return 'multinli_1.0/multinli_1.0_train.jsonl' + + @property + def _dev_file(self): + if self._matched: + return 'multinli_1.0/multinli_1.0_dev_matched.jsonl' + else: + return 'multinli_1.0/multinli_1.0_dev_mismatched.jsonl' + + def _examples(self, data_dir, tmp_dir, train): + file_path = generator_utils.maybe_download(tmp_dir, self._ZIP, self._URL) + zip_ref = zipfile.ZipFile(file_path, 'r') + zip_ref.extractall(tmp_dir) + zip_ref.close() + + data_file = self._train_file if train else self._dev_file + examples = [] + with tf.gfile.GFile(os.path.join(tmp_dir, data_file), mode='r') as f: + for line in f: + record = json.loads(line) + try: + label_str = record['gold_label'].encode('ascii') + if label_str != '-': + label = self._LABEL_DICT[label_str] + sentence1 = record['sentence1'].encode('ascii') + sentence2 = record['sentence2'].encode('ascii') + examples.append({'sentence1': sentence1, + 'sentence2': sentence2, + 'label': label}) + except UnicodeEncodeError: + pass + + return examples + + def _inputs_and_targets(self, encoder, examples): + for e in examples: + enc_s1 = encoder.encode(e['sentence1']) + enc_s2 = encoder.encode(e['sentence2']) + + yield { + 'inputs': enc_s1 + [EOS] + enc_s2 + [EOS], + 'targets': [e['label']] + } + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + train_paths = self.training_filepaths( + data_dir, self.num_shards, shuffled=False) + dev_paths = self.dev_filepaths(data_dir, 1, shuffled=False) + + train_examples = self._examples(data_dir, tmp_dir, train=True) + dev_examples = self._examples(data_dir, tmp_dir, train=False) + + encoder = generator_utils.get_or_generate_vocab_inner( + data_dir, self.vocab_file, self.targeted_vocab_size, + (e['sentence1'] + ' ' + e['sentence2'] + for e in train_examples + dev_examples) + ) + + generator_utils.generate_dataset_and_shuffle( + self._inputs_and_targets(encoder, train_examples), train_paths, + self._inputs_and_targets(encoder, dev_examples), dev_paths) + + def hparams(self, defaults, unused_model_hparams): + p = defaults + source_vocab_size = self._encoders['inputs'].vocab_size + p.input_modality = { + 'inputs': (registry.Modalities.SYMBOL, source_vocab_size) + } + p.target_modality = (registry.Modalities.CLASS_LABEL, 3) + p.input_space_id = problem.SpaceID.EN_TOK + p.target_space_id = problem.SpaceID.GENERIC + + def feature_encoders(self, data_dir): + vocab_filename = os.path.join(data_dir, self.vocab_file) + encoder = text_encoder.SubwordTextEncoder(vocab_filename) + return { + 'inputs': encoder, + 'targets': text_encoder.ClassLabelEncoder(self._LABELS), + } + + def example_reading_spec(self): + data_fields = { + 'inputs': tf.VarLenFeature(tf.int64), + 'targets': tf.FixedLenFeature([1], tf.int64), + } + data_items_to_decoders = None + return (data_fields, data_items_to_decoders) + + def eval_metrics(self): + return [metrics.Metrics.ACC] + + +@registry.register_problem +class MultinliMatched(MultinliProblem): + """MultiNLI with matched dev set.""" + + @property + def _matched(self): + return True + + +@registry.register_problem +class MultinliMismatched(MultinliProblem): + """MultiNLI with mismatched dev set.""" + + @property + def _matched(self): + return False From 62aba9da091c25e958736b0398da0e7e99c978b8 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Wed, 18 Oct 2017 14:06:59 -0700 Subject: [PATCH 0511/4095] Add ImageEncoder that allows to operate on images (only saving for now). PiperOrigin-RevId: 172652081 --- tensor2tensor/data_generators/image.py | 4 +- tensor2tensor/data_generators/text_encoder.py | 70 +++++++++++++++++++ tensor2tensor/layers/modalities.py | 6 +- tensor2tensor/utils/decoding.py | 21 +++--- 4 files changed, 89 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index df497019a..e9ae45f01 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -227,7 +227,7 @@ def feature_encoders(self, data_dir): # This vocab file must be present within the data directory. vocab_filename = os.path.join(data_dir, "charset_size134.txt") return { - "inputs": text_encoder.TextEncoder(), + "inputs": text_encoder.ImageEncoder(), "targets": text_encoder.SubwordTextEncoder(vocab_filename) } @@ -273,7 +273,7 @@ def class_labels(self): def feature_encoders(self, data_dir): del data_dir return { - "inputs": text_encoder.TextEncoder(), + "inputs": text_encoder.ImageEncoder(), "targets": text_encoder.ClassLabelEncoder(self.class_labels) } diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 8982c3aab..6c9607bf4 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -27,6 +27,7 @@ import collections from itertools import chain import re +import tempfile # Dependency imports @@ -773,3 +774,72 @@ def store_to_file(self, filename): with tf.gfile.Open(filename, "w") as f: for subtoken_string in self._all_subtoken_strings: f.write("'" + unicode_to_native(subtoken_string) + "'\n") + + +class ImageEncoder(object): + """Encoder class for saving and loading images.""" + + def __init__(self, num_reserved_ids=0, height=32, width=32, channels=3): + assert num_reserved_ids == 0 + self._height = height + self._width = width + self._channels = channels + + @property + def num_reserved_ids(self): + return 0 + + def encode(self, s): + """Transform a string with a filename into a list of RGB integers. + + Args: + s: path to the file with an image. + + Returns: + ids: list of integers + """ + # TODO(lukaszkaiser): implement this. + raise NotImplementedError + + def decode(self, ids): + """Transform a sequence of int ids into an image file. + + Args: + ids: list of integers to be converted. + + Returns: + Path to the temporary file where the image was saved. + + Raises: + ValueError: if the ids are not of the appropriate size. + """ + _, tmp_file_path = tempfile.mkstemp() + length = self._height * self._width * self._channels + if len(ids) != length: + raise ValueError("Length of ids (%d) must be height (%d) x width (%d) x " + "channels (%d); %d != %d.\n Ids: %s" + % (len(ids), self._height, self._width, self._channels, + len(ids), length, " ".join([str(i) for i in ids]))) + with tf.Graph().as_default(): + raw = tf.constant(ids, dtype=tf.uint8) + img = tf.reshape(raw, [self._height, self._width, self._channels]) + png = tf.image.encode_png(img) + op = tf.write_file(tmp_file_path, png) + with tf.Session() as sess: + sess.run(op) + return tmp_file_path + + def decode_list(self, ids): + """Transform a sequence of int ids into an image file. + + Args: + ids: list of integers to be converted. + + Returns: + Singleton list: path to the temporary file where the image was saved. + """ + return [self.decode(ids)] + + @property + def vocab_size(self): + return 256 diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 8e76c8051..7e15e0351 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -172,7 +172,11 @@ def top(self, body_output, _): dim = body_output.get_shape().as_list()[-1] // 3 out = tf.reshape(body_output, [shape[0], shape[1], shape[2], self._channels, dim]) - return tf.layers.dense(out, self.top_dimensionality) + res = tf.layers.dense(out, self.top_dimensionality) + if not tf.get_variable_scope().reuse: + res_argmax = tf.cast(tf.argmax(res, axis=-1), tf.uint8) + tf.summary.image("result", res_argmax, max_outputs=1) + return res def loss(self, top_out, targets, weights_fn=common_layers.weights_all): # Call the default implementation, but weight 1.0 on 0s by default. diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 5dac0dd5f..4b8d7fca9 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -69,7 +69,8 @@ def log_decode_results(inputs, model_dir=None, identity_output=False): """Log inference results.""" - if "image" in problem_name and save_images: + is_image = "image" in problem_name + if is_image and save_images: save_path = os.path.join(model_dir, "%s_prediction_%d.jpg" % (problem_name, prediction_idx)) show_and_save_image(inputs / 255., save_path) @@ -77,7 +78,7 @@ def log_decode_results(inputs, if identity_output: decoded_inputs = " ".join(map(str, inputs.flatten())) else: - decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs.flatten())) + decoded_inputs = inputs_vocab.decode(_save_until_eos(inputs, is_image)) tf.logging.info("Inference results INPUT: %s" % decoded_inputs) @@ -87,11 +88,9 @@ def log_decode_results(inputs, if targets is not None: decoded_targets = " ".join(map(str, targets.flatten())) else: - decoded_outputs = "".join( - map(str, targets_vocab.decode(_save_until_eos(outputs.flatten())))) + decoded_outputs = targets_vocab.decode(_save_until_eos(outputs, is_image)) if targets is not None: - decoded_targets = "".join( - map(str, targets_vocab.decode(_save_until_eos(targets.flatten())))) + decoded_targets = targets_vocab.decode(_save_until_eos(targets, is_image)) tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) if targets is not None: @@ -303,6 +302,7 @@ def input_fn(): result_iter = estimator.predict(input_fn) for result in result_iter: problem_idx = result["problem_choice"] + is_image = False # TODO(lukaszkaiser): find out from problem id / class. targets_vocab = hparams.problems[problem_idx].vocabulary["targets"] if decode_hp.return_beams: @@ -312,7 +312,7 @@ def input_fn(): scores = np.split(result["scores"], decode_hp.beam_size, axis=0) for k, beam in enumerate(beams): tf.logging.info("BEAM %d:" % k) - beam_string = targets_vocab.decode(_save_until_eos(beam.flatten())) + beam_string = targets_vocab.decode(_save_until_eos(beam, is_image)) if scores is not None: tf.logging.info("%s\tScore:%f" % (beam_string, scores[k])) else: @@ -322,7 +322,7 @@ def input_fn(): tf.logging.info(" ".join(map(str, result["outputs"].flatten()))) else: tf.logging.info( - targets_vocab.decode(_save_until_eos(result["outputs"].flatten()))) + targets_vocab.decode(_save_until_eos(result["outputs"], is_image))) def _decode_batch_input_fn(problem_id, num_decode_batches, sorted_inputs, @@ -509,8 +509,11 @@ def _get_sorted_inputs(filename, num_shards=1, delimiter="\n"): return sorted_inputs, sorted_keys -def _save_until_eos(hyp): +def _save_until_eos(hyp, is_image): """Strips everything after the first <EOS> token, which is normally 1.""" + hyp = hyp.flatten() + if is_image: + return hyp try: index = list(hyp).index(text_encoder.EOS_ID) return hyp[0:index] From 4d81643f584795f35d57afeb796b23a402e01de8 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 18 Oct 2017 15:09:45 -0700 Subject: [PATCH 0512/4095] Ensure shapes of inputs are fully defined in tpu trainer PiperOrigin-RevId: 172662440 --- tensor2tensor/tpu/tpu_trainer_lib.py | 39 ++++++++++++++++++---------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index c514da2ad..52b625b89 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -91,31 +91,42 @@ def _valid_size(example): dataset = dataset.shuffle(100) # TODO(rsepassi): In eval mode, should not repeat dataset = dataset.repeat(None) - dataset = data_reader.padded_batch(dataset, - batching_scheme["batch_sizes"][0], + dataset = data_reader.padded_batch(dataset, batch_size, batching_scheme["padded_shapes"]) if not is_training: dataset = dataset.map( lambda f: pad_batch(f, batch_size), num_parallel_calls=num_threads) - dataset.prefetch(1) + def shape_def(example): + """Set the right shapes for the features.""" + inputs = example["inputs"] + targets = example["targets"] - train_features = dataset.make_one_shot_iterator().get_next() + # Ensure inputs and targets are proper rank. + while len(inputs.get_shape()) <= 4: + inputs = tf.expand_dims(inputs, axis=-1) + while len(targets.get_shape()) <= 4: + targets = tf.expand_dims(targets, axis=-1) - inputs = train_features["inputs"] - targets = train_features["targets"] + example["inputs"] = inputs + example["targets"] = targets - # Ensure inputs and targets are proper rank. - while len(inputs.get_shape()) != 4: - inputs = tf.expand_dims(inputs, axis=-1) - while len(targets.get_shape()) != 4: - targets = tf.expand_dims(targets, axis=-1) + # Ensure batch size is set on all features + for _, t in example.iteritems(): + shape = t.get_shape().as_list() + shape[0] = batch_size + t.set_shape(t.get_shape().merge_with(shape)) + # Assert shapes are fully known + t.get_shape().assert_is_fully_defined() - train_features["inputs"] = inputs - train_features["targets"] = targets + return example + + dataset = dataset.map(shape_def, num_parallel_calls=num_threads) + dataset = dataset.prefetch(1) + features = dataset.make_one_shot_iterator().get_next() - return train_features, targets + return features, features["targets"] return input_fn From 7d6348e76fa998eff4025f1194f8456c7272e761 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 19 Oct 2017 11:34:58 -0700 Subject: [PATCH 0513/4095] Fix TPU input ranks PiperOrigin-RevId: 172774616 --- tensor2tensor/tpu/tpu_trainer_lib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 52b625b89..bf14966c3 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -104,9 +104,9 @@ def shape_def(example): targets = example["targets"] # Ensure inputs and targets are proper rank. - while len(inputs.get_shape()) <= 4: + while len(inputs.get_shape()) < 4: inputs = tf.expand_dims(inputs, axis=-1) - while len(targets.get_shape()) <= 4: + while len(targets.get_shape()) < 4: targets = tf.expand_dims(targets, axis=-1) example["inputs"] = inputs From 1c516666e5221f5a8f4626aa72a6d903975d0e00 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Thu, 19 Oct 2017 15:38:57 -0700 Subject: [PATCH 0514/4095] Allow parallel decoding for decode_from_dataset PiperOrigin-RevId: 172810158 --- tensor2tensor/bin/t2t-decoder | 1 + tensor2tensor/data_generators/problem.py | 13 ++++++++----- tensor2tensor/utils/data_reader.py | 7 +++++-- tensor2tensor/utils/decoding.py | 13 ++++++++++--- tensor2tensor/utils/input_fn_builder.py | 9 +++++++-- 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index ff143f5d4..c2bf97f94 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -84,6 +84,7 @@ def main(_): decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) decode_hp.add_hparam("shards", FLAGS.decode_shards) + decode_hp.add_hparam("shard_id", FLAGS.worker_id) if FLAGS.decode_interactive: decoding.decode_interactively(estimator, decode_hp) elif FLAGS.decode_from_file: diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index e46708859..1c7706315 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -234,7 +234,7 @@ def test_filepaths(self, data_dir, num_shards, shuffled): return generator_utils.test_data_filenames(file_basename, data_dir, num_shards) - def filepattern(self, data_dir, mode): + def filepattern(self, data_dir, mode, shard=None): """Get filepattern for data files for mode. Matches mode to a suffix. @@ -246,12 +246,13 @@ def filepattern(self, data_dir, mode): Args: data_dir: str, data directory. mode: tf.estimator.ModeKeys or "test". + shard: int, if provided, will only read data from the specified shard. Returns: filepattern str """ path = os.path.join(data_dir, self.dataset_filename()) - + shard_str = "-%05d" % shard if shard else "" if mode == tf.estimator.ModeKeys.TRAIN: suffix = "train" elif mode in [tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT]: @@ -260,7 +261,7 @@ def filepattern(self, data_dir, mode): assert mode == "test" suffix = "test" - return "%s-%s*" % (path, suffix) + return "%s-%s%s*" % (path, suffix, shard_str) def __init__(self, was_reversed=False, was_copy=False): """Create a Problem. @@ -328,7 +329,8 @@ def dataset(self, shuffle_files=None, hparams=None, preprocess=True, - dataset_split=None): + dataset_split=None, + shard=None): """Build a Dataset for this problem. Args: @@ -347,6 +349,7 @@ def dataset(self, Problem.preprocess_example. dataset_split: tf.estimator.ModeKeys + ["test"], which split to read data from (TRAIN:"-train", EVAL:"-dev", "test":"-test"). Defaults to mode. + shard: int, if provided, will only read data from the specified shard. Returns: Dataset containing dict<feature name, Tensor>. @@ -372,7 +375,7 @@ def dataset(self, } is_training = mode == tf.estimator.ModeKeys.TRAIN - data_filepattern = self.filepattern(data_dir, dataset_split) + data_filepattern = self.filepattern(data_dir, dataset_split, shard=shard) tf.logging.info("Reading data files from %s", data_filepattern) data_files = tf.contrib.slim.parallel_reader.get_data_files( data_filepattern) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 83f66b985..9ec147e3d 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -71,7 +71,8 @@ def input_pipeline(problem, mode, hparams, batching_scheme, - dataset_split=None): + dataset_split=None, + shard=None): """Input pipeline, returns a dictionary of batched and padded tensors. Args: @@ -88,6 +89,7 @@ def input_pipeline(problem, "max_length": an integer. We drop sequences which are longer. dataset_split: tf.estimator.ModeKeys + ["test"], which split of the dataset to use. Defaults to mode. + shard: int, if provided, will only read data from the specified shard. Returns: dict <feature name, batched and padded Tensor> @@ -102,7 +104,8 @@ def input_pipeline(problem, num_threads=num_threads, output_buffer_size=capacity, hparams=hparams, - dataset_split=dataset_split) + dataset_split=dataset_split, + shard=shard) dataset = dataset.map(cast_int64_to_int32, num_threads=num_threads) dataset = dataset.filter( functools.partial( diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 4b8d7fca9..e9d47be88 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -106,6 +106,8 @@ def decode_from_dataset(estimator, tf.logging.info("Performing local inference from dataset for %s.", str(problem_names)) hparams = estimator.params + # We assume that worker_id corresponds to shard number. + shard = decode_hp.shard_id if decode_hp.shards > 1 else None for problem_idx, problem_name in enumerate(problem_names): # Build the inference input function @@ -116,14 +118,19 @@ def decode_from_dataset(estimator, num_datashards=devices.data_parallelism().n, fixed_problem=problem_idx, batch_size=decode_hp.batch_size, - dataset_split=dataset_split) + dataset_split=dataset_split, + shard=shard) # Get the predictions as an iterable predictions = estimator.predict(infer_input_fn) # Prepare output file writers if decode_to_file passed if decode_to_file: - output_filepath = _decode_filename(decode_to_file, problem_name, + if decode_hp.shards > 1: + decode_filename = decode_to_file + ("%.2d" % decode_hp.shard_id) + else: + decode_filename = decode_to_file + output_filepath = _decode_filename(decode_filename, problem_name, decode_hp) parts = output_filepath.split(".") parts[-1] = "targets" @@ -245,7 +252,7 @@ def input_fn(): else: output_filename = filename if decode_hp.shards > 1: - base_filename = output_filename + ("%.2d" % FLAGS.worker_id) + base_filename = output_filename + ("%.2d" % decode_hp.shard_id) else: base_filename = output_filename decode_filename = _decode_filename(base_filename, problem_name, decode_hp) diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index f4a3098ad..fc4a72405 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -36,7 +36,8 @@ def build_input_fn(mode, worker_replicas=None, worker_id=None, batch_size=None, - dataset_split=None): + dataset_split=None, + shard=None): """Provides input to the graph, either from disk or via a placeholder. This function produces an input function that will feed data into @@ -62,6 +63,7 @@ def build_input_fn(mode, batch_size: int, if provided, will use a fixed batch size. dataset_split: tf.estimator.ModeKeys + ["test"], which split of the dataset to use. Defaults to mode. + shard: int, if provided, will only read data from the specified shard. Returns: A function that returns a dictionary of features and the target labels. @@ -99,6 +101,7 @@ def input_fn(): mode, batch_size=batch_size, dataset_split=dataset_split, + shard=shard, name="problem_%d" % problem_idx) problem_batches.append(feature_map) @@ -204,6 +207,7 @@ def features_for_problem(problem_instance, mode, batch_size=None, dataset_split=None, + shard=None, name="problem_inputs"): """Feature map for Problem.""" with tf.name_scope(name): @@ -228,7 +232,8 @@ def features_for_problem(problem_instance, mode, hparams, batching_scheme, - dataset_split=dataset_split) + dataset_split=dataset_split, + shard=shard) # Ensure inputs and targets are proper rank. if problem_instance.has_inputs: From 30df3e9fbb93390b3a30e3532a234ec3f5d15002 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Thu, 19 Oct 2017 17:41:04 -0700 Subject: [PATCH 0515/4095] Typo, minor hparams changes for the attention_lm_moe PiperOrigin-RevId: 172824993 --- tensor2tensor/layers/common_attention.py | 22 ++++++++++++++-------- tensor2tensor/models/attention_lm_moe.py | 20 +++++++++++++++++++- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 792241632..2178e6fe5 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -466,7 +466,7 @@ def attention_bias_batch( coordinates of the batches batch_coordinates_k (tf.Tensor): int32 of shape [length_k, 1] containing the coordinates of the batches. If None, do self attention (q and k identical) - condition_fn (fct): A predicat function defining which type of mask build + condition_fn (fct): A function defining which type of mask build Returns: tf.Tensor: float32 mask of shape [length_q, length_k] containing either 0 or @@ -501,7 +501,7 @@ def to_float(bc): attention_bias_future = functools.partial( attention_bias_batch, # Elems can attend to themself (otherwise would use bias_batch + 1.0) - # No tf.abs to concider the order + # No tf.abs to consider the order # tf.maximum and tf.minimum to threshold the values condition_fn=lambda bias: tf.maximum(0.0, tf.minimum(1.0, bias)), ) @@ -1059,7 +1059,7 @@ def dot_product_attention_relative(q, def masked_local_attention_1d( q, k, v, block_length=128, name=None): - """Attention to the source position and a neigborhood to the left of it. + """Attention to the source position and a neighborhood to the left of it. The sequence is divided into blocks of length block_size. Attention for a given query position can only see memory positions @@ -2267,7 +2267,7 @@ def length_not_null(x, batch_coordinate): bias_batch = attention_bias_coordinates(batch_coordinate) def add_or_set_if(prev_bias, new_bias, condition): - """Add the bias together while concidering the None case.""" + """Add the bias together while considering the None case.""" if not condition: return prev_bias elif prev_bias is None: @@ -2776,7 +2776,7 @@ def get_gates_head(x, add_first=False): # Each head get its own dispatcher gates = lsh.get_gates(single_x) nb_buckets = gates.get_shape().as_list()[-1] - # Reshape to [batch, length, depth] but should concider sequence + # Reshape to [batch, length, depth] but should consider sequence # padding in that case (also dispatch the padding) gates = tf.reshape(gates, [batch_size, length, nb_buckets]) list_gates.append(gates) @@ -2958,12 +2958,13 @@ def pad_and_reshape(x): @expert_utils.add_var_scope() def multihead_self_attention_reduced( - x, factor, reduction_type, multihead_params): + x, factor, nonlinearity, reduction_type, multihead_params): """Reduce the length dimension by compressing with conv. Args: x (tf.Tensor): float32 of shape [batch, length, depth] factor (int): compression factor for the memory sequence + nonlinearity (str): Add some non-linearity after the memory block reduction_type (str): type of compression multihead_params (dict): parameters for multihead attention @@ -2971,13 +2972,13 @@ def multihead_self_attention_reduced( (tf.Tensor): float32 of shape [batch, length, depth] Raises: - ValueError: If reduction_type invalid + ValueError: If reduction_type or nonlinearity is invalid """ depth = x.get_shape().as_list()[-1] # Could try to have some overlapp between the blocks but that would # create conv artifacts, would make it difficult to not attend to the future - # withing one group and the padding should be handled specially. + # within one group and the padding should be handled specially. # Reduce the memory dimension if reduction_type == "attention": @@ -2988,6 +2989,11 @@ def multihead_self_attention_reduced( else: raise ValueError("Unknown reduction type {}".format(reduction_type)) + if nonlinearity == "silu": + memory_x *= tf.nn.sigmoid(memory_x) + elif nonlinearity != "none": + raise ValueError("Unknown non linearity {}".format(nonlinearity)) + memory_x = tf.concat( # Add the first elem to make it attendable by everyone (otherwise the # first block cannot attend to anything) diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 85c7c9d49..48720cd5d 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -277,6 +277,7 @@ def print_shape(x, suffix, debug=False): preprocess(x), factor=hparams.attention_red_factor, reduction_type=hparams.attention_reduction_type, + nonlinearity=hparams.attention_nonlinearity, multihead_params=dict( total_key_depth= hparams.attention_key_channels or hparams.hidden_size, @@ -368,7 +369,7 @@ def attention_lm_moe_prepare_decoder(targets, hparams): """ targets_pad_mask = common_attention.embedding_to_padding(targets) with tf.name_scope("pad_remover"): - # Because of the shift_right, the <eos> token will be concidered as + # Because of the shift_right, the <eos> token will be considered as # padding. In practice, it doesn't really matter, due to the triangular # mask, this token should never be attended. pad_remover = expert_utils.PadRemover(targets_pad_mask) @@ -509,6 +510,9 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_red_factor", 3) hparams.add_hparam("attention_block_length", 128) hparams.add_hparam("attention_reduction_type", "conv") + # Non linearity for the attention reduction. Either "none", or "silu" ( + # Sigmoid Linear-Unit described in https://arxiv.org/abs/1710.05941) + hparams.add_hparam("attention_nonlinearity", "none") # If attention_exp_factor is set, each input to local_expert_attention (of # dimensionality hidden size) is projected into attention_exp_factor smaller # inputs, each of dimensionality attention_exp_inputdim. (otherwise @@ -599,6 +603,20 @@ def attention_lm_16k(): return hparams +@registry.register_hparams +def attention_lm_12k(): + hparams = attention_lm_hybrid_v2() + hparams.batch_size = 12000 + return hparams + + +@registry.register_hparams +def attention_lm_11k(): + hparams = attention_lm_hybrid_v2() + hparams.batch_size = 11500 + return hparams + + @registry.register_hparams def attention_lm_ae_extended(): """Experiment with the exp_factor params.""" From 516a369482b6d15157eae7de0b39f4307810da60 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 20 Oct 2017 13:03:50 -0700 Subject: [PATCH 0516/4095] Update @recompute_grad to respect tf.contrib.framework.arg_scope PiperOrigin-RevId: 172922818 --- tensor2tensor/layers/rev_block.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 1eb988c4c..62ed6c6a5 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -346,14 +346,16 @@ def _recompute_grad(fn, args): """See recompute_grad.""" cached_vs = [] + cached_arg_scope = [] def grad_fn(inputs, variables, outputs, output_grads): """Recompute outputs for gradient computation.""" del outputs # Recompute outputs with tf.control_dependencies(output_grads): - with tf.variable_scope(cached_vs[0], reuse=True): - outputs = fn(*inputs) + with tf.contrib.framework.arg_scope(cached_arg_scope[0]): + with tf.variable_scope(cached_vs[0], reuse=True): + outputs = fn(*inputs) if not (isinstance(outputs, list) or isinstance(outputs, tuple)): outputs = [outputs] @@ -366,6 +368,11 @@ def grad_fn(inputs, variables, outputs, output_grads): @common_layers.fn_with_custom_grad(grad_fn) def fn_with_recompute(*args): cached_vs.append(tf.get_variable_scope()) + # TODO(rsepassi): Rm conditional in TF 1.4 + if hasattr(tf.contrib.framework, "current_arg_scope"): + cached_arg_scope.append(tf.contrib.framework.current_arg_scope()) + else: + cached_arg_scope.append({}) return fn(*args) return fn_with_recompute(*args) From e4aa5f2f66d139812d84be91b057f6ac476501aa Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Fri, 20 Oct 2017 14:38:01 -0700 Subject: [PATCH 0517/4095] Fix to SymbolModality to allow weight-sharing between target PiperOrigin-RevId: 172935434 --- tensor2tensor/layers/modalities.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 7e15e0351..a29aa93b1 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -85,6 +85,7 @@ def bottom_simple(self, x, name, reuse): return ret def bottom(self, x): + self._bottom_was_called = True if self._model_hparams.shared_embedding_and_softmax_weights: return self.bottom_simple(x, "shared", reuse=None) else: @@ -92,7 +93,11 @@ def bottom(self, x): def targets_bottom(self, x): if self._model_hparams.shared_embedding_and_softmax_weights: - return self.bottom_simple(x, "shared", reuse=True) + try: + return self.bottom_simple(x, "shared", reuse=True) + except ValueError: + # perhaps there were no inputs, and this is a new variable. + return self.bottom_simple(x, "shared", reuse=None) else: return self.bottom_simple(x, "target_emb", reuse=None) From 8797199871423964a9d34f84e6e694ab402818b1 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 20 Oct 2017 16:33:09 -0700 Subject: [PATCH 0518/4095] Fix visualizations. PiperOrigin-RevId: 172950554 --- .../TransformerVisualization.ipynb | 58 ++++++++++++------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index ae3c5809a..ce70bde89 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -30,7 +30,8 @@ "import numpy as np\n", "\n", "from tensor2tensor.utils import trainer_utils as utils\n", - "from tensor2tensor.visualization import attention" + "from tensor2tensor.visualization import attention\n", + "from tensor2tensor.utils import decoding" ] }, { @@ -84,7 +85,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/home/llion/t2t_train/wmt_ende_tokens_32k/transformer-transformer_base_single_gpu\n" + "/usr/local/google/home/llion/t2t_train/translate_ende_wmt32k/transformer-transformer_base_single_gpu\n" ] } ], @@ -104,7 +105,9 @@ "FLAGS.problems = PROBLEM\n", "FLAGS.hparams_set = HPARAMS\n", "FLAGS.data_dir = DATA_DIR\n", - "FLAGS.model = MODEL" + "FLAGS.model = MODEL\n", + "\n", + "FLAGS.schedule = 'train_and_evaluate'" ] }, { @@ -120,24 +123,33 @@ "output_type": "stream", "text": [ "INFO:tensorflow:datashard_devices: ['gpu:0']\n", - "INFO:tensorflow:caching_devices: None\n" + "INFO:tensorflow:caching_devices: None\n", + "INFO:tensorflow:batching_scheme = {'min_length': 0, 'window_size': 720, 'shuffle_queue_size': 270, 'boundaries': [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28, 30, 33, 36, 39, 42, 46, 50, 55, 60, 66, 72, 79, 86, 94, 103, 113, 124, 136, 149, 163, 179, 196, 215, 236], 'max_length': 1000000000, 'batch_sizes': [240, 180, 180, 180, 144, 144, 144, 120, 120, 120, 90, 90, 90, 90, 80, 72, 72, 60, 60, 48, 48, 48, 40, 40, 36, 30, 30, 24, 24, 20, 20, 18, 18, 16, 15, 12, 12, 10, 10, 9, 8, 8]}\n", + "INFO:tensorflow:Updated batching_scheme = {'min_length': 0, 'window_size': 720, 'shuffle_queue_size': 270, 'boundaries': [], 'max_length': 1000000000, 'batch_sizes': [1]}\n", + "INFO:tensorflow:Reading data files from /usr/local/google/home/llion/t2t_data/translate_ende_wmt32k-dev*\n" ] } ], "source": [ - "hparams = utils.create_hparams(HPARAMS, PROBLEM, DATA_DIR)\n", + "hparams = utils.create_hparams(FLAGS.hparams_set, FLAGS.data_dir)\n", "\n", "# SET EXTRA HYPER PARAMS HERE!\n", - "# e.g.\n", - "# hparams.batch_size = 1024\n", + "#hparams.null_slot = True\n", + "\n", + "utils.add_problem_hparams(hparams, PROBLEM)\n", "\n", "num_datashards = utils.devices.data_parallelism().n\n", "\n", + "mode = tf.estimator.ModeKeys.EVAL\n", + "\n", "input_fn = utils.input_fn_builder.build_input_fn(\n", - " mode=tf.estimator.ModeKeys.EVAL,\n", - " hparams=hparams,\n", - " data_dir=DATA_DIR,\n", - " num_datashards=num_datashards)\n", + " mode=mode,\n", + " hparams=hparams,\n", + " data_dir=DATA_DIR,\n", + " num_datashards=num_datashards,\n", + " worker_replicas=FLAGS.worker_replicas,\n", + " worker_id=FLAGS.worker_id,\n", + " batch_size=1)\n", "\n", "inputs, target = input_fn()\n", "features = inputs\n", @@ -199,8 +211,15 @@ } ], "source": [ - "spec = utils.model_builder.model_fn(MODEL, features, tf.estimator.ModeKeys.EVAL, hparams, problem_names=[PROBLEM])\n", - "predictions_dict = spec.predictions" + "model_fn=utils.model_builder.build_model_fn(\n", + " MODEL,\n", + " problem_names=[PROBLEM],\n", + " train_steps=FLAGS.train_steps,\n", + " worker_id=FLAGS.worker_id,\n", + " worker_replicas=FLAGS.worker_replicas,\n", + " eval_run_autoregressive=FLAGS.eval_run_autoregressive,\n", + " decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams))\n", + "est_spec = model_fn(features, target, mode, hparams)" ] }, { @@ -224,8 +243,7 @@ ], "source": [ "with tf.variable_scope(tf.get_variable_scope(), reuse=True):\n", - " spec = utils.model_builder.model_fn(MODEL, features, tf.estimator.ModeKeys.PREDICT, hparams, problem_names=[PROBLEM])\n", - " beam_out = spec.predictions['outputs']" + " beam_out = model_fn(features, target, tf.contrib.learn.ModeKeys.INFER, hparams)" ] }, { @@ -246,10 +264,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "INFO:tensorflow:Restoring parameters from /home/llion/t2t_train/wmt_ende_tokens_32k/transformer-transformer_base_single_gpu/model.ckpt-250000\n", + "INFO:tensorflow:Restoring parameters from /usr/local/google/home/llion/t2t_train/translate_ende_wmt32k/transformer-transformer_base_single_gpu/model.ckpt-1\n", "INFO:tensorflow:Starting standard services.\n", - "INFO:tensorflow:Saving checkpoint to path /home/llion/t2t_train/wmt_ende_tokens_32k/transformer-transformer_base_single_gpu/model.ckpt\n", - "INFO:tensorflow:Starting queue runners.\n" + "INFO:tensorflow:Starting queue runners.\n", + "INFO:tensorflow:Saving checkpoint to path /usr/local/google/home/llion/t2t_train/translate_ende_wmt32k/transformer-transformer_base_single_gpu/model.ckpt\n" ] }, { @@ -337,7 +355,7 @@ } ], "source": [ - "inp, out, logits = sess.run([inputs['inputs'], target, predictions_dict['predictions']])\n", + "inp, out, logits = sess.run([inputs['inputs'], target, est_spec.predictions['predictions']])\n", "\n", "print(\"Input: \", decode(inp[0]))\n", "print(\"Gold: \", decode(out[0]))\n", @@ -381,7 +399,7 @@ ], "source": [ "inp_ids = encode(eng)\n", - "beam_decode = sess.run(beam_out, {\n", + "beam_decode = sess.run(beam_out.predictions['outputs'], {\n", " inputs['inputs']: np.expand_dims(np.expand_dims(inp_ids, axis=2), axis=3),\n", "})\n", "trans = decode(beam_decode[0])\n", From 3b11bbf0a9fa739e5f47464544b690cfd16e51f8 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 20 Oct 2017 17:19:09 -0700 Subject: [PATCH 0519/4095] Ensure that training with "continuous_train_and_eval" schedule uses local devices. PiperOrigin-RevId: 172955399 --- tensor2tensor/utils/devices.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/devices.py b/tensor2tensor/utils/devices.py index d532b6d5f..9fa322985 100644 --- a/tensor2tensor/utils/devices.py +++ b/tensor2tensor/utils/devices.py @@ -109,8 +109,11 @@ def _replica_device_setter(worker_device): ps_tasks=FLAGS.ps_replicas, ps_device=FLAGS.ps_job + "/GPU:0" if FLAGS.ps_gpu > 0 else FLAGS.ps_job) - if FLAGS.schedule == "train_and_evaluate": + if FLAGS.schedule in ["train_and_evaluate", "continuous_train_and_eval"]: assert not FLAGS.sync + tf.logging.warn( + "Schedule=%s. Assuming that training is running on a single machine.", + FLAGS.schedule) datashard_devices = ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] if FLAGS.locally_shard_to_cpu or FLAGS.worker_gpu < 1: datashard_devices += ["cpu:0"] From 4b281662482bef87d9a415bd38ed692a83978e67 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Sat, 21 Oct 2017 16:53:56 -0700 Subject: [PATCH 0520/4095] Ensure shard 0 is read correctly in parallel decoding PiperOrigin-RevId: 173011936 --- tensor2tensor/data_generators/problem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 1c7706315..d7faee2c1 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -252,7 +252,7 @@ def filepattern(self, data_dir, mode, shard=None): filepattern str """ path = os.path.join(data_dir, self.dataset_filename()) - shard_str = "-%05d" % shard if shard else "" + shard_str = "-%05d" % shard if shard is not None else "" if mode == tf.estimator.ModeKeys.TRAIN: suffix = "train" elif mode in [tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT]: From ec820d363f622b50afe1b9b9318dbfbd72c77cd0 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Sat, 21 Oct 2017 17:14:39 -0700 Subject: [PATCH 0521/4095] Add transformer_prepend_v1 hparams for backwards compatibility PiperOrigin-RevId: 173012722 --- tensor2tensor/models/transformer.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index baa85829c..5fbd49cb3 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -913,13 +913,26 @@ def transformer_parameter_attention_b(): @registry.register_hparams -def transformer_prepend(): - hparams = transformer_base() +def transformer_prepend_v2(): + hparams = transformer_base_v2() hparams.prepend_mode = "prepend_inputs_masked_attention" hparams.max_length = 0 return hparams +@registry.register_hparams +def transformer_prepend_v1(): + hparams = transformer_base_v1() + hparams.prepend_mode = "prepend_inputs_masked_attention" + hparams.max_length = 0 + return hparams + + +@registry.register_hparams +def transformer_prepend(): + return transformer_prepend_v2() + + @registry.register_ranged_hparams("transformer_base") def transformer_base_range(rhp): """Small range of hyperparameters.""" From e9d61f5ff099cc138b2001669e1ac6dbc8871099 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 23 Oct 2017 10:45:48 -0700 Subject: [PATCH 0522/4095] Modify tpu_trainer_lib to make text autoregressive models work on TPUs. PiperOrigin-RevId: 173136811 --- tensor2tensor/tpu/tpu_trainer_lib.py | 29 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index bf14966c3..dca9f4de9 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -15,7 +15,8 @@ """Library for training on TPU. See tpu_trainer.py. -Currently only supports training and evaluation for text-to-text problems. +Currently only supports training and evaluation for text-to-text and text +autoregressive problems. """ from __future__ import absolute_import @@ -158,20 +159,26 @@ def model_fn(features, labels, mode, params, config): problem_hp = hparams.problems[0] orig_features = features - # Instantiate model and retrieve modalities + # Instantiate model and retrieve modalities. Note that autoregressive models + # have no input modality. model_class = registry.model(model)(hparams, mode, problem_hp) - input_modality = problem_hp.input_modality["inputs"] + input_modality = problem_hp.input_modality.get("inputs") target_modality = problem_hp.target_modality + # Transform features + transformed_features = {} + if input_modality is not None: + transformed_features["inputs"] = input_modality.bottom(features["inputs"]) + transformed_features["targets"] = target_modality.targets_bottom( + features["targets"]) + transformed_features["problem_choice"] = tf.constant(0) + transformed_features["input_space_id"] = tf.constant( + problem_hp.input_space_id) + transformed_features["target_space_id"] = tf.constant( + problem_hp.target_space_id) + # Model construction - features = { - "inputs": input_modality.bottom(features["inputs"]), - "targets": target_modality.targets_bottom(features["targets"]), - "problem_choice": tf.constant(0), - "input_space_id": tf.constant(problem_hp.input_space_id), - "target_space_id": tf.constant(problem_hp.target_space_id) - } - outputs = model_class.model_fn_body(features) + outputs = model_class.model_fn_body(transformed_features) logits = target_modality.top(outputs, labels) # Ensure the length is known statically From 480ee0618d3570344ce64370556782b5ebca1a38 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Mon, 23 Oct 2017 14:34:42 -0700 Subject: [PATCH 0523/4095] Stop inference after predicting EOS when batch_size=1 PiperOrigin-RevId: 173171656 --- tensor2tensor/data_generators/problem.py | 6 +++++ tensor2tensor/utils/t2t_model.py | 33 +++++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index d7faee2c1..657a5b18b 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -533,6 +533,11 @@ def _default_hparams(): # but decrease if your reader uses a lot of memory and increase if slow. max_expected_batch_size_per_shard=64, + # During inference for autoregressive problems, if the batch_size is 1, + # the inference will stop when the model predict a text_encoder.EOS_ID + # token. + stop_at_eos=int(False), + # Modalities used to map from input features to a space compatible with # chosen model architecture. One modality spec (which is a 2-tuple, # (modality_full_name, vocab_size)) per feature key. modality_full_name @@ -647,6 +652,7 @@ def feature_encoders(self, data_dir): def hparams(self, defaults, unused_model_hparams): p = defaults + p.stop_at_eos = int(True) if self.has_inputs: source_vocab_size = self._encoders["inputs"].vocab_size diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index c54b38f3f..5368a82f7 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -26,6 +26,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin +from tensor2tensor.data_generators import text_encoder from tensor2tensor.layers import common_layers from tensor2tensor.utils import beam_search from tensor2tensor.utils import expert_utils as eu @@ -387,8 +388,38 @@ def infer_step(recent_output, recent_logits, unused_loss): logits.set_shape([None, None, None, None, None]) loss = 0.0 + def while_exit_cond(result, logits, loss): # pylint: disable=unused-argument + """Exit the loop either if reach decode_length or EOS.""" + length = tf.shape(result)[1] + + not_overflow = length < decode_length + + if self._problem_hparams.stop_at_eos: + def fn_not_eos(): + return tf.not_equal( # Check if the last predicted element is a EOS + tf.squeeze(result[:, -1, :, :]), + text_encoder.EOS_ID + ) + + not_eos = tf.cond( + # We only check for early stoping if there is at least 1 element ( + # otherwise not_eos will crash) + tf.not_equal(length, 0), + fn_not_eos, + lambda: True, + ) + + return tf.cond( + tf.equal(batch_size, 1), + # If batch_size == 1, we check EOS for early stoping + lambda: tf.logical_and(not_overflow, not_eos), + # Else, just wait for max length + lambda: not_overflow + ) + return not_overflow + result, logits, loss = tf.while_loop( - lambda result, logits, loss: tf.shape(result)[1] < decode_length, + while_exit_cond, infer_step, [result, logits, loss], shape_invariants=[ tf.TensorShape([None, None, None, None]), From e0100e8a1fab31a4ac435d87f3f873d7bb4cceff Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 23 Oct 2017 15:17:59 -0700 Subject: [PATCH 0524/4095] Decoding corrections for problems with no inputs. PiperOrigin-RevId: 173178621 --- tensor2tensor/utils/decoding.py | 6 +++++- tensor2tensor/utils/t2t_model.py | 28 +++++++++++++++------------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index e9d47be88..8d81beb3c 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -140,7 +140,11 @@ def decode_from_dataset(estimator, target_file = tf.gfile.Open(target_filepath, "w") problem_hparams = hparams.problems[problem_idx] - inputs_vocab = problem_hparams.vocabulary.get("inputs", None) + # Inputs vocabulary is set to targets if there are no inputs in the problem, + # e.g., for language models where the inputs are just a prefix of targets. + has_input = "inputs" in problem_hparams.vocabulary + inputs_vocab_key = "inputs" if has_input else "targets" + inputs_vocab = problem_hparams.vocabulary[inputs_vocab_key] targets_vocab = problem_hparams.vocabulary["targets"] for num_predictions, prediction in enumerate(predictions): num_predictions += 1 diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 5368a82f7..0f3caedea 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -229,7 +229,6 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, Returns: samples: an integer `Tensor`. Top samples from the beam search """ - batch_size = tf.shape(features["inputs"])[0] batch_size = tf.Print(batch_size, [batch_size], "beam_decode batch_size=") @@ -260,15 +259,16 @@ def symbols_to_logits_fn(ids): initial_ids = tf.zeros([batch_size], dtype=tf.int32) - inputs_old = features["inputs"] - features["inputs"] = tf.expand_dims(features["inputs"], 1) - if len(features["inputs"].shape) < 5: - features["inputs"] = tf.expand_dims(features["inputs"], 4) - # Expand the inputs in to the beam size. - features["inputs"] = tf.tile(features["inputs"], [1, beam_size, 1, 1, 1]) - s = tf.shape(features["inputs"]) - features["inputs"] = tf.reshape(features["inputs"], - [s[0] * s[1], s[2], s[3], s[4]]) + if self.has_input: + inputs_old = features["inputs"] + features["inputs"] = tf.expand_dims(features["inputs"], 1) + if len(features["inputs"].shape) < 5: + features["inputs"] = tf.expand_dims(features["inputs"], 4) + # Expand the inputs in to the beam size. + features["inputs"] = tf.tile(features["inputs"], [1, beam_size, 1, 1, 1]) + s = tf.shape(features["inputs"]) + features["inputs"] = tf.reshape(features["inputs"], + [s[0] * s[1], s[2], s[3], s[4]]) target_modality = self._hparams.problems[self._problem_idx].target_modality vocab_size = target_modality.top_dimensionality @@ -281,7 +281,8 @@ def symbols_to_logits_fn(ids): alpha) # Set inputs back to the unexpanded inputs to not to confuse the Estimator! - features["inputs"] = inputs_old + if self.has_input: + features["inputs"] = inputs_old # Return `top_beams` decodings (also remove initial id from the beam search) return_scores = False # TODO(lukaszkaiser): make it work multi-problem. @@ -366,8 +367,9 @@ def infer_step(recent_output, recent_logits, unused_loss): # Create an initial output tensor. This will be passed # to the infer_step, which adds one timestep at every iteration. if "partial_targets" in features: - initial_output = tf.to_int64(tf.expand_dims( - tf.expand_dims(features["partial_targets"], 2), 3)) + initial_output = tf.to_int64(features["partial_targets"]) + while len(initial_output.get_shape().as_list()) < 4: + initial_output = tf.expand_dims(initial_output, 2) batch_size = tf.shape(initial_output)[0] else: batch_size = tf.shape(features["inputs"])[0] From ba75966d5535b7070dfe1db894f193749595b5a1 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Mon, 23 Oct 2017 15:44:19 -0700 Subject: [PATCH 0525/4095] Extra check in SubwordTextEncoder decoding for empty tokens. PiperOrigin-RevId: 173182497 --- tensor2tensor/data_generators/text_encoder.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 6c9607bf4..1c720a6db 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -496,7 +496,13 @@ def _subtoken_ids_to_tokens(self, subtokens): concatenated = "".join( [self._subtoken_id_to_subtoken_string(s) for s in subtokens]) split = concatenated.split("_") - return [_unescape_token(t + "_") for t in split if t] + ret = [] + for t in split: + if t: + unescaped = _unescape_token(t + "_") + if unescaped: + ret.append(unescaped) + return ret def _subtoken_id_to_subtoken_string(self, subtoken): """Converts a subtoken integer ID to a subtoken string.""" From b840c0c091f40f2d1eb2d421e62de11df2c36449 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 25 Oct 2017 11:05:33 -0700 Subject: [PATCH 0526/4095] Decoding corrections for no input, when calling decode_from_file. PiperOrigin-RevId: 173421571 --- tensor2tensor/utils/decoding.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 8d81beb3c..dd5c5b1f0 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -84,13 +84,22 @@ def log_decode_results(inputs, decoded_targets = None if identity_output: + tf.logging.info("PSC: identity_output") decoded_outputs = " ".join(map(str, outputs.flatten())) if targets is not None: + tf.logging.info("PSC: targets not none") decoded_targets = " ".join(map(str, targets.flatten())) + else: + tf.logging.info("PSC: targets none") else: + tf.logging.info("PSC: not identity_output") + tf.logging.info(outputs) decoded_outputs = targets_vocab.decode(_save_until_eos(outputs, is_image)) if targets is not None: + tf.logging.info("PSC: targets not none") decoded_targets = targets_vocab.decode(_save_until_eos(targets, is_image)) + else: + tf.logging.info("PSC: targets none") tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) if targets is not None: @@ -210,7 +219,13 @@ def decode_from_file(estimator, filename, decode_hp, decode_to_file=None): hparams = estimator.params problem_id = decode_hp.problem_idx - inputs_vocab = hparams.problems[problem_id].vocabulary["inputs"] + tf.logging.info("PSC: hparams.vocab size:") + tf.logging.info(hparams.problems[problem_id].vocabulary["targets"].vocab_size) + # Inputs vocabulary is set to targets if there are no inputs in the problem, + # e.g., for language models where the inputs are just a prefix of targets. + has_input = "inputs" in hparams.problems[problem_id].vocabulary + inputs_vocab_key = "inputs" if has_input else "targets" + inputs_vocab = hparams.problems[problem_id].vocabulary[inputs_vocab_key] targets_vocab = hparams.problems[problem_id].vocabulary["targets"] problem_name = FLAGS.problems.split("-")[problem_id] tf.logging.info("Performing decoding from a file.") @@ -228,8 +243,12 @@ def input_fn(): decodes = [] result_iter = estimator.predict(input_fn) + iter_ctr = 0 for result in result_iter: + tf.logging.info("PSC: result in iter %d" % iter_ctr) + tf.logging.info(result["outputs"]) if decode_hp.return_beams: + tf.logging.info("PSC: return beams") beam_decodes = [] output_beams = np.split(result["outputs"], decode_hp.beam_size, axis=0) for k, beam in enumerate(output_beams): @@ -240,6 +259,7 @@ def input_fn(): beam_decodes.append(decoded_outputs) decodes.append("\t".join(beam_decodes)) else: + tf.logging.info("PSC: don't return beams") decoded_outputs, _ = log_decode_results(result["inputs"], result["outputs"], problem_name, None, inputs_vocab, targets_vocab) From 224503301f584c8b41577c411701bf31edf73124 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 25 Oct 2017 11:25:57 -0700 Subject: [PATCH 0527/4095] Remove debug lines from decoding.py that got in by mistake in cl/173421571. PiperOrigin-RevId: 173424832 --- tensor2tensor/utils/decoding.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index dd5c5b1f0..8aa3c0b71 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -84,22 +84,13 @@ def log_decode_results(inputs, decoded_targets = None if identity_output: - tf.logging.info("PSC: identity_output") decoded_outputs = " ".join(map(str, outputs.flatten())) if targets is not None: - tf.logging.info("PSC: targets not none") decoded_targets = " ".join(map(str, targets.flatten())) - else: - tf.logging.info("PSC: targets none") else: - tf.logging.info("PSC: not identity_output") - tf.logging.info(outputs) decoded_outputs = targets_vocab.decode(_save_until_eos(outputs, is_image)) if targets is not None: - tf.logging.info("PSC: targets not none") decoded_targets = targets_vocab.decode(_save_until_eos(targets, is_image)) - else: - tf.logging.info("PSC: targets none") tf.logging.info("Inference results OUTPUT: %s" % decoded_outputs) if targets is not None: @@ -219,8 +210,6 @@ def decode_from_file(estimator, filename, decode_hp, decode_to_file=None): hparams = estimator.params problem_id = decode_hp.problem_idx - tf.logging.info("PSC: hparams.vocab size:") - tf.logging.info(hparams.problems[problem_id].vocabulary["targets"].vocab_size) # Inputs vocabulary is set to targets if there are no inputs in the problem, # e.g., for language models where the inputs are just a prefix of targets. has_input = "inputs" in hparams.problems[problem_id].vocabulary @@ -243,12 +232,8 @@ def input_fn(): decodes = [] result_iter = estimator.predict(input_fn) - iter_ctr = 0 for result in result_iter: - tf.logging.info("PSC: result in iter %d" % iter_ctr) - tf.logging.info(result["outputs"]) if decode_hp.return_beams: - tf.logging.info("PSC: return beams") beam_decodes = [] output_beams = np.split(result["outputs"], decode_hp.beam_size, axis=0) for k, beam in enumerate(output_beams): @@ -259,7 +244,6 @@ def input_fn(): beam_decodes.append(decoded_outputs) decodes.append("\t".join(beam_decodes)) else: - tf.logging.info("PSC: don't return beams") decoded_outputs, _ = log_decode_results(result["inputs"], result["outputs"], problem_name, None, inputs_vocab, targets_vocab) From 86703a2b448ebcf8e4366ac54c36b515675455ce Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 26 Oct 2017 14:53:16 -0700 Subject: [PATCH 0528/4095] Fast beam search decoding. PiperOrigin-RevId: 173594699 --- tensor2tensor/models/transformer.py | 133 ++++++++++++++++++----- tensor2tensor/models/transformer_test.py | 46 ++++++++ tensor2tensor/utils/beam_search.py | 55 ++++++++-- tensor2tensor/utils/beam_search_test.py | 6 +- tensor2tensor/utils/t2t_model.py | 23 ++++ 5 files changed, 225 insertions(+), 38 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 5fbd49cb3..9a090e40f 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -30,12 +30,15 @@ from tensor2tensor.layers import common_attention from tensor2tensor.layers import common_hparams from tensor2tensor.layers import common_layers +from tensor2tensor.utils import beam_search from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model import tensorflow as tf +from tensorflow.python.util import nest + @registry.register_model class Transformer(t2t_model.T2TModel): @@ -159,6 +162,58 @@ def _greedy_infer( logits: Not returned losses: Not returned + Raises: + ValueError: If last_position_only if False + NotImplementedError: If there are multiple data shards. + """ + decoded_ids = self._fast_decode(features, decode_length, last_position_only) + return decoded_ids, None, None + + def _beam_decode(self, features, decode_length, beam_size, top_beams, + last_position_only, alpha): + """Beam search decoding. + + Args: + features: an map of string to `Tensor` + decode_length: an integer. How many additional timesteps to decode. + beam_size: number of beams. + top_beams: an integer. How many of the beams to return. + last_position_only: MUST be true for fast decoding! + alpha: Float that controls the length penalty. larger the alpha, stronger + the preference for slonger translations. + + Returns: + samples: an integer `Tensor`. Top samples from the beam search + """ + return self._fast_decode( + features, decode_length, last_position_only, beam_size, top_beams, + alpha) + + def _fast_decode( + self, + features, + decode_length, + last_position_only=True, + beam_size=1, + top_beams=1, + alpha=1.0): + """Fast decoding. + + Implements both greedy and beam search decoding, uses beam search iff + beam_size > 1, otherwise beam search related arguments are ignored. + + Args: + features: a map of string to model features. + decode_length: an integer. How many additional timesteps to decode. + last_position_only: MUST be true for fast decoding! + beam_size: number of beams. + top_beams: an integer. How many of the beams to return. + alpha: Float that controls the length penalty. larger the alpha, stronger + the preference for slonger translations. + + Returns: + samples: an integer `Tensor`. Top samples from the beam search + Raises: ValueError: If last_position_only if False NotImplementedError: If there are multiple data shards. @@ -192,6 +247,8 @@ def _greedy_infer( with tf.variable_scope("body"): encoder_output, encoder_decoder_attention_bias = dp( self.encode, inputs, features["target_space_id"], hparams) + encoder_output = encoder_output[0] + encoder_decoder_attention_bias = encoder_decoder_attention_bias[0] if hparams.pos == "timing": timing_signal = common_attention.get_timing_signal_1d( @@ -236,6 +293,7 @@ def preprocess_targets(targets, i): def symbols_to_logits_fn(ids, i, cache): """Go from ids to logits for next symbol.""" + ids = ids[:, -1:] targets = tf.expand_dims(tf.expand_dims(ids, axis=2), axis=3) targets = preprocess_targets(targets, i) @@ -245,8 +303,8 @@ def symbols_to_logits_fn(ids, i, cache): body_outputs = dp( self.decode, targets, - encoder_output[0], - encoder_decoder_attention_bias[0], + cache["encoder_output"], + cache["encoder_decoder_attention_bias"], bias, hparams, cache) @@ -254,13 +312,7 @@ def symbols_to_logits_fn(ids, i, cache): with tf.variable_scope(target_modality.name): logits = target_modality.top_sharded(body_outputs, None, dp)[0] - return tf.squeeze(logits, axis=[1, 2, 3]) - - def inner_loop(i, next_id, decoded_ids, cache): - logits = symbols_to_logits_fn(next_id, i, cache) - next_id = tf.expand_dims(tf.argmax(logits, axis=-1), axis=1) - decoded_ids = tf.concat([decoded_ids, next_id], axis=1) - return i+1, next_id, decoded_ids, cache + return tf.squeeze(logits, axis=[1, 2, 3]), cache key_channels = hparams.attention_key_channels or hparams.hidden_size value_channels = hparams.attention_value_channels or hparams.hidden_size @@ -272,24 +324,53 @@ def inner_loop(i, next_id, decoded_ids, cache): "v": tf.zeros([batch_size, 0, value_channels]), } for layer in range(num_layers) } - decoded_ids = tf.zeros([batch_size, 0], dtype=tf.int64) - next_id = tf.zeros([batch_size, 1], dtype=tf.int64) - _, _, decoded_ids, _ = tf.while_loop( - # TODO(llion): Early stopping. - lambda i, *_: tf.less(i, decode_length), - inner_loop, - [tf.constant(0), next_id, decoded_ids, cache], - shape_invariants=[ - tf.TensorShape([]), - tf.TensorShape([None, None]), - tf.TensorShape([None, None]), - {"layer_%d" % layer: { - "k": tf.TensorShape([None, None, key_channels]), - "v": tf.TensorShape([None, None, value_channels]), - } for layer in range(num_layers)} - ]) - return decoded_ids, None, None + # Set 2nd dim to None since it's not invariant in the tf.while_loop + # Note: Tensor.set_shape() does not work here since it merges shape info. + # TODO(llion); Find a more robust solution. + # pylint: disable=protected-access + for layer in cache: + cache[layer]["k"]._shape = tf.TensorShape([None, None, key_channels]) + cache[layer]["v"]._shape = tf.TensorShape([None, None, value_channels]) + # pylint: enable=protected-access + cache["encoder_output"] = encoder_output + cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias + + if beam_size > 1: # Beam Search + target_modality = ( + self._hparams.problems[self._problem_idx].target_modality) + vocab_size = target_modality.top_dimensionality + initial_ids = tf.zeros([batch_size], dtype=tf.int32) + decoded_ids, _ = beam_search.beam_search( + symbols_to_logits_fn, initial_ids, beam_size, decode_length, + vocab_size, alpha, states=cache) + + if top_beams == 1: + decoded_ids = decoded_ids[:, 0, 1:] + else: + decoded_ids = decoded_ids[:, :top_beams, 1:] + else: # Greedy + def inner_loop(i, next_id, decoded_ids, cache): + logits, cache = symbols_to_logits_fn(next_id, i, cache) + next_id = tf.expand_dims(tf.argmax(logits, axis=-1), axis=1) + decoded_ids = tf.concat([decoded_ids, next_id], axis=1) + return i+1, next_id, decoded_ids, cache + + decoded_ids = tf.zeros([batch_size, 0], dtype=tf.int64) + next_id = tf.zeros([batch_size, 1], dtype=tf.int64) + _, _, decoded_ids, _ = tf.while_loop( + # TODO(llion): Early stopping. + lambda i, *_: tf.less(i, decode_length), + inner_loop, + [tf.constant(0), next_id, decoded_ids, cache], + shape_invariants=[ + tf.TensorShape([]), + tf.TensorShape([None, None]), + tf.TensorShape([None, None]), + nest.map_structure(lambda t: tf.TensorShape(t.shape), cache), + ]) + + return decoded_ids @registry.register_model diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index e77138eaf..74f563fbb 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -112,5 +112,51 @@ def testGreedyVsFast(self): self.assertEqual(fast_res.shape, (BATCH_SIZE, INPUT_LENGTH + decode_length)) self.assertAllClose(greedy_res, fast_res) + def testBeamVsFast(self): + model, features = self.getModel(transformer.transformer_small()) + + decode_length = 2 + + out_logits, _ = model.model_fn(features) + out_logits = tf.squeeze(out_logits[0], axis=[2, 3]) + loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]), + labels=tf.reshape(features["targets"], [-1])) + loss = tf.reduce_mean(loss) + apply_grad = tf.train.AdamOptimizer(0.001).minimize(loss) + + with self.test_session(): + tf.global_variables_initializer().run() + for _ in range(100): + apply_grad.run() + + model, _ = self.getModel(transformer.transformer_small(), + mode=tf.estimator.ModeKeys.PREDICT) + + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + beam_result = model._beam_decode_slow( + features, + decode_length, + beam_size=4, + top_beams=1, + last_position_only=True, + alpha=1.0) + + fast_result = model._beam_decode( + features, + decode_length, + beam_size=4, + top_beams=1, + last_position_only=True, + alpha=1.0) + + with self.test_session(): + beam_res = beam_result.eval() + fast_res = fast_result.eval() + + self.assertEqual(fast_res.shape, (BATCH_SIZE, INPUT_LENGTH + decode_length)) + self.assertAllClose(beam_res, fast_res) + + if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/utils/beam_search.py b/tensor2tensor/utils/beam_search.py index 1dd2f87b1..c08416fb8 100644 --- a/tensor2tensor/utils/beam_search.py +++ b/tensor2tensor/utils/beam_search.py @@ -30,7 +30,45 @@ INF = 1. * 1e7 -def expand_to_beam_size(tensor, beam_size): +def _get_shape(tensor): + """Returns static shape if available and dynamic shape otherwise.""" + static = tensor.shape.as_list() + dynamic = tf.unstack(tf.shape(tensor)) + return [s[1] if s[0] is None else s[0] for s in zip(static, dynamic)] + + +def _merge_beam_dim(tensor): + """Reshapes first two dimensions in to single dimension. + + Args: + tensor: Tensor to reshape of shape [A, B, ...] + + Returns: + Reshaped tensor of shape [A*B, ...] + """ + shape = _get_shape(tensor) + shape[0] *= shape[1] # batch -> batch * beam_size + shape.pop(1) # Remove beam dim + return tf.reshape(tensor, shape) + + +def _unmerge_beam_dim(tensor, batch_size, beam_size): + """Reshapes first dimension back to [batch_size, beam_size]. + + Args: + tensor: Tensor to reshape of shape [batch_size*beam_size, ...] + batch_size: Tensor, original batch size. + beam_size: int, original beam size. + + Returns: + Reshaped tensor of shape [batch_size, beam_size, ...] + """ + shape = _get_shape(tensor) + new_shape = [batch_size] + [beam_size] + shape[1:] + return tf.reshape(tensor, new_shape) + + +def _expand_to_beam_size(tensor, beam_size): """Tiles a given tensor by beam_size. Args: @@ -191,11 +229,11 @@ def beam_search(symbols_to_logits_fn, alive_log_probs = tf.tile(initial_log_probs, [batch_size, 1]) # Expand each batch and state to beam_size - alive_seq = expand_to_beam_size(initial_ids, beam_size) + alive_seq = _expand_to_beam_size(initial_ids, beam_size) alive_seq = tf.expand_dims(alive_seq, axis=2) # (batch_size, beam_size, 1) if states: states = nest.map_structure( - lambda state: expand_to_beam_size(state, beam_size), states) + lambda state: _expand_to_beam_size(state, beam_size), states) else: states = {} @@ -302,12 +340,10 @@ def grow_topk(i, alive_seq, alive_log_probs, states): # (batch_size * beam_size, decoded_length) if states: - flat_states = nest.map_structure( - lambda state: tf.reshape(state, [batch_size * beam_size, -1]), states) - flat_logits, flat_states = symbols_to_logits_fn(flat_ids, flat_states) + flat_states = nest.map_structure(_merge_beam_dim, states) + flat_logits, flat_states = symbols_to_logits_fn(flat_ids, i, flat_states) states = nest.map_structure( - lambda state: tf.reshape(state, [batch_size, beam_size, -1]), - flat_states) + lambda t: _unmerge_beam_dim(t, batch_size, beam_size), flat_states) else: flat_logits = symbols_to_logits_fn(flat_ids) logits = tf.reshape(flat_logits, [batch_size, beam_size, -1]) @@ -478,8 +514,7 @@ def _is_finished(i, unused_alive_seq, alive_log_probs, unused_finished_seq, finished_scores.get_shape(), finished_flags.get_shape(), nest.map_structure( - lambda tensor: tf.TensorShape([None] * tensor.shape.ndims), - states), + lambda tensor: tf.TensorShape(tensor.shape), states), ], parallel_iterations=1, back_prop=False) diff --git a/tensor2tensor/utils/beam_search_test.py b/tensor2tensor/utils/beam_search_test.py index fc15eb3bc..379411e99 100644 --- a/tensor2tensor/utils/beam_search_test.py +++ b/tensor2tensor/utils/beam_search_test.py @@ -289,7 +289,7 @@ def testStates(self): expected_states = tf.constant([[[0.]], [[1.]]]) - def symbols_to_logits(ids, states): + def symbols_to_logits(ids, _, states): pos = tf.shape(ids)[1] - 1 # We have to assert the values of state inline here since we can't fetch # them out of the loop! @@ -303,6 +303,7 @@ def symbols_to_logits(ids, states): states = { "state": tf.zeros((batch_size, 1)), } + states["state"]._shape = tf.TensorShape((None, 1)) final_ids, _ = beam_search.beam_search( symbols_to_logits, @@ -336,7 +337,7 @@ def testStateBeamTwo(self): # at each position, which is the one thats getting 3 added to it each step. expected_states = tf.constant([[[0.], [0.]], [[3.], [3.]], [[6.], [6.]]]) - def symbols_to_logits(ids, states): + def symbols_to_logits(ids, _, states): pos = tf.shape(ids)[1] - 1 # We have to assert the values of state inline here since we can't fetch @@ -351,6 +352,7 @@ def symbols_to_logits(ids, states): states = { "state": tf.zeros((batch_size, 1)), } + states["state"]._shape = tf.TensorShape((None, 1)) final_ids, _ = beam_search.beam_search( symbols_to_logits, diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 0f3caedea..85f339511 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -217,6 +217,29 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, last_position_only, alpha): """Beam search decoding. + Models should ideally implement a more efficient version of this function. + + Args: + features: an map of string to `Tensor` + decode_length: an integer. How many additional timesteps to decode. + beam_size: number of beams. + top_beams: an integer. How many of the beams to return. + last_position_only: a boolean, speed-up by computing last position only. + alpha: Float that controls the length penalty. larger the alpha, stronger + the preference for slonger translations. + + Returns: + samples: an integer `Tensor`. Top samples from the beam search + """ + return self._beam_decode_slow(features, decode_length, beam_size, top_beams, + last_position_only, alpha) + + def _beam_decode_slow(self, features, decode_length, beam_size, top_beams, + last_position_only, alpha): + """Slow version of Beam search decoding. + + Quadratic time in decode_length. + Args: features: an map of string to `Tensor` decode_length: an integer. How many additional timesteps to decode. From 90aa79646548023752384f53944b440fecef8a84 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 26 Oct 2017 16:59:04 -0700 Subject: [PATCH 0529/4095] Internal merge PR#370 PiperOrigin-RevId: 173611718 --- README.md | 2 +- docs/new_problem.md | 2 +- docs/walkthrough.md | 2 +- tensor2tensor/bin/t2t-datagen | 6 +- tensor2tensor/data_generators/README.md | 6 +- tensor2tensor/data_generators/all_problems.py | 6 +- .../data_generators/generator_utils.py | 63 +- tensor2tensor/data_generators/ice_parsing.py | 8 +- tensor2tensor/data_generators/translate.py | 255 +++++++ .../data_generators/translate_encs.py | 130 ++++ .../data_generators/translate_ende.py | 184 +++++ .../data_generators/translate_enfr.py | 120 +++ .../data_generators/translate_enmk.py | 87 +++ .../data_generators/translate_enzh.py | 106 +++ .../{wmt_test.py => translate_test.py} | 32 +- tensor2tensor/data_generators/wmt.py | 718 ------------------ 16 files changed, 928 insertions(+), 799 deletions(-) create mode 100644 tensor2tensor/data_generators/translate.py create mode 100644 tensor2tensor/data_generators/translate_encs.py create mode 100644 tensor2tensor/data_generators/translate_ende.py create mode 100644 tensor2tensor/data_generators/translate_enfr.py create mode 100644 tensor2tensor/data_generators/translate_enmk.py create mode 100644 tensor2tensor/data_generators/translate_enzh.py rename tensor2tensor/data_generators/{wmt_test.py => translate_test.py} (71%) delete mode 100644 tensor2tensor/data_generators/wmt.py diff --git a/README.md b/README.md index 0e97770ba..9525e9bcb 100644 --- a/README.md +++ b/README.md @@ -286,7 +286,7 @@ registrations. To add a new dataset, subclass [`Problem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py) and register it with `@registry.register_problem`. See -[`TranslateEndeWmt8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +[`TranslateEndeWmt8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/translate_ende.py) for an example. Also see the [data generators diff --git a/docs/new_problem.md b/docs/new_problem.md index ab5dd5e26..48976a61b 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -105,7 +105,7 @@ We're almost done. `generator` generates the training and evaluation data and stores them in files like "word2def_train.lang1" in your DATA_DIR. Thankfully several commonly used methods like `character_generator`, and `token_generator` are already written in the file -[`wmt.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py). +[`translate.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/translate.py). We will import `character_generator` and [`text_encoder`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/text_encoder.py) to write: diff --git a/docs/walkthrough.md b/docs/walkthrough.md index 0e97770ba..9525e9bcb 100644 --- a/docs/walkthrough.md +++ b/docs/walkthrough.md @@ -286,7 +286,7 @@ registrations. To add a new dataset, subclass [`Problem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py) and register it with `@registry.register_problem`. See -[`TranslateEndeWmt8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +[`TranslateEndeWmt8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/translate_ende.py) for an example. Also see the [data generators diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index cb6253524..eba408074 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -43,7 +43,7 @@ from tensor2tensor.data_generators import all_problems # pylint: disable=unused from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import snli -from tensor2tensor.data_generators import wmt +from tensor2tensor.data_generators import translate from tensor2tensor.data_generators import wsj_parsing from tensor2tensor.utils import registry from tensor2tensor.utils import usr_dir @@ -82,9 +82,9 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), "parsing_english_ptb8k": ( - lambda: wmt.parsing_token_generator( + lambda: translate.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13), - lambda: wmt.parsing_token_generator( + lambda: translate.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13)), "parsing_english_ptb16k": ( lambda: wsj_parsing.parsing_token_generator( diff --git a/tensor2tensor/data_generators/README.md b/tensor2tensor/data_generators/README.md index 0e6d64dd2..04a90a778 100644 --- a/tensor2tensor/data_generators/README.md +++ b/tensor2tensor/data_generators/README.md @@ -23,7 +23,7 @@ All tasks produce TFRecord files of `tensorflow.Example` protocol buffers. To add a new problem, subclass [`Problem`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py) and register it with `@registry.register_problem`. See -[`WMTEnDeTokens8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +[`TranslateEndeWmt8k`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/translate_ende.py) for an example. `Problem`s support data generation, training, and decoding. @@ -37,7 +37,7 @@ for training/decoding, e.g. a vocabulary file. A particularly easy way to implement `Problem.generate_data` for your dataset is to create 2 Python generators, one for the training data and another for the dev data, and pass them to `generator_utils.generate_dataset_and_shuffle`. See -[`WMTEnDeTokens8k.generate_data`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +[`TranslateEndeWmt8k.generate_data`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/translate_ende.py) for an example of usage. The generators should yield dictionaries with string keys and values being lists @@ -66,5 +66,5 @@ Some examples: * [Algorithmic problems](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/algorithmic.py) and their [unit tests](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/algorithmic_test.py) -* [WMT problems](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt.py) +* [WMT En-De problems](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/translate_ende.py) and their [unit tests](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/wmt_test.py) diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 97aaa7d1e..c7f364cf1 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -33,8 +33,12 @@ from tensor2tensor.data_generators import problem_hparams from tensor2tensor.data_generators import ptb from tensor2tensor.data_generators import snli +from tensor2tensor.data_generators import translate_encs +from tensor2tensor.data_generators import translate_ende +from tensor2tensor.data_generators import translate_enfr +from tensor2tensor.data_generators import translate_enmk +from tensor2tensor.data_generators import translate_enzh from tensor2tensor.data_generators import wiki -from tensor2tensor.data_generators import wmt from tensor2tensor.data_generators import wsj_parsing diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index c8fe03564..55ccf117e 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -264,41 +264,6 @@ def gunzip_file(gz_path, new_path): new_file.write(line) -# TODO(aidangomez): en-fr tasks are significantly over-represented below -_DATA_FILE_URLS = [ - # German-English - [ - "http://data.statmt.org/wmt16/translation-task/training-parallel-nc-v11.tgz", # pylint: disable=line-too-long - [ - "training-parallel-nc-v11/news-commentary-v11.de-en.en", - "training-parallel-nc-v11/news-commentary-v11.de-en.de" - ] - ], - # German-English & French-English - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", [ - "commoncrawl.de-en.en", "commoncrawl.de-en.de", - "commoncrawl.fr-en.en", "commoncrawl.fr-en.fr" - ] - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", [ - "training/europarl-v7.de-en.en", "training/europarl-v7.de-en.de", - "training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr" - ] - ], - # French-English - [ - "http://www.statmt.org/wmt10/training-giga-fren.tar", - ["giga-fren.release2.fixed.en.gz", "giga-fren.release2.fixed.fr.gz"] - ], - [ - "http://www.statmt.org/wmt13/training-parallel-un.tgz", - ["un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr"] - ], -] - - def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, generator): """Inner implementation for vocab generators. @@ -337,13 +302,9 @@ def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, return vocab -def get_or_generate_vocab(data_dir, - tmp_dir, - vocab_filename, - vocab_size, - sources=None): - """Generate a vocabulary from the datasets in sources (_DATA_FILE_URLS).""" - sources = sources or _DATA_FILE_URLS +def get_or_generate_vocab(data_dir, tmp_dir, vocab_filename, vocab_size, + sources): + """Generate a vocabulary from the datasets in sources.""" def generate(): tf.logging.info("Generating vocab from: %s", str(sources)) @@ -375,13 +336,19 @@ def generate(): # Use Tokenizer to count the word occurrences. with tf.gfile.GFile(filepath, mode="r") as source_file: - file_byte_budget = 3.5e5 if filepath.endswith("en") else 7e5 + file_byte_budget = 1e6 + counter = 0 + countermax = int(source_file.size() / file_byte_budget / 2) for line in source_file: - if file_byte_budget <= 0: - break - line = line.strip() - file_byte_budget -= len(line) - yield line + if counter < countermax: + counter += 1 + else: + if file_byte_budget <= 0: + break + line = line.strip() + file_byte_budget -= len(line) + counter = 0 + yield line return get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, generate()) diff --git a/tensor2tensor/data_generators/ice_parsing.py b/tensor2tensor/data_generators/ice_parsing.py index 2aa261cd4..fdb53430a 100644 --- a/tensor2tensor/data_generators/ice_parsing.py +++ b/tensor2tensor/data_generators/ice_parsing.py @@ -32,7 +32,7 @@ from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators.wmt import tabbed_generator +from tensor2tensor.data_generators import translate from tensor2tensor.utils import registry @@ -51,7 +51,8 @@ def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix, data_dir, tmp_dir, filename, 1, prefix + "_target.tokens.vocab.%d" % target_vocab_size, target_vocab_size) pair_filepath = os.path.join(tmp_dir, filename) - return tabbed_generator(pair_filepath, source_vocab, target_vocab, EOS) + return translate.tabbed_generator(pair_filepath, source_vocab, target_vocab, + EOS) def tabbed_parsing_character_generator(tmp_dir, train): @@ -59,7 +60,8 @@ def tabbed_parsing_character_generator(tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() filename = "parsing_{0}.pairs".format("train" if train else "dev") pair_filepath = os.path.join(tmp_dir, filename) - return tabbed_generator(pair_filepath, character_vocab, character_vocab, EOS) + return translate.tabbed_generator(pair_filepath, character_vocab, + character_vocab, EOS) @registry.register_problem diff --git a/tensor2tensor/data_generators/translate.py b/tensor2tensor/data_generators/translate.py new file mode 100644 index 000000000..95f5844c1 --- /dev/null +++ b/tensor2tensor/data_generators/translate.py @@ -0,0 +1,255 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + + +class TranslateProblem(problem.Text2TextProblem): + """Base class for translation problems.""" + + @property + def is_character_level(self): + return False + + @property + def num_shards(self): + return 100 + + @property + def use_subword_tokenizer(self): + return True + + +# Generic generators used later for multiple problems. + + +def character_generator(source_path, target_path, character_vocab, eos=None): + """Generator for sequence-to-sequence tasks that just uses characters. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are characters from the source lines converted to integers, + and targets are characters from the target lines, also converted to integers. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + character_vocab: a TextEncoder to encode the characters. + eos: integer to append at the end of each sequence (default: None). + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from characters in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = character_vocab.encode(source.strip()) + eos_list + target_ints = character_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + + +def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): + r"""Generator for sequence-to-sequence tasks using tabbed files. + + Tokens are derived from text files where each line contains both + a source and a target string. The two strings are separated by a tab + character ('\t'). It yields dictionaries of "inputs" and "targets" where + inputs are characters from the source lines converted to integers, and + targets are characters from the target lines, also converted to integers. + + Args: + source_path: path to the file with source and target sentences. + source_vocab: a SubwordTextEncoder to encode the source string. + target_vocab: a SubwordTextEncoder to encode the target string. + eos: integer to append at the end of each sequence (default: None). + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from characters in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + for line in source_file: + if line and "\t" in line: + parts = line.split("\t", 1) + source, target = parts[0].strip(), parts[1].strip() + source_ints = source_vocab.encode(source) + eos_list + target_ints = target_vocab.encode(target) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + + +def token_generator(source_path, target_path, token_vocab, eos=None): + """Generator for sequence-to-sequence tasks that uses tokens. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are token ids from the " "-split source (and target, resp.) lines + converted to integers using the token_map. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + token_vocab: text_encoder.TextEncoder object. + eos: integer to append at the end of each sequence (default: None). + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from tokens in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = token_vocab.encode(source.strip()) + eos_list + target_ints = token_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + + +def bi_vocabs_token_generator(source_path, + target_path, + source_token_vocab, + target_token_vocab, + eos=None): + """Generator for sequence-to-sequence tasks that uses tokens. + + This generator assumes the files at source_path and target_path have + the same number of lines and yields dictionaries of "inputs" and "targets" + where inputs are token ids from the " "-split source (and target, resp.) lines + converted to integers using the token_map. + + Args: + source_path: path to the file with source sentences. + target_path: path to the file with target sentences. + source_token_vocab: text_encoder.TextEncoder object. + target_token_vocab: text_encoder.TextEncoder object. + eos: integer to append at the end of each sequence (default: None). + Yields: + A dictionary {"inputs": source-line, "targets": target-line} where + the lines are integer lists converted from tokens in the file lines. + """ + eos_list = [] if eos is None else [eos] + with tf.gfile.GFile(source_path, mode="r") as source_file: + with tf.gfile.GFile(target_path, mode="r") as target_file: + source, target = source_file.readline(), target_file.readline() + while source and target: + source_ints = source_token_vocab.encode(source.strip()) + eos_list + target_ints = target_token_vocab.encode(target.strip()) + eos_list + yield {"inputs": source_ints, "targets": target_ints} + source, target = source_file.readline(), target_file.readline() + + +def _preprocess_sgm(line, is_sgm): + """Preprocessing to strip tags in SGM files.""" + if not is_sgm: + return line + # In SGM files, remove <srcset ...>, <p>, <doc ...> lines. + if line.startswith("<srcset") or line.startswith("</srcset"): + return "" + if line.startswith("<doc") or line.startswith("</doc"): + return "" + if line.startswith("<p>") or line.startswith("</p>"): + return "" + # Strip <seg> tags. + line = line.strip() + if line.startswith("<seg") and line.endswith("</seg>"): + i = line.index(">") + return line[i + 1:-6] # Strip first <seg ...> and last </seg>. + + +def compile_data(tmp_dir, datasets, filename): + """Concatenate all `datasets` and save to `filename`.""" + filename = os.path.join(tmp_dir, filename) + with tf.gfile.GFile(filename + ".lang1", mode="w") as lang1_resfile: + with tf.gfile.GFile(filename + ".lang2", mode="w") as lang2_resfile: + for dataset in datasets: + url = dataset[0] + compressed_filename = os.path.basename(url) + compressed_filepath = os.path.join(tmp_dir, compressed_filename) + + generator_utils.maybe_download(tmp_dir, compressed_filename, url) + + if dataset[1][0] == "tsv": + _, src_column, trg_column, glob_pattern = dataset[1] + filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) + if not filenames: + # Capture *.tgz and *.tar.gz too. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) + for tsv_filename in filenames: + if tsv_filename.endswith(".gz"): + new_filename = tsv_filename.strip(".gz") + generator_utils.gunzip_file(tsv_filename, new_filename) + tsv_filename = new_filename + with tf.gfile.GFile(tsv_filename, mode="r") as tsv_file: + for line in tsv_file: + if line and "\t" in line: + parts = line.split("\t") + source, target = parts[src_column], parts[trg_column] + lang1_resfile.write(source.strip() + "\n") + lang2_resfile.write(target.strip() + "\n") + else: + lang1_filename, lang2_filename = dataset[1] + lang1_filepath = os.path.join(tmp_dir, lang1_filename) + lang2_filepath = os.path.join(tmp_dir, lang2_filename) + is_sgm = ( + lang1_filename.endswith("sgm") and lang2_filename.endswith("sgm")) + + if not (os.path.exists(lang1_filepath) and + os.path.exists(lang2_filepath)): + # For .tar.gz and .tgz files, we read compressed. + mode = "r:gz" if compressed_filepath.endswith("gz") else "r" + with tarfile.open(compressed_filepath, mode) as corpus_tar: + corpus_tar.extractall(tmp_dir) + if lang1_filepath.endswith(".gz"): + new_filepath = lang1_filepath.strip(".gz") + generator_utils.gunzip_file(lang1_filepath, new_filepath) + lang1_filepath = new_filepath + if lang2_filepath.endswith(".gz"): + new_filepath = lang2_filepath.strip(".gz") + generator_utils.gunzip_file(lang2_filepath, new_filepath) + lang2_filepath = new_filepath + with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: + with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: + line1, line2 = lang1_file.readline(), lang2_file.readline() + while line1 or line2: + line1res = _preprocess_sgm(line1, is_sgm) + line2res = _preprocess_sgm(line2, is_sgm) + if line1res or line2res: + lang1_resfile.write(line1res.strip() + "\n") + lang2_resfile.write(line2res.strip() + "\n") + line1, line2 = lang1_file.readline(), lang2_file.readline() + + return filename diff --git a/tensor2tensor/data_generators/translate_encs.py b/tensor2tensor/data_generators/translate_encs.py new file mode 100644 index 000000000..ad0fe828d --- /dev/null +++ b/tensor2tensor/data_generators/translate_encs.py @@ -0,0 +1,130 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import translate +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +_ENCS_TRAIN_DATASETS = [ + [("https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/" + "11234/1-1458/data-plaintext-format.tar"), + ("tsv", 3, 2, "data.plaintext-format/*train.gz")], + [ + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long + ("training/news-commentary-v12.cs-en.en", + "training/news-commentary-v12.cs-en.cs") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + ("commoncrawl.cs-en.en", "commoncrawl.cs-en.cs") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + ("training/europarl-v7.cs-en.en", "training/europarl-v7.cs-en.cs") + ], +] +_ENCS_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newstest2013.en", "dev/newstest2013.cs") + ], +] + + +@registry.register_problem +class TranslateEncsWmt32k(translate.TranslateProblem): + """Problem spec for WMT English-Czech translation.""" + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + @property + def vocab_name(self): + return "vocab.encs" + + def generator(self, data_dir, tmp_dir, train): + datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + tag = "train" if train else "dev" + vocab_datasets = [] + data_path = translate.compile_data(tmp_dir, datasets, + "wmt_encs_tok_%s" % tag) + # CzEng contains 100 gz files with tab-separated columns, so let's expect + # it is the first dataset in datasets and use the newly created *.lang{1,2} + # files for vocab construction. + if datasets[0][0].endswith("data-plaintext-format.tar"): + vocab_datasets.append([ + datasets[0][0], + ["wmt_encs_tok_%s.lang1" % tag, + "wmt_encs_tok_%s.lang2" % tag] + ]) + datasets = datasets[1:] + vocab_datasets += [[item[0], [item[1][0], item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + vocab_datasets) + return translate.token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.CS_TOK + + +@registry.register_problem +class TranslateEncsWmtCharacters(translate.TranslateProblem): + """Problem spec for WMT En-Cs character-based translation.""" + + @property + def is_character_level(self): + return True + + def generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS + tag = "train" if train else "dev" + data_path = translate.compile_data(tmp_dir, datasets, + "wmt_encs_chr_%s" % tag) + return translate.character_generator( + data_path + ".lang1", data_path + ".lang2", character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.CS_CHR diff --git a/tensor2tensor/data_generators/translate_ende.py b/tensor2tensor/data_generators/translate_ende.py new file mode 100644 index 000000000..7358e9b7e --- /dev/null +++ b/tensor2tensor/data_generators/translate_ende.py @@ -0,0 +1,184 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tarfile + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import translate +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +_ENDE_TRAIN_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long + ("training/news-commentary-v12.de-en.en", + "training/news-commentary-v12.de-en.de") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + ("commoncrawl.de-en.en", "commoncrawl.de-en.de") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + ("training/europarl-v7.de-en.en", "training/europarl-v7.de-en.de") + ], +] +_ENDE_TEST_DATASETS = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newstest2013.en", "dev/newstest2013.de") + ], +] + + +def _get_wmt_ende_bpe_dataset(directory, filename): + """Extract the WMT en-de corpus `filename` to directory unless it's there.""" + train_path = os.path.join(directory, filename) + if not (tf.gfile.Exists(train_path + ".de") and + tf.gfile.Exists(train_path + ".en")): + url = ("https://drive.google.com/uc?export=download&id=" + "0B_bZck-ksdkpM25jRUN2X2UxMm8") + corpus_file = generator_utils.maybe_download_from_drive( + directory, "wmt16_en_de.tar.gz", url) + with tarfile.open(corpus_file, "r:gz") as corpus_tar: + corpus_tar.extractall(directory) + return train_path + + +@registry.register_problem +class TranslateEndeWmtBpe32k(translate.TranslateProblem): + """Problem spec for WMT En-De translation, BPE version.""" + + @property + def targeted_vocab_size(self): + return 32000 + + @property + def vocab_name(self): + return "vocab.bpe" + + def feature_encoders(self, data_dir): + vocab_filename = os.path.join(data_dir, self.vocab_file) + encoder = text_encoder.TokenTextEncoder(vocab_filename, replace_oov="UNK") + return {"inputs": encoder, "targets": encoder} + + def generator(self, data_dir, tmp_dir, train): + """Instance of token generator for the WMT en->de task, training set.""" + dataset_path = ("train.tok.clean.bpe.32000" + if train else "newstest2013.tok.bpe.32000") + train_path = _get_wmt_ende_bpe_dataset(tmp_dir, dataset_path) + token_tmp_path = os.path.join(tmp_dir, self.vocab_file) + token_path = os.path.join(data_dir, self.vocab_file) + tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) + with tf.gfile.GFile(token_path, mode="a") as f: + f.write("UNK\n") # Add UNK to the vocab. + token_vocab = text_encoder.TokenTextEncoder(token_path, replace_oov="UNK") + return translate.token_generator(train_path + ".en", train_path + ".de", + token_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_BPE_TOK + + @property + def target_space_id(self): + return problem.SpaceID.DE_BPE_TOK + + +@registry.register_problem +class TranslateEndeWmt8k(translate.TranslateProblem): + """Problem spec for WMT En-De translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def vocab_name(self): + return "vocab.ende" + + def generator(self, data_dir, tmp_dir, train): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + _ENDE_TRAIN_DATASETS) + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = translate.compile_data(tmp_dir, datasets, + "wmt_ende_tok_%s" % tag) + return translate.token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.DE_TOK + + +@registry.register_problem +class TranslateEndeWmt32k(TranslateEndeWmt8k): + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + +@registry.register_problem +class TranslateEndeWmtCharacters(translate.TranslateProblem): + """Problem spec for WMT En-De translation.""" + + @property + def is_character_level(self): + return True + + @property + def vocab_name(self): + return "vocab.ende" + + def generator(self, _, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS + tag = "train" if train else "dev" + data_path = translate.compile_data(tmp_dir, datasets, + "wmt_ende_chr_%s" % tag) + return translate.character_generator( + data_path + ".lang1", data_path + ".lang2", character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.DE_CHR diff --git a/tensor2tensor/data_generators/translate_enfr.py b/tensor2tensor/data_generators/translate_enfr.py new file mode 100644 index 000000000..68788d204 --- /dev/null +++ b/tensor2tensor/data_generators/translate_enfr.py @@ -0,0 +1,120 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import translate +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +_ENFR_TRAIN_DATASETS = [ + [ + "https://s3.amazonaws.com/opennmt-trainingdata/baseline-1M-enfr.tgz", + ("baseline-1M-enfr/baseline-1M_train.en", + "baseline-1M-enfr/baseline-1M_train.fr") + ], +] +_ENFR_TEST_DATASETS = [ + [ + "https://s3.amazonaws.com/opennmt-trainingdata/baseline-1M-enfr.tgz", + ("baseline-1M-enfr/baseline-1M_valid.en", + "baseline-1M-enfr/baseline-1M_valid.fr") + ], +] + + +@registry.register_problem +class TranslateEnfrWmt8k(translate.TranslateProblem): + """Problem spec for WMT En-Fr translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def vocab_name(self): + return "vocab.enfr" + + def generator(self, data_dir, tmp_dir, train): + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + _ENFR_TRAIN_DATASETS) + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = translate.compile_data(tmp_dir, datasets, + "wmt_enfr_tok_%s" % tag) + return translate.token_generator(data_path + ".lang1", data_path + ".lang2", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_TOK + + @property + def target_space_id(self): + return problem.SpaceID.FR_TOK + + +@registry.register_problem +class TranslateEnfrWmt32k(TranslateEnfrWmt8k): + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + +@registry.register_problem +class TranslateEnfrWmtCharacters(translate.TranslateProblem): + """Problem spec for WMT En-Fr translation.""" + + @property + def is_character_level(self): + return True + + @property + def vocab_name(self): + return "vocab.enfr" + + def generator(self, data_dir, tmp_dir, train): + character_vocab = text_encoder.ByteTextEncoder() + datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + tag = "train" if train else "dev" + data_path = translate.compile_data(tmp_dir, datasets, + "wmt_enfr_chr_%s" % tag) + return translate.character_generator( + data_path + ".lang1", data_path + ".lang2", character_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def target_space_id(self): + return problem.SpaceID.FR_CHR diff --git a/tensor2tensor/data_generators/translate_enmk.py b/tensor2tensor/data_generators/translate_enmk.py new file mode 100644 index 000000000..aa1bac8b1 --- /dev/null +++ b/tensor2tensor/data_generators/translate_enmk.py @@ -0,0 +1,87 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import translate +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +# For Macedonian-English the SETimes corpus +# from http://nlp.ffzg.hr/resources/corpora/setimes/ is used. +# The original dataset has 207,777 parallel sentences. +# For training the first 205,777 sentences are used. +_MKEN_TRAIN_DATASETS = [[ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long + ("train.mk", "train.en") +]] + +# For development 1000 parallel sentences are used. +_MKEN_TEST_DATASETS = [[ + "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.dev.tgz", # pylint: disable=line-too-long + ("dev.mk", "dev.en") +]] + + +@registry.register_problem +class TranslateEnmkSetimes32k(translate.TranslateProblem): + """Problem spec for SETimes Mk-En translation.""" + + @property + def targeted_vocab_size(self): + return 2**15 # 32768 + + @property + def vocab_name(self): + return "vocab.mken" + + def generator(self, data_dir, tmp_dir, train): + datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in datasets] + target_datasets = [[item[0], [item[1][1]]] for item in datasets] + symbolizer_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, + source_datasets + target_datasets) + tag = "train" if train else "dev" + data_path = translate.compile_data(tmp_dir, datasets, + "setimes_mken_tok_%s" % tag) + # We generate English->X data by convention, to train reverse translation + # just add the "_rev" suffix to the problem name, e.g., like this. + # --problems=translate_enmk_setimes32k_rev + return translate.token_generator(data_path + ".lang2", data_path + ".lang1", + symbolizer_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.MK_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py new file mode 100644 index 000000000..7c77a05fc --- /dev/null +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -0,0 +1,106 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for translation data-sets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +# Dependency imports + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.data_generators import translate +from tensor2tensor.utils import registry + +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +_ZHEN_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" + "training-parallel-nc-v12.tgz"), + ("training/news-commentary-v12.zh-en.zh", + "training/news-commentary-v12.zh-en.en")]] + +_ZHEN_TEST_DATASETS = [[ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newsdev2017-zhen-src.zh.sgm", "dev/newsdev2017-zhen-ref.en.sgm") +]] + + +@registry.register_problem +class TranslateEnzhWmt8k(translate.TranslateProblem): + """Problem spec for WMT Zh-En translation.""" + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def num_shards(self): + return 10 # This is a small dataset. + + @property + def source_vocab_name(self): + return "vocab.zhen-zh.%d" % self.targeted_vocab_size + + @property + def target_vocab_name(self): + return "vocab.zhen-en.%d" % self.targeted_vocab_size + + def generator(self, data_dir, tmp_dir, train): + datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] + source_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, + source_datasets) + target_vocab = generator_utils.get_or_generate_vocab( + data_dir, tmp_dir, self.target_vocab_name, self.targeted_vocab_size, + target_datasets) + tag = "train" if train else "dev" + data_path = translate.compile_data(tmp_dir, datasets, + "wmt_zhen_tok_%s" % tag) + # We generate English->X data by convention, to train reverse translation + # just add the "_rev" suffix to the problem name, e.g., like this. + # --problems=translate_enzh_wmt8k_rev + return translate.bi_vocabs_token_generator(data_path + ".lang2", + data_path + ".lang1", + source_vocab, target_vocab, EOS) + + @property + def input_space_id(self): + return problem.SpaceID.ZH_TOK + + @property + def target_space_id(self): + return problem.SpaceID.EN_TOK + + def feature_encoders(self, data_dir): + source_vocab_filename = os.path.join(data_dir, self.source_vocab_name) + target_vocab_filename = os.path.join(data_dir, self.target_vocab_name) + source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) + target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) + return { + "inputs": source_token, + "targets": target_token, + } diff --git a/tensor2tensor/data_generators/wmt_test.py b/tensor2tensor/data_generators/translate_test.py similarity index 71% rename from tensor2tensor/data_generators/wmt_test.py rename to tensor2tensor/data_generators/translate_test.py index 441ceef59..e357e11fc 100644 --- a/tensor2tensor/data_generators/wmt_test.py +++ b/tensor2tensor/data_generators/translate_test.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""WMT generators test.""" +"""Translate generators test.""" from __future__ import absolute_import from __future__ import division @@ -27,12 +27,12 @@ import six from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import wmt +from tensor2tensor.data_generators import translate import tensorflow as tf -class WMTTest(tf.test.TestCase): +class TranslateTest(tf.test.TestCase): def testCharacterGenerator(self): # Generate a trivial source and target file. @@ -52,7 +52,7 @@ def testCharacterGenerator(self): # Call character generator on the generated files. results_src, results_tgt = [], [] character_vocab = text_encoder.ByteTextEncoder() - for dictionary in wmt.character_generator( + for dictionary in translate.character_generator( tmp_file_path + ".src", tmp_file_path + ".tgt", character_vocab): self.assertEqual(sorted(list(dictionary)), ["inputs", "targets"]) results_src.append(dictionary["inputs"]) @@ -62,24 +62,16 @@ def testCharacterGenerator(self): # First check that the results match the encoded original strings; # this is a comparison of integer arrays. self.assertEqual(len(results_src), 2) - self.assertEqual(results_src[0], - character_vocab.encode("source1")) - self.assertEqual(results_src[1], - character_vocab.encode("source2")) - self.assertEqual(results_tgt[0], - character_vocab.encode("target1")) - self.assertEqual(results_tgt[1], - character_vocab.encode("target2")) + self.assertEqual(results_src[0], character_vocab.encode("source1")) + self.assertEqual(results_src[1], character_vocab.encode("source2")) + self.assertEqual(results_tgt[0], character_vocab.encode("target1")) + self.assertEqual(results_tgt[1], character_vocab.encode("target2")) # Then decode the results and compare with the original strings; # this is a comparison of strings - self.assertEqual(character_vocab.decode(results_src[0]), - "source1") - self.assertEqual(character_vocab.decode(results_src[1]), - "source2") - self.assertEqual(character_vocab.decode(results_tgt[0]), - "target1") - self.assertEqual(character_vocab.decode(results_tgt[1]), - "target2") + self.assertEqual(character_vocab.decode(results_src[0]), "source1") + self.assertEqual(character_vocab.decode(results_src[1]), "source2") + self.assertEqual(character_vocab.decode(results_tgt[0]), "target1") + self.assertEqual(character_vocab.decode(results_tgt[1]), "target2") # Clean up. os.remove(tmp_file_path + ".src") diff --git a/tensor2tensor/data_generators/wmt.py b/tensor2tensor/data_generators/wmt.py deleted file mode 100644 index 61716d012..000000000 --- a/tensor2tensor/data_generators/wmt.py +++ /dev/null @@ -1,718 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Data generators for translation data-sets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tarfile - -# Dependency imports - -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import problem -from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import registry - -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - -# End-of-sentence marker. -EOS = text_encoder.EOS_ID - - -class TranslateProblem(problem.Text2TextProblem): - """Base class for translation problems.""" - - @property - def is_character_level(self): - return False - - @property - def num_shards(self): - return 100 - - @property - def vocab_name(self): - return "vocab.endefr" - - @property - def use_subword_tokenizer(self): - return True - - -# Generic generators used later for multiple problems. - - -def character_generator(source_path, target_path, character_vocab, eos=None): - """Generator for sequence-to-sequence tasks that just uses characters. - - This generator assumes the files at source_path and target_path have - the same number of lines and yields dictionaries of "inputs" and "targets" - where inputs are characters from the source lines converted to integers, - and targets are characters from the target lines, also converted to integers. - - Args: - source_path: path to the file with source sentences. - target_path: path to the file with target sentences. - character_vocab: a TextEncoder to encode the characters. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from characters in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target: - source_ints = character_vocab.encode(source.strip()) + eos_list - target_ints = character_vocab.encode(target.strip()) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - source, target = source_file.readline(), target_file.readline() - - -def tabbed_generator(source_path, source_vocab, target_vocab, eos=None): - r"""Generator for sequence-to-sequence tasks using tabbed files. - - Tokens are derived from text files where each line contains both - a source and a target string. The two strings are separated by a tab - character ('\t'). It yields dictionaries of "inputs" and "targets" where - inputs are characters from the source lines converted to integers, and - targets are characters from the target lines, also converted to integers. - - Args: - source_path: path to the file with source and target sentences. - source_vocab: a SubwordTextEncoder to encode the source string. - target_vocab: a SubwordTextEncoder to encode the target string. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from characters in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - for line in source_file: - if line and "\t" in line: - parts = line.split("\t", 1) - source, target = parts[0].strip(), parts[1].strip() - source_ints = source_vocab.encode(source) + eos_list - target_ints = target_vocab.encode(target) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - - -def token_generator(source_path, target_path, token_vocab, eos=None): - """Generator for sequence-to-sequence tasks that uses tokens. - - This generator assumes the files at source_path and target_path have - the same number of lines and yields dictionaries of "inputs" and "targets" - where inputs are token ids from the " "-split source (and target, resp.) lines - converted to integers using the token_map. - - Args: - source_path: path to the file with source sentences. - target_path: path to the file with target sentences. - token_vocab: text_encoder.TextEncoder object. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from tokens in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target: - source_ints = token_vocab.encode(source.strip()) + eos_list - target_ints = token_vocab.encode(target.strip()) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - source, target = source_file.readline(), target_file.readline() - - -def bi_vocabs_token_generator(source_path, - target_path, - source_token_vocab, - target_token_vocab, - eos=None): - """Generator for sequence-to-sequence tasks that uses tokens. - - This generator assumes the files at source_path and target_path have - the same number of lines and yields dictionaries of "inputs" and "targets" - where inputs are token ids from the " "-split source (and target, resp.) lines - converted to integers using the token_map. - - Args: - source_path: path to the file with source sentences. - target_path: path to the file with target sentences. - source_token_vocab: text_encoder.TextEncoder object. - target_token_vocab: text_encoder.TextEncoder object. - eos: integer to append at the end of each sequence (default: None). - - Yields: - A dictionary {"inputs": source-line, "targets": target-line} where - the lines are integer lists converted from tokens in the file lines. - """ - eos_list = [] if eos is None else [eos] - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target: - source_ints = source_token_vocab.encode(source.strip()) + eos_list - target_ints = target_token_vocab.encode(target.strip()) + eos_list - yield {"inputs": source_ints, "targets": target_ints} - source, target = source_file.readline(), target_file.readline() - - -# Data-set URLs. - -_ENDE_TRAIN_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long - ("training/news-commentary-v12.de-en.en", - "training/news-commentary-v12.de-en.de") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", - ("commoncrawl.de-en.en", "commoncrawl.de-en.de") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", - ("training/europarl-v7.de-en.en", "training/europarl-v7.de-en.de") - ], -] -_ENDE_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newstest2013.en", "dev/newstest2013.de") - ], -] - -_ENFR_TRAIN_DATASETS = [ - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", - ("commoncrawl.fr-en.en", "commoncrawl.fr-en.fr") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", - ("training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr") - ], - [ - "http://www.statmt.org/wmt14/training-parallel-nc-v9.tgz", - ("training/news-commentary-v9.fr-en.en", - "training/news-commentary-v9.fr-en.fr") - ], - [ - "http://www.statmt.org/wmt10/training-giga-fren.tar", - ("giga-fren.release2.fixed.en.gz", "giga-fren.release2.fixed.fr.gz") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-un.tgz", - ("un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr") - ], -] -_ENFR_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newstest2013.en", "dev/newstest2013.fr") - ], -] - -_ZHEN_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" - "training-parallel-nc-v12.tgz"), - ("training/news-commentary-v12.zh-en.zh", - "training/news-commentary-v12.zh-en.en")]] - -_ZHEN_TEST_DATASETS = [[ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.zh.sgm", "dev/newsdev2017-zhen-ref.en.sgm") -]] - -# For Macedonian-English the SETimes corpus -# from http://nlp.ffzg.hr/resources/corpora/setimes/ is used. -# The original dataset has 207,777 parallel sentences. -# For training the first 205,777 sentences are used. -_MKEN_TRAIN_DATASETS = [[ - "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.train.tgz", # pylint: disable=line-too-long - ("train.mk", "train.en") -]] - -# For development 1000 parallel sentences are used. -_MKEN_TEST_DATASETS = [[ - "https://github.com/stefan-it/nmt-mk-en/raw/master/data/setimes.mk-en.dev.tgz", # pylint: disable=line-too-long - ("dev.mk", "dev.en") -]] - -# English-Czech datasets -_ENCS_TRAIN_DATASETS = [ - [ - ("https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/" - "11234/1-1458/data-plaintext-format.tar"), - ("tsv", 3, 2, "data.plaintext-format/*train.gz") - ], - [ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", # pylint: disable=line-too-long - ("training/news-commentary-v12.cs-en.en", - "training/news-commentary-v12.cs-en.cs") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", - ("commoncrawl.cs-en.en", "commoncrawl.cs-en.cs") - ], - [ - "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", - ("training/europarl-v7.cs-en.en", "training/europarl-v7.cs-en.cs") - ], -] -_ENCS_TEST_DATASETS = [ - [ - "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newstest2013.en", "dev/newstest2013.cs") - ], -] - -# Generators. - - -def _get_wmt_ende_bpe_dataset(directory, filename): - """Extract the WMT en-de corpus `filename` to directory unless it's there.""" - train_path = os.path.join(directory, filename) - if not (tf.gfile.Exists(train_path + ".de") and - tf.gfile.Exists(train_path + ".en")): - url = ("https://drive.google.com/uc?export=download&id=" - "0B_bZck-ksdkpM25jRUN2X2UxMm8") - corpus_file = generator_utils.maybe_download_from_drive( - directory, "wmt16_en_de.tar.gz", url) - with tarfile.open(corpus_file, "r:gz") as corpus_tar: - corpus_tar.extractall(directory) - return train_path - - -@registry.register_problem -class TranslateEndeWmtBpe32k(TranslateProblem): - """Problem spec for WMT En-De translation, BPE version.""" - - @property - def targeted_vocab_size(self): - return 32000 - - @property - def vocab_name(self): - return "vocab.bpe" - - def feature_encoders(self, data_dir): - vocab_filename = os.path.join(data_dir, self.vocab_file) - encoder = text_encoder.TokenTextEncoder(vocab_filename, replace_oov="UNK") - return {"inputs": encoder, "targets": encoder} - - def generator(self, data_dir, tmp_dir, train): - """Instance of token generator for the WMT en->de task, training set.""" - dataset_path = ("train.tok.clean.bpe.32000" - if train else "newstest2013.tok.bpe.32000") - train_path = _get_wmt_ende_bpe_dataset(tmp_dir, dataset_path) - token_tmp_path = os.path.join(tmp_dir, self.vocab_file) - token_path = os.path.join(data_dir, self.vocab_file) - tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) - with tf.gfile.GFile(token_path, mode="a") as f: - f.write("UNK\n") # Add UNK to the vocab. - token_vocab = text_encoder.TokenTextEncoder(token_path, replace_oov="UNK") - return token_generator(train_path + ".en", train_path + ".de", token_vocab, - EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_BPE_TOK - - @property - def target_space_id(self): - return problem.SpaceID.DE_BPE_TOK - - -def _preprocess_sgm(line, is_sgm): - """Preprocessing to strip tags in SGM files.""" - if not is_sgm: - return line - # In SGM files, remove <srcset ...>, <p>, <doc ...> lines. - if line.startswith("<srcset") or line.startswith("</srcset"): - return "" - if line.startswith("<doc") or line.startswith("</doc"): - return "" - if line.startswith("<p>") or line.startswith("</p>"): - return "" - # Strip <seg> tags. - line = line.strip() - if line.startswith("<seg") and line.endswith("</seg>"): - i = line.index(">") - return line[i + 1:-6] # Strip first <seg ...> and last </seg>. - - -def _compile_data(tmp_dir, datasets, filename): - """Concatenate all `datasets` and save to `filename`.""" - filename = os.path.join(tmp_dir, filename) - with tf.gfile.GFile(filename + ".lang1", mode="w") as lang1_resfile: - with tf.gfile.GFile(filename + ".lang2", mode="w") as lang2_resfile: - for dataset in datasets: - url = dataset[0] - compressed_filename = os.path.basename(url) - compressed_filepath = os.path.join(tmp_dir, compressed_filename) - - generator_utils.maybe_download(tmp_dir, compressed_filename, url) - - if dataset[1][0] == "tsv": - _, src_column, trg_column, glob_pattern = dataset[1] - filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) - if not filenames: - # Capture *.tgz and *.tar.gz too. - mode = "r:gz" if compressed_filepath.endswith("gz") else "r" - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern)) - for tsv_filename in filenames: - if tsv_filename.endswith(".gz"): - new_filename = tsv_filename.strip(".gz") - generator_utils.gunzip_file(tsv_filename, new_filename) - tsv_filename = new_filename - with tf.gfile.GFile(tsv_filename, mode="r") as tsv_file: - for line in tsv_file: - if line and "\t" in line: - parts = line.split("\t") - source, target = parts[src_column], parts[trg_column] - lang1_resfile.write(source.strip() + "\n") - lang2_resfile.write(target.strip() + "\n") - else: - lang1_filename, lang2_filename = dataset[1] - lang1_filepath = os.path.join(tmp_dir, lang1_filename) - lang2_filepath = os.path.join(tmp_dir, lang2_filename) - is_sgm = (lang1_filename.endswith("sgm") and - lang2_filename.endswith("sgm")) - - if not (os.path.exists(lang1_filepath) and - os.path.exists(lang2_filepath)): - # For .tar.gz and .tgz files, we read compressed. - mode = "r:gz" if compressed_filepath.endswith("gz") else "r" - with tarfile.open(compressed_filepath, mode) as corpus_tar: - corpus_tar.extractall(tmp_dir) - if lang1_filepath.endswith(".gz"): - new_filepath = lang1_filepath.strip(".gz") - generator_utils.gunzip_file(lang1_filepath, new_filepath) - lang1_filepath = new_filepath - if lang2_filepath.endswith(".gz"): - new_filepath = lang2_filepath.strip(".gz") - generator_utils.gunzip_file(lang2_filepath, new_filepath) - lang2_filepath = new_filepath - with tf.gfile.GFile(lang1_filepath, mode="r") as lang1_file: - with tf.gfile.GFile(lang2_filepath, mode="r") as lang2_file: - line1, line2 = lang1_file.readline(), lang2_file.readline() - while line1 or line2: - line1res = _preprocess_sgm(line1, is_sgm) - line2res = _preprocess_sgm(line2, is_sgm) - if line1res or line2res: - lang1_resfile.write(line1res.strip() + "\n") - lang2_resfile.write(line2res.strip() + "\n") - line1, line2 = lang1_file.readline(), lang2_file.readline() - - return filename - - -@registry.register_problem -class TranslateEndeWmt8k(TranslateProblem): - """Problem spec for WMT En-De translation.""" - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - def generator(self, data_dir, tmp_dir, train): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_ende_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.DE_TOK - - -@registry.register_problem -class TranslateEndeWmt32k(TranslateEndeWmt8k): - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - -@registry.register_problem -class TranslateEndeWmtCharacters(TranslateProblem): - """Problem spec for WMT En-De translation.""" - - @property - def is_character_level(self): - return True - - def generator(self, _, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENDE_TRAIN_DATASETS if train else _ENDE_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_ende_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def target_space_id(self): - return problem.SpaceID.DE_CHR - - -@registry.register_problem -class TranslateEnzhWmt8k(TranslateProblem): - """Problem spec for WMT Zh-En translation.""" - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - @property - def num_shards(self): - return 10 # This is a small dataset. - - @property - def source_vocab_name(self): - return "vocab.zhen-zh.%d" % self.targeted_vocab_size - - @property - def target_vocab_name(self): - return "vocab.zhen-en.%d" % self.targeted_vocab_size - - def generator(self, data_dir, tmp_dir, train): - datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] - source_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, - source_datasets) - target_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.target_vocab_name, self.targeted_vocab_size, - target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_zhen_tok_%s" % tag) - # We generate English->X data by convention, to train reverse translation - # just add the "_rev" suffix to the problem name, e.g., like this. - # --problems=translate_enzh_wmt8k_rev - return bi_vocabs_token_generator(data_path + ".lang2", data_path + ".lang1", - source_vocab, target_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.ZH_TOK - - @property - def target_space_id(self): - return problem.SpaceID.EN_TOK - - def feature_encoders(self, data_dir): - source_vocab_filename = os.path.join(data_dir, self.source_vocab_name) - target_vocab_filename = os.path.join(data_dir, self.target_vocab_name) - source_token = text_encoder.SubwordTextEncoder(source_vocab_filename) - target_token = text_encoder.SubwordTextEncoder(target_vocab_filename) - return { - "inputs": source_token, - "targets": target_token, - } - - -@registry.register_problem -class TranslateEnfrWmt8k(TranslateProblem): - """Problem spec for WMT En-Fr translation.""" - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - def generator(self, data_dir, tmp_dir, train): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size) - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.FR_TOK - - -@registry.register_problem -class TranslateEnfrWmt32k(TranslateEnfrWmt8k): - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - -@registry.register_problem -class TranslateEnfrWmtCharacters(TranslateProblem): - """Problem spec for WMT En-Fr translation.""" - - @property - def is_character_level(self): - return True - - def generator(self, data_dir, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def target_space_id(self): - return problem.SpaceID.FR_CHR - - -@registry.register_problem -class TranslateEnmkSetimes32k(TranslateProblem): - """Problem spec for SETimes Mk-En translation.""" - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - @property - def vocab_name(self): - return "vocab.mken" - - def generator(self, data_dir, tmp_dir, train): - datasets = _MKEN_TRAIN_DATASETS if train else _MKEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in datasets] - target_datasets = [[item[0], [item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - source_datasets + target_datasets) - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "setimes_mken_tok_%s" % tag) - # We generate English->X data by convention, to train reverse translation - # just add the "_rev" suffix to the problem name, e.g., like this. - # --problems=translate_enmk_setimes32k_rev - return token_generator(data_path + ".lang2", data_path + ".lang1", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.MK_TOK - - @property - def target_space_id(self): - return problem.SpaceID.EN_TOK - - -@registry.register_problem -class TranslateEncsWmt32k(TranslateProblem): - """Problem spec for WMT English-Czech translation.""" - - @property - def targeted_vocab_size(self): - return 2**15 # 32768 - - @property - def vocab_name(self): - return "vocab.encs" - - def generator(self, data_dir, tmp_dir, train): - datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS - tag = "train" if train else "dev" - vocab_datasets = [] - data_path = _compile_data(tmp_dir, datasets, "wmt_encs_tok_%s" % tag) - # CzEng contains 100 gz files with tab-separated columns, so let's expect - # it is the first dataset in datasets and use the newly created *.lang{1,2} - # files for vocab construction. - if datasets[0][0].endswith("data-plaintext-format.tar"): - vocab_datasets.append([datasets[0][0], ["wmt_encs_tok_%s.lang1" % tag, - "wmt_encs_tok_%s.lang2" % tag]]) - datasets = datasets[1:] - vocab_datasets += [[item[0], [item[1][0], item[1][1]]] for item in datasets] - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - vocab_datasets) - return token_generator(data_path + ".lang1", data_path + ".lang2", - symbolizer_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_TOK - - @property - def target_space_id(self): - return problem.SpaceID.CS_TOK - - -@registry.register_problem -class TranslateEncsWmtCharacters(TranslateProblem): - """Problem spec for WMT En-Cs character-based translation.""" - - @property - def is_character_level(self): - return True - - def generator(self, data_dir, tmp_dir, train): - character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENCS_TRAIN_DATASETS if train else _ENCS_TEST_DATASETS - tag = "train" if train else "dev" - data_path = _compile_data(tmp_dir, datasets, "wmt_encs_chr_%s" % tag) - return character_generator(data_path + ".lang1", data_path + ".lang2", - character_vocab, EOS) - - @property - def input_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def target_space_id(self): - return problem.SpaceID.CS_CHR - - -def parsing_token_generator(data_dir, tmp_dir, train, vocab_size): - symbolizer_vocab = generator_utils.get_or_generate_vocab( - data_dir, tmp_dir, "vocab.endefr.%d" % vocab_size, vocab_size) - filename = "%s_%s.trees" % (FLAGS.parsing_path, "train" if train else "dev") - tree_filepath = os.path.join(tmp_dir, filename) - return wsj_parsing.token_generator(tree_filepath, symbolizer_vocab, - symbolizer_vocab, EOS) From 9d86cf7a0d596d37de0773e47867e60deb2a82e4 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 26 Oct 2017 17:06:59 -0700 Subject: [PATCH 0530/4095] Add back commented-out enfr datasets PiperOrigin-RevId: 173612759 --- .../data_generators/translate_enfr.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tensor2tensor/data_generators/translate_enfr.py b/tensor2tensor/data_generators/translate_enfr.py index 68788d204..152d3d963 100644 --- a/tensor2tensor/data_generators/translate_enfr.py +++ b/tensor2tensor/data_generators/translate_enfr.py @@ -40,6 +40,28 @@ ("baseline-1M-enfr/baseline-1M_train.en", "baseline-1M-enfr/baseline-1M_train.fr") ], + # [ + # "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + # ("commoncrawl.fr-en.en", "commoncrawl.fr-en.fr") + # ], + # [ + # "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + # ("training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr") + # ], + # [ + # "http://www.statmt.org/wmt14/training-parallel-nc-v9.tgz", + # ("training/news-commentary-v9.fr-en.en", + # "training/news-commentary-v9.fr-en.fr") + # ], + # [ + # "http://www.statmt.org/wmt10/training-giga-fren.tar", + # ("giga-fren.release2.fixed.en.gz", + # "giga-fren.release2.fixed.fr.gz") + # ], + # [ + # "http://www.statmt.org/wmt13/training-parallel-un.tgz", + # ("un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr") + # ], ] _ENFR_TEST_DATASETS = [ [ @@ -47,6 +69,10 @@ ("baseline-1M-enfr/baseline-1M_valid.en", "baseline-1M-enfr/baseline-1M_valid.fr") ], + # [ + # "http://data.statmt.org/wmt17/translation-task/dev.tgz", + # ("dev/newstest2013.en", "dev/newstest2013.fr") + # ], ] From ba47b617e612d2497fd577964e9418b953c05078 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 26 Oct 2017 17:28:29 -0700 Subject: [PATCH 0531/4095] v1.2.6 PiperOrigin-RevId: 173615091 --- .travis.yml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 370682401..744006762 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,6 @@ script: - mkdir $T2T_TRAIN_DIR - t2t-datagen --problem=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR - t2t-trainer --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --train_steps=5 --eval_steps=5 --output_dir=$T2T_TRAIN_DIR - - t2t-decoder --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --output_dir=$T2T_TRAIN_DIR --decode_hparams='num_samples=10' + - t2t-decoder --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --output_dir=$T2T_TRAIN_DIR --decode_hparams='num_samples=10,use_last_position_only=True' git: depth: 3 diff --git a/setup.py b/setup.py index 5b6f4690e..88ed4a4ea 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.2.5', + version='1.2.6', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 91d4e1c83f9abb1ca8fcd94a65d6b74aaa3458da Mon Sep 17 00:00:00 2001 From: Mistobaan <mistobaan@gmail.com> Date: Fri, 27 Oct 2017 12:25:41 -0700 Subject: [PATCH 0532/4095] fix mispells --- tensor2tensor/data_generators/README.md | 2 +- tensor2tensor/data_generators/generator_utils.py | 4 ++-- tensor2tensor/data_generators/problem.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/data_generators/README.md b/tensor2tensor/data_generators/README.md index 04a90a778..0ccbfe1c1 100644 --- a/tensor2tensor/data_generators/README.md +++ b/tensor2tensor/data_generators/README.md @@ -42,7 +42,7 @@ for an example of usage. The generators should yield dictionaries with string keys and values being lists of {int, float, str}. Here is a very simple generator for a data-set where -inputs are lists of 2s with length upto 100 and targets are lists of length 1 +inputs are lists of 2s with length up to 100 and targets are lists of length 1 with an integer denoting the length of the input list. ``` diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 55ccf117e..8ce66dc6e 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -190,7 +190,7 @@ def maybe_download(directory, filename, url): print() tf.gfile.Rename(inprogress_filepath, filepath) statinfo = os.stat(filepath) - tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, + tf.logging.info("Successfully downloaded %s, %s bytes." % (filename, statinfo.st_size)) else: tf.logging.info("Not downloading, file already found: %s" % filepath) @@ -242,7 +242,7 @@ def maybe_download_from_drive(directory, filename, url): # Print newline to clear the carriage return from the download progress print() statinfo = os.stat(filepath) - tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, + tf.logging.info("Successfully downloaded %s, %s bytes." % (filename, statinfo.st_size)) return filepath diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 657a5b18b..c826e29dd 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -130,7 +130,7 @@ class Problem(object): Data generation: * generate_data(data_dir, tmp_dir) - Generate training and dev datasets into data_dir. - - Additonal files, e.g. vocabulary files, should also be written to + - Additional files, e.g. vocabulary files, should also be written to data_dir. Vocab files are newline-separated files with each line containing a token. The standard convention for the filename is to set it to be @@ -515,7 +515,7 @@ def _default_hparams(): return tf.contrib.training.HParams( # Use this parameter to get comparable perplexity numbers with different # tokenizations. This value should be set to the ratio of the number of - # tokens in the test set according to the tokeization used to the number + # tokens in the test set according to the tokenization used to the number # of tokens in the test set in the "official" tokenization. For # example, if we are using a word-piece based model and we want to # compute per-word perplexity, then we set loss_multiplier to the number From f711de9b25baa8687edb1fdf26303a09cd0b1d09 Mon Sep 17 00:00:00 2001 From: Urvashi Khandelwal <urvashik@stanford.edu> Date: Sun, 29 Oct 2017 14:53:24 -0700 Subject: [PATCH 0533/4095] Rouge pipeline complete --- tensor2tensor/utils/decoding.py | 4 ++-- tensor2tensor/utils/get_cnndm_rouge.sh | 13 +++++++++++++ tensor2tensor/utils/get_rouge.py | 5 +++-- 3 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 tensor2tensor/utils/get_cnndm_rouge.sh diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 5dac0dd5f..bcf0a63ae 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -83,9 +83,9 @@ def log_decode_results(inputs, decoded_targets = None if identity_output: - decoded_outputs = " ".join(map(str, outputs.flatten())) + decoded_outputs = "".join(map(str, outputs.flatten())) if targets is not None: - decoded_targets = " ".join(map(str, targets.flatten())) + decoded_targets = "".join(map(str, targets.flatten())) else: decoded_outputs = "".join( map(str, targets_vocab.decode(_save_until_eos(outputs.flatten())))) diff --git a/tensor2tensor/utils/get_cnndm_rouge.sh b/tensor2tensor/utils/get_cnndm_rouge.sh new file mode 100644 index 000000000..9833ce248 --- /dev/null +++ b/tensor2tensor/utils/get_cnndm_rouge.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +mosesdecoder=$1 + +targets_file=$2 +decodes_file=$3 + +# Tokenize. +perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l en < $targets_file > $targets_file.tok +perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l en < $decodes_file > $decodes_file.tok + +# Get rouge scores +python get_rouge.py --decodes_filename $decodes_file.tok --targets_filename $targets_file.tok diff --git a/tensor2tensor/utils/get_rouge.py b/tensor2tensor/utils/get_rouge.py index ac029f86d..2e72e2e0d 100644 --- a/tensor2tensor/utils/get_rouge.py +++ b/tensor2tensor/utils/get_rouge.py @@ -38,8 +38,9 @@ def write_to_file(filename, data): # TODO: ensure the output format (chars split by spaces) was as intended - data = "".join(data[::2]) data = ".\n".join(data.split(". ")) + if len(data.strip()) == 0: + print(data, filename) with open(filename, "w") as fp: fp.write(data) @@ -50,7 +51,7 @@ def prep_data(decode_dir, target_dir): write_to_file(os.path.join(target_dir, "rouge.A.%06d.txt" % (i+1)), t) if (i+1 % 1000) == 0: - print("Written %d examples to file" % i) + tf.logging.into("Written %d examples to file" % i) def main(_): rouge = Rouge155() From 6b1267e717f0d3ef51b93120edcd42519bb862b5 Mon Sep 17 00:00:00 2001 From: Vincent Nguyen <vince62s@yahoo.com> Date: Mon, 30 Oct 2017 10:52:57 +0100 Subject: [PATCH 0534/4095] Fix the EnZh task --- .../data_generators/translate_enzh.py | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index 7c77a05fc..5bb5b01b1 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -35,21 +35,23 @@ # End-of-sentence marker. EOS = text_encoder.EOS_ID - -_ZHEN_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" +# This is far from being the real WMT17 task - only toyset here +# you need to register to get UN data and CWT data +# also by convention this is EN to ZH - use translate_enzh_wmt8k_rev for ZH to EN task +_ENZH_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" "training-parallel-nc-v12.tgz"), - ("training/news-commentary-v12.zh-en.zh", - "training/news-commentary-v12.zh-en.en")]] + ("training/news-commentary-v12.zh-en.en", + "training/news-commentary-v12.zh-en.zh")]] -_ZHEN_TEST_DATASETS = [[ +_ENZH_TEST_DATASETS = [[ "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.zh.sgm", "dev/newsdev2017-zhen-ref.en.sgm") + ("dev/newsdev2017-zhen-src.en.sgm", "dev/newsdev2017-zhen-ref.zh.sgm") ]] @registry.register_problem class TranslateEnzhWmt8k(translate.TranslateProblem): - """Problem spec for WMT Zh-En translation.""" + """Problem spec for WMT En-Zh translation.""" @property def targeted_vocab_size(self): @@ -61,16 +63,16 @@ def num_shards(self): @property def source_vocab_name(self): - return "vocab.zhen-zh.%d" % self.targeted_vocab_size + return "vocab.en-zh-en.%d" % self.targeted_vocab_size @property def target_vocab_name(self): - return "vocab.zhen-en.%d" % self.targeted_vocab_size + return "vocab.enzh-zh.%d" % self.targeted_vocab_size def generator(self, data_dir, tmp_dir, train): - datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] + datasets = _ENZH_TRAIN_DATASETS if train else _ENZH_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in _ENZH_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ENZH_TRAIN_DATASETS] source_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, source_datasets) @@ -79,21 +81,18 @@ def generator(self, data_dir, tmp_dir, train): target_datasets) tag = "train" if train else "dev" data_path = translate.compile_data(tmp_dir, datasets, - "wmt_zhen_tok_%s" % tag) - # We generate English->X data by convention, to train reverse translation - # just add the "_rev" suffix to the problem name, e.g., like this. - # --problems=translate_enzh_wmt8k_rev - return translate.bi_vocabs_token_generator(data_path + ".lang2", - data_path + ".lang1", + "wmt_enzh_tok_%s" % tag) + return translate.bi_vocabs_token_generator(data_path + ".lang1", + data_path + ".lang2", source_vocab, target_vocab, EOS) @property def input_space_id(self): - return problem.SpaceID.ZH_TOK + return problem.SpaceID.EN_TOK @property def target_space_id(self): - return problem.SpaceID.EN_TOK + return problem.SpaceID.ZH_TOK def feature_encoders(self, data_dir): source_vocab_filename = os.path.join(data_dir, self.source_vocab_name) From 733de7b7535849195532540d98e7de031c8368ec Mon Sep 17 00:00:00 2001 From: Vincent Nguyen <vince62s@yahoo.com> Date: Mon, 30 Oct 2017 16:49:55 +0100 Subject: [PATCH 0535/4095] typo fix --- tensor2tensor/data_generators/translate_enzh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index 5bb5b01b1..6b0f36c23 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -63,7 +63,7 @@ def num_shards(self): @property def source_vocab_name(self): - return "vocab.en-zh-en.%d" % self.targeted_vocab_size + return "vocab.enzh-en.%d" % self.targeted_vocab_size @property def target_vocab_name(self): From f2e8e359e857c4778e23d2fbd295f2a985d5242e Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Fri, 27 Oct 2017 14:25:27 -0700 Subject: [PATCH 0536/4095] Ignore sync flag when in single-worker mode. PiperOrigin-RevId: 173718205 --- tensor2tensor/data_generators/README.md | 2 +- .../data_generators/generator_utils.py | 4 +- tensor2tensor/data_generators/problem.py | 4 +- .../data_generators/translate_enzh.py | 39 ++-- tensor2tensor/models/lstm.py | 211 ++++++++++++------ tensor2tensor/utils/devices.py | 7 +- 6 files changed, 177 insertions(+), 90 deletions(-) diff --git a/tensor2tensor/data_generators/README.md b/tensor2tensor/data_generators/README.md index 0ccbfe1c1..04a90a778 100644 --- a/tensor2tensor/data_generators/README.md +++ b/tensor2tensor/data_generators/README.md @@ -42,7 +42,7 @@ for an example of usage. The generators should yield dictionaries with string keys and values being lists of {int, float, str}. Here is a very simple generator for a data-set where -inputs are lists of 2s with length up to 100 and targets are lists of length 1 +inputs are lists of 2s with length upto 100 and targets are lists of length 1 with an integer denoting the length of the input list. ``` diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 8ce66dc6e..55ccf117e 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -190,7 +190,7 @@ def maybe_download(directory, filename, url): print() tf.gfile.Rename(inprogress_filepath, filepath) statinfo = os.stat(filepath) - tf.logging.info("Successfully downloaded %s, %s bytes." % (filename, + tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, statinfo.st_size)) else: tf.logging.info("Not downloading, file already found: %s" % filepath) @@ -242,7 +242,7 @@ def maybe_download_from_drive(directory, filename, url): # Print newline to clear the carriage return from the download progress print() statinfo = os.stat(filepath) - tf.logging.info("Successfully downloaded %s, %s bytes." % (filename, + tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, statinfo.st_size)) return filepath diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index c826e29dd..657a5b18b 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -130,7 +130,7 @@ class Problem(object): Data generation: * generate_data(data_dir, tmp_dir) - Generate training and dev datasets into data_dir. - - Additional files, e.g. vocabulary files, should also be written to + - Additonal files, e.g. vocabulary files, should also be written to data_dir. Vocab files are newline-separated files with each line containing a token. The standard convention for the filename is to set it to be @@ -515,7 +515,7 @@ def _default_hparams(): return tf.contrib.training.HParams( # Use this parameter to get comparable perplexity numbers with different # tokenizations. This value should be set to the ratio of the number of - # tokens in the test set according to the tokenization used to the number + # tokens in the test set according to the tokeization used to the number # of tokens in the test set in the "official" tokenization. For # example, if we are using a word-piece based model and we want to # compute per-word perplexity, then we set loss_multiplier to the number diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index 6b0f36c23..7c77a05fc 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -35,23 +35,21 @@ # End-of-sentence marker. EOS = text_encoder.EOS_ID -# This is far from being the real WMT17 task - only toyset here -# you need to register to get UN data and CWT data -# also by convention this is EN to ZH - use translate_enzh_wmt8k_rev for ZH to EN task -_ENZH_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" + +_ZHEN_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" "training-parallel-nc-v12.tgz"), - ("training/news-commentary-v12.zh-en.en", - "training/news-commentary-v12.zh-en.zh")]] + ("training/news-commentary-v12.zh-en.zh", + "training/news-commentary-v12.zh-en.en")]] -_ENZH_TEST_DATASETS = [[ +_ZHEN_TEST_DATASETS = [[ "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.en.sgm", "dev/newsdev2017-zhen-ref.zh.sgm") + ("dev/newsdev2017-zhen-src.zh.sgm", "dev/newsdev2017-zhen-ref.en.sgm") ]] @registry.register_problem class TranslateEnzhWmt8k(translate.TranslateProblem): - """Problem spec for WMT En-Zh translation.""" + """Problem spec for WMT Zh-En translation.""" @property def targeted_vocab_size(self): @@ -63,16 +61,16 @@ def num_shards(self): @property def source_vocab_name(self): - return "vocab.enzh-en.%d" % self.targeted_vocab_size + return "vocab.zhen-zh.%d" % self.targeted_vocab_size @property def target_vocab_name(self): - return "vocab.enzh-zh.%d" % self.targeted_vocab_size + return "vocab.zhen-en.%d" % self.targeted_vocab_size def generator(self, data_dir, tmp_dir, train): - datasets = _ENZH_TRAIN_DATASETS if train else _ENZH_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _ENZH_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _ENZH_TRAIN_DATASETS] + datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] source_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, source_datasets) @@ -81,18 +79,21 @@ def generator(self, data_dir, tmp_dir, train): target_datasets) tag = "train" if train else "dev" data_path = translate.compile_data(tmp_dir, datasets, - "wmt_enzh_tok_%s" % tag) - return translate.bi_vocabs_token_generator(data_path + ".lang1", - data_path + ".lang2", + "wmt_zhen_tok_%s" % tag) + # We generate English->X data by convention, to train reverse translation + # just add the "_rev" suffix to the problem name, e.g., like this. + # --problems=translate_enzh_wmt8k_rev + return translate.bi_vocabs_token_generator(data_path + ".lang2", + data_path + ".lang1", source_vocab, target_vocab, EOS) @property def input_space_id(self): - return problem.SpaceID.EN_TOK + return problem.SpaceID.ZH_TOK @property def target_space_id(self): - return problem.SpaceID.ZH_TOK + return problem.SpaceID.EN_TOK def feature_encoders(self, data_dir): source_vocab_filename = os.path.join(data_dir, self.source_vocab_name) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 2f5475276..f336bd6b4 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -31,6 +31,144 @@ import tensorflow as tf from tensorflow.python.util import nest +# Track Tuple of state and attention values +AttentionTuple = collections.namedtuple("AttentionTuple", ("state", + "attention")) + + +class ExternalAttentionCellWrapper(tf.contrib.rnn.RNNCell): + """Wrapper for external attention states for an encoder-decoder setup.""" + + def __init__(self, + cell, + attn_states, + attn_vec_size=None, + input_size=None, + state_is_tuple=True, + reuse=None): + """Create a cell with attention. + + Args: + cell: an RNNCell, an attention is added to it. + attn_states: External attention states typically the encoder output in the + form [batch_size, time steps, hidden size] + attn_vec_size: integer, the number of convolutional features calculated + on attention state and a size of the hidden layer built from + base cell state. Equal attn_size to by default. + input_size: integer, the size of a hidden linear layer, + built from inputs and attention. Derived from the input tensor + by default. + state_is_tuple: If True, accepted and returned states are n-tuples, where + `n = len(cells)`. Must be set to True else will raise an exception + concatenated along the column axis. + reuse: (optional) Python boolean describing whether to reuse variables + in an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + Raises: + TypeError: if cell is not an RNNCell. + ValueError: if the flag `state_is_tuple` is `False` or if shape of + `attn_states` is not 3 or if innermost dimension (hidden size) is None. + """ + super(ExternalAttentionCellWrapper, self).__init__(_reuse=reuse) + if not state_is_tuple: + raise ValueError("Only tuple state is supported") + + self._cell = cell + self._input_size = input_size + + # Validate attn_states shape. + attn_shape = attn_states.get_shape() + if not attn_shape or len(attn_shape) != 3: + raise ValueError("attn_shape must be rank 3") + + self._attn_states = attn_states + self._attn_size = attn_shape[2].value + if self._attn_size is None: + raise ValueError("Hidden size of attn_states cannot be None") + + self._attn_vec_size = attn_vec_size + if self._attn_vec_size is None: + self._attn_vec_size = self._attn_size + + self._reuse = reuse + + @property + def state_size(self): + return AttentionTuple(self._cell.state_size, self._attn_size) + + @property + def output_size(self): + return self._attn_size + + def combine_state(self, previous_state): + """Combines previous state (from encoder) with internal attention values. + + You must use this function to derive the initial state passed into + this cell as it expects a named tuple (AttentionTuple). + + Args: + previous_state: State from another block that will be fed into this cell; + Must have same structure as the state of the cell wrapped by this. + Returns: + Combined state (AttentionTuple). + """ + batch_size = self._attn_states.get_shape()[0].value + if batch_size is None: + batch_size = tf.shape(self._attn_states)[0] + zeroed_state = self.zero_state(batch_size, self._attn_states.dtype) + return AttentionTuple(previous_state, zeroed_state.attention) + + def call(self, inputs, state): + """Long short-term memory cell with attention (LSTMA).""" + + if not isinstance(state, AttentionTuple): + raise TypeError("State must be of type AttentionTuple") + + state, attns = state + attn_states = self._attn_states + attn_length = attn_states.get_shape()[1].value + if attn_length is None: + attn_length = tf.shape(attn_states)[1] + + input_size = self._input_size + if input_size is None: + input_size = inputs.get_shape().as_list()[1] + if attns is not None: + inputs = tf.layers.dense(tf.concat([inputs, attns], axis=1), input_size) + lstm_output, new_state = self._cell(inputs, state) + + new_state_cat = tf.concat(nest.flatten(new_state), 1) + new_attns = self._attention(new_state_cat, attn_states, attn_length) + + with tf.variable_scope("attn_output_projection"): + output = tf.layers.dense( + tf.concat([lstm_output, new_attns], axis=1), self._attn_size) + + new_state = AttentionTuple(new_state, new_attns) + + return output, new_state + + def _attention(self, query, attn_states, attn_length): + conv2d = tf.nn.conv2d + reduce_sum = tf.reduce_sum + softmax = tf.nn.softmax + tanh = tf.tanh + + with tf.variable_scope("attention"): + k = tf.get_variable("attn_w", + [1, 1, self._attn_size, self._attn_vec_size]) + v = tf.get_variable("attn_v", [self._attn_vec_size, 1]) + hidden = tf.reshape(attn_states, [-1, attn_length, 1, self._attn_size]) + hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME") + y = tf.layers.dense(query, self._attn_vec_size) + y = tf.reshape(y, [-1, 1, 1, self._attn_vec_size]) + s = reduce_sum(v * tanh(hidden_features + y), [2, 3]) + a = softmax(s) + d = reduce_sum(tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2]) + new_attns = tf.reshape(d, [-1, self._attn_size]) + + return new_attns + def lstm(inputs, hparams, train, name, initial_state=None): """Run LSTM cell on inputs, assuming they are [batch x time x size].""" @@ -51,7 +189,7 @@ def dropout_lstm_cell(): def lstm_attention_decoder(inputs, hparams, train, name, initial_state, - encoder_outputs): + attn_states): """Run LSTM cell with attention on inputs of shape [batch x time x size].""" def dropout_lstm_cell(): @@ -60,36 +198,18 @@ def dropout_lstm_cell(): input_keep_prob=1.0 - hparams.dropout * tf.to_float(train)) layers = [dropout_lstm_cell() for _ in range(hparams.num_hidden_layers)] - AttentionMechanism = (tf.contrib.seq2seq.LuongAttention if hparams.attention_mechanism == "luong" - else tf.contrib.seq2seq.BahdanauAttention) - attention_mechanism = AttentionMechanism(hparams.hidden_size, encoder_outputs) - - cell = tf.contrib.seq2seq.AttentionWrapper( + cell = ExternalAttentionCellWrapper( tf.nn.rnn_cell.MultiRNNCell(layers), - [attention_mechanism]*hparams.num_heads, - attention_layer_size=[hparams.attention_layer_size]*hparams.num_heads, - output_attention=(hparams.output_attention==1)) - - - batch_size = inputs.get_shape()[0].value - if batch_size is None: - batch_size = tf.shape(inputs)[0] - - initial_state = cell.zero_state(batch_size, tf.float32).clone(cell_state=initial_state) - + attn_states, + attn_vec_size=hparams.attn_vec_size) + initial_state = cell.combine_state(initial_state) with tf.variable_scope(name): - output, state = tf.nn.dynamic_rnn( + return tf.nn.dynamic_rnn( cell, inputs, initial_state=initial_state, dtype=tf.float32, time_major=False) - - # For multi-head attention project output back to hidden size - if hparams.output_attention == 1 and hparams.num_heads > 1: - output = tf.layers.dense(output, hparams.hidden_size) - - return output, state def lstm_seq2seq_internal(inputs, targets, hparams, train): @@ -153,49 +273,14 @@ def lstm_seq2seq(): hparams.hidden_size = 128 hparams.num_hidden_layers = 2 hparams.initializer = "uniform_unit_scaling" - hparams.initializer_gain = 1.0 - hparams.weight_decay = 0.0 - - return hparams - -def lstm_attention_base(): - """ Base attention params. """ - hparams = lstm_seq2seq() - hparams.add_hparam("attention_layer_size", hparams.hidden_size) - hparams.add_hparam("output_attention", int(True)) - hparams.add_hparam("num_heads", 1) return hparams -@registry.register_hparams -def lstm_bahdanau_attention(): - """hparams for LSTM with bahdanau attention.""" - hparams = lstm_attention_base() - hparams.add_hparam("attention_mechanism", "bahdanau") - return hparams - -@registry.register_hparams -def lstm_luong_attention(): - """hparams for LSTM with luong attention.""" - hparams = lstm_attention_base() - hparams.add_hparam("attention_mechanism", "luong") - return hparams - @registry.register_hparams def lstm_attention(): - """ For backwards compatibility, Defaults to bahdanau """ - return lstm_bahdanau_attention() + """hparams for LSTM with attention.""" + hparams = lstm_seq2seq() -@registry.register_hparams -def lstm_bahdanau_attention_multi(): - """ Multi-head Luong attention """ - hparams = lstm_bahdanau_attention() - hparams.num_heads = 4 + # Attention + hparams.add_hparam("attn_vec_size", hparams.hidden_size) return hparams - -@registry.register_hparams -def lstm_luong_attention_multi(): - """ Multi-head Luong attention """ - hparams = lstm_luong_attention() - hparams.num_heads = 4 - return hparams \ No newline at end of file diff --git a/tensor2tensor/utils/devices.py b/tensor2tensor/utils/devices.py index 9fa322985..e296394da 100644 --- a/tensor2tensor/utils/devices.py +++ b/tensor2tensor/utils/devices.py @@ -118,8 +118,8 @@ def _replica_device_setter(worker_device): if FLAGS.locally_shard_to_cpu or FLAGS.worker_gpu < 1: datashard_devices += ["cpu:0"] caching_devices = None - elif FLAGS.sync: - assert FLAGS.ps_replicas > 0 + elif FLAGS.sync and FLAGS.ps_replicas > 0: + # compute on ps datashard_devices = [ _replica_device_setter(d) for d in ps_devices(all_workers=all_workers) ] @@ -131,7 +131,8 @@ def _replica_device_setter(worker_device): else: caching_devices = None else: - # old fashioned async - compute on worker + # compute on worker - this is either a single-worker setup or asynchronous + # with parameter servers. if FLAGS.worker_gpu > 1: datashard_devices = [ _replica_device_setter(FLAGS.worker_job + "/GPU:%d" % d) From 9a2a6f39570e72faed9d673b12ea829061404c4f Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 30 Oct 2017 17:02:42 -0700 Subject: [PATCH 0537/4095] Allow LSTMs to run on problems without inputs. PiperOrigin-RevId: 173972178 --- tensor2tensor/data_generators/README.md | 2 +- .../data_generators/generator_utils.py | 8 ++++---- tensor2tensor/data_generators/problem.py | 4 ++-- tensor2tensor/models/lstm.py | 20 ++++++++++++------- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/tensor2tensor/data_generators/README.md b/tensor2tensor/data_generators/README.md index 04a90a778..0ccbfe1c1 100644 --- a/tensor2tensor/data_generators/README.md +++ b/tensor2tensor/data_generators/README.md @@ -42,7 +42,7 @@ for an example of usage. The generators should yield dictionaries with string keys and values being lists of {int, float, str}. Here is a very simple generator for a data-set where -inputs are lists of 2s with length upto 100 and targets are lists of length 1 +inputs are lists of 2s with length up to 100 and targets are lists of length 1 with an integer denoting the length of the input list. ``` diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 55ccf117e..835d049f8 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -190,8 +190,8 @@ def maybe_download(directory, filename, url): print() tf.gfile.Rename(inprogress_filepath, filepath) statinfo = os.stat(filepath) - tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, - statinfo.st_size)) + tf.logging.info("Successfully downloaded %s, %s bytes." % + (filename, statinfo.st_size)) else: tf.logging.info("Not downloading, file already found: %s" % filepath) return filepath @@ -242,8 +242,8 @@ def maybe_download_from_drive(directory, filename, url): # Print newline to clear the carriage return from the download progress print() statinfo = os.stat(filepath) - tf.logging.info("Succesfully downloaded %s, %s bytes." % (filename, - statinfo.st_size)) + tf.logging.info("Successfully downloaded %s, %s bytes." % (filename, + statinfo.st_size)) return filepath diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 657a5b18b..c826e29dd 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -130,7 +130,7 @@ class Problem(object): Data generation: * generate_data(data_dir, tmp_dir) - Generate training and dev datasets into data_dir. - - Additonal files, e.g. vocabulary files, should also be written to + - Additional files, e.g. vocabulary files, should also be written to data_dir. Vocab files are newline-separated files with each line containing a token. The standard convention for the filename is to set it to be @@ -515,7 +515,7 @@ def _default_hparams(): return tf.contrib.training.HParams( # Use this parameter to get comparable perplexity numbers with different # tokenizations. This value should be set to the ratio of the number of - # tokens in the test set according to the tokeization used to the number + # tokens in the test set according to the tokenization used to the number # of tokens in the test set in the "official" tokenization. For # example, if we are using a word-piece based model and we want to # compute per-word perplexity, then we set loss_multiplier to the number diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index f336bd6b4..0ae1ad294 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -215,11 +215,15 @@ def dropout_lstm_cell(): def lstm_seq2seq_internal(inputs, targets, hparams, train): """The basic LSTM seq2seq model, main step used for training.""" with tf.variable_scope("lstm_seq2seq"): - # Flatten inputs. - inputs = common_layers.flatten4d3d(inputs) - # LSTM encoder. - _, final_encoder_state = lstm( - tf.reverse(inputs, axis=[1]), hparams, train, "encoder") + if inputs is None: + final_encoder_state = None + else: + # Flatten inputs. + inputs = common_layers.flatten4d3d(inputs) + # LSTM encoder. + _, final_encoder_state = lstm( + tf.reverse(inputs, axis=[1]), hparams, train, "encoder") + # LSTM decoder. shifted_targets = common_layers.shift_right(targets) decoder_outputs, _ = lstm( @@ -252,8 +256,10 @@ class LSTMSeq2seq(t2t_model.T2TModel): def model_fn_body(self, features): train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN - return lstm_seq2seq_internal(features["inputs"], features["targets"], - self._hparams, train) + return lstm_seq2seq_internal(features.get("inputs", None), + features["targets"], + self._hparams, + train) @registry.register_model From b02078c99f77a4bd7bbe41ace41e46572b5ec837 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 30 Oct 2017 17:06:58 -0700 Subject: [PATCH 0538/4095] More work on autoencoding Transformer; allow to decode from it. PiperOrigin-RevId: 173972810 --- tensor2tensor/models/transformer_vae.py | 335 +++++++++++++++++------- tensor2tensor/utils/model_builder.py | 3 +- tensor2tensor/utils/t2t_model.py | 31 ++- 3 files changed, 261 insertions(+), 108 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 67ec86ef5..d936ce72f 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -128,7 +128,7 @@ def dae(x, hparams, name): steps = hparams.kl_warmup_steps gumbel_samples *= common_layers.inverse_exp_decay(steps // 5) * 0.5 temperature = 1.2 - common_layers.inverse_lin_decay(steps) - # 30% of the time keep reasonably high temperature to keep learning. + # 10% of the time keep reasonably high temperature to keep learning. temperature = tf.cond(tf.less(tf.random_uniform([]), 0.9), lambda: temperature, lambda: tf.random_uniform([], minval=0.5, maxval=1.0)) @@ -216,6 +216,84 @@ def kmeans(x, means, hparams, name): return x_means_hot, tf.reduce_mean(kl) # * 10.0 +def bit_to_int(x_bit, nbits): + """Turn x_bit representing numbers bitwise (lower-endian) to int tensor.""" + x_l = tf.stop_gradient(tf.reshape(x_bit, [-1, nbits])) + x_labels = [] + for i in range(nbits): + x_labels.append(x_l[:, i] * 2**i) + res = sum(x_labels) + return tf.to_int32(tf.reshape(res, tf.shape(x_bit)[:-1])) + + +def int_to_bit(x_int, nbits): + """Turn x_int representing numbers into a bitwise (lower-endian) tensor.""" + x_l = tf.expand_dims(x_int, axis=-1) + x_labels = [] + for i in range(nbits): + x_labels.append(tf.floormod(tf.floordiv(x_l, 2**i), 2)) + res = tf.concat(x_labels, axis=-1) + return tf.to_float(res) + + +def bottleneck(x, hparams, filter_size, name): + """Bottleneck.""" + def embed1(x): + if hparams.bottleneck_kind == "semhash": + c = int_to_bit(x, c_size) + h1a = tf.layers.dense(c, filter_size, name="vch1a") + h1b = tf.layers.dense(1.0 - c, filter_size, name="vch1b") + return h1a + h1b + elif hparams.bottleneck_kind == "gumbel-softmax": + hot = tf.one_hot(x, hparams.v_size) + with tf.variable_scope(name, reuse=True): + return tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") + + def embed(x): + with tf.variable_scope(name, reuse=True): + h1 = embed1(x) + h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") + res = tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") + return res + + with tf.variable_scope(name): + c_size = hparams.c_size + l = tf.constant(0.0) + if hparams.bottleneck_kind == "dense": + c = tf.layers.dense(x, c_size, name="vcc") + h1 = tf.layers.dense(c, filter_size, name="vch1") + if hparams.bottleneck_kind == "semhash": + c = tf.layers.dense(x, c_size, name="vcc") + y_clean = common_layers.saturating_sigmoid(c) + tf.summary.histogram("y_clean", tf.reshape(y_clean, [-1])) + # l = tf.reduce_mean(y_clean * (1.0 - y_clean)) + if hparams.noise_dev > 0 and hparams.mode == tf.estimator.ModeKeys.TRAIN: + dev = hparams.noise_dev + noise = tf.truncated_normal(tf.shape(c), mean=0.0, stddev=dev) + y = common_layers.saturating_sigmoid(c + noise) + else: + y = y_clean + d = tf.to_float(tf.less(0.5, y)) + y_discrete = tf.stop_gradient(d) + y - tf.stop_gradient(y) + pd = common_layers.inverse_exp_decay(hparams.startup_steps * 2) + pd *= hparams.d_mix + pd = pd if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 + c = tf.cond(tf.less(tf.random_uniform([]), pd), + lambda: y_discrete, lambda: y) + h1a = tf.layers.dense(c, filter_size, name="vch1a") + h1b = tf.layers.dense(1.0 - c, filter_size, name="vch1b") + h1 = h1a + h1b + dx = tf.to_int32(tf.stop_gradient(d)) + c = bit_to_int(dx, c_size) + if hparams.bottleneck_kind == "gumbel-softmax": + _, hot, l = dae(x, hparams, name) + c = tf.argmax(hot, axis=-1) + h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") + h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") + res = tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") + return res, c, l, embed + + def compress(x, c, is_2d, hparams, name): """Compress.""" with tf.variable_scope(name): @@ -272,6 +350,32 @@ def decode(cond_vec, cond_add, gold, c, ed, hparams, name): return transformer.transformer_decoder(decoder_input, c, bias, ed, hparams) +def decode_transformer(encoder_output, + encoder_decoder_attention_bias, + targets, + hparams, + name): + """Original Transformer decoder.""" + with tf.variable_scope(name): + targets = common_layers.flatten4d3d(targets) + + decoder_input, decoder_self_bias = transformer.transformer_prepare_decoder( + targets, hparams) + + decoder_input = tf.nn.dropout(decoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + + decoder_output = transformer.transformer_decoder( + decoder_input, + encoder_output, + decoder_self_bias, + encoder_decoder_attention_bias, + hparams) + + # Expand since t2t expects 4d tensors. + return tf.expand_dims(decoder_output, axis=2) + + def expand_batch(x, mul): """Expand on batch by mul times.""" cx = tf.expand_dims(x, axis=1) @@ -298,18 +402,6 @@ def ae_compress(x, is_2d, hparams, name, reuse=None): hot, loss = bit_vae(cur, hparams, "bvae") else: hot, loss, _, _ = vae(cur, hparams.z_size, "vae") - # Do a second level vae with some probability. - if hparams.z_size2 > 0: - prob_z2 = common_layers.inverse_exp_decay(hparams.startup_steps*2) * 0.8 - if hparams.mode != tf.contrib.learn.ModeKeys.TRAIN: - prob_z2 = 1.0 - def vae2(): - hot2, loss2, _, _ = vae(hot, hparams.z_size2, "vae2") - ret = tf.layers.dense(hot2, hparams.z_size) - return mix(ret, hot, hparams.startup_steps * 2), loss2 - hot, loss2 = tf.cond(tf.less(tf.random_uniform([]), prob_z2), - vae2, lambda: (hot, tf.constant(0.0))) - loss += loss2 * 0.1 return cur, hot, loss if hparams.use_gumbel_softmax: _, hot, loss = dae(cur, hparams, "dae") @@ -389,90 +481,127 @@ def ffn(x, hparams, name): return common_layers.layer_postprocess(x, y, hparams) -def ae_transformer_internal(inputs, targets, target_space, hparams): +def multinomial_sample(x, vocab_size, temperature): + """Multinomial sampling from a n-dimensional tensor.""" + samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1) + reshaped_samples = tf.reshape(samples, tf.shape(x)[:-1]) + return tf.to_int32(reshaped_samples) + + +def ae_latent_sample(t_c, inputs, ed, embed, iters, hparams): + """Sample from the latent space in the autoencoder.""" + t_pred = decode_transformer(inputs, ed, t_c, hparams, "extra") + t_pred = tf.layers.dense(t_pred, 2**16, name="extra_logits") + t_bit = multinomial_sample(t_pred, 2**16, hparams.sampling_temp) + for i in xrange(iters): + t_bit_prev = t_bit + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + t_c = embed(t_bit) + t_pred = decode_transformer(inputs, ed, t_c, hparams, "extra") + t_pred = tf.layers.dense(t_pred, 2**16, name="extra_logits") + t_bit = multinomial_sample(t_pred, 2**16, hparams.sampling_temp) + t_bit = tf.concat([t_bit_prev[:, :(i+1), :], + t_bit[:, (i+1):, :]], axis=1) + return t_bit + + +def ae_transformer_internal(inputs, targets, target_space, hparams, + beam_size, cache=None): """AE Transformer, main step used for training.""" + hparams.z_size = hparams.hidden_size with tf.variable_scope("ae_transformer"): # Prepare inputs, targets, k. - k = 2**hparams.num_compress_steps - _, targets = common_layers.pad_to_same_length( - targets, targets, final_length_divisible_by=k) - inputs = common_layers.flatten4d3d(inputs) - inputs, ed = encode(inputs, target_space, hparams, "input_enc") - - # Compress and ae. - ae, hot, kl = ae_compress(targets, hparams.is_2d, hparams, "ae") - tf.summary.histogram("hot", tf.reshape(tf.argmax(hot, axis=-1), [-1])) - emb = ae_embed(hot, hparams, "ae", reuse=True) - - # Compress context and run autoregressive decoder on emb-hot. - if hparams.do_vae: - reconstruct_loss = 0.0 + orig_targets = targets + batch_size = tf.shape(orig_targets)[0] + targets = tf.reshape(targets, [batch_size, -1, 1, hparams.hidden_size]) + k = hparams.num_compress_steps + + # Encoder. + if inputs is not None: + inputs = common_layers.flatten4d3d(inputs) + inputs, ed = encode(inputs, target_space, hparams, "input_enc") + else: + ed = None + + # Autoencoding. + losses = {"vc": tf.constant(0.0), "sm": tf.constant(0.0)} + latent_len = hparams.latent_length + if hparams.do_ae: + targets_pad, _ = common_layers.pad_to_same_length( + targets, targets, final_length_divisible_by=latent_len * 2**k) + targets_c = compress(targets_pad, None, False, hparams, "compress") + targets_c = targets_c[:, :latent_len, :, :] + if hparams.mode != tf.estimator.ModeKeys.PREDICT: + # Compress and bottleneck. + t_c, t_bit, vc_loss, _ = bottleneck(targets_c, hparams, 2*2048, "vc") + tf.summary.histogram("bit0", tf.reshape(t_bit[:, 0, :], [-1])) + pc = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.95 + pc = pc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 + cond = tf.less(tf.random_uniform([]), pc) + t_c = tf.cond(cond, lambda: t_c, lambda: targets_c) + losses["vc"] = vc_loss * tf.to_float(cond) + # Extra loss predicting latent code from input. + t_pred = decode_transformer( + inputs, ed, tf.stop_gradient(t_c), hparams, "extra") + t_pred = tf.layers.dense(t_pred, 2**16, name="extra_logits") + losses["sm"] = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=t_bit, logits=t_pred) + losses["sm"] = tf.reduce_mean(losses["sm"]) * 0.2 * tf.to_float(cond) + else: + _, _, _, embed = bottleneck(targets_c, hparams, 2*2048, "vc") + t_c = tf.zeros_like(targets_c) + if cache is None: + cache = ae_latent_sample(t_c, inputs, ed, embed, 3, hparams) + cache = cache[0, :, :] + cache = tf.reshape(cache, [1, latent_len, 1]) + cache = tf.tile(cache, [beam_size, 1, 1]) + t_c = embed(cache) + # Postprocess. + pos = tf.get_variable("pos", [1, latent_len + 1, 1, hparams.hidden_size]) + t_c = tf.pad(t_c, [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos + targets = tf.concat([tf.reverse(t_c, [1]), targets], axis=1) else: - emb_flat = tf.expand_dims(common_layers.flatten4d3d(emb), axis=2) - emb_flat = tf.stop_gradient(emb_flat) - dec_c = decode(None, None, emb_flat, inputs, ed, hparams, "dgold") - dec_c = tf.reshape(dec_c, tf.shape(emb)) - c_z = tf.layers.dense(dec_c, hparams.v_size, name="mask_context") - reconstruct_loss = tf.nn.softmax_cross_entropy_with_logits( - labels=hot, logits=c_z) - # If not training, use the predicted z instead of the autoregressive one. - if hparams.mode == tf.estimator.ModeKeys.PREDICT: - hot = tf.one_hot(tf.argmax(c_z, axis=-1), hparams.v_size) - - # Decompress, pass for ae loss. - z = ae_decompress(emb, ae, targets, hparams.is_2d, hparams, "ae") - if not (hparams.use_gumbel_softmax and hparams.softmax_k > 0): - kl *= common_layers.inverse_exp_decay(int(hparams.startup_steps * 0.8), - min_value=0.0001) - reconstruct_loss *= common_layers.inverse_exp_decay(hparams.startup_steps) - losses = {"kl": kl, "reconstruction": reconstruct_loss * 0.1} - return z, losses + targets = tf.pad(targets, [[0, 0], [latent_len + 1, 0], [0, 0], [0, 0]]) + + res = decode_transformer(inputs, ed, targets, hparams, "decoder") + res = res[:, latent_len + 1:, :, :] + return res, losses, cache @registry.register_model class TransformerAE(t2t_model.T2TModel): + """Autoencoder-augmented Transformer.""" + + @property + def has_input(self): + return self._problem_hparams.input_modality def model_fn_body(self, features): - return ae_transformer_internal( - features["inputs"], features["targets"], features["target_space_id"], - self._hparams) - - def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, - last_position_only=False, alpha=0.0): - """A inference method, see T2TModel.""" - if not features: - features = {} - inputs_old = None - if "inputs" in features and len(features["inputs"].shape) < 4: - inputs_old = features["inputs"] - features["inputs"] = tf.expand_dims(features["inputs"], 2) - - # Create an initial targets tensor. - if "partial_targets" in features: - initial_output = tf.convert_to_tensor(features["partial_targets"]) - else: - batch_size = tf.shape(features["inputs"])[0] - initial_output = tf.zeros((batch_size, 1, 1, 1), dtype=tf.int64) - - features["targets"] = initial_output - sharded_logits, _ = self.model_fn( - features, False, last_position_only=last_position_only) - sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) - samples = tf.concat(sharded_samples, 0) - - # More steps. - how_many_more_steps = 2 - for _ in xrange(how_many_more_steps): - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - features["targets"] = samples - sharded_logits, _ = self.model_fn( - features, False, last_position_only=last_position_only) - sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) - samples = tf.concat(sharded_samples, 0) - - if inputs_old is not None: # Restore to not confuse Estimator. - features["inputs"] = inputs_old - return samples + inputs = features["inputs"] if "inputs" in features else None + if self._hparams.drop_inputs: + inputs = None + reuse = "cache_raw" in features + beam_size = self._decode_hparams.beam_size + with tf.variable_scope(tf.get_variable_scope(), reuse=reuse): + res, loss, _ = ae_transformer_internal( + inputs, features["targets"], features["target_space_id"], + self._hparams, beam_size, features.get("cache_raw", None)) + return res, loss + + def prepare_features_for_infer(self, features): + if not self._hparams.do_ae: + return features + beam_size = self._decode_hparams.beam_size + inputs = tf.zeros([beam_size, 1, 1, self._hparams.hidden_size]) + inputs = inputs if "inputs" in features else None + if self._hparams.drop_inputs or not self.has_input: + inputs = None + targets = tf.zeros([beam_size, 1, 1, self._hparams.hidden_size]) + with tf.variable_scope("body"): + _, _, cache = ae_transformer_internal( + inputs, targets, features["target_space_id"], + self._hparams, beam_size) + features["cache_raw"] = cache @registry.register_hparams @@ -481,12 +610,24 @@ def transformer_ae_small(): hparams = transformer.transformer_small() hparams.batch_size = 2048 hparams.learning_rate_warmup_steps = 4000 + hparams.num_hidden_layers = 3 + hparams.hidden_size = 384 + hparams.filter_size = 2048 + hparams.label_smoothing = 0.0 + hparams.add_hparam("c_size", 16) + hparams.add_hparam("latent_length", 4) + hparams.add_hparam("noise_dev", 1.0) + hparams.add_hparam("d_mix", 0.5) + # Bottleneck kinds supported: dense, semhash, gumbel-softmax. + hparams.add_hparam("bottleneck_kind", "semhash") + hparams.add_hparam("do_ae", int(True)) + hparams.add_hparam("drop_inputs", int(False)) hparams.add_hparam("z_size", 128) - hparams.add_hparam("z_size2", 0) - hparams.add_hparam("v_size", 1024*32) - hparams.add_hparam("num_compress_steps", 4) - hparams.add_hparam("kl_warmup_steps", 60000) - hparams.add_hparam("startup_steps", 30000) + hparams.add_hparam("v_size", 1024*64) + hparams.add_hparam("max_context_length", 64) + hparams.add_hparam("num_compress_steps", 3) + hparams.add_hparam("kl_steps", 35000) + hparams.add_hparam("startup_steps", 10000) hparams.add_hparam("kmeans_lr_factor", 0.002) hparams.add_hparam("z_dropout", 0.1) hparams.add_hparam("is_2d", 0) @@ -515,6 +656,7 @@ def transformer_ae_cifar(): hparams.is_2d = 1 hparams.learning_rate_warmup_steps = 8000 hparams.learning_rate = 0.2 + hparams.ffn_layer = "conv_hidden_relu_with_sepconv" return hparams @@ -522,11 +664,8 @@ def transformer_ae_cifar(): def transformer_ae_base(): """Set of hyperparameters.""" hparams = transformer_ae_small() + hparams.batch_size = 1024 hparams.hidden_size = 512 - hparams.filter_size = 2048 - hparams.attention_dropout = 0.0 - hparams.relu_dropout = 0.0 - hparams.dropout = 0.0 - hparams.num_hidden_layers = 4 - hparams.z_size = 256 + hparams.filter_size = 4096 + hparams.num_hidden_layers = 6 return hparams diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 44a6f5208..ef362ed90 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -108,7 +108,8 @@ def nth_model(n): hparams.problems[n], n, dp, - devices.ps_devices(all_workers=True)) + devices.ps_devices(all_workers=True), + decode_hparams=decode_hparams) if mode == tf.estimator.ModeKeys.PREDICT: return model_class.infer( features, diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 85f339511..07f4622d6 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -66,7 +66,8 @@ def __init__(self, problem_hparams, problem_idx=0, data_parallelism=None, - ps_devices=None): + ps_devices=None, + decode_hparams=None): """Create a T2TModel. Args: @@ -77,6 +78,7 @@ def __init__(self, data_parallelism: a expert_utils.parallelism (specifies devices for data parallelism). ps_devices: a list of devices to be used for experts + decode_hparams: a hyperparameter object with decoding parameters. Returns: a T2TModel @@ -103,6 +105,7 @@ def __init__(self, tf.logging.info("Unsetting shared_embedding_and_softmax_weights.") hparams.shared_embedding_and_softmax_weights = 0 self._hparams = hparams + self._decode_hparams = copy.copy(decode_hparams) self._data_parallelism = data_parallelism self._num_datashards = data_parallelism.n self._ps_devices = ps_devices @@ -146,6 +149,10 @@ def _create_modalities(self, problem_hparams, hparams): def has_input(self): return self._problem_hparams.input_modality + def prepare_features_for_infer(self, features): + """Called before inference to allow adding infer-specific features.""" + pass + def eval_autoregressive(self, features=None, decode_length=50, @@ -195,11 +202,11 @@ def infer(self, """ # TODO(rsepassi): Make decoding work with real-valued model outputs # (i.e. if the target modality is RealModality). - if not self.has_input: - # since there is no input, it is more interesting to see randomly - # generated sequences, than to see the most likely sequence repeatedly. - beam_size = 1 - self._hparams.sampling_method = "random" + self.prepare_features_for_infer(features) + if not self.has_input and beam_size > 1: + tf.logging.warn("Beam searching for a model with no inputs.") + if not self.has_input and self._hparams.sampling_method != "random": + tf.logging.warn("Non-random sampling for a model with no inputs.") if is_class_modality( self._hparams.problems[self._problem_idx].target_modality): beam_size = 1 # No use to run beam-search for a single class. @@ -540,6 +547,7 @@ def model_fn(self, features, skip=False, last_position_only=False): ] all_previous_modalities.extend(previous_modalities) do_reuse = input_modality.name in all_previous_modalities + transformed_features[key + "_raw"] = sharded_features[key] with tf.variable_scope(input_modality.name, reuse=do_reuse): transformed_features[key] = input_modality.bottom_sharded( sharded_features[key], dp) @@ -547,8 +555,13 @@ def model_fn(self, features, skip=False, last_position_only=False): # Target space id just gets copied to every shard. if "target_space_id" in features: - transformed_features["target_space_id"] = [features["target_space_id"] - ] * self._num_datashards + transformed_features["target_space_id"] = [ + features["target_space_id"]] * self._num_datashards + + # For features without a modality ending in "_raw", we pass them raw. + for key, feature in sharded_features.items(): + if key not in transformed_features and key.endswith("_raw"): + transformed_features[key] = feature # Targets are transformed by the autoregressive part of the modality previous_tgt_modalities = [ @@ -564,7 +577,7 @@ def model_fn(self, features, skip=False, last_position_only=False): sharded_features["targets"], dp) # Allows later access to pre-embedding raw targets. - transformed_features["raw_targets"] = sharded_features["targets"] + transformed_features["targets_raw"] = sharded_features["targets"] # Construct the model body. with tf.variable_scope("body", reuse=self._problem_idx > 0): From 9a651716367308fe55820dc37578371e177e5d91 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 1 Nov 2017 13:08:49 -0700 Subject: [PATCH 0539/4095] Enable early stopping in train_and_evaluate PiperOrigin-RevId: 174224411 --- tensor2tensor/utils/trainer_utils.py | 34 ++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index e90e2dd10..57d45fb50 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -63,6 +63,19 @@ flags.DEFINE_string("data_dir", None, "Directory with training data.") flags.DEFINE_integer("train_steps", 250000, "The number of steps to run training for.") +flags.DEFINE_string("eval_early_stopping_metric", "loss", + "If --schedule=train_and_evaluate and " + "--eval_early_stopping_steps is not None, then stop when " + "--eval_early_stopping_metric has not decreased for " + "--eval_early_stopping_steps") +flags.DEFINE_integer("eval_early_stopping_steps", None, + "If --schedule=train_and_evaluate and " + "--eval_early_stopping_steps is not None, then stop when " + "--eval_early_stopping_metric has not decreased for " + "--eval_early_stopping_steps") +flags.DEFINE_bool("eval_early_stopping_metric_minimize", True, + "Whether to check for the early stopping metric going down " + "or up.") flags.DEFINE_bool("eval_run_autoregressive", False, "Run eval autoregressively where we condition on previous" "generated output instead of the actual target.") @@ -148,7 +161,20 @@ def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, save_steps=10, output_dir=run_config.model_dir, show_dataflow=True, - show_memory=True,)) + show_memory=True, + )) + if FLAGS.schedule == "train_and_evaluate": + if FLAGS.local_eval_frequency: + train_monitors.append( + tf.contrib.learn.monitors.ValidationMonitor( + input_fn=input_fns[tf.estimator.ModeKeys.EVAL], + eval_steps=eval_steps, + every_n_steps=FLAGS.local_eval_frequency, + hooks=eval_hooks, + early_stopping_rounds=FLAGS.eval_early_stopping_steps, + early_stopping_metric=FLAGS.eval_early_stopping_metric, + early_stopping_metric_minimize=FLAGS. + eval_early_stopping_metric_minimize)) optional_kwargs = {} if FLAGS.export_saved_model: @@ -164,7 +190,6 @@ def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, eval_input_fn=input_fns[tf.estimator.ModeKeys.EVAL], train_steps=train_steps, eval_steps=eval_steps, - min_eval_frequency=FLAGS.local_eval_frequency, train_monitors=train_monitors, eval_hooks=eval_hooks, eval_delay_secs=0, @@ -378,8 +403,9 @@ def is_chief(): def session_config(): """The TensorFlow Session config to use.""" - graph_options = tf.GraphOptions(optimizer_options=tf.OptimizerOptions( - opt_level=tf.OptimizerOptions.L1, do_function_inlining=False)) + graph_options = tf.GraphOptions( + optimizer_options=tf.OptimizerOptions( + opt_level=tf.OptimizerOptions.L1, do_function_inlining=False)) if FLAGS.experimental_optimize_placement: rewrite_options = tf.RewriterConfig(optimize_tensor_layout=True) From fa68c153c0ae334f346afd786691384ded2566e6 Mon Sep 17 00:00:00 2001 From: Katherine Lee <katherinelee@google.com> Date: Wed, 1 Nov 2017 17:13:41 -0700 Subject: [PATCH 0540/4095] Moving transformer_sketch to open source. PiperOrigin-RevId: 174260338 --- tensor2tensor/models/__init__.py | 1 + tensor2tensor/models/transformer_sketch.py | 162 +++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 tensor2tensor/models/transformer_sketch.py diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index f5fafe706..74c72d8e1 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -39,6 +39,7 @@ from tensor2tensor.models import transformer_alternative from tensor2tensor.models import transformer_moe from tensor2tensor.models import transformer_revnet +from tensor2tensor.models import transformer_sketch from tensor2tensor.models import transformer_vae from tensor2tensor.models import xception # pylint: enable=unused-import diff --git a/tensor2tensor/models/transformer_sketch.py b/tensor2tensor/models/transformer_sketch.py new file mode 100644 index 000000000..b7bd9b1ef --- /dev/null +++ b/tensor2tensor/models/transformer_sketch.py @@ -0,0 +1,162 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Transformer Sketch for im2sketch problems. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.layers import common_hparams +from tensor2tensor.models import transformer +from tensor2tensor.models import transformer_vae +from tensor2tensor.models.transformer import transformer_base +from tensor2tensor.models.transformer import transformer_n_da +from tensor2tensor.models.transformer import transformer_small +from tensor2tensor.utils import registry + + +@registry.register_model +class TransformerSketch(transformer.Transformer): + """Transformer with strided convolutions.""" + + def encode(self, inputs, target_space, hparams): + """Add two layers strided convolutions ontop of encode.""" + hparams.num_compress_steps = 2 + compressed_inputs = transformer_vae.compress(inputs, c=None, is_2d=True, + hparams=hparams, + name="convolutions") + + return super(TransformerSketch, self).encode( + compressed_inputs, target_space, hparams) + + +@registry.register_hparams +def transformer_sketch(): + """Basic transformer_sketch hparams.""" + hparams = transformer_n_da() + hparams.batch_size = 2048 + hparams.max_length = 784 + hparams.clip_grad_norm = 5. + hparams.learning_rate_decay_scheme = "noam" + hparams.learning_rate = 0.2 + hparams.learning_rate_warmup_steps = 10000 + hparams.num_hidden_layers = 6 + hparams.initializer = "orthogonal" + hparams.sampling_method = "random" + return hparams + + +@registry.register_hparams +def transformer_base_sketch(): + """Parameters based on base.""" + hparams = transformer_base() + hparams.batch_size = 2048 + hparams.max_length = 784 + hparams.clip_grad_norm = 5. + hparams.learning_rate_decay_scheme = "noam" + hparams.learning_rate_warmup_steps = 8000 + hparams.learning_rate = 0.2 + hparams.num_hidden_layers = 6 + hparams.initializer = "orthogonal" + hparams.sampling_method = "random" + return hparams + + +@registry.register_hparams +def transformer_small_sketch(): + """Modified transformer_small.""" + hparams = transformer_small() + hparams.batch_size = 2048 + hparams.max_length = 784 + hparams.clip_grad_norm = 5. + hparams.learning_rate_decay_scheme = "noam" + hparams.learning_rate = 0.1 + hparams.initializer = "orthogonal" + hparams.sampling_method = "random" + hparams.learning_rate_warmup_steps = 10000 + return hparams + + +@registry.register_hparams +def transformer_sketch_2layer(): + hparams = transformer_sketch() + hparams.num_hidden_layers = 2 + return hparams + + +@registry.register_hparams +def transformer_sketch_4layer(): + hparams = transformer_sketch() + hparams.num_hidden_layers = 4 + return hparams + + +@registry.register_hparams +def transformer_sketch_6layer(): + hparams = transformer_sketch() + hparams.num_hidden_layers = 6 + return hparams + + +@registry.register_ranged_hparams("transformer_sketch_ranged") +def transformer_sketch_ranged(rhp): + """Range of hparams for vizier.""" + + hparams = transformer_sketch() + common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp) + + rhp.set_categorical("ffn_layer", + ["conv_hidden_relu_with_sepconv", "conv_hidden_relu"]) + rhp.set_discrete("batch_size", [1024, 2048, 4096]) + rhp.set_discrete("num_hidden_layers", [2, 3, 4, 5, 6]) + rhp.set_discrete("hidden_size", [32, 64, 128, 256, 512, 1024], + scale=rhp.LOG_SCALE) + rhp.set_discrete("kernel_height", [1, 3, 5, 7]) + rhp.set_discrete("kernel_width", [1, 3, 5, 7]) + rhp.set_discrete("compress_steps", [0, 1, 2]) + rhp.set_float("dropout", 0.0, 0.5) + rhp.set_float("weight_decay", 1e-4, .03, scale=rhp.LOG_SCALE) + rhp.set_float("label_smoothing", 0.0, 0.2) + rhp.set_float("clip_grad_norm", 0.01, 8.0, scale=rhp.LOG_SCALE) + rhp.set_float("learning_rate", 0.1, 1.0, scale=rhp.LOG_SCALE) + rhp.set_categorical("initializer", + ["uniform", "orthogonal", "uniform_unit_scaling"]) + rhp.set_float("initializer_gain", 0.5, 3.5) + rhp.set_categorical("learning_rate_decay_scheme", + ["none", "sqrt", "noam", "exp10k"]) + rhp.set_float("optimizer_adam_epsilon", 1e-7, 1e-2, scale=rhp.LOG_SCALE) + rhp.set_float("optimizer_adam_beta1", 0.8, 0.9) + rhp.set_float("optimizer_adam_beta2", 0.995, 0.999) + rhp.set_categorical("optimizer", [ + "Adam", "Adagrad", "Momentum", "RMSProp", "SGD", "YellowFin"]) + + +@registry.register_hparams +def transformer_opt(): + """Parameters that work better.""" + hparams = transformer_sketch() + hparams.batch_size = 1024 + hparams.learning_rate = 0.28 + hparams.num_hidden_layers = 3 + hparams.dropout = 0.35 + hparams.ffn_layer = "conv_hidden_relu_with_sepconv" + hparams.hidden_size = 128 + hparams.initializer_gain = 2.6 + hparams.weight_decay = 0. + return hparams From 89282c98e974e5d96a79501d46edb1b98a2293b1 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Wed, 1 Nov 2017 17:42:25 -0700 Subject: [PATCH 0541/4095] Cleanup transformer moe PiperOrigin-RevId: 174263523 --- tensor2tensor/layers/common_attention.py | 9 +- tensor2tensor/layers/common_layers.py | 14 +- tensor2tensor/models/transformer.py | 3 +- tensor2tensor/models/transformer_moe.py | 479 ++++++++++++++++++----- 4 files changed, 385 insertions(+), 120 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 2178e6fe5..cf7ef9115 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -2958,15 +2958,20 @@ def pad_and_reshape(x): @expert_utils.add_var_scope() def multihead_self_attention_reduced( - x, factor, nonlinearity, reduction_type, multihead_params): + x, + factor, + multihead_params, + nonlinearity="none", + reduction_type="conv", +): """Reduce the length dimension by compressing with conv. Args: x (tf.Tensor): float32 of shape [batch, length, depth] factor (int): compression factor for the memory sequence + multihead_params (dict): parameters for multihead attention nonlinearity (str): Add some non-linearity after the memory block reduction_type (str): type of compression - multihead_params (dict): parameters for multihead attention Returns: (tf.Tensor): float32 of shape [batch, length, depth] diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 08fd2f56b..ce68a9fe1 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -326,7 +326,7 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): raise ValueError("Inputs to conv must have statically known rank 4. " "Shape: " + str(static_shape)) # Add support for left padding. - if "padding" in kwargs and kwargs["padding"] == "LEFT": + if kwargs.get("padding") == "LEFT": dilation_rate = (1, 1) if "dilation_rate" in kwargs: dilation_rate = kwargs["dilation_rate"] @@ -344,15 +344,9 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): def conv2d_kernel(kernel_size_arg, name_suffix): """Call conv2d but add suffix to name.""" - if "name" in kwargs: - original_name = kwargs["name"] - name = kwargs.pop("name") + "_" + name_suffix - else: - original_name = None - name = "conv_" + name_suffix - original_force2d = None - if "force2d" in kwargs: - original_force2d = kwargs.pop("force2d") + name = "{}_{}".format(kwargs.get("name", "conv"), name_suffix) + original_name = kwargs.pop("name", None) + original_force2d = kwargs.pop("force2d", None) result = conv_fn(inputs, filters, kernel_size_arg, name=name, **kwargs) if original_name is not None: kwargs["name"] = original_name # Restore for other calls. diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 9a090e40f..5571875dc 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -579,7 +579,8 @@ def transformer_decoder(decoder_input, encoder_decoder_attention_bias, hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, hparams.num_heads, + hparams.hidden_size, + hparams.num_heads, hparams.attention_dropout) x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): diff --git a/tensor2tensor/models/transformer_moe.py b/tensor2tensor/models/transformer_moe.py index c8a32a667..014a390c6 100644 --- a/tensor2tensor/models/transformer_moe.py +++ b/tensor2tensor/models/transformer_moe.py @@ -21,9 +21,9 @@ from __future__ import division from __future__ import print_function -# Dependency imports +import functools -from six.moves import xrange # pylint: disable=redefined-builtin +# Dependency imports from tensor2tensor.layers import common_attention from tensor2tensor.layers import common_hparams @@ -36,11 +36,43 @@ import tensorflow as tf +# The transformer architecture can be defined using the layer_types hparams. +# If not defined, the default types and num_hidden_layers are used as fallback +# values. +# +# Examples of usage: +# "a/a/a/a/a/a": Original base transformer (6 encoder and decoder layers of +# multihead full attention) +# "a/a/a-moe/a": 4 layers with 1 moe at layer 3 +# "loc/red/loc/red": Alternate between local and memory compressed attention +# "a/a/a#": Encoder only model (3 layers) +# "#a/a/a": Decoder only model (3 layers) +# "a/a-moe#a/a/a": Encoder (2 layers with 1 moe), decoder (3 layers) +# Note that all combinaisons are not necessarily possibles (some attention +# types are not necessarily compatible with the encoder, or can't accept certain +# types of masking) + +SEP_ENCODEC = "#" +SEP_LAYER = "/" +SEP_FF = "-" + + +def partial(fct, *args, **kwargs): + """Wrapper around functools.partial for Python 2 compatibility with wraps.""" + new_fct = functools.partial(fct, *args, **kwargs) + new_fct = functools.wraps(fct)(new_fct) + return new_fct + + @registry.register_model class TransformerMoe(t2t_model.T2TModel): """Attention net. See file docstring.""" + @expert_utils.add_var_scope("transformer_moe") def model_fn_body_sharded(self, sharded_features): + + # ========= Prepare the input and target ========= + hparams = self._hparams dp = self._data_parallelism targets = sharded_features["targets"] @@ -50,10 +82,10 @@ def model_fn_body_sharded(self, sharded_features): inputs = dp(common_layers.flatten4d3d, inputs) targets = dp(common_layers.flatten4d3d, targets) - def preprocess(x): + def dp_preprocess(x): return dp(common_layers.layer_preprocess, x, hparams) - def postprocess(x, y): + def dp_postprocess(x, y): return dp(common_layers.layer_postprocess, x, y, hparams) (encoder_input, encoder_self_attention_bias, @@ -66,98 +98,299 @@ def postprocess(x, y): 1.0 - hparams.layer_prepostprocess_dropout) decoder_input = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) - extra_loss = 0 + cache = dict(extra_loss=0) moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] expert_fn = expert_utils.ffn_expert_fn( hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) + + # ========= Define some utils decorators ========= + + def prepostprocess(fct): + """Add pre and post processing.""" + # WARNING: Should be applied after dp (pre/post-process use dp and + # can be applied to function which doesn't use dp) + @functools.wraps(fct) + def decorated(x, *args, **kwargs): + x = dp_preprocess(x) + y = fct(x, *args, **kwargs) + return dp_postprocess(x, y) + return decorated + + def dp_wrapper(fct): + """Encapsulate the function in a data parallelism object.""" + @functools.wraps(fct) + def decorated(*args, **kwargs): + return dp(fct, *args, **kwargs) + return decorated + + def add_kwargs( + fct, + enco_kwargs=None, + deco_kwargs=None, + endeco_kwargs=None, # Enco-deco attention: overwrite deco_kwargs + ): + """Allow to have different arguments for the encoder and decoder.""" + # WARNING: If this decorator is applied before dp_wrapper, the kwargs + # may not be correctly dipatched across the devices. + @functools.wraps(fct) + def decorated(*args, **kwargs): + current_scope = tf.contrib.framework.get_name_scope() + if "/encoder/" in current_scope: + kwargs.update(enco_kwargs or {}) + elif "/decoder/" in current_scope: + kwargs.update(deco_kwargs or {}) + if "/att_ende_" in current_scope: + kwargs.update(endeco_kwargs or {}) + return fct(*args, **kwargs) + return decorated + + def capture_extra_loss(fct, loss_coef=1.0): + """Capture the additional loss.""" + @functools.wraps(fct) + def decorated(*args, **kwargs): + y, loss = fct(*args, **kwargs) + cache["extra_loss"] += loss * loss_coef + return y + return decorated + + def remove_kwargs(fct, extra_params): + """Remove some unused parameters.""" + @functools.wraps(fct) + def decorated(*args, **kwargs): + for k in extra_params: # Remove the extra params + kwargs.pop(k, None) + return fct(*args, **kwargs) + return decorated + + # def pad_remover(fct): + # """Remove/restore the padding on the input.""" + # @functools.wraps(fct) + # def decorated(x, *args, **kwargs): + # x = pad_remover.remove(x) + # x = fct(x, *args, **kwargs) + # x = pad_remover.restore(x) + # return x + # return decorated + + # ========= Define the available layers ========= + total_key_depth = hparams.attention_key_channels or hparams.hidden_size + total_value_depth = hparams.attention_value_channels or hparams.hidden_size + + # Multi-head full attention layer + multihead_attention = partial( + common_attention.multihead_attention, + total_key_depth=total_key_depth, + total_value_depth=total_value_depth, + output_depth=hparams.hidden_size, + num_heads=hparams.num_heads, + dropout_rate=hparams.attention_dropout, + ) + multihead_attention = dp_wrapper(multihead_attention) + multihead_attention = add_kwargs( # After dp to correctly dispatch kwargs + multihead_attention, + enco_kwargs={"bias": encoder_self_attention_bias}, + deco_kwargs={"bias": decoder_self_attention_bias}, + endeco_kwargs={"bias": encoder_decoder_attention_bias}, + ) + multihead_attention = prepostprocess(multihead_attention) + + # Local attention layer + # Reuse same parameters as multihead_attention (dp and pre/post-processing + # already applied) + # Only works for self attention. Always mask the future. + local_attention = partial( + multihead_attention, + block_length=hparams.attention_loc_block_length, + attention_type="local_mask_right", + ) + + # Memory-compressed multihead self attention layer + # Only works for self attention. Always mask the future. + compressed_attention = partial( + common_attention.multihead_self_attention_reduced, + factor=hparams.attention_red_factor, + nonlinearity=hparams.attention_red_nonlinearity, + reduction_type=hparams.attention_red_type, + multihead_params=dict( + total_key_depth=total_key_depth, + total_value_depth=total_value_depth, + num_heads=hparams.num_heads, + dropout_rate=hparams.attention_dropout, + ) + ) + compressed_attention = remove_kwargs( + compressed_attention, ["memory_antecedent"]) + compressed_attention = dp_wrapper(compressed_attention) + compressed_attention = prepostprocess(compressed_attention) + + # Mixture of expert layer + distributed_moe = partial( + expert_utils.distributed_moe, + dp, + self._ps_devices, + train=hparams.mode == tf.estimator.ModeKeys.TRAIN, + input_size=hparams.hidden_size, + expert_fn=expert_fn, + num_experts=hparams.moe_num_experts, + k=hparams.moe_k, + loss_coef=hparams.moe_loss_coef + ) + distributed_moe = capture_extra_loss(distributed_moe) + distributed_moe = prepostprocess(distributed_moe) + + # FC layer + conv_hidden_relu = partial( + common_layers.conv_hidden_relu, + hidden_size=hparams.filter_size, + output_size=hparams.hidden_size, + dropout=hparams.relu_dropout, + ) + conv_hidden_relu = dp_wrapper(conv_hidden_relu) + conv_hidden_relu = prepostprocess(conv_hidden_relu) + + # Separable convolution layer + # Reuse conv_hidden_relu (dp and pre/post-processing already applied) + # Mask the future for the decoder only + sep_conv_relu = partial( + conv_hidden_relu, + # Parameters copied from the transformer model, could add hparams + kernel_size=(3, 1), + second_kernel_size=(31, 1), + ) + sep_conv_relu = add_kwargs( + sep_conv_relu, + enco_kwargs={"padding": "SAME"}, + deco_kwargs={"padding": "LEFT"}, # Mask future for decoder + ) + + # This dictionary contains the list of all available layers + available_layers = dict( + # Attention layers + a=multihead_attention, # Standard multihead full attention + loc=local_attention, # Local attention + red=compressed_attention, # Memory-compressed attention + mem=None, # Memory efficient + # Feed-forward layers + moe=distributed_moe, # Mixture of expert layer + sep=sep_conv_relu, # Separable convolution + fc=conv_hidden_relu, # Fully connected + ) + + def extract_layer_types(layer_types): + """Parse the layer string. + + Args: + layer_types (str): String containing the network architecture. See + top file comment for examples of format. + + Returns: + list[tuple[str, str]]: Encoder layers: list of (attention, feed-forward) + list[tuple[str, str, str]]: Decoder layers: list of (self-attention, + enc-dec attention, feed-forward) + """ + # If the architecture has not explicitly been set, we just construct a + # standard transformer with the fallback values + if not layer_types: + layer_types = SEP_LAYER.join( + [hparams.default_att] * hparams.num_hidden_layers) + + # If encoder not explicitly defined, the encoder will have the same + # structure as the decoder + layer_types = layer_types.split(SEP_ENCODEC) + if len(layer_types) == 1: + layer_types *= 2 + + # Some models don't need the encoder (ex: language modeling) + # TODO(epot): What are the other conditions (has_input ?) + if hparams.prepend_mode != "none": + layer_types[0] = "" + + # Extend the blocks and fill them with the default values if not specified + final_layers = ([], []) + for i, blocks_str in enumerate(layer_types): + for blocks_str in blocks_str.split(SEP_LAYER): + if not blocks_str: + continue + blocks_list = blocks_str.split(SEP_FF) + # Eventually use the fallback values for the layer_types. If the + # encoder is empty, do not use the enco-deco attention. + self_att = blocks_list[0] or hparams.default_att + ende_att = hparams.default_att if layer_types[0] else "_" + ff = hparams.default_ff + if len(blocks_list) > 1: + ff = blocks_list[-1] + if len(blocks_list) == 3: + ende_att = blocks_list[1] + if i == 0: # Encoder + blocks_tuple = (self_att, ff) + elif i == 1: # Decoder + blocks_tuple = (self_att, ende_att, ff) + final_layers[i].append(blocks_tuple) + + return final_layers + + # ========= Construct the transformer encoder and decoder ========= + + encoder_layers, decoder_layers = extract_layer_types(hparams.layer_types) + + # Display the encoder-decoder architecture + def print_layer(name, layers): + tf.logging.info("{} architecture:".format(name)) + for i, l in enumerate(layers): + tf.logging.info(" * Layer {}: {}".format(i, " - ".join(l))) + print_layer("Encoder", encoder_layers) + print_layer("Decoder", decoder_layers) + + encoder_outputs = [] + x = encoder_input - for layer in xrange(hparams.num_hidden_layers): - with tf.variable_scope("encoder_layer_%d" % layer): - with tf.variable_scope("encoder_self_attention"): - y = dp( - common_attention.multihead_attention, - preprocess(x), - None, - encoder_self_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout) - x = postprocess(x, y) - with tf.variable_scope("ffn"): - if str(layer) in hparams.moe_layers_encoder.split(","): - y, loss = expert_utils.distributed_moe( - dp, - self._ps_devices, - preprocess(x), - hparams.mode == tf.estimator.ModeKeys.TRAIN, - input_size=hparams.hidden_size, - expert_fn=expert_fn, - num_experts=hparams.moe_num_experts, - k=hparams.moe_k, - loss_coef=hparams.moe_loss_coef) - extra_loss += loss - else: - y = dp( - common_layers.conv_hidden_relu, - preprocess(x), - hparams.filter_size, - hparams.hidden_size, - dropout=hparams.relu_dropout) - x = postprocess(x, y) - encoder_output = preprocess(x) + with tf.variable_scope("encoder"): + for layer_num, block_types in enumerate(encoder_layers): + # Each encoder layers is composed of two blocks: + # * self-attention block + # * feed-forward block + att_type, ff_type = block_types + with tf.variable_scope("layer_{}".format(layer_num)): + with tf.variable_scope("att_{}".format(att_type)): + x = available_layers[att_type]( + x, + memory_antecedent=None, + ) + with tf.variable_scope("ff_{}".format(ff_type)): + x = available_layers[ff_type](x) + encoder_outputs.append(x) + if encoder_outputs: + encoder_outputs[-1] = dp_preprocess(x) + x = decoder_input - for layer in xrange(hparams.num_hidden_layers): - with tf.variable_scope("decoder_layer_%d" % layer): - with tf.variable_scope("decoder_self_attention"): - y = dp( - common_attention.multihead_attention, - preprocess(x), - None, - decoder_self_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout) - x = postprocess(x, y) - with tf.variable_scope("encoder_decoder_attention"): - y = dp( - common_attention.multihead_attention, - preprocess(x), - encoder_output, - encoder_decoder_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout) - x = postprocess(x, y) - with tf.variable_scope("ffn"): - if str(layer) in hparams.moe_layers_decoder.split(","): - y, loss = expert_utils.distributed_moe( - dp, - self._ps_devices, - preprocess(x), - hparams.mode == tf.estimator.ModeKeys.TRAIN, - input_size=hparams.hidden_size, - expert_fn=expert_fn, - num_experts=hparams.moe_num_experts, - k=hparams.moe_k, - loss_coef=hparams.moe_loss_coef) - extra_loss += loss - else: - y = dp( - common_layers.conv_hidden_relu, - preprocess(x), - hparams.filter_size, - hparams.hidden_size, - dropout=hparams.relu_dropout) - x = postprocess(x, y) - x = preprocess(x) + with tf.variable_scope("decoder"): + for layer_num, block_types in enumerate(decoder_layers): + # Each decoder layers is composed of three blocks: + # * self-attention block + # * enco-deco attention block (optional) + # * feed-forward block + self_att_type, att_ende_type, ff_type = block_types + with tf.variable_scope("layer_{}".format(layer_num)): + with tf.variable_scope("self_att_{}".format(self_att_type)): + x = available_layers[self_att_type]( + x, + memory_antecedent=None, + ) + with tf.variable_scope("att_ende_{}".format(att_ende_type)): + # Only add the enco-deco attention layer if there is an encoder + if encoder_outputs: + x = available_layers[att_ende_type]( + x, + memory_antecedent=encoder_outputs[-1], + ) + with tf.variable_scope("ff_{}".format(ff_type)): + x = available_layers[ff_type](x) + # If normalization is done in layer_preprocess, then it should also be + # done on the output, since the output can grow very large, being the sum + # of a whole stack of unnormalized layer outputs. + x = dp_preprocess(x) decoder_output = dp(tf.expand_dims, x, 2) - return decoder_output, extra_loss + return decoder_output, cache["extra_loss"] @registry.register_hparams @@ -185,6 +418,9 @@ def transformer_moe_base(): hparams.num_sampled_classes = 0 hparams.label_smoothing = 0.0 hparams.shared_embedding_and_softmax_weights = int(True) + # According to noam, ("n", "da") seems better for harder-to-learn models + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" hparams.add_hparam("filter_size", 2048) # Add new ones like this. # attention-related flags @@ -192,8 +428,11 @@ def transformer_moe_base(): hparams.add_hparam("attention_key_channels", 0) hparams.add_hparam("attention_value_channels", 0) hparams.add_hparam("ffn_layer", "conv_hidden_relu") - hparams.add_hparam("parameter_attention_key_channels", 0) - hparams.add_hparam("parameter_attention_value_channels", 0) + # Other attention types params + hparams.add_hparam("attention_loc_block_length", 256) + hparams.add_hparam("attention_red_factor", 3) + hparams.add_hparam("attention_red_type", "conv") + hparams.add_hparam("attention_red_nonlinearity", "none") # All hyperparameters ending in "dropout" are automatically set to 0.0 # when not in training mode. hparams.add_hparam("attention_dropout", 0.0) @@ -201,28 +440,54 @@ def transformer_moe_base(): hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("nbr_decoder_problems", 1) hparams.add_hparam("proximity_bias", int(False)) - # FLAGS RELATED TO MIXTURE-OF-EXPERTS - # comma-separated list of layer numbers. - # At each of these layers, we replace the ffn with a mixture of experts. - hparams.add_hparam("moe_layers_encoder", "2") - hparams.add_hparam("moe_layers_decoder", "2") + + # Decoder layers type. If set, num_decoder_layers parameter will be ignored + # and the number of decoder layer will be deduced from the string + # See top file comment for example of usage + hparams.add_hparam("layer_types", "") + # Default attention type (ex: a, loc, red,...) and feed-forward type (ex: fc, + # sep, moe,...) + hparams.add_hparam("default_att", "a") + hparams.add_hparam("default_ff", "fc") + return hparams @registry.register_hparams -def transformer_no_moe(): - """Without the mixture of experts (for comparison).""" +def transformer_moe_8k(): + """Hyper parameters specifics for long sequence generation.""" hparams = transformer_moe_base() - hparams.moe_layers_encoder = "" - hparams.moe_layers_decoder = "" + + hparams.batch_size = 8192 + hparams.max_length = 0 # max_length == batch_size + hparams.eval_drop_long_sequences = int(True) + hparams.min_length_bucket = 256 # Avoid cyclic problems for big batches + + hparams.default_ff = "sep" + hparams.hidden_size = 1024 + return hparams @registry.register_hparams -def transformer_moe_1b(): - """1-billion parameter model - requires multi-gpu sync training.""" - hparams = transformer_moe_base() - hparams.moe_n1 = 128 - hparams.moe_layers_encoder = "1,3" - hparams.moe_layers_decoder = "1,3" +def transformer_moe_12k(): + """Hyper parameters specifics for long sequence generation.""" + hparams = transformer_moe_8k() + hparams.batch_size = 12000 + # At 12k, the softmax become the memory bottleneck + hparams.factored_logit = int(True) return hparams + + +@registry.register_hparams +def transformer_moe_prepend_8k(): + """Model which formulate a seq2seq problem as language modeling.""" + hparams = transformer_moe_8k() + hparams.prepend_mode = "prepend_inputs_masked_attention", + hparams.eval_drop_long_sequences = int(False), + hparams.max_input_seq_length = 7500, + hparams.layer_types = "loc/red/loc-moe/red/loc" + hparams.moe_num_experts = 256 + return hparams + + From 5aedc3deda7b5e640f201874c38413822cb4daf3 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 2 Nov 2017 13:29:25 -0700 Subject: [PATCH 0542/4095] Working train and eval on TPU for Transformer WMT ende PiperOrigin-RevId: 174372090 --- tensor2tensor/models/transformer.py | 152 ++++++++++++--------------- tensor2tensor/tpu/tpu_trainer.py | 29 +++-- tensor2tensor/tpu/tpu_trainer_lib.py | 68 ++++++------ 3 files changed, 122 insertions(+), 127 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 5571875dc..1d8603687 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -64,24 +64,21 @@ def encode(self, inputs, target_space, hparams): encoder_input, self_attention_bias, encoder_decoder_attention_bias = ( transformer_prepare_encoder(inputs, target_space, hparams)) - encoder_input = tf.nn.dropout( - encoder_input, 1.0 - hparams.layer_prepostprocess_dropout) + encoder_input = tf.nn.dropout(encoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) - encoder_output = transformer_encoder( - encoder_input, - self_attention_bias, - hparams) + encoder_output = transformer_encoder(encoder_input, self_attention_bias, + hparams) return encoder_output, encoder_decoder_attention_bias - def decode( - self, - decoder_input, - encoder_output, - encoder_decoder_attention_bias, - decoder_self_attention_bias, - hparams, - cache=None): + def decode(self, + decoder_input, + encoder_output, + encoder_decoder_attention_bias, + decoder_self_attention_bias, + hparams, + cache=None): """Decode Transformer outputs from encoder representation. Args: @@ -129,11 +126,12 @@ def model_fn_body(self, features): """ hparams = self._hparams - inputs = features["inputs"] - - target_space = features["target_space_id"] - encoder_output, encoder_decoder_attention_bias = self.encode( - inputs, target_space, hparams) + inputs = features.get("inputs") + encoder_output, encoder_decoder_attention_bias = (None, None) + if inputs is not None: + target_space = features["target_space_id"] + encoder_output, encoder_decoder_attention_bias = self.encode( + inputs, target_space, hparams) targets = features["targets"] targets = common_layers.flatten4d3d(targets) @@ -141,15 +139,11 @@ def model_fn_body(self, features): decoder_input, decoder_self_attention_bias = transformer_prepare_decoder( targets, hparams) - return self.decode( - decoder_input, - encoder_output, - encoder_decoder_attention_bias, - decoder_self_attention_bias, - hparams) + return self.decode(decoder_input, encoder_output, + encoder_decoder_attention_bias, + decoder_self_attention_bias, hparams) - def _greedy_infer( - self, features, decode_length, last_position_only=True): + def _greedy_infer(self, features, decode_length, last_position_only=True): """Fast version of greedy decoding. Args: @@ -185,18 +179,16 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, Returns: samples: an integer `Tensor`. Top samples from the beam search """ - return self._fast_decode( - features, decode_length, last_position_only, beam_size, top_beams, - alpha) - - def _fast_decode( - self, - features, - decode_length, - last_position_only=True, - beam_size=1, - top_beams=1, - alpha=1.0): + return self._fast_decode(features, decode_length, last_position_only, + beam_size, top_beams, alpha) + + def _fast_decode(self, + features, + decode_length, + last_position_only=True, + beam_size=1, + top_beams=1, + alpha=1.0): """Fast decoding. Implements both greedy and beam search decoding, uses beam search iff @@ -277,12 +269,10 @@ def preprocess_targets(targets, i): # TODO(llion): Explain! Is this even needed? targets = tf.cond( - tf.equal(i, 0), - lambda: tf.zeros_like(targets), - lambda: targets) + tf.equal(i, 0), lambda: tf.zeros_like(targets), lambda: targets) if hparams.pos == "timing": - targets += timing_signal[:, i:i+1] + targets += timing_signal[:, i:i + 1] return targets decoder_self_attention_bias = ( @@ -297,17 +287,12 @@ def symbols_to_logits_fn(ids, i, cache): targets = tf.expand_dims(tf.expand_dims(ids, axis=2), axis=3) targets = preprocess_targets(targets, i) - bias = decoder_self_attention_bias[:, :, i:i+1, :i+1] + bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1] with tf.variable_scope("body"): - body_outputs = dp( - self.decode, - targets, - cache["encoder_output"], - cache["encoder_decoder_attention_bias"], - bias, - hparams, - cache) + body_outputs = dp(self.decode, targets, cache["encoder_output"], + cache["encoder_decoder_attention_bias"], bias, + hparams, cache) with tf.variable_scope(target_modality.name): logits = target_modality.top_sharded(body_outputs, None, dp)[0] @@ -322,7 +307,8 @@ def symbols_to_logits_fn(ids, i, cache): "layer_%d" % layer: { "k": tf.zeros([batch_size, 0, key_channels]), "v": tf.zeros([batch_size, 0, value_channels]), - } for layer in range(num_layers) + } + for layer in range(num_layers) } # Set 2nd dim to None since it's not invariant in the tf.while_loop @@ -342,19 +328,25 @@ def symbols_to_logits_fn(ids, i, cache): vocab_size = target_modality.top_dimensionality initial_ids = tf.zeros([batch_size], dtype=tf.int32) decoded_ids, _ = beam_search.beam_search( - symbols_to_logits_fn, initial_ids, beam_size, decode_length, - vocab_size, alpha, states=cache) + symbols_to_logits_fn, + initial_ids, + beam_size, + decode_length, + vocab_size, + alpha, + states=cache) if top_beams == 1: decoded_ids = decoded_ids[:, 0, 1:] else: decoded_ids = decoded_ids[:, :top_beams, 1:] else: # Greedy + def inner_loop(i, next_id, decoded_ids, cache): logits, cache = symbols_to_logits_fn(next_id, i, cache) next_id = tf.expand_dims(tf.argmax(logits, axis=-1), axis=1) decoded_ids = tf.concat([decoded_ids, next_id], axis=1) - return i+1, next_id, decoded_ids, cache + return i + 1, next_id, decoded_ids, cache decoded_ids = tf.zeros([batch_size, 0], dtype=tf.int64) next_id = tf.zeros([batch_size, 1], dtype=tf.int64) @@ -384,8 +376,8 @@ def model_fn_body(self, features): inputs = common_layers.flatten4d3d(inputs) - (encoder_input, encoder_self_attention_bias, - _) = (transformer_prepare_encoder(inputs, target_space, hparams)) + (encoder_input, encoder_self_attention_bias, _) = ( + transformer_prepare_encoder(inputs, target_space, hparams)) encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.layer_prepostprocess_dropout) @@ -396,29 +388,6 @@ def model_fn_body(self, features): return encoder_output -@registry.register_model -class TransformerDecoder(t2t_model.T2TModel): - """Transformer, decoder only.""" - - def model_fn_body(self, features): - hparams = self._hparams - targets = features["targets"] - - targets = common_layers.flatten4d3d(targets) - - (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder( - targets, hparams) - - decoder_input = tf.nn.dropout(decoder_input, - 1.0 - hparams.layer_prepostprocess_dropout) - - decoder_output = transformer_decoder( - decoder_input, None, decoder_self_attention_bias, None, hparams) - decoder_output = tf.expand_dims(decoder_output, 2) - - return decoder_output - - def transformer_prepare_encoder(inputs, target_space, hparams): """Prepare one shard of the model for the encoder. @@ -574,9 +543,8 @@ def transformer_decoder(decoder_input, with tf.variable_scope("encdec_attention"): # TODO(llion): Add caching. y = common_attention.multihead_attention( - common_layers.layer_preprocess(x, hparams), - encoder_output, - encoder_decoder_attention_bias, + common_layers.layer_preprocess( + x, hparams), encoder_output, encoder_decoder_attention_bias, hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, hparams.hidden_size, @@ -1057,3 +1025,19 @@ def transformer_relative_big(): hparams.self_attention_type = "dot_product_relative" hparams.max_relative_position = 20 return hparams + + +@registry.register_hparams +def transformer_tpu(): + """HParams for Transformer model on TPU.""" + hparams = transformer_base() + hparams.use_pad_remover = int(False) # where op not supported + hparams.optimizer = "TrueAdam" + hparams.learning_rate = 0.2 + + # Inputs + # Each example in the batch will be of (padded) length hparams.max_length + hparams.max_length = 64 + hparams.tpu_batch_size_per_shard = 16 + + return hparams diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 8cda597d4..d9b20ee75 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -23,7 +23,7 @@ # Dependency imports from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import +from tensor2tensor import problems # pylint: disable=unused-import from tensor2tensor.tpu import tpu_trainer_lib as lib from tensor2tensor.utils import trainer_utils @@ -35,7 +35,7 @@ flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") +flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") flags.DEFINE_integer("iterations_per_loop", 1000, "Number of iterations in a TPU training loop.") @@ -63,14 +63,29 @@ def main(unused_argv): batch_size=hparams.tpu_batch_size_per_shard * FLAGS.tpu_num_shards, log_device_placement=FLAGS.log_device_placement, iterations_per_loop=FLAGS.iterations_per_loop) - if FLAGS.train_steps: - estimator.train( - lambda params: input_fn(tf.estimator.ModeKeys.TRAIN, params), - steps=FLAGS.train_steps) - if FLAGS.eval_steps: + + if not FLAGS.train_steps: + assert FLAGS.eval_steps estimator.evaluate( lambda params: input_fn(tf.estimator.ModeKeys.EVAL, params), steps=FLAGS.eval_steps) + return + + num_rounds = FLAGS.train_steps // FLAGS.local_eval_frequency + steps_per_round = [FLAGS.local_eval_frequency] * num_rounds + remainder = FLAGS.train_steps % FLAGS.local_eval_frequency + if remainder: + steps_per_round.append(remainder) + + for num_steps in steps_per_round: + estimator.train( + lambda params: input_fn(tf.estimator.ModeKeys.TRAIN, params), + steps=num_steps) + if FLAGS.eval_steps: + estimator.evaluate( + lambda params: input_fn(tf.estimator.ModeKeys.EVAL, params), + steps=FLAGS.eval_steps) + tf.logging.info("Training and evaluation complete.") if __name__ == "__main__": diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index dca9f4de9..7263d9299 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -24,12 +24,10 @@ from __future__ import print_function import copy -import math # Dependency imports from tensor2tensor.layers import common_layers -from tensor2tensor.models import transformer from tensor2tensor.utils import data_reader from tensor2tensor.utils import metrics from tensor2tensor.utils import model_builder @@ -39,6 +37,17 @@ from tensorflow.python.util import nest +def create_dummy_vars(): + """Dummy vars for restore to work when not using TPU codepath.""" + with tf.variable_scope("losses_avg"): + with tf.variable_scope("problem_0"): + for var_name in ["total", "extra", "training"]: + tf.get_variable( + "%s_loss" % var_name, initializer=100.0, trainable=False) + with tf.variable_scope("train_stats"): + tf.get_variable("problem_0_steps", initializer=0, trainable=False) + + def get_input_fn(data_dir, problem, hparams): """Get basic T2T input fn.""" @@ -88,8 +97,6 @@ def _valid_size(example): example, batching_scheme["min_length"], batching_scheme["max_length"]) dataset = dataset.filter(_valid_size) - if is_training: - dataset = dataset.shuffle(100) # TODO(rsepassi): In eval mode, should not repeat dataset = dataset.repeat(None) dataset = data_reader.padded_batch(dataset, batch_size, @@ -155,6 +162,9 @@ def get_model_fn(model, hp, use_tpu=True): def model_fn(features, labels, mode, params, config): """Model fn.""" del params + del config + create_dummy_vars() + hparams = copy.deepcopy(hp) problem_hp = hparams.problems[0] orig_features = features @@ -168,9 +178,12 @@ def model_fn(features, labels, mode, params, config): # Transform features transformed_features = {} if input_modality is not None: - transformed_features["inputs"] = input_modality.bottom(features["inputs"]) - transformed_features["targets"] = target_modality.targets_bottom( - features["targets"]) + with tf.variable_scope(input_modality.name): + transformed_features["inputs"] = input_modality.bottom( + features["inputs"]) + with tf.variable_scope(target_modality.name): + transformed_features["targets"] = target_modality.targets_bottom( + features["targets"]) transformed_features["problem_choice"] = tf.constant(0) transformed_features["input_space_id"] = tf.constant( problem_hp.input_space_id) @@ -178,17 +191,19 @@ def model_fn(features, labels, mode, params, config): problem_hp.target_space_id) # Model construction - outputs = model_class.model_fn_body(transformed_features) - logits = target_modality.top(outputs, labels) + with tf.variable_scope("body"): + outputs = model_class.model_fn_body(transformed_features) + with tf.variable_scope(target_modality.name): + logits = target_modality.top(outputs, labels) - # Ensure the length is known statically - shape = [None] * logits.get_shape().ndims - shape[1] = hparams.max_length - logits.set_shape(logits.get_shape().merge_with(shape)) + # Ensure the length is known statically + shape = [None] * logits.get_shape().ndims + shape[1] = hparams.max_length + logits.set_shape(logits.get_shape().merge_with(shape)) - # Loss - loss_num, loss_den = target_modality.loss(logits, labels) - loss = loss_num / tf.maximum(1.0, loss_den) + # Loss + loss_num, loss_den = target_modality.loss(logits, labels) + loss = loss_num / tf.maximum(1.0, loss_den) if mode == tf.estimator.ModeKeys.EVAL: problem = hp.problem_instances[0] @@ -202,10 +217,7 @@ def model_fn(features, labels, mode, params, config): assert mode == tf.estimator.ModeKeys.TRAIN # Learning rate - num_shards = config.tpu_config.num_shards - lr = hparams.learning_rate * model_builder.learning_rate_decay( - hparams, num_worker_replicas=num_shards) - lr /= math.sqrt(float(num_shards)) + lr = hparams.learning_rate * model_builder.learning_rate_decay(hparams) # Optimizer opt = model_builder.ConditionalOptimizer(hparams.optimizer, lr, hparams) @@ -313,19 +325,3 @@ def make_estimator(model_fn, config=run_config, train_batch_size=batch_size, eval_batch_size=batch_size * 2) - - -@registry.register_hparams -def transformer_tpu(): - """HParams for Transformer model on TPU.""" - hp = transformer.transformer_base() - hp.use_pad_remover = int(False) # where op not supported - hp.optimizer = "TrueAdam" - hp.learning_rate = 0.4 - - # Inputs - # Each example in the batch will be of (padded) length hp.max_length - hp.max_length = 64 - hp.tpu_batch_size_per_shard = 20 - - return hp From 8fa33f6e541805790c0e02941e692b5e957b37ae Mon Sep 17 00:00:00 2001 From: Katherine Lee <katherinelee@google.com> Date: Thu, 2 Nov 2017 14:44:07 -0700 Subject: [PATCH 0543/4095] Add Gaussian label smoothing. PiperOrigin-RevId: 174383193 --- tensor2tensor/layers/common_layers.py | 41 +++++++++++++++++++++------ 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index ce68a9fe1..7089529c8 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -1477,8 +1477,22 @@ def padded_cross_entropy(logits, return tf.reduce_sum(xent * weights), tf.reduce_sum(weights) -def smoothing_cross_entropy(logits, labels, vocab_size, confidence): - """Cross entropy with label smoothing to limit over-confidence.""" +def smoothing_cross_entropy(logits, labels, vocab_size, confidence, + gaussian=False): + """Cross entropy with label smoothing to limit over-confidence. + + Args: + logits: Tensor of size [batch_size, ?, ?, ?, vocab_size] + labels: Tensor of size [batch_size, ?, ?, ?] + vocab_size: Tensor representing the size of the vocabulary. + confidence: Used to determine on and off values for label smoothing. + If `gaussian` is true, `confidence` is the variance to the gaussian + distribution. + gaussian: Uses a gaussian distribution for label smoothing + + Returns: + + """ with tf.name_scope("smoothing_cross_entropy", [logits, labels]): # Low confidence is given to all non-true labels, uniformly. low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1) @@ -1486,12 +1500,23 @@ def smoothing_cross_entropy(logits, labels, vocab_size, confidence): # We subtract it just for readability, makes no difference on learning. normalizing = -(confidence * tf.log(confidence) + tf.to_float( vocab_size - 1) * low_confidence * tf.log(low_confidence + 1e-20)) - # Soft targets. - soft_targets = tf.one_hot( - tf.cast(labels, tf.int32), - depth=vocab_size, - on_value=confidence, - off_value=low_confidence) + + if gaussian: + labels = tf.cast(labels, tf.float32) + + normal_dist = tf.distributions.Normal(loc=labels, scale=confidence) + # Locations to evaluate the probability distributions. + soft_targets = normal_dist.prob(tf.cast(tf.range(vocab_size), tf.float32) + [:, None, None, None, None]) + # Reordering soft_targets from [vocab_size, batch_size, ?, ?, ?] to match + # logits: [batch_size, ?, ?, ?, vocab_size] + soft_targets = tf.transpose(soft_targets, perm=[1, 2, 3, 4, 0]) + else: + soft_targets = tf.one_hot( + tf.cast(labels, tf.int32), + depth=vocab_size, + on_value=confidence, + off_value=low_confidence) xentropy = tf.nn.softmax_cross_entropy_with_logits( logits=logits, labels=soft_targets) return xentropy - normalizing From 9afc19035bc8a31208967909ba46ba8e3042fca9 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 2 Nov 2017 16:23:57 -0700 Subject: [PATCH 0544/4095] Use problem.dataset in the TPU input pipeline PiperOrigin-RevId: 174397407 --- tensor2tensor/tpu/tpu_trainer_lib.py | 52 +++++++++------------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 7263d9299..85a3cdf42 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -69,44 +69,11 @@ def input_fn(mode, params): }, } - def decode_record(record): - """Serialized Example to dict of <feature name, Tensor>.""" - data_fields, _ = problem.example_reading_spec() - decoded = tf.parse_single_example(record, features=data_fields) - decoded["inputs"] = decoded["inputs"].values - decoded["targets"] = decoded["targets"].values - return decoded - - data_files = tf.contrib.slim.parallel_reader.get_data_files( - problem.filepattern(data_dir, mode)) - dataset = tf.data.TFRecordDataset(data_files) - dataset = dataset.map(decode_record, num_parallel_calls=num_threads) - - def _preprocess(example, problem, hparams, mode): - example = problem.preprocess_example(example, mode, hparams) - # We do not want int64s as they are not supported on TPUs. - example = data_reader.cast_int64_to_int32(example) - return example - - dataset = dataset.map( - lambda ex: _preprocess(ex, problem, hparams, mode), - num_parallel_calls=num_threads) - def _valid_size(example): return data_reader.example_valid_size( example, batching_scheme["min_length"], batching_scheme["max_length"]) - dataset = dataset.filter(_valid_size) - # TODO(rsepassi): In eval mode, should not repeat - dataset = dataset.repeat(None) - dataset = data_reader.padded_batch(dataset, batch_size, - batching_scheme["padded_shapes"]) - - if not is_training: - dataset = dataset.map( - lambda f: pad_batch(f, batch_size), num_parallel_calls=num_threads) - - def shape_def(example): + def define_shapes(example): """Set the right shapes for the features.""" inputs = example["inputs"] targets = example["targets"] @@ -130,7 +97,22 @@ def shape_def(example): return example - dataset = dataset.map(shape_def, num_parallel_calls=num_threads) + dataset = problem.dataset( + mode=mode, data_dir=data_dir, num_threads=num_threads, hparams=hparams) + dataset = dataset.map( + data_reader.cast_int64_to_int32, num_threads=num_threads) + dataset = dataset.filter(_valid_size) + if is_training: + dataset = dataset.shuffle(100) + # TODO(rsepassi): In eval mode, should not repeat. Do so because TPU seems + # to crash if it runs out of data during eval. + dataset = dataset.repeat(None) + dataset = data_reader.padded_batch(dataset, batch_size, + batching_scheme["padded_shapes"]) + if not is_training: + dataset = dataset.map( + lambda f: pad_batch(f, batch_size), num_parallel_calls=num_threads) + dataset = dataset.map(define_shapes, num_parallel_calls=num_threads) dataset = dataset.prefetch(1) features = dataset.make_one_shot_iterator().get_next() From c022afdf1de74e27a4482a4fe00754f9da9a5da0 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 2 Nov 2017 17:12:30 -0700 Subject: [PATCH 0545/4095] Work on generators: improve EnCs, add large EnFr and OCR test; LSTM corrections. PiperOrigin-RevId: 174403513 --- .../data_generators/generator_utils.py | 7 +- tensor2tensor/data_generators/image.py | 56 ++++ .../data_generators/translate_enfr.py | 106 +++++--- .../data_generators/translate_enzh.py | 41 +-- tensor2tensor/models/lstm.py | 250 ++++++------------ tensor2tensor/models/lstm_test.py | 3 +- 6 files changed, 242 insertions(+), 221 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 835d049f8..833717432 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -21,9 +21,9 @@ from collections import defaultdict import gzip -import io import os import random +import stat import tarfile # Dependency imports @@ -258,8 +258,11 @@ def gunzip_file(gz_path, new_path): tf.logging.info("File %s already exists, skipping unpacking" % new_path) return tf.logging.info("Unpacking %s to %s" % (gz_path, new_path)) + # We may be unpacking into a newly created directory, add write mode. + mode = stat.S_IRWXU or stat.S_IXGRP or stat.S_IRGRP or stat.S_IROTH + os.chmod(os.path.dirname(new_path), mode) with gzip.open(gz_path, "rb") as gz_file: - with io.open(new_path, "wb") as new_file: + with tf.gfile.GFile(new_path, mode="wb") as new_file: for line in gz_file: new_file.write(line) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index e9ae45f01..0c3988bc5 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -24,6 +24,7 @@ import json import os import random +import struct import tarfile import zipfile @@ -925,3 +926,58 @@ class ImageMsCocoTokens32k(ImageMsCocoTokens8k): @property def targeted_vocab_size(self): return 2**15 # 32768 + + +@registry.register_problem +class OcrTest(Image2TextProblem): + """OCR test problem.""" + + @property + def is_small(self): + return True + + @property + def is_character_level(self): + return True + + @property + def target_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def train_shards(self): + return 1 + + @property + def dev_shards(self): + return 1 + + def preprocess_example(self, example, mode, _): + # Resize from usual size ~1350x60 to 90x4 in this test. + img = example["inputs"] + example["inputs"] = tf.to_int64( + tf.image.resize_images(img, [90, 4], tf.image.ResizeMethod.AREA)) + return example + + def generator(self, data_dir, tmp_dir, is_training): + # In this test problem, we assume that the data is in tmp_dir/ocr/ in + # files names 0.png, 0.txt, 1.png, 1.txt and so on until num_examples. + num_examples = 2 + ocr_dir = os.path.join(tmp_dir, "ocr/") + tf.logging.info("Looking for OCR data in %s." % ocr_dir) + for i in xrange(num_examples): + image_filepath = os.path.join(ocr_dir, "%d.png" % i) + text_filepath = os.path.join(ocr_dir, "%d.txt" % i) + with tf.gfile.Open(text_filepath, "rb") as f: + label = f.read() + with tf.gfile.Open(image_filepath, "rb") as f: + encoded_image_data = f.read() + # In PNG files width and height are stored in these bytes. + width, height = struct.unpack(">ii", encoded_image_data[16:24]) + yield { + "image/encoded": [encoded_image_data], + "image/format": ["png"], + "image/class/label": label.strip(), + "image/height": [height], + "image/width": [width] + } diff --git a/tensor2tensor/data_generators/translate_enfr.py b/tensor2tensor/data_generators/translate_enfr.py index 152d3d963..8076d4792 100644 --- a/tensor2tensor/data_generators/translate_enfr.py +++ b/tensor2tensor/data_generators/translate_enfr.py @@ -34,50 +34,54 @@ # End-of-sentence marker. EOS = text_encoder.EOS_ID -_ENFR_TRAIN_DATASETS = [ +_ENFR_TRAIN_SMALL_DATA = [ [ "https://s3.amazonaws.com/opennmt-trainingdata/baseline-1M-enfr.tgz", ("baseline-1M-enfr/baseline-1M_train.en", "baseline-1M-enfr/baseline-1M_train.fr") ], - # [ - # "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", - # ("commoncrawl.fr-en.en", "commoncrawl.fr-en.fr") - # ], - # [ - # "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", - # ("training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr") - # ], - # [ - # "http://www.statmt.org/wmt14/training-parallel-nc-v9.tgz", - # ("training/news-commentary-v9.fr-en.en", - # "training/news-commentary-v9.fr-en.fr") - # ], - # [ - # "http://www.statmt.org/wmt10/training-giga-fren.tar", - # ("giga-fren.release2.fixed.en.gz", - # "giga-fren.release2.fixed.fr.gz") - # ], - # [ - # "http://www.statmt.org/wmt13/training-parallel-un.tgz", - # ("un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr") - # ], ] -_ENFR_TEST_DATASETS = [ +_ENFR_TEST_SMALL_DATA = [ [ "https://s3.amazonaws.com/opennmt-trainingdata/baseline-1M-enfr.tgz", ("baseline-1M-enfr/baseline-1M_valid.en", "baseline-1M-enfr/baseline-1M_valid.fr") ], - # [ - # "http://data.statmt.org/wmt17/translation-task/dev.tgz", - # ("dev/newstest2013.en", "dev/newstest2013.fr") - # ], +] +_ENFR_TRAIN_LARGE_DATA = [ + [ + "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz", + ("commoncrawl.fr-en.en", "commoncrawl.fr-en.fr") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz", + ("training/europarl-v7.fr-en.en", "training/europarl-v7.fr-en.fr") + ], + [ + "http://www.statmt.org/wmt14/training-parallel-nc-v9.tgz", + ("training/news-commentary-v9.fr-en.en", + "training/news-commentary-v9.fr-en.fr") + ], + [ + "http://www.statmt.org/wmt10/training-giga-fren.tar", + ("giga-fren.release2.fixed.en.gz", + "giga-fren.release2.fixed.fr.gz") + ], + [ + "http://www.statmt.org/wmt13/training-parallel-un.tgz", + ("un/undoc.2000.fr-en.en", "un/undoc.2000.fr-en.fr") + ], +] +_ENFR_TEST_LARGE_DATA = [ + [ + "http://data.statmt.org/wmt17/translation-task/dev.tgz", + ("dev/newstest2013.en", "dev/newstest2013.fr") + ], ] @registry.register_problem -class TranslateEnfrWmt8k(translate.TranslateProblem): +class TranslateEnfrWmtSmall8k(translate.TranslateProblem): """Problem spec for WMT En-Fr translation.""" @property @@ -88,11 +92,18 @@ def targeted_vocab_size(self): def vocab_name(self): return "vocab.enfr" + @property + def use_small_dataset(self): + return True + def generator(self, data_dir, tmp_dir, train): symbolizer_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.vocab_file, self.targeted_vocab_size, - _ENFR_TRAIN_DATASETS) - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + _ENFR_TRAIN_SMALL_DATA) + if self.use_small_dataset: + datasets = _ENFR_TRAIN_SMALL_DATA if train else _ENFR_TEST_SMALL_DATA + else: + datasets = _ENFR_TRAIN_LARGE_DATA if train else _ENFR_TEST_LARGE_DATA tag = "train" if train else "dev" data_path = translate.compile_data(tmp_dir, datasets, "wmt_enfr_tok_%s" % tag) @@ -109,7 +120,7 @@ def target_space_id(self): @registry.register_problem -class TranslateEnfrWmt32k(TranslateEnfrWmt8k): +class TranslateEnfrWmtSmall32k(TranslateEnfrWmtSmall8k): @property def targeted_vocab_size(self): @@ -117,7 +128,23 @@ def targeted_vocab_size(self): @registry.register_problem -class TranslateEnfrWmtCharacters(translate.TranslateProblem): +class TranslateEnfrWmt8k(TranslateEnfrWmtSmall8k): + + @property + def use_small_dataset(self): + return False + + +@registry.register_problem +class TranslateEnfrWmt32k(TranslateEnfrWmtSmall32k): + + @property + def use_small_dataset(self): + return False + + +@registry.register_problem +class TranslateEnfrWmtSmallCharacters(translate.TranslateProblem): """Problem spec for WMT En-Fr translation.""" @property @@ -130,7 +157,10 @@ def vocab_name(self): def generator(self, data_dir, tmp_dir, train): character_vocab = text_encoder.ByteTextEncoder() - datasets = _ENFR_TRAIN_DATASETS if train else _ENFR_TEST_DATASETS + if self.use_small_dataset: + datasets = _ENFR_TRAIN_SMALL_DATA if train else _ENFR_TEST_SMALL_DATA + else: + datasets = _ENFR_TRAIN_LARGE_DATA if train else _ENFR_TEST_LARGE_DATA tag = "train" if train else "dev" data_path = translate.compile_data(tmp_dir, datasets, "wmt_enfr_chr_%s" % tag) @@ -144,3 +174,11 @@ def input_space_id(self): @property def target_space_id(self): return problem.SpaceID.FR_CHR + + +@registry.register_problem +class TranslateEnfrWmtCharacters(TranslateEnfrWmtSmallCharacters): + + @property + def use_small_dataset(self): + return False diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index 7c77a05fc..0ee3bfd08 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -36,20 +36,26 @@ # End-of-sentence marker. EOS = text_encoder.EOS_ID -_ZHEN_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" +# End-of-sentence marker. +EOS = text_encoder.EOS_ID + +# This is far from being the real WMT17 task - only toyset here +# you need to register to get UN data and CWT data. Also, by convention, +# this is EN to ZH - use translate_enzh_wmt8k_rev for ZH to EN task +_ENZH_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" "training-parallel-nc-v12.tgz"), - ("training/news-commentary-v12.zh-en.zh", - "training/news-commentary-v12.zh-en.en")]] + ("training/news-commentary-v12.zh-en.en", + "training/news-commentary-v12.zh-en.zh")]] -_ZHEN_TEST_DATASETS = [[ +_ENZH_TEST_DATASETS = [[ "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.zh.sgm", "dev/newsdev2017-zhen-ref.en.sgm") + ("dev/newsdev2017-zhen-src.en.sgm", "dev/newsdev2017-zhen-ref.zh.sgm") ]] @registry.register_problem class TranslateEnzhWmt8k(translate.TranslateProblem): - """Problem spec for WMT Zh-En translation.""" + """Problem spec for WMT En-Zh translation.""" @property def targeted_vocab_size(self): @@ -61,16 +67,16 @@ def num_shards(self): @property def source_vocab_name(self): - return "vocab.zhen-zh.%d" % self.targeted_vocab_size + return "vocab.enzh-en.%d" % self.targeted_vocab_size @property def target_vocab_name(self): - return "vocab.zhen-en.%d" % self.targeted_vocab_size + return "vocab.enzh-zh.%d" % self.targeted_vocab_size def generator(self, data_dir, tmp_dir, train): - datasets = _ZHEN_TRAIN_DATASETS if train else _ZHEN_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _ZHEN_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _ZHEN_TRAIN_DATASETS] + datasets = _ENZH_TRAIN_DATASETS if train else _ENZH_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in _ENZH_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ENZH_TRAIN_DATASETS] source_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, source_datasets) @@ -79,21 +85,18 @@ def generator(self, data_dir, tmp_dir, train): target_datasets) tag = "train" if train else "dev" data_path = translate.compile_data(tmp_dir, datasets, - "wmt_zhen_tok_%s" % tag) - # We generate English->X data by convention, to train reverse translation - # just add the "_rev" suffix to the problem name, e.g., like this. - # --problems=translate_enzh_wmt8k_rev - return translate.bi_vocabs_token_generator(data_path + ".lang2", - data_path + ".lang1", + "wmt_enzh_tok_%s" % tag) + return translate.bi_vocabs_token_generator(data_path + ".lang1", + data_path + ".lang2", source_vocab, target_vocab, EOS) @property def input_space_id(self): - return problem.SpaceID.ZH_TOK + return problem.SpaceID.EN_TOK @property def target_space_id(self): - return problem.SpaceID.EN_TOK + return problem.SpaceID.ZH_TOK def feature_encoders(self, data_dir): source_vocab_filename = os.path.join(data_dir, self.source_vocab_name) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 0ae1ad294..c3e378359 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -19,8 +19,6 @@ from __future__ import division from __future__ import print_function -import collections - # Dependency imports from tensor2tensor.layers import common_hparams @@ -29,145 +27,6 @@ from tensor2tensor.utils import t2t_model import tensorflow as tf -from tensorflow.python.util import nest - -# Track Tuple of state and attention values -AttentionTuple = collections.namedtuple("AttentionTuple", ("state", - "attention")) - - -class ExternalAttentionCellWrapper(tf.contrib.rnn.RNNCell): - """Wrapper for external attention states for an encoder-decoder setup.""" - - def __init__(self, - cell, - attn_states, - attn_vec_size=None, - input_size=None, - state_is_tuple=True, - reuse=None): - """Create a cell with attention. - - Args: - cell: an RNNCell, an attention is added to it. - attn_states: External attention states typically the encoder output in the - form [batch_size, time steps, hidden size] - attn_vec_size: integer, the number of convolutional features calculated - on attention state and a size of the hidden layer built from - base cell state. Equal attn_size to by default. - input_size: integer, the size of a hidden linear layer, - built from inputs and attention. Derived from the input tensor - by default. - state_is_tuple: If True, accepted and returned states are n-tuples, where - `n = len(cells)`. Must be set to True else will raise an exception - concatenated along the column axis. - reuse: (optional) Python boolean describing whether to reuse variables - in an existing scope. If not `True`, and the existing scope already has - the given variables, an error is raised. - Raises: - TypeError: if cell is not an RNNCell. - ValueError: if the flag `state_is_tuple` is `False` or if shape of - `attn_states` is not 3 or if innermost dimension (hidden size) is None. - """ - super(ExternalAttentionCellWrapper, self).__init__(_reuse=reuse) - if not state_is_tuple: - raise ValueError("Only tuple state is supported") - - self._cell = cell - self._input_size = input_size - - # Validate attn_states shape. - attn_shape = attn_states.get_shape() - if not attn_shape or len(attn_shape) != 3: - raise ValueError("attn_shape must be rank 3") - - self._attn_states = attn_states - self._attn_size = attn_shape[2].value - if self._attn_size is None: - raise ValueError("Hidden size of attn_states cannot be None") - - self._attn_vec_size = attn_vec_size - if self._attn_vec_size is None: - self._attn_vec_size = self._attn_size - - self._reuse = reuse - - @property - def state_size(self): - return AttentionTuple(self._cell.state_size, self._attn_size) - - @property - def output_size(self): - return self._attn_size - - def combine_state(self, previous_state): - """Combines previous state (from encoder) with internal attention values. - - You must use this function to derive the initial state passed into - this cell as it expects a named tuple (AttentionTuple). - - Args: - previous_state: State from another block that will be fed into this cell; - Must have same structure as the state of the cell wrapped by this. - Returns: - Combined state (AttentionTuple). - """ - batch_size = self._attn_states.get_shape()[0].value - if batch_size is None: - batch_size = tf.shape(self._attn_states)[0] - zeroed_state = self.zero_state(batch_size, self._attn_states.dtype) - return AttentionTuple(previous_state, zeroed_state.attention) - - def call(self, inputs, state): - """Long short-term memory cell with attention (LSTMA).""" - - if not isinstance(state, AttentionTuple): - raise TypeError("State must be of type AttentionTuple") - - state, attns = state - attn_states = self._attn_states - attn_length = attn_states.get_shape()[1].value - if attn_length is None: - attn_length = tf.shape(attn_states)[1] - - input_size = self._input_size - if input_size is None: - input_size = inputs.get_shape().as_list()[1] - if attns is not None: - inputs = tf.layers.dense(tf.concat([inputs, attns], axis=1), input_size) - lstm_output, new_state = self._cell(inputs, state) - - new_state_cat = tf.concat(nest.flatten(new_state), 1) - new_attns = self._attention(new_state_cat, attn_states, attn_length) - - with tf.variable_scope("attn_output_projection"): - output = tf.layers.dense( - tf.concat([lstm_output, new_attns], axis=1), self._attn_size) - - new_state = AttentionTuple(new_state, new_attns) - - return output, new_state - - def _attention(self, query, attn_states, attn_length): - conv2d = tf.nn.conv2d - reduce_sum = tf.reduce_sum - softmax = tf.nn.softmax - tanh = tf.tanh - - with tf.variable_scope("attention"): - k = tf.get_variable("attn_w", - [1, 1, self._attn_size, self._attn_vec_size]) - v = tf.get_variable("attn_v", [self._attn_vec_size, 1]) - hidden = tf.reshape(attn_states, [-1, attn_length, 1, self._attn_size]) - hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME") - y = tf.layers.dense(query, self._attn_vec_size) - y = tf.reshape(y, [-1, 1, 1, self._attn_vec_size]) - s = reduce_sum(v * tanh(hidden_features + y), [2, 3]) - a = softmax(s) - d = reduce_sum(tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2]) - new_attns = tf.reshape(d, [-1, self._attn_size]) - - return new_attns def lstm(inputs, hparams, train, name, initial_state=None): @@ -189,7 +48,7 @@ def dropout_lstm_cell(): def lstm_attention_decoder(inputs, hparams, train, name, initial_state, - attn_states): + encoder_outputs): """Run LSTM cell with attention on inputs of shape [batch x time x size].""" def dropout_lstm_cell(): @@ -198,32 +57,52 @@ def dropout_lstm_cell(): input_keep_prob=1.0 - hparams.dropout * tf.to_float(train)) layers = [dropout_lstm_cell() for _ in range(hparams.num_hidden_layers)] - cell = ExternalAttentionCellWrapper( + if hparams.attention_mechanism == "luong": + attention_mechanism_class = tf.contrib.seq2seq.LuongAttention + elif hparams.attention_mechanism == "bahdanau": + attention_mechanism_class = tf.contrib.seq2seq.BahdanauAttention + else: + raise ValueError("Unknown hparams.attention_mechanism = %s, must be " + "luong or bahdanu." % hparams.attention_mechanism) + attention_mechanism = attention_mechanism_class( + hparams.hidden_size, encoder_outputs) + + cell = tf.contrib.seq2seq.AttentionWrapper( tf.nn.rnn_cell.MultiRNNCell(layers), - attn_states, - attn_vec_size=hparams.attn_vec_size) - initial_state = cell.combine_state(initial_state) + [attention_mechanism]*hparams.num_heads, + attention_layer_size=[hparams.attention_layer_size]*hparams.num_heads, + output_attention=(hparams.output_attention == 1)) + + batch_size = inputs.get_shape()[0].value + if batch_size is None: + batch_size = tf.shape(inputs)[0] + + initial_state = cell.zero_state(batch_size, tf.float32).clone( + cell_state=initial_state) + with tf.variable_scope(name): - return tf.nn.dynamic_rnn( + output, state = tf.nn.dynamic_rnn( cell, inputs, initial_state=initial_state, dtype=tf.float32, time_major=False) + # For multi-head attention project output back to hidden size + if hparams.output_attention == 1 and hparams.num_heads > 1: + output = tf.layers.dense(output, hparams.hidden_size) + + return output, state + def lstm_seq2seq_internal(inputs, targets, hparams, train): """The basic LSTM seq2seq model, main step used for training.""" with tf.variable_scope("lstm_seq2seq"): - if inputs is None: - final_encoder_state = None - else: - # Flatten inputs. - inputs = common_layers.flatten4d3d(inputs) - # LSTM encoder. - _, final_encoder_state = lstm( - tf.reverse(inputs, axis=[1]), hparams, train, "encoder") - + # Flatten inputs. + inputs = common_layers.flatten4d3d(inputs) + # LSTM encoder. + _, final_encoder_state = lstm( + tf.reverse(inputs, axis=[1]), hparams, train, "encoder") # LSTM decoder. shifted_targets = common_layers.shift_right(targets) decoder_outputs, _ = lstm( @@ -255,17 +134,21 @@ def lstm_seq2seq_internal_attention(inputs, targets, hparams, train): class LSTMSeq2seq(t2t_model.T2TModel): def model_fn_body(self, features): + # TODO(lukaszkaiser): investigate this issue and repair. + if self._hparams.initializer == "orthogonal": + raise ValueError("LSTM models fail with orthogonal initializer.") train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN - return lstm_seq2seq_internal(features.get("inputs", None), - features["targets"], - self._hparams, - train) + return lstm_seq2seq_internal(features["inputs"], features["targets"], + self._hparams, train) @registry.register_model class LSTMSeq2seqAttention(t2t_model.T2TModel): def model_fn_body(self, features): + # TODO(lukaszkaiser): investigate this issue and repair. + if self._hparams.initializer == "orthogonal": + raise ValueError("LSTM models fail with orthogonal initializer.") train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN return lstm_seq2seq_internal_attention( features["inputs"], features["targets"], self._hparams, train) @@ -279,14 +162,53 @@ def lstm_seq2seq(): hparams.hidden_size = 128 hparams.num_hidden_layers = 2 hparams.initializer = "uniform_unit_scaling" + hparams.initializer_gain = 1.0 + hparams.weight_decay = 0.0 + return hparams + + +def lstm_attention_base(): + """Base attention params.""" + hparams = lstm_seq2seq() + hparams.add_hparam("attention_layer_size", hparams.hidden_size) + hparams.add_hparam("output_attention", int(True)) + hparams.add_hparam("num_heads", 1) + return hparams + + +@registry.register_hparams +def lstm_bahdanau_attention(): + """Hparams for LSTM with bahdanau attention.""" + hparams = lstm_attention_base() + hparams.add_hparam("attention_mechanism", "bahdanau") + return hparams + + +@registry.register_hparams +def lstm_luong_attention(): + """Hparams for LSTM with luong attention.""" + hparams = lstm_attention_base() + hparams.add_hparam("attention_mechanism", "luong") return hparams @registry.register_hparams def lstm_attention(): - """hparams for LSTM with attention.""" - hparams = lstm_seq2seq() + """For backwards compatibility, defaults to bahdanau.""" + return lstm_bahdanau_attention() + - # Attention - hparams.add_hparam("attn_vec_size", hparams.hidden_size) +@registry.register_hparams +def lstm_bahdanau_attention_multi(): + """Multi-head Bahdanu attention.""" + hparams = lstm_bahdanau_attention() + hparams.num_heads = 4 + return hparams + + +@registry.register_hparams +def lstm_luong_attention_multi(): + """Multi-head Luong attention.""" + hparams = lstm_luong_attention() + hparams.num_heads = 4 return hparams diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index 0d4bc6d80..b8be74f23 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -24,7 +24,6 @@ import numpy as np from tensor2tensor.data_generators import problem_hparams -from tensor2tensor.layers import common_hparams from tensor2tensor.models import lstm import tensorflow as tf @@ -36,7 +35,7 @@ def testLSTMSeq2Seq(self): vocab_size = 9 x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1)) - hparams = common_hparams.basic_params1() + hparams = lstm.lstm_seq2seq() p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size) with self.test_session() as session: features = { From b9dce9b79913ca7b81b721d5f9fe4d5e9cdafb3c Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 2 Nov 2017 17:14:30 -0700 Subject: [PATCH 0546/4095] Play with a new model with Transformer with a GAN'y part. PiperOrigin-RevId: 174403736 --- tensor2tensor/layers/common_hparams.py | 3 + tensor2tensor/layers/modalities.py | 2 + tensor2tensor/layers/modalities_test.py | 3 + tensor2tensor/models/__init__.py | 1 + tensor2tensor/models/transformer_adv.py | 229 ++++++++++++++++++++++++ 5 files changed, 238 insertions(+) create mode 100644 tensor2tensor/models/transformer_adv.py diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index d2d8bb2e5..c8ba0d03c 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -116,12 +116,15 @@ def basic_params1(): # If set to True, drop sequences longer than max_length during eval. # This affects the validity of the evaluation metrics. eval_drop_long_sequences=int(False), + # TODO(lukaszkaiser): these parameters should probably be set elsewhere. # in SymbolModality, share the output embeddings and the softmax # variables. # You can also share the input embeddings with the output embeddings # by using a problem_hparams that uses the same modality object for # the input_modality and target_modality. shared_embedding_and_softmax_weights=int(False), + # In SymbolModality, skip the top layer, assume we're providing logits. + symbol_modality_skip_top=int(False), # For each feature for which you want to override the default input # modality, add an entry to this semicolon-separated string. Entries are # formatted "feature_name:modality_type:modality_name", e.g. diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index a29aa93b1..9e0f73045 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -115,6 +115,8 @@ def top(self, body_output, _): else: scope_name = "softmax" reuse = False + if self._model_hparams.symbol_modality_skip_top: + return tf.expand_dims(body_output, 3) with tf.variable_scope(scope_name, reuse=reuse): var = self._get_weights() if (self._model_hparams.factored_logits and diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index 93dda6d09..7421a7e07 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -40,6 +40,7 @@ def testSymbolModalityInputs(self): symbol_modality_num_shards=4, hidden_size=hidden_size, multiply_embedding_mode="sqrt_depth", + symbol_modality_skip_top=0, shared_embedding_and_softmax_weights=0) x = -1 + np.random.random_integers( vocab_size, size=(batch_size, length, 1, 1)) @@ -65,6 +66,7 @@ def testSymbolModalityTargets(self): symbol_modality_num_shards=4, hidden_size=hidden_size, label_smoothing=0.2, + symbol_modality_skip_top=0, shared_embedding_and_softmax_weights=0, factored_logits=0, mode=tf.estimator.ModeKeys.TRAIN) @@ -99,6 +101,7 @@ def testSymbolModalityTargetsFactored(self): symbol_modality_num_shards=4, hidden_size=hidden_size, label_smoothing=0.2, + symbol_modality_skip_top=0, shared_embedding_and_softmax_weights=0, factored_logits=1, mode=tf.estimator.ModeKeys.TRAIN) diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index 74c72d8e1..f4c8a9a82 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -36,6 +36,7 @@ from tensor2tensor.models import shake_shake from tensor2tensor.models import slicenet from tensor2tensor.models import transformer +from tensor2tensor.models import transformer_adv from tensor2tensor.models import transformer_alternative from tensor2tensor.models import transformer_moe from tensor2tensor.models import transformer_revnet diff --git a/tensor2tensor/models/transformer_adv.py b/tensor2tensor/models/transformer_adv.py new file mode 100644 index 000000000..2a12aa389 --- /dev/null +++ b/tensor2tensor/models/transformer_adv.py @@ -0,0 +1,229 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Adversarial Transformer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.layers import common_layers +from tensor2tensor.models import transformer +from tensor2tensor.models import transformer_vae +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +def encode(x, x_space, hparams, name): + """Transformer preparations and encoder.""" + with tf.variable_scope(name): + (encoder_input, encoder_self_attention_bias, + ed) = transformer.transformer_prepare_encoder(x, x_space, hparams) + encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout) + return transformer.transformer_encoder( + encoder_input, encoder_self_attention_bias, hparams), ed + + +def decode(encoder_output, encoder_decoder_attention_bias, targets, + hparams, name, reuse=False): + """Transformer decoder.""" + with tf.variable_scope(name, reuse=reuse): + targets = common_layers.flatten4d3d(targets) + + decoder_input, decoder_self_bias = transformer.transformer_prepare_decoder( + targets, hparams) + + decoder_input = tf.nn.dropout(decoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + + decoder_output = transformer.transformer_decoder( + decoder_input, + encoder_output, + decoder_self_bias, + encoder_decoder_attention_bias, + hparams) + + # Expand since t2t expects 4d tensors. + return tf.expand_dims(decoder_output, axis=2) + + +def reverse_gradient(x, delta=1.0): + return tf.stop_gradient((1.0 + delta) * x) - delta * x + + +def adversary(embedded, inputs, hparams, name, reuse=False): + with tf.variable_scope(name, reuse=reuse): + h0, i0 = common_layers.pad_to_same_length( + embedded, inputs, final_length_divisible_by=16) + h0 = tf.concat([h0, tf.expand_dims(i0, axis=2)], axis=-1) + h0 = tf.layers.dense(h0, hparams.hidden_size, name="io") + h1 = transformer_vae.compress(h0, None, False, hparams, "compress1") + h2 = transformer_vae.compress(h1, None, False, hparams, "compress2") + res_dense = tf.reduce_mean(h2, axis=[1, 2]) + res_single = tf.squeeze(tf.layers.dense(res_dense, 1), axis=-1) + return tf.nn.sigmoid(res_single) + + +def softmax_embed(x, embedding, batch_size, hparams): + """Softmax x and embed.""" + x = tf.reshape(tf.nn.softmax(x), [-1, 34*1024]) + x = tf.matmul(x, embedding) + return tf.reshape(x, [batch_size, -1, 1, hparams.hidden_size]) + + +def adv_transformer_internal(inputs, targets, target_space, hparams): + """Adversarial Transformer, main step used for training.""" + with tf.variable_scope("adv_transformer"): + batch_size = tf.shape(targets)[0] + targets = tf.reshape(targets, [batch_size, -1, 1]) + embedding = tf.get_variable("embedding", [34*1024, hparams.hidden_size]) + targets_emb = tf.gather(embedding, targets) + + # Noisy embedded targets. + targets_noisy = tf.one_hot(targets, 34*1024) + noise_val = hparams.noise_val + targets_noisy += tf.random_uniform(tf.shape(targets_noisy), + minval=-noise_val, maxval=noise_val) + targets_emb_noisy = softmax_embed( + targets_noisy, embedding, batch_size, hparams) + + # Encoder. + if inputs is not None: + inputs_emb = common_layers.flatten4d3d(inputs) + inputs, ed = encode(inputs_emb, target_space, hparams, "input_enc") + else: + ed = None + + # Masking. + masking = common_layers.inverse_lin_decay(60000) + masking *= common_layers.inverse_exp_decay(20000) # Not much at start. + masking -= tf.random_uniform([]) * 0.4 + mask = tf.less(masking, tf.random_uniform(tf.shape(targets))) + mask = tf.expand_dims(tf.to_float(mask), 3) + noise = tf.random_uniform(tf.shape(targets_emb)) + targets_emb = mask * targets_emb + (1.0 - mask) * noise + + # Decoder. + res_dec = decode(inputs, ed, targets_emb, hparams, "decoder") + res = tf.layers.dense(res_dec, 34*1024, name="res_sm") + res_emb = softmax_embed(res, embedding, batch_size, hparams) + + # Extra steps. + extra_step_prob = masking * 0.6 + if hparams.mode != tf.estimator.ModeKeys.TRAIN: + extra_step_prob = 1.0 + for _ in xrange(hparams.extra_steps): + def another_step(emb): + res_dec = decode(inputs, ed, emb, hparams, "decoder", reuse=True) + res = tf.layers.dense(res_dec, 34*1024, name="res_sm", reuse=True) + return softmax_embed(res, embedding, batch_size, hparams), res + res_emb, res = tf.cond(tf.less(tf.random_uniform([]), extra_step_prob), + lambda e=res_emb: another_step(e), + lambda: (res_emb, res)) + + # Adversary. + delta = masking * hparams.delta_max + true_logit = adversary(tf.stop_gradient(targets_emb_noisy), + tf.stop_gradient(inputs + inputs_emb), + hparams, "adversary") + gen_logit = adversary(reverse_gradient(res_emb, delta), + tf.stop_gradient(inputs + inputs_emb), + hparams, "adversary", reuse=True) + losses = {"adv": gen_logit - true_logit} + res = tf.stop_gradient(masking * res) + (1.0 - masking) * res + return res, losses + + +@registry.register_model +class TransformerAdv(t2t_model.T2TModel): + """Adversarial Transformer.""" + + def model_fn_body(self, features): + inputs = features.get("inputs", None) + return adv_transformer_internal( + inputs, features["targets_raw"], + features["target_space_id"], self._hparams) + + def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, + last_position_only=False, alpha=0.0): + """Produce predictions from the model.""" + if not features: + features = {} + inputs_old = None + if "inputs" in features and len(features["inputs"].shape) < 4: + inputs_old = features["inputs"] + features["inputs"] = tf.expand_dims(features["inputs"], 2) + + # Create an initial targets tensor. + if "partial_targets" in features: + initial_output = tf.convert_to_tensor(features["partial_targets"]) + else: + batch_size = tf.shape(features["inputs"])[0] + length = tf.shape(features["inputs"])[1] + initial_output = tf.zeros((batch_size, 2 * length, 1, 1), dtype=tf.int64) + + features["targets"] = initial_output + sharded_logits, _ = self.model_fn( + features, False, last_position_only=last_position_only) + sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) + samples = tf.concat(sharded_samples, 0) + + # More steps. + how_many_more_steps = 5 + for _ in xrange(how_many_more_steps): + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + features["targets"] = samples + sharded_logits, _ = self.model_fn( + features, False, last_position_only=last_position_only) + sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) + samples = tf.concat(sharded_samples, 0) + + if inputs_old is not None: # Restore to not confuse Estimator. + features["inputs"] = inputs_old + return samples + + +@registry.register_hparams +def transformer_adv_small(): + """Set of hyperparameters.""" + hparams = transformer.transformer_small() + hparams.batch_size = 2048 + hparams.learning_rate_warmup_steps = 4000 + hparams.num_hidden_layers = 3 + hparams.hidden_size = 384 + hparams.filter_size = 2048 + hparams.label_smoothing = 0.0 + hparams.weight_decay = 0.1 + hparams.symbol_modality_skip_top = int(True) + hparams.add_hparam("num_compress_steps", 2) + hparams.add_hparam("extra_steps", 0) + hparams.add_hparam("noise_val", 0.3) + hparams.add_hparam("delta_max", 2.0) + return hparams + + +@registry.register_hparams +def transformer_adv_base(): + """Set of hyperparameters.""" + hparams = transformer_adv_small() + hparams.batch_size = 1024 + hparams.hidden_size = 512 + hparams.filter_size = 4096 + hparams.num_hidden_layers = 6 + return hparams From f564d6cb8c4008edf075171f47e52865f9c86520 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 2 Nov 2017 18:21:14 -0700 Subject: [PATCH 0547/4095] v1.2.7 PiperOrigin-RevId: 174410247 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 88ed4a4ea..0669ab1a6 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.2.6', + version='1.2.7', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 6a011a2ee1b860f99d1b86ad9f4ad96c557f8f5b Mon Sep 17 00:00:00 2001 From: resec <resec0109@gmail.com> Date: Fri, 3 Nov 2017 23:40:39 +0800 Subject: [PATCH 0548/4095] [batch_size, input_len] shaped placehoder for tf.VarLenFeature --- tensor2tensor/utils/data_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 9ec147e3d..092aa5628 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -44,7 +44,7 @@ def feature_placeholders(data_fields, data_items_to_decoders): example = {} for field, config in data_fields.items(): if isinstance(config, tf.VarLenFeature): - shape = [None] + shape = [None, None] else: shape = config.shape From 16396e0ae70f31d55fff59e0d9d74baf8cc3fd4a Mon Sep 17 00:00:00 2001 From: Vincent Nguyen <vince62s@yahoo.com> Date: Sat, 4 Nov 2017 22:20:09 +0100 Subject: [PATCH 0549/4095] fix decode_from_file --- tensor2tensor/utils/decoding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 8aa3c0b71..706809180 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -512,7 +512,7 @@ def _get_sorted_inputs(filename, num_shards=1, delimiter="\n"): with tf.gfile.Open(decode_filename) as f: text = f.read() records = text.split(delimiter) - inputs = [record.strip() for record in records] + inputs = [record.strip() for record in records[:-1]] input_lens = [(i, len(line.split())) for i, line in enumerate(inputs)] sorted_input_lens = sorted(input_lens, key=operator.itemgetter(1)) # We'll need the keys to rearrange the inputs back into their original order From 8205442f11d7c669f2f6fee694add0edd576a194 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Mon, 6 Nov 2017 13:53:02 +0100 Subject: [PATCH 0550/4095] add use_last_position_only=True without this option `t2t-decoder` crashes, see #397 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9525e9bcb..9c7cab48b 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ t2t-decoder \ --model=$MODEL \ --hparams_set=$HPARAMS \ --output_dir=$TRAIN_DIR \ - --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \ + --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA,use_last_position_only=True" \ --decode_from_file=$DECODE_FILE cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes From ca22374f6d173a91ab5e8f61a8b87cdeb9f2d7ed Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 2 Nov 2017 19:22:00 -0700 Subject: [PATCH 0551/4095] Enable Xception and ImageNet on TPU PiperOrigin-RevId: 174414254 --- README.md | 2 +- tensor2tensor/data_generators/image.py | 13 +++-- tensor2tensor/layers/common_layers.py | 70 +++++++++++++++----------- tensor2tensor/layers/modalities.py | 51 +++++++++++++------ tensor2tensor/models/xception.py | 11 ++++ tensor2tensor/tpu/tpu_trainer_lib.py | 56 ++++++++++++--------- tensor2tensor/utils/data_reader.py | 2 +- tensor2tensor/utils/decoding.py | 2 +- 8 files changed, 132 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index 9c7cab48b..9525e9bcb 100644 --- a/README.md +++ b/README.md @@ -124,7 +124,7 @@ t2t-decoder \ --model=$MODEL \ --hparams_set=$HPARAMS \ --output_dir=$TRAIN_DIR \ - --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA,use_last_position_only=True" \ + --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \ --decode_from_file=$DECODE_FILE cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 0c3988bc5..751e6df51 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -51,15 +51,17 @@ def resize_by_area(img, size): class ImageProblem(problem.Problem): - def example_reading_spec(self, label_key=None): - if label_key is None: - label_key = "image/class/label" + def example_reading_spec(self, label_repr=None): + if label_repr is None: + label_repr = ("image/class/label", tf.FixedLenFeature((1,), tf.int64)) data_fields = { "image/encoded": tf.FixedLenFeature((), tf.string), "image/format": tf.FixedLenFeature((), tf.string), - label_key: tf.VarLenFeature(tf.int64) } + label_key, label_type = label_repr # pylint: disable=unpacking-non-sequence + data_fields[label_key] = label_type + data_items_to_decoders = { "inputs": tf.contrib.slim.tfexample_decoder.Image( @@ -244,8 +246,9 @@ def hparams(self, defaults, unused_model_hparams): def example_reading_spec(self): label_key = "image/unpadded_label" + label_type = tf.VarLenFeature(tf.int64) return super(ImageFSNS, self).example_reading_spec( - self, label_key=label_key) + self, label_repr=(label_key, label_type)) class Image2ClassProblem(ImageProblem): diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 7089529c8..63d486463 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -396,8 +396,8 @@ def conv_fn(inputs, filters, kernel_size, **kwargs): with tf.variable_scope("part_%d" % split_idx): if separability > 0: parts.append( - tf.layers.conv2d(split, filters // separability, kernel_size, ** - kwargs)) + tf.layers.conv2d(split, filters // separability, kernel_size, + **kwargs)) else: parts.append( tf.layers.separable_conv2d(split, filters // abs_sep, @@ -474,8 +474,8 @@ def noam_norm(x, epsilon=1.0, name=None): with tf.name_scope(name, default_name="noam_norm", values=[x]): shape = x.get_shape() ndims = len(shape) - return (tf.nn.l2_normalize(x, ndims - 1, epsilon=epsilon) * - tf.sqrt(tf.to_float(shape[-1]))) + return (tf.nn.l2_normalize(x, ndims - 1, epsilon=epsilon) * tf.sqrt( + tf.to_float(shape[-1]))) def apply_norm(x, norm_type, depth, epsilon): @@ -864,12 +864,12 @@ def simple_attention(target, source, bias=None): with tf.name_scope("simple_attention", [target, source]): target_shape = tf.shape(target) source_shape = tf.shape(source) - target = tf.reshape(target, [ - target_shape[0], target_shape[1] * target_shape[2], target_shape[3] - ]) - source = tf.reshape(source, [ - source_shape[0], source_shape[1] * source_shape[2], source_shape[3] - ]) + target = tf.reshape( + target, + [target_shape[0], target_shape[1] * target_shape[2], target_shape[3]]) + source = tf.reshape( + source, + [source_shape[0], source_shape[1] * source_shape[2], source_shape[3]]) attention = tf.matmul(target, source, transpose_b=True) attention *= tf.rsqrt(tf.to_float(tf.shape(target)[2])) if bias is not None: @@ -939,9 +939,9 @@ def multiscale_conv_and_attention(x, padding, hparams, source=None): # TODO(noam): The number of different scales should be a hyperparameter. conv_sum = multiscale_conv_sum( x, - hparams.hidden_size, [((hparams.kernel_height**i, hparams.kernel_width** - i), (hparams.kernel_height, hparams.kernel_width)) - for i in xrange(3)], + hparams.hidden_size, + [((hparams.kernel_height**i, hparams.kernel_width**i), + (hparams.kernel_height, hparams.kernel_width)) for i in xrange(3)], "AVG", padding=padding) # For residuals a rescale if necessary if channels differ. @@ -1030,8 +1030,8 @@ def get_timing_signal(length, Tensor of shape (length, 2*num_timescales) """ positions = tf.to_float(tf.range(length)) - log_timescale_increment = (math.log(max_timescale / min_timescale) / - (num_timescales - 1)) + log_timescale_increment = ( + math.log(max_timescale / min_timescale) / (num_timescales - 1)) inv_timescales = min_timescale * tf.exp( tf.to_float(tf.range(num_timescales)) * -log_timescale_increment) scaled_time = tf.expand_dims(positions, 1) * tf.expand_dims(inv_timescales, 0) @@ -1429,8 +1429,8 @@ def weights_concatenated(labels): in_target = tf.equal(tf.mod(sentence_num, 2), 1) # first two tokens of each sentence are boilerplate. sentence_num_plus_one = sentence_num + 1 - shifted = tf.pad(sentence_num_plus_one, [[0, 0], [2, 0], [0, 0], - [0, 0]])[:, :-2, :, :] + shifted = tf.pad(sentence_num_plus_one, + [[0, 0], [2, 0], [0, 0], [0, 0]])[:, :-2, :, :] nonboilerplate = tf.equal(sentence_num_plus_one, shifted) ret = tf.to_float(tf.logical_and(nonboilerplate, in_target)) return ret @@ -1477,7 +1477,10 @@ def padded_cross_entropy(logits, return tf.reduce_sum(xent * weights), tf.reduce_sum(weights) -def smoothing_cross_entropy(logits, labels, vocab_size, confidence, +def smoothing_cross_entropy(logits, + labels, + vocab_size, + confidence, gaussian=False): """Cross entropy with label smoothing to limit over-confidence. @@ -1498,16 +1501,17 @@ def smoothing_cross_entropy(logits, labels, vocab_size, confidence, low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1) # Normalizing constant is the best cross-entropy value with soft targets. # We subtract it just for readability, makes no difference on learning. - normalizing = -(confidence * tf.log(confidence) + tf.to_float( - vocab_size - 1) * low_confidence * tf.log(low_confidence + 1e-20)) + normalizing = -( + confidence * tf.log(confidence) + tf.to_float(vocab_size - 1) * + low_confidence * tf.log(low_confidence + 1e-20)) if gaussian: labels = tf.cast(labels, tf.float32) normal_dist = tf.distributions.Normal(loc=labels, scale=confidence) # Locations to evaluate the probability distributions. - soft_targets = normal_dist.prob(tf.cast(tf.range(vocab_size), tf.float32) - [:, None, None, None, None]) + soft_targets = normal_dist.prob( + tf.cast(tf.range(vocab_size), tf.float32)[:, None, None, None, None]) # Reordering soft_targets from [vocab_size, batch_size, ?, ?, ?] to match # logits: [batch_size, ?, ?, ?, vocab_size] soft_targets = tf.transpose(soft_targets, perm=[1, 2, 3, 4, 0]) @@ -1805,8 +1809,8 @@ def to_tensor(self): product = tf.matmul(flat_a, self.b, transpose_b=True) product_shape = tf.concat([tf.shape(self.a)[:-1], [result_dim]], 0) product = tf.reshape(product, product_shape) - product.set_shape(self.a.get_shape().as_list()[:-1] + - [self.b.get_shape()[0]]) + product.set_shape( + self.a.get_shape().as_list()[:-1] + [self.b.get_shape()[0]]) return product @@ -1963,8 +1967,8 @@ def _fn_with_custom_grad(fn, inputs, grad_fn, use_global_vars=False): fn(*inputs) """ vs = tf.get_variable_scope() - get_vars_fn = (vs.global_variables if use_global_vars else - vs.trainable_variables) + get_vars_fn = ( + vs.global_variables if use_global_vars else vs.trainable_variables) len_before_vars = len(get_vars_fn()) inputs = list(inputs) outputs = fn(*inputs) @@ -2057,12 +2061,14 @@ def forward_internal(x, f1, f2, scale, bias): y = tf.concat(ys, 0) y = tf.reshape(y, tf.shape(x)) return y + key = ("conv_hidden_relu_memory_efficient %s" % epsilon) if not forget: forward_fn = forward_internal elif key in _function_cache: forward_fn = _function_cache[key] else: + @function.Defun(compiled=True) def grad_fn(x, f1, f2, scale, bias, dy): with tf.control_dependencies([dy]): @@ -2098,8 +2104,8 @@ def grad_fn(x, f1, f2, scale, bias, dy): dx = tf.reshape(dx, x_shape) return dx, df1, df2, dscale, dbias - @function.Defun(grad_func=grad_fn, compiled=True, - separate_compiled_gradients=True) + @function.Defun( + grad_func=grad_fn, compiled=True, separate_compiled_gradients=True) def forward_fn(x, f1, f2, scale, bias): return forward_internal(x, f1, f2, scale, bias) @@ -2119,3 +2125,11 @@ def forward_fn(x, f1, f2, scale, bias): y = forward_internal(x, f1, f2, scale, bias) y.set_shape(x.get_shape()) return y + + +def shape_dim(x, dim): + """Return shape(x)[dim], statically if possible.""" + static = x.get_shape().as_list() + if dim < len(static) and static[dim] is not None: + return static[dim] + return tf.shape(x)[dim] diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 9e0f73045..df6f002cc 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -18,6 +18,8 @@ from __future__ import division from __future__ import print_function +import math + # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin @@ -128,8 +130,8 @@ def top(self, body_output, _): shape = tf.shape(body_output)[:-1] body_output = tf.reshape(body_output, [-1, self._body_input_depth]) logits = tf.matmul(body_output, var, transpose_b=True) - logits = tf.reshape( - logits, tf.concat([shape, [1, self._vocab_size]], 0)) + logits = tf.reshape(logits, tf.concat([shape, [1, self._vocab_size]], + 0)) return logits @@ -160,25 +162,29 @@ def bottom(self, inputs): def targets_bottom(self, inputs): with tf.variable_scope(self.name): # Reshape inputs to 2-d tensor and embed the RGB pixel values. - shape = tf.shape(inputs) - inputs = common_layers.flatten4d3d(inputs) ret = common_layers.embedding( - tf.to_int32(inputs), + tf.to_int32(common_layers.flatten4d3d(inputs)), self.top_dimensionality, self._body_input_depth, name="input_rgb_embedding") if self._model_hparams.multiply_embedding_mode == "sqrt_depth": ret *= self._body_input_depth**0.5 - ret = tf.reshape(ret, [shape[0], shape[1], shape[2], - self._body_input_depth * 3]) + + reshape_shape = [common_layers.shape_dim(inputs, i) for i in range(3)] + reshape_shape.append(self._body_input_depth * 3) + ret = tf.reshape(ret, reshape_shape) return tf.layers.dense(ret, self._body_input_depth) def top(self, body_output, _): with tf.variable_scope("rgb_softmax"): - shape = tf.shape(body_output) + + reshape_shape = [ + common_layers.shape_dim(body_output, i) for i in range(3) + ] dim = body_output.get_shape().as_list()[-1] // 3 - out = tf.reshape(body_output, [shape[0], shape[1], shape[2], - self._channels, dim]) + reshape_shape.extend([self._channels, dim]) + + out = tf.reshape(body_output, reshape_shape) res = tf.layers.dense(out, self.top_dimensionality) if not tf.get_variable_scope().reuse: res_argmax = tf.cast(tf.argmax(res, axis=-1), tf.uint8) @@ -393,20 +399,33 @@ def top(self, body_output, _): Args: body_output: A Tensor with shape [batch, ?, ?, body_output_size]. + Returns: a Tensors, each with shape [batch_size, ?, ?, vocab_size] + + Raises: + ValueError: if 2d and Tensor cannot be made a square in the spatial dims. """ with tf.variable_scope(self.name): x = body_output # Assume input is a square with self._body_input_depth channels. if self._is_2d: - length_float = tf.to_float(tf.shape(x)[1]) - length_float *= tf.to_float(tf.shape(x)[2]) - spatial_dim_float = tf.sqrt(length_float) - spatial_dim = tf.to_int32(spatial_dim_float) - x_depth = int(x.get_shape()[3]) - x = tf.reshape(x, [-1, spatial_dim, spatial_dim, x_depth]) + x_shape = x.get_shape().as_list() + if x_shape[1] is None or x_shape[2] is None: + length_float = tf.to_float(tf.shape(x)[1]) + length_float *= tf.to_float(tf.shape(x)[2]) + spatial_dim_float = tf.sqrt(length_float) + spatial_dim = tf.to_int32(spatial_dim_float) + x_depth = x_shape[3] + x = tf.reshape(x, [-1, spatial_dim, spatial_dim, x_depth]) + elif x_shape[1] != x_shape[2]: + spatial_dim = int(math.sqrt(float(x_shape[1] * x_shape[2]))) + if spatial_dim * spatial_dim != x_shape[1] * x_shape[2]: + raise ValueError("Assumed inputs were square-able but they were " + "not. Shape: %s" % x_shape) + x = tf.reshape(x, [-1, spatial_dim, spatial_dim, x_depth]) + x = common_layers.conv_block_downsample(x, self._kernel, self._strides, self._padding) x = tf.nn.relu(x) diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index a61687f48..e7caa3419 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -97,3 +97,14 @@ def xception_tiny(): hparams.num_hidden_layers = 4 hparams.learning_rate_decay_scheme = "none" return hparams + + +@registry.register_hparams +def xception_tiny_tpu(): + hparams = xception_base() + hparams.tpu_batch_size_per_shard = 2 + # The base exp50k scheme uses a cond which fails to compile on TPU + hparams.learning_rate_decay_scheme = "noam" + hparams.num_hidden_layers = 2 + hparams.hidden_size = 128 + return hparams diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 85a3cdf42..274baab82 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -57,21 +57,9 @@ def input_fn(mode, params): num_threads = 4 if is_training else 1 batch_size = params["batch_size"] - batching_scheme = { - "boundaries": [], - "batch_sizes": [batch_size], - "min_length": hparams.min_length, - "max_length": hparams.max_length, - "window_size": batch_size, - "padded_shapes": { - "inputs": [hparams.max_length], - "targets": [hparams.max_length], - }, - } - def _valid_size(example): - return data_reader.example_valid_size( - example, batching_scheme["min_length"], batching_scheme["max_length"]) + return data_reader.example_valid_size(example, hparams.min_length, + hparams.max_length) def define_shapes(example): """Set the right shapes for the features.""" @@ -101,14 +89,20 @@ def define_shapes(example): mode=mode, data_dir=data_dir, num_threads=num_threads, hparams=hparams) dataset = dataset.map( data_reader.cast_int64_to_int32, num_threads=num_threads) - dataset = dataset.filter(_valid_size) - if is_training: - dataset = dataset.shuffle(100) # TODO(rsepassi): In eval mode, should not repeat. Do so because TPU seems # to crash if it runs out of data during eval. dataset = dataset.repeat(None) - dataset = data_reader.padded_batch(dataset, batch_size, - batching_scheme["padded_shapes"]) + + if are_shapes_fully_defined(dataset.output_shapes): + dataset = dataset.batch(batch_size) + else: + # If shapes are not fully defined, filter out long ones and pad to + # hparams.max_length + dataset = dataset.filter(_valid_size) + padded_shapes = fill_shape_nones( + dataset.output_shapes, none_filler=hparams.max_length) + dataset = data_reader.padded_batch(dataset, batch_size, padded_shapes) + if not is_training: dataset = dataset.map( lambda f: pad_batch(f, batch_size), num_parallel_calls=num_threads) @@ -121,6 +115,21 @@ def define_shapes(example): return input_fn +def are_shapes_fully_defined(shapes_dict): + for _, shape in shapes_dict.iteritems(): + if not shape.is_fully_defined(): + return False + return True + + +def fill_shape_nones(shapes_dict, none_filler=None): + padded_shapes = {} + for key, shape in shapes_dict.iteritems(): + padded_shapes[key] = [(dim if dim is not None else none_filler) + for dim in shape.as_list()] + return padded_shapes + + def pad_batch(features, batch_size): """Pad each feature in features to batch_size on dim 0.""" ts = [] @@ -178,10 +187,11 @@ def model_fn(features, labels, mode, params, config): with tf.variable_scope(target_modality.name): logits = target_modality.top(outputs, labels) - # Ensure the length is known statically - shape = [None] * logits.get_shape().ndims - shape[1] = hparams.max_length - logits.set_shape(logits.get_shape().merge_with(shape)) + # If the length dim is unknown fix it to max_length + if logits.get_shape().as_list()[1] is None: + shape = [None] * logits.get_shape().ndims + shape[1] = hparams.max_length + logits.set_shape(logits.get_shape().merge_with(shape)) # Loss loss_num, loss_den = target_modality.loss(logits, labels) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 092aa5628..9ec147e3d 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -44,7 +44,7 @@ def feature_placeholders(data_fields, data_items_to_decoders): example = {} for field, config in data_fields.items(): if isinstance(config, tf.VarLenFeature): - shape = [None, None] + shape = [None] else: shape = config.shape diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 706809180..8aa3c0b71 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -512,7 +512,7 @@ def _get_sorted_inputs(filename, num_shards=1, delimiter="\n"): with tf.gfile.Open(decode_filename) as f: text = f.read() records = text.split(delimiter) - inputs = [record.strip() for record in records[:-1]] + inputs = [record.strip() for record in records] input_lens = [(i, len(line.split())) for i, line in enumerate(inputs)] sorted_input_lens = sorted(input_lens, key=operator.itemgetter(1)) # We'll need the keys to rearrange the inputs back into their original order From ee08864057b10287493ae9fb86d60e815d3f4e54 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 3 Nov 2017 14:09:10 -0700 Subject: [PATCH 0552/4095] internal PiperOrigin-RevId: 174511245 --- tensor2tensor/utils/trainer_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 57d45fb50..fa597aa9f 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -60,7 +60,14 @@ model.""") flags.DEFINE_string("problems", "", "Dash separated list of problems to " "solve.") -flags.DEFINE_string("data_dir", None, "Directory with training data.") + + +# data_dir is a common flag name - catch conflicts and define it once. +try: + flags.DEFINE_string("data_dir", None, "Directory with training data.") +except flags.DuplicateFlagError: + tf.logging.info("data_dir already defined. Ignoring.") + flags.DEFINE_integer("train_steps", 250000, "The number of steps to run training for.") flags.DEFINE_string("eval_early_stopping_metric", "loss", From fa3f0db36bf571b3b7f5d9221ef021dc132a46ab Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Fri, 3 Nov 2017 15:39:27 -0700 Subject: [PATCH 0553/4095] Add hparams_set transformer_small_tpu PiperOrigin-RevId: 174524066 --- tensor2tensor/models/transformer.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 1d8603687..3531baaf4 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -1041,3 +1041,23 @@ def transformer_tpu(): hparams.tpu_batch_size_per_shard = 16 return hparams + + +@registry.register_hparams +def transformer_small_tpu(): + """TPU-friendly version of transformer_small. + + Returns: + an hparams object. + """ + hparams = transformer_small() + hparams.use_pad_remover = int(False) # where op not supported + hparams.optimizer = "TrueAdam" + hparams.learning_rate = 0.2 + + # Inputs + # Each example in the batch will be of (padded) length hparams.max_length + hparams.max_length = 64 + hparams.tpu_batch_size_per_shard = 16 + + return hparams From fbce51888b4f228e7614342401832b9a04cebd62 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 3 Nov 2017 18:40:02 -0700 Subject: [PATCH 0554/4095] Use Experiment for TPU PiperOrigin-RevId: 174541908 --- tensor2tensor/models/transformer.py | 47 ++++++++--- tensor2tensor/tpu/tpu_trainer.py | 83 +++++++++---------- tensor2tensor/tpu/tpu_trainer_lib.py | 99 ++++++++++++----------- tensor2tensor/tpu/tpu_trainer_lib_test.py | 5 +- tensor2tensor/utils/trainer_utils.py | 5 +- 5 files changed, 134 insertions(+), 105 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 3531baaf4..32fef0089 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -547,8 +547,7 @@ def transformer_decoder(decoder_input, x, hparams), encoder_output, encoder_decoder_attention_bias, hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, + hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): @@ -1031,18 +1030,41 @@ def transformer_relative_big(): def transformer_tpu(): """HParams for Transformer model on TPU.""" hparams = transformer_base() - hparams.use_pad_remover = int(False) # where op not supported - hparams.optimizer = "TrueAdam" - hparams.learning_rate = 0.2 + update_hparams_for_tpu(hparams) + return hparams - # Inputs - # Each example in the batch will be of (padded) length hparams.max_length - hparams.max_length = 64 - hparams.tpu_batch_size_per_shard = 16 +@registry.register_hparams +def transformer_tiny_tpu(): + hparams = transformer_tiny() + update_hparams_for_tpu(hparams) return hparams +@registry.register_ranged_hparams +def transformer_tiny_tpu_range(rhp): + """Small range of hyperparameters.""" + hparams = transformer_tiny_tpu() + common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp) + rhp.set_float("learning_rate", 0.3, 3.0, scale=rhp.LOG_SCALE) + rhp.set_float("weight_decay", 0.0, 2.0) + + +@registry.register_ranged_hparams +def transformer_tpu_range(rhp): + """Small range of hyperparameters.""" + hparams = transformer_tpu() + common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp) + # After starting from base, set intervals for some parameters. + rhp.set_float("learning_rate", 0.3, 3.0, scale=rhp.LOG_SCALE) + rhp.set_discrete("learning_rate_warmup_steps", + [1000, 2000, 4000, 8000, 16000]) + rhp.set_float("initializer_gain", 0.5, 2.0) + rhp.set_float("optimizer_adam_beta1", 0.85, 0.95) + rhp.set_float("optimizer_adam_beta2", 0.97, 0.99) + rhp.set_float("weight_decay", 0.0, 2.0) + + @registry.register_hparams def transformer_small_tpu(): """TPU-friendly version of transformer_small. @@ -1051,6 +1073,11 @@ def transformer_small_tpu(): an hparams object. """ hparams = transformer_small() + update_hparams_for_tpu(hparams) + return hparams + + +def update_hparams_for_tpu(hparams): hparams.use_pad_remover = int(False) # where op not supported hparams.optimizer = "TrueAdam" hparams.learning_rate = 0.2 @@ -1059,5 +1086,3 @@ def transformer_small_tpu(): # Each example in the batch will be of (padded) length hparams.max_length hparams.max_length = 64 hparams.tpu_batch_size_per_shard = 16 - - return hparams diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index d9b20ee75..e75d69b1c 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -23,9 +23,9 @@ # Dependency imports from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor import problems # pylint: disable=unused-import +from tensor2tensor import problems as problems_lib # pylint: disable=unused-import from tensor2tensor.tpu import tpu_trainer_lib as lib -from tensor2tensor.utils import trainer_utils +from tensor2tensor.utils import registry import tensorflow as tf @@ -33,59 +33,54 @@ FLAGS = flags.FLAGS flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") -flags.DEFINE_string("output_dir", "", "Base output directory for run.") -flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") flags.DEFINE_integer("iterations_per_loop", 1000, "Number of iterations in a TPU training loop.") +# To maintain compatibility with some internal libs, we guard against these flag +# definitions possibly erroring. Apologies for the ugliness. +try: + flags.DEFINE_string("master", "", "Address of TensorFlow master.") + flags.DEFINE_string("output_dir", "", "Base output directory for run.") + flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") +except: # pylint: disable=bare-except + pass -def main(unused_argv): - tf.logging.set_verbosity(tf.logging.INFO) - tf.set_random_seed(123) - assert len(FLAGS.problems.split("-")) == 1 +def get_problem_name(): + problems = FLAGS.problems.split("-") + assert len(problems) == 1 + return problems[0] + - hparams = trainer_utils.create_hparams( - FLAGS.hparams_set, FLAGS.data_dir, passed_hparams=FLAGS.hparams) - trainer_utils.add_problem_hparams(hparams, FLAGS.problems) +def create_hparams(): + hparams = registry.hparams(FLAGS.hparams_set)() + if FLAGS.hparams: + hparams = hparams.parse(FLAGS.hparams) + return hparams - problem = hparams.problem_instances[0] - model_fn = lib.get_model_fn(FLAGS.model, hparams) - input_fn = lib.get_input_fn(FLAGS.data_dir, problem, hparams) +def create_experiment_fn(): + return lib.make_experiment_fn(FLAGS.model, get_problem_name(), FLAGS.data_dir, + FLAGS.train_steps, FLAGS.eval_steps, + FLAGS.local_eval_frequency) - estimator = lib.make_estimator( - model_fn=model_fn, - output_dir=FLAGS.output_dir, + +def create_run_config(): + return lib.create_run_config( + model_dir=FLAGS.output_dir, master=FLAGS.master, + iterations_per_loop=FLAGS.iterations_per_loop, num_shards=FLAGS.tpu_num_shards, - batch_size=hparams.tpu_batch_size_per_shard * FLAGS.tpu_num_shards, - log_device_placement=FLAGS.log_device_placement, - iterations_per_loop=FLAGS.iterations_per_loop) - - if not FLAGS.train_steps: - assert FLAGS.eval_steps - estimator.evaluate( - lambda params: input_fn(tf.estimator.ModeKeys.EVAL, params), - steps=FLAGS.eval_steps) - return - - num_rounds = FLAGS.train_steps // FLAGS.local_eval_frequency - steps_per_round = [FLAGS.local_eval_frequency] * num_rounds - remainder = FLAGS.train_steps % FLAGS.local_eval_frequency - if remainder: - steps_per_round.append(remainder) - - for num_steps in steps_per_round: - estimator.train( - lambda params: input_fn(tf.estimator.ModeKeys.TRAIN, params), - steps=num_steps) - if FLAGS.eval_steps: - estimator.evaluate( - lambda params: input_fn(tf.estimator.ModeKeys.EVAL, params), - steps=FLAGS.eval_steps) - tf.logging.info("Training and evaluation complete.") + log_device_placement=FLAGS.log_device_placement) + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + tf.set_random_seed(123) + + exp_fn = create_experiment_fn() + exp = exp_fn(create_run_config(), create_hparams()) + exp.continuous_train_and_eval() if __name__ == "__main__": diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 274baab82..e39defa29 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -13,11 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Library for training on TPU. See tpu_trainer.py. - -Currently only supports training and evaluation for text-to-text and text -autoregressive problems. -""" +"""Library for training on TPU. See tpu_trainer.py.""" from __future__ import absolute_import from __future__ import division @@ -32,9 +28,9 @@ from tensor2tensor.utils import metrics from tensor2tensor.utils import model_builder from tensor2tensor.utils import registry +from tensor2tensor.utils import trainer_utils import tensorflow as tf -from tensorflow.python.util import nest def create_dummy_vars(): @@ -48,10 +44,10 @@ def create_dummy_vars(): tf.get_variable("problem_0_steps", initializer=0, trainable=False) -def get_input_fn(data_dir, problem, hparams): +def get_input_fn(mode, hparams): """Get basic T2T input fn.""" - def input_fn(mode, params): + def input_fn(params): """Input fn.""" is_training = mode == tf.estimator.ModeKeys.TRAIN num_threads = 4 if is_training else 1 @@ -85,6 +81,8 @@ def define_shapes(example): return example + problem = hparams.problem_instances[0] + data_dir = hparams.data_dir dataset = problem.dataset( mode=mode, data_dir=data_dir, num_threads=num_threads, hparams=hparams) dataset = dataset.map( @@ -103,9 +101,6 @@ def define_shapes(example): dataset.output_shapes, none_filler=hparams.max_length) dataset = data_reader.padded_batch(dataset, batch_size, padded_shapes) - if not is_training: - dataset = dataset.map( - lambda f: pad_batch(f, batch_size), num_parallel_calls=num_threads) dataset = dataset.map(define_shapes, num_parallel_calls=num_threads) dataset = dataset.prefetch(1) features = dataset.make_one_shot_iterator().get_next() @@ -125,29 +120,13 @@ def are_shapes_fully_defined(shapes_dict): def fill_shape_nones(shapes_dict, none_filler=None): padded_shapes = {} for key, shape in shapes_dict.iteritems(): - padded_shapes[key] = [(dim if dim is not None else none_filler) - for dim in shape.as_list()] + padded_shapes[key] = [ + (dim if dim is not None else none_filler) for dim in shape.as_list() + ] return padded_shapes -def pad_batch(features, batch_size): - """Pad each feature in features to batch_size on dim 0.""" - ts = [] - for t in nest.flatten(features): - before_pads = [0] * t.get_shape().ndims - after_pads = [0] * t.get_shape().ndims - batch_pad = tf.convert_to_tensor(batch_size) - tf.shape(t)[0] - after_pads[0] = batch_pad - pads = list(zip(before_pads, after_pads)) - old_shape = t.get_shape().as_list() - old_shape[0] = batch_size - t = tf.pad(t, pads) - t.set_shape(old_shape) - ts.append(t) - return nest.pack_sequence_as(features, ts) - - -def get_model_fn(model, hp, use_tpu=True): +def get_model_fn(model_name, hp, use_tpu=True): """Get simple T2T model fn.""" def model_fn(features, labels, mode, params, config): @@ -162,7 +141,7 @@ def model_fn(features, labels, mode, params, config): # Instantiate model and retrieve modalities. Note that autoregressive models # have no input modality. - model_class = registry.model(model)(hparams, mode, problem_hp) + model_class = registry.model(model_name)(hparams, mode, problem_hp) input_modality = problem_hp.input_modality.get("inputs") target_modality = problem_hp.target_modality @@ -285,17 +264,14 @@ def _clip_gradients_by_norm(grads_and_vars, clip_gradients): return list(zip(clipped_gradients, variables)) -def make_estimator(model_fn, - output_dir, - master="", - batch_size=16, - iterations_per_loop=1000, - num_shards=8, - per_host_input_for_training=True, - use_tpu=True, - log_device_placement=False, - save_checkpoints_steps=1000): - """Make TPUEstimator.""" +def create_run_config(master="", + model_dir=None, + iterations_per_loop=1000, + num_shards=8, + per_host_input_for_training=True, + log_device_placement=False, + save_checkpoints_steps=1000): + """Create TPUConfig and tpu.RunConfig.""" tpu_config = tf.contrib.tpu.TPUConfig( iterations_per_loop=iterations_per_loop, num_shards=num_shards, @@ -303,17 +279,50 @@ def make_estimator(model_fn, session_config = tf.ConfigProto( allow_soft_placement=True, log_device_placement=log_device_placement) run_config = tf.contrib.tpu.RunConfig( + model_dir=model_dir, session_config=session_config, save_summary_steps=0, save_checkpoints_steps=save_checkpoints_steps, tpu_config=tpu_config, master=master, evaluation_master=master) + return run_config + +def create_estimator(model_fn, run_config, batch_size=16): return tf.contrib.tpu.TPUEstimator( model_fn=model_fn, - use_tpu=use_tpu, - model_dir=output_dir, + model_dir=run_config.model_dir, config=run_config, train_batch_size=batch_size, eval_batch_size=batch_size * 2) + + +def create_experiment(run_config, hparams, model_name, problem_name, data_dir, + train_steps, eval_steps, min_eval_frequency): + """Create Experiment.""" + hparams.add_hparam("data_dir", data_dir) + trainer_utils.add_problem_hparams(hparams, problem_name) + batch_size = ( + hparams.tpu_batch_size_per_shard * run_config.tpu_config.num_shards) + model_fn = get_model_fn(model_name, hparams) + estimator = create_estimator(model_fn, run_config, batch_size) + train_input_fn = get_input_fn(tf.estimator.ModeKeys.TRAIN, hparams) + eval_input_fn = get_input_fn(tf.estimator.ModeKeys.EVAL, hparams) + return tf.contrib.learn.Experiment( + estimator=estimator, + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + train_steps=train_steps, + eval_steps=eval_steps, + min_eval_frequency=min_eval_frequency, + train_steps_per_iteration=min_eval_frequency) + + +def make_experiment_fn(*args, **kwargs): + """Wrapper for canonical experiment_fn. See create_experiment.""" + + def experiment_fn(run_config, hparams): + return create_experiment(run_config, hparams, *args, **kwargs) + + return experiment_fn diff --git a/tensor2tensor/tpu/tpu_trainer_lib_test.py b/tensor2tensor/tpu/tpu_trainer_lib_test.py index bbcf4ae89..de36856ca 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib_test.py +++ b/tensor2tensor/tpu/tpu_trainer_lib_test.py @@ -42,15 +42,14 @@ def testSmoke(self): hparams = trainer_utils.create_hparams(hparams_set, data_dir) trainer_utils.add_problem_hparams(hparams, problem_name) - problem = hparams.problem_instances[0] model_fn = lib.get_model_fn(model_name, hparams, use_tpu=False) - input_fn = lib.get_input_fn(data_dir, problem, hparams) + input_fn = lib.get_input_fn(tf.estimator.ModeKeys.TRAIN, hparams) params = {"batch_size": 16} config = tf.contrib.tpu.RunConfig( tpu_config=tf.contrib.tpu.TPUConfig(num_shards=2)) - features, targets = input_fn(tf.estimator.ModeKeys.TRAIN, params) + features, targets = input_fn(params) with tf.variable_scope("training"): spec = model_fn(features, targets, tf.estimator.ModeKeys.TRAIN, params, config) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index fa597aa9f..70faab24a 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -65,8 +65,8 @@ # data_dir is a common flag name - catch conflicts and define it once. try: flags.DEFINE_string("data_dir", None, "Directory with training data.") -except flags.DuplicateFlagError: - tf.logging.info("data_dir already defined. Ignoring.") +except: # pylint: disable=bare-except + pass flags.DEFINE_integer("train_steps", 250000, "The number of steps to run training for.") @@ -199,6 +199,7 @@ def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, eval_steps=eval_steps, train_monitors=train_monitors, eval_hooks=eval_hooks, + train_steps_per_iteration=FLAGS.local_eval_frequency, eval_delay_secs=0, **optional_kwargs) From d007d4797387b8decb2f82dffdb9d356a2dbc0b2 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 6 Nov 2017 16:52:12 -0800 Subject: [PATCH 0555/4095] Update RangedHParams to accept bools PiperOrigin-RevId: 174783850 --- tensor2tensor/data_generators/problem.py | 2 +- tensor2tensor/layers/common_hparams.py | 49 +++++++++++++++++------- tensor2tensor/models/aligned.py | 18 ++++----- tensor2tensor/models/attention_lm.py | 6 +-- tensor2tensor/models/attention_lm_moe.py | 42 ++++++++++---------- tensor2tensor/models/lstm.py | 2 +- tensor2tensor/models/slicenet.py | 2 +- tensor2tensor/models/transformer.py | 25 +++++++----- tensor2tensor/models/transformer_adv.py | 2 +- tensor2tensor/models/transformer_moe.py | 14 +++---- tensor2tensor/models/transformer_vae.py | 12 +++--- tensor2tensor/tpu/tpu_trainer_lib.py | 7 ++-- tensor2tensor/utils/diet.py | 4 +- 13 files changed, 106 insertions(+), 79 deletions(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index c826e29dd..f707090f1 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -536,7 +536,7 @@ def _default_hparams(): # During inference for autoregressive problems, if the batch_size is 1, # the inference will stop when the model predict a text_encoder.EOS_ID # token. - stop_at_eos=int(False), + stop_at_eos=False, # Modalities used to map from input features to a space compatible with # chosen model architecture. One modality spec (which is a 2-tuple, diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index c8ba0d03c..ef2d494fb 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -35,7 +35,7 @@ def basic_params1(): batch_size=4096, # in tokens per batch per gpu # Fixed batch size turns off bucketing during training mode # and uses batch_size as minibatch size (use small batch_size<=32) - use_fixed_batch_size=int(False), + use_fixed_batch_size=False, num_hidden_layers=4, kernel_height=3, kernel_width=1, @@ -46,7 +46,7 @@ def basic_params1(): dropout=0.2, clip_grad_norm=2.0, grad_noise_scale=0.0, - summarize_grads=int(False), + summarize_grads=False, initializer="orthogonal", initializer_gain=1.5, label_smoothing=0.1, @@ -65,7 +65,7 @@ def basic_params1(): sampling_temp=1.0, # temperature for sampling problem_choice="adaptive", # "uniform", "adaptive", "distributed" # expand the logits a piece at a time - saves memory. - factored_logits=int(False), + factored_logits=False, multiply_embedding_mode="sqrt_depth", # Parameters related to mixtures of experts. moe_hidden_sizes="2048", # hidden layer sizes (comma-separated) @@ -115,16 +115,16 @@ def basic_params1(): length_bucket_step=1.1, # If set to True, drop sequences longer than max_length during eval. # This affects the validity of the evaluation metrics. - eval_drop_long_sequences=int(False), + eval_drop_long_sequences=False, # TODO(lukaszkaiser): these parameters should probably be set elsewhere. # in SymbolModality, share the output embeddings and the softmax # variables. # You can also share the input embeddings with the output embeddings # by using a problem_hparams that uses the same modality object for # the input_modality and target_modality. - shared_embedding_and_softmax_weights=int(False), + shared_embedding_and_softmax_weights=False, # In SymbolModality, skip the top layer, assume we're providing logits. - symbol_modality_skip_top=int(False), + symbol_modality_skip_top=False, # For each feature for which you want to override the default input # modality, add an entry to this semicolon-separated string. Entries are # formatted "feature_name:modality_type:modality_name", e.g. @@ -178,7 +178,8 @@ def basic_params1(): scheduled_sampling_gold_mixin_prob=0.5, # This is the actual batch size, *not* tokens per batch (i.e. for # language models this is the number of sentences in the batch) - tpu_batch_size_per_shard=24,) + tpu_batch_size_per_shard=24, + ) class RangedHParams(object): @@ -192,6 +193,7 @@ class RangedHParams(object): def __init__(self): self._categorical_params = {} self._discrete_params = {} + self._discrete_float_params = {} self._float_params = {} self._int_params = {} @@ -203,7 +205,8 @@ def _check_reset_and_type_change(self, name, orig_ctr): ctr_names = [(self._categorical_params, "categorical"), (self._discrete_params, "discrete"), - (self._float_params, "float"), (self._int_params, "int")] + (self._float_params, "float"), (self._int_params, "int"), + (self._discrete_float_params, "discrete_float")] ctrs, names = list(zip(*ctr_names)) orig_name = names[ctrs.index(orig_ctr)] @@ -226,13 +229,30 @@ def set_discrete(self, name, feasible_points, scale=None, length=None): self._discrete_params[name] = (name, feasible_points, scale, length) def set_float(self, name, min_val, max_val, scale=None, length=None): + if name in self._discrete_float_params: + del self._discrete_float_params[name] self._check_reset_and_type_change(name, self._float_params) self._float_params[name] = (name, min_val, max_val, scale, length) + def set_discrete_float(self, name, val): + self._check_reset_and_type_change(name, self._discrete_float_params) + self._discrete_float_params[name] = (name, [val]) + def set_int(self, name, min_val, max_val, scale=None, length=None): self._check_reset_and_type_change(name, self._int_params) self._int_params[name] = (name, min_val, max_val, scale, length) + def fix_select_params(self, hp): + ctrs = [ + self._categorical_params, self._discrete_params, + self._discrete_float_params, self._float_params, self._int_params + ] + for key, val in hp.values().iteritems(): + for ctr in ctrs: + if key in ctr: + del ctr[key] + self.set_discrete(key, [val]) + def fill_ranged_hparams_from_hparams(hparams, ranged_hparams): """Fill ranged_hparams with singleton values from hparams. @@ -240,7 +260,8 @@ def fill_ranged_hparams_from_hparams(hparams, ranged_hparams): HParams are placed in RangedHParams with the following functions, according to type: * int: set_discrete - * float: set_float + * bool: set_discrete + * float: set_discrete_float * str: set_categorical Args: @@ -260,8 +281,10 @@ def fill_ranged_hparams_from_hparams(hparams, ranged_hparams): val = getattr(hparams, name) if hp_type == int: ranged_hparams.set_discrete(name, [val]) + elif hp_type == bool: + ranged_hparams.set_discrete(name, [int(val)]) elif hp_type == float: - ranged_hparams.set_float(name, val, val) + ranged_hparams.set_discrete_float(name, val) elif hp_type == str: ranged_hparams.set_categorical(name, [val]) else: @@ -295,6 +318,6 @@ def basic_range1(ranged_hparams): rhp.set_float("optimizer_adam_epsilon", 1e-7, 1e-2, scale=rhp.LOG_SCALE) rhp.set_float("optimizer_adam_beta1", 0.8, 0.9) rhp.set_float("optimizer_adam_beta2", 0.995, 0.999) - rhp.set_categorical("optimizer", [ - "Adam", "Adagrad", "Momentum", "RMSProp", "SGD", "YellowFin" - ]) + rhp.set_categorical( + "optimizer", + ["Adam", "Adagrad", "Momentum", "RMSProp", "SGD", "YellowFin"]) diff --git a/tensor2tensor/models/aligned.py b/tensor2tensor/models/aligned.py index a0e92da94..6dddc8c3d 100644 --- a/tensor2tensor/models/aligned.py +++ b/tensor2tensor/models/aligned.py @@ -290,7 +290,7 @@ def aligned_base(): hparams.weight_decay = 0.0 hparams.optimizer_adam_beta1 = 0.9 hparams.optimizer_adam_beta2 = 0.98 - hparams.shared_embedding_and_softmax_weights = int(True) + hparams.shared_embedding_and_softmax_weights = True hparams.add_hparam("ffn_hidden_sizes", "2048") # Add new ones like this. hparams.moe_num_experts = 32 hparams.layer_preprocess_sequence = "n" @@ -306,28 +306,28 @@ def aligned_base(): hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("pos", "timing") # timing, none # moe params. local attention moe. - hparams.add_hparam("attention_local", int(False)) + hparams.add_hparam("attention_local", False) hparams.add_hparam("attention_moe_k", 2) hparams.add_hparam("attention_num_experts", 16) - hparams.add_hparam("attention_split_batch", int(False)) + hparams.add_hparam("attention_split_batch", False) # Key, query and value dimensions for the attention hparams.add_hparam("attention_kq_size", 128) hparams.add_hparam("attention_v_size", 256) # Loss coef for load balancing hparams.add_hparam("attention_load_balance", 2e-2) - hparams.add_hparam("diet_experts", int(False)) - hparams.add_hparam("memory_efficient_ffn", int(False)) + hparams.add_hparam("diet_experts", False) + hparams.add_hparam("memory_efficient_ffn", False) hparams.add_hparam("local_attention_window", 128) hparams.add_hparam("attention_num_groups", 8) hparams.add_hparam("memory_target_density", 2.0) hparams.add_hparam("multiplicative_overhead", 1.25) hparams.add_hparam("multiplicative_overhead_eval", 2.0) - hparams.add_hparam("attention_image_summary", int(True)) + hparams.add_hparam("attention_image_summary", True) # LSH params - hparams.add_hparam("lsh_truncated", int(True)) + hparams.add_hparam("lsh_truncated", True) # For testing right-masking. # This is not implemented in all layers. - hparams.add_hparam("mask_right", int(False)) + hparams.add_hparam("mask_right", False) return hparams @@ -547,7 +547,7 @@ def aligned_8k_grouped(): """ hparams = aligned_grouped() hparams.batch_size = 8192 - # hparams.attention_image_summary = int(False) + # hparams.attention_image_summary = False hparams.num_groups = 16 hparams.multiplicative_overhead = 1.1 return hparams diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 696057233..f4b4d7e45 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -146,7 +146,7 @@ def attention_lm_base(): hparams.optimizer_adam_beta1 = 0.9 hparams.optimizer_adam_beta2 = 0.98 hparams.label_smoothing = 0.0 - hparams.shared_embedding_and_softmax_weights = int(False) + hparams.shared_embedding_and_softmax_weights = False hparams.add_hparam("filter_size", 4096) # Add new ones like this. # attention-related flags @@ -158,7 +158,7 @@ def attention_lm_base(): hparams.add_hparam("attention_dropout", 0.0) hparams.add_hparam("relu_dropout", 0.0) hparams.add_hparam("pos", "timing") # timing, none - hparams.add_hparam("encoder_full_attention", int(False)) + hparams.add_hparam("encoder_full_attention", False) return hparams @@ -191,7 +191,7 @@ def attention_lm_translation(): hparams.prepend_mode = "prepend_inputs_masked_attention" hparams.max_length = 512 hparams.label_smoothing = 0.1 - hparams.shared_embedding_and_softmax_weights = int(True) + hparams.shared_embedding_and_softmax_weights = True return hparams diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index 48720cd5d..a4ffae1b9 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -483,7 +483,7 @@ def attention_lm_moe_base(): hparams.optimizer_adam_beta2 = 0.98 hparams.num_sampled_classes = 0 hparams.label_smoothing = 0.0 - hparams.shared_embedding_and_softmax_weights = int(False) + hparams.shared_embedding_and_softmax_weights = False hparams.add_hparam("filter_size", 2048) # Add new ones like this. hparams.moe_num_experts = 32 # attention-related flags @@ -502,11 +502,11 @@ def attention_lm_moe_base(): # layer type hparams.add_hparam("attention_layers", "") hparams.add_hparam("attention_type", AttentionType.MULTIHEAD) - hparams.add_hparam("attention_local", int(False)) + hparams.add_hparam("attention_local", False) hparams.add_hparam("attention_moe_k", 2) hparams.add_hparam("attention_num_head", 1) hparams.add_hparam("attention_num_experts", 16) - hparams.add_hparam("attention_split_batch", int(False)) + hparams.add_hparam("attention_split_batch", False) hparams.add_hparam("attention_red_factor", 3) hparams.add_hparam("attention_block_length", 128) hparams.add_hparam("attention_reduction_type", "conv") @@ -526,14 +526,14 @@ def attention_lm_moe_base(): hparams.add_hparam("attention_load_balance", 2e-2) # Locality-sensitive hashing params hparams.add_hparam("lsh_num_hyperplanes", 4) - hparams.add_hparam("lsh_use_map_fn", int(False)) + hparams.add_hparam("lsh_use_map_fn", False) - hparams.add_hparam("use_sepconv", int(False)) - hparams.add_hparam("diet_experts", int(False)) - hparams.add_hparam("memory_efficient_ffn", int(False)) + hparams.add_hparam("use_sepconv", False) + hparams.add_hparam("diet_experts", False) + hparams.add_hparam("memory_efficient_ffn", False) # if True, we learn a non-autoregressive model from "inputs" to "targets". # if False, we learn an autoregressive model to generate "targets" - hparams.add_hparam("use_inputs", int(False)) + hparams.add_hparam("use_inputs", False) return hparams @@ -543,9 +543,9 @@ def attention_lm_moe_base_long_seq(): hparams = attention_lm_moe_base() hparams.max_length = 0 # max_length == batch_size - hparams.eval_drop_long_sequences = int(True) + hparams.eval_drop_long_sequences = True hparams.min_length_bucket = 256 # Avoid cyclic problems for big batches - hparams.use_sepconv = int(True) + hparams.use_sepconv = True return hparams @@ -568,7 +568,7 @@ def attention_lm_moe_base_ae(): def attention_lm_moe_base_local(): """Base model with attention expert.""" hparams = attention_lm_moe_base_long_seq() - hparams.attention_local = int(True) + hparams.attention_local = True return hparams @@ -577,7 +577,7 @@ def attention_lm_moe_base_hybrid(): """Base model with attention expert.""" hparams = attention_lm_moe_base_long_seq() hparams.attention_layers = "hehe" # Alternate local/expert - hparams.attention_local = int(True) + hparams.attention_local = True # hparams.layer_preprocess_sequence = "n" # hparams.layer_postprocess_sequence = "da" @@ -588,7 +588,7 @@ def attention_lm_moe_base_hybrid(): def attention_lm_hybrid_v2(): hparams = attention_lm_moe_base_long_seq() hparams.attention_layers = "hheh" # Alternate local/expert - hparams.attention_local = int(True) + hparams.attention_local = True hparams.attention_moe_k = 6 hparams.layer_preprocess_sequence = "n" @@ -622,7 +622,7 @@ def attention_lm_ae_extended(): """Experiment with the exp_factor params.""" hparams = attention_lm_moe_base_long_seq() hparams.attention_layers = "eeee" - hparams.attention_local = int(True) + hparams.attention_local = True # hparams.factored_logits=1 # Necessary when the number of expert grow bigger hparams.attention_moe_k = 2 hparams.attention_exp_factor = 4 @@ -637,16 +637,16 @@ def attention_lm_ae_extended(): def attention_lm_moe_base_memeff(): """Base model with attention expert.""" hparams = attention_lm_moe_base_long_seq() - hparams.use_sepconv = int(False) + hparams.use_sepconv = False - hparams.diet_experts = int(True) + hparams.diet_experts = True hparams.layer_preprocess_sequence = "n" hparams.layer_postprocess_sequence = "da" hparams.layer_prepostprocess_dropout = 0.0 hparams.memory_efficient_ffn = True hparams.attention_type = AttentionType.MEMORY_EFFICIENT hparams.num_heads = 8 - hparams.factored_logits = int(True) + hparams.factored_logits = True return hparams @@ -747,7 +747,7 @@ def attention_lm_moe_large(): @registry.register_hparams def attention_lm_moe_large_diet(): hparams = attention_lm_moe_large() - hparams.diet_experts = int(True) + hparams.diet_experts = True return hparams @@ -755,14 +755,14 @@ def attention_lm_moe_large_diet(): def attention_lm_moe_memory_efficient(): """Memory-efficient version.""" hparams = attention_lm_moe_large() - hparams.diet_experts = int(True) + hparams.diet_experts = True hparams.layer_preprocess_sequence = "n" hparams.layer_postprocess_sequence = "da" hparams.layer_prepostprocess_dropout = 0.0 hparams.memory_efficient_ffn = True hparams.attention_type = AttentionType.MEMORY_EFFICIENT hparams.num_heads = 8 - hparams.factored_logits = int(True) + hparams.factored_logits = True return hparams @@ -798,7 +798,7 @@ def attention_lm_moe_translation(): hparams.layer_prepostprocess_dropout = 0.2 hparams.num_hidden_layers = 6 hparams.moe_layers = "0,1,2,3,4,5" - hparams.shared_embedding_and_softmax_weights = int(True) + hparams.shared_embedding_and_softmax_weights = True return hparams diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index c3e378359..68d375c96 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -171,7 +171,7 @@ def lstm_attention_base(): """Base attention params.""" hparams = lstm_seq2seq() hparams.add_hparam("attention_layer_size", hparams.hidden_size) - hparams.add_hparam("output_attention", int(True)) + hparams.add_hparam("output_attention", True) hparams.add_hparam("num_heads", 1) return hparams diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index 5377fd97e..fc030deed 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -329,7 +329,7 @@ def slicenet_params1(): hparams.add_hparam("attention_value_channels", 0) hparams.add_hparam("sim_loss_mult", 0.0) # Try 10.0 for experiments. hparams.add_hparam("attention_dropout", 0.2) - hparams.shared_embedding_and_softmax_weights = int(True) + hparams.shared_embedding_and_softmax_weights = True return hparams diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 32fef0089..c36a1c89b 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -632,8 +632,7 @@ def transformer_base_v1(): hparams.optimizer_adam_beta2 = 0.98 hparams.num_sampled_classes = 0 hparams.label_smoothing = 0.1 - hparams.shared_embedding_and_softmax_weights = int(True) - + hparams.shared_embedding_and_softmax_weights = True # Add new ones like this. hparams.add_hparam("filter_size", 2048) # Layer-related flags. If zero, these fall back on hparams.num_hidden_layers. @@ -652,8 +651,8 @@ def transformer_base_v1(): hparams.add_hparam("relu_dropout", 0.0) hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("nbr_decoder_problems", 1) - hparams.add_hparam("proximity_bias", int(False)) - hparams.add_hparam("use_pad_remover", int(True)) + hparams.add_hparam("proximity_bias", False) + hparams.add_hparam("use_pad_remover", True) hparams.add_hparam("self_attention_type", "dot_product") hparams.add_hparam("max_relative_position", 0) return hparams @@ -744,7 +743,7 @@ def transformer_parsing_base(): hparams.learning_rate_warmup_steps = 16000 hparams.hidden_size = 1024 hparams.learning_rate = 0.05 - hparams.shared_embedding_and_softmax_weights = int(False) + hparams.shared_embedding_and_softmax_weights = False return hparams @@ -753,7 +752,7 @@ def transformer_parsing_big(): """HParams for parsing on wsj semi-supervised.""" hparams = transformer_big() hparams.max_length = 512 - hparams.shared_source_target_embedding = int(False) + hparams.shared_source_target_embedding = False hparams.learning_rate_warmup_steps = 4000 hparams.layer_prepostprocess_dropout = 0.1 hparams.batch_size = 2048 @@ -766,7 +765,7 @@ def transformer_parsing_ice(): """Hparams for parsing and tagging Icelandic text.""" hparams = transformer_base_single_gpu() hparams.batch_size = 4096 - hparams.shared_embedding_and_softmax_weights = int(False) + hparams.shared_embedding_and_softmax_weights = False return hparams @@ -929,7 +928,7 @@ def transformer_big_dr1(): @registry.register_hparams def transformer_big_enfr(): hparams = transformer_big_dr1() - hparams.shared_embedding_and_softmax_weights = int(False) + hparams.shared_embedding_and_softmax_weights = False hparams.filter_size = 8192 hparams.layer_prepostprocess_dropout = 0.1 return hparams @@ -1065,6 +1064,14 @@ def transformer_tpu_range(rhp): rhp.set_float("weight_decay", 0.0, 2.0) +@registry.register_ranged_hparams +def transformer_tpu_batch_range(rhp): + hparams = transformer_tpu() + common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp) + rhp.set_discrete("tpu_batch_size_per_shard", [1] + list(range(2, 16, 2))) + rhp.set_discrete("max_length", list(range(128, 416, 16))) + + @registry.register_hparams def transformer_small_tpu(): """TPU-friendly version of transformer_small. @@ -1078,7 +1085,7 @@ def transformer_small_tpu(): def update_hparams_for_tpu(hparams): - hparams.use_pad_remover = int(False) # where op not supported + hparams.use_pad_remover = False # where op not supported hparams.optimizer = "TrueAdam" hparams.learning_rate = 0.2 diff --git a/tensor2tensor/models/transformer_adv.py b/tensor2tensor/models/transformer_adv.py index 2a12aa389..3867944e5 100644 --- a/tensor2tensor/models/transformer_adv.py +++ b/tensor2tensor/models/transformer_adv.py @@ -210,7 +210,7 @@ def transformer_adv_small(): hparams.filter_size = 2048 hparams.label_smoothing = 0.0 hparams.weight_decay = 0.1 - hparams.symbol_modality_skip_top = int(True) + hparams.symbol_modality_skip_top = True hparams.add_hparam("num_compress_steps", 2) hparams.add_hparam("extra_steps", 0) hparams.add_hparam("noise_val", 0.3) diff --git a/tensor2tensor/models/transformer_moe.py b/tensor2tensor/models/transformer_moe.py index 014a390c6..285886fa5 100644 --- a/tensor2tensor/models/transformer_moe.py +++ b/tensor2tensor/models/transformer_moe.py @@ -417,7 +417,7 @@ def transformer_moe_base(): hparams.optimizer_adam_beta2 = 0.98 hparams.num_sampled_classes = 0 hparams.label_smoothing = 0.0 - hparams.shared_embedding_and_softmax_weights = int(True) + hparams.shared_embedding_and_softmax_weights = True # According to noam, ("n", "da") seems better for harder-to-learn models hparams.layer_preprocess_sequence = "n" hparams.layer_postprocess_sequence = "da" @@ -439,7 +439,7 @@ def transformer_moe_base(): hparams.add_hparam("relu_dropout", 0.0) hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("nbr_decoder_problems", 1) - hparams.add_hparam("proximity_bias", int(False)) + hparams.add_hparam("proximity_bias", False) # Decoder layers type. If set, num_decoder_layers parameter will be ignored # and the number of decoder layer will be deduced from the string @@ -460,7 +460,7 @@ def transformer_moe_8k(): hparams.batch_size = 8192 hparams.max_length = 0 # max_length == batch_size - hparams.eval_drop_long_sequences = int(True) + hparams.eval_drop_long_sequences = True hparams.min_length_bucket = 256 # Avoid cyclic problems for big batches hparams.default_ff = "sep" @@ -475,7 +475,7 @@ def transformer_moe_12k(): hparams = transformer_moe_8k() hparams.batch_size = 12000 # At 12k, the softmax become the memory bottleneck - hparams.factored_logit = int(True) + hparams.factored_logit = True return hparams @@ -483,11 +483,9 @@ def transformer_moe_12k(): def transformer_moe_prepend_8k(): """Model which formulate a seq2seq problem as language modeling.""" hparams = transformer_moe_8k() - hparams.prepend_mode = "prepend_inputs_masked_attention", - hparams.eval_drop_long_sequences = int(False), + hparams.prepend_mode = "prepend_inputs_masked_attention" + hparams.eval_drop_long_sequences = False hparams.max_input_seq_length = 7500, hparams.layer_types = "loc/red/loc-moe/red/loc" hparams.moe_num_experts = 256 return hparams - - diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index d936ce72f..81156babd 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -620,8 +620,8 @@ def transformer_ae_small(): hparams.add_hparam("d_mix", 0.5) # Bottleneck kinds supported: dense, semhash, gumbel-softmax. hparams.add_hparam("bottleneck_kind", "semhash") - hparams.add_hparam("do_ae", int(True)) - hparams.add_hparam("drop_inputs", int(False)) + hparams.add_hparam("do_ae", True) + hparams.add_hparam("drop_inputs", False) hparams.add_hparam("z_size", 128) hparams.add_hparam("v_size", 1024*64) hparams.add_hparam("max_context_length", 64) @@ -631,11 +631,11 @@ def transformer_ae_small(): hparams.add_hparam("kmeans_lr_factor", 0.002) hparams.add_hparam("z_dropout", 0.1) hparams.add_hparam("is_2d", 0) - hparams.add_hparam("use_gumbel_softmax", int(True)) + hparams.add_hparam("use_gumbel_softmax", True) hparams.add_hparam("softmax_k", 0) - hparams.add_hparam("decode_autoregressive", int(True)) - hparams.add_hparam("do_vae", int(True)) - hparams.add_hparam("bit_vae", int(True)) + hparams.add_hparam("decode_autoregressive", True) + hparams.add_hparam("do_vae", True) + hparams.add_hparam("bit_vae", True) return hparams diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index e39defa29..f0f66f4ed 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -53,7 +53,7 @@ def input_fn(params): num_threads = 4 if is_training else 1 batch_size = params["batch_size"] - def _valid_size(example): + def valid_size(example): return data_reader.example_valid_size(example, hparams.min_length, hparams.max_length) @@ -96,7 +96,7 @@ def define_shapes(example): else: # If shapes are not fully defined, filter out long ones and pad to # hparams.max_length - dataset = dataset.filter(_valid_size) + dataset = dataset.filter(valid_size) padded_shapes = fill_shape_nones( dataset.output_shapes, none_filler=hparams.max_length) dataset = data_reader.padded_batch(dataset, batch_size, padded_shapes) @@ -268,14 +268,13 @@ def create_run_config(master="", model_dir=None, iterations_per_loop=1000, num_shards=8, - per_host_input_for_training=True, log_device_placement=False, save_checkpoints_steps=1000): """Create TPUConfig and tpu.RunConfig.""" tpu_config = tf.contrib.tpu.TPUConfig( iterations_per_loop=iterations_per_loop, num_shards=num_shards, - per_host_input_for_training=per_host_input_for_training) + per_host_input_for_training=(num_shards <= 8)) session_config = tf.ConfigProto( allow_soft_placement=True, log_device_placement=log_device_placement) run_config = tf.contrib.tpu.RunConfig( diff --git a/tensor2tensor/utils/diet.py b/tensor2tensor/utils/diet.py index 4ff44de5b..527ed0e5f 100644 --- a/tensor2tensor/utils/diet.py +++ b/tensor2tensor/utils/diet.py @@ -37,7 +37,7 @@ def diet_adam_optimizer_params(): a hyperparameters object. """ return tf.contrib.training.HParams( - quantize=int(True), # use 16-bit fixed-point + quantize=True, # use 16-bit fixed-point quantization_scale=10.0 / tf.int16.max, optimizer="DietAdam", learning_rate=1.0, @@ -46,7 +46,7 @@ def diet_adam_optimizer_params(): epsilon=1e-10, beta1=0.0, # we can save memory if beta1=0 beta2=0.98, - factored_second_moment_accumulator=int(True), # this saves memory + factored_second_moment_accumulator=True, # this saves memory ) From 78690d714dfb3da9ca3b63c53d7dec3a1f974678 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 6 Nov 2017 17:40:04 -0800 Subject: [PATCH 0556/4095] Tensor2Tensor on Cloud TPU alpha doc PiperOrigin-RevId: 174789467 --- docs/cloud_tpu.md | 99 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docs/cloud_tpu.md diff --git a/docs/cloud_tpu.md b/docs/cloud_tpu.md new file mode 100644 index 000000000..3dc3986cf --- /dev/null +++ b/docs/cloud_tpu.md @@ -0,0 +1,99 @@ +# Running on Cloud TPUs + +Tensor2Tensor supports running on Google Cloud Platforms TPUs, chips specialized +for ML training. + +Not all models are supported but we've tested so far with Transformer (sequence +model) as well as Xception (image model). + +To run on TPUs, you need to be part of the alpha program; if you're not, these +commands won't work for you currently, but access will expand soon, so get +excited for your future ML supercomputers in the cloud. + +## Tutorial: Transformer En-De translation on TPU + +Set your default zone to a TPU-enabled zone. TPU machines are only available in +certain zones for now. +``` +gcloud config set compute/zone us-central1-f +``` + +Launch a GCE instance; this will run the Python trainer. +``` +gcloud compute instances create $USER-vm \ + --machine-type=n1-standard-8 \ + --image-family=tf-nightly \ + --image-project=ml-images \ + --scopes=https://www.googleapis.com/auth/cloud-platform +``` + +Launch the TPU instance; the Python program will connect to this to train on the +TPU device. +``` +TPU_IP=10.240.0.2 +gcloud alpha compute tpus create \ + $USER-tpu \ + --range=${TPU_IP/%2/0}/29 \ + --version=nightly +``` + +To see all TPU instances running: `gcloud alpha compute tpus list`. The +`TPU_IP` should be unique amongst the list and follow the format `10.240.i.2`. + +Generate data to GCS +If you already have the data locally, use `gsutil cp` to cp to GCS. +``` +DATA_DIR=gs://my-bucket/t2t/data/ +t2t-datagen --problem=translate_ende_wmt8k --data_dir=$DATA_DIR +``` + +SSH in with port forwarding for TensorBoard +``` +gcloud compute ssh $USER-vm -L 6006:localhost:6006 +``` + +Now that you're on the cloud instance, install T2T: +``` +pip install tensor2tensor +``` + +Setup some vars used below. `TPU_IP` and `DATA_DIR` should be the same as what +was used above. Note that the `DATA_DIR` and `OUT_DIR` must be GCS buckets. +``` +TPU_IP=<IP of TPU machine> +DATA_DIR=gs://my-bucket/t2t/data/ +OUT_DIR=gs://my-bucket/t2t/training/ +TPU_MASTER=grpc://$TPU_IP:8470 +``` + +Launch TensorBoard in the background so you can monitor training: +``` +tensorboard --logdir=$OUT_DIR > /tmp/tensorboard_logs.txt 2>&1 & +``` + +Train and evaluate. +``` +t2t-tpu-trainer \ + --master=$TPU_MASTER \ + --data_dir=$DATA_DIR \ + --output_dir=$OUT_DIR \ + --problems=translate_ende_wmt8k \ + --model=transformer \ + --hparams_set=transformer_tiny_tpu \ + --train_steps=10 \ + --eval_steps=10 \ + --local_eval_frequency=10 \ + --iterations_per_loop=10 +``` + +The above command will train for 10 steps, then evaluate for 10 steps. You can +(and should) increase the number of total training steps with the +`--train_steps` flag. Evaluation will happen every `--local_eval_frequency` +steps, each time for `--eval_steps`. When you increase then number of training +steps, also increase `--iterations_per_loop`, which controls how frequently the +TPU machine returns control to the Python code (1000 seems like a fine number). + +Back on your local machine, open your browser and navigate to `localhost:6006` +for TensorBoard. + +Voila. Enjoy your new supercomputer. From a88fc015cff9a7ebdfcd2d68535dfffed5568186 Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Tue, 7 Nov 2017 12:45:47 -0800 Subject: [PATCH 0557/4095] Add get_standardized_layers() which returns the list of available layers fn with a unified interface. PiperOrigin-RevId: 174897587 --- tensor2tensor/layers/common_attention.py | 255 +++++++++++++++++++++- tensor2tensor/models/transformer_moe.py | 267 ++++------------------- tensor2tensor/utils/expert_utils.py | 9 +- 3 files changed, 308 insertions(+), 223 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index cf7ef9115..06d7e8362 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -45,6 +45,229 @@ _expert_count = 0 +def get_standadized_layers(hparams, dp=None, ps_devices=None): + """Get the common attention and feed-forward layers. + + The returned layer functions will have the following signature: + + y, extra_loss = fct(x) + + extra_loss is set to 0.0 if the layer doesn't have extra loss. + If dp is provided, the layers will be distributed within the devices. + If moe wants to be used, both dp and model need to be set. + + Args: + hparams (tf.HParams): the model hparameters + dp (expert_utils.Parallelism): A data paralelism object. If not given, + the dp calls are simply ignored. + ps_devices: a reference to model._ps_device (only used by the moe layer) + + Returns: + dict[str:fct]: A dictionary containing the standardized functions + """ + + def partial(fct, *args, **kwargs): + """Same as functools.partial but with functools.wraps.""" + return functools.wraps(fct)(functools.partial(fct, *args, **kwargs)) + + def register_layer( + fct, + default_args=None, + default_kwargs=None, + use_dp=True, + ): + """Turn a function into its standardized version. + + Args: + fct (fct): The function to register + default_args (list): The default parameters to add to the function. + default_kwargs (dict): The default parameters to add to the function. + Those arguments can be overwriten when calling the function. + use_dp (bool): Wrap the function call within a dataparalellism object if + dp is available. Some layers (like moe) must be called without dp. + + Returns: + fct: the standardized layer function. + """ + # The kwargs given when calling the function overwrite the default ones + fct = partial(fct, *(default_args or []), **(default_kwargs or {})) + + @functools.wraps(fct) + def decorator(x, *args, **kwargs): + """Call the layer function.""" + # Eventually use dp (if given and not MoE) + if use_dp and dp is not None: + y = dp(fct, x, *args, **kwargs) + else: + y = fct(x, *args, **kwargs) + + # Eventually capture the extra loss + extra_loss = 0.0 + if isinstance(y, tuple): + y, extra_loss = y + + return y, extra_loss + return decorator + + total_key_depth = hparams.attention_key_channels or hparams.hidden_size + total_value_depth = hparams.attention_value_channels or hparams.hidden_size + is_train = hparams.mode == tf.estimator.ModeKeys.TRAIN + + moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] + # Use filter size if moe_hidden_sizes was not given + if not moe_hidden_sizes: + moe_hidden_sizes = [hparams.filter_size] + expert_fn = expert_utils.ffn_expert_fn( + hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) + + # Attention layers: + + # === Multi-head full attention layer === + multihead_attention_fn = register_layer( + multihead_attention, + default_kwargs=dict( + memory_antecedent=None, # Self-attention by default + bias=None, + total_key_depth=total_key_depth, + total_value_depth=total_value_depth, + output_depth=hparams.hidden_size, + num_heads=hparams.num_heads, + dropout_rate=hparams.attention_dropout, + ) + ) + + # === Local attention layer === + # Reuse same parameters as multihead_attention + # Only works for self attention. Always mask the future. + local_attention_fn = partial( + multihead_attention_fn, + block_length=hparams.attention_loc_block_length, + attention_type="local_mask_right", + ) + + # === Memory-compressed multihead self attention layer === + # Only works for self attention. Always mask the future. + compressed_attention_fn = register_layer( + multihead_self_attention_reduced, + default_kwargs=dict( + factor=hparams.attention_red_factor, + nonlinearity=hparams.attention_red_nonlinearity, + reduction_type=hparams.attention_red_type, + multihead_params=dict( + total_key_depth=total_key_depth, + total_value_depth=total_value_depth, + num_heads=hparams.num_heads, + dropout_rate=hparams.attention_dropout, + ), + ), + ) + + # Feed-forwards layers: + + # === Mixture of expert layer === + distributed_moe = register_layer( + expert_utils.distributed_moe, + default_args=[ + dp, + ps_devices, + ], + default_kwargs=dict( + train=is_train, + input_size=hparams.hidden_size, + expert_fn=expert_fn, + num_experts=hparams.moe_num_experts, + k=hparams.moe_k, + loss_coef=hparams.moe_loss_coef, + ), + use_dp=False, + ) + + # === FC layer === + conv_hidden_relu = register_layer( + common_layers.conv_hidden_relu, + default_kwargs=dict( + hidden_size=hparams.filter_size, + output_size=hparams.hidden_size, + dropout=hparams.relu_dropout, + ), + ) + + # === Separable convolution layer === + # No mask applied + sep_conv_relu = partial( + conv_hidden_relu, + padding="SAME", + # Parameters copied from the transformer model, could add hparams + kernel_size=(3, 1), + second_kernel_size=(31, 1), + ) + + # === Separable convolution layer (masked version) === + # Mask the future + sep_conv_relu_masked = partial( + sep_conv_relu, + padding="LEFT", # Mask future for decoder + ) + + # Define all available layers + + layers = dict( + a=multihead_attention_fn, # Multihead full attention + loc=local_attention_fn, # Local attention + red=compressed_attention_fn, # Memory-compressed attention + mem=None, # Memory efficient + fc=conv_hidden_relu, + sep=sep_conv_relu, # Fully connected + sepm=sep_conv_relu_masked, # masked separable convolution + moe=distributed_moe, # Mixture of expert layer + ) + return layers + + +def add_standard_attention_hparams(hparams): + """Adds the hparams used by get_standadized_layers.""" + # All hyperparameters ending in "dropout" are automatically set to 0.0 + # when not in training mode. + + # hparams used and which should have been defined outside (in + # common_hparams): + # Global flags + # hparams.mode + # hparams.hidden_size + # Pre-post processing flags + # hparams.layer_preprocess_sequence + # hparams.layer_postprocess_sequence + # hparams.layer_prepostprocess_dropout + # hparams.norm_type + # hparams.norm_epsilon + # Mixture-of-Expert flags + # hparams.moe_hidden_sizes + # hparams.moe_num_experts + # hparams.moe_k + # hparams.moe_loss_coef + + # Attention layers flags + hparams.add_hparam("num_heads", 8) + hparams.add_hparam("attention_key_channels", 0) + hparams.add_hparam("attention_value_channels", 0) + hparams.add_hparam("attention_dropout", 0.0) + # Attention: Local + hparams.add_hparam("attention_loc_block_length", 256) + # Attention: Memory-compressed + hparams.add_hparam("attention_red_factor", 3) + hparams.add_hparam("attention_red_type", "conv") + hparams.add_hparam("attention_red_nonlinearity", "none") + + # Fully connected layers flags + # To be more concistent, should use filter_size to also controle the moe + # size if moe_hidden_sizes not set + hparams.add_hparam("filter_size", 2048) + hparams.add_hparam("relu_dropout", 0.0) + + return hparams + + +@expert_utils.add_name_scope() def get_timing_signal_1d( length, channels, min_timescale=1.0, max_timescale=1.0e4): """Gets a bunch of sinusoids of different frequencies. @@ -90,6 +313,7 @@ def get_timing_signal_1d( return signal +@expert_utils.add_name_scope() def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): """Adds a bunch of sinusoids of different frequencies to a Tensor. @@ -124,6 +348,7 @@ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): return x + signal +@expert_utils.add_name_scope() def add_timing_signal_1d_given_position(x, position, min_timescale=1.0, max_timescale=1.0e4): """Adds sinusoids of diff frequencies to a Tensor, with timing position given. @@ -151,6 +376,7 @@ def add_timing_signal_1d_given_position(x, position, min_timescale=1.0, return x + signal +@expert_utils.add_name_scope() def add_timing_signal_nd(x, min_timescale=1.0, max_timescale=1.0e4): """Adds a bunch of sinusoids of different frequencies to a Tensor. @@ -208,6 +434,7 @@ def add_timing_signal_nd(x, min_timescale=1.0, max_timescale=1.0e4): return x +@expert_utils.add_name_scope() def add_positional_embedding_nd(x, max_length, name): """Add n-dimensional positional embedding. @@ -325,6 +552,7 @@ def get_gates(self, x): return x +@expert_utils.add_name_scope() def embedding_to_padding(emb): """Calculates the padding mask based on which embeddings are all zero. @@ -339,6 +567,7 @@ def embedding_to_padding(emb): return tf.to_float(tf.equal(emb_sum, 0.0)) +@expert_utils.add_name_scope() def attention_bias_local(length, max_backward, max_forward): """Create an bias tensor to be added to attention logits. @@ -363,6 +592,7 @@ def attention_bias_local(length, max_backward, max_forward): return tf.reshape(ret, [1, 1, length, length]) +@expert_utils.add_name_scope() def attention_bias_lower_triangle(length): """Create an bias tensor to be added to attention logits. @@ -377,6 +607,7 @@ def attention_bias_lower_triangle(length): return attention_bias_local(length, -1, 0) +@expert_utils.add_name_scope() def attention_bias_ignore_padding(memory_padding): """Create an bias tensor to be added to attention logits. @@ -390,6 +621,7 @@ def attention_bias_ignore_padding(memory_padding): return tf.expand_dims(tf.expand_dims(ret, axis=1), axis=1) +@expert_utils.add_name_scope() def attention_bias_to_padding(attention_bias): """Inverse of attention_bias_ignore_padding(). @@ -406,6 +638,7 @@ def attention_bias_to_padding(attention_bias): return tf.squeeze(tf.to_float(tf.less(attention_bias, -1)), axis=[1, 2]) +@expert_utils.add_name_scope() def attention_bias_prepend_inputs_full_attention(padding): """Create a bias tensor for prepend_mode="prepend_inputs_full_attention". @@ -439,6 +672,7 @@ def attention_bias_prepend_inputs_full_attention(padding): return bias +@expert_utils.add_name_scope() def attention_bias_proximal(length): """Bias for self-attention to encourage attention to close positions. @@ -507,6 +741,7 @@ def to_float(bc): ) +@expert_utils.add_name_scope() def split_last_dimension(x, n): """Reshape x so that the last dimension becomes two dimensions. @@ -527,6 +762,7 @@ def split_last_dimension(x, n): return ret +@expert_utils.add_name_scope() def combine_last_two_dimensions(x): """Reshape x so that the last two dimension become one. @@ -544,6 +780,7 @@ def combine_last_two_dimensions(x): return ret +@expert_utils.add_name_scope() def combine_first_two_dimensions(x): """Reshape x so that the first two dimension become one. @@ -561,6 +798,7 @@ def combine_first_two_dimensions(x): return ret +@expert_utils.add_name_scope() def split_heads(x, num_heads): """Split channels (dimension 3) into multiple heads (becomes dimension 1). @@ -574,6 +812,7 @@ def split_heads(x, num_heads): return tf.transpose(split_last_dimension(x, num_heads), [0, 2, 1, 3]) +@expert_utils.add_name_scope() def split_heads_2d(x, num_heads): """Split channels (dimension 4) into multiple heads (becomes dimension 1). @@ -587,6 +826,7 @@ def split_heads_2d(x, num_heads): return tf.transpose(split_last_dimension(x, num_heads), [0, 3, 1, 2, 4]) +@expert_utils.add_name_scope() def combine_heads(x): """Inverse of split_heads. @@ -599,6 +839,7 @@ def combine_heads(x): return combine_last_two_dimensions(tf.transpose(x, [0, 2, 1, 3])) +@expert_utils.add_name_scope() def combine_heads_2d(x): """Inverse of split_heads_2d. @@ -2959,8 +3200,10 @@ def pad_and_reshape(x): @expert_utils.add_var_scope() def multihead_self_attention_reduced( x, - factor, - multihead_params, + memory_antecedent=None, + bias=None, + factor=None, + multihead_params=None, nonlinearity="none", reduction_type="conv", ): @@ -2968,6 +3211,8 @@ def multihead_self_attention_reduced( Args: x (tf.Tensor): float32 of shape [batch, length, depth] + memory_antecedent (tf.Tensor): Unsuported for now + bias (tf.Tensor): Ignored factor (int): compression factor for the memory sequence multihead_params (dict): parameters for multihead attention nonlinearity (str): Add some non-linearity after the memory block @@ -2979,6 +3224,12 @@ def multihead_self_attention_reduced( Raises: ValueError: If reduction_type or nonlinearity is invalid """ + if not factor or not multihead_params: + raise ValueError("factor and multihead_params should be set") + if memory_antecedent is not None: + raise NotImplementedError( + "multihead_self_attention_reduced only works with self-attention") + depth = x.get_shape().as_list()[-1] # Could try to have some overlapp between the blocks but that would diff --git a/tensor2tensor/models/transformer_moe.py b/tensor2tensor/models/transformer_moe.py index 285886fa5..2f71f62bf 100644 --- a/tensor2tensor/models/transformer_moe.py +++ b/tensor2tensor/models/transformer_moe.py @@ -21,8 +21,6 @@ from __future__ import division from __future__ import print_function -import functools - # Dependency imports from tensor2tensor.layers import common_attention @@ -57,13 +55,6 @@ SEP_FF = "-" -def partial(fct, *args, **kwargs): - """Wrapper around functools.partial for Python 2 compatibility with wraps.""" - new_fct = functools.partial(fct, *args, **kwargs) - new_fct = functools.wraps(fct)(new_fct) - return new_fct - - @registry.register_model class TransformerMoe(t2t_model.T2TModel): """Attention net. See file docstring.""" @@ -98,183 +89,20 @@ def dp_postprocess(x, y): 1.0 - hparams.layer_prepostprocess_dropout) decoder_input = dp(tf.nn.dropout, decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) - cache = dict(extra_loss=0) - moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] - expert_fn = expert_utils.ffn_expert_fn( - hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) - # ========= Define some utils decorators ========= + cache = dict(extra_loss=0.0) def prepostprocess(fct): - """Add pre and post processing.""" - # WARNING: Should be applied after dp (pre/post-process use dp and - # can be applied to function which doesn't use dp) - @functools.wraps(fct) + """Apply processing and capture the extra loss.""" + @expert_utils.add_var_scope() def decorated(x, *args, **kwargs): x = dp_preprocess(x) - y = fct(x, *args, **kwargs) + y, loss = fct(x, *args, **kwargs) + cache["extra_loss"] += loss return dp_postprocess(x, y) return decorated - def dp_wrapper(fct): - """Encapsulate the function in a data parallelism object.""" - @functools.wraps(fct) - def decorated(*args, **kwargs): - return dp(fct, *args, **kwargs) - return decorated - - def add_kwargs( - fct, - enco_kwargs=None, - deco_kwargs=None, - endeco_kwargs=None, # Enco-deco attention: overwrite deco_kwargs - ): - """Allow to have different arguments for the encoder and decoder.""" - # WARNING: If this decorator is applied before dp_wrapper, the kwargs - # may not be correctly dipatched across the devices. - @functools.wraps(fct) - def decorated(*args, **kwargs): - current_scope = tf.contrib.framework.get_name_scope() - if "/encoder/" in current_scope: - kwargs.update(enco_kwargs or {}) - elif "/decoder/" in current_scope: - kwargs.update(deco_kwargs or {}) - if "/att_ende_" in current_scope: - kwargs.update(endeco_kwargs or {}) - return fct(*args, **kwargs) - return decorated - - def capture_extra_loss(fct, loss_coef=1.0): - """Capture the additional loss.""" - @functools.wraps(fct) - def decorated(*args, **kwargs): - y, loss = fct(*args, **kwargs) - cache["extra_loss"] += loss * loss_coef - return y - return decorated - - def remove_kwargs(fct, extra_params): - """Remove some unused parameters.""" - @functools.wraps(fct) - def decorated(*args, **kwargs): - for k in extra_params: # Remove the extra params - kwargs.pop(k, None) - return fct(*args, **kwargs) - return decorated - - # def pad_remover(fct): - # """Remove/restore the padding on the input.""" - # @functools.wraps(fct) - # def decorated(x, *args, **kwargs): - # x = pad_remover.remove(x) - # x = fct(x, *args, **kwargs) - # x = pad_remover.restore(x) - # return x - # return decorated - - # ========= Define the available layers ========= - total_key_depth = hparams.attention_key_channels or hparams.hidden_size - total_value_depth = hparams.attention_value_channels or hparams.hidden_size - - # Multi-head full attention layer - multihead_attention = partial( - common_attention.multihead_attention, - total_key_depth=total_key_depth, - total_value_depth=total_value_depth, - output_depth=hparams.hidden_size, - num_heads=hparams.num_heads, - dropout_rate=hparams.attention_dropout, - ) - multihead_attention = dp_wrapper(multihead_attention) - multihead_attention = add_kwargs( # After dp to correctly dispatch kwargs - multihead_attention, - enco_kwargs={"bias": encoder_self_attention_bias}, - deco_kwargs={"bias": decoder_self_attention_bias}, - endeco_kwargs={"bias": encoder_decoder_attention_bias}, - ) - multihead_attention = prepostprocess(multihead_attention) - - # Local attention layer - # Reuse same parameters as multihead_attention (dp and pre/post-processing - # already applied) - # Only works for self attention. Always mask the future. - local_attention = partial( - multihead_attention, - block_length=hparams.attention_loc_block_length, - attention_type="local_mask_right", - ) - - # Memory-compressed multihead self attention layer - # Only works for self attention. Always mask the future. - compressed_attention = partial( - common_attention.multihead_self_attention_reduced, - factor=hparams.attention_red_factor, - nonlinearity=hparams.attention_red_nonlinearity, - reduction_type=hparams.attention_red_type, - multihead_params=dict( - total_key_depth=total_key_depth, - total_value_depth=total_value_depth, - num_heads=hparams.num_heads, - dropout_rate=hparams.attention_dropout, - ) - ) - compressed_attention = remove_kwargs( - compressed_attention, ["memory_antecedent"]) - compressed_attention = dp_wrapper(compressed_attention) - compressed_attention = prepostprocess(compressed_attention) - - # Mixture of expert layer - distributed_moe = partial( - expert_utils.distributed_moe, - dp, - self._ps_devices, - train=hparams.mode == tf.estimator.ModeKeys.TRAIN, - input_size=hparams.hidden_size, - expert_fn=expert_fn, - num_experts=hparams.moe_num_experts, - k=hparams.moe_k, - loss_coef=hparams.moe_loss_coef - ) - distributed_moe = capture_extra_loss(distributed_moe) - distributed_moe = prepostprocess(distributed_moe) - - # FC layer - conv_hidden_relu = partial( - common_layers.conv_hidden_relu, - hidden_size=hparams.filter_size, - output_size=hparams.hidden_size, - dropout=hparams.relu_dropout, - ) - conv_hidden_relu = dp_wrapper(conv_hidden_relu) - conv_hidden_relu = prepostprocess(conv_hidden_relu) - - # Separable convolution layer - # Reuse conv_hidden_relu (dp and pre/post-processing already applied) - # Mask the future for the decoder only - sep_conv_relu = partial( - conv_hidden_relu, - # Parameters copied from the transformer model, could add hparams - kernel_size=(3, 1), - second_kernel_size=(31, 1), - ) - sep_conv_relu = add_kwargs( - sep_conv_relu, - enco_kwargs={"padding": "SAME"}, - deco_kwargs={"padding": "LEFT"}, # Mask future for decoder - ) - - # This dictionary contains the list of all available layers - available_layers = dict( - # Attention layers - a=multihead_attention, # Standard multihead full attention - loc=local_attention, # Local attention - red=compressed_attention, # Memory-compressed attention - mem=None, # Memory efficient - # Feed-forward layers - moe=distributed_moe, # Mixture of expert layer - sep=sep_conv_relu, # Separable convolution - fc=conv_hidden_relu, # Fully connected - ) + # ========= Compute the transformer architecture ========= def extract_layer_types(layer_types): """Parse the layer string. @@ -333,13 +161,21 @@ def extract_layer_types(layer_types): encoder_layers, decoder_layers = extract_layer_types(hparams.layer_types) - # Display the encoder-decoder architecture - def print_layer(name, layers): - tf.logging.info("{} architecture:".format(name)) - for i, l in enumerate(layers): - tf.logging.info(" * Layer {}: {}".format(i, " - ".join(l))) - print_layer("Encoder", encoder_layers) - print_layer("Decoder", decoder_layers) + layers = common_attention.get_standadized_layers( + hparams=hparams, + dp=dp, + ps_devices=self._ps_devices, + ) + + if hparams.mode == tf.estimator.ModeKeys.TRAIN: + + # Display the encoder-decoder architecture + def print_layer(name, layers): + tf.logging.info("{} architecture:".format(name)) + for i, l in enumerate(layers): + tf.logging.info(" * Layer {}: {}".format(i, " - ".join(l))) + print_layer("Encoder", encoder_layers) + print_layer("Decoder", decoder_layers) encoder_outputs = [] @@ -351,13 +187,15 @@ def print_layer(name, layers): # * feed-forward block att_type, ff_type = block_types with tf.variable_scope("layer_{}".format(layer_num)): - with tf.variable_scope("att_{}".format(att_type)): - x = available_layers[att_type]( - x, - memory_antecedent=None, - ) - with tf.variable_scope("ff_{}".format(ff_type)): - x = available_layers[ff_type](x) + x = prepostprocess(layers[att_type])( + x, + bias=encoder_self_attention_bias, + name="att_{}".format(att_type), + ) + x = prepostprocess(layers[ff_type])( + x, + name="ff_{}".format(ff_type) + ) encoder_outputs.append(x) if encoder_outputs: encoder_outputs[-1] = dp_preprocess(x) @@ -371,24 +209,28 @@ def print_layer(name, layers): # * feed-forward block self_att_type, att_ende_type, ff_type = block_types with tf.variable_scope("layer_{}".format(layer_num)): - with tf.variable_scope("self_att_{}".format(self_att_type)): - x = available_layers[self_att_type]( + x = prepostprocess(layers[self_att_type])( + x, + bias=decoder_self_attention_bias, + name="self_att_{}".format(self_att_type), + ) + # Only add the enco-deco attention layer if there is an encoder + if encoder_outputs: + x = prepostprocess(layers[att_ende_type])( x, - memory_antecedent=None, + memory_antecedent=encoder_outputs[-1], + bias=encoder_decoder_attention_bias, + name="att_ende_{}".format(att_ende_type), ) - with tf.variable_scope("att_ende_{}".format(att_ende_type)): - # Only add the enco-deco attention layer if there is an encoder - if encoder_outputs: - x = available_layers[att_ende_type]( - x, - memory_antecedent=encoder_outputs[-1], - ) - with tf.variable_scope("ff_{}".format(ff_type)): - x = available_layers[ff_type](x) + x = prepostprocess(layers[ff_type])( + x, + name="ff_{}".format(ff_type) + ) # If normalization is done in layer_preprocess, then it should also be # done on the output, since the output can grow very large, being the sum # of a whole stack of unnormalized layer outputs. x = dp_preprocess(x) + decoder_output = dp(tf.expand_dims, x, 2) return decoder_output, cache["extra_loss"] @@ -422,25 +264,12 @@ def transformer_moe_base(): hparams.layer_preprocess_sequence = "n" hparams.layer_postprocess_sequence = "da" - hparams.add_hparam("filter_size", 2048) # Add new ones like this. - # attention-related flags - hparams.add_hparam("num_heads", 8) - hparams.add_hparam("attention_key_channels", 0) - hparams.add_hparam("attention_value_channels", 0) - hparams.add_hparam("ffn_layer", "conv_hidden_relu") - # Other attention types params - hparams.add_hparam("attention_loc_block_length", 256) - hparams.add_hparam("attention_red_factor", 3) - hparams.add_hparam("attention_red_type", "conv") - hparams.add_hparam("attention_red_nonlinearity", "none") - # All hyperparameters ending in "dropout" are automatically set to 0.0 - # when not in training mode. - hparams.add_hparam("attention_dropout", 0.0) - hparams.add_hparam("relu_dropout", 0.0) hparams.add_hparam("pos", "timing") # timing, none hparams.add_hparam("nbr_decoder_problems", 1) hparams.add_hparam("proximity_bias", False) + hparams = common_attention.add_standard_attention_hparams(hparams) + # Decoder layers type. If set, num_decoder_layers parameter will be ignored # and the number of decoder layer will be deduced from the string # See top file comment for example of usage diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 5005cdb50..7fc3d01f0 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -87,8 +87,13 @@ def decorated(*args, **kwargs): return decorator -add_var_scope = functools.partial(add_scope, scope_fn=tf.variable_scope) -add_name_scope = functools.partial(add_scope, scope_fn=tf.name_scope) + +def add_var_scope(scope=None): + return add_scope(scope, scope_fn=tf.variable_scope) + + +def add_name_scope(scope=None): + return add_scope(scope, scope_fn=tf.name_scope) class Parallelism(object): From f859e78d081787e42ccd265adb97d3c0a20344ad Mon Sep 17 00:00:00 2001 From: Katherine Lee <katherinelee@google.com> Date: Tue, 7 Nov 2017 13:28:03 -0800 Subject: [PATCH 0558/4095] Add image summary metric. PiperOrigin-RevId: 174903495 --- tensor2tensor/utils/metrics.py | 36 +++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index b4d82d97d..ae28176a1 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -45,6 +45,7 @@ class Metrics(object): EDIT_DISTANCE = "edit_distance" SET_PRECISION = "set_precision" SET_RECALL = "set_recall" + IMAGE_SUMMARY = "image_summary" def padded_rmse(predictions, labels, weights_fn=common_layers.weights_all): @@ -239,6 +240,24 @@ def set_recall(predictions, return tf.to_float(tf.equal(labels, predictions)), weights +def image_summary(predictions, + hparams): + """Reshapes predictions and passes it to tensorboard. + + Args: + predictions : A Tensor of scores of shape [batch, nlabels]. + hparams: model_hparams + + Returns: + summary_proto: containing the summary image for predictions + weights: A Tensor of zeros of shape [batch, nlabels]. + """ + predictions_reshaped = tf.reshape( + predictions, [-1, hparams.height, hparams.width, hparams.colors]) + return tf.summary.image("image_summary", predictions_reshaped, + max_outputs=1), tf.zeros_like(predictions) + + def create_evaluation_metrics(problems, model_hparams): """Creates the evaluation metrics for the model. @@ -302,14 +321,20 @@ def wrapped_metric_fn(): else: weights_fn = common_layers.weights_nonzero + def image_wrapped_metric_fn(predictions, labels, + weights_fn=common_layers.weights_nonzero): + _, _ = labels, weights_fn + return metric_fn(predictions, model_hparams) + for metric in metrics: metric_fn = METRICS_FNS[metric] - problem_metric_fn = make_problem_specific_metric_fn( - metric_fn, problem_idx, weights_fn) - metric_name = "metrics-%s/%s" % (problem_name, metric) - - eval_metrics[metric_name] = problem_metric_fn + if "image" in metric: + eval_metrics[metric_name] = image_wrapped_metric_fn + else: + problem_metric_fn = make_problem_specific_metric_fn( + metric_fn, problem_idx, weights_fn) + eval_metrics[metric_name] = problem_metric_fn return eval_metrics @@ -333,4 +358,5 @@ def wrapped_metric_fn(): Metrics.EDIT_DISTANCE: sequence_edit_distance, Metrics.SET_PRECISION: set_precision, Metrics.SET_RECALL: set_recall, + Metrics.IMAGE_SUMMARY: image_summary, } From adff073e1e90be3addbacfc549cbf66e9f47bd2b Mon Sep 17 00:00:00 2001 From: Etienne Pot <epot@google.com> Date: Tue, 7 Nov 2017 13:45:49 -0800 Subject: [PATCH 0559/4095] Fix typo from previous commit PiperOrigin-RevId: 174905986 --- tensor2tensor/layers/common_attention.py | 4 ++-- tensor2tensor/models/transformer_moe.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 06d7e8362..6f26d58da 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -45,7 +45,7 @@ _expert_count = 0 -def get_standadized_layers(hparams, dp=None, ps_devices=None): +def get_standardized_layers(hparams, dp=None, ps_devices=None): """Get the common attention and feed-forward layers. The returned layer functions will have the following signature: @@ -60,7 +60,7 @@ def get_standadized_layers(hparams, dp=None, ps_devices=None): hparams (tf.HParams): the model hparameters dp (expert_utils.Parallelism): A data paralelism object. If not given, the dp calls are simply ignored. - ps_devices: a reference to model._ps_device (only used by the moe layer) + ps_devices: a reference to model._ps_devices (only used by the moe layer) Returns: dict[str:fct]: A dictionary containing the standardized functions diff --git a/tensor2tensor/models/transformer_moe.py b/tensor2tensor/models/transformer_moe.py index 2f71f62bf..3b966a285 100644 --- a/tensor2tensor/models/transformer_moe.py +++ b/tensor2tensor/models/transformer_moe.py @@ -161,7 +161,7 @@ def extract_layer_types(layer_types): encoder_layers, decoder_layers = extract_layer_types(hparams.layer_types) - layers = common_attention.get_standadized_layers( + layers = common_attention.get_standardized_layers( hparams=hparams, dp=dp, ps_devices=self._ps_devices, From 11a8d33938fb386e3489e982c7d1ec16dd213711 Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Tue, 7 Nov 2017 14:28:55 -0800 Subject: [PATCH 0560/4095] Fixing an issue in masked 2d masked local attention where the corner dim was being shifted to the front. Now, each block has a right shift. Adding an imagenet 64^2 problem with AREA resizing. PiperOrigin-RevId: 174913235 --- tensor2tensor/data_generators/image.py | 32 +++ tensor2tensor/layers/common_attention.py | 183 +++++++++++++++--- tensor2tensor/layers/common_attention_test.py | 144 ++++++++++++++ 3 files changed, 334 insertions(+), 25 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 751e6df51..2a2b73962 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -385,6 +385,38 @@ def preprocess_example(self, example, mode, unused_hparams): return example +@registry.register_problem +class ImageImagenet64(Image2ClassProblem): + """Imagenet rescaled to 64x64.""" + + def dataset_filename(self): + return "image_imagenet" # Reuse Imagenet data. + + @property + def is_small(self): + return True # Modalities like for CIFAR. + + @property + def num_classes(self): + return 1000 + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + # TODO(lukaszkaiser): find a better way than printing this. + print("To generate the ImageNet dataset in the proper format, follow " + "instructions at https://github.com/tensorflow/models/blob/master" + "/inception/README.md#getting-started") + + def preprocess_example(self, example, mode, unused_hparams): + inputs = example["inputs"] + # Just resize with area. + if self._was_reversed: + example["inputs"] = resize_by_area(inputs, 64) + else: + example = imagenet_preprocess_example(example, mode) + example["inputs"] = example["inputs"] = resize_by_area(inputs, 64) + return example + + @registry.register_problem class Img2imgImagenet(ImageProblem): """Imagenet rescaled to 8x8 for input and 32x32 for output.""" diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 6f26d58da..41aec1d5d 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -1812,7 +1812,7 @@ def local_attention_2d(q, def pad_to_multiple_2d(x, block_shape): - """Making sure x is a multiple of shape.""" + """Making sure x is a multiple of shape. x is [batch, heads, h, w, depth].""" old_shape = x.get_shape().dims last = old_shape[-1] height_padding = -tf.shape(x)[2] % block_shape[0] @@ -1913,6 +1913,121 @@ def make_2d_block_raster_mask(query_shape, memory_flange): return 1. - final_mask +def get_memory_region(x, + query_block_shape, + memory_flange, + q_indices): + """Get the memory regions that surround a 2d query. + + The memory regions will be the left and top right. + + Args: + x: A tensor with shape [batch, heads, height, width, depth] + query_block_shape: a 2-d tuple of integers + memory_flange: a 2-d tuple of integers + q_indices: a tensor of indices for each of the center blocks. + [num_blocks, block_length] + Returns: + x_flange: A tensor of shape [batch, heads, #blocks, block_length, depth] + """ + # Padding x to be multiple of query_shape and then + # extracting the memory blocks from the same regions as the query blocks + x_query_padded = pad_to_multiple_2d(x, query_block_shape) + x_center = gather_blocks_2d(x_query_padded, q_indices) + # Then padding the flange region + paddings = [[0, 0], [0, 0], [memory_flange[0], 0], + [memory_flange[1], memory_flange[1]], [0, 0]] + x_memory_padded = tf.pad(x_query_padded, paddings) + left_x = None + top_x = None + # Extracting the memory regions around the query block. left_x_region extends + # to the left and the top_x_region is the combination of top left, top, and + # top right of the query block + # if no left region + if memory_flange[1] > 0: + left_x_region = x_memory_padded[:, :, memory_flange[0]:, + :-(query_block_shape[1]+memory_flange[1]), + :] + left_memory_shape = (query_block_shape[0], memory_flange[1]) + left_indices = gather_indices_2d(left_x_region, left_memory_shape, + query_block_shape) + left_x = gather_blocks_2d(left_x_region, left_indices) + # if no top region + if memory_flange[0] > 0: + top_x_region = x_memory_padded[:, :, :-query_block_shape[0], :, :] + + top_memory_shape = (memory_flange[0], + query_block_shape[1]+2*memory_flange[1]) + + top_indices = gather_indices_2d(top_x_region, top_memory_shape, + query_block_shape) + + top_x = gather_blocks_2d(top_x_region, top_indices) + x_flange = None + if top_x is not None and left_x is not None: + x_flange = tf.concat([top_x, left_x], axis=3) + else: + x_flange = top_x if top_x is not None else left_x + return x_flange, x_center + + +def get_shifted_center_blocks(x, indices): + """Get right shifted blocks for masked local attention 2d. + + Args: + x: A tensor with shape [batch, heads, height, width, depth] + indices: The indices to gather blocks + + Returns: + x_shifted: a tensor of extracted blocks, each block right shifted along + length. + """ + center_x = gather_blocks_2d(x, indices) + # Shift right along the length dimension + def shift_right_2d_blocks(x): + """Shift the second to last dimension of x right by one.""" + shifted_targets = ( + tf.pad(x, [[0, 0], [0, 0], [0, 0], [1, 0], [0, 0]])[:, :, :, :-1, :] + ) + return shifted_targets + x_shifted = shift_right_2d_blocks(center_x) + return x_shifted + + +def right_shift_blockwise(x, query_shape, name=None): + """Right shifts once in every block. + + Args: + x: a tensor of shape [batch, height, width, depth] + query_shape: A 2d tuple of ints + name: a string + + Returns: + output: a tensor of the same shape as x + """ + with tf.variable_scope( + name, default_name="right_shift_blockwise", values=[x]): + x_list_shape = x.get_shape().as_list() + x_shape = tf.shape(x) + # Add a dummy dimension for heads + x = tf.expand_dims(x, axis=1) + x = pad_to_multiple_2d(x, query_shape) + padded_x_shape = tf.shape(x) + # Setting up q blocks + x_indices = gather_indices_2d(x, query_shape, query_shape) + x_new = get_shifted_center_blocks(x, x_indices) + + # putting the representations back in the right place + output = scatter_blocks_2d(x_new, x_indices, padded_x_shape) + # Removing the dummy head dimension + output = tf.squeeze(output, axis=1) + # Remove the padding if introduced + output = tf.slice(output, [0, 0, 0, 0], + [-1, x_shape[1], x_shape[2], -1]) + output.set_shape(x_list_shape) + return output + + def masked_local_attention_2d(q, k, v, @@ -1921,6 +2036,13 @@ def masked_local_attention_2d(q, name=None): """strided block local self-attention. + Each position in a query block can attend to all the generated queries in + the query block, which are generated in raster scan, and positions that are + generated to the left and top. The shapes are specified by query shape and + memory flange. Note that if you're using this function, you do not need to + right shift. Right shifting happens inside this function separately for each + block. + Args: q: a Tensor with shape [batch, heads, h, w, depth_k] k: a Tensor with shape [batch, heads, h, w, depth_k] @@ -1942,34 +2064,45 @@ def masked_local_attention_2d(q, q = pad_to_multiple_2d(q, query_shape) padded_q_shape = tf.shape(q) - k = pad_to_multiple_2d(k, query_shape) - v = pad_to_multiple_2d(v, query_shape) - # Setting up k and v values. Padding top, left, and right - paddings = [[0, 0], [0, 0], [memory_flange[0], 0], - [memory_flange[1], memory_flange[1]], [0, 0]] - k = tf.pad(k, paddings) - v = tf.pad(v, paddings) # Setting up q blocks q_indices = gather_indices_2d(q, query_shape, query_shape) q_new = gather_blocks_2d(q, q_indices) # Setting up k and v blocks - memory_shape = (query_shape[0]+memory_flange[0], - query_shape[1]+memory_flange[1]*2) - k_and_v_indices = gather_indices_2d(k, memory_shape, query_shape) - k_new = gather_blocks_2d(k, k_and_v_indices) - v_new = gather_blocks_2d(v, k_and_v_indices) - # Combining the mask for padding and visible region - attention_mask_shape = [np.prod(query_shape), - (query_shape[0]+memory_flange[0])* - (query_shape[1]+2*memory_flange[1])] - attention_mask = tf.cast( - make_2d_block_raster_mask(query_shape, memory_flange), tf.bool) - # reshaping attention mask to have same dims as logits - attention_mask = tf.reshape(attention_mask, [1, 1, 1]+attention_mask_shape) - padding_mask = tf.expand_dims( - tf.cast(embedding_to_padding(k_new), tf.bool), axis=-2) - attention_bias = ( - tf.to_float(tf.logical_or(attention_mask, padding_mask)) *-1e9) + k_flange, k_center = get_memory_region(k, query_shape, memory_flange, + q_indices) + v_flange, v_center = get_memory_region(v, query_shape, memory_flange, + q_indices) + if k_flange is not None: + k_new = tf.concat([k_flange, k_center], axis=3) + v_new = tf.concat([v_flange, v_center], axis=3) + else: + k_new = k_center + v_new = v_center + # Getting the masks ready + query_elements = np.prod(query_shape) + padding_mask = None + if k_flange is not None: + padding_mask = tf.expand_dims( + embedding_to_padding(k_flange)*-1e9, axis=-2) + padding_mask = tf.tile(padding_mask, [1, 1, 1, query_elements, 1]) + + center_attention_bias = attention_bias_lower_triangle( + np.prod(query_elements)) + center_attention_bias = tf.reshape(center_attention_bias, + [1, 1, 1, query_elements, query_elements] + ) + v_center_shape = tf.shape(v_center) + center_attention_bias = tf.tile(center_attention_bias, + [v_center_shape[0], + v_center_shape[1], + v_center_shape[2], + 1, 1]) + if padding_mask is not None: + # Combining the mask for padding and visible region + attention_bias = tf.concat([padding_mask, center_attention_bias], axis=4) + else: + attention_bias = center_attention_bias + output = dot_product_attention(q_new, k_new, v_new, attention_bias, dropout_rate=0., name="masked_local_2d", make_image_summary=False) diff --git a/tensor2tensor/layers/common_attention_test.py b/tensor2tensor/layers/common_attention_test.py index 6f4a6a37c..d67ef6704 100644 --- a/tensor2tensor/layers/common_attention_test.py +++ b/tensor2tensor/layers/common_attention_test.py @@ -244,6 +244,150 @@ def test2dGather(self): self.assertAllEqual(correct_indices, x_indices) self.assertAllClose(correct_gathered_x, gathered_x) + def testGetMemoryRegion(self): + """Testing the function that gathers the flanged memory region.""" + np.set_printoptions(threshold=np.inf) + batch_size = 2 + num_heads = 2 + height = 4 + width = 6 + depth = 3 + query_shape = (2, 3) + memory_flange = (1, 1) + + x = np.random.rand(batch_size, num_heads, height, width, depth) + y = np.reshape(x, (batch_size, num_heads, -1, depth)) + zeros = np.zeros((depth), dtype=np.float32) + five_zeros = np.array([zeros]*5) + seven_zeros = np.array([zeros]*7) + two_zeros = np.array([zeros]*2) + zeros = np.array([zeros]) + + correct_x_flange = [[[seven_zeros, + np.concatenate((five_zeros, y[0, 0, [2, 8]]), + axis=0), + np.concatenate((zeros, y[0, 0, [6, 7, 8, 9]], + two_zeros), axis=0), + np.concatenate((y[0, 0, [8, 9, 10, 11]], zeros, + y[0, 0, [14, 20]]), axis=0)], + [seven_zeros, + np.concatenate((five_zeros, y[0, 1, [2, 8]]), + axis=0), + np.concatenate((zeros, y[0, 1, [6, 7, 8, 9]], + two_zeros), axis=0), + np.concatenate((y[0, 1, [8, 9, 10, 11]], zeros, + y[0, 1, [14, 20]]), axis=0)]], + [[seven_zeros, + np.concatenate((five_zeros, y[1, 0, [2, 8]]), + axis=0), + np.concatenate((zeros, y[1, 0, [6, 7, 8, 9]], + two_zeros), axis=0), + np.concatenate((y[1, 0, [8, 9, 10, 11]], zeros, + y[1, 0, [14, 20]]), axis=0)], + [seven_zeros, + np.concatenate((five_zeros, y[1, 1, [2, 8]]), + axis=0), + np.concatenate((zeros, y[1, 1, [6, 7, 8, 9]], + two_zeros), axis=0), + np.concatenate((y[1, 1, [8, 9, 10, 11]], zeros, + y[1, 1, [14, 20]]), axis=0)]]] + correct_x_flange = np.array(correct_x_flange) + correct_x_center = [[[y[0, 0, [0, 1, 2, 6, 7, 8]], + y[0, 0, [3, 4, 5, 9, 10, 11]], + y[0, 0, [12, 13, 14, 18, 19, 20]], + y[0, 0, [15, 16, 17, 21, 22, 23]]], + [y[0, 1, [0, 1, 2, 6, 7, 8]], + y[0, 1, [3, 4, 5, 9, 10, 11]], + y[0, 1, [12, 13, 14, 18, 19, 20]], + y[0, 1, [15, 16, 17, 21, 22, 23]]]], + [[y[1, 0, [0, 1, 2, 6, 7, 8]], + y[1, 0, [3, 4, 5, 9, 10, 11]], + y[1, 0, [12, 13, 14, 18, 19, 20]], + y[1, 0, [15, 16, 17, 21, 22, 23]]], + [y[1, 1, [0, 1, 2, 6, 7, 8]], + y[1, 1, [3, 4, 5, 9, 10, 11]], + y[1, 1, [12, 13, 14, 18, 19, 20]], + y[1, 1, [15, 16, 17, 21, 22, 23]]]]] + correct_x_center = np.array(correct_x_center) + with self.test_session() as session: + x_indices = common_attention.gather_indices_2d( + x, query_shape, query_shape) + x_flange, x_center = common_attention.get_memory_region( + tf.constant(x, dtype=tf.float32), + query_shape, + memory_flange, + x_indices) + session.run(tf.global_variables_initializer()) + [x_flange, x_center] = session.run([x_flange, x_center]) + self.assertAllClose(correct_x_flange, x_flange) + self.assertAllClose(correct_x_center, x_center) + + def testGetShiftedCenterBlocks(self): + """Testing the function that gathers the flanged memory region.""" + np.set_printoptions(threshold=np.inf) + batch_size = 2 + num_heads = 2 + height = 4 + width = 6 + depth = 3 + query_shape = (2, 3) + + x = np.random.rand(batch_size, num_heads, height, width, depth) + y = np.reshape(x, (batch_size, num_heads, -1, depth)) + zeros = np.zeros((depth), dtype=np.float32) + zeros = np.array([zeros]) + + correct_gathered_x = [[[np.concatenate((zeros, y[0, 0, [0, 1, 2, 6, 7]]), + axis=0), + np.concatenate((zeros, y[0, 0, [3, 4, 5, 9, 10]]), + axis=0), + np.concatenate((zeros, + y[0, 0, [12, 13, 14, 18, 19]]), + axis=0), + np.concatenate((zeros, + y[0, 0, [15, 16, 17, 21, 22]]), + axis=0)], + [np.concatenate((zeros, y[0, 1, [0, 1, 2, 6, 7]]), + axis=0), + np.concatenate((zeros, y[0, 1, [3, 4, 5, 9, 10]]), + axis=0), + np.concatenate((zeros, + y[0, 1, [12, 13, 14, 18, 19]]), + axis=0), + np.concatenate((zeros, + y[0, 1, [15, 16, 17, 21, 22]]), + axis=0)]], + [[np.concatenate((zeros, y[1, 0, [0, 1, 2, 6, 7]]), + axis=0), + np.concatenate((zeros, y[1, 0, [3, 4, 5, 9, 10]]), + axis=0), + np.concatenate((zeros, + y[1, 0, [12, 13, 14, 18, 19]]), + axis=0), + np.concatenate((zeros, + y[1, 0, [15, 16, 17, 21, 22]]), + axis=0)], + [np.concatenate((zeros, y[1, 1, [0, 1, 2, 6, 7]]), + axis=0), + np.concatenate((zeros, y[1, 1, [3, 4, 5, 9, 10]]), + axis=0), + np.concatenate((zeros, + y[1, 1, [12, 13, 14, 18, 19]]), + axis=0), + np.concatenate((zeros, + y[1, 1, [15, 16, 17, 21, 22]]), + axis=0)]]] + correct_gathered_x = np.array(correct_gathered_x) + with self.test_session() as session: + x_indices = common_attention.gather_indices_2d( + x, query_shape, query_shape) + gathered_x = common_attention.get_shifted_center_blocks( + tf.constant(x, dtype=tf.float32), + x_indices) + session.run(tf.global_variables_initializer()) + x_indices, gathered_x = session.run([x_indices, gathered_x]) + self.assertAllClose(correct_gathered_x, gathered_x) + def testDotProductAttentionRelative(self): x = np.random.rand(5, 7, 12, 32) y = np.random.rand(5, 7, 12, 32) From b0f580ce6a24d69d2c2fcd01f21c133a46d145d3 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Wed, 8 Nov 2017 11:04:22 -0800 Subject: [PATCH 0561/4095] Minor changes to decoding to enable reporting of full beams and scores: PiperOrigin-RevId: 175031915 --- tensor2tensor/layers/common_layers.py | 22 +++++++++++ tensor2tensor/models/transformer.py | 48 +++++++++++++++++------- tensor2tensor/models/transformer_test.py | 4 +- tensor2tensor/utils/beam_search.py | 6 ++- tensor2tensor/utils/decoding.py | 4 +- tensor2tensor/utils/t2t_model.py | 4 +- 6 files changed, 68 insertions(+), 20 deletions(-) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 63d486463..1390ca830 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -2133,3 +2133,25 @@ def shape_dim(x, dim): if dim < len(static) and static[dim] is not None: return static[dim] return tf.shape(x)[dim] + + +def sample_with_temperature(logits, temperature): + """Either argmax or random sampling. + + Args: + logits: a Tensor. + temperature: a float 0.0=argmax 1.0=random + + Returns: + a Tensor with one fewer dimension than logits. + """ + if temperature == 0.0: + return tf.argmax(logits, -1) + else: + assert temperature > 0.0 + reshaped_logits = ( + tf.reshape(logits, [-1, tf.shape(logits)[-1]])/temperature) + choices = tf.multinomial(reshaped_logits, 1) + choices = tf.reshape(choices, + tf.shape(logits)[:logits.get_shape().ndims - 1]) + return choices diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index c36a1c89b..14d5cc80b 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -160,7 +160,8 @@ def _greedy_infer(self, features, decode_length, last_position_only=True): ValueError: If last_position_only if False NotImplementedError: If there are multiple data shards. """ - decoded_ids = self._fast_decode(features, decode_length, last_position_only) + decoded_ids, _ = self._fast_decode( + features, decode_length, last_position_only) return decoded_ids, None, None def _beam_decode(self, features, decode_length, beam_size, top_beams, @@ -179,8 +180,10 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, Returns: samples: an integer `Tensor`. Top samples from the beam search """ - return self._fast_decode(features, decode_length, last_position_only, - beam_size, top_beams, alpha) + decoded_ids, scores = self._fast_decode( + features, decode_length, last_position_only, beam_size, top_beams, + alpha) + return {"outputs": decoded_ids, "scores": scores} def _fast_decode(self, features, @@ -327,14 +330,9 @@ def symbols_to_logits_fn(ids, i, cache): self._hparams.problems[self._problem_idx].target_modality) vocab_size = target_modality.top_dimensionality initial_ids = tf.zeros([batch_size], dtype=tf.int32) - decoded_ids, _ = beam_search.beam_search( - symbols_to_logits_fn, - initial_ids, - beam_size, - decode_length, - vocab_size, - alpha, - states=cache) + decoded_ids, scores = beam_search.beam_search( + symbols_to_logits_fn, initial_ids, beam_size, decode_length, + vocab_size, alpha, states=cache, stop_early=(top_beams == 1)) if top_beams == 1: decoded_ids = decoded_ids[:, 0, 1:] @@ -344,11 +342,15 @@ def symbols_to_logits_fn(ids, i, cache): def inner_loop(i, next_id, decoded_ids, cache): logits, cache = symbols_to_logits_fn(next_id, i, cache) - next_id = tf.expand_dims(tf.argmax(logits, axis=-1), axis=1) + temperature = (0.0 if hparams.sampling_method == "argmax" + else hparams.sampling_temp) + next_id = tf.expand_dims( + common_layers.sample_with_temperature(logits, temperature), axis=1) decoded_ids = tf.concat([decoded_ids, next_id], axis=1) return i + 1, next_id, decoded_ids, cache decoded_ids = tf.zeros([batch_size, 0], dtype=tf.int64) + scores = None next_id = tf.zeros([batch_size, 1], dtype=tf.int64) _, _, decoded_ids, _ = tf.while_loop( # TODO(llion): Early stopping. @@ -362,7 +364,7 @@ def inner_loop(i, next_id, decoded_ids, cache): nest.map_structure(lambda t: tf.TensorShape(t.shape), cache), ]) - return decoded_ids + return decoded_ids, scores @registry.register_model @@ -1093,3 +1095,23 @@ def update_hparams_for_tpu(hparams): # Each example in the batch will be of (padded) length hparams.max_length hparams.max_length = 64 hparams.tpu_batch_size_per_shard = 16 + + +@registry.register_hparams +def transformer_clean(): + """No dropout, label smoothing, max_length.""" + hparams = transformer_base_v2() + hparams.label_smoothing = 0.0 + hparams.layer_prepostprocess_dropout = 0.0 + hparams.attention_dropout = 0.0 + hparams.relu_dropout = 0.0 + hparams.max_length = 0 + return hparams + + +@registry.register_hparams +def transformer_clean_big(): + hparams = transformer_clean() + hparams.hidden_size = 1024 + hparams.filter_size = 4096 + return hparams diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 74f563fbb..6bdc3a44d 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -140,7 +140,7 @@ def testBeamVsFast(self): beam_size=4, top_beams=1, last_position_only=True, - alpha=1.0) + alpha=1.0)["outputs"] fast_result = model._beam_decode( features, @@ -148,7 +148,7 @@ def testBeamVsFast(self): beam_size=4, top_beams=1, last_position_only=True, - alpha=1.0) + alpha=1.0)["outputs"] with self.test_session(): beam_res = beam_result.eval() diff --git a/tensor2tensor/utils/beam_search.py b/tensor2tensor/utils/beam_search.py index c08416fb8..d2ed2f9dd 100644 --- a/tensor2tensor/utils/beam_search.py +++ b/tensor2tensor/utils/beam_search.py @@ -180,7 +180,8 @@ def beam_search(symbols_to_logits_fn, vocab_size, alpha, states=None, - eos_id=EOS_ID): + eos_id=EOS_ID, + stop_early=True): """Beam search with length penalties. Requires a function that can take the currently decoded sybmols and return @@ -216,6 +217,7 @@ def beam_search(symbols_to_logits_fn, alpha: alpha for length penalty. states: dict (possibly nested) of decoding states. eos_id: ID for end of sentence. + stop_early: a boolean - stop once best sequence is provably determined. Returns: Tuple of (decoded beams [batch_size, beam_size, decode_length] @@ -475,6 +477,8 @@ def _is_finished(i, unused_alive_seq, alive_log_probs, unused_finished_seq, Returns: Bool. """ + if not stop_early: + return tf.less(i, decode_length) max_length_penalty = tf.pow(((5. + tf.to_float(decode_length)) / 6.), alpha) # The best possible score of the most likley alive sequence lower_bound_alive_scores = alive_log_probs[:, 0] / max_length_penalty diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 8aa3c0b71..104ffc114 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -329,9 +329,9 @@ def input_fn(): tf.logging.info("BEAM %d:" % k) beam_string = targets_vocab.decode(_save_until_eos(beam, is_image)) if scores is not None: - tf.logging.info("%s\tScore:%f" % (beam_string, scores[k])) + tf.logging.info("\"%s\"\tScore:%f" % (beam_string, scores[k])) else: - tf.logging.info(beam_string) + tf.logging.info("\"%s\"" % beam_string) else: if decode_hp.identity_output: tf.logging.info(" ".join(map(str, result["outputs"].flatten()))) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 07f4622d6..6e555df0c 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -308,14 +308,14 @@ def symbols_to_logits_fn(ids): decode_length += tf.shape(features["inputs"])[1] ids, scores = beam_search.beam_search(symbols_to_logits_fn, initial_ids, beam_size, decode_length, vocab_size, - alpha) + alpha, stop_early=(top_beams == 1)) # Set inputs back to the unexpanded inputs to not to confuse the Estimator! if self.has_input: features["inputs"] = inputs_old # Return `top_beams` decodings (also remove initial id from the beam search) - return_scores = False # TODO(lukaszkaiser): make it work multi-problem. + return_scores = True # TODO(lukaszkaiser): make it work multi-problem. if top_beams == 1: if return_scores: return {"outputs": ids[:, 0, 1:], "scores": scores} From 891d2bf015922b36f8f3b166b84c6b0e068a83e8 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 8 Nov 2017 11:30:23 -0800 Subject: [PATCH 0562/4095] Minor docstring fixes PiperOrigin-RevId: 175036743 --- tensor2tensor/layers/common_attention.py | 40 +++++++++++++----------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 41aec1d5d..b840291d4 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -2242,7 +2242,7 @@ def multihead_attention(query_antecedent, Args: query_antecedent: a Tensor with shape [batch, length_q, channels] - memory_antecedent: a Tensor with shape [batch, length_m, channels] + memory_antecedent: a Tensor with shape [batch, length_m, channels] or None bias: bias Tensor (see attention_bias()) total_key_depth: an integer total_value_depth: an integer @@ -2251,31 +2251,33 @@ def multihead_attention(query_antecedent, dropout_rate: a floating point number max_relative_position: Maximum distance between inputs to generate unique relation embeddings for. Only relevant - when using dot_product_relative attention. + when using "dot_product_relative" attention. image_shapes: optional tuple of integer scalars. - see comments for attention_image_summary() - attention_type: a string, either "dot_product", "local_mask_right", - "local_unmasked" or any attention function with the - signature (q, k, v, **kwargs) + see comments for attention_image_summary() + attention_type: a string, either "dot_product", "dot_product_relative", + "local_mask_right", "local_unmasked", "masked_dilated_1d", + "unmasked_dilated_1d" or any attention function with the + signature (query, key, value, **kwargs) block_length: an integer - relevant for "local_mask_right" block_width: an integer - relevant for "local_unmasked" q_filter_width: An integer specifying how wide you want the query to be. kv_filter_width: An integer specifying how wide you want the keys and values - to be. + to be. q_padding: One of "VALID", "SAME" or "LEFT". Default is VALID: No padding. - kv_padding: One of "VALID", "SAME" or "LEFT". Default is VALID: No padding. - cache: dict, containing Tensors which are the results of previous - attentions, used for fast decoding. Expects the dict to contrain two - keys; 'k' and 'v', for the initial call the values for these keys should - be empty Tensors of the appropriate shape. - 'k' [batch_size, 0, key_channels] - 'v' [batch_size, 0, value_channels] + kv_padding: One of "VALID", "SAME" or "LEFT". Default is "VALID": + no padding. + cache: dict containing Tensors which are the results of previous + attentions, used for fast decoding. Expects the dict to contrain two + keys ('k' and 'v'), for the initial call the values for these keys + should be empty Tensors of the appropriate shape. + 'k' [batch_size, 0, key_channels] + 'v' [batch_size, 0, value_channels] gap_size: Integer option for dilated attention to indicate spacing between - memory blocks. + memory blocks. num_memory_blocks: Integer option to indicate how many memory blocks to look - at. + at. name: an optional string - **kwargs (dict): Params for the attention function + **kwargs (dict): Parameters for the attention function Caching: WARNING: For decoder self-attention, i.e. when memory_antecedent == None, @@ -2291,8 +2293,8 @@ def multihead_attention(query_antecedent, [batch_size, length_q, hidden_dim] unless the cache dict is provided in which case only the last memory position is calculated and the output shape is [batch_size, 1, hidden_dim] - Optionnaly return an additional loss parameters (ex: load balance loss for - the experts) returned by the attention_type function + Optionaly returns an additional loss parameters (ex: load balance loss for + the experts) returned by the attention_type function. Raises: ValueError: if the key depth or value depth are not divisible by the From e1bde97759d4d378239335bf8f3d65115d594de1 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Wed, 8 Nov 2017 12:20:15 -0800 Subject: [PATCH 0563/4095] Add CTC loss SymbolModality, correct get_bleu script, LSTM without inputs, small en-fr generator, play with adversarial transformer. PiperOrigin-RevId: 175045004 --- .../data_generators/translate_enfr.py | 4 +++ tensor2tensor/layers/modalities.py | 25 +++++++++++++++++++ tensor2tensor/models/lstm.py | 17 +++++++------ tensor2tensor/models/transformer_adv.py | 12 ++++++--- tensor2tensor/utils/data_reader.py | 2 +- tensor2tensor/utils/decoding.py | 3 +++ tensor2tensor/utils/get_ende_bleu.sh | 6 ++--- 7 files changed, 54 insertions(+), 15 deletions(-) diff --git a/tensor2tensor/data_generators/translate_enfr.py b/tensor2tensor/data_generators/translate_enfr.py index 8076d4792..b09fca90e 100644 --- a/tensor2tensor/data_generators/translate_enfr.py +++ b/tensor2tensor/data_generators/translate_enfr.py @@ -151,6 +151,10 @@ class TranslateEnfrWmtSmallCharacters(translate.TranslateProblem): def is_character_level(self): return True + @property + def use_small_dataset(self): + return True + @property def vocab_name(self): return "vocab.enfr" diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index df6f002cc..1adc955e4 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -135,6 +135,31 @@ def top(self, body_output, _): return logits +@registry.register_symbol_modality("ctc") +class CTCSymbolModality(SymbolModality): + """SymbolModality that uses CTC loss.""" + + def loss(self, logits, targets, weights_fn=common_layers.weights_nonzero): + """Compute the CTC loss.""" + with tf.name_scope("ctc_loss", [logits, targets]): + # For CTC we assume targets are 1d, [batch, length, 1, 1] here. + targets_shape = targets.get_shape().as_list() + assert len(targets_shape) == 4 + assert targets_shape[2] == 1 + assert targets_shape[3] == 1 + targets = tf.squeeze(targets, axis=[2, 3]) + logits = tf.squeeze(logits, axis=[2, 3]) + targets_mask = 1 - tf.to_int32(tf.equal(targets, 0)) + targets_lengths = tf.reduce_sum(targets_mask, axis=1) + sparse_targets = tf.keras.backend.ctc_label_dense_to_sparse( + targets, targets_lengths) + xent = tf.nn.ctc_loss( + sparse_targets, logits, targets_lengths, time_major=False, + preprocess_collapse_repeated=False, ctc_merge_repeated=False) + weights = weights_fn(targets) + return tf.reduce_sum(xent), tf.reduce_sum(weights) + + @registry.register_image_modality class SmallImageModality(modality.Modality): """Performs strided conv compressions for small image data.""" diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 68d375c96..20fe931d0 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -98,11 +98,14 @@ def dropout_lstm_cell(): def lstm_seq2seq_internal(inputs, targets, hparams, train): """The basic LSTM seq2seq model, main step used for training.""" with tf.variable_scope("lstm_seq2seq"): - # Flatten inputs. - inputs = common_layers.flatten4d3d(inputs) - # LSTM encoder. - _, final_encoder_state = lstm( - tf.reverse(inputs, axis=[1]), hparams, train, "encoder") + if inputs is not None: + # Flatten inputs. + inputs = common_layers.flatten4d3d(inputs) + # LSTM encoder. + _, final_encoder_state = lstm( + tf.reverse(inputs, axis=[1]), hparams, train, "encoder") + else: + final_encoder_state = None # LSTM decoder. shifted_targets = common_layers.shift_right(targets) decoder_outputs, _ = lstm( @@ -138,7 +141,7 @@ def model_fn_body(self, features): if self._hparams.initializer == "orthogonal": raise ValueError("LSTM models fail with orthogonal initializer.") train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN - return lstm_seq2seq_internal(features["inputs"], features["targets"], + return lstm_seq2seq_internal(features.get("inputs"), features["targets"], self._hparams, train) @@ -151,7 +154,7 @@ def model_fn_body(self, features): raise ValueError("LSTM models fail with orthogonal initializer.") train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN return lstm_seq2seq_internal_attention( - features["inputs"], features["targets"], self._hparams, train) + features.get("inputs"), features["targets"], self._hparams, train) @registry.register_hparams diff --git a/tensor2tensor/models/transformer_adv.py b/tensor2tensor/models/transformer_adv.py index 3867944e5..9bbccb874 100644 --- a/tensor2tensor/models/transformer_adv.py +++ b/tensor2tensor/models/transformer_adv.py @@ -92,6 +92,10 @@ def adv_transformer_internal(inputs, targets, target_space, hparams): with tf.variable_scope("adv_transformer"): batch_size = tf.shape(targets)[0] targets = tf.reshape(targets, [batch_size, -1, 1]) + intermediate = tf.constant(34*1024 - 1) + intermediate += tf.zeros_like(targets) + targets = tf.concat([targets, intermediate], axis=2) + targets = tf.reshape(targets, [batch_size, -1, 1]) embedding = tf.get_variable("embedding", [34*1024, hparams.hidden_size]) targets_emb = tf.gather(embedding, targets) @@ -111,9 +115,10 @@ def adv_transformer_internal(inputs, targets, target_space, hparams): ed = None # Masking. - masking = common_layers.inverse_lin_decay(60000) - masking *= common_layers.inverse_exp_decay(20000) # Not much at start. + masking = common_layers.inverse_lin_decay(200000) + masking *= common_layers.inverse_exp_decay(50000) # Not much at start. masking -= tf.random_uniform([]) * 0.4 + masking = tf.minimum(tf.maximum(masking, 0.0), 1.0) mask = tf.less(masking, tf.random_uniform(tf.shape(targets))) mask = tf.expand_dims(tf.to_float(mask), 3) noise = tf.random_uniform(tf.shape(targets_emb)) @@ -125,7 +130,7 @@ def adv_transformer_internal(inputs, targets, target_space, hparams): res_emb = softmax_embed(res, embedding, batch_size, hparams) # Extra steps. - extra_step_prob = masking * 0.6 + extra_step_prob = masking * 0.6 + 0.3 if hparams.mode != tf.estimator.ModeKeys.TRAIN: extra_step_prob = 1.0 for _ in xrange(hparams.extra_steps): @@ -211,6 +216,7 @@ def transformer_adv_small(): hparams.label_smoothing = 0.0 hparams.weight_decay = 0.1 hparams.symbol_modality_skip_top = True + hparams.target_modality = "symbol:ctc" hparams.add_hparam("num_compress_steps", 2) hparams.add_hparam("extra_steps", 0) hparams.add_hparam("noise_val", 0.3) diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 9ec147e3d..092aa5628 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -44,7 +44,7 @@ def feature_placeholders(data_fields, data_items_to_decoders): example = {} for field, config in data_fields.items(): if isinstance(config, tf.VarLenFeature): - shape = [None] + shape = [None, None] else: shape = config.shape diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 104ffc114..d1dbd7610 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -513,6 +513,9 @@ def _get_sorted_inputs(filename, num_shards=1, delimiter="\n"): text = f.read() records = text.split(delimiter) inputs = [record.strip() for record in records] + # Strip the last empty line. + if not inputs[-1]: + inputs.pop() input_lens = [(i, len(line.split())) for i, line in enumerate(inputs)] sorted_input_lens = sorted(input_lens, key=operator.itemgetter(1)) # We'll need the keys to rearrange the inputs back into their original order diff --git a/tensor2tensor/utils/get_ende_bleu.sh b/tensor2tensor/utils/get_ende_bleu.sh index 3493af74c..0de433e33 100755 --- a/tensor2tensor/utils/get_ende_bleu.sh +++ b/tensor2tensor/utils/get_ende_bleu.sh @@ -12,10 +12,8 @@ perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l de < $decodes_file > $dec # See https://nlp.stanford.edu/projects/nmt/ : # 'Also, for historical reasons, we split compound words, e.g., # "rich-text format" --> rich ##AT##-##AT## text format."' -perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' < $tok_gold_targets > $tok_gold_t -argets.atat -perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' < $decodes_file.tok > $decodes -_file.atat +perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' < $tok_gold_targets > $tok_gold_targets.atat +perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' < $decodes_file.tok > $decodes_file.atat # Get BLEU. perl $mosesdecoder/scripts/generic/multi-bleu.perl $tok_gold_targets.atat < $decodes_file.tok.atat From 3a36280228c2b6c34b7d531a7ed00d3e3cd0792d Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 8 Nov 2017 14:31:47 -0800 Subject: [PATCH 0564/4095] Factor out optimization utilities into optimize.py PiperOrigin-RevId: 175065625 --- tensor2tensor/tpu/tpu_trainer_lib.py | 6 +- tensor2tensor/utils/model_builder.py | 126 ++--------------------- tensor2tensor/utils/optimize.py | 145 +++++++++++++++++++++++++++ 3 files changed, 156 insertions(+), 121 deletions(-) create mode 100644 tensor2tensor/utils/optimize.py diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index f0f66f4ed..dda35485f 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -26,7 +26,7 @@ from tensor2tensor.layers import common_layers from tensor2tensor.utils import data_reader from tensor2tensor.utils import metrics -from tensor2tensor.utils import model_builder +from tensor2tensor.utils import optimize from tensor2tensor.utils import registry from tensor2tensor.utils import trainer_utils @@ -188,10 +188,10 @@ def model_fn(features, labels, mode, params, config): assert mode == tf.estimator.ModeKeys.TRAIN # Learning rate - lr = hparams.learning_rate * model_builder.learning_rate_decay(hparams) + lr = hparams.learning_rate * optimize.learning_rate_decay(hparams) # Optimizer - opt = model_builder.ConditionalOptimizer(hparams.optimizer, lr, hparams) + opt = optimize.ConditionalOptimizer(hparams.optimizer, lr, hparams) if use_tpu: opt = tf.contrib.tpu.CrossShardOptimizer(opt) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index ef362ed90..5619ada31 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -34,8 +34,8 @@ from tensor2tensor.utils import devices from tensor2tensor.utils import input_fn_builder from tensor2tensor.utils import metrics +from tensor2tensor.utils import optimize from tensor2tensor.utils import registry -from tensor2tensor.utils import yellowfin import tensorflow as tf from tensorflow.python.framework import dtypes @@ -173,8 +173,9 @@ def nth_model(n): outputs = model_output scores = None - batched_problem_choice = (features["problem_choice"] * tf.ones( - (tf.shape(features["inputs"])[0],), dtype=tf.int32)) + batched_problem_choice = ( + features["problem_choice"] * tf.ones( + (tf.shape(features["inputs"])[0],), dtype=tf.int32)) predictions = { "outputs": outputs, "scores": scores, @@ -214,7 +215,7 @@ def nth_model(n): assert mode == tf.estimator.ModeKeys.TRAIN # Set learning rate - learning_rate = hparams.learning_rate * learning_rate_decay( + learning_rate = hparams.learning_rate * optimize.learning_rate_decay( hparams, num_worker_replicas=worker_replicas, num_train_steps=train_steps) learning_rate /= math.sqrt(float(worker_replicas)) @@ -292,22 +293,7 @@ def nth_model(n): _log_variable_sizes(diet_vars, "Diet Variables") # Optimize - total_loss = tf.identity(total_loss, name="total_loss") - opt = ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) - opt_summaries = ["learning_rate", "loss"] - if hparams.summarize_grads: - opt_summaries.extend(["gradients", "gradient_norm"]) - tf.logging.info("Computing gradients for global model_fn.") - train_op = tf.contrib.layers.optimize_loss( - name="training", - loss=total_loss, - global_step=global_step, - learning_rate=learning_rate, - clip_gradients=hparams.clip_grad_norm or None, - gradient_noise_scale=hparams.grad_noise_scale or None, - optimizer=opt, - summaries=opt_summaries, - colocate_gradients_with_ops=True) + train_op = optimize.optimize(total_loss, learning_rate, hparams) # Remove summaries that will fail to run because they are in conditionals. # TODO(cwhipkey): Test with this code removed, later in 2017. @@ -351,56 +337,6 @@ def wrapping_model_fn(features, labels, mode, params): return wrapping_model_fn -class ConditionalOptimizer(tf.train.Optimizer): - """Conditional optimizer.""" - - def __init__(self, optimizer_name, lr, hparams): - if optimizer_name == "Adam": - # We change the default epsilon for Adam and re-scale lr. - # Using LazyAdam as it's much faster for large vocabulary embeddings. - self._opt = tf.contrib.opt.LazyAdamOptimizer( - lr / 500.0, - beta1=hparams.optimizer_adam_beta1, - beta2=hparams.optimizer_adam_beta2, - epsilon=hparams.optimizer_adam_epsilon) - elif optimizer_name == "Momentum": - self._opt = tf.train.MomentumOptimizer( - lr, momentum=hparams.optimizer_momentum_momentum) - elif optimizer_name == "YellowFin": - tf.logging.info("Init YellowFin Optimizer.") - self._opt = yellowfin.YellowFinOptimizer( - learning_rate=lr, momentum=hparams.optimizer_momentum_momentum) - elif optimizer_name == "TrueAdam": - self._opt = tf.train.AdamOptimizer( - lr / 500.0, - beta1=hparams.optimizer_adam_beta1, - beta2=hparams.optimizer_adam_beta2, - epsilon=hparams.optimizer_adam_epsilon) - else: - self._opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer_name](lr) - - def compute_gradients(self, loss, var_list=None, **kwargs): - return self._opt.compute_gradients(loss, var_list, **kwargs) - - def apply_gradients(self, grads_and_vars, global_step=None, name=None): - return self._opt.apply_gradients( - grads_and_vars, global_step=global_step, name=name) - - -def _sqrt_decay(step): - """Decay like 1 / sqrt(step), multiplied by 500 to normalize.""" - return 500.0 / tf.sqrt(tf.maximum(step, 1.0)) - - -def _exp_decay_after(step, rate, from_which_step): - """Decay exponentially by rate (per step) starting at from_which_step.""" - return tf.cond( - step < from_which_step, - lambda: tf.constant(1.0), - lambda: rate**(step - from_which_step), - name="exponential_decay_step_cond") - - def _log_variable_sizes(var_list, tag): """Log the sizes and shapes of variables, and the total size. @@ -414,7 +350,8 @@ def _log_variable_sizes(var_list, tag): v = name_to_var[v_name] v_size = int(np.prod(np.array(v.shape.as_list()))) tf.logging.info("Weight %s\tshape %s\tsize %d", - v.name[:-2].ljust(80), str(v.shape).ljust(20), v_size) + v.name[:-2].ljust(80), + str(v.shape).ljust(20), v_size) total_size += v_size tf.logging.info("%s Total size: %d", tag, total_size) @@ -435,53 +372,6 @@ def _get_variable_initializer(hparams): raise ValueError("Unrecognized initializer: %s" % hparams.initializer) -def learning_rate_decay(hparams, num_worker_replicas=1, num_train_steps=1): - """Inverse-decay learning rate until warmup_steps, then decay.""" - warmup_steps = tf.to_float( - hparams.learning_rate_warmup_steps * num_worker_replicas) - step = tf.to_float(tf.train.get_or_create_global_step()) - if hparams.learning_rate_decay_scheme == "noam": - return 5000.0 * hparams.hidden_size**-0.5 * tf.minimum( - (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) - elif hparams.learning_rate_decay_scheme == "exp100k": - return 0.94**(step // 100000) - elif hparams.learning_rate_decay_scheme == "cosine": - cycle_steps = hparams.learning_rate_cosine_cycle_steps - return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) - elif hparams.learning_rate_decay_scheme == "cyclelinear10x": - # Cycle the rate linearly by 10x every warmup_steps, up and down. - cycle_steps = hparams.learning_rate_warmup_steps - cycle_position = step % (2 * cycle_steps) - cycle_position = tf.to_float( # Normalize to the interval [-1, 1]. - cycle_position - cycle_steps) / float(cycle_steps) - cycle_position = 1.0 - tf.abs(cycle_position) # 0 to 1 and back to 0. - return (cycle_position + 0.1) * 3.0 # 10x difference each cycle (0.3-3). - - inv_base = tf.exp(tf.log(0.01) / warmup_steps) - inv_decay = inv_base**(warmup_steps - step) - if hparams.learning_rate_decay_scheme == "sqrt": - decay = _sqrt_decay(step - warmup_steps) - elif hparams.learning_rate_decay_scheme == "exp10k": - decay = _exp_decay_after(step - warmup_steps, 0.9995, - num_train_steps - warmup_steps - 10000) - elif hparams.learning_rate_decay_scheme == "exp50k": - decay = _exp_decay_after(step - warmup_steps, 0.99995, - num_train_steps - warmup_steps - 50000) - elif hparams.learning_rate_decay_scheme == "exp500k": - decay = _exp_decay_after(step - warmup_steps, 0.9999955, - num_train_steps - warmup_steps - 500000) - elif hparams.learning_rate_decay_scheme == "none": - decay = tf.constant(1.0) - else: - raise ValueError("Unrecognized learning rate decay scheme: %s" % - hparams.learning_rate_decay_scheme) - return tf.cond( - step < warmup_steps, - lambda: inv_decay, - lambda: decay, - name="learning_rate_decay_warump_cond") - - def _del_dict_nones(d): for k in list(d.keys()): if d[k] is None: diff --git a/tensor2tensor/utils/optimize.py b/tensor2tensor/utils/optimize.py new file mode 100644 index 000000000..649ef4f28 --- /dev/null +++ b/tensor2tensor/utils/optimize.py @@ -0,0 +1,145 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Optimization.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +import numpy as np + +from tensor2tensor.utils import yellowfin + +import tensorflow as tf + + + +def optimize(loss, learning_rate, hparams): + """Minimize loss.""" + loss = tf.identity(loss, name="total_loss") + opt = ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) + opt_summaries = ["learning_rate", "loss"] + if hparams.summarize_grads: + opt_summaries.extend(["gradients", "gradient_norm"]) + train_op = tf.contrib.layers.optimize_loss( + name="training", + loss=loss, + global_step=tf.train.get_or_create_global_step(), + learning_rate=learning_rate, + clip_gradients=hparams.clip_grad_norm or None, + gradient_noise_scale=hparams.grad_noise_scale or None, + optimizer=opt, + summaries=opt_summaries, + colocate_gradients_with_ops=True) + return train_op + + +class ConditionalOptimizer(tf.train.Optimizer): + """Conditional optimizer.""" + + def __init__(self, optimizer_name, lr, hparams): + if optimizer_name == "Adam": + # We change the default epsilon for Adam and re-scale lr. + # Using LazyAdam as it's much faster for large vocabulary embeddings. + self._opt = tf.contrib.opt.LazyAdamOptimizer( + lr / 500.0, + beta1=hparams.optimizer_adam_beta1, + beta2=hparams.optimizer_adam_beta2, + epsilon=hparams.optimizer_adam_epsilon) + elif optimizer_name == "Momentum": + self._opt = tf.train.MomentumOptimizer( + lr, momentum=hparams.optimizer_momentum_momentum) + elif optimizer_name == "YellowFin": + tf.logging.info("Init YellowFin Optimizer.") + self._opt = yellowfin.YellowFinOptimizer( + learning_rate=lr, momentum=hparams.optimizer_momentum_momentum) + elif optimizer_name == "TrueAdam": + self._opt = tf.train.AdamOptimizer( + lr / 500.0, + beta1=hparams.optimizer_adam_beta1, + beta2=hparams.optimizer_adam_beta2, + epsilon=hparams.optimizer_adam_epsilon) + else: + self._opt = tf.contrib.layers.OPTIMIZER_CLS_NAMES[optimizer_name](lr) + + def compute_gradients(self, loss, var_list=None, **kwargs): + return self._opt.compute_gradients(loss, var_list, **kwargs) + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + return self._opt.apply_gradients( + grads_and_vars, global_step=global_step, name=name) + + +def _sqrt_decay(step): + """Decay like 1 / sqrt(step), multiplied by 500 to normalize.""" + return 500.0 / tf.sqrt(tf.maximum(step, 1.0)) + + +def _exp_decay_after(step, rate, from_which_step): + """Decay exponentially by rate (per step) starting at from_which_step.""" + return tf.cond( + step < from_which_step, + lambda: tf.constant(1.0), + lambda: rate**(step - from_which_step), + name="exponential_decay_step_cond") + + +def learning_rate_decay(hparams, num_worker_replicas=1, num_train_steps=1): + """Inverse-decay learning rate until warmup_steps, then decay.""" + warmup_steps = tf.to_float( + hparams.learning_rate_warmup_steps * num_worker_replicas) + step = tf.to_float(tf.train.get_or_create_global_step()) + if hparams.learning_rate_decay_scheme == "noam": + return 5000.0 * hparams.hidden_size**-0.5 * tf.minimum( + (step + 1) * warmup_steps**-1.5, (step + 1)**-0.5) + elif hparams.learning_rate_decay_scheme == "exp100k": + return 0.94**(step // 100000) + elif hparams.learning_rate_decay_scheme == "cosine": + cycle_steps = hparams.learning_rate_cosine_cycle_steps + return 0.5 * (1 + tf.cos(np.pi * (step % cycle_steps) / cycle_steps)) + elif hparams.learning_rate_decay_scheme == "cyclelinear10x": + # Cycle the rate linearly by 10x every warmup_steps, up and down. + cycle_steps = hparams.learning_rate_warmup_steps + cycle_position = step % (2 * cycle_steps) + cycle_position = tf.to_float( # Normalize to the interval [-1, 1]. + cycle_position - cycle_steps) / float(cycle_steps) + cycle_position = 1.0 - tf.abs(cycle_position) # 0 to 1 and back to 0. + return (cycle_position + 0.1) * 3.0 # 10x difference each cycle (0.3-3). + + inv_base = tf.exp(tf.log(0.01) / warmup_steps) + inv_decay = inv_base**(warmup_steps - step) + if hparams.learning_rate_decay_scheme == "sqrt": + decay = _sqrt_decay(step - warmup_steps) + elif hparams.learning_rate_decay_scheme == "exp10k": + decay = _exp_decay_after(step - warmup_steps, 0.9995, + num_train_steps - warmup_steps - 10000) + elif hparams.learning_rate_decay_scheme == "exp50k": + decay = _exp_decay_after(step - warmup_steps, 0.99995, + num_train_steps - warmup_steps - 50000) + elif hparams.learning_rate_decay_scheme == "exp500k": + decay = _exp_decay_after(step - warmup_steps, 0.9999955, + num_train_steps - warmup_steps - 500000) + elif hparams.learning_rate_decay_scheme == "none": + decay = tf.constant(1.0) + else: + raise ValueError("Unrecognized learning rate decay scheme: %s" % + hparams.learning_rate_decay_scheme) + return tf.cond( + step < warmup_steps, + lambda: inv_decay, + lambda: decay, + name="learning_rate_decay_warump_cond") From 1c1dbd5f3615491487288e5fa474ca7b6966c8d2 Mon Sep 17 00:00:00 2001 From: Katherine Lee <katherinelee@google.com> Date: Wed, 8 Nov 2017 16:48:21 -0800 Subject: [PATCH 0565/4095] Adding vanilla gan model and NoLossModality (does nothing and returns no loss). PiperOrigin-RevId: 175086757 --- tensor2tensor/layers/modalities.py | 19 ++++ tensor2tensor/models/__init__.py | 1 + tensor2tensor/models/vanilla_gan.py | 169 ++++++++++++++++++++++++++++ 3 files changed, 189 insertions(+) create mode 100644 tensor2tensor/models/vanilla_gan.py diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 1adc955e4..7d9aca58e 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -567,3 +567,22 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_all): # (Since we're processing images and so have no padding and some pixel 0s.) return super(IdentityModalityNoPad, self).loss( top_out, targets, weights_fn=weights_fn) + + +@registry.register_image_modality("no_loss") +class NoLossModality(modality.Modality): + """Does nothing to the input and returns no loss.""" + + @property + def targets_dimensionality(self): + return self._vocab_size + + def bottom(self, x): + return tf.to_float(x) + + def top(self, body_output, _): + return body_output + + def loss_sharded(self, sharded_top_out, sharded_targets, data_parallelism): + """Return nothing.""" + return tf.constant(0.0, tf.float32) diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index f4c8a9a82..feadcae83 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -42,5 +42,6 @@ from tensor2tensor.models import transformer_revnet from tensor2tensor.models import transformer_sketch from tensor2tensor.models import transformer_vae +from tensor2tensor.models import vanilla_gan from tensor2tensor.models import xception # pylint: enable=unused-import diff --git a/tensor2tensor/models/vanilla_gan.py b/tensor2tensor/models/vanilla_gan.py new file mode 100644 index 000000000..d6611d50f --- /dev/null +++ b/tensor2tensor/models/vanilla_gan.py @@ -0,0 +1,169 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Simple Generative Adversarial Model with two linear layers. + +Example of how to create a GAN in T2T. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + + +def generator(z, hparams, reuse=False): + """Initalizes generator layers.""" + + g_h1 = tf.layers.dense(z, hparams.weight_size, activation=tf.nn.relu, + name="l1", reuse=reuse) + g_log_prob = tf.layers.dense(g_h1, hparams.input_size, name="logp", + reuse=reuse) + g_prob = tf.nn.sigmoid(g_log_prob) + + return g_prob + + +def discriminator(x, hparams, reuse=False): + """Initalizes discriminator layers.""" + + d_h1 = tf.layers.dense(x, hparams.weight_size, activation=tf.nn.relu, + name="d_h1", reuse=reuse) + d_logit = tf.layers.dense(d_h1, 1, name="d_logit", reuse=reuse) + d_prob = tf.nn.sigmoid(d_logit) + + return d_prob, d_logit + + +def reverse_grad(x): + return tf.stop_gradient(2*x) - x + + +def vanilla_gan_internal(inputs, hparams, train): + with tf.variable_scope("vanilla_gan", reuse=tf.AUTO_REUSE): + x = common_layers.flatten4d3d(inputs) + + batch_size = tf.shape(inputs)[0] + # Currently uses one of three color layers. + x = x[:, :, 0] + x.set_shape([None, hparams.input_size]) + + if train: + z = tf.random_uniform(shape=[batch_size, + hparams.random_sample_size], + minval=-1, maxval=1, name="z") + else: + z = tf.random_uniform(shape=[1, hparams.random_sample_size], + minval=-1, maxval=1, name="z") + + g_sample = generator(z, hparams) + + d_real, _ = discriminator(x, hparams) + + d_fake, _ = discriminator(reverse_grad(g_sample), hparams, + reuse=True) + d_loss = -tf.reduce_mean(tf.log(d_real+hparams.epsilon) + + tf.log(1. - d_fake)) + g_loss = -tf.reduce_mean(tf.log(d_fake+hparams.epsilon)) + + losses = {} + losses["discriminator"] = d_loss + losses["generator"] = g_loss + + z_sampled = tf.random_uniform(shape=[1, hparams.random_sample_size], + minval=-1, maxval=1, name="z") + g_sample = generator(z_sampled, hparams, reuse=True) + g_reshaped_sample = tf.reshape(g_sample, + [1, hparams.height, hparams.width, 1]) + tf.summary.image("generated", g_reshaped_sample, max_outputs=1) + + if train: + # Returns an empty output, and loss dictionary. + return tf.zeros(shape=[1, 1]), losses + else: + return g_sample, losses + + +@registry.register_model +class VanillaGan(t2t_model.T2TModel): + """Simple GAN. + """ + + def model_fn_body(self, features): + """Computes the generator and discriminator loss. + + Args: + features: A dictionary of key to Tensor. Each Tensor has shape + [batch_size, ?, ?, hidden_size]. + + Returns: + output: Tensor containing one zero. GANs do not make use of the modality + loss. + losses: a dictionary of losses containing the generator and discriminator + losses. + """ + train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN + return vanilla_gan_internal(features["targets"], self._hparams, train) + + def infer(self, + features=None, + decode_length=50, + beam_size=1, + top_beams=1, + last_position_only=False, + alpha=0.0): + with tf.variable_scope("body/vanilla_gan", reuse=tf.AUTO_REUSE): + z = tf.random_uniform(shape=[1, self._hparams.random_sample_size], + minval=-1, maxval=1, name="z") + + g_sample = generator(z, self._hparams) + return g_sample + + +@registry.register_hparams +def vanilla_gan(): + """Basic parameters for a vanilla_gan.""" + + hparams = common_hparams.basic_params1() + + hparams.input_modalities = "image:no_loss" + hparams.target_modality = "image:no_loss" + + hparams.batch_size = 2048 # 3136 + hparams.label_smoothing = 0.0 + hparams.add_hparam("startup_steps", 10000) + + hparams.train_steps = 100 + hparams.add_hparam("weight_size", 128) + hparams.add_hparam("random_sample_size", 100) + hparams.add_hparam("height", 28) + hparams.add_hparam("width", 28) + hparams.add_hparam("colors", 1) + hparams.add_hparam("input_size", 784) + hparams.add_hparam("epsilon", 1e-4) + hparams.learning_rate_warmup_steps = 0 + hparams.learning_rate = 0.2 + hparams.learning_rate_decay_scheme = "none" + return hparams + + From 955dad55eb8b98d6a08961ddbc5402bd0c9f5073 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 9 Nov 2017 14:14:07 -0800 Subject: [PATCH 0566/4095] Enable separate eval and t2t_usr_dir for TPU PiperOrigin-RevId: 175210607 --- tensor2tensor/data_generators/image.py | 3 +++ tensor2tensor/tpu/tpu_trainer.py | 26 +++++++++++++++++++++----- tensor2tensor/tpu/tpu_trainer_lib.py | 19 +++++++++++++------ 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 2a2b73962..4c5f3748a 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -658,9 +658,11 @@ def class_labels(self): ] def preprocess_example(self, example, mode, unused_hparams): + example["inputs"].set_shape([_CIFAR10_IMAGE_SIZE, _CIFAR10_IMAGE_SIZE, 3]) if mode == tf.estimator.ModeKeys.TRAIN: example["inputs"] = common_layers.cifar_image_augmentation( example["inputs"]) + example["inputs"] = tf.to_int64(example["inputs"]) return example def generator(self, data_dir, tmp_dir, is_training): @@ -684,6 +686,7 @@ def generator(self, data_dir, tmp_dir, is_training): class ImageCifar10Plain(ImageCifar10): def preprocess_example(self, example, mode, unused_hparams): + example["inputs"].set_shape([_CIFAR10_IMAGE_SIZE, _CIFAR10_IMAGE_SIZE, 3]) example["inputs"] = tf.to_int64(example["inputs"]) return example diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index e75d69b1c..faf86df3f 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -13,9 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -r"""Train on TPU. - -""" +"""Train on TPU.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -26,12 +24,20 @@ from tensor2tensor import problems as problems_lib # pylint: disable=unused-import from tensor2tensor.tpu import tpu_trainer_lib as lib from tensor2tensor.utils import registry +from tensor2tensor.utils import usr_dir import tensorflow as tf flags = tf.flags FLAGS = flags.FLAGS +# See trainer_utils.py for additional command-line flags. +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_model calls, that will then be " + "available to the t2t-trainer.") flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") flags.DEFINE_integer("iterations_per_loop", 1000, "Number of iterations in a TPU training loop.") @@ -71,16 +77,26 @@ def create_run_config(): master=FLAGS.master, iterations_per_loop=FLAGS.iterations_per_loop, num_shards=FLAGS.tpu_num_shards, - log_device_placement=FLAGS.log_device_placement) + log_device_placement=FLAGS.log_device_placement, + save_checkpoints_steps=max(FLAGS.iterations_per_loop, + FLAGS.local_eval_frequency)) + + +def execute_schedule(exp): + if not hasattr(exp, FLAGS.schedule): + raise ValueError( + "Experiment has no method %s, from --schedule" % FLAGS.schedule) + getattr(exp, FLAGS.schedule)() def main(_): tf.logging.set_verbosity(tf.logging.INFO) tf.set_random_seed(123) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) exp_fn = create_experiment_fn() exp = exp_fn(create_run_config(), create_hparams()) - exp.continuous_train_and_eval() + execute_schedule(exp) if __name__ == "__main__": diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index dda35485f..c9be40be2 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -81,25 +81,32 @@ def define_shapes(example): return example + # Read and preprocess problem = hparams.problem_instances[0] data_dir = hparams.data_dir dataset = problem.dataset( mode=mode, data_dir=data_dir, num_threads=num_threads, hparams=hparams) dataset = dataset.map( data_reader.cast_int64_to_int32, num_threads=num_threads) - # TODO(rsepassi): In eval mode, should not repeat. Do so because TPU seems - # to crash if it runs out of data during eval. - dataset = dataset.repeat(None) + if is_training: + dataset = dataset.repeat(None) + # Batch (and pad) if are_shapes_fully_defined(dataset.output_shapes): - dataset = dataset.batch(batch_size) + dataset = dataset.apply( + tf.contrib.data.batch_and_drop_remainder(batch_size)) else: # If shapes are not fully defined, filter out long ones and pad to # hparams.max_length dataset = dataset.filter(valid_size) padded_shapes = fill_shape_nones( dataset.output_shapes, none_filler=hparams.max_length) - dataset = data_reader.padded_batch(dataset, batch_size, padded_shapes) + if hasattr(tf.contrib.data, "padded_batch_and_drop_remainder"): + dataset = dataset.apply( + tf.contrib.data.padded_batch_and_drop_remainder( + batch_size, padded_shapes)) + else: + dataset = data_reader.padded_batch(dataset, batch_size, padded_shapes) dataset = dataset.map(define_shapes, num_parallel_calls=num_threads) dataset = dataset.prefetch(1) @@ -111,7 +118,7 @@ def define_shapes(example): def are_shapes_fully_defined(shapes_dict): - for _, shape in shapes_dict.iteritems(): + for shape in shapes_dict.values(): if not shape.is_fully_defined(): return False return True From 234183c4006cd6d7fdad70f529e89a1069449ba0 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 9 Nov 2017 18:42:10 -0800 Subject: [PATCH 0567/4095] Make ClassLabel1DModality average out intermediate dims PiperOrigin-RevId: 175244191 --- tensor2tensor/layers/modalities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 7d9aca58e..0b2db246f 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -434,8 +434,8 @@ def top(self, body_output, _): with tf.variable_scope(self.name): x = body_output - # Assume input is a square with self._body_input_depth channels. if self._is_2d: + # Assume input is a square with self._body_input_depth channels. x_shape = x.get_shape().as_list() if x_shape[1] is None or x_shape[2] is None: length_float = tf.to_float(tf.shape(x)[1]) @@ -454,8 +454,8 @@ def top(self, body_output, _): x = common_layers.conv_block_downsample(x, self._kernel, self._strides, self._padding) x = tf.nn.relu(x) - x = tf.reduce_mean(x, axis=[1, 2], keep_dims=True) + x = tf.reduce_mean(x, axis=[1, 2], keep_dims=True) res = tf.layers.dense(x, self._vocab_size) return tf.expand_dims(res, 3) From aeb47ec13121fa2e4899032dfc373bfcd182a9f6 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Thu, 9 Nov 2017 18:42:43 -0800 Subject: [PATCH 0568/4095] In pre/post-process functions, don't require depth == hparams.hidden_size PiperOrigin-RevId: 175244248 --- tensor2tensor/layers/common_layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 1390ca830..2a61368f7 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -569,7 +569,7 @@ def layer_preprocess(layer_input, hparams): sequence=hparams.layer_preprocess_sequence, dropout_rate=hparams.layer_prepostprocess_dropout, norm_type=hparams.norm_type, - depth=hparams.hidden_size, + depth=None, epsilon=hparams.norm_epsilon, default_name="layer_prepostprocess") @@ -602,7 +602,7 @@ def layer_postprocess(layer_input, layer_output, hparams): sequence=hparams.layer_postprocess_sequence, dropout_rate=hparams.layer_prepostprocess_dropout, norm_type=hparams.norm_type, - depth=hparams.hidden_size, + depth=None, epsilon=hparams.norm_epsilon, default_name="layer_postprocess") From e16c641f6fd4e2f99dd4da23a5abc49a691e3a38 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 9 Nov 2017 18:54:15 -0800 Subject: [PATCH 0569/4095] Add missing --schedule flag PiperOrigin-RevId: 175245161 --- tensor2tensor/tpu/tpu_trainer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index faf86df3f..071b168b2 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -47,6 +47,8 @@ try: flags.DEFINE_string("master", "", "Address of TensorFlow master.") flags.DEFINE_string("output_dir", "", "Base output directory for run.") + flags.DEFINE_string("schedule", "continuous_train_and_eval", + "Method of Experiment to run.") flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") except: # pylint: disable=bare-except pass From eb5652f53090f4fef1845e6d8336b18bcc4615e5 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 9 Nov 2017 20:49:13 -0800 Subject: [PATCH 0570/4095] Rm ImageModality and ClassLabelModality 2d; rename SmallImage to Image PiperOrigin-RevId: 175252289 --- tensor2tensor/data_generators/image.py | 6 +- tensor2tensor/layers/common_hparams.py | 4 +- tensor2tensor/layers/modalities.py | 181 ++++----------------- tensor2tensor/models/multimodel.py | 19 ++- tensor2tensor/models/slicenet.py | 35 +++- tensor2tensor/models/transformer_sketch.py | 9 + tensor2tensor/models/xception.py | 85 +++++++++- tensor2tensor/models/xception_test.py | 22 ++- 8 files changed, 189 insertions(+), 172 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 4c5f3748a..38fa06f25 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -286,10 +286,8 @@ def generator(self, data_dir, tmp_dir, is_training): def hparams(self, defaults, unused_model_hparams): p = defaults - small_modality = "%s:small_image_modality" % registry.Modalities.IMAGE - modality = small_modality if self.is_small else registry.Modalities.IMAGE - p.input_modality = {"inputs": (modality, None)} - p.target_modality = ("%s:2d" % registry.Modalities.CLASS_LABEL, + p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} + p.target_modality = (registry.Modalities.CLASS_LABEL, self.num_classes) p.batch_size_multiplier = 4 if self.is_small else 256 p.max_expected_batch_size_per_shard = 8 if self.is_small else 2 diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index ef2d494fb..f784fb383 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -128,10 +128,10 @@ def basic_params1(): # For each feature for which you want to override the default input # modality, add an entry to this semicolon-separated string. Entries are # formatted "feature_name:modality_type:modality_name", e.g. - # "inputs:image:small_image_modality;other_inputs:audio:identity". + # "inputs:symbol:default;other_inputs:audio:identity". input_modalities="default", # We don't use empty string in params. # To override the default target modality, specify - # "modality_type:modality_name", e.g. "image:small_image_modality". + # "modality_type:modality_name", e.g. "symbol:ctc". target_modality="default", # The maximum length of "input" sequence. # Sequences longer than this value will be truncated. 0 or negative values diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 0b2db246f..baf422278 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -18,8 +18,6 @@ from __future__ import division from __future__ import print_function -import math - # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin @@ -51,12 +49,17 @@ def name(self): def top_dimensionality(self): return self._vocab_size - def _get_weights(self): + def _get_weights(self, hidden_dim=None): """Create or get concatenated embedding or softmax variable. + Args: + hidden_dim: dim of the variable. Defaults fo self._body_input_depth + Returns: a list of self._num_shards Tensors. """ + if hidden_dim is None: + hidden_dim = self._body_input_depth num_shards = self._model_hparams.symbol_modality_num_shards shards = [] for i in xrange(num_shards): @@ -65,9 +68,8 @@ def _get_weights(self): var_name = "weights_%d" % i shards.append( tf.get_variable( - var_name, [shard_size, self._body_input_depth], - initializer=tf.random_normal_initializer( - 0.0, self._body_input_depth**-0.5))) + var_name, [shard_size, hidden_dim], + initializer=tf.random_normal_initializer(0.0, hidden_dim**-0.5))) if num_shards == 1: ret = shards[0] else: @@ -111,27 +113,33 @@ def top(self, body_output, _): Returns: logits: A Tensor with shape [batch, p0, p1, ?, vocab_size]. """ + if self._model_hparams.symbol_modality_skip_top: + return tf.expand_dims(body_output, 3) + if self._model_hparams.shared_embedding_and_softmax_weights: scope_name = "shared" reuse = True else: scope_name = "softmax" reuse = False - if self._model_hparams.symbol_modality_skip_top: - return tf.expand_dims(body_output, 3) + with tf.variable_scope(scope_name, reuse=reuse): - var = self._get_weights() + rank = len(body_output.get_shape().as_list()) + body_output_shape = [ + common_layers.shape_dim(body_output, i) for i in range(rank) + ] + var = self._get_weights(body_output_shape[-1]) if (self._model_hparams.factored_logits and self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN): # insert channels dimension body_output = tf.expand_dims(body_output, 3) logits = common_layers.FactoredTensor(body_output, var) else: - shape = tf.shape(body_output)[:-1] - body_output = tf.reshape(body_output, [-1, self._body_input_depth]) + body_output = tf.reshape(body_output, [-1, body_output_shape[-1]]) logits = tf.matmul(body_output, var, transpose_b=True) - logits = tf.reshape(logits, tf.concat([shape, [1, self._vocab_size]], - 0)) + + out_shape = body_output_shape[:-1] + [1, self._vocab_size] + logits = tf.reshape(logits, out_shape) return logits @@ -154,18 +162,22 @@ def loss(self, logits, targets, weights_fn=common_layers.weights_nonzero): sparse_targets = tf.keras.backend.ctc_label_dense_to_sparse( targets, targets_lengths) xent = tf.nn.ctc_loss( - sparse_targets, logits, targets_lengths, time_major=False, - preprocess_collapse_repeated=False, ctc_merge_repeated=False) + sparse_targets, + logits, + targets_lengths, + time_major=False, + preprocess_collapse_repeated=False, + ctc_merge_repeated=False) weights = weights_fn(targets) return tf.reduce_sum(xent), tf.reduce_sum(weights) -@registry.register_image_modality -class SmallImageModality(modality.Modality): - """Performs strided conv compressions for small image data.""" +@registry.register_image_modality("default") +class ImageModality(modality.Modality): + """Modality for images.""" def __init__(self, model_hparams, vocab_size): - super(SmallImageModality, self).__init__(model_hparams, vocab_size) + super(ImageModality, self).__init__(model_hparams, vocab_size) self._channels = 3 @property @@ -176,13 +188,7 @@ def bottom(self, inputs): with tf.variable_scope(self.name): inputs = common_layers.standardize_images(inputs) tf.summary.image("inputs", inputs, max_outputs=2) - return common_layers.conv_block( - inputs, - self._body_input_depth, [((1, 1), (3, 3))], - first_relu=False, - padding="SAME", - force2d=True, - name="small_image_conv") + return tf.to_float(inputs) def targets_bottom(self, inputs): with tf.variable_scope(self.name): @@ -219,80 +225,10 @@ def top(self, body_output, _): def loss(self, top_out, targets, weights_fn=common_layers.weights_all): # Call the default implementation, but weight 1.0 on 0s by default. # (Since we're processing images and so have no padding and some pixel 0s.) - return super(SmallImageModality, self).loss( + return super(ImageModality, self).loss( top_out, targets, weights_fn=weights_fn) -@registry.register_image_modality("default") -class ImageModality(modality.Modality): - """Performs embedding and strided conv compressions for large image data.""" - - @property - def top_dimensionality(self): - return 256 - - def bottom(self, inputs): - """Transform input from data space to model space. - - Perform the Xception "Entry flow", which consists of two convolutional - filter upscalings followed by three residually connected separable - convolution blocks. - - Args: - inputs: A Tensor with shape [batch, ...] - Returns: - body_input: A Tensor with shape [batch, ?, ?, body_input_depth]. - """ - with tf.variable_scope(self.name): - - def xnet_resblock(x, filters, res_relu, name): - with tf.variable_scope(name): - y = common_layers.separable_conv_block( - x, - filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))], - first_relu=True, - padding="SAME", - force2d=True, - name="sep_conv_block") - y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 2)) - return y + common_layers.conv_block( - x, - filters, [((1, 1), (1, 1))], - padding="SAME", - strides=(2, 2), - first_relu=res_relu, - force2d=True, - name="res_conv0") - - inputs = common_layers.standardize_images(inputs) - # TODO(lukaszkaiser): summaries here don't work in multi-problem case yet. - # tf.summary.image("inputs", inputs, max_outputs=2) - x = common_layers.conv_block( - inputs, - 32, [((1, 1), (3, 3))], - first_relu=False, - padding="SAME", - strides=(2, 2), - force2d=True, - name="conv0") - x = common_layers.conv_block( - x, 64, [((1, 1), (3, 3))], padding="SAME", force2d=True, name="conv1") - x = xnet_resblock(x, min(128, self._body_input_depth), True, "block0") - x = xnet_resblock(x, min(256, self._body_input_depth), False, "block1") - return xnet_resblock(x, self._body_input_depth, False, "block2") - - def top(self, body_output, _): - # TODO(lukaszkaiser): work on a better way to generate large images. - with tf.variable_scope(self.name): - decompressed_inputs = common_layers.deconv_stride2_multistep( - body_output, - self._model_hparams.compress_steps, - body_output.get_shape()[-1], - name="deconv") - return common_layers.conv( - decompressed_inputs, self._vocab_size, (1, 1), padding="SAME") - - @registry.register_audio_modality("default") class AudioModality(modality.Modality): """Performs strided conv compressions for audio data.""" @@ -380,16 +316,9 @@ def xnet_resblock(x, filters, res_relu, name): "compress_block_final") -@registry.register_class_label_modality("2d") +@registry.register_class_label_modality("default") class ClassLabelModality(modality.Modality): - """Used for label data; if is2d=True, uses Xception flow to logits.""" - - def __init__(self, model_hparams, vocab_size, is2d=True): - super(ClassLabelModality, self).__init__(model_hparams, vocab_size) - self._is_2d = is2d - self._kernel = (3, 3) if is2d else (5, 1) - self._strides = (2, 2) if is2d else (4, 1) - self._padding = "SAME" if is2d else "LEFT" + """Used for label data.""" @property def name(self): @@ -416,45 +345,16 @@ def targets_bottom(self, x): def top(self, body_output, _): """Transform inputs from model space to target space. - If instantiated with is2d=True, perform the Xception "Exit flow", consisting - of a single residual block and two separable convolutional upscalings - followed by global spatial average pooling. - - Otherwise, a single linear layer to logits. + Average over inner dims and a linear layer to logits. Args: body_output: A Tensor with shape [batch, ?, ?, body_output_size]. Returns: a Tensors, each with shape [batch_size, ?, ?, vocab_size] - - Raises: - ValueError: if 2d and Tensor cannot be made a square in the spatial dims. """ with tf.variable_scope(self.name): x = body_output - - if self._is_2d: - # Assume input is a square with self._body_input_depth channels. - x_shape = x.get_shape().as_list() - if x_shape[1] is None or x_shape[2] is None: - length_float = tf.to_float(tf.shape(x)[1]) - length_float *= tf.to_float(tf.shape(x)[2]) - spatial_dim_float = tf.sqrt(length_float) - spatial_dim = tf.to_int32(spatial_dim_float) - x_depth = x_shape[3] - x = tf.reshape(x, [-1, spatial_dim, spatial_dim, x_depth]) - elif x_shape[1] != x_shape[2]: - spatial_dim = int(math.sqrt(float(x_shape[1] * x_shape[2]))) - if spatial_dim * spatial_dim != x_shape[1] * x_shape[2]: - raise ValueError("Assumed inputs were square-able but they were " - "not. Shape: %s" % x_shape) - x = tf.reshape(x, [-1, spatial_dim, spatial_dim, x_depth]) - - x = common_layers.conv_block_downsample(x, self._kernel, self._strides, - self._padding) - x = tf.nn.relu(x) - x = tf.reduce_mean(x, axis=[1, 2], keep_dims=True) res = tf.layers.dense(x, self._vocab_size) return tf.expand_dims(res, 3) @@ -466,15 +366,6 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_all): top_out, targets, weights_fn=weights_fn) -@registry.register_class_label_modality("default") -class ClassLabel1DModality(ClassLabelModality): - """Used for label data.""" - - def __init__(self, model_hparams, vocab_size): - super(ClassLabel1DModality, self).__init__( - model_hparams=model_hparams, vocab_size=vocab_size, is2d=False) - - @registry.register_generic_modality("default") @registry.register_audio_modality("identity") @registry.register_image_modality("identity") diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index a4c82d942..8a837aa63 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -78,8 +78,8 @@ def conv_experts(xs, hparams, dp, ps, padding, mask, layer_id): conv_out = dp(conv_res_step, xs, hparams, padding, mask) loss = 0.0 moe_hidden_sizes = [hparams.filter_size] - expert_fn = expert_utils.ffn_expert_fn( - hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) + expert_fn = expert_utils.ffn_expert_fn(hparams.hidden_size, moe_hidden_sizes, + hparams.hidden_size) moe_out, loss = expert_utils.distributed_moe( dp, ps, @@ -113,10 +113,23 @@ def model_fn_body_sharded(self, sharded_features): dp = self._data_parallelism hparams = self._hparams + def project_to_hidden(inputs): + return common_layers.conv_block( + inputs, + hparams.hidden_size, [((1, 1), (3, 3))], + first_relu=False, + padding="SAME", + force2d=True) + def flatten(inputs): return tf.expand_dims(common_layers.flatten4d3d(inputs), axis=2) - inputs = dp(flatten, sharded_features["inputs"]) + # Project to hidden size if necessary + if (sharded_features["inputs"][0].get_shape().as_list()[-1] != + hparams.hidden_size): + inputs = dp(project_to_hidden, sharded_features["inputs"]) + + inputs = dp(flatten, inputs) inputs_pad = dp(slicenet.embedding_to_padding, inputs) inputs_mask = dp(lambda x: 1.0 - x, inputs_pad) inputs_encoded = dp(common_layers.add_timing_signal, inputs) diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index fc030deed..8807f073b 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -111,10 +111,12 @@ def multi_conv_res(x, padding, name, layers, hparams, mask=None, source=None): hparams.separability - i for i in reversed(range(len(dilations_and_kernels2))) ] + def norm_fn(x, name): with tf.variable_scope(name, default_name="norm"): return common_layers.apply_norm( x, hparams.norm_type, hparams.hidden_size, hparams.norm_epsilon) + for layer in xrange(layers): with tf.variable_scope("layer_%d" % layer): y = common_layers.subseparable_conv_block( @@ -174,10 +176,11 @@ def similarity_cost(inputs_encoded, targets_encoded): def slicenet_middle(inputs_encoded, targets, target_space_emb, mask, hparams): """Middle part of slicenet, connecting encoder and decoder.""" + def norm_fn(x, name): with tf.variable_scope(name, default_name="norm"): - return common_layers.apply_norm( - x, hparams.norm_type, hparams.hidden_size, hparams.norm_epsilon) + return common_layers.apply_norm(x, hparams.norm_type, hparams.hidden_size, + hparams.norm_epsilon) # Flatten targets and embed target_space_id. targets_flat = tf.expand_dims(common_layers.flatten4d3d(targets), axis=2) @@ -236,9 +239,18 @@ def embedding_to_padding(emb): return tf.to_float(tf.equal(emb_sum, 0.0)) -def slicenet_internal(inputs, targets, target_space, problem_idx, hparams): +def slicenet_internal(inputs, targets, target_space, hparams, run_decoder=True): """The slicenet model, main step used for training.""" with tf.variable_scope("slicenet"): + # Project to hidden size if necessary + if inputs.get_shape().as_list()[-1] != hparams.hidden_size: + inputs = common_layers.conv_block( + inputs, + hparams.hidden_size, [((1, 1), (3, 3))], + first_relu=False, + padding="SAME", + force2d=True) + # Flatten inputs and encode. inputs = tf.expand_dims(common_layers.flatten4d3d(inputs), axis=2) inputs_mask = 1.0 - embedding_to_padding(inputs) @@ -247,9 +259,7 @@ def slicenet_internal(inputs, targets, target_space, problem_idx, hparams): extra_layers = int(hparams.num_hidden_layers * 1.5) inputs_encoded = multi_conv_res( inputs, "SAME", "encoder", extra_layers, hparams, mask=inputs_mask) - target_modality_name = hparams.problems[problem_idx].target_modality.name - if "class_label_modality" in target_modality_name: - # If we're just predicing a class, there is no use for a decoder. + if not run_decoder: return inputs_encoded # Do the middle part. decoder_start, similarity_loss = slicenet_middle( @@ -270,9 +280,16 @@ def slicenet_internal(inputs, targets, target_space, problem_idx, hparams): class SliceNet(t2t_model.T2TModel): def model_fn_body(self, features): - return slicenet_internal(features["inputs"], features["targets"], - features["target_space_id"], self._problem_idx, - self._hparams) + target_modality_name = ( + self._hparams.problems[self._problem_idx].target_modality.name) + # If we're just predicing a class, there is no use for a decoder. + run_decoder = "class_label_modality" not in target_modality_name + return slicenet_internal( + features["inputs"], + features["targets"], + features["target_space_id"], + self._hparams, + run_decoder=run_decoder) _KERNEL_SCHEMES = { diff --git a/tensor2tensor/models/transformer_sketch.py b/tensor2tensor/models/transformer_sketch.py index b7bd9b1ef..45384f065 100644 --- a/tensor2tensor/models/transformer_sketch.py +++ b/tensor2tensor/models/transformer_sketch.py @@ -23,6 +23,7 @@ # Dependency imports from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers from tensor2tensor.models import transformer from tensor2tensor.models import transformer_vae from tensor2tensor.models.transformer import transformer_base @@ -37,6 +38,14 @@ class TransformerSketch(transformer.Transformer): def encode(self, inputs, target_space, hparams): """Add two layers strided convolutions ontop of encode.""" + inputs = common_layers.conv_block( + inputs, + hparams.hidden_size, [((1, 1), (3, 3))], + first_relu=False, + padding="SAME", + force2d=True, + name="small_image_conv") + hparams.num_compress_steps = 2 compressed_inputs = transformer_vae.compress(inputs, c=None, is_2d=True, hparams=hparams, diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index e7caa3419..634e26901 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -19,6 +19,8 @@ from __future__ import division from __future__ import print_function +import math + # Dependency imports from six.moves import xrange # pylint: disable=redefined-builtin @@ -50,10 +52,87 @@ def xception_internal(inputs, hparams): """Xception body.""" with tf.variable_scope("xception"): cur = inputs + + if cur.get_shape().as_list()[1] > 200: + # Large image, Xception entry flow + cur = xception_entry(cur, hparams.hidden_size) + else: + # Small image, conv + cur = common_layers.conv_block( + cur, + hparams.hidden_size, [((1, 1), (3, 3))], + first_relu=False, + padding="SAME", + force2d=True, + name="small_image_conv") + for i in xrange(hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % i): cur = residual_block(cur, hparams) - return cur + + return xception_exit(cur) + + +def xception_entry(inputs, hidden_dim): + with tf.variable_scope("xception_entry"): + + def xnet_resblock(x, filters, res_relu, name): + with tf.variable_scope(name): + y = common_layers.separable_conv_block( + x, + filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))], + first_relu=True, + padding="SAME", + force2d=True, + name="sep_conv_block") + y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 2)) + return y + common_layers.conv_block( + x, + filters, [((1, 1), (1, 1))], + padding="SAME", + strides=(2, 2), + first_relu=res_relu, + force2d=True, + name="res_conv0") + + inputs = common_layers.standardize_images(inputs) + # TODO(lukaszkaiser): summaries here don't work in multi-problem case yet. + # tf.summary.image("inputs", inputs, max_outputs=2) + x = common_layers.conv_block( + inputs, + 32, [((1, 1), (3, 3))], + first_relu=False, + padding="SAME", + strides=(2, 2), + force2d=True, + name="conv0") + x = common_layers.conv_block( + x, 64, [((1, 1), (3, 3))], padding="SAME", force2d=True, name="conv1") + x = xnet_resblock(x, min(128, hidden_dim), True, "block0") + x = xnet_resblock(x, min(256, hidden_dim), False, "block1") + return xnet_resblock(x, hidden_dim, False, "block2") + + +def xception_exit(inputs): + with tf.variable_scope("xception_exit"): + x = inputs + x_shape = x.get_shape().as_list() + if x_shape[1] is None or x_shape[2] is None: + length_float = tf.to_float(tf.shape(x)[1]) + length_float *= tf.to_float(tf.shape(x)[2]) + spatial_dim_float = tf.sqrt(length_float) + spatial_dim = tf.to_int32(spatial_dim_float) + x_depth = x_shape[3] + x = tf.reshape(x, [-1, spatial_dim, spatial_dim, x_depth]) + elif x_shape[1] != x_shape[2]: + spatial_dim = int(math.sqrt(float(x_shape[1] * x_shape[2]))) + if spatial_dim * spatial_dim != x_shape[1] * x_shape[2]: + raise ValueError("Assumed inputs were square-able but they were " + "not. Shape: %s" % x_shape) + x = tf.reshape(x, [-1, spatial_dim, spatial_dim, x_depth]) + + x = common_layers.conv_block_downsample(x, (3, 3), (2, 2), "SAME") + return tf.nn.relu(x) @registry.register_model @@ -93,8 +172,8 @@ def xception_base(): def xception_tiny(): hparams = xception_base() hparams.batch_size = 1024 - hparams.hidden_size = 128 - hparams.num_hidden_layers = 4 + hparams.hidden_size = 64 + hparams.num_hidden_layers = 2 hparams.learning_rate_decay_scheme = "none" return hparams diff --git a/tensor2tensor/models/xception_test.py b/tensor2tensor/models/xception_test.py index 9114fb781..e02057c10 100644 --- a/tensor2tensor/models/xception_test.py +++ b/tensor2tensor/models/xception_test.py @@ -25,30 +25,40 @@ from tensor2tensor.data_generators import problem_hparams from tensor2tensor.models import xception +from tensor2tensor.utils import registry import tensorflow as tf class XceptionTest(tf.test.TestCase): - def testXception(self): + def _testXception(self, img_size, output_size): vocab_size = 9 - x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1)) - y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 1, 1, 1)) + batch_size = 3 + x = np.random.random_integers( + 0, high=255, size=(batch_size, img_size, img_size, 3)) + y = np.random.random_integers( + 1, high=vocab_size - 1, size=(batch_size, 1, 1, 1)) hparams = xception.xception_tiny() p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size) + p_hparams.input_modality["inputs"] = (registry.Modalities.IMAGE, None) with self.test_session() as session: features = { "inputs": tf.constant(x, dtype=tf.int32), "targets": tf.constant(y, dtype=tf.int32), } - model = xception.Xception( - hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) + model = xception.Xception(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) sharded_logits, _ = model.model_fn(features) logits = tf.concat(sharded_logits, 0) session.run(tf.global_variables_initializer()) res = session.run(logits) - self.assertEqual(res.shape, (3, 5, 1, 1, vocab_size)) + self.assertEqual(res.shape, output_size + (1, vocab_size)) + + def testXceptionSmall(self): + self._testXception(img_size=9, output_size=(3, 5, 5)) + + def testXceptionLarge(self): + self._testXception(img_size=256, output_size=(3, 8, 8)) if __name__ == "__main__": From 4084c5c1cfbc7168f97a480e9880410e7b268783 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Fri, 10 Nov 2017 11:19:38 -0800 Subject: [PATCH 0571/4095] Add modality for images that compresses pixels and can be used for generation tasks. PiperOrigin-RevId: 175313670 --- tensor2tensor/layers/common_layers.py | 10 ++++ tensor2tensor/layers/modalities.py | 70 +++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 2a61368f7..7c209c60c 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -151,6 +151,16 @@ def standardize_images(x): return x +def convert_rgb_to_real(x): + """Conversion of pixel values to real numbers.""" + with tf.name_scope("rgb_to_real", [x]): + x = tf.to_float(x) + # Use the formula (value/128) - 1 to convert each channel value into a + # real number in the range -1 to 1. + x = (x /128) - 1 + return x + + def image_augmentation(images, do_colors=False): """Image augmentation: cropping, flipping, and color transforms.""" images = tf.random_crop(images, [299, 299, 3]) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index baf422278..4cd680955 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -229,6 +229,76 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_all): top_out, targets, weights_fn=weights_fn) +@registry.register_image_modality("image_identity_compress") +class ImageIdentityCompressModality(modality.Modality): + """Modality for images used in generation.""" + + @property + def top_dimensionality(self): + return 256 + + def bottom_compress(self, inputs, name="bottom"): + """Transform input from data space to model space. + + Perform conversion of RGB pixel values to a real number and combine values + for each pixel to form representation of image_length x image_length dims. + + Args: + inputs: A Tensor with shape [batch, ...] + name: string, scope. + Returns: + body_input: A Tensor with shape [batch, ?, ?, body_input_depth]. + """ + with tf.variable_scope(name): + inputs = common_layers.convert_rgb_to_real(inputs) + ishape = tf.shape(inputs) + inputs = tf.reshape(inputs, [-1, ishape[1], ishape[2]*ishape[3], 1]) + inputs.set_shape([None, None, None, 1]) + # We compress RGB intensities for each pixel using a conv. + x = common_layers.conv_block( + inputs, + self._body_input_depth, [((1, 1), (1, 3))], + first_relu=False, + padding="VALID", + strides=(1, 3), + force2d=True, + name="conv_input") + return x + + def bottom(self, inputs): + return self.bottom_compress(inputs, "input_bottom") + + def targets_bottom(self, inputs): + return self.bottom_compress(inputs, "output_bottom") + + def top(self, body_output, _): + with tf.variable_scope(self.name): + hidden_dim = self._model_hparams.hidden_size + img_len = self._model_hparams.img_len + channels = self._model_hparams.num_channels + batch = tf.shape(body_output)[0] + x = common_layers.conv( + body_output, + hidden_dim*channels, (1, 1), + padding="VALID", + activation=tf.nn.relu, + name="decompress_conv") + x = tf.reshape(x, [batch, img_len, img_len*channels, hidden_dim]) + x.set_shape([None, None, None, hidden_dim]) + x = common_layers.conv(x, + self.top_dimensionality, + (1, 1), name="output_conv") + x = tf.reshape(x, [-1, img_len, img_len, + channels, self.top_dimensionality]) + return x + + def loss(self, top_out, targets, weights_fn=common_layers.weights_all): + # Call the default implementation, but weight 1.0 on 0s by default. + # (Since we're processing images and so have no padding and some pixel 0s.) + return super(ImageIdentityCompressModality, self).loss( + top_out, targets, weights_fn=weights_fn) + + @registry.register_audio_modality("default") class AudioModality(modality.Modality): """Performs strided conv compressions for audio data.""" From fa460706a4947d11626fe336c39201f1e55cdb50 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 10 Nov 2017 15:01:20 -0800 Subject: [PATCH 0572/4095] Get rid of "last_position_only" by adding the corresponding property to Modality. PiperOrigin-RevId: 175342179 --- tensor2tensor/layers/modalities.py | 4 ++ tensor2tensor/models/transformer.py | 18 ++------ tensor2tensor/models/transformer_adv.py | 8 ++-- tensor2tensor/models/transformer_test.py | 8 +--- tensor2tensor/utils/decoding.py | 1 - tensor2tensor/utils/modality.py | 16 +++++++ tensor2tensor/utils/model_builder.py | 1 - tensor2tensor/utils/t2t_model.py | 58 ++++++++++-------------- 8 files changed, 52 insertions(+), 62 deletions(-) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 4cd680955..4a8848f35 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -49,6 +49,10 @@ def name(self): def top_dimensionality(self): return self._vocab_size + @property + def top_is_pointwise(self): + return True + def _get_weights(self, hidden_dim=None): """Create or get concatenated embedding or softmax variable. diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 14d5cc80b..a5ddb1bfe 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -143,13 +143,12 @@ def model_fn_body(self, features): encoder_decoder_attention_bias, decoder_self_attention_bias, hparams) - def _greedy_infer(self, features, decode_length, last_position_only=True): + def _greedy_infer(self, features, decode_length): """Fast version of greedy decoding. Args: features: an map of string to `Tensor` decode_length: an integer. How many additional timesteps to decode. - last_position_only: MUST be true for fast decoding! Returns: samples: [batch_size, input_length + decode_length] @@ -157,15 +156,13 @@ def _greedy_infer(self, features, decode_length, last_position_only=True): losses: Not returned Raises: - ValueError: If last_position_only if False NotImplementedError: If there are multiple data shards. """ - decoded_ids, _ = self._fast_decode( - features, decode_length, last_position_only) + decoded_ids, _ = self._fast_decode(features, decode_length) return decoded_ids, None, None def _beam_decode(self, features, decode_length, beam_size, top_beams, - last_position_only, alpha): + alpha): """Beam search decoding. Args: @@ -173,7 +170,6 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, decode_length: an integer. How many additional timesteps to decode. beam_size: number of beams. top_beams: an integer. How many of the beams to return. - last_position_only: MUST be true for fast decoding! alpha: Float that controls the length penalty. larger the alpha, stronger the preference for slonger translations. @@ -181,14 +177,12 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, samples: an integer `Tensor`. Top samples from the beam search """ decoded_ids, scores = self._fast_decode( - features, decode_length, last_position_only, beam_size, top_beams, - alpha) + features, decode_length, beam_size, top_beams, alpha) return {"outputs": decoded_ids, "scores": scores} def _fast_decode(self, features, decode_length, - last_position_only=True, beam_size=1, top_beams=1, alpha=1.0): @@ -200,7 +194,6 @@ def _fast_decode(self, Args: features: a map of string to model features. decode_length: an integer. How many additional timesteps to decode. - last_position_only: MUST be true for fast decoding! beam_size: number of beams. top_beams: an integer. How many of the beams to return. alpha: Float that controls the length penalty. larger the alpha, stronger @@ -210,11 +203,8 @@ def _fast_decode(self, samples: an integer `Tensor`. Top samples from the beam search Raises: - ValueError: If last_position_only if False NotImplementedError: If there are multiple data shards. """ - if not last_position_only: - raise ValueError("Fast decoding only deals with the last positions!") if self._num_datashards != 1: raise NotImplementedError("Fast decoding only supports a single shard.") dp = self._data_parallelism diff --git a/tensor2tensor/models/transformer_adv.py b/tensor2tensor/models/transformer_adv.py index 9bbccb874..737aa822e 100644 --- a/tensor2tensor/models/transformer_adv.py +++ b/tensor2tensor/models/transformer_adv.py @@ -166,7 +166,7 @@ def model_fn_body(self, features): features["target_space_id"], self._hparams) def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, - last_position_only=False, alpha=0.0): + alpha=0.0): """Produce predictions from the model.""" if not features: features = {} @@ -184,8 +184,7 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, initial_output = tf.zeros((batch_size, 2 * length, 1, 1), dtype=tf.int64) features["targets"] = initial_output - sharded_logits, _ = self.model_fn( - features, False, last_position_only=last_position_only) + sharded_logits, _ = self.model_fn(features, False) sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) samples = tf.concat(sharded_samples, 0) @@ -194,8 +193,7 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, for _ in xrange(how_many_more_steps): with tf.variable_scope(tf.get_variable_scope(), reuse=True): features["targets"] = samples - sharded_logits, _ = self.model_fn( - features, False, last_position_only=last_position_only) + sharded_logits, _ = self.model_fn(features, False) sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) samples = tf.concat(sharded_samples, 0) diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index 6bdc3a44d..ae254a42d 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -56,8 +56,7 @@ def getModel(self, hparams, mode=tf.estimator.ModeKeys.TRAIN): "target_space_id": tf.constant(1, dtype=tf.int32), } - return transformer.Transformer( - hparams, tf.estimator.ModeKeys.PREDICT, p_hparams), features + return transformer.Transformer(hparams, mode, p_hparams), features def testTransformer(self): model, features = self.getModel(transformer.transformer_small()) @@ -99,8 +98,7 @@ def testGreedyVsFast(self): mode=tf.estimator.ModeKeys.PREDICT) with tf.variable_scope(tf.get_variable_scope(), reuse=True): - greedy_result, _, _ = model._slow_greedy_infer( - features, decode_length, last_position_only=True) + greedy_result, _, _ = model._slow_greedy_infer(features, decode_length) greedy_result = tf.squeeze(greedy_result, axis=[2, 3]) fast_result, _, _ = model._greedy_infer(features, decode_length) @@ -139,7 +137,6 @@ def testBeamVsFast(self): decode_length, beam_size=4, top_beams=1, - last_position_only=True, alpha=1.0)["outputs"] fast_result = model._beam_decode( @@ -147,7 +144,6 @@ def testBeamVsFast(self): decode_length, beam_size=4, top_beams=1, - last_position_only=True, alpha=1.0)["outputs"] with self.test_session(): diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index d1dbd7610..629b2ed26 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -42,7 +42,6 @@ def decode_hparams(overrides=""): """Hyperparameters for decoding.""" hp = tf.contrib.training.HParams( - use_last_position_only=False, save_images=False, problem_idx=0, extra_length=50, diff --git a/tensor2tensor/utils/modality.py b/tensor2tensor/utils/modality.py index 4bcf21f4d..43ca422b7 100644 --- a/tensor2tensor/utils/modality.py +++ b/tensor2tensor/utils/modality.py @@ -71,6 +71,22 @@ def top_dimensionality(self): def _body_input_depth(self): return self._model_hparams.hidden_size + @property + def top_is_pointwise(self): + """Whether the top mapping of the modality is pointwise. + + An example of a pointwise top mapping is a linear layer followed by + a softmax. Given a tensor [batch, length, height, depth] it operates + only on the last axis, on every point in [batch, length, height] fully + independently. In contrast, a classifier that first averages over length + and height is not pointwise, as it depends on the whole field. It is useful + to know if a top is pointwise to speed up decoding in certain models. + + Returns: + A Boolean, True if the modality is pointwise, False otherwise (default). + """ + return False + def bottom(self, x): """Transform one shard of input. diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 5619ada31..a63032453 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -115,7 +115,6 @@ def nth_model(n): features, beam_size=decode_hp.beam_size, top_beams=(decode_hp.beam_size if decode_hp.return_beams else 1), - last_position_only=decode_hp.use_last_position_only, alpha=decode_hp.alpha, decode_length=decode_hp.extra_length) # In distributed mode, we build graph for problem=0 and problem=worker_id. diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 6e555df0c..ac11d54aa 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -155,8 +155,7 @@ def prepare_features_for_infer(self, features): def eval_autoregressive(self, features=None, - decode_length=50, - last_position_only=False): + decode_length=50): """Autoregressive eval. Quadratic time in decode_length. @@ -164,7 +163,6 @@ def eval_autoregressive(self, Args: features: an map of string to `Tensor` decode_length: an integer. How many additional timesteps to decode. - last_position_only: a boolean, speed-up by computing last position only. Returns: sharded_logits: a list of `Tensor`s. Assumes one datashard. @@ -173,8 +171,7 @@ def eval_autoregressive(self, """ _, logits, losses = self._slow_greedy_infer( features, - decode_length=decode_length, - last_position_only=last_position_only) + decode_length=decode_length) return [logits], losses def infer(self, @@ -182,7 +179,6 @@ def infer(self, decode_length=50, beam_size=1, top_beams=1, - last_position_only=False, alpha=0.0): """A inference method. @@ -193,7 +189,6 @@ def infer(self, decode_length: an integer. How many additional timesteps to decode. beam_size: number of beams. top_beams: an integer. How many of the beams to return. - last_position_only: a boolean, speed-up by computing last position only. alpha: Float that controls the length penalty. larger the alpha, stronger the preference for slonger translations. @@ -212,16 +207,15 @@ def infer(self, beam_size = 1 # No use to run beam-search for a single class. if beam_size == 1: tf.logging.info("Greedy Decoding") - samples, _, _ = self._greedy_infer(features, decode_length, - last_position_only) + samples, _, _ = self._greedy_infer(features, decode_length) else: tf.logging.info("Beam Decoding with beam size %d" % beam_size) samples = self._beam_decode(features, decode_length, beam_size, top_beams, - last_position_only, alpha) + alpha) return samples def _beam_decode(self, features, decode_length, beam_size, top_beams, - last_position_only, alpha): + alpha): """Beam search decoding. Models should ideally implement a more efficient version of this function. @@ -231,7 +225,6 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, decode_length: an integer. How many additional timesteps to decode. beam_size: number of beams. top_beams: an integer. How many of the beams to return. - last_position_only: a boolean, speed-up by computing last position only. alpha: Float that controls the length penalty. larger the alpha, stronger the preference for slonger translations. @@ -239,10 +232,10 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, samples: an integer `Tensor`. Top samples from the beam search """ return self._beam_decode_slow(features, decode_length, beam_size, top_beams, - last_position_only, alpha) + alpha) def _beam_decode_slow(self, features, decode_length, beam_size, top_beams, - last_position_only, alpha): + alpha): """Slow version of Beam search decoding. Quadratic time in decode_length. @@ -252,7 +245,6 @@ def _beam_decode_slow(self, features, decode_length, beam_size, top_beams, decode_length: an integer. How many additional timesteps to decode. beam_size: number of beams. top_beams: an integer. How many of the beams to return. - last_position_only: a boolean, speed-up by computing last position only. alpha: Float that controls the length penalty. larger the alpha, stronger the preference for slonger translations. @@ -275,13 +267,13 @@ def symbols_to_logits_fn(ids): features["targets"] = ids self._coverage = None - sharded_logits, _ = self.model_fn( - features, False, last_position_only=last_position_only) + sharded_logits, _ = self.model_fn(features, False) # now self._coverage is a coverage tensor for the first datashard. # it has shape [batch_size] and contains floats between 0 and # source_length. logits = sharded_logits[0] # Assuming we have one shard. - if last_position_only: + modality = self._hparams.problems[self._problem_idx].target_modality + if modality.top_is_pointwise: return tf.squeeze(logits, axis=[1, 2, 3]) current_output_position = tf.shape(ids)[1] - 1 # -1 due to the pad above. logits = logits[:, current_output_position, :, :] @@ -325,7 +317,7 @@ def symbols_to_logits_fn(ids): return {"outputs": ids[:, :top_beams, 1:], "scores": scores} return ids[:, :top_beams, 1:] - def _greedy_infer(self, features, decode_length, last_position_only): + def _greedy_infer(self, features, decode_length): """A greedy inference method. Models should ideally implement a more efficient version of this function. @@ -333,16 +325,15 @@ def _greedy_infer(self, features, decode_length, last_position_only): Args: features: an map of string to `Tensor` decode_length: an integer. How many additional timesteps to decode. - last_position_only: a boolean, speed-up by computing last position only. Returns: samples: an integer `Tensor`. logits: `Tensor` of shape [batch_size, time, 1, 1, vocab_size]. losses: a dictionary: {loss-name (string): floating point `Scalar`} """ - return self._slow_greedy_infer(features, decode_length, last_position_only) + return self._slow_greedy_infer(features, decode_length) - def _slow_greedy_infer(self, features, decode_length, last_position_only): + def _slow_greedy_infer(self, features, decode_length): """A slow greedy inference method. Quadratic time in decode_length. @@ -350,7 +341,6 @@ def _slow_greedy_infer(self, features, decode_length, last_position_only): Args: features: an map of string to `Tensor` decode_length: an integer. How many additional timesteps to decode. - last_position_only: a boolean, speed-up by computing last position only. Returns: samples: an integer `Tensor`. @@ -370,18 +360,18 @@ def _slow_greedy_infer(self, features, decode_length, last_position_only): # in metric functions stays in the same frame as other vars. targets_old = features.get("targets", None) + target_modality = self._hparams.problems[self._problem_idx].target_modality def infer_step(recent_output, recent_logits, unused_loss): """Inference step.""" recent_output.set_shape([None, None, None, 1]) padded = tf.pad(recent_output, [[0, 0], [0, 1], [0, 0], [0, 0]]) features["targets"] = padded # This is inefficient in that it generates samples at all timesteps, - # not just the last one, except if last_position_only is set (dangerous). - samples, logits, losses = self.sample( - features, last_position_only=last_position_only) + # not just the last one, except if target_modality is pointwise. + samples, logits, losses = self.sample(features) # Concatenate the already-generated recent_output with last timestep # of the newly-generated samples. - if last_position_only: + if target_modality.top_is_pointwise: cur_sample = samples[:, -1, :, :] else: cur_sample = samples[:, tf.shape(recent_output)[1], :, :] @@ -472,20 +462,18 @@ def fn_not_eos(): result, [0, partial_target_length, 0, 0], [-1, -1, -1, -1]) return result, logits, losses - def sample(self, features, last_position_only=False): + def sample(self, features): """Run the model and extract samples. Args: features: an map of string to `Tensor`. - last_position_only: a boolean, speed-up by computing last position only. Returns: samples: an integer `Tensor`. logits: a list of `Tensor`s, one per datashard. losses: a dictionary: {loss-name (string): floating point `Scalar`}. """ - sharded_logits, losses = self.model_fn( - features, False, last_position_only=last_position_only) + sharded_logits, losses = self.model_fn(features, False) if self._hparams.sampling_method == "argmax": sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) else: @@ -517,14 +505,13 @@ def _shard_features(self, features): # pylint: disable=missing-docstring 0)) return sharded_features - def model_fn(self, features, skip=False, last_position_only=False): + def model_fn(self, features, skip=False): """Computes the entire model and produces sharded logits and losses. Args: features: A dictionary of feature name to tensor. skip: a boolean, if we're just dummy-calling and actually skip this model (but we need to create variables to not confuse distributed training). - last_position_only: a boolean, compute logits for only the last position. Returns: sharded_logits: a list of `Tensor`s, one per datashard. @@ -591,7 +578,9 @@ def model_fn(self, features, skip=False, last_position_only=False): losses = {"extra": losses} with tf.variable_scope(target_modality.name, reuse=target_reuse): - if not last_position_only: + last_only = (target_modality.top_is_pointwise and + self._hparams.mode == tf.estimator.ModeKeys.PREDICT) + if not last_only: sharded_logits = target_modality.top_sharded( body_outputs, sharded_features["targets"], dp) training_loss = target_modality.loss_sharded( @@ -600,7 +589,6 @@ def model_fn(self, features, skip=False, last_position_only=False): training_loss *= self._problem_hparams.loss_multiplier else: # Take body outputs for the last position only, and targets too. - # TODO(lukaszkaiser): warning, this doesn't work for all modalities! last_position_body_outputs = [ tf.expand_dims(body_shard[:, -1, :, :], axis=[1]) for body_shard in body_outputs From aac632f438b5717a6a4e8449301de61fe5015333 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 10 Nov 2017 16:26:52 -0800 Subject: [PATCH 0573/4095] Batch norm fix, --use_tpu, and Resnet50 model PiperOrigin-RevId: 175353643 --- tensor2tensor/layers/common_attention.py | 18 +- tensor2tensor/layers/modalities.py | 3 +- tensor2tensor/models/__init__.py | 1 + tensor2tensor/models/resnet.py | 249 ++++++++++++++++++++++ tensor2tensor/models/resnet_test.py | 70 ++++++ tensor2tensor/models/transformer.py | 2 +- tensor2tensor/tpu/tpu_trainer.py | 12 +- tensor2tensor/tpu/tpu_trainer_lib.py | 88 +++++--- tensor2tensor/tpu/tpu_trainer_lib_test.py | 2 +- tensor2tensor/utils/expert_utils.py | 15 ++ tensor2tensor/utils/metrics.py | 28 +-- tensor2tensor/utils/model_builder.py | 2 +- tensor2tensor/utils/optimize.py | 6 +- tensor2tensor/utils/registry.py | 5 +- 14 files changed, 441 insertions(+), 60 deletions(-) create mode 100644 tensor2tensor/models/resnet.py create mode 100644 tensor2tensor/models/resnet_test.py diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index b840291d4..17cb23a1d 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -1323,12 +1323,20 @@ def masked_local_attention_1d( with tf.variable_scope(name, default_name="local_attention_1d", values=[q, k, v]): v_shape = v.get_shape() - batch = tf.shape(q)[0] - heads = tf.shape(q)[1] - length = tf.shape(q)[2] + batch = common_layers.shape_dim(q, 0) + heads = common_layers.shape_dim(q, 1) + length = common_layers.shape_dim(q, 2) + if isinstance(block_length, tf.Tensor): + const = tf.contrib.util.constant_value(block_length) + if const is not None: + block_length = int(const) + # If (length < 2 * block_length), then we use only one block. - block_length = tf.where(tf.less(length, block_length * 2), - length, block_length) + if isinstance(length, int) and isinstance(block_length, int): + block_length = length if length < block_length * 2 else block_length + else: + block_length = tf.where(tf.less(length, block_length * 2), + length, block_length) depth_k = tf.shape(k)[3] depth_v = tf.shape(v)[3] original_length = length diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 4a8848f35..586525e0d 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -414,7 +414,8 @@ def bottom(self, x): def targets_bottom(self, x): with tf.variable_scope(self.name): - return tf.zeros([tf.shape(x)[0], 1, 1, self._body_input_depth]) + return tf.zeros( + [common_layers.shape_dim(x, 0), 1, 1, self._body_input_depth]) def top(self, body_output, _): """Transform inputs from model space to target space. diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index feadcae83..dd1c11390 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -33,6 +33,7 @@ from tensor2tensor.models import lstm from tensor2tensor.models import multimodel from tensor2tensor.models import neural_gpu +from tensor2tensor.models import resnet from tensor2tensor.models import shake_shake from tensor2tensor.models import slicenet from tensor2tensor.models import transformer diff --git a/tensor2tensor/models/resnet.py b/tensor2tensor/models/resnet.py new file mode 100644 index 000000000..77a426e23 --- /dev/null +++ b/tensor2tensor/models/resnet.py @@ -0,0 +1,249 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Resnets.""" +# Copied from cloud_tpu/models/resnet_garden and modified + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor.layers import common_hparams +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + +# TODO(rsepassi): make hparams +_BATCH_NORM_DECAY = 0.997 +_BATCH_NORM_EPSILON = 1e-5 + + +def bottleneck_block(inputs, filters, is_training, projection_shortcut, strides, + data_format): + """Bottleneck block variant for residual networks with BN before convolutions. + + Args: + inputs: A tensor of size [batch, channels, height, width]. + filters: The number of filters for the first two convolutions. Note that the + third and final convolution will use 4 times as many filters. + is_training: A Boolean for whether the model is in training or inference + mode. Needed for batch normalization. + projection_shortcut: The function to use for projection shortcuts (typically + a 1x1 convolution when downsampling the input). + strides: The block's stride. If greater than 1, this block will ultimately + downsample the input. + data_format: channels_{first, last} + + Returns: + The output tensor of the block. + """ + shortcut = inputs + out = inputs + out = batch_norm_relu(out, is_training, data_format) + + # The projection shortcut should come after the first batch norm and ReLU + # since it performs a 1x1 convolution. + if projection_shortcut is not None: + shortcut = projection_shortcut(out) + + do_bn_relus = [False, True, True] + kernel_sizes = [1, 3, 1] + layer_strides = [1, strides, 1] + filter_sizes = [filters, filters, 4 * filters] + + for do_bn_relu, kernel_size, layer_stride, filter_size in zip( + do_bn_relus, kernel_sizes, layer_strides, filter_sizes): + if do_bn_relu: + out = batch_norm_relu(out, is_training, data_format) + out = conv2d_fixed_padding( + inputs=out, + filters=filter_size, + kernel_size=kernel_size, + strides=layer_stride, + data_format=data_format) + + return out + shortcut + + +def batch_norm_relu(inputs, is_training, data_format): + """Performs a batch normalization followed by a ReLU.""" + # We set fused=True for a significant performance boost. + out = tf.layers.batch_normalization( + inputs=inputs, + axis=1 if data_format == "channels_first" else 3, + momentum=_BATCH_NORM_DECAY, + epsilon=_BATCH_NORM_EPSILON, + center=True, + scale=True, + training=is_training, + fused=True) + out = tf.nn.relu(out) + return out + + +def block_layer(inputs, filters, block_fn, blocks, strides, is_training, + data_format, name): + """Creates one layer of blocks for the ResNet model. + + Args: + inputs: A tensor of size [batch, channels, height, width]. + filters: The number of filters for the first convolution of the layer. + block_fn: The block to use within the model, either `building_block` or + `bottleneck_block`. + blocks: The number of blocks contained in the layer. + strides: The stride to use for the first convolution of the layer. If + greater than 1, this layer will ultimately downsample the input. + is_training: Either True or False, whether we are currently training the + model. Needed for batch norm. + data_format: channels_{first, last} + name: A string name for the tensor output of the block layer. + + Returns: + The output tensor of the block layer. + """ + # Bottleneck blocks end with 4x the number of filters as they start with + filters_out = 4 * filters if block_fn is bottleneck_block else filters + + def projection_shortcut(inputs): + return conv2d_fixed_padding( + inputs=inputs, + filters=filters_out, + kernel_size=1, + strides=strides, + data_format=data_format) + + # Only the first block per block_layer uses projection_shortcut and strides + inputs = block_fn(inputs, filters, is_training, projection_shortcut, strides, + data_format) + + for _ in range(1, blocks): + inputs = block_fn(inputs, filters, is_training, None, 1, data_format) + + return tf.identity(inputs, name) + + +def fixed_padding(inputs, kernel_size, data_format): + """Pads the input along the spatial dimensions independently of input size. + + Args: + inputs: A 4D tensor layed out according to data_format + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + Should be a positive integer. + data_format: channels_{first, last} + + Returns: + A tensor of size [batch, channels, height_out, width_out] with the + input either intact (if kernel_size == 1) or padded (if kernel_size > 1). + """ + pad_total = kernel_size - 1 + pad_beg = pad_total // 2 + pad_end = pad_total - pad_beg + spatial_pads = [[pad_beg, pad_end], [pad_beg, pad_end]] + if data_format == "channels_first": + pads = [[0, 0], [0, 0]] + spatial_pads + else: + assert data_format == "channels_last" + pads = [[0, 0]] + spatial_pads + [[0, 0]] + padded_inputs = tf.pad(inputs, pads) + return padded_inputs + + +def conv2d_fixed_padding(**kwargs): + """conv2d with fixed_padding, based only on kernel_size.""" + strides = kwargs["strides"] + if strides > 1: + kwargs["inputs"] = fixed_padding(kwargs["inputs"], kwargs["kernel_size"], + kwargs["data_format"]) + + defaults = { + "padding": ("SAME" if strides == 1 else "VALID"), + "use_bias": False, + "kernel_initializer": tf.variance_scaling_initializer(), + } + defaults.update(kwargs) + + return tf.layers.conv2d(**defaults) + + +def resnet50(inputs, hparams): + """Resnet50.""" + is_training = hparams.mode == tf.estimator.ModeKeys.TRAIN + block_fn = bottleneck_block + + out = inputs + data_format = "channels_first" if hparams.use_nchw else "channels_last" + if hparams.use_nchw: + # Convert from channels_last (NHWC) to channels_first (NCHW). This provides + # a large performance boost on GPU. + out = tf.transpose(inputs, [0, 3, 1, 2]) + + out = conv2d_fixed_padding( + inputs=out, filters=64, kernel_size=7, strides=2, data_format=data_format) + out = tf.identity(out, "initial_conv") + out = tf.layers.max_pooling2d( + inputs=out, + pool_size=3, + strides=2, + padding="SAME", + data_format=data_format) + out = tf.identity(out, "initial_max_pool") + + for i, (num_filters, stride, block_size) in enumerate( + zip(hparams.num_filters, hparams.strides, hparams.layer_sizes)): + out = block_layer( + inputs=out, + filters=num_filters, + block_fn=block_fn, + blocks=block_size, + strides=stride, + is_training=is_training, + data_format=data_format, + name="block_layer_%d" % i) + + out = batch_norm_relu(out, is_training, data_format) + out = tf.layers.average_pooling2d( + inputs=out, + pool_size=7, + strides=1, + padding="VALID", + data_format=data_format) + out = tf.identity(out, "final_avg_pool") + + if hparams.use_nchw: + # Back to NHWC + out = tf.transpose(out, [0, 2, 3, 1]) + return out + + +@registry.register_model +class Resnet50(t2t_model.T2TModel): + + def model_fn_body(self, features): + return resnet50(features["inputs"], self.hparams) + + +@registry.register_hparams +def resnet_base(): + """Set of hyperparameters.""" + hparams = common_hparams.basic_params1() + hparams.add_hparam("layer_sizes", [3, 4, 6, 3]) + hparams.add_hparam("use_nchw", True) + hparams.add_hparam("num_filters", [64, 128, 256, 512]) + hparams.add_hparam("strides", [1, 2, 2, 2]) + hparams.tpu_batch_size_per_shard = 48 + return hparams diff --git a/tensor2tensor/models/resnet_test.py b/tensor2tensor/models/resnet_test.py new file mode 100644 index 000000000..9db4cb85f --- /dev/null +++ b/tensor2tensor/models/resnet_test.py @@ -0,0 +1,70 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Resnet tests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +import numpy as np + +from tensor2tensor.data_generators import problem_hparams +from tensor2tensor.models import resnet +from tensor2tensor.utils import registry + +import tensorflow as tf + + +def resnet_tiny_cpu(): + hparams = resnet.resnet_base() + hparams.layer_sizes = [2, 2, 2, 2] + hparams.num_filters = [10, 20, 30, 40] + hparams.use_nchw = False + return hparams + + +class ResnetTest(tf.test.TestCase): + + def _testResnet(self, img_size, output_size): + vocab_size = 9 + batch_size = 2 + x = np.random.random_integers( + 0, high=255, size=(batch_size, img_size, img_size, 3)) + y = np.random.random_integers( + 1, high=vocab_size - 1, size=(batch_size, 1, 1, 1)) + hparams = resnet_tiny_cpu() + p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size) + p_hparams.input_modality["inputs"] = (registry.Modalities.IMAGE, None) + with self.test_session() as session: + features = { + "inputs": tf.constant(x, dtype=tf.int32), + "targets": tf.constant(y, dtype=tf.int32), + } + model = resnet.Resnet50(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) + sharded_logits, _ = model.model_fn(features) + logits = tf.concat(sharded_logits, 0) + session.run(tf.global_variables_initializer()) + res = session.run(logits) + self.assertEqual(res.shape, (batch_size,) + output_size + (1, vocab_size)) + + def testResnetLarge(self): + self._testResnet(img_size=299, output_size=(4, 4)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index a5ddb1bfe..a539d02e7 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -1084,7 +1084,7 @@ def update_hparams_for_tpu(hparams): # Inputs # Each example in the batch will be of (padded) length hparams.max_length hparams.max_length = 64 - hparams.tpu_batch_size_per_shard = 16 + hparams.tpu_batch_size_per_shard = 20 @registry.register_hparams diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 071b168b2..39ce82ee9 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -41,6 +41,7 @@ flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") flags.DEFINE_integer("iterations_per_loop", 1000, "Number of iterations in a TPU training loop.") +flags.DEFINE_bool("use_tpu", True, "Whether to use TPU.") # To maintain compatibility with some internal libs, we guard against these flag # definitions possibly erroring. Apologies for the ugliness. @@ -68,9 +69,14 @@ def create_hparams(): def create_experiment_fn(): - return lib.make_experiment_fn(FLAGS.model, get_problem_name(), FLAGS.data_dir, - FLAGS.train_steps, FLAGS.eval_steps, - FLAGS.local_eval_frequency) + return lib.make_experiment_fn( + FLAGS.model, + get_problem_name(), + FLAGS.data_dir, + FLAGS.train_steps, + FLAGS.eval_steps, + FLAGS.local_eval_frequency, + use_tpu=FLAGS.use_tpu) def create_run_config(): diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index c9be40be2..cee8d630f 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -23,6 +23,8 @@ # Dependency imports +import six + from tensor2tensor.layers import common_layers from tensor2tensor.utils import data_reader from tensor2tensor.utils import metrics @@ -51,7 +53,10 @@ def input_fn(params): """Input fn.""" is_training = mode == tf.estimator.ModeKeys.TRAIN num_threads = 4 if is_training else 1 - batch_size = params["batch_size"] + if "batch_size" in params: + batch_size = params["batch_size"] + else: + batch_size = hparams.tpu_batch_size_per_shard def valid_size(example): return data_reader.example_valid_size(example, hparams.min_length, @@ -72,7 +77,7 @@ def define_shapes(example): example["targets"] = targets # Ensure batch size is set on all features - for _, t in example.iteritems(): + for _, t in six.iteritems(example): shape = t.get_shape().as_list() shape[0] = batch_size t.set_shape(t.get_shape().merge_with(shape)) @@ -126,7 +131,7 @@ def are_shapes_fully_defined(shapes_dict): def fill_shape_nones(shapes_dict, none_filler=None): padded_shapes = {} - for key, shape in shapes_dict.iteritems(): + for key, shape in six.iteritems(shapes_dict): padded_shapes[key] = [ (dim if dim is not None else none_filler) for dim in shape.as_list() ] @@ -174,10 +179,10 @@ def model_fn(features, labels, mode, params, config): logits = target_modality.top(outputs, labels) # If the length dim is unknown fix it to max_length - if logits.get_shape().as_list()[1] is None: - shape = [None] * logits.get_shape().ndims + if use_tpu and logits.get_shape().as_list()[1] is None: + shape = logits.get_shape().as_list() shape[1] = hparams.max_length - logits.set_shape(logits.get_shape().merge_with(shape)) + logits.set_shape(shape) # Loss loss_num, loss_den = target_modality.loss(logits, labels) @@ -185,12 +190,25 @@ def model_fn(features, labels, mode, params, config): if mode == tf.estimator.ModeKeys.EVAL: problem = hp.problem_instances[0] - eval_metrics_fn = create_eval_metrics_fn(problem) - _remove_summaries() - return tf.contrib.tpu.TPUEstimatorSpec( - mode, - eval_metrics=(eval_metrics_fn, [logits, orig_features["targets"]]), - loss=loss) + + if use_tpu: + eval_metrics_fn = create_eval_metrics_fn(problem) + _remove_summaries() + return tf.contrib.tpu.TPUEstimatorSpec( + mode, + eval_metrics=(eval_metrics_fn, [logits, orig_features["targets"]]), + loss=loss) + else: + eval_metrics_fns = metrics.create_evaluation_metrics([problem], hparams) + eval_metrics = {} + for metric_name, metric_fn in six.iteritems(eval_metrics_fns): + eval_metrics[metric_name] = metric_fn(logits, features) + + return tf.estimator.EstimatorSpec( + mode, + predictions={"predictions": logits}, + eval_metric_ops=eval_metrics, + loss=loss) assert mode == tf.estimator.ModeKeys.TRAIN @@ -212,7 +230,10 @@ def model_fn(features, labels, mode, params, config): train_op = tf.identity(loss) _remove_summaries() - return tf.contrib.tpu.TPUEstimatorSpec(mode, loss=loss, train_op=train_op) + if use_tpu: + return tf.contrib.tpu.TPUEstimatorSpec(mode, loss=loss, train_op=train_op) + else: + return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) return model_fn @@ -290,29 +311,40 @@ def create_run_config(master="", save_summary_steps=0, save_checkpoints_steps=save_checkpoints_steps, tpu_config=tpu_config, - master=master, - evaluation_master=master) + master=master) return run_config -def create_estimator(model_fn, run_config, batch_size=16): - return tf.contrib.tpu.TPUEstimator( - model_fn=model_fn, - model_dir=run_config.model_dir, - config=run_config, - train_batch_size=batch_size, - eval_batch_size=batch_size * 2) - - -def create_experiment(run_config, hparams, model_name, problem_name, data_dir, - train_steps, eval_steps, min_eval_frequency): +def create_estimator(model_fn, run_config, batch_size=16, use_tpu=True): + if use_tpu: + return tf.contrib.tpu.TPUEstimator( + model_fn=model_fn, + model_dir=run_config.model_dir, + config=run_config, + train_batch_size=batch_size, + eval_batch_size=batch_size * 2) + else: + return tf.estimator.Estimator( + model_fn=model_fn, model_dir=run_config.model_dir, config=run_config) + + +def create_experiment(run_config, + hparams, + model_name, + problem_name, + data_dir, + train_steps, + eval_steps, + min_eval_frequency, + use_tpu=True): """Create Experiment.""" hparams.add_hparam("data_dir", data_dir) trainer_utils.add_problem_hparams(hparams, problem_name) batch_size = ( hparams.tpu_batch_size_per_shard * run_config.tpu_config.num_shards) - model_fn = get_model_fn(model_name, hparams) - estimator = create_estimator(model_fn, run_config, batch_size) + model_fn = get_model_fn(model_name, hparams, use_tpu=use_tpu) + estimator = create_estimator( + model_fn, run_config, batch_size, use_tpu=use_tpu) train_input_fn = get_input_fn(tf.estimator.ModeKeys.TRAIN, hparams) eval_input_fn = get_input_fn(tf.estimator.ModeKeys.EVAL, hparams) return tf.contrib.learn.Experiment( diff --git a/tensor2tensor/tpu/tpu_trainer_lib_test.py b/tensor2tensor/tpu/tpu_trainer_lib_test.py index de36856ca..24d26879d 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib_test.py +++ b/tensor2tensor/tpu/tpu_trainer_lib_test.py @@ -60,7 +60,7 @@ def testSmoke(self): with tf.variable_scope("eval"): spec = model_fn(features, targets, tf.estimator.ModeKeys.EVAL, params, config) - self.assertTrue(spec.eval_metrics is not None) + self.assertTrue(spec.eval_metric_ops is not None) if __name__ == "__main__": diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 7fc3d01f0..9764b2b99 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -96,6 +96,19 @@ def add_name_scope(scope=None): return add_scope(scope, scope_fn=tf.name_scope) +def _add_variable_proxy_methods(var, proxy_tensor): + """Proxy methods of underlying variable. + + This enables our custom getters to still work with, e.g., batch norm. + + Args: + var: Variable to proxy + proxy_tensor: Tensor that is identity of var + """ + proxy_tensor.read_value = lambda: tf.identity(proxy_tensor) + proxy_tensor.assign_sub = var.assign_sub + + class Parallelism(object): """Helper class for creating sets of parallel function calls. @@ -188,6 +201,7 @@ def daisy_chain_getter(getter, name, *args, **kwargs): var = getter(name, *args, **kwargs) v = tf.identity(var._ref()) # pylint: disable=protected-access # update the cache + _add_variable_proxy_methods(var, v) cache[name] = v cache[device_var_key] = v return v @@ -202,6 +216,7 @@ def caching_getter(getter, name, *args, **kwargs): return cache[key] with tf.device(self._caching_devices[i]): ret = tf.identity(v._ref()) # pylint: disable=protected-access + _add_variable_proxy_methods(v, ret) cache[key] = ret return ret diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index ae28176a1..11d7356c5 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -192,8 +192,7 @@ def padded_accuracy(predictions, return tf.to_float(tf.equal(outputs, padded_labels)), weights -def set_precision(predictions, - labels, +def set_precision(predictions, labels, weights_fn=common_layers.weights_nonzero): """Precision of set predictions. @@ -216,9 +215,7 @@ def set_precision(predictions, return tf.to_float(tf.equal(labels, predictions)), weights -def set_recall(predictions, - labels, - weights_fn=common_layers.weights_nonzero): +def set_recall(predictions, labels, weights_fn=common_layers.weights_nonzero): """Recall of set predictions. Args: @@ -240,8 +237,7 @@ def set_recall(predictions, return tf.to_float(tf.equal(labels, predictions)), weights -def image_summary(predictions, - hparams): +def image_summary(predictions, hparams): """Reshapes predictions and passes it to tensorboard. Args: @@ -254,15 +250,16 @@ def image_summary(predictions, """ predictions_reshaped = tf.reshape( predictions, [-1, hparams.height, hparams.width, hparams.colors]) - return tf.summary.image("image_summary", predictions_reshaped, - max_outputs=1), tf.zeros_like(predictions) + return tf.summary.image( + "image_summary", predictions_reshaped, + max_outputs=1), tf.zeros_like(predictions) def create_evaluation_metrics(problems, model_hparams): """Creates the evaluation metrics for the model. Args: - problems: List of tuples (problem name, problem instance). + problems: List of Problem instances. model_hparams: a set of hparams. Returns: @@ -302,12 +299,14 @@ def wrapped_metric_fn(): return problem_metric_fn eval_metrics = dict() - for problem_idx, (problem_name, problem_instance) in enumerate(problems): + for problem_idx, problem_instance in enumerate(problems): + problem_name = problem_instance.name metrics = problem_instance.eval_metrics() if not all([m in METRICS_FNS for m in metrics]): raise ValueError("Unrecognized metric. Problem %s specified metrics " - "%s. Recognized metrics are %s." % - (problem_name, metrics, METRICS_FNS.keys())) + "%s. Recognized metrics are %s." % (problem_name, + metrics, + METRICS_FNS.keys())) class_output = "image" in problem_name and "coco" not in problem_name real_output = "gene_expression" in problem_name @@ -321,7 +320,8 @@ def wrapped_metric_fn(): else: weights_fn = common_layers.weights_nonzero - def image_wrapped_metric_fn(predictions, labels, + def image_wrapped_metric_fn(predictions, + labels, weights_fn=common_layers.weights_nonzero): _, _ = labels, weights_fn return metric_fn(predictions, model_hparams) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index a63032453..6bef72b0c 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -199,7 +199,7 @@ def nth_model(n): if mode == tf.estimator.ModeKeys.EVAL: eval_metrics_fns = metrics.create_evaluation_metrics( - zip(problem_names, hparams.problem_instances), hparams) + hparams.problem_instances, hparams) eval_metrics = {} for metric_name, metric_fn in six.iteritems(eval_metrics_fns): diff --git a/tensor2tensor/utils/optimize.py b/tensor2tensor/utils/optimize.py index 649ef4f28..b9a092ac8 100644 --- a/tensor2tensor/utils/optimize.py +++ b/tensor2tensor/utils/optimize.py @@ -138,8 +138,4 @@ def learning_rate_decay(hparams, num_worker_replicas=1, num_train_steps=1): else: raise ValueError("Unrecognized learning rate decay scheme: %s" % hparams.learning_rate_decay_scheme) - return tf.cond( - step < warmup_steps, - lambda: inv_decay, - lambda: decay, - name="learning_rate_decay_warump_cond") + return tf.where(step < warmup_steps, inv_decay, decay) diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 2b708b4ce..e3f3787f6 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -166,7 +166,10 @@ def decorator(hp_fn, registration_name=None): def hparams(name): if name not in _HPARAMS: - raise LookupError("HParams set %s never registered." % name) + error_msg = "HParams set %s never registered. Sets registered:\n%s" + raise LookupError( + error_msg % (name, + display_list_by_prefix(list_hparams(), starting_spaces=4))) return _HPARAMS[name] From bc2edc643827a838f3f3d00ed175a75743fd01dd Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 10 Nov 2017 17:30:15 -0800 Subject: [PATCH 0574/4095] v1.2.8 PiperOrigin-RevId: 175360631 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0669ab1a6..5eebe27f3 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.2.7', + version='1.2.8', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 8d191e4e41c1864d78da57fa356b217690aab6ac Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 10 Nov 2017 17:56:46 -0800 Subject: [PATCH 0575/4095] Rm flaky summary histogram PiperOrigin-RevId: 175362634 --- .travis.yml | 2 +- tensor2tensor/layers/common_layers.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 744006762..370682401 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,6 @@ script: - mkdir $T2T_TRAIN_DIR - t2t-datagen --problem=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR - t2t-trainer --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --train_steps=5 --eval_steps=5 --output_dir=$T2T_TRAIN_DIR - - t2t-decoder --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --output_dir=$T2T_TRAIN_DIR --decode_hparams='num_samples=10,use_last_position_only=True' + - t2t-decoder --problems=$T2T_PROBLEM --data_dir=$T2T_DATA_DIR --model=transformer --hparams_set=transformer_tiny --output_dir=$T2T_TRAIN_DIR --decode_hparams='num_samples=10' git: depth: 3 diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 7c209c60c..aea7202d7 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -1241,10 +1241,6 @@ def conv_hidden_relu(inputs, **kwargs) if dropout != 0.0: h = tf.nn.dropout(h, 1.0 - dropout) - if not tf.get_variable_scope().reuse: - tf.summary.histogram("hidden_density_logit", - relu_density_logit( - h, list(range(inputs.shape.ndims - 1)))) conv_f2 = conv if second_kernel_size == (1, 1) else separable_conv ret = conv_f2(h, output_size, second_kernel_size, name="conv2", **kwargs) if is_3d: From 461ca81b36a22c1778d57436b4efae1664a56976 Mon Sep 17 00:00:00 2001 From: Eric Purdy <eric.purdy@fathomhealth.co> Date: Sat, 11 Nov 2017 05:13:59 +0000 Subject: [PATCH 0576/4095] Add option to profile during training --- tensor2tensor/bin/t2t-trainer | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 5a2866da6..fc37f27ab 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -59,7 +59,7 @@ flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("master", "", "Address of TensorFlow master.") flags.DEFINE_string("schedule", "train_and_evaluate", "Method of tf.contrib.learn.Experiment to run.") - +flags.DEFINE_bool("profile", False, "Profile performance?") def main(_): tf.logging.set_verbosity(tf.logging.INFO) @@ -83,13 +83,26 @@ def main(_): problem.generate_data(data_dir, tmp_dir) # Run the trainer. - trainer_utils.run( + def run_experiment(): + trainer_utils.run( data_dir=data_dir, model=FLAGS.model, output_dir=output_dir, train_steps=FLAGS.train_steps, eval_steps=FLAGS.eval_steps, schedule=FLAGS.schedule) + + if FLAGS.profile: + with tf.contrib.tfprof.ProfileContext('t2tprof', + trace_steps=range(100), + dump_steps=range(100)) as pctx: + opts = tf.profiler.ProfileOptionBuilder.time_and_memory() + pctx.add_auto_profiling('op', opts, range(100)) + + run_experiment() + + else: + run_experiment() if __name__ == "__main__": From 2ae2ba40a9e945d7b91b0189d6d4914680dd14f5 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 13 Nov 2017 17:06:06 -0800 Subject: [PATCH 0577/4095] Bug fixes PiperOrigin-RevId: 175611828 --- CONTRIBUTING.md | 10 ++++++++++ tensor2tensor/bin/t2t-datagen | 9 ++++----- tensor2tensor/models/shake_shake.py | 2 ++ tensor2tensor/utils/expert_utils.py | 6 ++++-- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ae319c70a..c66b4029c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,5 +1,15 @@ # How to Contribute +# Issues + +* Please tag your issue with `bug`, `feature request`, or `question` to help us + effectively respond. +* Please include the versions of TensorFlow and Tensor2Tensor you are running + (run `pip list | grep tensor`) +* Please provide the command line you ran as well as the log output. + +# Pull Requests + We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to follow. diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index eba408074..2ac0f0db2 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -43,7 +43,6 @@ from tensor2tensor.data_generators import all_problems # pylint: disable=unused from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import snli -from tensor2tensor.data_generators import translate from tensor2tensor.data_generators import wsj_parsing from tensor2tensor.utils import registry from tensor2tensor.utils import usr_dir @@ -82,10 +81,10 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), "parsing_english_ptb8k": ( - lambda: translate.parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13), - lambda: translate.parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13)), + lambda: wsj_parsing.parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13, 2**9), + lambda: wsj_parsing.parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13, 2**9)), "parsing_english_ptb16k": ( lambda: wsj_parsing.parsing_token_generator( FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9), diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py index a4dd2385a..bad951a32 100644 --- a/tensor2tensor/models/shake_shake.py +++ b/tensor2tensor/models/shake_shake.py @@ -132,6 +132,8 @@ def model_fn_body(self, features): @registry.register_hparams def shakeshake_cifar10(): """Parameters for CIFAR-10.""" + tf.logging.warning("shakeshake_cifar10 hparams have not been verified to " + "achieve good performance.") hparams = common_hparams.basic_params1() # This leads to effective batch size 128 when number of GPUs is 1 hparams.batch_size = 4096 * 8 diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 9764b2b99..7d4912bc6 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -200,8 +200,8 @@ def daisy_chain_getter(getter, name, *args, **kwargs): else: var = getter(name, *args, **kwargs) v = tf.identity(var._ref()) # pylint: disable=protected-access + _add_variable_proxy_methods(var, v) # update the cache - _add_variable_proxy_methods(var, v) cache[name] = v cache[device_var_key] = v return v @@ -210,10 +210,12 @@ def daisy_chain_getter(getter, name, *args, **kwargs): # so we make a custom getter that uses identity to cache the variable. # pylint: disable=cell-var-from-loop def caching_getter(getter, name, *args, **kwargs): - v = getter(name, *args, **kwargs) + """Cache variables on device.""" key = (self._caching_devices[i], name) if key in cache: return cache[key] + + v = getter(name, *args, **kwargs) with tf.device(self._caching_devices[i]): ret = tf.identity(v._ref()) # pylint: disable=protected-access _add_variable_proxy_methods(v, ret) From 0095a335b864aea697cab13677bc21c298538a05 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 13 Nov 2017 17:09:10 -0800 Subject: [PATCH 0578/4095] v1.2.9 PiperOrigin-RevId: 175612193 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5eebe27f3..bedb393fd 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.2.8', + version='1.2.9', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 729d8ddf2409d4fc7be6b77cf1133814e13a6b06 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 13 Nov 2017 17:35:55 -0800 Subject: [PATCH 0579/4095] Add masking and fast decoding to AE Transformer, remove 2 obsolete models. PiperOrigin-RevId: 175615296 --- tensor2tensor/models/__init__.py | 2 - tensor2tensor/models/transformer_adv.py | 233 ------------------ .../models/transformer_alternative.py | 174 ------------- tensor2tensor/models/transformer_vae.py | 97 ++++++-- tensor2tensor/utils/t2t_model.py | 9 +- 5 files changed, 84 insertions(+), 431 deletions(-) delete mode 100644 tensor2tensor/models/transformer_adv.py delete mode 100644 tensor2tensor/models/transformer_alternative.py diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index dd1c11390..c067711be 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -37,8 +37,6 @@ from tensor2tensor.models import shake_shake from tensor2tensor.models import slicenet from tensor2tensor.models import transformer -from tensor2tensor.models import transformer_adv -from tensor2tensor.models import transformer_alternative from tensor2tensor.models import transformer_moe from tensor2tensor.models import transformer_revnet from tensor2tensor.models import transformer_sketch diff --git a/tensor2tensor/models/transformer_adv.py b/tensor2tensor/models/transformer_adv.py deleted file mode 100644 index 737aa822e..000000000 --- a/tensor2tensor/models/transformer_adv.py +++ /dev/null @@ -1,233 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Adversarial Transformer.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -from tensor2tensor.layers import common_layers -from tensor2tensor.models import transformer -from tensor2tensor.models import transformer_vae -from tensor2tensor.utils import registry -from tensor2tensor.utils import t2t_model - -import tensorflow as tf - - -def encode(x, x_space, hparams, name): - """Transformer preparations and encoder.""" - with tf.variable_scope(name): - (encoder_input, encoder_self_attention_bias, - ed) = transformer.transformer_prepare_encoder(x, x_space, hparams) - encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.dropout) - return transformer.transformer_encoder( - encoder_input, encoder_self_attention_bias, hparams), ed - - -def decode(encoder_output, encoder_decoder_attention_bias, targets, - hparams, name, reuse=False): - """Transformer decoder.""" - with tf.variable_scope(name, reuse=reuse): - targets = common_layers.flatten4d3d(targets) - - decoder_input, decoder_self_bias = transformer.transformer_prepare_decoder( - targets, hparams) - - decoder_input = tf.nn.dropout(decoder_input, - 1.0 - hparams.layer_prepostprocess_dropout) - - decoder_output = transformer.transformer_decoder( - decoder_input, - encoder_output, - decoder_self_bias, - encoder_decoder_attention_bias, - hparams) - - # Expand since t2t expects 4d tensors. - return tf.expand_dims(decoder_output, axis=2) - - -def reverse_gradient(x, delta=1.0): - return tf.stop_gradient((1.0 + delta) * x) - delta * x - - -def adversary(embedded, inputs, hparams, name, reuse=False): - with tf.variable_scope(name, reuse=reuse): - h0, i0 = common_layers.pad_to_same_length( - embedded, inputs, final_length_divisible_by=16) - h0 = tf.concat([h0, tf.expand_dims(i0, axis=2)], axis=-1) - h0 = tf.layers.dense(h0, hparams.hidden_size, name="io") - h1 = transformer_vae.compress(h0, None, False, hparams, "compress1") - h2 = transformer_vae.compress(h1, None, False, hparams, "compress2") - res_dense = tf.reduce_mean(h2, axis=[1, 2]) - res_single = tf.squeeze(tf.layers.dense(res_dense, 1), axis=-1) - return tf.nn.sigmoid(res_single) - - -def softmax_embed(x, embedding, batch_size, hparams): - """Softmax x and embed.""" - x = tf.reshape(tf.nn.softmax(x), [-1, 34*1024]) - x = tf.matmul(x, embedding) - return tf.reshape(x, [batch_size, -1, 1, hparams.hidden_size]) - - -def adv_transformer_internal(inputs, targets, target_space, hparams): - """Adversarial Transformer, main step used for training.""" - with tf.variable_scope("adv_transformer"): - batch_size = tf.shape(targets)[0] - targets = tf.reshape(targets, [batch_size, -1, 1]) - intermediate = tf.constant(34*1024 - 1) - intermediate += tf.zeros_like(targets) - targets = tf.concat([targets, intermediate], axis=2) - targets = tf.reshape(targets, [batch_size, -1, 1]) - embedding = tf.get_variable("embedding", [34*1024, hparams.hidden_size]) - targets_emb = tf.gather(embedding, targets) - - # Noisy embedded targets. - targets_noisy = tf.one_hot(targets, 34*1024) - noise_val = hparams.noise_val - targets_noisy += tf.random_uniform(tf.shape(targets_noisy), - minval=-noise_val, maxval=noise_val) - targets_emb_noisy = softmax_embed( - targets_noisy, embedding, batch_size, hparams) - - # Encoder. - if inputs is not None: - inputs_emb = common_layers.flatten4d3d(inputs) - inputs, ed = encode(inputs_emb, target_space, hparams, "input_enc") - else: - ed = None - - # Masking. - masking = common_layers.inverse_lin_decay(200000) - masking *= common_layers.inverse_exp_decay(50000) # Not much at start. - masking -= tf.random_uniform([]) * 0.4 - masking = tf.minimum(tf.maximum(masking, 0.0), 1.0) - mask = tf.less(masking, tf.random_uniform(tf.shape(targets))) - mask = tf.expand_dims(tf.to_float(mask), 3) - noise = tf.random_uniform(tf.shape(targets_emb)) - targets_emb = mask * targets_emb + (1.0 - mask) * noise - - # Decoder. - res_dec = decode(inputs, ed, targets_emb, hparams, "decoder") - res = tf.layers.dense(res_dec, 34*1024, name="res_sm") - res_emb = softmax_embed(res, embedding, batch_size, hparams) - - # Extra steps. - extra_step_prob = masking * 0.6 + 0.3 - if hparams.mode != tf.estimator.ModeKeys.TRAIN: - extra_step_prob = 1.0 - for _ in xrange(hparams.extra_steps): - def another_step(emb): - res_dec = decode(inputs, ed, emb, hparams, "decoder", reuse=True) - res = tf.layers.dense(res_dec, 34*1024, name="res_sm", reuse=True) - return softmax_embed(res, embedding, batch_size, hparams), res - res_emb, res = tf.cond(tf.less(tf.random_uniform([]), extra_step_prob), - lambda e=res_emb: another_step(e), - lambda: (res_emb, res)) - - # Adversary. - delta = masking * hparams.delta_max - true_logit = adversary(tf.stop_gradient(targets_emb_noisy), - tf.stop_gradient(inputs + inputs_emb), - hparams, "adversary") - gen_logit = adversary(reverse_gradient(res_emb, delta), - tf.stop_gradient(inputs + inputs_emb), - hparams, "adversary", reuse=True) - losses = {"adv": gen_logit - true_logit} - res = tf.stop_gradient(masking * res) + (1.0 - masking) * res - return res, losses - - -@registry.register_model -class TransformerAdv(t2t_model.T2TModel): - """Adversarial Transformer.""" - - def model_fn_body(self, features): - inputs = features.get("inputs", None) - return adv_transformer_internal( - inputs, features["targets_raw"], - features["target_space_id"], self._hparams) - - def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, - alpha=0.0): - """Produce predictions from the model.""" - if not features: - features = {} - inputs_old = None - if "inputs" in features and len(features["inputs"].shape) < 4: - inputs_old = features["inputs"] - features["inputs"] = tf.expand_dims(features["inputs"], 2) - - # Create an initial targets tensor. - if "partial_targets" in features: - initial_output = tf.convert_to_tensor(features["partial_targets"]) - else: - batch_size = tf.shape(features["inputs"])[0] - length = tf.shape(features["inputs"])[1] - initial_output = tf.zeros((batch_size, 2 * length, 1, 1), dtype=tf.int64) - - features["targets"] = initial_output - sharded_logits, _ = self.model_fn(features, False) - sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) - samples = tf.concat(sharded_samples, 0) - - # More steps. - how_many_more_steps = 5 - for _ in xrange(how_many_more_steps): - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - features["targets"] = samples - sharded_logits, _ = self.model_fn(features, False) - sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) - samples = tf.concat(sharded_samples, 0) - - if inputs_old is not None: # Restore to not confuse Estimator. - features["inputs"] = inputs_old - return samples - - -@registry.register_hparams -def transformer_adv_small(): - """Set of hyperparameters.""" - hparams = transformer.transformer_small() - hparams.batch_size = 2048 - hparams.learning_rate_warmup_steps = 4000 - hparams.num_hidden_layers = 3 - hparams.hidden_size = 384 - hparams.filter_size = 2048 - hparams.label_smoothing = 0.0 - hparams.weight_decay = 0.1 - hparams.symbol_modality_skip_top = True - hparams.target_modality = "symbol:ctc" - hparams.add_hparam("num_compress_steps", 2) - hparams.add_hparam("extra_steps", 0) - hparams.add_hparam("noise_val", 0.3) - hparams.add_hparam("delta_max", 2.0) - return hparams - - -@registry.register_hparams -def transformer_adv_base(): - """Set of hyperparameters.""" - hparams = transformer_adv_small() - hparams.batch_size = 1024 - hparams.hidden_size = 512 - hparams.filter_size = 4096 - hparams.num_hidden_layers = 6 - return hparams diff --git a/tensor2tensor/models/transformer_alternative.py b/tensor2tensor/models/transformer_alternative.py deleted file mode 100644 index 2604748be..000000000 --- a/tensor2tensor/models/transformer_alternative.py +++ /dev/null @@ -1,174 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Alternative transformer network. - -Using different layer types to demonstrate alternatives to self attention. - -Code is mostly copied from original Transformer source. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -from six.moves import xrange # pylint: disable=redefined-builtin - -from tensor2tensor.layers import common_attention -from tensor2tensor.layers import common_layers -from tensor2tensor.models import transformer -from tensor2tensor.utils import registry -from tensor2tensor.utils import t2t_model - -import tensorflow as tf - - -@registry.register_model -class TransformerAlt(t2t_model.T2TModel): - - def model_fn_body(self, features): - hparams = self._hparams - targets = features["targets"] - inputs = features.get("inputs") - target_space = features.get("target_space_id") - - inputs = common_layers.flatten4d3d(inputs) - targets = common_layers.flatten4d3d(targets) - - (encoder_input, - encoder_attention_bias, _) = (transformer.transformer_prepare_encoder( - inputs, target_space, hparams)) - (decoder_input, _) = (transformer.transformer_prepare_decoder( - targets, hparams)) - - encoder_mask = bias_to_mask(encoder_attention_bias) - - def residual_fn(x, y): - return common_layers.layer_norm(x + tf.nn.dropout( - y, 1.0 - hparams.residual_dropout)) - - encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout) - decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout) - - encoder_output = alt_transformer_encoder(encoder_input, residual_fn, - encoder_mask, hparams) - - decoder_output = alt_transformer_decoder(decoder_input, encoder_output, - residual_fn, - encoder_attention_bias, hparams) - - decoder_output = tf.expand_dims(decoder_output, 2) - - return decoder_output - - -def composite_layer(inputs, mask, hparams, for_output=False): - """Composite layer.""" - x = inputs - - # Applies ravanbakhsh on top of each other. - if hparams.composite_layer_type == "ravanbakhsh": - for layer in xrange(hparams.layers_per_layer): - with tf.variable_scope(".%d" % layer): - x = common_layers.ravanbakhsh_set_layer( - hparams.hidden_size, - x, - mask=mask, - sequential=for_output, - dropout=hparams.relu_dropout) - - # Transforms elements to get a context, and then uses this in a final layer. - elif hparams.composite_layer_type == "reembedding": - # Transform elements n times and then pool. - for layer in xrange(hparams.layers_per_layer): - with tf.variable_scope("sub_layer_%d" % layer): - x = common_layers.linear_set_layer( - hparams.hidden_size, x, dropout=hparams.relu_dropout) - if for_output: - context = common_layers.running_global_pool_1d(x) - else: - context = common_layers.global_pool_1d(x, mask=mask) - # Final layer. - x = common_layers.linear_set_layer( - hparams.hidden_size, x, context=context, dropout=hparams.relu_dropout) - return x - - -def alt_transformer_encoder(encoder_input, - residual_fn, - mask, - hparams, - name="encoder"): - """Alternative encoder.""" - x = encoder_input - with tf.variable_scope(name): - x = encoder_input - for layer in xrange(hparams.num_hidden_layers): - with tf.variable_scope("layer_%d" % layer): - x = residual_fn(x, composite_layer(x, mask, hparams)) - return x - - -def alt_transformer_decoder(decoder_input, - encoder_output, - residual_fn, - encoder_decoder_attention_bias, - hparams, - name="decoder"): - """Alternative decoder.""" - with tf.variable_scope(name): - x = decoder_input - for layer in xrange(hparams.num_hidden_layers): - with tf.variable_scope("layer_%d" % layer): - x_ = common_attention.multihead_attention( - x, - encoder_output, - encoder_decoder_attention_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout, - name="encdec_attention") - - x_ = residual_fn(x_, composite_layer( - x_, None, hparams, for_output=True)) - x = residual_fn(x, x_) - return x - - -def bias_to_mask(bias): - # We need masks of the form batch size x input sequences - # Biases are of the form batch_size x num_heads x input sequences x - # output sequences. Squeeze out dim one, and get the first element of - # each vector. - bias = tf.squeeze(bias, [1])[:, :, 0] - bias = -tf.clip_by_value(bias, -1.0, 1.0) - mask = 1 - bias - return mask - - -@registry.register_hparams -def transformer_alt(): - """Set of hyperparameters.""" - hparams = transformer.transformer_base() - hparams.batch_size = 2048 - hparams.num_hidden_layers = 10 - hparams.add_hparam("layers_per_layer", 4) - # Composite layer: ravanbakhsh or reembedding. - hparams.add_hparam("composite_layer_type", "ravanbakhsh") - return hparams diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 81156babd..ad5143095 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -300,11 +300,11 @@ def compress(x, c, is_2d, hparams, name): # Run compression by strided convs. cur = x k1 = (3, 3) if is_2d else (3, 1) + cur = residual_conv(cur, hparams.num_compress_steps, k1, hparams, "rc") k2 = (2, 2) if is_2d else (2, 1) for i in xrange(hparams.num_compress_steps): if c is not None: cur = attend(cur, c, hparams, "compress_attend_%d" % i) - cur = residual_conv(cur, 1, k1, hparams, "compress_rc_%d" % i) cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), k2)], strides=k2, name="compress_%d" % i) @@ -493,20 +493,24 @@ def ae_latent_sample(t_c, inputs, ed, embed, iters, hparams): t_pred = decode_transformer(inputs, ed, t_c, hparams, "extra") t_pred = tf.layers.dense(t_pred, 2**16, name="extra_logits") t_bit = multinomial_sample(t_pred, 2**16, hparams.sampling_temp) - for i in xrange(iters): + + def next_bit(t_bit, i): t_bit_prev = t_bit with tf.variable_scope(tf.get_variable_scope(), reuse=True): t_c = embed(t_bit) t_pred = decode_transformer(inputs, ed, t_c, hparams, "extra") t_pred = tf.layers.dense(t_pred, 2**16, name="extra_logits") t_bit = multinomial_sample(t_pred, 2**16, hparams.sampling_temp) - t_bit = tf.concat([t_bit_prev[:, :(i+1), :], - t_bit[:, (i+1):, :]], axis=1) + return tf.concat([t_bit_prev[:, :(i+1), :], + t_bit[:, (i+1):, :]], axis=1) + + for i in xrange(iters): + t_bit = next_bit(t_bit, i) return t_bit def ae_transformer_internal(inputs, targets, target_space, hparams, - beam_size, cache=None): + beam_size, cache=None, predict_mask=1.0): """AE Transformer, main step used for training.""" hparams.z_size = hparams.hidden_size with tf.variable_scope("ae_transformer"): @@ -525,12 +529,10 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, # Autoencoding. losses = {"vc": tf.constant(0.0), "sm": tf.constant(0.0)} - latent_len = hparams.latent_length if hparams.do_ae: - targets_pad, _ = common_layers.pad_to_same_length( - targets, targets, final_length_divisible_by=latent_len * 2**k) - targets_c = compress(targets_pad, None, False, hparams, "compress") - targets_c = targets_c[:, :latent_len, :, :] + targets, _ = common_layers.pad_to_same_length( + targets, targets, final_length_divisible_by=2**k) + targets_c = compress(targets, None, False, hparams, "compress") if hparams.mode != tf.estimator.ModeKeys.PREDICT: # Compress and bottleneck. t_c, t_bit, vc_loss, _ = bottleneck(targets_c, hparams, 2*2048, "vc") @@ -546,25 +548,45 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, t_pred = tf.layers.dense(t_pred, 2**16, name="extra_logits") losses["sm"] = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=t_bit, logits=t_pred) - losses["sm"] = tf.reduce_mean(losses["sm"]) * 0.2 * tf.to_float(cond) + losses["sm"] = tf.reduce_mean(losses["sm"]) * 0.5 * tf.to_float(cond) else: + latent_len = tf.shape(targets_c)[1] _, _, _, embed = bottleneck(targets_c, hparams, 2*2048, "vc") - t_c = tf.zeros_like(targets_c) + t_c = tf.zeros_like(targets_c[:, :latent_len, :, :]) if cache is None: - cache = ae_latent_sample(t_c, inputs, ed, embed, 3, hparams) + cache = ae_latent_sample(t_c, inputs, ed, embed, 8, hparams) cache = cache[0, :, :] cache = tf.reshape(cache, [1, latent_len, 1]) cache = tf.tile(cache, [beam_size, 1, 1]) t_c = embed(cache) # Postprocess. - pos = tf.get_variable("pos", [1, latent_len + 1, 1, hparams.hidden_size]) + d = t_c + pos = tf.get_variable("pos", [1, 1000, 1, hparams.hidden_size]) + pos = pos[:, :tf.shape(t_c)[1] + 1, :, :] t_c = tf.pad(t_c, [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos + + # Masking. + if hparams.do_mask: + masking = common_layers.inverse_lin_decay(100000) + masking *= common_layers.inverse_exp_decay(25000) # Not much at start. + masking -= tf.random_uniform([]) * 0.3 + masking = tf.minimum(tf.maximum(masking, 0.0), 1.0) + if hparams.mode == tf.estimator.ModeKeys.PREDICT: + masking = predict_mask + mask = tf.less(masking, tf.random_uniform(tf.shape(targets)[:-1])) + mask = tf.expand_dims(tf.to_float(mask), 3) + for i in xrange(hparams.num_compress_steps): + j = hparams.num_compress_steps - i - 1 + d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j) + d = decompress_step(d, None, hparams, + i > 0, False, "decompress_%d" % j) + noise = d # tf.random_uniform(tf.shape(targets)) + targets = mask * targets + (1.0 - mask) * noise targets = tf.concat([tf.reverse(t_c, [1]), targets], axis=1) - else: - targets = tf.pad(targets, [[0, 0], [latent_len + 1, 0], [0, 0], [0, 0]]) res = decode_transformer(inputs, ed, targets, hparams, "decoder") - res = res[:, latent_len + 1:, :, :] + if hparams.do_ae: + res = res[:, tf.shape(t_c)[1]:, :, :] return res, losses, cache @@ -572,6 +594,10 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, class TransformerAE(t2t_model.T2TModel): """Autoencoder-augmented Transformer.""" + def __init__(self, *args, **kwargs): + super(TransformerAE, self).__init__(*args, **kwargs) + self.predict_mask = 1.0 + @property def has_input(self): return self._problem_hparams.input_modality @@ -585,7 +611,8 @@ def model_fn_body(self, features): with tf.variable_scope(tf.get_variable_scope(), reuse=reuse): res, loss, _ = ae_transformer_internal( inputs, features["targets"], features["target_space_id"], - self._hparams, beam_size, features.get("cache_raw", None)) + self._hparams, beam_size, features.get("cache_raw", None), + predict_mask=self.predict_mask) return res, loss def prepare_features_for_infer(self, features): @@ -603,6 +630,38 @@ def prepare_features_for_infer(self, features): self._hparams, beam_size) features["cache_raw"] = cache + def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, + alpha=0.0): + """Produce predictions from the model.""" + if not self._hparams.do_mask: + return super(TransformerAE, self).infer( + features, decode_length, beam_size, top_beams, alpha) + if not features: + features = {} + inputs_old = None + if "inputs" in features and len(features["inputs"].shape) < 4: + inputs_old = features["inputs"] + features["inputs"] = tf.expand_dims(features["inputs"], 2) + + # Create an initial targets tensor. + if "partial_targets" in features: + initial_output = tf.convert_to_tensor(features["partial_targets"]) + else: + batch_size = tf.shape(features["inputs"])[0] + length = tf.shape(features["inputs"])[1] + target_length = tf.to_int32(1.3 * tf.to_float(length)) + initial_output = tf.zeros((batch_size, target_length, 1, 1), + dtype=tf.int64) + + features["targets"] = initial_output + sharded_logits, _ = self.model_fn(features, False, force_full_predict=True) + sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) + samples = tf.concat(sharded_samples, 0) + + if inputs_old is not None: # Restore to not confuse Estimator. + features["inputs"] = inputs_old + return samples + @registry.register_hparams def transformer_ae_small(): @@ -615,12 +674,12 @@ def transformer_ae_small(): hparams.filter_size = 2048 hparams.label_smoothing = 0.0 hparams.add_hparam("c_size", 16) - hparams.add_hparam("latent_length", 4) hparams.add_hparam("noise_dev", 1.0) hparams.add_hparam("d_mix", 0.5) # Bottleneck kinds supported: dense, semhash, gumbel-softmax. hparams.add_hparam("bottleneck_kind", "semhash") hparams.add_hparam("do_ae", True) + hparams.add_hparam("do_mask", True) hparams.add_hparam("drop_inputs", False) hparams.add_hparam("z_size", 128) hparams.add_hparam("v_size", 1024*64) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index ac11d54aa..f5ec04679 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -505,13 +505,15 @@ def _shard_features(self, features): # pylint: disable=missing-docstring 0)) return sharded_features - def model_fn(self, features, skip=False): + def model_fn(self, features, skip=False, force_full_predict=False): """Computes the entire model and produces sharded logits and losses. Args: features: A dictionary of feature name to tensor. - skip: a boolean, if we're just dummy-calling and actually skip this model + skip: a Boolean, if we're just dummy-calling and actually skip this model (but we need to create variables to not confuse distributed training). + force_full_predict: a Boolean, if set, then last-position-only + optimizations are not used even when allowed and in PREDICT mode. Returns: sharded_logits: a list of `Tensor`s, one per datashard. @@ -579,7 +581,8 @@ def model_fn(self, features, skip=False): with tf.variable_scope(target_modality.name, reuse=target_reuse): last_only = (target_modality.top_is_pointwise and - self._hparams.mode == tf.estimator.ModeKeys.PREDICT) + self._hparams.mode == tf.estimator.ModeKeys.PREDICT and + not force_full_predict) if not last_only: sharded_logits = target_modality.top_sharded( body_outputs, sharded_features["targets"], dp) From 0c026d294d40cd131f3e3e2ecce4df02ab661143 Mon Sep 17 00:00:00 2001 From: wingsbr <bswing81@yahoo.com> Date: Tue, 14 Nov 2017 09:23:31 -0600 Subject: [PATCH 0580/4095] Added a librispeech data generator. --- tensor2tensor/bin/t2t-datagen | 6 + tensor2tensor/data_generators/librispeech.py | 109 +++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 tensor2tensor/data_generators/librispeech.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index 2ac0f0db2..b8a1027f3 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -44,6 +44,7 @@ from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.data_generators import librispeech from tensor2tensor.utils import registry from tensor2tensor.utils import usr_dir @@ -113,6 +114,11 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: audio.timit_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 626, vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), + "librispeech": ( + lambda: librispeech.librispeech_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True), + lambda: librispeech.librispeech_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False)), } # pylint: enable=g-long-lambda diff --git a/tensor2tensor/data_generators/librispeech.py b/tensor2tensor/data_generators/librispeech.py new file mode 100644 index 000000000..82b032c35 --- /dev/null +++ b/tensor2tensor/data_generators/librispeech.py @@ -0,0 +1,109 @@ +import os +from subprocess import call +import tarfile +import wave +import numpy as np +import six +from tensor2tensor.data_generators import generator_utils + +_LIBRISPEECH_TRAIN_DATASETS = [ + [ + "http://www.openslr.org/resources/12/train-clean-100.tar.gz", # pylint: disable=line-too-long + "train-clean-100" + ], + [ + "http://www.openslr.org/resources/12/train-clean-360.tar.gz", + "train-clean-360" + ], + [ + "http://www.openslr.org/resources/12/train-other-500.tar.gz", + "train-other-500" + ], +] +_LIBRISPEECH_TEST_DATASETS = [ + [ + "http://www.openslr.org/resources/12/dev-clean.tar.gz", + "dev-clean" + ], + [ + "http://www.openslr.org/resources/12/dev-other.tar.gz", + "dev-other" + ], +] + + +def _collect_data(directory, input_ext, transcription_ext): + """Traverses directory collecting input and target files.""" + # Directory from string to tuple pair of strings + # key: the filepath to a datafile including the datafile's basename. Example, + # if the datafile was "/path/to/datafile.wav" then the key would be + # "/path/to/datafile" + # value: a pair of strings (media_filepath, label) + data_files = dict() + for root, _, filenames in os.walk(directory): + transcripts = [filename for filename in filenames if transcription_ext in filename] + for transcript in transcripts: + basename = transcript.strip(transcription_ext) + transcript_path = os.path.join(root, transcript) + with open(transcript_path, 'r') as transcript_file: + for transcript_line in transcript_file: + line_contents = transcript_line.split(" ", 1) + assert len(line_contents) == 2 + media_base, label = line_contents + key = os.path.join(root, media_base) + assert key not in data_files + media_name = "%s.%s"%(media_base, input_ext) + media_path = os.path.join(root, media_name) + data_files[key] = (media_path, label) + return data_files + + +def _get_audio_data(filepath): + # Construct a true .wav file. + out_filepath = filepath.strip(".flac") + ".wav" + # Assumes sox is installed on system. Sox converts from FLAC to WAV. + call(["sox", filepath, out_filepath]) + wav_file = wave.open(open(out_filepath)) + frame_count = wav_file.getnframes() + byte_array = wav_file.readframes(frame_count) + + data = np.fromstring(byte_array, np.uint8).tolist() + return data, frame_count, wav_file.getsampwidth(), wav_file.getnchannels() + + +def librispeech_generator(data_dir, tmp_dir, training, eos_list=None, start_from=0, how_many=0): + eos_list = [1] if eos_list is None else eos_list + datasets = (_LIBRISPEECH_TRAIN_DATASETS if training else _LIBRISPEECH_TEST_DATASETS) + i = 0 + for url, subdir in datasets: + filename = os.path.basename(url) + compressed_file = generator_utils.maybe_download(tmp_dir, filename, url) + + read_type = "r:gz" if filename.endswith("tgz") else "r" + with tarfile.open(compressed_file, read_type) as corpus_tar: + # Create a subset of files that don't already exist. + # tarfile.extractall errors when encountering an existing file + # and tarfile.extract is extremely slow + members = [] + for f in corpus_tar: + if not os.path.isfile(os.path.join(tmp_dir, f.name)): + members.append(f) + corpus_tar.extractall(tmp_dir, members=members) + + data_dir = os.path.join(tmp_dir, "LibriSpeech", subdir) + data_files = _collect_data(data_dir, "flac", "txt") + data_pairs = data_files.values() + for media_file, text_data in sorted(data_pairs)[start_from:]: + if how_many > 0 and i == how_many: + return + i += 1 + audio_data, sample_count, sample_width, num_channels = _get_audio_data( + media_file) + label = [ord(c) for c in text_data] + eos_list + yield { + "inputs": audio_data, + "audio/channel_count": [num_channels], + "audio/sample_count": [sample_count], + "audio/sample_width": [sample_width], + "targets": label + } \ No newline at end of file From 75ec0f6e9950bb5e76cf897b0e7e4e61fca5a0e4 Mon Sep 17 00:00:00 2001 From: wingsbr <bswing@synchronvideo.com> Date: Tue, 14 Nov 2017 09:36:30 -0600 Subject: [PATCH 0581/4095] . --- tensor2tensor/bin/t2t-datagen | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen index b8a1027f3..e9eca3672 100644 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -118,7 +118,7 @@ _SUPPORTED_PROBLEM_GENERATORS = { lambda: librispeech.librispeech_generator( FLAGS.data_dir, FLAGS.tmp_dir, True), lambda: librispeech.librispeech_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False)), + FLAGS.data_dir, FLAGS.tmp_dir, False)), } # pylint: enable=g-long-lambda From a82231e83f315f5993072c1b92ef1a0fe7d2f9de Mon Sep 17 00:00:00 2001 From: urvashik <urvashik@stanford.edu> Date: Tue, 14 Nov 2017 13:06:38 -0800 Subject: [PATCH 0582/4095] Generating raw data files, completed pipeline for rouge --- .../data_generators/cnn_dailymail.py | 29 +++++++++++++++---- tensor2tensor/utils/get_cnndm_rouge.sh | 3 ++ tensor2tensor/utils/get_rouge.py | 7 ++--- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index c0f6756a5..2082036d2 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function +import io import os import tarfile import hashlib @@ -45,7 +46,7 @@ # Train/Dev/Test Splits for summarization data _TRAIN_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_train.txt" _DEV_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt" -_TEST_URLS = "https://github.com/abisee/cnn-dailymail/blob/master/url_lists/all_test.txt" +_TEST_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_test.txt" # End-of-sentence marker. EOS = text_encoder.EOS_ID @@ -117,14 +118,13 @@ def generate_hash(inp): return filelist -def example_generator(tmp_dir, is_training, sum_token): +def example_generator(all_files, urls_path, sum_token): def fix_run_on_sents(line): if u"@highlight" in line: return line if line=="": return line if line[-1] in END_TOKENS: return line return line + u"." - all_files, urls_path = _maybe_download_corpora(tmp_dir, is_training) filelist = example_splits(urls_path, all_files) story_summary_split_token = u" <summary> " if sum_token else " " @@ -156,6 +156,23 @@ def _story_summary_split(story): split_pos = story.find(split_str) return story[:split_pos], story[split_pos+split_str_len:] # story, summary +def write_raw_text_to_files(all_files, urls_path, data_dir, tmp_dir, is_training): + def write_to_file(all_files, urls_path, data_dir, filename): + with io.open(os.path.join(data_dir, filename+".source"), "w") as fstory, io.open(os.path.join(data_dir, filename+".target"), "w") as fsummary: + for example in example_generator(all_files, urls_path, sum_token=True): + story, summary = _story_summary_split(example) + fstory.write(story+"\n") + fsummary.write(summary+"\n") + + filename = "cnndm.train" if is_training else "cnndm.dev" + tf.logging.info("Writing %s" % filename) + write_to_file(all_files, urls_path, data_dir, filename) + + if not is_training: + test_urls_path = generator_utils.maybe_download(tmp_dir, "all_test.txt", _TEST_URLS) + filename = "cnndm.test" + tf.logging.info("Writing %s" % filename) + write_to_file(all_files, test_urls_path, data_dir, filename) @registry.register_problem class SummarizeCnnDailymail32k(problem.Text2TextProblem): @@ -198,10 +215,12 @@ def use_train_shards_for_dev(self): return False def generator(self, data_dir, tmp_dir, is_training): + all_files, urls_path = _maybe_download_corpora(tmp_dir, is_training) encoder = generator_utils.get_or_generate_vocab_inner( data_dir, self.vocab_file, self.targeted_vocab_size, - example_generator(tmp_dir, is_training, sum_token=False)) - for example in example_generator(tmp_dir, is_training, sum_token=True): + example_generator(all_files, urls_path, sum_token=False)) + write_raw_text_to_files(all_files, urls_path, data_dir, tmp_dir, is_training) + for example in example_generator(all_files, urls_path, sum_token=True): story, summary = _story_summary_split(example) encoded_summary = encoder.encode(summary) + [EOS] encoded_story = encoder.encode(story) + [EOS] diff --git a/tensor2tensor/utils/get_cnndm_rouge.sh b/tensor2tensor/utils/get_cnndm_rouge.sh index 9833ce248..0f52bb56c 100644 --- a/tensor2tensor/utils/get_cnndm_rouge.sh +++ b/tensor2tensor/utils/get_cnndm_rouge.sh @@ -1,8 +1,11 @@ #!/bin/bash +# Path to moses dir mosesdecoder=$1 +# Path to file containing gold summaries, one per line targets_file=$2 +# Path to file containing model generated summaries, one per line decodes_file=$3 # Tokenize. diff --git a/tensor2tensor/utils/get_rouge.py b/tensor2tensor/utils/get_rouge.py index 2e72e2e0d..c15545cfd 100644 --- a/tensor2tensor/utils/get_rouge.py +++ b/tensor2tensor/utils/get_rouge.py @@ -37,10 +37,7 @@ tf.flags.DEFINE_string("targets_filename", None, "File containing model target summaries tokenized") def write_to_file(filename, data): - # TODO: ensure the output format (chars split by spaces) was as intended data = ".\n".join(data.split(". ")) - if len(data.strip()) == 0: - print(data, filename) with open(filename, "w") as fp: fp.write(data) @@ -63,9 +60,9 @@ def main(_): tmpdir = mkdtemp() tf.logging.info("tmpdir: %s" % tmpdir) - # system = decodes + # system = decodes/predictions system_dir = os.path.join(tmpdir, 'system') - # model = gold + # model = targets/gold model_dir = os.path.join(tmpdir, 'model') os.mkdir(system_dir) os.mkdir(model_dir) From 985b637a4f231f0bc78a1d08e37f4d1b3818a773 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Fri, 17 Nov 2017 14:15:59 +0100 Subject: [PATCH 0583/4095] fix the semantics of decode_to_file `--decode_to_file=xy` should use `xy` as the output filename, not `xy.$MODEL.$HPARAMS.$PROBLEM.beam$BEAM_SIZE.alpha$ALPHA.decodes`. It is easy for users to add whatever env variables to xy, but it is impossible to change the hardwired suffix. --- tensor2tensor/bin/t2t-decoder | 5 ++--- tensor2tensor/utils/decoding.py | 17 +++++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index c2bf97f94..5f05f5bcb 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -47,9 +47,8 @@ flags = tf.flags FLAGS = flags.FLAGS flags.DEFINE_string("output_dir", "", "Training directory to load from.") -flags.DEFINE_string("decode_from_file", None, "Path to decode file") -flags.DEFINE_string("decode_to_file", None, - "Path prefix to inference output file") +flags.DEFINE_string("decode_from_file", None, "Path to the source file for decoding") +flags.DEFINE_string("decode_to_file", None, "Path to the decoded (output) file") flags.DEFINE_bool("decode_interactive", False, "Interactive local inference mode.") flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 629b2ed26..d6dc5f1db 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -252,17 +252,14 @@ def input_fn(): # _decode_batch_input_fn sorted_inputs.reverse() decodes.reverse() - # Dumping inputs and outputs to file filename.decodes in - # format result\tinput in the same order as original inputs - if decode_to_file: - output_filename = decode_to_file - else: - output_filename = filename + # If decode_to_file was provided use it as the output filename without any change + # (except for adding shard_id if using more shards for decoding). + # Otherwise, use the input filename plus model, hp, problem, beam, alpha. + decode_filename = decode_to_file if decode_to_file else filename if decode_hp.shards > 1: - base_filename = output_filename + ("%.2d" % decode_hp.shard_id) - else: - base_filename = output_filename - decode_filename = _decode_filename(base_filename, problem_name, decode_hp) + decode_filename = decode_filename + ("%.2d" % decode_hp.shard_id) + if not decode_to_file: + decode_filename = _decode_filename(decode_filename, problem_name, decode_hp) tf.logging.info("Writing decodes into %s" % decode_filename) outfile = tf.gfile.Open(decode_filename, "w") for index in range(len(sorted_inputs)): From 85ebd36b13b71538cc644bee842fa90e5350fe7a Mon Sep 17 00:00:00 2001 From: Nima Rafiee <rafiee.nima@gmail.com> Date: Fri, 17 Nov 2017 22:52:19 +0100 Subject: [PATCH 0584/4095] Cycle_gan Updated --- .DS_Store | Bin 0 -> 6148 bytes tensor2tensor/.DS_Store | Bin 0 -> 6148 bytes tensor2tensor/models/cycle_gan.py | 124 ++++++++++++++++-------------- 3 files changed, 65 insertions(+), 59 deletions(-) create mode 100644 .DS_Store create mode 100644 tensor2tensor/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..59d856ade63037dc970925d4033368b8abe95293 GIT binary patch literal 6148 zcmeHK%Wl&^6g|@h;()4Vfz&R&L1Gc1loYx^NFfhFBwnfuEC97}9nnax9mNh&n+9dW zKLCHgr|<=Q2p<4v9;p(iAXZU@=1O<Yotb-PJje2k2Y{#zf-Rs3z@babU1M>9$$sez zwqjZeM4`rbj$L@IFh2Cdg>2hk6|f3iHU;Fjo5w@E#2y}F=KGC(_Q-AFKK<4&WsHDz z)S35;`MYSLrkE|(AK^BR5$Lb~oc%4s&LKdILnQNgVk486#}kfTN0TFaXlAlMWwHx= zmL^xxz*FJ{&mn$h7!DG2kYa!y^Lsd%pFyqq^USK(nA<@_R*XLG%+@kYx#E9h`opB> zhf1sOelP4MX}SDEtgPm)T+KU$f>T&?-g!sy&`aCtAgQ(cZ&a-tbbJ%<#c_BZxBXY$ z&TzkYqaH_TyBl@-GwE+%ccUO4*5W}FBx;-&?evITHPL=?b2O?{?v~ujc6nTKM>{*^ zlDl2GHy%6A#?7q<t%Ku})6v=Z;}<TECb;1%TQ@j`uMGYo)(5X2g>m$rS<?}wOp8la zz`OwG`jY3h(^&<q0+(L_xj(pciB*FWjcV&)p{@YL9GjIfmOl-W;~K0QoM>bZO=v1n zQ-!%=2u)|ZYx1fFCmJ;!!dyOtnOT?{icqt2d{?DIR5jYtDqt0uS723Fi}L(${`vkt zPqH<ufK}kXQb6PyUZch(nX`3aaq_Hn=?8RaGH;?$QCOJcSOxMZ-lQvIpDP1m)!;-U RTWI!2K*?YWtH4qf_zmYlvkm|N literal 0 HcmV?d00001 diff --git a/tensor2tensor/.DS_Store b/tensor2tensor/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5395b66bec4552e19814be1ef77610f15c0a62d3 GIT binary patch literal 6148 zcmeHK&u-H&9R7LjN=liQ1JZWP1&Kq2I(4uELW;U^*bS)&4uHCZM4P27u9{?3*A?Z$ zI{<IMqwoYg2oC_C|A5k@a6}0CSJ|KKd|xbocI=vn*r<pOh+HCakR-NmBij)2xlE1N zGA(z2Oud6sP80kd(6qs}4b}ncz`y2z{C0clo8^>J_4{3%CgN?Fh!709Jv>Ib|6K)2 zL@f%zPiw5tHP$Xg6kt~f(8oE&=p&|?Y7xL}UCs7t>JBB~E|jLuec#|-l0a`UG<EK` zKXJo(HZ|k?5>6AI<%7Xjw$*BHcWyb|uG8Id-i$BAe4Gb)k&S}cD^=rB9GdcTA(CI^ zAbi2&`NX|@EYdvSX*{dYU%%vOB<3Sgq*12c^DM|Opj91B-2LTpIDFXmhKGYy-&-CX z4f@{U@X>1JID7XFo}8XtF0Pk1t9KtUAOyp<t76ySPxypFhbo3#jb~{h(sQ&+O%EIN z40eSuD+Vu9TXucQ<z#-SOu5@oO)#}F)cHoeTryRC3%n^68z#NV+NJ_LzF`{WlvkNM zbOl}pegHP7YqhXA##LLEOfG<`5>*>!@i;weg>}F>U>&GAAom9kNuqDC)Tp)&Wa<h4 zY@=BP%JR>G)OZGcgQZ41K`<=^YAG{U45sC1_YBWBSZdUAV&?L}OwY{RP?+i+@Anj) zn6J_1)&c9lx&tj;b>;d0^!NAwb&;)E2do4Cl>@AOGCmn$Nak#93{IZ49?}_-49P1s mDhV=k9IJvniuaLJ;F%{2K;K}g5iJP&M?l$NGwZ-#b>Ih(`PyUv literal 0 HcmV?d00001 diff --git a/tensor2tensor/models/cycle_gan.py b/tensor2tensor/models/cycle_gan.py index 4cf1a5871..08d43626e 100644 --- a/tensor2tensor/models/cycle_gan.py +++ b/tensor2tensor/models/cycle_gan.py @@ -29,30 +29,38 @@ import tensorflow as tf -def reconstruct_loss(x, gt, hparams, reuse=None): - pred = tf.layers.dense(x, hparams.vocab_size, name="softmax", reuse=reuse) - xent, w = common_layers.padded_cross_entropy(pred, gt, 0.0) - return xent / w - def discriminator(x, compress, hparams, name, reuse=None): with tf.variable_scope(name, reuse=reuse): - x = tf.stop_gradient(2 * x) - x # Reverse gradient. + x = tf.stop_gradient(2 * x) - x # Reverse gradient. ########## why ######## if compress: - x = transformer_vae.compress(x, None, hparams, "compress") + x = transformer_vae.compress(x, None, False, hparams, "compress") else: - x = transformer_vae.residual_conv(x, 1, hparams, "compress_rc") + x = transformer_vae.residual_conv(x, 1, 3,hparams, "compress_rc") y = tf.reduce_mean(x, axis=1) return tf.tanh(tf.layers.dense(y, 1, name="reduce")) +def generator(x, hparams, name, reuse=False): + with tf.variable_scope(name, reuse=reuse): + return transformer_vae.residual_conv(x, 1, 3, hparams,"generator") -def discriminate_loss(x, y, compress, hparams, name): + +def loss(real_input, fake_input, compress, hparams, lsgan, name): + eps = 1e-12 with tf.variable_scope(name): - d1 = discriminator(x, compress, hparams, "discriminator") - d2 = discriminator(y, compress, hparams, "discriminator", reuse=True) - dloss = tf.reduce_mean(tf.abs(d1 - d2)) - return - dloss - + d1 = discriminator(real_input, compress, hparams, "discriminator") + d2 = discriminator(fake_input, compress, hparams, "discriminator", reuse=True) + if lsgan: + dloss = tf.reduce_mean(tf.squared_difference(d1, 0.9)) + tf.reduce_mean(tf.square(d2)) + gloss = tf.reduce_mean(tf.squared_difference(d2, 0.9)) + loss = (dloss + gloss)/2 + else: #cross_entropy + dloss = -tf.reduce_mean(tf.log(d1 + eps)) - tf.reduce_mean(tf.log(1 - d2 + eps)) + gloss = -tf.reduce_mean(tf.log(d2 + eps)) + loss = (dloss + gloss)/2 + return loss + + def split_on_batch(x): batch_size = tf.shape(x)[0] @@ -70,49 +78,39 @@ def cycle_gan_internal(inputs, targets, _, hparams): targets = common_layers.embedding( targets_orig, hparams.vocab_size, hparams.hidden_size, "embed", reuse=True) - - # Split the batch into input-input and target-target parts. - inputs1, _ = split_on_batch(inputs) - _, targets2 = split_on_batch(targets) - - # Define F and G, called inp2tgt and tgt2inp here. - def inp2tgt(x, reuse=False): - return transformer_vae.residual_conv(x, 1, hparams, "inp2tgt", reuse) - def tgt2inp(x, reuse=False): - return transformer_vae.residual_conv(x, 1, hparams, "tgt2inp", reuse) - - # Input-input part. - inp1_tgt = inp2tgt(inputs1) - inp1_back = tgt2inp(inp1_tgt) - - # Target-target part. - tgt2_inp = tgt2inp(targets2, reuse=True) - tgt2_back = inp2tgt(tgt2_inp, reuse=True) - - # Reconstruction losses. - inp1_orig, _ = split_on_batch(inputs_orig) - _, tgt2_orig = split_on_batch(targets_orig) - inp1_loss = reconstruct_loss( - inp1_back, tf.squeeze(inp1_orig, axis=3), hparams) - tgt2_loss = reconstruct_loss( - tgt2_back, tf.squeeze(tgt2_orig, axis=3), hparams, reuse=True) - - # Discriminator losses. - dloss1 = discriminate_loss(inputs1, tgt2_inp, True, hparams, "inp_disc") - dloss2 = discriminate_loss(targets2, inp1_tgt, True, hparams, "tgt_disc") - - # Reconstruct targets from inputs. - tgt = inp2tgt(inputs, reuse=True) - tgt = tf.layers.dense(tgt, hparams.vocab_size, name="softmax", reuse=True) - - # We use the reconstruction only for tracking progress, no gradients here! - tgt = tf.stop_gradient(tf.expand_dims(tgt, axis=2)) - - losses = {"input_input": hparams.cycle_loss_multiplier * inp1_loss, - "target_target": hparams.cycle_loss_multiplier * tgt2_loss, - "input_disc": dloss1, - "target_disc": dloss2} - return tgt, losses + + X, _ = split_on_batch(inputs) + _, Y = split_on_batch(targets) + + X_unembeded, _ = split_on_batch(inputs_orig) + _, Y_unembeded = split_on_batch(targets_orig) + + + # Y --> X + Y_fake = generator(Y, hparams, 'Fy', reuse=False) + YtoXloss = loss(X, Y_fake, True, hparams, True, "YtoX") + + # X --> Y + X_fake = generator(X, hparams, 'Gx', reuse=False) + XtoYloss = loss(Y, X_fake, True, hparams, True, "XtoY") + + # Cycle-Consistency + Y_fake_ = generator(Y_fake, hparams, 'Gx', reuse=True) + X_fake_ = generator(X_fake, hparams, 'Fy', reuse=True) + XtoXloss = hparams.cycle_loss_multiplier1 * tf.reduce_mean(tf.abs(X_fake_ - X)) + YtoYloss = hparams.cycle_loss_multiplier2 * tf.reduce_mean(tf.abs(Y_fake_ - Y)) + cycloss = XtoXloss + YtoYloss + + + sample_generated = generator(inputs, hparams, 'Gx', reuse=True) + sample_generated = tf.layers.dense(sample_generated, hparams.vocab_size, name="softmax", reuse=None) + sample_generated = tf.stop_gradient(tf.expand_dims(sample_generated, axis=2)) + + losses = {"cycloss": cycloss, + "YtoXloss": YtoXloss, + "XtoYloss": XtoYloss} + + return sample_generated, losses @registry.register_model @@ -134,7 +132,15 @@ def cycle_gan_small(): hparams.weight_decay = 3.0 hparams.learning_rate = 0.05 hparams.kl_warmup_steps = 5000 + #hparams.hidden_size = 8 hparams.learning_rate_warmup_steps = 3000 - hparams.add_hparam("vocab_size", 32) # Vocabulary size, need to set here. - hparams.add_hparam("cycle_loss_multiplier", 2.0) + hparams.add_hparam("vocab_size", 66) # Vocabulary size, need to set here. + hparams.add_hparam("cycle_loss_multiplier1", 10.0) + hparams.add_hparam("cycle_loss_multiplier2", 10.0) return hparams + +# line 43 - 80 -82 are changed : residual network config +#line 42 is changed - compress function + + + From 5365113cc17db280974f7c80e8c6847aec235fe8 Mon Sep 17 00:00:00 2001 From: wingsbr <bswing@synchronvideo.com> Date: Mon, 20 Nov 2017 16:32:52 -0600 Subject: [PATCH 0585/4095] Expanded to include librispeech Problem and Modality. --- tensor2tensor/bin/t2t-datagen | 8 +- tensor2tensor/data_generators/librispeech.py | 294 ++++++++++++++++--- 2 files changed, 254 insertions(+), 48 deletions(-) mode change 100644 => 100755 tensor2tensor/bin/t2t-datagen diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100644 new mode 100755 index e9eca3672..67890371b --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -44,7 +44,6 @@ from tensor2tensor.data_generators import audio from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import snli from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.data_generators import librispeech from tensor2tensor.utils import registry from tensor2tensor.utils import usr_dir @@ -113,12 +112,7 @@ _SUPPORTED_PROBLEM_GENERATORS = { vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15), lambda: audio.timit_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 626, - vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), - "librispeech": ( - lambda: librispeech.librispeech_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True), - lambda: librispeech.librispeech_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False)), + vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), } # pylint: enable=g-long-lambda diff --git a/tensor2tensor/data_generators/librispeech.py b/tensor2tensor/data_generators/librispeech.py index 82b032c35..dcb5b3f88 100644 --- a/tensor2tensor/data_generators/librispeech.py +++ b/tensor2tensor/data_generators/librispeech.py @@ -1,12 +1,20 @@ +from tensor2tensor.data_generators import problem +from tensor2tensor.utils import registry +from tensor2tensor.models import transformer +from tensor2tensor.utils import modality +from tensor2tensor.layers import common_layers +from tensor2tensor.data_generators import text_encoder +import random +import tensorflow as tf +import numpy as np +from tensor2tensor.data_generators import generator_utils import os from subprocess import call import tarfile import wave -import numpy as np -import six -from tensor2tensor.data_generators import generator_utils + -_LIBRISPEECH_TRAIN_DATASETS = [ +'''_LIBRISPEECH_TRAIN_DATASETS = [ [ "http://www.openslr.org/resources/12/train-clean-100.tar.gz", # pylint: disable=line-too-long "train-clean-100" @@ -29,6 +37,18 @@ "http://www.openslr.org/resources/12/dev-other.tar.gz", "dev-other" ], +]''' +_LIBRISPEECH_TRAIN_DATASETS = [ + [ + "http://www.openslr.org/resources/12/dev-other.tar.gz", + "dev-other" + ], +] +_LIBRISPEECH_TEST_DATASETS = [ + [ + "http://www.openslr.org/resources/12/dev-clean.tar.gz", + "dev-clean" + ], ] @@ -69,41 +89,233 @@ def _get_audio_data(filepath): data = np.fromstring(byte_array, np.uint8).tolist() return data, frame_count, wav_file.getsampwidth(), wav_file.getnchannels() - - -def librispeech_generator(data_dir, tmp_dir, training, eos_list=None, start_from=0, how_many=0): - eos_list = [1] if eos_list is None else eos_list - datasets = (_LIBRISPEECH_TRAIN_DATASETS if training else _LIBRISPEECH_TEST_DATASETS) - i = 0 - for url, subdir in datasets: - filename = os.path.basename(url) - compressed_file = generator_utils.maybe_download(tmp_dir, filename, url) - - read_type = "r:gz" if filename.endswith("tgz") else "r" - with tarfile.open(compressed_file, read_type) as corpus_tar: - # Create a subset of files that don't already exist. - # tarfile.extractall errors when encountering an existing file - # and tarfile.extract is extremely slow - members = [] - for f in corpus_tar: - if not os.path.isfile(os.path.join(tmp_dir, f.name)): - members.append(f) - corpus_tar.extractall(tmp_dir, members=members) + + +class LibrispeechTextEncoder(text_encoder.TextEncoder): + + def encode(self, s): + return [ord[c] for c in s] + + def decode(self, ids): + """Transform a sequence of int ids into a human-readable string. + EOS is not expected in ids. + Args: + ids: list of integers to be converted. + Returns: + s: human-readable string. + """ + decoded_ids = [] + for id_ in ids: + if 0 <= id_ < self._num_reserved_ids: + decoded_ids.append(RESERVED_TOKENS[int(id_)]) + else: + decoded_ids.append(id_) + return "".join([chr(d) for d in decoded_ids]) + + + +@registry.register_audio_modality +class LibrispeechModality(modality.Modality): + """Performs strided conv compressions for audio spectral data.""" + + def bottom(self, inputs): + """Transform input from data space to model space. + Args: + inputs: A Tensor with shape [batch, ...] + Returns: + body_input: A Tensor with shape [batch, ?, ?, body_input_depth]. + """ + with tf.variable_scope(self.name): + # TODO(aidangomez): Will need to sort out a better audio pipeline + def xnet_resblock(x, filters, res_relu, name): + with tf.variable_scope(name): + # We only stride along the length dimension to preserve the spectral + # bins (which are tiny in dimensionality relative to length) + y = common_layers.separable_conv_block( + x, + filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))], + first_relu=True, + padding="SAME", + force2d=True, + name="sep_conv_block") + y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 1)) + return y + common_layers.conv_block( + x, + filters, [((1, 1), (1, 1))], + padding="SAME", + strides=(2, 1), + first_relu=res_relu, + force2d=True, + name="res_conv0") + + # Rescale from UINT8 to floats in [-1,-1] + signals = (tf.to_float(inputs)-127)/128. + #signals = tf.contrib.framework.nest.flatten(signals) + signals = tf.squeeze(signals, [2, 3]) + + # `stfts` is a complex64 Tensor representing the Short-time Fourier Transform of + # each signal in `signals`. Its shape is [batch_size, ?, fft_unique_bins] + # where fft_unique_bins = fft_length // 2 + 1 = 513. + stfts = tf.contrib.signal.stft(signals, frame_length=1024, frame_step=512, + fft_length=1024) + + # An energy spectrogram is the magnitude of the complex-valued STFT. + # A float32 Tensor of shape [batch_size, ?, 513]. + magnitude_spectrograms = tf.abs(stfts) + + log_offset = 1e-6 + log_magnitude_spectrograms = tf.log(magnitude_spectrograms + log_offset) + + # Warp the linear-scale, magnitude spectrograms into the mel-scale. + num_spectrogram_bins = magnitude_spectrograms.shape[-1].value + lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 64 + sample_rate = 16000 + linear_to_mel_weight_matrix = tf.contrib.signal.linear_to_mel_weight_matrix( + num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz, + upper_edge_hertz) + mel_spectrograms = tf.tensordot( + magnitude_spectrograms, linear_to_mel_weight_matrix, 1) + # Note: Shape inference for `tf.tensordot` does not currently handle this case. + mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate( + linear_to_mel_weight_matrix.shape[-1:])) + + # Try without the conversion to MFCCs, first. + '''num_mfccs = 13 + # Keep the first `num_mfccs` MFCCs. + mfccs = tf.contrib.signal.mfccs_from_log_mel_spectrograms( + log_mel_spectrograms)[..., :num_mfccs]''' + + x = tf.expand_dims(mel_spectrograms, 2) + x.set_shape([None, None, None, num_mel_bins]) + for i in xrange(self._model_hparams.audio_compression): + x = xnet_resblock(x, 2**(i + 1), True, "compress_block_%d" % i) + return xnet_resblock(x, self._body_input_depth, False, + "compress_block_final") + + +@registry.register_problem() +class Librispeech(problem.Problem): + """Problem spec for English word to dictionary definition.""" + + @property + def is_character_level(self): + return True + + @property + def input_space_id(self): + return problem.SpaceID.AUDIO_SPECTRAL + + @property + def target_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def num_shards(self): + return 10 + + @property + def use_subword_tokenizer(self): + return False + + @property + def num_dev_shards(self): + return 1 + + @property + def use_train_shards_for_dev(self): + """If true, we only generate training data and hold out shards for dev.""" + return False + + def feature_encoders(self, data_dir): + return { + "inputs": text_encoder.TextEncoder(), #None, #DoNothingEncoder(), + "targets": LibrispeechTextEncoder(), + } + + def example_reading_spec(self): + data_fields = { + "inputs": tf.VarLenFeature(tf.int64), + #"audio/channel_count": tf.FixedLenFeature([], tf.int64), + #"audio/sample_count": tf.FixedLenFeature([], tf.int64), + #"audio/sample_width": tf.FixedLenFeature([], tf.int64), + "targets": tf.VarLenFeature(tf.int64), + } + data_items_to_decoders = None + return (data_fields, data_items_to_decoders) + + + def generator(self, data_dir, tmp_dir, training, eos_list=None, start_from=0, how_many=0): + eos_list = [1] + datasets = (_LIBRISPEECH_TRAIN_DATASETS if training else _LIBRISPEECH_TEST_DATASETS) + i = 0 + for url, subdir in datasets: + filename = os.path.basename(url) + compressed_file = generator_utils.maybe_download(tmp_dir, filename, url) + + read_type = "r:gz" if filename.endswith("tgz") else "r" + with tarfile.open(compressed_file, read_type) as corpus_tar: + # Create a subset of files that don't already exist. + # tarfile.extractall errors when encountering an existing file + # and tarfile.extract is extremely slow + members = [] + for f in corpus_tar: + if not os.path.isfile(os.path.join(tmp_dir, f.name)): + members.append(f) + corpus_tar.extractall(tmp_dir, members=members) - data_dir = os.path.join(tmp_dir, "LibriSpeech", subdir) - data_files = _collect_data(data_dir, "flac", "txt") - data_pairs = data_files.values() - for media_file, text_data in sorted(data_pairs)[start_from:]: - if how_many > 0 and i == how_many: - return - i += 1 - audio_data, sample_count, sample_width, num_channels = _get_audio_data( - media_file) - label = [ord(c) for c in text_data] + eos_list - yield { - "inputs": audio_data, - "audio/channel_count": [num_channels], - "audio/sample_count": [sample_count], - "audio/sample_width": [sample_width], - "targets": label - } \ No newline at end of file + data_dir = os.path.join(tmp_dir, "LibriSpeech", subdir) + data_files = _collect_data(data_dir, "flac", "txt") + data_pairs = data_files.values() + for media_file, text_data in sorted(data_pairs)[start_from:]: + if how_many > 0 and i == how_many: + return + i += 1 + audio_data, sample_count, sample_width, num_channels = _get_audio_data( + media_file) + label = [ord(c) for c in text_data] + eos_list + yield { + "inputs": audio_data, + "audio/channel_count": [num_channels], + "audio/sample_count": [sample_count], + "audio/sample_width": [sample_width], + "targets": label + } + + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + train_paths = self.training_filepaths(data_dir, self.num_shards, shuffled=False) + dev_paths = self.dev_filepaths(data_dir, self.num_dev_shards, shuffled=False) + if self.use_train_shards_for_dev: + all_paths = train_paths + dev_paths + generator_utils.generate_files(self.generator(data_dir, tmp_dir, True), all_paths) + generator_utils.shuffle_dataset(all_paths) + else: + generator_utils.generate_dataset_and_shuffle( + self.generator(data_dir, tmp_dir, True), train_paths, + self.generator(data_dir, tmp_dir, False), dev_paths) + + + def hparams(self, defaults, unused_model_hparams): + p = defaults + p.stop_at_eos = int(False) + p.input_modality = { "inputs": ("audio:librispeech_modality", None) } + p.target_modality = (registry.Modalities.SYMBOL, 256) + + def preprocess_example(self, example, mode, hparams): + return example + +# TODO: clean up hparams +@registry.register_hparams +def librispeech_hparams(): + hparams = transformer.transformer_base_single_gpu() # Or whatever you'd like to build off. + hparams.batch_size = 36 + hparams.audio_compression = 8 + hparams.hidden_size = 2048 + hparams.max_input_seq_length = 600000 + hparams.max_target_seq_length = 350 + hparams.max_length = hparams.max_input_seq_length + hparams.min_length_bucket = hparams.max_input_seq_length // 2 + hparams.learning_rate = 0.05 + hparams.train_steps = 5000000 + hparams.num_hidden_layers = 4 + return hparams From 844df4d0172b3df5fac50dd364b15dc08b6a393f Mon Sep 17 00:00:00 2001 From: wingsbr <bswing@synchronvideo.com> Date: Mon, 20 Nov 2017 16:36:01 -0600 Subject: [PATCH 0586/4095] Added librispeech to data_generators/all_problems.py --- tensor2tensor/data_generators/all_problems.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index c7f364cf1..2aca3d377 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -28,6 +28,7 @@ from tensor2tensor.data_generators import ice_parsing from tensor2tensor.data_generators import image from tensor2tensor.data_generators import imdb +from tensor2tensor.data_generators import librispeech from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import multinli from tensor2tensor.data_generators import problem_hparams From 98c7b413e3fa6a18faf262c30b3eed3a9359d085 Mon Sep 17 00:00:00 2001 From: wingsbr <bswing@synchronvideo.com> Date: Tue, 21 Nov 2017 09:07:49 -0600 Subject: [PATCH 0587/4095] Switched to full librispeech datasets. --- tensor2tensor/data_generators/librispeech.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tensor2tensor/data_generators/librispeech.py b/tensor2tensor/data_generators/librispeech.py index dcb5b3f88..5e83cfd51 100644 --- a/tensor2tensor/data_generators/librispeech.py +++ b/tensor2tensor/data_generators/librispeech.py @@ -14,7 +14,7 @@ import wave -'''_LIBRISPEECH_TRAIN_DATASETS = [ +_LIBRISPEECH_TRAIN_DATASETS = [ [ "http://www.openslr.org/resources/12/train-clean-100.tar.gz", # pylint: disable=line-too-long "train-clean-100" @@ -37,18 +37,6 @@ "http://www.openslr.org/resources/12/dev-other.tar.gz", "dev-other" ], -]''' -_LIBRISPEECH_TRAIN_DATASETS = [ - [ - "http://www.openslr.org/resources/12/dev-other.tar.gz", - "dev-other" - ], -] -_LIBRISPEECH_TEST_DATASETS = [ - [ - "http://www.openslr.org/resources/12/dev-clean.tar.gz", - "dev-clean" - ], ] From 23129f238b5abeecca38790215e272b31913cdb5 Mon Sep 17 00:00:00 2001 From: wingsbr <bswing@synchronvideo.com> Date: Tue, 21 Nov 2017 16:25:58 -0600 Subject: [PATCH 0588/4095] Variety of fixes based on PR comments. --- tensor2tensor/data_generators/librispeech.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tensor2tensor/data_generators/librispeech.py b/tensor2tensor/data_generators/librispeech.py index 5e83cfd51..de7ed94cc 100644 --- a/tensor2tensor/data_generators/librispeech.py +++ b/tensor2tensor/data_generators/librispeech.py @@ -82,7 +82,7 @@ def _get_audio_data(filepath): class LibrispeechTextEncoder(text_encoder.TextEncoder): def encode(self, s): - return [ord[c] for c in s] + return [self._num_reserved_ids + ord(c) for c in s] def decode(self, ids): """Transform a sequence of int ids into a human-readable string. @@ -97,7 +97,7 @@ def decode(self, ids): if 0 <= id_ < self._num_reserved_ids: decoded_ids.append(RESERVED_TOKENS[int(id_)]) else: - decoded_ids.append(id_) + decoded_ids.append(id_ - self._num_reserved_ids) return "".join([chr(d) for d in decoded_ids]) @@ -199,7 +199,7 @@ def target_space_id(self): @property def num_shards(self): - return 10 + return 100 @property def use_subword_tokenizer(self): @@ -214,9 +214,9 @@ def use_train_shards_for_dev(self): """If true, we only generate training data and hold out shards for dev.""" return False - def feature_encoders(self, data_dir): + def feature_encoders(self, _): return { - "inputs": text_encoder.TextEncoder(), #None, #DoNothingEncoder(), + "inputs": text_encoder.TextEncoder(), "targets": LibrispeechTextEncoder(), } @@ -233,8 +233,9 @@ def example_reading_spec(self): def generator(self, data_dir, tmp_dir, training, eos_list=None, start_from=0, how_many=0): - eos_list = [1] + eos_list = [1] if eos_list is None else eos_list datasets = (_LIBRISPEECH_TRAIN_DATASETS if training else _LIBRISPEECH_TEST_DATASETS) + num_reserved_ids = self.feature_encoders(None)["targets"].num_reserved_ids i = 0 for url, subdir in datasets: filename = os.path.basename(url) @@ -260,7 +261,7 @@ def generator(self, data_dir, tmp_dir, training, eos_list=None, start_from=0, ho i += 1 audio_data, sample_count, sample_width, num_channels = _get_audio_data( media_file) - label = [ord(c) for c in text_data] + eos_list + label = [num_reserved_ids + ord(c) for c in text_data] + eos_list yield { "inputs": audio_data, "audio/channel_count": [num_channels], From ca489db1a75f635d1ad7bac8beaf58a3d6be9958 Mon Sep 17 00:00:00 2001 From: Nima <rafiee.nima@gmail.com> Date: Thu, 23 Nov 2017 09:42:24 +0100 Subject: [PATCH 0589/4095] clean code --- tensor2tensor/models/cycle_gan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/models/cycle_gan.py b/tensor2tensor/models/cycle_gan.py index 08d43626e..e5832fd7a 100644 --- a/tensor2tensor/models/cycle_gan.py +++ b/tensor2tensor/models/cycle_gan.py @@ -32,7 +32,7 @@ def discriminator(x, compress, hparams, name, reuse=None): with tf.variable_scope(name, reuse=reuse): - x = tf.stop_gradient(2 * x) - x # Reverse gradient. ########## why ######## + x = tf.stop_gradient(2 * x) - x # Reverse gradient. if compress: x = transformer_vae.compress(x, None, False, hparams, "compress") else: From 0d345fb93cea7e78f820482b2bd426d563408b7b Mon Sep 17 00:00:00 2001 From: Nima Rafiee <rafiee.nima@gmail.com> Date: Thu, 23 Nov 2017 10:11:34 +0100 Subject: [PATCH 0590/4095] remove binary files --- .DS_Store | Bin 6148 -> 0 bytes tensor2tensor/.DS_Store | Bin 6148 -> 0 bytes tensor2tensor/models/cycle_gan.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 .DS_Store delete mode 100644 tensor2tensor/.DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 59d856ade63037dc970925d4033368b8abe95293..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%Wl&^6g|@h;()4Vfz&R&L1Gc1loYx^NFfhFBwnfuEC97}9nnax9mNh&n+9dW zKLCHgr|<=Q2p<4v9;p(iAXZU@=1O<Yotb-PJje2k2Y{#zf-Rs3z@babU1M>9$$sez zwqjZeM4`rbj$L@IFh2Cdg>2hk6|f3iHU;Fjo5w@E#2y}F=KGC(_Q-AFKK<4&WsHDz z)S35;`MYSLrkE|(AK^BR5$Lb~oc%4s&LKdILnQNgVk486#}kfTN0TFaXlAlMWwHx= zmL^xxz*FJ{&mn$h7!DG2kYa!y^Lsd%pFyqq^USK(nA<@_R*XLG%+@kYx#E9h`opB> zhf1sOelP4MX}SDEtgPm)T+KU$f>T&?-g!sy&`aCtAgQ(cZ&a-tbbJ%<#c_BZxBXY$ z&TzkYqaH_TyBl@-GwE+%ccUO4*5W}FBx;-&?evITHPL=?b2O?{?v~ujc6nTKM>{*^ zlDl2GHy%6A#?7q<t%Ku})6v=Z;}<TECb;1%TQ@j`uMGYo)(5X2g>m$rS<?}wOp8la zz`OwG`jY3h(^&<q0+(L_xj(pciB*FWjcV&)p{@YL9GjIfmOl-W;~K0QoM>bZO=v1n zQ-!%=2u)|ZYx1fFCmJ;!!dyOtnOT?{icqt2d{?DIR5jYtDqt0uS723Fi}L(${`vkt zPqH<ufK}kXQb6PyUZch(nX`3aaq_Hn=?8RaGH;?$QCOJcSOxMZ-lQvIpDP1m)!;-U RTWI!2K*?YWtH4qf_zmYlvkm|N diff --git a/tensor2tensor/.DS_Store b/tensor2tensor/.DS_Store deleted file mode 100644 index 5395b66bec4552e19814be1ef77610f15c0a62d3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK&u-H&9R7LjN=liQ1JZWP1&Kq2I(4uELW;U^*bS)&4uHCZM4P27u9{?3*A?Z$ zI{<IMqwoYg2oC_C|A5k@a6}0CSJ|KKd|xbocI=vn*r<pOh+HCakR-NmBij)2xlE1N zGA(z2Oud6sP80kd(6qs}4b}ncz`y2z{C0clo8^>J_4{3%CgN?Fh!709Jv>Ib|6K)2 zL@f%zPiw5tHP$Xg6kt~f(8oE&=p&|?Y7xL}UCs7t>JBB~E|jLuec#|-l0a`UG<EK` zKXJo(HZ|k?5>6AI<%7Xjw$*BHcWyb|uG8Id-i$BAe4Gb)k&S}cD^=rB9GdcTA(CI^ zAbi2&`NX|@EYdvSX*{dYU%%vOB<3Sgq*12c^DM|Opj91B-2LTpIDFXmhKGYy-&-CX z4f@{U@X>1JID7XFo}8XtF0Pk1t9KtUAOyp<t76ySPxypFhbo3#jb~{h(sQ&+O%EIN z40eSuD+Vu9TXucQ<z#-SOu5@oO)#}F)cHoeTryRC3%n^68z#NV+NJ_LzF`{WlvkNM zbOl}pegHP7YqhXA##LLEOfG<`5>*>!@i;weg>}F>U>&GAAom9kNuqDC)Tp)&Wa<h4 zY@=BP%JR>G)OZGcgQZ41K`<=^YAG{U45sC1_YBWBSZdUAV&?L}OwY{RP?+i+@Anj) zn6J_1)&c9lx&tj;b>;d0^!NAwb&;)E2do4Cl>@AOGCmn$Nak#93{IZ49?}_-49P1s mDhV=k9IJvniuaLJ;F%{2K;K}g5iJP&M?l$NGwZ-#b>Ih(`PyUv diff --git a/tensor2tensor/models/cycle_gan.py b/tensor2tensor/models/cycle_gan.py index 08d43626e..eaac5f304 100644 --- a/tensor2tensor/models/cycle_gan.py +++ b/tensor2tensor/models/cycle_gan.py @@ -32,7 +32,7 @@ def discriminator(x, compress, hparams, name, reuse=None): with tf.variable_scope(name, reuse=reuse): - x = tf.stop_gradient(2 * x) - x # Reverse gradient. ########## why ######## + x = tf.stop_gradient(2 * x) - x # Reverse gradient. if compress: x = transformer_vae.compress(x, None, False, hparams, "compress") else: From 6cf47f9b92b2bd943a6f3cb4dd2f62e690ff7215 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 13 Nov 2017 18:37:48 -0800 Subject: [PATCH 0591/4095] More tiny sketch fixes PiperOrigin-RevId: 175621997 --- tensor2tensor/bin/t2t-datagen | 2 +- tensor2tensor/bin/t2t-decoder | 5 +- tensor2tensor/bin/t2t-trainer | 17 +- tensor2tensor/data_generators/all_problems.py | 1 - .../data_generators/cnn_dailymail.py | 34 +- tensor2tensor/data_generators/librispeech.py | 310 ------------------ tensor2tensor/models/transformer_sketch.py | 2 +- tensor2tensor/utils/decoding.py | 21 +- tensor2tensor/utils/get_cnndm_rouge.sh | 16 - tensor2tensor/utils/get_rouge.py | 88 ----- 10 files changed, 28 insertions(+), 468 deletions(-) mode change 100755 => 100644 tensor2tensor/bin/t2t-datagen delete mode 100644 tensor2tensor/data_generators/librispeech.py delete mode 100644 tensor2tensor/utils/get_cnndm_rouge.sh delete mode 100644 tensor2tensor/utils/get_rouge.py diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100755 new mode 100644 index 67890371b..2ac0f0db2 --- a/tensor2tensor/bin/t2t-datagen +++ b/tensor2tensor/bin/t2t-datagen @@ -112,7 +112,7 @@ _SUPPORTED_PROBLEM_GENERATORS = { vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15), lambda: audio.timit_generator( FLAGS.data_dir, FLAGS.tmp_dir, False, 626, - vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), + vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), } # pylint: enable=g-long-lambda diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index 5f05f5bcb..c2bf97f94 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -47,8 +47,9 @@ flags = tf.flags FLAGS = flags.FLAGS flags.DEFINE_string("output_dir", "", "Training directory to load from.") -flags.DEFINE_string("decode_from_file", None, "Path to the source file for decoding") -flags.DEFINE_string("decode_to_file", None, "Path to the decoded (output) file") +flags.DEFINE_string("decode_from_file", None, "Path to decode file") +flags.DEFINE_string("decode_to_file", None, + "Path prefix to inference output file") flags.DEFINE_bool("decode_interactive", False, "Interactive local inference mode.") flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index fc37f27ab..5a2866da6 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -59,7 +59,7 @@ flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("master", "", "Address of TensorFlow master.") flags.DEFINE_string("schedule", "train_and_evaluate", "Method of tf.contrib.learn.Experiment to run.") -flags.DEFINE_bool("profile", False, "Profile performance?") + def main(_): tf.logging.set_verbosity(tf.logging.INFO) @@ -83,26 +83,13 @@ def main(_): problem.generate_data(data_dir, tmp_dir) # Run the trainer. - def run_experiment(): - trainer_utils.run( + trainer_utils.run( data_dir=data_dir, model=FLAGS.model, output_dir=output_dir, train_steps=FLAGS.train_steps, eval_steps=FLAGS.eval_steps, schedule=FLAGS.schedule) - - if FLAGS.profile: - with tf.contrib.tfprof.ProfileContext('t2tprof', - trace_steps=range(100), - dump_steps=range(100)) as pctx: - opts = tf.profiler.ProfileOptionBuilder.time_and_memory() - pctx.add_auto_profiling('op', opts, range(100)) - - run_experiment() - - else: - run_experiment() if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 2aca3d377..c7f364cf1 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -28,7 +28,6 @@ from tensor2tensor.data_generators import ice_parsing from tensor2tensor.data_generators import image from tensor2tensor.data_generators import imdb -from tensor2tensor.data_generators import librispeech from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import multinli from tensor2tensor.data_generators import problem_hparams diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index 05b2a1f37..239d1af99 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -19,10 +19,9 @@ from __future__ import division from __future__ import print_function -import io +import hashlib import os import tarfile -import hashlib # Dependency imports @@ -47,7 +46,7 @@ # Train/Dev/Test Splits for summarization data _TRAIN_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_train.txt" _DEV_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt" -_TEST_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_test.txt" +_TEST_URLS = "https://github.com/abisee/cnn-dailymail/blob/master/url_lists/all_test.txt" # End-of-sentence marker. @@ -129,7 +128,9 @@ def generate_hash(inp): return filelist -def example_generator(all_files, urls_path, sum_token): + +def example_generator(tmp_dir, is_training, sum_token): + """Generate examples.""" def fix_run_on_sents(line): if u"@highlight" in line: return line @@ -139,6 +140,7 @@ def fix_run_on_sents(line): return line return line + u"." + all_files, urls_path = _maybe_download_corpora(tmp_dir, is_training) filelist = example_splits(urls_path, all_files) story_summary_split_token = u" <summary> " if sum_token else " " @@ -168,29 +170,13 @@ def fix_run_on_sents(line): yield " ".join(story) + story_summary_split_token + " ".join(summary) + def _story_summary_split(story): split_str = u" <summary> " split_str_len = len(split_str) split_pos = story.find(split_str) return story[:split_pos], story[split_pos+split_str_len:] # story, summary -def write_raw_text_to_files(all_files, urls_path, data_dir, tmp_dir, is_training): - def write_to_file(all_files, urls_path, data_dir, filename): - with io.open(os.path.join(data_dir, filename+".source"), "w") as fstory, io.open(os.path.join(data_dir, filename+".target"), "w") as fsummary: - for example in example_generator(all_files, urls_path, sum_token=True): - story, summary = _story_summary_split(example) - fstory.write(story+"\n") - fsummary.write(summary+"\n") - - filename = "cnndm.train" if is_training else "cnndm.dev" - tf.logging.info("Writing %s" % filename) - write_to_file(all_files, urls_path, data_dir, filename) - - if not is_training: - test_urls_path = generator_utils.maybe_download(tmp_dir, "all_test.txt", _TEST_URLS) - filename = "cnndm.test" - tf.logging.info("Writing %s" % filename) - write_to_file(all_files, test_urls_path, data_dir, filename) @registry.register_problem class SummarizeCnnDailymail32k(problem.Text2TextProblem): @@ -233,12 +219,10 @@ def use_train_shards_for_dev(self): return False def generator(self, data_dir, tmp_dir, is_training): - all_files, urls_path = _maybe_download_corpora(tmp_dir, is_training) encoder = generator_utils.get_or_generate_vocab_inner( data_dir, self.vocab_file, self.targeted_vocab_size, - example_generator(all_files, urls_path, sum_token=False)) - write_raw_text_to_files(all_files, urls_path, data_dir, tmp_dir, is_training) - for example in example_generator(all_files, urls_path, sum_token=True): + example_generator(tmp_dir, is_training, sum_token=False)) + for example in example_generator(tmp_dir, is_training, sum_token=True): story, summary = _story_summary_split(example) encoded_summary = encoder.encode(summary) + [EOS] encoded_story = encoder.encode(story) + [EOS] diff --git a/tensor2tensor/data_generators/librispeech.py b/tensor2tensor/data_generators/librispeech.py deleted file mode 100644 index de7ed94cc..000000000 --- a/tensor2tensor/data_generators/librispeech.py +++ /dev/null @@ -1,310 +0,0 @@ -from tensor2tensor.data_generators import problem -from tensor2tensor.utils import registry -from tensor2tensor.models import transformer -from tensor2tensor.utils import modality -from tensor2tensor.layers import common_layers -from tensor2tensor.data_generators import text_encoder -import random -import tensorflow as tf -import numpy as np -from tensor2tensor.data_generators import generator_utils -import os -from subprocess import call -import tarfile -import wave - - -_LIBRISPEECH_TRAIN_DATASETS = [ - [ - "http://www.openslr.org/resources/12/train-clean-100.tar.gz", # pylint: disable=line-too-long - "train-clean-100" - ], - [ - "http://www.openslr.org/resources/12/train-clean-360.tar.gz", - "train-clean-360" - ], - [ - "http://www.openslr.org/resources/12/train-other-500.tar.gz", - "train-other-500" - ], -] -_LIBRISPEECH_TEST_DATASETS = [ - [ - "http://www.openslr.org/resources/12/dev-clean.tar.gz", - "dev-clean" - ], - [ - "http://www.openslr.org/resources/12/dev-other.tar.gz", - "dev-other" - ], -] - - -def _collect_data(directory, input_ext, transcription_ext): - """Traverses directory collecting input and target files.""" - # Directory from string to tuple pair of strings - # key: the filepath to a datafile including the datafile's basename. Example, - # if the datafile was "/path/to/datafile.wav" then the key would be - # "/path/to/datafile" - # value: a pair of strings (media_filepath, label) - data_files = dict() - for root, _, filenames in os.walk(directory): - transcripts = [filename for filename in filenames if transcription_ext in filename] - for transcript in transcripts: - basename = transcript.strip(transcription_ext) - transcript_path = os.path.join(root, transcript) - with open(transcript_path, 'r') as transcript_file: - for transcript_line in transcript_file: - line_contents = transcript_line.split(" ", 1) - assert len(line_contents) == 2 - media_base, label = line_contents - key = os.path.join(root, media_base) - assert key not in data_files - media_name = "%s.%s"%(media_base, input_ext) - media_path = os.path.join(root, media_name) - data_files[key] = (media_path, label) - return data_files - - -def _get_audio_data(filepath): - # Construct a true .wav file. - out_filepath = filepath.strip(".flac") + ".wav" - # Assumes sox is installed on system. Sox converts from FLAC to WAV. - call(["sox", filepath, out_filepath]) - wav_file = wave.open(open(out_filepath)) - frame_count = wav_file.getnframes() - byte_array = wav_file.readframes(frame_count) - - data = np.fromstring(byte_array, np.uint8).tolist() - return data, frame_count, wav_file.getsampwidth(), wav_file.getnchannels() - - -class LibrispeechTextEncoder(text_encoder.TextEncoder): - - def encode(self, s): - return [self._num_reserved_ids + ord(c) for c in s] - - def decode(self, ids): - """Transform a sequence of int ids into a human-readable string. - EOS is not expected in ids. - Args: - ids: list of integers to be converted. - Returns: - s: human-readable string. - """ - decoded_ids = [] - for id_ in ids: - if 0 <= id_ < self._num_reserved_ids: - decoded_ids.append(RESERVED_TOKENS[int(id_)]) - else: - decoded_ids.append(id_ - self._num_reserved_ids) - return "".join([chr(d) for d in decoded_ids]) - - - -@registry.register_audio_modality -class LibrispeechModality(modality.Modality): - """Performs strided conv compressions for audio spectral data.""" - - def bottom(self, inputs): - """Transform input from data space to model space. - Args: - inputs: A Tensor with shape [batch, ...] - Returns: - body_input: A Tensor with shape [batch, ?, ?, body_input_depth]. - """ - with tf.variable_scope(self.name): - # TODO(aidangomez): Will need to sort out a better audio pipeline - def xnet_resblock(x, filters, res_relu, name): - with tf.variable_scope(name): - # We only stride along the length dimension to preserve the spectral - # bins (which are tiny in dimensionality relative to length) - y = common_layers.separable_conv_block( - x, - filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))], - first_relu=True, - padding="SAME", - force2d=True, - name="sep_conv_block") - y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 1)) - return y + common_layers.conv_block( - x, - filters, [((1, 1), (1, 1))], - padding="SAME", - strides=(2, 1), - first_relu=res_relu, - force2d=True, - name="res_conv0") - - # Rescale from UINT8 to floats in [-1,-1] - signals = (tf.to_float(inputs)-127)/128. - #signals = tf.contrib.framework.nest.flatten(signals) - signals = tf.squeeze(signals, [2, 3]) - - # `stfts` is a complex64 Tensor representing the Short-time Fourier Transform of - # each signal in `signals`. Its shape is [batch_size, ?, fft_unique_bins] - # where fft_unique_bins = fft_length // 2 + 1 = 513. - stfts = tf.contrib.signal.stft(signals, frame_length=1024, frame_step=512, - fft_length=1024) - - # An energy spectrogram is the magnitude of the complex-valued STFT. - # A float32 Tensor of shape [batch_size, ?, 513]. - magnitude_spectrograms = tf.abs(stfts) - - log_offset = 1e-6 - log_magnitude_spectrograms = tf.log(magnitude_spectrograms + log_offset) - - # Warp the linear-scale, magnitude spectrograms into the mel-scale. - num_spectrogram_bins = magnitude_spectrograms.shape[-1].value - lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 64 - sample_rate = 16000 - linear_to_mel_weight_matrix = tf.contrib.signal.linear_to_mel_weight_matrix( - num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz, - upper_edge_hertz) - mel_spectrograms = tf.tensordot( - magnitude_spectrograms, linear_to_mel_weight_matrix, 1) - # Note: Shape inference for `tf.tensordot` does not currently handle this case. - mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate( - linear_to_mel_weight_matrix.shape[-1:])) - - # Try without the conversion to MFCCs, first. - '''num_mfccs = 13 - # Keep the first `num_mfccs` MFCCs. - mfccs = tf.contrib.signal.mfccs_from_log_mel_spectrograms( - log_mel_spectrograms)[..., :num_mfccs]''' - - x = tf.expand_dims(mel_spectrograms, 2) - x.set_shape([None, None, None, num_mel_bins]) - for i in xrange(self._model_hparams.audio_compression): - x = xnet_resblock(x, 2**(i + 1), True, "compress_block_%d" % i) - return xnet_resblock(x, self._body_input_depth, False, - "compress_block_final") - - -@registry.register_problem() -class Librispeech(problem.Problem): - """Problem spec for English word to dictionary definition.""" - - @property - def is_character_level(self): - return True - - @property - def input_space_id(self): - return problem.SpaceID.AUDIO_SPECTRAL - - @property - def target_space_id(self): - return problem.SpaceID.EN_CHR - - @property - def num_shards(self): - return 100 - - @property - def use_subword_tokenizer(self): - return False - - @property - def num_dev_shards(self): - return 1 - - @property - def use_train_shards_for_dev(self): - """If true, we only generate training data and hold out shards for dev.""" - return False - - def feature_encoders(self, _): - return { - "inputs": text_encoder.TextEncoder(), - "targets": LibrispeechTextEncoder(), - } - - def example_reading_spec(self): - data_fields = { - "inputs": tf.VarLenFeature(tf.int64), - #"audio/channel_count": tf.FixedLenFeature([], tf.int64), - #"audio/sample_count": tf.FixedLenFeature([], tf.int64), - #"audio/sample_width": tf.FixedLenFeature([], tf.int64), - "targets": tf.VarLenFeature(tf.int64), - } - data_items_to_decoders = None - return (data_fields, data_items_to_decoders) - - - def generator(self, data_dir, tmp_dir, training, eos_list=None, start_from=0, how_many=0): - eos_list = [1] if eos_list is None else eos_list - datasets = (_LIBRISPEECH_TRAIN_DATASETS if training else _LIBRISPEECH_TEST_DATASETS) - num_reserved_ids = self.feature_encoders(None)["targets"].num_reserved_ids - i = 0 - for url, subdir in datasets: - filename = os.path.basename(url) - compressed_file = generator_utils.maybe_download(tmp_dir, filename, url) - - read_type = "r:gz" if filename.endswith("tgz") else "r" - with tarfile.open(compressed_file, read_type) as corpus_tar: - # Create a subset of files that don't already exist. - # tarfile.extractall errors when encountering an existing file - # and tarfile.extract is extremely slow - members = [] - for f in corpus_tar: - if not os.path.isfile(os.path.join(tmp_dir, f.name)): - members.append(f) - corpus_tar.extractall(tmp_dir, members=members) - - data_dir = os.path.join(tmp_dir, "LibriSpeech", subdir) - data_files = _collect_data(data_dir, "flac", "txt") - data_pairs = data_files.values() - for media_file, text_data in sorted(data_pairs)[start_from:]: - if how_many > 0 and i == how_many: - return - i += 1 - audio_data, sample_count, sample_width, num_channels = _get_audio_data( - media_file) - label = [num_reserved_ids + ord(c) for c in text_data] + eos_list - yield { - "inputs": audio_data, - "audio/channel_count": [num_channels], - "audio/sample_count": [sample_count], - "audio/sample_width": [sample_width], - "targets": label - } - - - def generate_data(self, data_dir, tmp_dir, task_id=-1): - train_paths = self.training_filepaths(data_dir, self.num_shards, shuffled=False) - dev_paths = self.dev_filepaths(data_dir, self.num_dev_shards, shuffled=False) - if self.use_train_shards_for_dev: - all_paths = train_paths + dev_paths - generator_utils.generate_files(self.generator(data_dir, tmp_dir, True), all_paths) - generator_utils.shuffle_dataset(all_paths) - else: - generator_utils.generate_dataset_and_shuffle( - self.generator(data_dir, tmp_dir, True), train_paths, - self.generator(data_dir, tmp_dir, False), dev_paths) - - - def hparams(self, defaults, unused_model_hparams): - p = defaults - p.stop_at_eos = int(False) - p.input_modality = { "inputs": ("audio:librispeech_modality", None) } - p.target_modality = (registry.Modalities.SYMBOL, 256) - - def preprocess_example(self, example, mode, hparams): - return example - -# TODO: clean up hparams -@registry.register_hparams -def librispeech_hparams(): - hparams = transformer.transformer_base_single_gpu() # Or whatever you'd like to build off. - hparams.batch_size = 36 - hparams.audio_compression = 8 - hparams.hidden_size = 2048 - hparams.max_input_seq_length = 600000 - hparams.max_target_seq_length = 350 - hparams.max_length = hparams.max_input_seq_length - hparams.min_length_bucket = hparams.max_input_seq_length // 2 - hparams.learning_rate = 0.05 - hparams.train_steps = 5000000 - hparams.num_hidden_layers = 4 - return hparams diff --git a/tensor2tensor/models/transformer_sketch.py b/tensor2tensor/models/transformer_sketch.py index 45384f065..b6bbb7708 100644 --- a/tensor2tensor/models/transformer_sketch.py +++ b/tensor2tensor/models/transformer_sketch.py @@ -66,7 +66,7 @@ def transformer_sketch(): hparams.learning_rate = 0.2 hparams.learning_rate_warmup_steps = 10000 hparams.num_hidden_layers = 6 - hparams.initializer = "orthogonal" + # hparams.initializer = "orthogonal" hparams.sampling_method = "random" return hparams diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index d825df6f2..629b2ed26 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -83,9 +83,9 @@ def log_decode_results(inputs, decoded_targets = None if identity_output: - decoded_outputs = "".join(map(str, outputs.flatten())) + decoded_outputs = " ".join(map(str, outputs.flatten())) if targets is not None: - decoded_targets = "".join(map(str, targets.flatten())) + decoded_targets = " ".join(map(str, targets.flatten())) else: decoded_outputs = targets_vocab.decode(_save_until_eos(outputs, is_image)) if targets is not None: @@ -252,14 +252,17 @@ def input_fn(): # _decode_batch_input_fn sorted_inputs.reverse() decodes.reverse() - # If decode_to_file was provided use it as the output filename without any change - # (except for adding shard_id if using more shards for decoding). - # Otherwise, use the input filename plus model, hp, problem, beam, alpha. - decode_filename = decode_to_file if decode_to_file else filename + # Dumping inputs and outputs to file filename.decodes in + # format result\tinput in the same order as original inputs + if decode_to_file: + output_filename = decode_to_file + else: + output_filename = filename if decode_hp.shards > 1: - decode_filename = decode_filename + ("%.2d" % decode_hp.shard_id) - if not decode_to_file: - decode_filename = _decode_filename(decode_filename, problem_name, decode_hp) + base_filename = output_filename + ("%.2d" % decode_hp.shard_id) + else: + base_filename = output_filename + decode_filename = _decode_filename(base_filename, problem_name, decode_hp) tf.logging.info("Writing decodes into %s" % decode_filename) outfile = tf.gfile.Open(decode_filename, "w") for index in range(len(sorted_inputs)): diff --git a/tensor2tensor/utils/get_cnndm_rouge.sh b/tensor2tensor/utils/get_cnndm_rouge.sh deleted file mode 100644 index 0f52bb56c..000000000 --- a/tensor2tensor/utils/get_cnndm_rouge.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# Path to moses dir -mosesdecoder=$1 - -# Path to file containing gold summaries, one per line -targets_file=$2 -# Path to file containing model generated summaries, one per line -decodes_file=$3 - -# Tokenize. -perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l en < $targets_file > $targets_file.tok -perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l en < $decodes_file > $decodes_file.tok - -# Get rouge scores -python get_rouge.py --decodes_filename $decodes_file.tok --targets_filename $targets_file.tok diff --git a/tensor2tensor/utils/get_rouge.py b/tensor2tensor/utils/get_rouge.py deleted file mode 100644 index c15545cfd..000000000 --- a/tensor2tensor/utils/get_rouge.py +++ /dev/null @@ -1,88 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Computing rouge scores using pyrouge.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import logging -import shutil -from tempfile import mkdtemp -from pprint import pprint - -# Dependency imports -from pyrouge import Rouge155 - -import numpy as np -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - -tf.flags.DEFINE_string("decodes_filename", None, "File containing model generated summaries tokenized") -tf.flags.DEFINE_string("targets_filename", None, "File containing model target summaries tokenized") - -def write_to_file(filename, data): - data = ".\n".join(data.split(". ")) - with open(filename, "w") as fp: - fp.write(data) - -def prep_data(decode_dir, target_dir): - with open(FLAGS.decodes_filename, "rb") as fdecodes, open(FLAGS.targets_filename, "rb") as ftargets: - for i, (d, t) in enumerate(zip(fdecodes, ftargets)): - write_to_file(os.path.join(decode_dir, "rouge.%06d.txt" % (i+1)), d) - write_to_file(os.path.join(target_dir, "rouge.A.%06d.txt" % (i+1)), t) - - if (i+1 % 1000) == 0: - tf.logging.into("Written %d examples to file" % i) - -def main(_): - rouge = Rouge155() - rouge.log.setLevel(logging.ERROR) - rouge.system_filename_pattern = "rouge.(\d+).txt" - rouge.model_filename_pattern = "rouge.[A-Z].#ID#.txt" - - tf.logging.set_verbosity(tf.logging.INFO) - - tmpdir = mkdtemp() - tf.logging.info("tmpdir: %s" % tmpdir) - # system = decodes/predictions - system_dir = os.path.join(tmpdir, 'system') - # model = targets/gold - model_dir = os.path.join(tmpdir, 'model') - os.mkdir(system_dir) - os.mkdir(model_dir) - - rouge.system_dir = system_dir - rouge.model_dir = model_dir - - prep_data(rouge.system_dir, rouge.model_dir) - - rouge_scores = rouge.convert_and_evaluate() - rouge_scores = rouge.output_to_dict(rouge_scores) - for prefix in ["rouge_1", "rouge_2", "rouge_l"]: - for suffix in ["f_score", "precision", "recall"]: - key = "_".join([prefix, suffix]) - tf.logging.info("%s: %.4f" % (key, rouge_scores[key])) - - # clean up after pyrouge - shutil.rmtree(tmpdir) - shutil.rmtree(rouge._config_dir) - shutil.rmtree(os.path.split(rouge._system_dir)[0]) - -if __name__=='__main__': - tf.app.run() From 50f5515b17793ec3690811913cb3e40ffc688abb Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 14 Nov 2017 12:57:42 -0800 Subject: [PATCH 0592/4095] Add Modality.targets_weights_fn PiperOrigin-RevId: 175722118 --- tensor2tensor/data_generators/image.py | 18 ++-- tensor2tensor/layers/modalities.py | 130 +++++++------------------ tensor2tensor/models/vanilla_gan.py | 4 +- tensor2tensor/tpu/tpu_trainer_lib.py | 13 ++- tensor2tensor/utils/metrics.py | 22 ++--- tensor2tensor/utils/modality.py | 22 ++++- 6 files changed, 79 insertions(+), 130 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 38fa06f25..dec66a623 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -112,8 +112,8 @@ def preprocess_example(self, example, unused_mode, unused_hparams): def hparams(self, defaults, unused_model_hparams): p = defaults - p.input_modality = {"inputs": ("image:identity_no_pad", None)} - p.target_modality = ("image:identity_no_pad", None) + p.input_modality = {"inputs": ("image:identity", 256)} + p.target_modality = ("image:identity", 256) p.batch_size_multiplier = 256 p.max_expected_batch_size_per_shard = 4 p.input_space_id = 1 @@ -236,7 +236,7 @@ def feature_encoders(self, data_dir): def hparams(self, defaults, unused_model_hparams): p = defaults - p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} + p.input_modality = {"inputs": (registry.Modalities.IMAGE, 256)} vocab_size = self._encoders["targets"].vocab_size p.target_modality = (registry.Modalities.SYMBOL, vocab_size) p.batch_size_multiplier = 256 @@ -286,7 +286,7 @@ def generator(self, data_dir, tmp_dir, is_training): def hparams(self, defaults, unused_model_hparams): p = defaults - p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} + p.input_modality = {"inputs": (registry.Modalities.IMAGE, 256)} p.target_modality = (registry.Modalities.CLASS_LABEL, self.num_classes) p.batch_size_multiplier = 4 if self.is_small else 256 @@ -432,8 +432,8 @@ def preprocess_example(self, example, unused_mode, unused_hparams): def hparams(self, defaults, unused_model_hparams): p = defaults - p.input_modality = {"inputs": ("image:identity_no_pad", None)} - p.target_modality = ("image:identity_no_pad", None) + p.input_modality = {"inputs": ("image:identity", 256)} + p.target_modality = ("image:identity", 256) p.batch_size_multiplier = 256 p.max_expected_batch_size_per_shard = 4 p.input_space_id = 1 @@ -718,8 +718,8 @@ def preprocess_example(self, example, unused_mode, unused_hparams): def hparams(self, defaults, unused_model_hparams): p = defaults - p.input_modality = {"inputs": ("image:identity_no_pad", None)} - p.target_modality = ("image:identity_no_pad", None) + p.input_modality = {"inputs": ("image:identity", 256)} + p.target_modality = ("image:identity", 256) p.batch_size_multiplier = 256 p.max_expected_batch_size_per_shard = 4 p.input_space_id = 1 @@ -863,7 +863,7 @@ def feature_encoders(self, data_dir): def hparams(self, defaults, unused_model_hparams): p = defaults - p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)} + p.input_modality = {"inputs": (registry.Modalities.IMAGE, 256)} encoder = self._encoders["targets"] p.target_modality = (registry.Modalities.SYMBOL, encoder.vocab_size) p.batch_size_multiplier = 256 diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 586525e0d..a2ecd1258 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -45,14 +45,22 @@ class SymbolModality(modality.Modality): def name(self): return "symbol_modality_%d_%d" % (self._vocab_size, self._body_input_depth) - @property - def top_dimensionality(self): - return self._vocab_size - @property def top_is_pointwise(self): return True + @property + def weights_fn(self): + weights_fn = common_layers.weights_nonzero + + hp = self._model_hparams + if hp and hp.prepend_mode != "none": + assert (hp.prepend_mode == "prepend_inputs_masked_attention" or + hp.prepend_mode == "prepend_inputs_full_attention") + weights_fn = common_layers.weights_prepend_inputs_to_targets + + return weights_fn + def _get_weights(self, hidden_dim=None): """Create or get concatenated embedding or softmax variable. @@ -151,7 +159,7 @@ def top(self, body_output, _): class CTCSymbolModality(SymbolModality): """SymbolModality that uses CTC loss.""" - def loss(self, logits, targets, weights_fn=common_layers.weights_nonzero): + def loss(self, logits, targets): """Compute the CTC loss.""" with tf.name_scope("ctc_loss", [logits, targets]): # For CTC we assume targets are 1d, [batch, length, 1, 1] here. @@ -172,21 +180,14 @@ def loss(self, logits, targets, weights_fn=common_layers.weights_nonzero): time_major=False, preprocess_collapse_repeated=False, ctc_merge_repeated=False) - weights = weights_fn(targets) + weights = self.targets_weights_fn(targets) return tf.reduce_sum(xent), tf.reduce_sum(weights) @registry.register_image_modality("default") class ImageModality(modality.Modality): """Modality for images.""" - - def __init__(self, model_hparams, vocab_size): - super(ImageModality, self).__init__(model_hparams, vocab_size) - self._channels = 3 - - @property - def top_dimensionality(self): - return 256 + NUM_CHANNELS = 3 def bottom(self, inputs): with tf.variable_scope(self.name): @@ -217,7 +218,7 @@ def top(self, body_output, _): common_layers.shape_dim(body_output, i) for i in range(3) ] dim = body_output.get_shape().as_list()[-1] // 3 - reshape_shape.extend([self._channels, dim]) + reshape_shape.extend([self.NUM_CHANNELS, dim]) out = tf.reshape(body_output, reshape_shape) res = tf.layers.dense(out, self.top_dimensionality) @@ -226,21 +227,11 @@ def top(self, body_output, _): tf.summary.image("result", res_argmax, max_outputs=1) return res - def loss(self, top_out, targets, weights_fn=common_layers.weights_all): - # Call the default implementation, but weight 1.0 on 0s by default. - # (Since we're processing images and so have no padding and some pixel 0s.) - return super(ImageModality, self).loss( - top_out, targets, weights_fn=weights_fn) - @registry.register_image_modality("image_identity_compress") class ImageIdentityCompressModality(modality.Modality): """Modality for images used in generation.""" - @property - def top_dimensionality(self): - return 256 - def bottom_compress(self, inputs, name="bottom"): """Transform input from data space to model space. @@ -296,12 +287,6 @@ def top(self, body_output, _): channels, self.top_dimensionality]) return x - def loss(self, top_out, targets, weights_fn=common_layers.weights_all): - # Call the default implementation, but weight 1.0 on 0s by default. - # (Since we're processing images and so have no padding and some pixel 0s.) - return super(ImageIdentityCompressModality, self).loss( - top_out, targets, weights_fn=weights_fn) - @registry.register_audio_modality("default") class AudioModality(modality.Modality): @@ -399,10 +384,6 @@ def name(self): return "class_label_modality_%d_%d" % (self._vocab_size, self._body_input_depth) - @property - def top_dimensionality(self): - return self._vocab_size - def bottom(self, x): with tf.variable_scope(self.name): return common_layers.embedding( @@ -434,12 +415,6 @@ def top(self, body_output, _): res = tf.layers.dense(x, self._vocab_size) return tf.expand_dims(res, 3) - def loss(self, top_out, targets, weights_fn=common_layers.weights_all): - # Call the default implementation, but weight 1.0 on 0s by default. - # (Since we're processing images and so have no padding and some pixel 0s.) - return super(ClassLabelModality, self).loss( - top_out, targets, weights_fn=weights_fn) - @registry.register_generic_modality("default") @registry.register_audio_modality("identity") @@ -450,10 +425,6 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_all): class IdentityModality(modality.Modality): """Does nothing.""" - @property - def targets_dimensionality(self): - return self._vocab_size - def bottom(self, x): return tf.to_float(x) @@ -476,7 +447,7 @@ def top(self, body_output, _): with tf.variable_scope("real"): return tf.layers.dense(body_output, self._vocab_size) - def loss(self, top_out, targets, weights_fn=common_layers.weights_all): + def loss(self, top_out, targets): raise NotImplementedError() @@ -485,70 +456,35 @@ def loss(self, top_out, targets, weights_fn=common_layers.weights_all): class RealL2LossModality(RealModality): """Modality for real (i.e. float) vectors with L2 (Gaussian) loss.""" - def loss(self, top_out, targets, weights_fn=common_layers.weights_all): + def loss(self, top_out, targets): predictions = top_out with tf.name_scope("l2"): - weights = weights_fn(targets) + weights = self.targets_weights_fn(targets) l2 = tf.pow(predictions - targets, 2) return tf.reduce_sum(l2 * weights), tf.reduce_sum(weights) @registry.register_real_modality("log_poisson_loss") -class RealLogPoissonLossModality(RealL2LossModality): - """Modality for real (i.e. float) vectors with log Poisson regression loss. - """ - - def bottom(self, x): - return x +class RealLogPoissonLossModality(RealModality): + """Modality for real (i.e. float) vectors with log Poisson regression loss.""" - def loss(self, top_out, targets, weights_fn=common_layers.weights_all): + def loss(self, top_out, targets): predictions = top_out with tf.name_scope("log_possion"): - weights = weights_fn(targets) + weights = self.targets_weights_fn(targets) lp_loss = tf.nn.log_poisson_loss(targets, predictions) return tf.reduce_sum(lp_loss * weights), tf.reduce_sum(weights) -@registry.register_image_modality("identity_no_pad") -class IdentityModalityNoPad(modality.Modality): - """Does nothing except making sure that there is no padding in cross-ent.""" - - @property - def top_dimensionality(self): - return 256 - - @property - def targets_dimensionality(self): - return self._vocab_size - - def bottom(self, x): - return tf.to_float(x) - - def top(self, body_output, _): - return body_output - - def loss(self, top_out, targets, weights_fn=common_layers.weights_all): - # Call the default implementation, but weight 1.0 on 0s by default. - # (Since we're processing images and so have no padding and some pixel 0s.) - return super(IdentityModalityNoPad, self).loss( - top_out, targets, weights_fn=weights_fn) - - -@registry.register_image_modality("no_loss") -class NoLossModality(modality.Modality): - """Does nothing to the input and returns no loss.""" - - @property - def targets_dimensionality(self): - return self._vocab_size - - def bottom(self, x): - return tf.to_float(x) - - def top(self, body_output, _): - return body_output +@registry.register_generic_modality("zero_loss") +@registry.register_audio_modality("zero_loss") +@registry.register_image_modality("zero_loss") +@registry.register_symbol_modality("zero_loss") +@registry.register_class_label_modality("zero_loss") +@registry.register_real_modality("zero_loss") +class IdentityZeroLossModality(IdentityModality): + """Identity with 0 loss.""" - def loss_sharded(self, sharded_top_out, sharded_targets, data_parallelism): - """Return nothing.""" - return tf.constant(0.0, tf.float32) + def loss(self, top_out, targets): + return tf.constant(0., tf.float32), tf.constant(0., tf.float32) diff --git a/tensor2tensor/models/vanilla_gan.py b/tensor2tensor/models/vanilla_gan.py index d6611d50f..c9ce8ff3f 100644 --- a/tensor2tensor/models/vanilla_gan.py +++ b/tensor2tensor/models/vanilla_gan.py @@ -146,8 +146,8 @@ def vanilla_gan(): hparams = common_hparams.basic_params1() - hparams.input_modalities = "image:no_loss" - hparams.target_modality = "image:no_loss" + hparams.input_modalities = "inputs:image:zero_loss" + hparams.target_modality = "image:zero_loss" hparams.batch_size = 2048 # 3136 hparams.label_smoothing = 0.0 diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index cee8d630f..e6c2863ee 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -25,7 +25,6 @@ import six -from tensor2tensor.layers import common_layers from tensor2tensor.utils import data_reader from tensor2tensor.utils import metrics from tensor2tensor.utils import optimize @@ -192,7 +191,7 @@ def model_fn(features, labels, mode, params, config): problem = hp.problem_instances[0] if use_tpu: - eval_metrics_fn = create_eval_metrics_fn(problem) + eval_metrics_fn = create_eval_metrics_fn(problem, hparams) _remove_summaries() return tf.contrib.tpu.TPUEstimatorSpec( mode, @@ -245,14 +244,18 @@ def model_fn(features, labels, mode, params, config): ]) -def create_eval_metrics_fn(problem): +def create_eval_metrics_fn(problem, hparams): """Create the metrics_fn that TPUEstimatorSpec expects.""" + tm = problem.get_hparams().target_modality + if isinstance(tm, tuple): + tm = registry.create_modality(tm, hparams) + weights_fn = tm.weights_fn + def make_metric_fn(metric_fn): def wrapped_metric_fn(logits, labels): - num, den = metric_fn( - logits, labels, weights_fn=common_layers.weights_nonzero) + num, den = metric_fn(logits, labels, weights_fn=weights_fn) return tf.metrics.mean(num, den) return wrapped_metric_fn diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 11d7356c5..c9e52e566 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -24,6 +24,7 @@ from tensor2tensor.layers import common_layers from tensor2tensor.utils import bleu_hook +from tensor2tensor.utils import registry from tensor2tensor.utils import rouge import tensorflow as tf @@ -284,7 +285,7 @@ def problem_metric_fn(predictions, features): # "features". kwargs = {} args, _, keywords, _ = inspect.getargspec(metric_fn) - if "features" in args or keywords: + if ("features" in args) or keywords: kwargs["features"] = features def wrapped_metric_fn(): @@ -308,28 +309,21 @@ def wrapped_metric_fn(): metrics, METRICS_FNS.keys())) - class_output = "image" in problem_name and "coco" not in problem_name - real_output = "gene_expression" in problem_name - if model_hparams.prepend_mode != "none": - assert (model_hparams.prepend_mode == "prepend_inputs_masked_attention" or - model_hparams.prepend_mode == "prepend_inputs_full_attention") - assert not class_output - weights_fn = common_layers.weights_prepend_inputs_to_targets - elif class_output or real_output: - weights_fn = common_layers.weights_all - else: - weights_fn = common_layers.weights_nonzero - def image_wrapped_metric_fn(predictions, labels, weights_fn=common_layers.weights_nonzero): _, _ = labels, weights_fn return metric_fn(predictions, model_hparams) + tm = problem_instance.get_hparams().target_modality + if isinstance(tm, tuple): + tm = registry.create_modality(tm, model_hparams) + weights_fn = tm.weights_fn + for metric in metrics: metric_fn = METRICS_FNS[metric] metric_name = "metrics-%s/%s" % (problem_name, metric) - if "image" in metric: + if metric == Metrics.IMAGE_SUMMARY: eval_metrics[metric_name] = image_wrapped_metric_fn else: problem_metric_fn = make_problem_specific_metric_fn( diff --git a/tensor2tensor/utils/modality.py b/tensor2tensor/utils/modality.py index 43ca422b7..d06b35523 100644 --- a/tensor2tensor/utils/modality.py +++ b/tensor2tensor/utils/modality.py @@ -65,7 +65,7 @@ def name(self): @property def top_dimensionality(self): """Integer, the last dimension of the predictions (vocab size).""" - raise NotImplementedError("Abstract Method") + return self._vocab_size @property def _body_input_depth(self): @@ -87,6 +87,22 @@ def top_is_pointwise(self): """ return False + @property + def targets_weights_fn(self): + """The weights function to use for loss and eval metrics. + + A weights function takes labels and returns a Tensor that assigns weights + (usually either 1. or 0.) to each one. + + Common weights functions are: + * weights_all: 1. for all labels + * weights_nonzero: 1. for all non-zero labels (e.g. to deal with padding) + + Returns: + Callable: (targets) -> weights Tensor + """ + return common_layers.weights_all + def bottom(self, x): """Transform one shard of input. @@ -162,14 +178,14 @@ def top_sharded(self, sharded_body_output, sharded_targets, data_parallelism): """ return data_parallelism(self.top, sharded_body_output, sharded_targets) - def loss(self, top_out, targets, weights_fn=common_layers.weights_nonzero): + def loss(self, top_out, targets): """Compute loss numerator and denominator for one shard of output.""" logits = top_out return common_layers.padded_cross_entropy( logits, targets, self._model_hparams.label_smoothing, - weights_fn=weights_fn) + weights_fn=self.targets_weights_fn) def loss_sharded(self, sharded_top_out, sharded_targets, data_parallelism): """Compute loss for all shards.""" From 176efe61342bfb12767ddd814122f8b09aa76de6 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 14 Nov 2017 15:16:45 -0800 Subject: [PATCH 0593/4095] Update layout optimizer config code. pruning and constfold are enabled by default now (no need to explicitly specify them any more). PiperOrigin-RevId: 175743440 --- tensor2tensor/utils/trainer_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index 70faab24a..e1a3947fa 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -35,6 +35,7 @@ import tensorflow as tf from tensorflow.contrib.learn.python.learn import learn_runner +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python import debug flags = tf.flags @@ -416,12 +417,12 @@ def session_config(): opt_level=tf.OptimizerOptions.L1, do_function_inlining=False)) if FLAGS.experimental_optimize_placement: - rewrite_options = tf.RewriterConfig(optimize_tensor_layout=True) + rewrite_options = rewriter_config_pb2.RewriterConfig() rewrite_options.optimizers.append("pruning") rewrite_options.optimizers.append("constfold") + rewrite_options.optimizers.append("arithmetic") rewrite_options.optimizers.append("layout") - graph_options = tf.GraphOptions( - rewrite_options=rewrite_options, infer_shapes=True) + graph_options = tf.GraphOptions(rewrite_options=rewrite_options) gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction) From e0a2f86fdd3b36c381fff66ee1399e70fb689299 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 14 Nov 2017 15:57:07 -0800 Subject: [PATCH 0594/4095] Preserve static shape info where possible with shape_list and enable constant ones_matrix_band_part PiperOrigin-RevId: 175748936 --- tensor2tensor/layers/common_attention.py | 778 ++++++++++++----------- tensor2tensor/layers/common_layers.py | 157 +++-- tensor2tensor/layers/modalities.py | 35 +- tensor2tensor/models/aligned.py | 75 ++- tensor2tensor/models/attention_lm.py | 20 +- tensor2tensor/models/lstm.py | 4 +- tensor2tensor/models/transformer.py | 38 +- tensor2tensor/models/xception.py | 1 + tensor2tensor/tpu/tpu_trainer_lib.py | 29 +- tensor2tensor/utils/beam_search.py | 18 +- tensor2tensor/utils/beam_search_test.py | 2 +- tensor2tensor/utils/diet.py | 2 +- tensor2tensor/utils/metrics.py | 5 +- tensor2tensor/utils/optimize.py | 4 +- tensor2tensor/utils/t2t_model.py | 105 +-- 15 files changed, 669 insertions(+), 604 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 17cb23a1d..5aafe6348 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -21,6 +21,7 @@ import collections import functools import math +import operator # Dependency imports import numpy as np @@ -36,11 +37,9 @@ from tensorflow.python.framework import function - # Struct conatining the sequences ids and order on a batch (are send to the # expert to allow them to compute the bias mask) -BatchInfo = collections.namedtuple( - "BatchInfo", "coordinates, order") +BatchInfo = collections.namedtuple("BatchInfo", "coordinates, order") _expert_count = 0 @@ -107,6 +106,7 @@ def decorator(x, *args, **kwargs): y, extra_loss = y return y, extra_loss + return decorator total_key_depth = hparams.attention_key_channels or hparams.hidden_size @@ -117,8 +117,8 @@ def decorator(x, *args, **kwargs): # Use filter size if moe_hidden_sizes was not given if not moe_hidden_sizes: moe_hidden_sizes = [hparams.filter_size] - expert_fn = expert_utils.ffn_expert_fn( - hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size) + expert_fn = expert_utils.ffn_expert_fn(hparams.hidden_size, moe_hidden_sizes, + hparams.hidden_size) # Attention layers: @@ -133,8 +133,7 @@ def decorator(x, *args, **kwargs): output_depth=hparams.hidden_size, num_heads=hparams.num_heads, dropout_rate=hparams.attention_dropout, - ) - ) + )) # === Local attention layer === # Reuse same parameters as multihead_attention @@ -268,8 +267,10 @@ def add_standard_attention_hparams(hparams): @expert_utils.add_name_scope() -def get_timing_signal_1d( - length, channels, min_timescale=1.0, max_timescale=1.0e4): +def get_timing_signal_1d(length, + channels, + min_timescale=1.0, + max_timescale=1.0e4): """Gets a bunch of sinusoids of different frequencies. Each channel of the input Tensor is incremented by a sinusoid of a different @@ -342,14 +343,16 @@ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): Returns: a Tensor the same shape as x. """ - length = tf.shape(x)[1] - channels = tf.shape(x)[2] + length = common_layers.shape_list(x)[1] + channels = common_layers.shape_list(x)[2] signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale) return x + signal @expert_utils.add_name_scope() -def add_timing_signal_1d_given_position(x, position, min_timescale=1.0, +def add_timing_signal_1d_given_position(x, + position, + min_timescale=1.0, max_timescale=1.0e4): """Adds sinusoids of diff frequencies to a Tensor, with timing position given. @@ -362,15 +365,16 @@ def add_timing_signal_1d_given_position(x, position, min_timescale=1.0, Returns: a Tensor the same shape as x. """ - channels = tf.shape(x)[2] + channels = common_layers.shape_list(x)[2] num_timescales = channels // 2 log_timescale_increment = ( math.log(float(max_timescale) / float(min_timescale)) / (tf.to_float(num_timescales) - 1)) inv_timescales = min_timescale * tf.exp( tf.to_float(tf.range(num_timescales)) * -log_timescale_increment) - scaled_time = (tf.expand_dims(tf.to_float(position), 2) * - tf.expand_dims(tf.expand_dims(inv_timescales, 0), 0)) + scaled_time = ( + tf.expand_dims(tf.to_float(position), 2) * tf.expand_dims( + tf.expand_dims(inv_timescales, 0), 0)) signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=2) signal = tf.pad(signal, [[0, 0], [0, 0], [0, tf.mod(channels, 2)]]) return x + signal @@ -408,9 +412,8 @@ def add_timing_signal_nd(x, min_timescale=1.0, max_timescale=1.0e4): Returns: a Tensor the same shape as x. """ - static_shape = x.get_shape().as_list() - num_dims = len(static_shape) - 2 - channels = tf.shape(x)[-1] + num_dims = len(x.get_shape().as_list()) - 2 + channels = common_layers.shape_list(x)[-1] num_timescales = channels // (num_dims * 2) log_timescale_increment = ( math.log(float(max_timescale) / float(min_timescale)) / @@ -418,7 +421,7 @@ def add_timing_signal_nd(x, min_timescale=1.0, max_timescale=1.0e4): inv_timescales = min_timescale * tf.exp( tf.to_float(tf.range(num_timescales)) * -log_timescale_increment) for dim in xrange(num_dims): - length = tf.shape(x)[dim + 1] + length = common_layers.shape_list(x)[dim + 1] position = tf.to_float(tf.range(length)) scaled_time = tf.expand_dims(position, 1) * tf.expand_dims( inv_timescales, 0) @@ -450,10 +453,9 @@ def add_positional_embedding_nd(x, max_length, name): Returns: a Tensor the same shape as x. """ - static_shape = x.get_shape().as_list() - dynamic_shape = tf.shape(x) - num_dims = len(static_shape) - 2 - depth = static_shape[-1] + x_shape = common_layers.shape_list(x) + num_dims = len(x_shape) - 2 + depth = x_shape[-1] base_shape = [1] * (num_dims + 1) + [depth] base_start = [0] * (num_dims + 2) base_size = [-1] + [1] * num_dims + [depth] @@ -462,12 +464,13 @@ def add_positional_embedding_nd(x, max_length, name): start = base_start[:] size = base_size[:] shape[i + 1] = max_length - size[i + 1] = dynamic_shape[i + 1] - var = (tf.get_variable( - name + "_%d" % i, - shape, - initializer=tf.random_normal_initializer(0, depth**-0.5)) * - (depth**0.5)) + size[i + 1] = x_shape[i + 1] + var = ( + tf.get_variable( + name + "_%d" % i, + shape, + initializer=tf.random_normal_initializer(0, depth**-0.5)) * + (depth**0.5)) x += tf.slice(var, start, size) return x @@ -508,10 +511,10 @@ def __init__(self, depth, nb_hyperplanes, nb_replicat=1, trainable=False): trainable=self.trainable, ) # Projection vector from the bit space to similarity score space - self.t_group = tf.constant([ - self._idx_to_bits(i) - for i in range(self.nb_buckets) - ], dtype=tf.float32, name="group") + self.t_group = tf.constant( + [self._idx_to_bits(i) for i in range(self.nb_buckets)], + dtype=tf.float32, + name="group") def _idx_to_bits(self, i): """Convert an group index to its bit representation.""" @@ -577,19 +580,22 @@ def attention_bias_local(length, max_backward, max_forward): This does not actually save any computation. Args: - length: an integer Scalar. - max_backward: an int64 Scalar - maximum distance backward to attend. - negative values indicate unlimited. - max_forward: an int64 Scalar - maximum distance forward to attend. - negative values indicate unlimited. + length: int + max_backward: int, maximum distance backward to attend. Negative values + indicate unlimited. + max_forward: int, maximum distance forward to attend. Negative values + indicate unlimited. Returns: a `Tensor` with shape [1, 1, length, length]. """ - band = tf.matrix_band_part( - tf.ones([length, length]), max_backward, max_forward) - ret = -1e9 * (1.0 - band) - return tf.reshape(ret, [1, 1, length, length]) + band = common_layers.ones_matrix_band_part( + length, + length, + max_backward, + max_forward, + out_shape=[1, 1, length, length]) + return -1e9 * (1.0 - band) @expert_utils.add_name_scope() @@ -665,8 +671,8 @@ def attention_bias_prepend_inputs_full_attention(padding): target_pos = tf.cumsum(in_target, axis=1) # A position with a lesser target_pos cannot see a position with greater # target_pos. - illegal_connections = tf.greater(tf.expand_dims(target_pos, 1), - tf.expand_dims(target_pos, 2)) + illegal_connections = tf.greater( + tf.expand_dims(target_pos, 1), tf.expand_dims(target_pos, 2)) bias = tf.to_float(illegal_connections) * -1e9 bias = tf.expand_dims(bias, 1) return bias @@ -730,7 +736,6 @@ def to_float(bc): condition_fn=lambda bias: tf.minimum(1.0, tf.abs(bias)), ) - # Mask similar to upper triangular mask, but allow dispatching attention_bias_future = functools.partial( attention_bias_batch, @@ -754,12 +759,11 @@ def split_last_dimension(x, n): Returns: a Tensor with shape [..., n, m/n] """ - old_shape = x.get_shape().dims - last = old_shape[-1] - new_shape = old_shape[:-1] + [n] + [last // n if last else None] - ret = tf.reshape(x, tf.concat([tf.shape(x)[:-1], [n, -1]], 0)) - ret.set_shape(new_shape) - return ret + x_shape = common_layers.shape_list(x) + m = x_shape[-1] + if isinstance(m, int) and isinstance(n, int): + assert m % n == 0 + return tf.reshape(x, x_shape[:-1] + [n, m // n]) @expert_utils.add_name_scope() @@ -772,12 +776,9 @@ def combine_last_two_dimensions(x): Returns: a Tensor with shape [..., ab] """ - old_shape = x.get_shape().dims - a, b = old_shape[-2:] - new_shape = old_shape[:-2] + [a * b if a and b else None] - ret = tf.reshape(x, tf.concat([tf.shape(x)[:-2], [-1]], 0)) - ret.set_shape(new_shape) - return ret + x_shape = common_layers.shape_list(x) + a, b = x_shape[-2:] + return tf.reshape(x, x_shape[:-2] + [a * b]) @expert_utils.add_name_scope() @@ -790,7 +791,7 @@ def combine_first_two_dimensions(x): Returns: a Tensor with shape [ab, ...] """ - ret = tf.reshape(x, tf.concat([[-1], tf.shape(x)[2:]], 0)) + ret = tf.reshape(x, tf.concat([[-1], common_layers.shape_list(x)[2:]], 0)) old_shape = x.get_shape().dims a, b = old_shape[:2] new_shape = [a * b if a and b else None] + old_shape[2:] @@ -867,7 +868,7 @@ def attention_image_summary(attn, image_shapes=None): (query_rows, query_cols, query_channels, memory_rows, memory_cols, memory_channels). """ - num_heads = tf.shape(attn)[1] + num_heads = common_layers.shape_list(attn)[1] # [batch, query_length, memory_length, num_heads] image = tf.transpose(attn, [0, 2, 3, 1]) image = tf.pow(image, 0.2) # for high-dynamic-range @@ -886,13 +887,13 @@ def attention_image_summary(attn, image_shapes=None): assert len(image_shapes) == 6 q_rows, q_cols, q_channnels, m_rows, m_cols, m_channels = list( image_shapes) - image = tf.reshape(image, [ - -1, q_rows, q_cols, q_channnels, m_rows, m_cols, m_channels, 3 - ]) + image = tf.reshape( + image, + [-1, q_rows, q_cols, q_channnels, m_rows, m_cols, m_channels, 3]) image = tf.transpose(image, [0, 1, 4, 3, 2, 5, 6, 7]) - image = tf.reshape(image, [ - -1, q_rows * m_rows * q_channnels, q_cols * m_cols * m_channels, 3 - ]) + image = tf.reshape( + image, + [-1, q_rows * m_rows * q_channnels, q_cols * m_cols * m_channels, 3]) tf.summary.image("attention", image, max_outputs=1) @@ -951,9 +952,9 @@ def grouped_attention_multihead(query_antecedent, ValueError: if the key depth or value depth are not divisible by the number of attention heads. """ - batch = tf.shape(query_antecedent)[0] - length_q = tf.shape(query_antecedent)[1] - length_kv = tf.shape(memory_antecedent)[1] + batch = common_layers.shape_list(query_antecedent)[0] + length_q = common_layers.shape_list(query_antecedent)[1] + length_kv = common_layers.shape_list(memory_antecedent)[1] if total_key_depth % num_heads != 0: raise ValueError("Key depth (%d) must be divisible by the number of " @@ -970,8 +971,10 @@ def grouped_attention_multihead(query_antecedent, q = common_layers.conv1d( query_antecedent, total_key_depth, 1, name="q_transform") kv = common_layers.conv1d( - memory_antecedent, total_key_depth + total_value_depth, - 1, name="kv_transform") + memory_antecedent, + total_key_depth + total_value_depth, + 1, + name="kv_transform") q = split_heads(q, num_heads) kv = split_heads(kv, num_heads) # Make predictions about q_total and m_total. @@ -980,13 +983,18 @@ def grouped_attention_multihead(query_antecedent, # to keep these losses from back-propagating to the rest of the model. # We add biases that help balance the usage of the experts. q_pred = common_layers.conv1d( - tf.stop_gradient(query_antecedent), num_heads * num_groups, 1, + tf.stop_gradient(query_antecedent), + num_heads * num_groups, + 1, name="q_pred") q_pred = split_heads(q_pred, num_heads) q_bias = tf.get_variable("q_bias", [1, num_heads, 1, num_groups]) q_pred_biased = q_pred + q_bias - m_pred = common_layers.conv1d(tf.stop_gradient( - memory_antecedent), num_heads * num_groups, 1, name="m_pred") + m_pred = common_layers.conv1d( + tf.stop_gradient(memory_antecedent), + num_heads * num_groups, + 1, + name="m_pred") m_pred = split_heads(m_pred, num_heads) m_bias = tf.get_variable("m_bias", [1, num_heads, 1, num_groups]) m_pred_biased = m_pred + m_bias @@ -1003,18 +1011,23 @@ def grouped_attention_multihead(query_antecedent, q_requests = tf.one_hot(q_group, num_groups, axis=-1) m_requests = tf.to_float(tf.greater(m_pred_biased, 0.0)) # include first memory position in all groups, to avoid division by zero. - m_requests = tf.maximum( - m_requests, tf.reshape(tf.one_hot([0], length_kv), [1, length_kv, 1])) + m_requests = tf.maximum(m_requests, + tf.reshape( + tf.one_hot([0], length_kv), [1, length_kv, 1])) q_group_size = tf.reduce_sum(q_requests, 1) m_group_size = tf.reduce_sum(m_requests, 1) q_group_target_size = tf.to_float(length_q) / tf.to_float(num_groups) m_group_target_size = ( - tf.to_float(length_kv) * memory_target_density - / tf.to_float(num_groups)) - capacity_q = tf.minimum(length_q, tf.to_int32( - q_group_target_size * multiplicative_overhead + additive_overhead)) - capacity_m = tf.minimum(length_kv, tf.to_int32( - m_group_target_size * multiplicative_overhead + additive_overhead)) + tf.to_float(length_kv) * memory_target_density / + tf.to_float(num_groups)) + capacity_q = tf.minimum( + length_q, + tf.to_int32( + q_group_target_size * multiplicative_overhead + additive_overhead)) + capacity_m = tf.minimum( + length_kv, + tf.to_int32( + m_group_target_size * multiplicative_overhead + additive_overhead)) q_dispatcher = expert_utils.TruncatingDispatcher(q_requests, capacity_q) m_dispatcher = expert_utils.TruncatingDispatcher(m_requests, capacity_m) q_gates = q_dispatcher.gates() @@ -1122,8 +1135,8 @@ def grouped_attention_multihead(query_antecedent, k_trunc = kv[:trunc_heads, :, :depth_qk] logits_trunc = tf.matmul(q_trunc, k_trunc, transpose_b=True) if mask_right: - band = tf.matrix_band_part( - tf.ones([trunc_length_q, length_kv]), -1, 0) + band = common_layers.ones_matrix_band_part(trunc_length_q, length_kv, + -1, 0) trunc_bias = tf.expand_dims((1.0 - band) * -1e9, 0) logits_trunc += trunc_bias att_trunc = tf.nn.softmax(logits_trunc) @@ -1137,7 +1150,8 @@ def grouped_attention_multihead(query_antecedent, # show one group for each head. att_per_group = tf.expand_dims(weights[:trunc_heads, 0, :, :], -1) tf.summary.image( - "att_per_group_%d", tf.pow(att_per_group, 0.2), + "att_per_group_%d", + tf.pow(att_per_group, 0.2), max_outputs=trunc_heads) return o, extra_loss @@ -1280,7 +1294,7 @@ def dot_product_attention_relative(q, # Use separate embeddings suitable for keys and values. heads = q.get_shape().as_list()[1] depth = q.get_shape().as_list()[3] - length = tf.shape(q)[2] + length = common_layers.shape_list(q)[2] relations_keys = _generate_relative_positions_embeddings( heads, length, depth, max_relative_position, "relative_positions_keys") relations_values = _generate_relative_positions_embeddings( @@ -1298,8 +1312,7 @@ def dot_product_attention_relative(q, return _relative_attention_inner(weights, v, relations_values, False) -def masked_local_attention_1d( - q, k, v, block_length=128, name=None): +def masked_local_attention_1d(q, k, v, block_length=128, name=None): """Attention to the source position and a neighborhood to the left of it. The sequence is divided into blocks of length block_size. @@ -1320,12 +1333,12 @@ def masked_local_attention_1d( Returns: a Tensor of shape [batch, heads, length, depth_v] """ - with tf.variable_scope(name, default_name="local_attention_1d", - values=[q, k, v]): + with tf.variable_scope( + name, default_name="local_attention_1d", values=[q, k, v]): v_shape = v.get_shape() - batch = common_layers.shape_dim(q, 0) - heads = common_layers.shape_dim(q, 1) - length = common_layers.shape_dim(q, 2) + batch = common_layers.shape_list(q)[0] + heads = common_layers.shape_list(q)[1] + length = common_layers.shape_list(q)[2] if isinstance(block_length, tf.Tensor): const = tf.contrib.util.constant_value(block_length) if const is not None: @@ -1335,10 +1348,10 @@ def masked_local_attention_1d( if isinstance(length, int) and isinstance(block_length, int): block_length = length if length < block_length * 2 else block_length else: - block_length = tf.where(tf.less(length, block_length * 2), - length, block_length) - depth_k = tf.shape(k)[3] - depth_v = tf.shape(v)[3] + block_length = tf.where( + tf.less(length, block_length * 2), length, block_length) + depth_k = common_layers.shape_list(k)[3] + depth_v = common_layers.shape_list(v)[3] original_length = length padding_size = tf.mod(-length, block_length) length += padding_size @@ -1353,7 +1366,10 @@ def masked_local_attention_1d( first_k = tf.slice(k, [0, 0, 0, 0], [-1, -1, block_length, -1]) first_v = tf.slice(v, [0, 0, 0, 0], [-1, -1, block_length, -1]) first_output = dot_product_attention( - first_q, first_k, first_v, attention_bias_lower_triangle(block_length), + first_q, + first_k, + first_v, + attention_bias_lower_triangle(block_length), name="fist_block") # compute attention for all subsequent query blocks. @@ -1363,23 +1379,23 @@ def masked_local_attention_1d( def local(x): """Create a local version of the keys or values.""" - prev_block = tf.slice( - x, [0, 0, 0, 0, 0], [-1, -1, num_blocks - 1, -1, -1]) - cur_block = tf.slice( - x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) + prev_block = tf.slice(x, [0, 0, 0, 0, 0], + [-1, -1, num_blocks - 1, -1, -1]) + cur_block = tf.slice(x, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) return tf.concat([prev_block, cur_block], 3) + local_k = local(k) local_v = local(v) tail_q = tf.slice(q, [0, 0, 1, 0, 0], [-1, -1, -1, -1, -1]) - local_length = tf.shape(local_k)[3] + local_length = common_layers.shape_list(local_k)[3] # [batch, heads, num_blocks - 1, block_length, local_length] attention = tf.matmul(tail_q, local_k, transpose_b=True) # make sure source_pos <= target_pos - good_part = tf.matrix_band_part( - tf.ones([block_length, local_length]), -1, tf.to_int64(block_length)) + good_part = common_layers.ones_matrix_band_part(block_length, local_length, + -1, block_length) mask = (1.0 - good_part) * -1e9 attention += tf.reshape(mask, [1, 1, 1, block_length, local_length]) attention = tf.nn.softmax(attention) @@ -1394,12 +1410,7 @@ def local(x): return output -def local_attention_1d(q, - k, - v, - block_length=128, - filter_width=100, - name=None): +def local_attention_1d(q, k, v, block_length=128, filter_width=100, name=None): """strided block local self-attention. Args: @@ -1416,14 +1427,14 @@ def local_attention_1d(q, with tf.variable_scope( name, default_name="local_self_attention_1d", values=[q, k, v]): v_shape = v.get_shape() - depth_v = tf.shape(v)[3] - batch_size = tf.shape(q)[0] - num_heads = tf.shape(q)[1] - original_length = tf.shape(q)[2] + depth_v = common_layers.shape_list(v)[3] + batch_size = common_layers.shape_list(q)[0] + num_heads = common_layers.shape_list(q)[1] + original_length = common_layers.shape_list(q)[2] # making sure q is a multiple of d def pad_to_multiple(x, pad_length): - x_length = tf.shape(x)[2] + x_length = common_layers.shape_list(x)[2] return tf.pad(x, [[0, 0], [0, 0], [0, -x_length % pad_length], [0, 0]]) def pad_l_and_r(x, pad_length): @@ -1434,7 +1445,7 @@ def pad_l_and_r(x, pad_length): v = pad_to_multiple(v, block_length) # Setting up q blocks - new_q_shape = tf.shape(q) + new_q_shape = common_layers.shape_list(q) # Setting up q blocks q = tf.reshape(q, [ new_q_shape[0], new_q_shape[1], new_q_shape[2] // block_length, @@ -1445,7 +1456,7 @@ def pad_l_and_r(x, pad_length): k = pad_l_and_r(k, filter_width) v = pad_l_and_r(v, filter_width) - length = tf.shape(k)[2] + length = common_layers.shape_list(k)[2] full_filter_width = block_length + 2 * filter_width # getting gather indices indices = tf.range(0, length, delta=1, name="index_range") @@ -1475,7 +1486,12 @@ def pad_l_and_r(x, pad_length): v_new = tf.transpose(v_new, [2, 3, 0, 1, 4]) output = dot_product_attention( - q, k_new, v_new, attention_bias, dropout_rate=0., name="local_1d", + q, + k_new, + v_new, + attention_bias, + dropout_rate=0., + name="local_1d", make_image_summary=False) output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) # Remove the padding if introduced @@ -1519,14 +1535,15 @@ def dilated_self_attention_1d(q, with tf.variable_scope( name, default_name="dilated_self_attention_1d", values=[q, k, v]): v_list_shape = v.get_shape().as_list() - v_shape = tf.shape(v) + v_shape = common_layers.shape_list(v) depth_v = v_shape[3] batch_size = v_shape[0] num_heads = v_shape[1] - original_length = tf.shape(q)[2] + original_length = common_layers.shape_list(q)[2] + # making sure q is a multiple of query block size def pad_to_multiple(x, pad_length): - x_length = tf.shape(x)[2] + x_length = common_layers.shape_list(x)[2] return tf.pad(x, [[0, 0], [0, 0], [0, -x_length % pad_length], [0, 0]]) def pad_l_and_r(x, pad_length): @@ -1540,7 +1557,7 @@ def pad_l_and_r(x, pad_length): v.set_shape(v_list_shape) k.set_shape(v_list_shape) # Setting up q blocks - new_q_shape = tf.shape(q) + new_q_shape = common_layers.shape_list(q) # Setting up q blocks q = reshape_by_blocks(q, new_q_shape, query_block_size) self_k_part = reshape_by_blocks(k, new_q_shape, query_block_size) @@ -1569,23 +1586,29 @@ def pad_l_and_r(x, pad_length): # [length, batch, heads, dim] k_t = tf.transpose(k, [2, 0, 1, 3]) v_t = tf.transpose(v, [2, 0, 1, 3]) - left_k = gather_dilated_memory_blocks(k_t[:-k_v_padding, :, :, :], - num_memory_blocks, gap_size, - query_block_size, memory_block_size, - gather_indices) - left_v = gather_dilated_memory_blocks(v_t[:-k_v_padding, :, :, :], - num_memory_blocks, gap_size, - query_block_size, memory_block_size, - gather_indices) - - right_k = gather_dilated_memory_blocks(k_t[k_v_padding:, :, :, :], - num_memory_blocks, gap_size, - query_block_size, memory_block_size, - gather_indices, direction="right") - right_v = gather_dilated_memory_blocks(v_t[k_v_padding:, :, :, :], - num_memory_blocks, gap_size, - query_block_size, memory_block_size, - gather_indices, direction="right") + left_k = gather_dilated_memory_blocks( + k_t[:-k_v_padding, :, :, :], num_memory_blocks, gap_size, + query_block_size, memory_block_size, gather_indices) + left_v = gather_dilated_memory_blocks( + v_t[:-k_v_padding, :, :, :], num_memory_blocks, gap_size, + query_block_size, memory_block_size, gather_indices) + + right_k = gather_dilated_memory_blocks( + k_t[k_v_padding:, :, :, :], + num_memory_blocks, + gap_size, + query_block_size, + memory_block_size, + gather_indices, + direction="right") + right_v = gather_dilated_memory_blocks( + v_t[k_v_padding:, :, :, :], + num_memory_blocks, + gap_size, + query_block_size, + memory_block_size, + gather_indices, + direction="right") k_windows = tf.concat([left_k, self_k_part, right_k], axis=3) v_windows = tf.concat([left_v, self_v_part, right_v], axis=3) @@ -1593,8 +1616,13 @@ def pad_l_and_r(x, pad_length): embedding_to_padding(k_windows) * -1e9, axis=-2) output = dot_product_attention( - q, k_windows, v_windows, attention_bias, dropout_rate=0., - name="dilated_1d", make_image_summary=False) + q, + k_windows, + v_windows, + attention_bias, + dropout_rate=0., + name="dilated_1d", + make_image_summary=False) output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) # Remove the padding if introduced output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) @@ -1602,9 +1630,13 @@ def pad_l_and_r(x, pad_length): return output -def gather_dilated_memory_blocks(x, num_memory_blocks, gap_size, - query_block_size, memory_block_size, - gather_indices, direction="left"): +def gather_dilated_memory_blocks(x, + num_memory_blocks, + gap_size, + query_block_size, + memory_block_size, + gather_indices, + direction="left"): """Gathers blocks with gaps in between. Args: @@ -1623,17 +1655,14 @@ def gather_dilated_memory_blocks(x, num_memory_blocks, gap_size, gathered_blocks = [] # gathering memory blocks for block_id in range(num_memory_blocks): - block_end_index = -(query_block_size + - gap_size * (block_id+1) + memory_block_size * - block_id) - 1 - block_start_index = ( - (memory_block_size + gap_size) * - (num_memory_blocks - (block_id + 1)) - ) + block_end_index = -(query_block_size + gap_size * + (block_id + 1) + memory_block_size * block_id) - 1 + block_start_index = ((memory_block_size + gap_size) * (num_memory_blocks - + (block_id + 1))) if direction != "left": - [block_end_index, block_start_index] = [ - -block_start_index - 1, -block_end_index + 1 - ] + [block_end_index, + block_start_index] = [-block_start_index - 1, -block_end_index + 1] + def gather_dilated_1d_blocks(x, gather_indices): x_new = tf.gather(x, gather_indices) # [batch, heads, blocks, block_length, dim] @@ -1672,14 +1701,15 @@ def masked_dilated_self_attention_1d(q, with tf.variable_scope( name, default_name="masked_dilated_self_attention_1d", values=[q, k, v]): v_list_shape = v.get_shape().as_list() - v_shape = tf.shape(v) + v_shape = common_layers.shape_list(v) depth_v = v_shape[3] batch_size = v_shape[0] num_heads = v_shape[1] - original_length = tf.shape(q)[2] + original_length = common_layers.shape_list(q)[2] + # making sure q is a multiple of query block size def pad_to_multiple(x, pad_length): - x_length = tf.shape(x)[2] + x_length = common_layers.shape_list(x)[2] return tf.pad(x, [[0, 0], [0, 0], [0, -x_length % pad_length], [0, 0]]) def pad_l(x, left_pad_length): @@ -1692,7 +1722,7 @@ def pad_l(x, left_pad_length): v.set_shape(v_list_shape) k.set_shape(v_list_shape) # Setting up q blocks - new_q_shape = tf.shape(q) + new_q_shape = common_layers.shape_list(q) # Setting up q blocks q = reshape_by_blocks(q, new_q_shape, query_block_size) @@ -1722,35 +1752,35 @@ def pad_l(x, left_pad_length): k_t = tf.transpose(k, [2, 0, 1, 3]) v_t = tf.transpose(v, [2, 0, 1, 3]) - k_unmasked_windows = gather_dilated_memory_blocks(k_t, num_memory_blocks, - gap_size, - query_block_size, - memory_block_size, - gather_indices) - v_unmasked_windows = gather_dilated_memory_blocks(v_t, num_memory_blocks, - gap_size, - query_block_size, - memory_block_size, - gather_indices) + k_unmasked_windows = gather_dilated_memory_blocks( + k_t, num_memory_blocks, gap_size, query_block_size, memory_block_size, + gather_indices) + v_unmasked_windows = gather_dilated_memory_blocks( + v_t, num_memory_blocks, gap_size, query_block_size, memory_block_size, + gather_indices) # combine memory windows - block_q_shape = tf.shape(q) - masked_attention_bias = tf.tile(tf.expand_dims( - attention_bias_lower_triangle(query_block_size), axis=0), - [block_q_shape[0], block_q_shape[1], - block_q_shape[2], 1, 1]) + block_q_shape = common_layers.shape_list(q) + masked_attention_bias = tf.tile( + tf.expand_dims(attention_bias_lower_triangle(query_block_size), axis=0), + [block_q_shape[0], block_q_shape[1], block_q_shape[2], 1, 1]) padding_attention_bias = tf.expand_dims( embedding_to_padding(k_unmasked_windows) * -1e9, axis=-2) padding_attention_bias = tf.tile(padding_attention_bias, [1, 1, 1, query_block_size, 1]) - attention_bias = tf.concat([masked_attention_bias, padding_attention_bias], - axis=-1) + attention_bias = tf.concat( + [masked_attention_bias, padding_attention_bias], axis=-1) # combine memory windows k_windows = tf.concat([self_k_part, k_unmasked_windows], 3) v_windows = tf.concat([self_v_part, v_unmasked_windows], 3) output = dot_product_attention( - q, k_windows, v_windows, attention_bias, dropout_rate=0., - name="dilated_1d", make_image_summary=False) + q, + k_windows, + v_windows, + attention_bias, + dropout_rate=0., + name="dilated_1d", + make_image_summary=False) output = tf.reshape(output, [batch_size, num_heads, -1, depth_v]) # Remove the padding if introduced output = tf.slice(output, [0, 0, 0, 0], [-1, -1, original_length, -1]) @@ -1781,12 +1811,12 @@ def local_attention_2d(q, with tf.variable_scope( name, default_name="local_self_attention_2d", values=[q, k, v]): q_shape = q.get_shape().as_list() - v_shape = tf.shape(v) + v_shape = common_layers.shape_list(v) q = pad_to_multiple_2d(q, query_shape) k = pad_to_multiple_2d(k, query_shape) v = pad_to_multiple_2d(v, query_shape) - padded_q_shape = tf.shape(q) + padded_q_shape = common_layers.shape_list(q) # Setting up k and v values paddings = [[0, 0], [0, 0], [memory_flange[0], memory_flange[1]], [memory_flange[0], memory_flange[1]], [0, 0]] @@ -1798,8 +1828,8 @@ def local_attention_2d(q, q_new = gather_blocks_2d(q, q_indices) # Setting up k and v blocks - memory_shape = (query_shape[0]+2*memory_flange[0], - query_shape[1]+2*memory_flange[1]) + memory_shape = (query_shape[0] + 2 * memory_flange[0], + query_shape[1] + 2 * memory_flange[1]) k_and_v_indices = gather_indices_2d(k, memory_shape, query_shape) k_new = gather_blocks_2d(k, k_and_v_indices) v_new = gather_blocks_2d(v, k_and_v_indices) @@ -1807,9 +1837,14 @@ def local_attention_2d(q, attention_bias = tf.expand_dims( tf.to_float(embedding_to_padding(k_new)) * -1e9, axis=-2) - output = dot_product_attention(q_new, k_new, v_new, attention_bias, - dropout_rate=0., name="local_2d", - make_image_summary=False) + output = dot_product_attention( + q_new, + k_new, + v_new, + attention_bias, + dropout_rate=0., + name="local_2d", + make_image_summary=False) # putting the representations back in the right place output = scatter_blocks_2d(output, q_indices, padded_q_shape) # Remove the padding if introduced @@ -1823,28 +1858,26 @@ def pad_to_multiple_2d(x, block_shape): """Making sure x is a multiple of shape. x is [batch, heads, h, w, depth].""" old_shape = x.get_shape().dims last = old_shape[-1] - height_padding = -tf.shape(x)[2] % block_shape[0] - width_padding = -tf.shape(x)[3] % block_shape[1] - paddings = [[0, 0], [0, 0], [0, height_padding], - [0, width_padding], [0, 0]] + height_padding = -common_layers.shape_list(x)[2] % block_shape[0] + width_padding = -common_layers.shape_list(x)[3] % block_shape[1] + paddings = [[0, 0], [0, 0], [0, height_padding], [0, width_padding], [0, 0]] padded_x = tf.pad(x, paddings) padded_shape = padded_x.get_shape().as_list() - padded_shape = padded_shape[:-1]+[last] + padded_shape = padded_shape[:-1] + [last] padded_x.set_shape(padded_shape) return padded_x def reshape_range(tensor, i, j, shape): """Reshapes a tensor between dimensions i and j.""" - target_shape = tf.concat( - [tf.shape(tensor)[:i], shape, tf.shape(tensor)[j:]], - axis=0) + t_shape = common_layers.shape_list(tensor) + target_shape = t_shape[:i] + shape + t_shape[j:] return tf.reshape(tensor, target_shape) def gather_blocks_2d(x, indices): """Gathers flattened blocks from x.""" - x_shape = tf.shape(x) + x_shape = common_layers.shape_list(x) x = reshape_range(x, 2, 4, [tf.reduce_prod(x_shape[2:4])]) # [length, batch, heads, dim] x_t = tf.transpose(x, [2, 0, 1, 3]) @@ -1855,11 +1888,11 @@ def gather_blocks_2d(x, indices): def scatter_blocks_2d(x, indices, shape): """scatters blocks from x into shape with indices.""" - x_shape = tf.shape(x) + x_shape = common_layers.shape_list(x) # [length, batch, heads, dim] - x_t = tf.transpose(tf.reshape(x, [x_shape[0], x_shape[1], -1, x_shape[-1]]), - [2, 0, 1, 3]) - x_t_shape = tf.shape(x_t) + x_t = tf.transpose( + tf.reshape(x, [x_shape[0], x_shape[1], -1, x_shape[-1]]), [2, 0, 1, 3]) + x_t_shape = common_layers.shape_list(x_t) indices = tf.reshape(indices, [-1, 1]) scattered_x = tf.scatter_nd(indices, x_t, x_t_shape) scattered_x = tf.transpose(scattered_x, [1, 2, 0, 3]) @@ -1869,18 +1902,23 @@ def scatter_blocks_2d(x, indices, shape): def gather_indices_2d(x, block_shape, block_stride): """Getting gather indices.""" # making an identity matrix kernel - kernel = tf.eye(block_shape[0]*block_shape[1]) + kernel = tf.eye(block_shape[0] * block_shape[1]) kernel = reshape_range(kernel, 0, 1, [block_shape[0], block_shape[1], 1]) # making indices [1, h, w, 1] to appy convs - indices = tf.range(0, tf.shape(x)[2] * tf.shape(x)[3], delta=1) - indices = tf.reshape(indices, [1, tf.shape(x)[2], tf.shape(x)[3], 1]) + x_shape = common_layers.shape_list(x) + indices = tf.range(x_shape[2] * x_shape[3]) + indices = tf.reshape(indices, [1, x_shape[2], x_shape[3], 1]) indices = tf.nn.conv2d( tf.cast(indices, tf.float32), kernel, strides=[1, block_stride[0], block_stride[1], 1], padding="VALID") # making indices [num_blocks, dim] to gather - num_blocks = tf.reduce_prod(tf.shape(indices)[:3]) + dims = common_layers.shape_list(indices)[:3] + if all([isinstance(dim, int) for dim in dims]): + num_blocks = functools.reduce(operator.mul, dims, 1) + else: + num_blocks = tf.reduce_prod(dims) indices = tf.reshape(indices, [num_blocks, -1]) return tf.cast(indices, tf.int32) @@ -1900,31 +1938,34 @@ def make_2d_block_raster_mask(query_shape, memory_flange): A tensor of shape query_size, memory_size """ # mask inside the query block - query_triangle = tf.matrix_band_part( - tf.ones([np.prod(query_shape), np.prod(query_shape)]), -1, 0) + query_triangle = common_layers.ones_matrix_band_part( + np.prod(query_shape), np.prod(query_shape), -1, 0) split_query_masks = tf.split(query_triangle, query_shape[0], axis=1) # adding mask for left and right mask_pieces = [ tf.concat( - [tf.ones([np.prod(query_shape), memory_flange[1]]), - split_query_masks[i], - tf.zeros([np.prod(query_shape), memory_flange[1]]) - ], axis=1) for i in range(query_shape[0])] + [ + tf.ones([np.prod(query_shape), memory_flange[1]]), + split_query_masks[i], + tf.zeros([np.prod(query_shape), memory_flange[1]]) + ], + axis=1) for i in range(query_shape[0]) + ] # adding mask for top final_mask = tf.concat( - [tf.ones( - [np.prod(query_shape), - (query_shape[1]+2*memory_flange[1])*memory_flange[0]]), - tf.concat(mask_pieces, axis=1) - ], axis=1) + [ + tf.ones([ + np.prod(query_shape), + (query_shape[1] + 2 * memory_flange[1]) * memory_flange[0] + ]), + tf.concat(mask_pieces, axis=1) + ], + axis=1) # 0. is visible location, 1.0 is masked. return 1. - final_mask -def get_memory_region(x, - query_block_shape, - memory_flange, - q_indices): +def get_memory_region(x, query_block_shape, memory_flange, q_indices): """Get the memory regions that surround a 2d query. The memory regions will be the left and top right. @@ -1953,9 +1994,8 @@ def get_memory_region(x, # top right of the query block # if no left region if memory_flange[1] > 0: - left_x_region = x_memory_padded[:, :, memory_flange[0]:, - :-(query_block_shape[1]+memory_flange[1]), - :] + left_x_region = x_memory_padded[:, :, memory_flange[ + 0]:, :-(query_block_shape[1] + memory_flange[1]), :] left_memory_shape = (query_block_shape[0], memory_flange[1]) left_indices = gather_indices_2d(left_x_region, left_memory_shape, query_block_shape) @@ -1965,7 +2005,7 @@ def get_memory_region(x, top_x_region = x_memory_padded[:, :, :-query_block_shape[0], :, :] top_memory_shape = (memory_flange[0], - query_block_shape[1]+2*memory_flange[1]) + query_block_shape[1] + 2 * memory_flange[1]) top_indices = gather_indices_2d(top_x_region, top_memory_shape, query_block_shape) @@ -1991,13 +2031,14 @@ def get_shifted_center_blocks(x, indices): length. """ center_x = gather_blocks_2d(x, indices) + # Shift right along the length dimension def shift_right_2d_blocks(x): """Shift the second to last dimension of x right by one.""" shifted_targets = ( - tf.pad(x, [[0, 0], [0, 0], [0, 0], [1, 0], [0, 0]])[:, :, :, :-1, :] - ) + tf.pad(x, [[0, 0], [0, 0], [0, 0], [1, 0], [0, 0]])[:, :, :, :-1, :]) return shifted_targets + x_shifted = shift_right_2d_blocks(center_x) return x_shifted @@ -2016,11 +2057,11 @@ def right_shift_blockwise(x, query_shape, name=None): with tf.variable_scope( name, default_name="right_shift_blockwise", values=[x]): x_list_shape = x.get_shape().as_list() - x_shape = tf.shape(x) + x_shape = common_layers.shape_list(x) # Add a dummy dimension for heads x = tf.expand_dims(x, axis=1) x = pad_to_multiple_2d(x, query_shape) - padded_x_shape = tf.shape(x) + padded_x_shape = common_layers.shape_list(x) # Setting up q blocks x_indices = gather_indices_2d(x, query_shape, query_shape) x_new = get_shifted_center_blocks(x, x_indices) @@ -2030,8 +2071,7 @@ def right_shift_blockwise(x, query_shape, name=None): # Removing the dummy head dimension output = tf.squeeze(output, axis=1) # Remove the padding if introduced - output = tf.slice(output, [0, 0, 0, 0], - [-1, x_shape[1], x_shape[2], -1]) + output = tf.slice(output, [0, 0, 0, 0], [-1, x_shape[1], x_shape[2], -1]) output.set_shape(x_list_shape) return output @@ -2068,10 +2108,10 @@ def masked_local_attention_2d(q, with tf.variable_scope( name, default_name="local_masked_self_attention_2d", values=[q, k, v]): q_shape = q.get_shape().as_list() - v_shape = tf.shape(v) + v_shape = common_layers.shape_list(v) q = pad_to_multiple_2d(q, query_shape) - padded_q_shape = tf.shape(q) + padded_q_shape = common_layers.shape_list(q) # Setting up q blocks q_indices = gather_indices_2d(q, query_shape, query_shape) q_new = gather_blocks_2d(q, q_indices) @@ -2091,29 +2131,31 @@ def masked_local_attention_2d(q, padding_mask = None if k_flange is not None: padding_mask = tf.expand_dims( - embedding_to_padding(k_flange)*-1e9, axis=-2) + embedding_to_padding(k_flange) * -1e9, axis=-2) padding_mask = tf.tile(padding_mask, [1, 1, 1, query_elements, 1]) center_attention_bias = attention_bias_lower_triangle( np.prod(query_elements)) - center_attention_bias = tf.reshape(center_attention_bias, - [1, 1, 1, query_elements, query_elements] - ) - v_center_shape = tf.shape(v_center) - center_attention_bias = tf.tile(center_attention_bias, - [v_center_shape[0], - v_center_shape[1], - v_center_shape[2], - 1, 1]) + center_attention_bias = tf.reshape( + center_attention_bias, [1, 1, 1, query_elements, query_elements]) + v_center_shape = common_layers.shape_list(v_center) + center_attention_bias = tf.tile( + center_attention_bias, + [v_center_shape[0], v_center_shape[1], v_center_shape[2], 1, 1]) if padding_mask is not None: # Combining the mask for padding and visible region attention_bias = tf.concat([padding_mask, center_attention_bias], axis=4) else: attention_bias = center_attention_bias - output = dot_product_attention(q_new, k_new, v_new, attention_bias, - dropout_rate=0., name="masked_local_2d", - make_image_summary=False) + output = dot_product_attention( + q_new, + k_new, + v_new, + attention_bias, + dropout_rate=0., + name="masked_local_2d", + make_image_summary=False) # putting the representations back in the right place output = scatter_blocks_2d(output, q_indices, padded_q_shape) # Remove the padding if introduced @@ -2123,9 +2165,14 @@ def masked_local_attention_2d(q, return output -def compute_qkv(query_antecedent, memory_antecedent, total_key_depth, - total_value_depth, q_filter_width=1, kv_filter_width=1, - q_padding="VALID", kv_padding="VALID"): +def compute_qkv(query_antecedent, + memory_antecedent, + total_key_depth, + total_value_depth, + q_filter_width=1, + kv_filter_width=1, + q_padding="VALID", + kv_padding="VALID"): """Computes query, key and value. Args: @@ -2150,8 +2197,7 @@ def compute_qkv(query_antecedent, memory_antecedent, total_key_depth, 1, name="qkv_transform") q, k, v = tf.split( - combined, [total_key_depth, total_key_depth, total_value_depth], - axis=2) + combined, [total_key_depth, total_key_depth, total_value_depth], axis=2) return q, k, v if memory_antecedent is None: @@ -2168,13 +2214,15 @@ def compute_qkv(query_antecedent, memory_antecedent, total_key_depth, kv_filter_width, padding=kv_padding, name="kv_transform") - k, v = tf.split(kv_combined, [total_key_depth, total_value_depth], - axis=2) + k, v = tf.split(kv_combined, [total_key_depth, total_value_depth], axis=2) return q, k, v # encoder-decoder attention q = common_layers.conv1d( - query_antecedent, total_key_depth, q_filter_width, padding=q_padding, + query_antecedent, + total_key_depth, + q_filter_width, + padding=q_padding, name="q_transform") combined = common_layers.conv1d( memory_antecedent, @@ -2355,16 +2403,12 @@ def multihead_attention(query_antecedent, x = local_attention_1d( q, k, v, block_length=block_length, filter_width=block_width) elif attention_type == "masked_dilated_1d": - x = masked_dilated_self_attention_1d(q, k, v, block_length, - block_width, - gap_size, - num_memory_blocks) + x = masked_dilated_self_attention_1d(q, k, v, block_length, block_width, + gap_size, num_memory_blocks) else: assert attention_type == "unmasked_dilated_1d" - x = dilated_self_attention_1d(q, k, v, block_length, - block_width, - gap_size, - num_memory_blocks) + x = dilated_self_attention_1d(q, k, v, block_length, block_width, + gap_size, num_memory_blocks) x = combine_heads(x) x = common_layers.conv1d(x, output_depth, 1, name="output_transform") if additional_returned_value is not None: @@ -2426,14 +2470,10 @@ def multihead_attention_2d(query_antecedent, q, k, v, query_shape=query_shape, memory_flange=memory_flange) else: assert attention_type == "masked_local_attention_2d" - x = masked_local_attention_2d(q, k, v, query_shape=query_shape, - memory_flange=memory_flange) + x = masked_local_attention_2d( + q, k, v, query_shape=query_shape, memory_flange=memory_flange) x = combine_heads_2d(x) - x = tf.layers.conv2d( - x, - output_depth, - (1, 1), - name="output_transform") + x = tf.layers.conv2d(x, output_depth, (1, 1), name="output_transform") return x @@ -2469,7 +2509,7 @@ def ffn_self_attention_layer(x, with tf.variable_scope( name, default_name="feedforward_self_attention", values=[x]): - x_shape = tf.shape(x) + x_shape = common_layers.shape_list(x) part_depth = filter_depth // num_parts if not share_kv: combined = common_layers.conv1d( @@ -2543,8 +2583,8 @@ def parameter_attention(x, var_shape_v, initializer=tf.random_normal_initializer(0, output_depth**-0.5)) * ( output_depth**0.5) - batch_size = tf.shape(x)[0] - length = tf.shape(x)[1] + batch_size = common_layers.shape_list(x)[0] + length = common_layers.shape_list(x)[1] q = common_layers.conv1d(x, total_key_depth, 1, name="q_transform") if dropout_rate: # This is a cheaper form of attention dropout where we use to use @@ -2625,7 +2665,7 @@ def self_attention_expert( """ depth = x.get_shape().as_list()[-1] - length = tf.shape(batch_coordinate)[0] + length = common_layers.shape_list(batch_coordinate)[0] # Print a warning message if one of the expert isn't used (useful at # inference where summaries aren't used and the gating function don't add @@ -2663,7 +2703,7 @@ def mask_and_call_attention(x): """Function applied once for each sequence of the batch.""" # Mask to prevent sequences of attenting to the future - length = tf.shape(x)[1] # x has shape [1, length,...] + length = common_layers.shape_list(x)[1] # x has shape [1, length,...] bias_past = tf.reshape( attention_bias_lower_triangle(length), [length, length]) # bias has shape [length, length] @@ -2701,15 +2741,13 @@ def mask_and_call_attention(x): return out -def local_expert_attention( - x, - k, - loss_coef, - attention_num_experts, - train=True, - batch_coordinate=None, - **kwargs -): +def local_expert_attention(x, + k, + loss_coef, + attention_num_experts, + train=True, + batch_coordinate=None, + **kwargs): """Attention using a mixture of experts. Positions sent to the same expert can attend to each other. @@ -2736,11 +2774,9 @@ def local_expert_attention( """ if batch_coordinate is None: batch_coordinate = tf.expand_dims( - coordinate_tensor(tf.shape(x)[:-1], axis=0), axis=-1) + coordinate_tensor(common_layers.shape_list(x)[:-1], axis=0), axis=-1) with tf.variable_scope("local_expert_attention"): - additional_dispatch_params = { - "batch_coordinate": batch_coordinate - } + additional_dispatch_params = {"batch_coordinate": batch_coordinate} return expert_utils.local_moe( x, train, @@ -2772,8 +2808,8 @@ def expert_dot_product(q, k, v, info_q, info_k): tf.Tensor: dot product attention output ([length_expert_q, depth_v]) """ - length_q = tf.shape(q)[0] - length_k = tf.shape(k)[0] + length_q = common_layers.shape_list(q)[0] + length_k = common_layers.shape_list(k)[0] depth_v = v.get_shape().as_list()[-1] # Create the mask @@ -2791,7 +2827,9 @@ def is_zero(): def is_not_zero(): return dot_product_attention( - q, k, v, + q, + k, + v, bias=bias, # No image summary to avoid "Retval[0] does not have value" (because # inside a condition) @@ -2867,11 +2905,13 @@ def eventually_dispatch(dispatcher, value): eventually_dispatch(k_dispatcher, bi.coordinates), eventually_dispatch(k_dispatcher, bi.order), ): - list_v_out.append(expert_dot_product( - q, k, v, - info_q=BatchInfo(coordinates=qbc, order=qbo), - info_k=BatchInfo(coordinates=kbc, order=kbo) - )) + list_v_out.append( + expert_dot_product( + q, + k, + v, + info_q=BatchInfo(coordinates=qbc, order=qbo), + info_k=BatchInfo(coordinates=kbc, order=kbo))) # Combine all buckets together to restore the original length return q_dispatcher.combine(list_v_out) @@ -2900,12 +2940,8 @@ def map_fn_switch(fn, elems, use_map_fn=True, **kwargs): if use_map_fn: return tf.map_fn(fn, elems, **kwargs) else: - elems_unpacked = ( - tf.unstack(e) for e in elems - ) - out_unpacked = [ - fn(e) for e in zip(*elems_unpacked) - ] + elems_unpacked = (tf.unstack(e) for e in elems) + out_unpacked = [fn(e) for e in zip(*elems_unpacked)] out = tf.stack(out_unpacked) return out @@ -2941,8 +2977,7 @@ def sparse_dot_product_attention(q, k, v, bi, use_map_fn, experts_params): tf.Tensor: Approximation of Softmax(Q.K) * V, of shape [batch, heads, length_q, depth_v] """ - batch_size, nb_heads, _, depth = q.get_shape().as_list() - batch_size = batch_size or tf.shape(q)[0] + batch_size, nb_heads, _, depth = common_layers.shape_list(q) @expert_utils.add_name_scope() def flatten_first_dims(x): @@ -2981,9 +3016,7 @@ def flatten_batch(x): for single_q, single_k, _ in zip(list_q, list_k, list_v): # Each head get its own dispatcher lhs_gating = LshGating( - depth=single_q.get_shape().as_list()[-1], - **experts_params - ) + depth=single_q.get_shape().as_list()[-1], **experts_params) list_gates_q.append(lhs_gating.get_gates(single_q)) list_gates_k.append(lhs_gating.get_gates(single_k)) @@ -3031,11 +3064,11 @@ def dot_product_batched_head(q, k, v, gates_q, gates_k, mask_right=False): Returns: tf.Tensor: [length_q, depth_v] """ - nb_buckets = tf.shape(gates_q)[-1] + nb_buckets = common_layers.shape_list(gates_q)[-1] @expert_utils.add_name_scope() def get_dispatcher(gates): - length = tf.shape(gates)[1] + length = common_layers.shape_list(gates)[1] # Count the number of ones per batch (and keep the max value) nb_elems_to_dispatch = tf.reduce_sum(gates, axis=[1, 2]) nb_elems_to_dispatch = tf.reduce_max(nb_elems_to_dispatch) @@ -3054,6 +3087,7 @@ def add_summary_capacity(x, prefix): tf.summary.histogram(prefix + "capacity_distribution", x, family="lsh") for i in range(3): # Show the first 3 buckets tf.summary.scalar("{}_{}".format(prefix, i), x[i], family="lsh") + add_summary_capacity(gates_q, "q") add_summary_capacity(gates_k, "k") @@ -3085,7 +3119,9 @@ def add_summary_capacity(x, prefix): @expert_utils.add_name_scope() def sparse_dot_product_attention_truncated( - q, k, v, + q, + k, + v, bi, # Unused experts_params, use_map_fn=False, # Unused @@ -3121,18 +3157,12 @@ def sparse_dot_product_attention_truncated( [batch, heads, length_q, depth_v] """ # Currently depth is the same for for q and v - batch_size, nb_heads, _, depth = q.get_shape().as_list() - batch_size = batch_size or tf.shape(q)[0] + batch_size, nb_heads, _, depth = common_layers.shape_list(q) total_loss = 0.0 # Each head get its own dispatcher - list_lsh = [ - LshGating( - depth=depth, - **experts_params - ) for _ in range(nb_heads) - ] + list_lsh = [LshGating(depth=depth, **experts_params) for _ in range(nb_heads)] @expert_utils.add_name_scope() def get_gates_head(x, add_first=False): @@ -3145,11 +3175,11 @@ def get_gates_head(x, add_first=False): Returns: tf.Tensor: gates of shape [batch, heads, length, num_buckets] """ - length = tf.shape(x)[2] + length = common_layers.shape_list(x)[2] # Invert heads/batch x = tf.transpose(x, perm=[1, 0, 2, 3]) - x = tf.reshape(x, [nb_heads, batch_size*length, depth]) + x = tf.reshape(x, [nb_heads, batch_size * length, depth]) list_x = tf.unstack(x) # list[tf.Tensor(shape=[batch * length, depth])] @@ -3173,10 +3203,8 @@ def get_gates_head(x, add_first=False): # Dispatch the first element to every gates to avoid empty buckets if add_first: - gates = tf.maximum( - gates, - tf.reshape(tf.one_hot([0], length), [1, 1, length, 1]) - ) + gates = tf.maximum(gates, + tf.reshape(tf.one_hot([0], length), [1, 1, length, 1])) return gates @@ -3185,7 +3213,8 @@ def get_gates_head(x, add_first=False): # [batch, heads, length, depth] => [batch*heads, length, depth] q, k, v, gates_q, gates_k = [ - combine_first_two_dimensions(t) for t in (q, k, v, gates_q, gates_k)] + combine_first_two_dimensions(t) for t in (q, k, v, gates_q, gates_k) + ] v_out = dot_product_batched_head(q, k, v, gates_q, gates_k, mask_right) @@ -3270,6 +3299,7 @@ def local_reduction_attention(x, block_length, multihead_params): Returns: tf.Tensor: Compressed tensor of shape [batch, length // factor, depth] """ + @expert_utils.add_name_scope() def dot_product_self_local_attention_flattened(q, k, v): """Strided block local self-attention. @@ -3289,42 +3319,43 @@ def dot_product_self_local_attention_flattened(q, k, v): # Extract the blocks def pad_and_reshape(x): """Split the length dim into [num_block, block_length].""" - length_x = tf.shape(x)[2] + length_x = common_layers.shape_list(x)[2] # Add some padding, but won't matter as the last block will never be # attended by the query (after compression) - x = tf.pad(x, [ - [0, 0], - [0, 0], - [0, -length_x % block_length], - [0, 0] - ]) - x = tf.reshape(x, [ - tf.shape(x)[0], # Batch - num_head, # Head - tf.shape(x)[2] // block_length, # Num blocks - block_length, # Block length - depth, # Depth - ]) + x = tf.pad(x, [[0, 0], [0, 0], [0, -length_x % block_length], [0, 0]]) + x = tf.reshape( + x, + [ + common_layers.shape_list(x)[0], # Batch + num_head, # Head + common_layers.shape_list(x)[2] // block_length, # Num blocks + block_length, # Block length + depth, # Depth + ]) return x q, k, v = [pad_and_reshape(t) for t in (q, k, v)] # Perform attention on the flattened dot product logits = tf.matmul(q, k, transpose_b=True) - logits = tf.reshape(logits, [ - tf.shape(logits)[0], # Batch - num_head, # Head - tf.shape(logits)[2], # Num blocks - block_length**2, # Flatten last dimension - ]) + logits = tf.reshape( + logits, + [ + common_layers.shape_list(logits)[0], # Batch + num_head, # Head + common_layers.shape_list(logits)[2], # Num blocks + block_length**2, # Flatten last dimension + ]) weights = tf.nn.softmax(logits) - weights = tf.reshape(weights, [ - tf.shape(weights)[0], # Batch - num_head, # Head - tf.shape(weights)[2], # Num blocks - block_length, - block_length, # Restore the block length dimension - ]) + weights = tf.reshape( + weights, + [ + common_layers.shape_list(weights)[0], # Batch + num_head, # Head + common_layers.shape_list(weights)[2], # Num blocks + block_length, + block_length, # Restore the block length dimension + ]) weights = tf.reduce_sum(weights, axis=3, keep_dims=True) # Compress block v_out = tf.matmul(weights, v) # [1, block_length] @ [block_length, depth] v_out = tf.squeeze(v_out, axis=3) @@ -3336,8 +3367,7 @@ def pad_and_reshape(x): bias=None, output_depth=x.get_shape().as_list()[-1], attention_type=dot_product_self_local_attention_flattened, - **multihead_params - ) + **multihead_params) @expert_utils.add_var_scope() @@ -3403,21 +3433,22 @@ def multihead_self_attention_reduced( # Construct the bias @expert_utils.add_name_scope() def construct_bias_vectors(t, axis): - length = tf.to_float(tf.shape(t)[1]) + length = tf.to_float(common_layers.shape_list(t)[1]) length_coordinates = tf.range(length, dtype=tf.float32) length_coordinates = tf.expand_dims(length_coordinates, axis=axis) # [1, length_k] or [length_q, 1] return length_coordinates - bias = tf.to_float(tf.greater( - # Because we add the first elem to the memory block and it can be attended - # by anyone,we don't need to add +1 anymore to prevent self attention - # Use * factor to make sure the last tokens of a block cannot attend the - # block - construct_bias_vectors(memory_x, 0) * factor, - # +epsilon to avoid float equality - construct_bias_vectors(x, 1) + 1e-3, - )) * -1e9 + bias = tf.to_float( + tf.greater( + # Because we add the first elem to the memory block and it can be + # attended by anyone,we don't need to add +1 anymore to prevent self + # attention Use * factor to make sure the last tokens of a block + # cannot attend the block + construct_bias_vectors(memory_x, 0) * factor, + # +epsilon to avoid float equality + construct_bias_vectors(x, 1) + 1e-3, + )) * -1e9 bias = tf.expand_dims(bias, axis=0) bias = tf.expand_dims(bias, axis=0) # [1, 1, length_k, length_q] @@ -3426,8 +3457,7 @@ def construct_bias_vectors(t, axis): memory_antecedent=memory_x, bias=bias, output_depth=depth, - **multihead_params - ) + **multihead_params) def scaled_dot_product_attention_simple(q, k, v, bias, name=None): @@ -3445,7 +3475,7 @@ def scaled_dot_product_attention_simple(q, k, v, bias, name=None): """ with tf.variable_scope( name, default_name="scaled_dot_product_attention_simple"): - scalar = tf.rsqrt(tf.to_float(tf.shape(q)[2])) + scalar = tf.rsqrt(tf.to_float(common_layers.shape_list(q)[2])) logits = tf.matmul(q * scalar, k, transpose_b=True) if bias is not None: logits += bias @@ -3495,8 +3525,8 @@ def multihead_self_attention_memory_efficient(x, def forward_internal(x, wqkv, wo, attention_bias, norm_scale, norm_bias): """Forward function.""" - n = common_layers.layer_norm_compute_python( - x, epsilon, norm_scale, norm_bias) + n = common_layers.layer_norm_compute_python(x, epsilon, norm_scale, + norm_bias) wqkv_split = tf.unstack(wqkv, num=num_heads) wo_split = tf.unstack(wo, num=num_heads) y = 0 @@ -3508,18 +3538,19 @@ def forward_internal(x, wqkv, wo, attention_bias, norm_scale, norm_bias): y += tf.nn.conv1d(o, wo_split[h], 1, "SAME") return y - key = ("multihead_self_attention_memory_efficient %s %s" % - (num_heads, epsilon)) + key = ("multihead_self_attention_memory_efficient %s %s" % (num_heads, + epsilon)) if not forget: forward_fn = forward_internal elif key in _function_cache: forward_fn = _function_cache[key] else: + @function.Defun(compiled=True) def grad_fn(x, wqkv, wo, attention_bias, norm_scale, norm_bias, dy): with tf.control_dependencies([dy]): - n = common_layers.layer_norm_compute_python( - x, epsilon, norm_scale, norm_bias) + n = common_layers.layer_norm_compute_python(x, epsilon, norm_scale, + norm_bias) wqkv_split = tf.unstack(wqkv, num=num_heads) wo_split = tf.unstack(wo, num=num_heads) deps = [] @@ -3545,14 +3576,15 @@ def grad_fn(x, wqkv, wo, attention_bias, norm_scale, norm_bias, dy): with tf.control_dependencies(deps): dx, dnorm_scale, dnorm_bias = tf.gradients( ys=[n], xs=[x, norm_scale, norm_bias], grad_ys=[dn]) - return (dx, dwqkv, dwo, tf.zeros_like(attention_bias), - dnorm_scale, dnorm_bias) + return (dx, dwqkv, dwo, tf.zeros_like(attention_bias), dnorm_scale, + dnorm_bias) - @function.Defun(grad_func=grad_fn, compiled=True, - separate_compiled_gradients=True) + @function.Defun( + grad_func=grad_fn, compiled=True, separate_compiled_gradients=True) def forward_fn(x, wqkv, wo, attention_bias, norm_scale, norm_bias): - return forward_internal( - x, wqkv, wo, attention_bias, norm_scale, norm_bias) + return forward_internal(x, wqkv, wo, attention_bias, norm_scale, + norm_bias) + _function_cache[key] = forward_fn if bias is not None: diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index aea7202d7..6f6d10552 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -142,7 +142,8 @@ def standardize_images(x): x_mean = tf.reduce_mean(x, axis=[1, 2, 3], keep_dims=True) x_variance = tf.reduce_mean( tf.square(x - x_mean), axis=[1, 2, 3], keep_dims=True) - num_pixels = tf.to_float(tf.shape(x)[1] * tf.shape(x)[2] * 3) + x_shape = shape_list(x) + num_pixels = tf.to_float(x_shape[1] * x_shape[2] * 3) x = (x - x_mean) / tf.maximum(tf.sqrt(x_variance), tf.rsqrt(num_pixels)) # TODO(lukaszkaiser): remove hack below, needed for greedy decoding for now. if x.shape and len(x.shape) == 4 and x.shape[3] == 1: @@ -157,7 +158,7 @@ def convert_rgb_to_real(x): x = tf.to_float(x) # Use the formula (value/128) - 1 to convert each channel value into a # real number in the range -1 to 1. - x = (x /128) - 1 + x = (x / 128) - 1 return x @@ -191,11 +192,8 @@ def cifar_image_augmentation(images): def flatten4d3d(x): """Flatten a 4d-tensor into a 3d-tensor by joining width and height.""" - xshape = tf.shape(x) + xshape = shape_list(x) result = tf.reshape(x, [xshape[0], xshape[1] * xshape[2], xshape[3]]) - # Preserve static shapes when available. - xshape_static = x.get_shape() - result.set_shape([xshape_static[0], None, xshape_static[3]]) return result @@ -211,12 +209,12 @@ def embedding(x, vocab_size, dense_size, name=None, reuse=None, multiplier=1.0): emb_x = tf.gather(embedding_var, x) if multiplier != 1.0: emb_x *= multiplier - shape, static_shape = tf.shape(emb_x), emb_x.shape.as_list() - if not static_shape or len(static_shape) < 5: + static_shape = emb_x.shape.as_list() + if len(static_shape) < 5: return emb_x - # If we had extra channel dimensions, assume it's 1, i.e. shape[3] == 1. assert len(static_shape) == 5 - return tf.reshape(emb_x, [shape[0], shape[1], shape[2], static_shape[4]]) + # If we had an extra channel dimension, assume it's 1, i.e. shape[3] == 1. + return tf.squeeze(emb_x, 3) def shift_right(x, pad_value=None): @@ -298,7 +296,7 @@ def deconv_stride2_multistep(x, name, default_name="deconv_stride2_multistep", values=[x], reuse=reuse): def deconv1d(cur, i): - cur_shape = tf.shape(cur) + cur_shape = shape_list(cur) thicker = conv( cur, output_filters * 2, (1, 1), @@ -322,10 +320,17 @@ def deconv2d(cur, i): if cur.get_shape()[2] == 1: cur = deconv1d(cur, i) else: - cur = tf.cond( - tf.equal(tf.shape(cur)[2], 1), - lambda idx=i: deconv1d(cur, idx), - lambda idx=i: deconv2d(cur, idx)) + cur_dim = shape_list(cur)[2] + if isinstance(cur_dim, int): + if cur_dim == 1: + cur = deconv1d(cur, i) + else: + cur = deconv2d(cur, i) + else: + cur = tf.cond( + tf.equal(cur_dim, 1), + lambda idx=i: deconv1d(cur, idx), + lambda idx=i: deconv2d(cur, idx)) return cur @@ -343,7 +348,7 @@ def conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs): assert kernel_size[0] % 2 == 1 and kernel_size[1] % 2 == 1 height_padding = 2 * (kernel_size[0] // 2) * dilation_rate[0] cond_padding = tf.cond( - tf.equal(tf.shape(inputs)[2], 1), lambda: tf.constant(0), + tf.equal(shape_list(inputs)[2], 1), lambda: tf.constant(0), lambda: tf.constant(2 * (kernel_size[1] // 2) * dilation_rate[1])) width_padding = 0 if static_shape[2] == 1 else cond_padding padding = [[0, 0], [height_padding, 0], [width_padding, 0], [0, 0]] @@ -729,7 +734,7 @@ def pool(inputs, window_size, pooling_type, padding, strides=(1, 1)): else: height_padding = 2 * (window_size[0] // 2) cond_padding = tf.cond( - tf.equal(tf.shape(inputs)[2], 1), lambda: tf.constant(0), + tf.equal(shape_list(inputs)[2], 1), lambda: tf.constant(0), lambda: tf.constant(2 * (window_size[1] // 2))) width_padding = 0 if static_shape[2] == 1 else cond_padding padding_ = [[0, 0], [height_padding, 0], [width_padding, 0], [0, 0]] @@ -808,9 +813,9 @@ def decompress_seqcnn(x, # We assume targets are [batch x block_size * N x block_size * N x C] if # is_2d=True or [batch, block_size * N, 1, C] otherwise, and C is static. # Let's shift targets to depth and embed. - targets_shape, targets_shape_static = tf.shape(targets), targets.get_shape() - channels = int(targets_shape_static[-1]) - hidden_size = int(x.get_shape()[-1]) + targets_shape = shape_list(targets) + channels = targets_shape[-1] + hidden_size = x.get_shape()[-1] if is_2d: depth_targets = tf.space_to_depth(targets, block_size) factor = channels * block_size * block_size @@ -836,17 +841,17 @@ def decompress_seqcnn(x, dilations_and_kernels, padding="LEFT") # Reshape back to embedded targets shape. + targets_emb_shape = shape_list(targets_emb) outputs = tf.reshape(flat_outputs, [ - tf.shape(targets_emb)[0], - tf.shape(targets_emb)[1], - tf.shape(targets_emb)[2], factor * hidden_size + targets_emb_shape[0], targets_emb_shape[1], targets_emb_shape[2], + factor * hidden_size ]) # Move depth back to target space. if is_2d: outputs = tf.depth_to_space(outputs, 2) else: outputs = tf.reshape(outputs, [ - tf.shape(outputs)[0], block_size * tf.shape(outputs)[1], 1, + shape_list(outputs)[0], block_size * shape_list(outputs)[1], 1, hidden_size ]) # Final reshape before prediction to ensure target size. @@ -872,8 +877,8 @@ def simple_attention(target, source, bias=None): a `Tensor` with same shape as `target` """ with tf.name_scope("simple_attention", [target, source]): - target_shape = tf.shape(target) - source_shape = tf.shape(source) + target_shape = shape_list(target) + source_shape = shape_list(source) target = tf.reshape( target, [target_shape[0], target_shape[1] * target_shape[2], target_shape[3]]) @@ -881,7 +886,7 @@ def simple_attention(target, source, bias=None): source, [source_shape[0], source_shape[1] * source_shape[2], source_shape[3]]) attention = tf.matmul(target, source, transpose_b=True) - attention *= tf.rsqrt(tf.to_float(tf.shape(target)[2])) + attention *= tf.rsqrt(tf.to_float(shape_list(target)[2])) if bias is not None: attention += tf.expand_dims(tf.squeeze(bias, axis=[2, 3]), axis=1) attention = tf.nn.softmax(attention) @@ -1074,8 +1079,8 @@ def add_timing_signal(x, min_timescale=1, max_timescale=1e4, num_timescales=16): Returns: a Tensor the same shape as x. """ - length = tf.shape(x)[1] - depth = tf.shape(x)[3] + length = shape_list(x)[1] + depth = shape_list(x)[3] signal = get_timing_signal(length, min_timescale, max_timescale, num_timescales) padded_signal = tf.pad(signal, [[0, 0], [0, depth - 2 * num_timescales]]) @@ -1105,8 +1110,12 @@ def mask_leq(target_length, source_length): Returns: a Tensor with shape [1, target_length, source_length] """ - return tf.expand_dims( - tf.matrix_band_part(tf.ones([target_length, source_length]), -1, 0), 0) + return ones_matrix_band_part( + target_length, + source_length, + -1, + 0, + out_shape=[1, target_length, source_length]) def attention_1d_v0(source, @@ -1141,9 +1150,10 @@ def attention_1d_v0(source, a Tensor of shape [batch, length, output_size] """ with tf.variable_scope(name, default_name="attention", values=[target]): - source_length = tf.shape(source)[1] - target_length = tf.shape(target)[1] - batch = tf.shape(source)[0] + source_shape = shape_list(source) + source_length = source_shape[1] + target_length = shape_list(target)[1] + batch = source_shape[0] def _maybe_transform(t, size, should_transform, name): if should_transform: @@ -1345,8 +1355,8 @@ def pad_to_same_length(x, y, final_length_divisible_by=1, axis=1): if axis not in [1, 2]: raise ValueError("Only axis=1 and axis=2 supported for now.") with tf.name_scope("pad_to_same_length", [x, y]): - x_length = tf.shape(x)[axis] - y_length = tf.shape(y)[axis] + x_length = shape_list(x)[axis] + y_length = shape_list(y)[axis] max_length = tf.maximum(x_length, y_length) if final_length_divisible_by > 1: # Find the nearest larger-or-equal integer divisible by given number. @@ -1472,7 +1482,7 @@ def padded_cross_entropy(logits, weights_fn=weights_fn, reduce_sum=reduce_sum) confidence = 1.0 - label_smoothing - vocab_size = tf.shape(logits)[-1] + vocab_size = shape_list(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): pad_logits, pad_labels = pad_with_zeros(logits, labels) xent = smoothing_cross_entropy(pad_logits, pad_labels, vocab_size, @@ -1778,7 +1788,7 @@ def approximate_split(x, num_splits, axis=0): Returns: a list of num_splits Tensors. """ - size = tf.shape(x)[axis] + size = shape_list(x)[axis] size_splits = [tf.div(size + i, num_splits) for i in xrange(num_splits)] return tf.split(x, size_splits, axis=axis) @@ -1809,11 +1819,14 @@ def b(self): return self._b def to_tensor(self): - inner_dim = tf.shape(self.b)[1] - result_dim = tf.shape(self.b)[0] + """Convert to Tensor.""" + a_shape = shape_list(self.a) + b_shape = shape_list(self.b) + inner_dim = b_shape[1] + result_dim = b_shape[0] flat_a = tf.reshape(self.a, [-1, inner_dim]) product = tf.matmul(flat_a, self.b, transpose_b=True) - product_shape = tf.concat([tf.shape(self.a)[:-1], [result_dim]], 0) + product_shape = a_shape[:-1] + [result_dim] product = tf.reshape(product, product_shape) product.set_shape( self.a.get_shape().as_list()[:-1] + [self.b.get_shape()[0]]) @@ -1836,7 +1849,7 @@ def smoothing_cross_entropy_factored_grad(op, dy): labels = op.inputs[2] confidence = op.inputs[3] num_splits = 16 - vocab_size = tf.shape(b)[0] + vocab_size = shape_list(b)[0] labels = approximate_split(labels, num_splits) a = approximate_split(a, num_splits) dy = approximate_split(dy, num_splits) @@ -1880,7 +1893,7 @@ def smoothing_cross_entropy_factored(a, b, labels, confidence): A Tensor with shape [batch] """ num_splits = 16 - vocab_size = tf.shape(b)[0] + vocab_size = shape_list(b)[0] labels = approximate_split(labels, num_splits) a = approximate_split(a, num_splits) parts = [] @@ -1918,10 +1931,10 @@ def padded_cross_entropy_factored(factored_logits, confidence = 1.0 - label_smoothing with tf.name_scope("padded_cross_entropy_factored", [a, b, labels]): labels_flat = tf.reshape(labels, [-1]) - a_flat = tf.reshape(a, [-1, tf.shape(b)[1]]) + a_flat = tf.reshape(a, [-1, shape_list(b)[1]]) xent = smoothing_cross_entropy_factored(a_flat, b, labels_flat, tf.convert_to_tensor(confidence)) - xent = tf.reshape(xent, tf.shape(labels)) + xent = tf.reshape(xent, shape_list(labels)) weights = weights_fn(labels) if not reduce_sum: return xent * weights, weights @@ -2054,7 +2067,7 @@ def forward_internal(x, f1, f2, scale, bias): # split batch-wise to avoid exhausting memory in cast the batch is large # and the hidden layer is large. num_splits = 4 - x_flat = tf.reshape(x, [-1, 1, tf.shape(x)[2]]) + x_flat = tf.reshape(x, [-1, 1, shape_list(x)[2]]) xs = approximate_split(x_flat, num_splits) ys = [] for i in xrange(num_splits): @@ -2065,7 +2078,7 @@ def forward_internal(x, f1, f2, scale, bias): y = tf.nn.conv1d(y, f2, 1, "SAME") ys.append(y) y = tf.concat(ys, 0) - y = tf.reshape(y, tf.shape(x)) + y = tf.reshape(y, shape_list(x)) return y key = ("conv_hidden_relu_memory_efficient %s" % epsilon) @@ -2079,7 +2092,7 @@ def forward_internal(x, f1, f2, scale, bias): def grad_fn(x, f1, f2, scale, bias, dy): with tf.control_dependencies([dy]): num_splits = 4 - x_shape = tf.shape(x) + x_shape = shape_list(x) flat_shape = [-1, 1, x_shape[2]] x = tf.reshape(x, flat_shape) dy = tf.reshape(dy, flat_shape) @@ -2133,12 +2146,24 @@ def forward_fn(x, f1, f2, scale, bias): return y -def shape_dim(x, dim): - """Return shape(x)[dim], statically if possible.""" +def shape_list(x): + """Return list of dims, statically where possible.""" + x = tf.convert_to_tensor(x) + + # If unknown rank, return dynamic shape + if x.get_shape().dims is None: + return tf.shape(x) + static = x.get_shape().as_list() - if dim < len(static) and static[dim] is not None: - return static[dim] - return tf.shape(x)[dim] + shape = tf.shape(x) + + ret = [] + for i in xrange(len(static)): + dim = static[i] + if dim is None: + dim = shape[i] + ret.append(dim) + return ret def sample_with_temperature(logits, temperature): @@ -2156,8 +2181,30 @@ def sample_with_temperature(logits, temperature): else: assert temperature > 0.0 reshaped_logits = ( - tf.reshape(logits, [-1, tf.shape(logits)[-1]])/temperature) + tf.reshape(logits, [-1, shape_list(logits)[-1]]) / temperature) choices = tf.multinomial(reshaped_logits, 1) choices = tf.reshape(choices, - tf.shape(logits)[:logits.get_shape().ndims - 1]) + shape_list(logits)[:logits.get_shape().ndims - 1]) return choices + + +def ones_matrix_band_part(rows, cols, num_lower, num_upper, out_shape=None): + """Matrix band part of ones.""" + if all([isinstance(el, int) for el in [rows, cols, num_lower, num_upper]]): + # Needed info is constant, so we construct in numpy + if num_lower < 0: + num_lower = rows - 1 + if num_upper < 0: + num_upper = cols - 1 + lower_mask = np.tri(rows, cols, num_lower).T + upper_mask = np.tri(rows, cols, num_upper) + band = np.ones((rows, cols)) * lower_mask * upper_mask + if out_shape: + band = band.reshape(out_shape) + band = tf.constant(band, tf.float32) + else: + band = tf.matrix_band_part(tf.ones([rows, cols]), num_lower, num_upper) + if out_shape: + band = tf.reshape(band, out_shape) + + return band diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index a2ecd1258..34633c2b6 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -136,10 +136,7 @@ def top(self, body_output, _): reuse = False with tf.variable_scope(scope_name, reuse=reuse): - rank = len(body_output.get_shape().as_list()) - body_output_shape = [ - common_layers.shape_dim(body_output, i) for i in range(rank) - ] + body_output_shape = common_layers.shape_list(body_output) var = self._get_weights(body_output_shape[-1]) if (self._model_hparams.factored_logits and self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN): @@ -206,7 +203,7 @@ def targets_bottom(self, inputs): if self._model_hparams.multiply_embedding_mode == "sqrt_depth": ret *= self._body_input_depth**0.5 - reshape_shape = [common_layers.shape_dim(inputs, i) for i in range(3)] + reshape_shape = common_layers.shape_list(inputs)[:3] reshape_shape.append(self._body_input_depth * 3) ret = tf.reshape(ret, reshape_shape) return tf.layers.dense(ret, self._body_input_depth) @@ -214,10 +211,9 @@ def targets_bottom(self, inputs): def top(self, body_output, _): with tf.variable_scope("rgb_softmax"): - reshape_shape = [ - common_layers.shape_dim(body_output, i) for i in range(3) - ] - dim = body_output.get_shape().as_list()[-1] // 3 + body_output_shape = common_layers.shape_list(body_output) + reshape_shape = body_output_shape[:3] + dim = body_output_shape[-1] // 3 reshape_shape.extend([self.NUM_CHANNELS, dim]) out = tf.reshape(body_output, reshape_shape) @@ -246,8 +242,8 @@ def bottom_compress(self, inputs, name="bottom"): """ with tf.variable_scope(name): inputs = common_layers.convert_rgb_to_real(inputs) - ishape = tf.shape(inputs) - inputs = tf.reshape(inputs, [-1, ishape[1], ishape[2]*ishape[3], 1]) + ishape = common_layers.shape_list(inputs) + inputs = tf.reshape(inputs, [-1, ishape[1], ishape[2] * ishape[3], 1]) inputs.set_shape([None, None, None, 1]) # We compress RGB intensities for each pixel using a conv. x = common_layers.conv_block( @@ -271,20 +267,19 @@ def top(self, body_output, _): hidden_dim = self._model_hparams.hidden_size img_len = self._model_hparams.img_len channels = self._model_hparams.num_channels - batch = tf.shape(body_output)[0] + batch = common_layers.shape_list(body_output)[0] x = common_layers.conv( body_output, - hidden_dim*channels, (1, 1), + hidden_dim * channels, (1, 1), padding="VALID", activation=tf.nn.relu, name="decompress_conv") - x = tf.reshape(x, [batch, img_len, img_len*channels, hidden_dim]) + x = tf.reshape(x, [batch, img_len, img_len * channels, hidden_dim]) x.set_shape([None, None, None, hidden_dim]) - x = common_layers.conv(x, - self.top_dimensionality, - (1, 1), name="output_conv") - x = tf.reshape(x, [-1, img_len, img_len, - channels, self.top_dimensionality]) + x = common_layers.conv( + x, self.top_dimensionality, (1, 1), name="output_conv") + x = tf.reshape(x, + [-1, img_len, img_len, channels, self.top_dimensionality]) return x @@ -396,7 +391,7 @@ def bottom(self, x): def targets_bottom(self, x): with tf.variable_scope(self.name): return tf.zeros( - [common_layers.shape_dim(x, 0), 1, 1, self._body_input_depth]) + [common_layers.shape_list(x)[0], 1, 1, self._body_input_depth]) def top(self, body_output, _): """Transform inputs from model space to target space. diff --git a/tensor2tensor/models/aligned.py b/tensor2tensor/models/aligned.py index 6dddc8c3d..a6eca3bab 100644 --- a/tensor2tensor/models/aligned.py +++ b/tensor2tensor/models/aligned.py @@ -39,13 +39,11 @@ import tensorflow as tf - ModeKeys = tf.estimator.ModeKeys # pylint: disable=invalid-name def _should_preprocess(layer_type): - return layer_type not in [ - "timing", "pos_emb", "att_memory_efficient"] + return layer_type not in ["timing", "pos_emb", "att_memory_efficient"] def _should_postprocess(layer_type): @@ -61,17 +59,23 @@ def model_fn_body_sharded(self, sharded_features): hparams = self._hparams dp = self._data_parallelism x = dp(tf.squeeze, sharded_features["inputs"], 2) + def preprocess(x): return dp(common_layers.layer_preprocess, x, hparams) + def postprocess(x, y): return dp(common_layers.layer_postprocess, x, y, hparams) + x = dp(tf.nn.dropout, x, 1.0 - hparams.layer_prepostprocess_dropout) extra_loss = 0.0 ffn_hidden_sizes = [int(s) for s in hparams.ffn_hidden_sizes.split(",")] moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")] if hparams.mask_right: + def _bias(x): - return common_attention.attention_bias_lower_triangle(tf.shape(x)[1]) + return common_attention.attention_bias_lower_triangle( + common_layers.shape_list(x)[1]) + bias = dp(_bias, x) else: bias = tf.zeros([1, 1, 1, 1]) @@ -96,8 +100,11 @@ def _diet_expert(x): if layer_type == "timing": y = dp(common_attention.add_timing_signal_nd, x) elif layer_type == "pos_emb": - y = dp(common_attention.add_positional_embedding_nd, - x, hparams.max_length, name="pos_emb") + y = dp( + common_attention.add_positional_embedding_nd, + x, + hparams.max_length, + name="pos_emb") elif layer_type == "att": y = dp( common_attention.multihead_attention, @@ -130,11 +137,8 @@ def _diet_expert(x): extra_loss += tf.add_n(loss) / dp.n elif layer_type == "att_memory_efficient": assert hparams.layer_preprocess_sequence == "n" - y = dp( - common_attention.multihead_self_attention_memory_efficient, - x, - bias, - hparams.num_heads) + y = dp(common_attention.multihead_self_attention_memory_efficient, x, + bias, hparams.num_heads) elif layer_type == "att_local": y = dp( common_attention.multihead_attention, @@ -146,9 +150,8 @@ def _diet_expert(x): hparams.hidden_size, hparams.num_heads, hparams.attention_dropout, - attention_type=( - "local_mask_right" if hparams.mask_right - else "local_unmasked"), + attention_type=("local_mask_right" + if hparams.mask_right else "local_unmasked"), block_length=hparams.local_attention_window, block_width=hparams.local_attention_window) elif layer_type == "att_pseudolocal": @@ -156,20 +159,15 @@ def _diet_expert(x): # purpose of testing model quality. def _pseudolocal_bias(x): return common_attention.attention_bias_local( - tf.shape(x)[1], - hparams.local_attention_window, + common_layers.shape_list(x)[1], hparams.local_attention_window, 0 if hparams.mask_right else hparams.local_attention_window) + pseudolocal_bias = dp(_pseudolocal_bias, x) - y = dp( - common_attention.multihead_attention, - x, - None, - pseudolocal_bias, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout) + y = dp(common_attention.multihead_attention, x, None, + pseudolocal_bias, hparams.attention_key_channels or + hparams.hidden_size, hparams.attention_value_channels or + hparams.hidden_size, hparams.hidden_size, hparams.num_heads, + hparams.attention_dropout) elif layer_type == "att_local_expert": y, loss = dp( common_attention.local_expert_attention, @@ -202,15 +200,14 @@ def _pseudolocal_bias(x): hparams.attention_dropout, # Additional parameters - bi=[common_attention.BatchInfo( - coordinates=batch_coordinate[i], - order=None, # No future mask - ) for i in range(dp.n)], + bi=[ + common_attention.BatchInfo( + coordinates=batch_coordinate[i], + order=None, # No future mask + ) for i in range(dp.n) + ], use_map_fn=False, - experts_params=dict( - nb_hyperplanes=4, - ) - ) + experts_params=dict(nb_hyperplanes=4,)) extra_loss += tf.add_n(loss) / dp.n elif layer_type == "moe": y, loss = expert_utils.distributed_moe( @@ -226,10 +223,8 @@ def _pseudolocal_bias(x): extra_loss += loss elif layer_type == "ffn": y = dp( - expert_utils.ffn_expert_fn( - hparams.hidden_size, - ffn_hidden_sizes, - hparams.hidden_size), + expert_utils.ffn_expert_fn(hparams.hidden_size, ffn_hidden_sizes, + hparams.hidden_size), dp(expert_utils.flatten_all_but_last, x)) y = dp(expert_utils.reshape_like, y, x) elif layer_type == "conv": @@ -257,7 +252,9 @@ def get_batch_coordinate(x): """Return a flat int32 tensor of shape [1, batch_size*length, 1].""" # Compute the batch coordinate before flattening all batches batch_coordinate = tf.expand_dims( - common_attention.coordinate_tensor(tf.shape(x)[:-1], axis=0), axis=-1) + common_attention.coordinate_tensor( + common_layers.shape_list(x)[:-1], axis=0), + axis=-1) return batch_coordinate diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index f4b4d7e45..6ee1505b9 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -51,10 +51,10 @@ def model_fn_body(self, features): (decoder_input, decoder_self_attention_bias) = attention_lm_prepare_decoder( targets, hparams) - decoder_input = tf.nn.dropout( - decoder_input, 1.0 - hparams.layer_prepostprocess_dropout) - decoder_output = attention_lm_decoder( - decoder_input, decoder_self_attention_bias, hparams) + decoder_input = tf.nn.dropout(decoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + decoder_output = attention_lm_decoder(decoder_input, + decoder_self_attention_bias, hparams) decoder_output = tf.expand_dims(decoder_output, 2) return decoder_output @@ -78,7 +78,8 @@ def attention_lm_prepare_decoder(targets, hparams): common_attention.embedding_to_padding(targets))) else: decoder_self_attention_bias = ( - common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) + common_attention.attention_bias_lower_triangle( + common_layers.shape_list(targets)[1])) decoder_input = common_layers.shift_right_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) @@ -107,14 +108,11 @@ def attention_lm_decoder(decoder_input, with tf.variable_scope("layer_%d" % layer): with tf.variable_scope("self_attention"): y = common_attention.multihead_attention( - common_layers.layer_preprocess(x, hparams), - None, - decoder_self_attention_bias, + common_layers.layer_preprocess( + x, hparams), None, decoder_self_attention_bias, hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, - hparams.num_heads, - hparams.attention_dropout) + hparams.hidden_size, hparams.num_heads, hparams.attention_dropout) x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): y = common_layers.conv_hidden_relu( diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 20fe931d0..63a0806e7 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -73,9 +73,7 @@ def dropout_lstm_cell(): attention_layer_size=[hparams.attention_layer_size]*hparams.num_heads, output_attention=(hparams.output_attention == 1)) - batch_size = inputs.get_shape()[0].value - if batch_size is None: - batch_size = tf.shape(inputs)[0] + batch_size = common_layers.shape_list(inputs)[0] initial_state = cell.zero_state(batch_size, tf.float32).clone( cell_state=initial_state) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index a539d02e7..588b6154c 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -161,8 +161,7 @@ def _greedy_infer(self, features, decode_length): decoded_ids, _ = self._fast_decode(features, decode_length) return decoded_ids, None, None - def _beam_decode(self, features, decode_length, beam_size, top_beams, - alpha): + def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): """Beam search decoding. Args: @@ -176,8 +175,8 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, Returns: samples: an integer `Tensor`. Top samples from the beam search """ - decoded_ids, scores = self._fast_decode( - features, decode_length, beam_size, top_beams, alpha) + decoded_ids, scores = self._fast_decode(features, decode_length, beam_size, + top_beams, alpha) return {"outputs": decoded_ids, "scores": scores} def _fast_decode(self, @@ -211,18 +210,18 @@ def _fast_decode(self, hparams = self._hparams inputs = features["inputs"] - batch_size = tf.shape(inputs)[0] + batch_size = common_layers.shape_list(inputs)[0] target_modality = self._problem_hparams.target_modality if t2t_model.is_class_modality(target_modality): decode_length = 1 else: - decode_length = tf.shape(inputs)[1] + decode_length + decode_length = common_layers.shape_list(inputs)[1] + decode_length # TODO(llion): Clean up this reshaping logic. inputs = tf.expand_dims(inputs, axis=1) if len(inputs.shape) < 5: inputs = tf.expand_dims(inputs, axis=4) - s = tf.shape(inputs) + s = common_layers.shape_list(inputs) inputs = tf.reshape(inputs, [s[0] * s[1], s[2], s[3], s[4]]) # _shard_features called to ensure that the variable names match inputs = self._shard_features({"inputs": inputs})["inputs"] @@ -321,8 +320,14 @@ def symbols_to_logits_fn(ids, i, cache): vocab_size = target_modality.top_dimensionality initial_ids = tf.zeros([batch_size], dtype=tf.int32) decoded_ids, scores = beam_search.beam_search( - symbols_to_logits_fn, initial_ids, beam_size, decode_length, - vocab_size, alpha, states=cache, stop_early=(top_beams == 1)) + symbols_to_logits_fn, + initial_ids, + beam_size, + decode_length, + vocab_size, + alpha, + states=cache, + stop_early=(top_beams == 1)) if top_beams == 1: decoded_ids = decoded_ids[:, 0, 1:] @@ -332,8 +337,8 @@ def symbols_to_logits_fn(ids, i, cache): def inner_loop(i, next_id, decoded_ids, cache): logits, cache = symbols_to_logits_fn(next_id, i, cache) - temperature = (0.0 if hparams.sampling_method == "argmax" - else hparams.sampling_temp) + temperature = (0.0 if hparams.sampling_method == "argmax" else + hparams.sampling_temp) next_id = tf.expand_dims( common_layers.sample_with_temperature(logits, temperature), axis=1) decoded_ids = tf.concat([decoded_ids, next_id], axis=1) @@ -403,7 +408,7 @@ def transformer_prepare_encoder(inputs, target_space, hparams): encoder_decoder_attention_bias = ignore_padding if hparams.proximity_bias: encoder_self_attention_bias += common_attention.attention_bias_proximal( - tf.shape(inputs)[1]) + common_layers.shape_list(inputs)[1]) # Append target_space_id embedding to inputs. emb_target_space = common_layers.embedding( target_space, 32, ishape_static[-1], name="target_space_embedding") @@ -427,10 +432,11 @@ def transformer_prepare_decoder(targets, hparams): decoder_self_attention_bias: a bias tensor for use in encoder self-attention """ decoder_self_attention_bias = ( - common_attention.attention_bias_lower_triangle(tf.shape(targets)[1])) + common_attention.attention_bias_lower_triangle( + common_layers.shape_list(targets)[1])) if hparams.proximity_bias: decoder_self_attention_bias += common_attention.attention_bias_proximal( - tf.shape(targets)[1]) + common_layers.shape_list(targets)[1]) decoder_input = common_layers.shift_right_3d(targets) if hparams.pos == "timing": decoder_input = common_attention.add_timing_signal_1d(decoder_input) @@ -569,9 +575,9 @@ def transformer_ffn_layer(x, hparams, pad_remover=None): if hparams.ffn_layer == "conv_hidden_relu": # In simple convolution mode, use `pad_remover` to speed up processing. if pad_remover: - original_shape = tf.shape(x) + original_shape = common_layers.shape_list(x) # Collapse `x` across examples, and remove padding positions. - x = tf.reshape(x, tf.concat([[-1], tf.shape(x)[2:]], axis=0)) + x = tf.reshape(x, tf.concat([[-1], original_shape[2:]], axis=0)) x = tf.expand_dims(pad_remover.remove(x), axis=0) conv_output = common_layers.conv_hidden_relu( x, diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index 634e26901..f328c5c06 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -186,4 +186,5 @@ def xception_tiny_tpu(): hparams.learning_rate_decay_scheme = "noam" hparams.num_hidden_layers = 2 hparams.hidden_size = 128 + hparams.optimizer = "TrueAdam" return hparams diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index e6c2863ee..92060f89c 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -142,7 +142,6 @@ def get_model_fn(model_name, hp, use_tpu=True): def model_fn(features, labels, mode, params, config): """Model fn.""" - del params del config create_dummy_vars() @@ -177,10 +176,13 @@ def model_fn(features, labels, mode, params, config): with tf.variable_scope(target_modality.name): logits = target_modality.top(outputs, labels) - # If the length dim is unknown fix it to max_length - if use_tpu and logits.get_shape().as_list()[1] is None: + if use_tpu: + # Set known shapes shape = logits.get_shape().as_list() - shape[1] = hparams.max_length + if shape[0] is None: + shape[0] = params["batch_size"] + if shape[1] is None: + shape[1] = hparams.max_length logits.set_shape(shape) # Loss @@ -211,25 +213,11 @@ def model_fn(features, labels, mode, params, config): assert mode == tf.estimator.ModeKeys.TRAIN - # Learning rate lr = hparams.learning_rate * optimize.learning_rate_decay(hparams) + train_op = optimize.optimize(loss, lr, hparams, use_tpu=use_tpu) - # Optimizer - opt = optimize.ConditionalOptimizer(hparams.optimizer, lr, hparams) - if use_tpu: - opt = tf.contrib.tpu.CrossShardOptimizer(opt) - - # Optimize - gradients = opt.compute_gradients(loss, tf.trainable_variables()) - if hparams.clip_grad_norm: - gradients = _clip_gradients_by_norm(gradients, hparams.clip_grad_norm) - train_op = opt.apply_gradients( - gradients, global_step=tf.train.get_or_create_global_step()) - with tf.control_dependencies([train_op]): - train_op = tf.identity(loss) - - _remove_summaries() if use_tpu: + _remove_summaries() # summaries not currently working on TPU return tf.contrib.tpu.TPUEstimatorSpec(mode, loss=loss, train_op=train_op) else: return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) @@ -237,6 +225,7 @@ def model_fn(features, labels, mode, params, config): return model_fn +# These metrics are implemented with py_funcs and therefore do no work with TPU TPU_METRIC_BLACKLIST = set([ metrics.Metrics.APPROX_BLEU, metrics.Metrics.ROUGE_2_F, diff --git a/tensor2tensor/utils/beam_search.py b/tensor2tensor/utils/beam_search.py index d2ed2f9dd..b42503cbf 100644 --- a/tensor2tensor/utils/beam_search.py +++ b/tensor2tensor/utils/beam_search.py @@ -20,6 +20,9 @@ from __future__ import print_function # Dependency imports + +from tensor2tensor.layers import common_layers + import tensorflow as tf from tensorflow.python.util import nest @@ -30,13 +33,6 @@ INF = 1. * 1e7 -def _get_shape(tensor): - """Returns static shape if available and dynamic shape otherwise.""" - static = tensor.shape.as_list() - dynamic = tf.unstack(tf.shape(tensor)) - return [s[1] if s[0] is None else s[0] for s in zip(static, dynamic)] - - def _merge_beam_dim(tensor): """Reshapes first two dimensions in to single dimension. @@ -46,7 +42,7 @@ def _merge_beam_dim(tensor): Returns: Reshaped tensor of shape [A*B, ...] """ - shape = _get_shape(tensor) + shape = common_layers.shape_list(tensor) shape[0] *= shape[1] # batch -> batch * beam_size shape.pop(1) # Remove beam dim return tf.reshape(tensor, shape) @@ -63,7 +59,7 @@ def _unmerge_beam_dim(tensor, batch_size, beam_size): Returns: Reshaped tensor of shape [batch_size, beam_size, ...] """ - shape = _get_shape(tensor) + shape = common_layers.shape_list(tensor) new_shape = [batch_size] + [beam_size] + shape[1:] return tf.reshape(tensor, new_shape) @@ -223,7 +219,7 @@ def beam_search(symbols_to_logits_fn, (decoded beams [batch_size, beam_size, decode_length] decoding probablities [batch_size, beam_size]) """ - batch_size = tf.shape(initial_ids)[0] + batch_size = common_layers.shape_list(initial_ids)[0] # Assume initial_ids are prob 1.0 initial_log_probs = tf.constant([[0.] + [-float("inf")] * (beam_size - 1)]) @@ -242,7 +238,7 @@ def beam_search(symbols_to_logits_fn, # Finished will keep track of all the sequences that have finished so far # Finished log probs will be negative infinity in the beginning # finished_flags will keep track of booleans - finished_seq = tf.zeros(tf.shape(alive_seq), tf.int32) + finished_seq = tf.zeros(common_layers.shape_list(alive_seq), tf.int32) # Setting the scores of the initial to negative infinity. finished_scores = tf.ones([batch_size, beam_size]) * -INF finished_flags = tf.zeros([batch_size, beam_size], tf.bool) diff --git a/tensor2tensor/utils/beam_search_test.py b/tensor2tensor/utils/beam_search_test.py index 379411e99..ec911f051 100644 --- a/tensor2tensor/utils/beam_search_test.py +++ b/tensor2tensor/utils/beam_search_test.py @@ -47,7 +47,7 @@ def symbols_to_logits(_): self.assertEqual(final_ids.get_shape().as_list(), [None, beam_size, None]) - self.assertEqual(final_probs.get_shape().as_list(), [None, beam_size]) + self.assertEqual(final_probs.get_shape().as_list(), [batch_size, beam_size]) def testComputeTopkScoresAndSeq(self): batch_size = 2 diff --git a/tensor2tensor/utils/diet.py b/tensor2tensor/utils/diet.py index 527ed0e5f..7ecfba693 100644 --- a/tensor2tensor/utils/diet.py +++ b/tensor2tensor/utils/diet.py @@ -243,7 +243,7 @@ def _quantize(x, params, randomize=True): abs_x = tf.abs(x) sign_x = tf.sign(x) y = abs_x / params.quantization_scale - y = tf.floor(y + tf.random_uniform(tf.shape(x))) + y = tf.floor(y + tf.random_uniform(common_layers.shape_list(x))) y = tf.minimum(y, tf.int16.max) * sign_x q = tf.bitcast(tf.cast(y, tf.int16), tf.float16) return q diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index c9e52e566..ecba3c8b4 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -93,7 +93,8 @@ def padded_accuracy_topk(predictions, padded_predictions, padded_labels = common_layers.pad_with_zeros( predictions, labels) weights = weights_fn(padded_labels) - effective_k = tf.minimum(k, tf.shape(padded_predictions)[-1]) + effective_k = tf.minimum(k, + common_layers.shape_list(padded_predictions)[-1]) _, outputs = tf.nn.top_k(padded_predictions, k=effective_k) outputs = tf.to_int32(outputs) padded_labels = tf.to_int32(padded_labels) @@ -167,7 +168,7 @@ def sequence_edit_distance(predictions, tf.shape(labels, out_type=tf.int64)) distance = tf.reduce_sum( tf.edit_distance(sparse_outputs, label_sparse_outputs, normalize=False)) - reference_length = tf.to_float(tf.shape(nonzero_idx)[0]) + reference_length = tf.to_float(common_layers.shape_list(nonzero_idx)[0]) return distance / reference_length, reference_length diff --git a/tensor2tensor/utils/optimize.py b/tensor2tensor/utils/optimize.py index b9a092ac8..aaaeb0015 100644 --- a/tensor2tensor/utils/optimize.py +++ b/tensor2tensor/utils/optimize.py @@ -28,10 +28,12 @@ -def optimize(loss, learning_rate, hparams): +def optimize(loss, learning_rate, hparams, use_tpu=False): """Minimize loss.""" loss = tf.identity(loss, name="total_loss") opt = ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) + if use_tpu: + opt = tf.contrib.tpu.CrossShardOptimizer(opt) opt_summaries = ["learning_rate", "loss"] if hparams.summarize_grads: opt_summaries.extend(["gradients", "gradient_norm"]) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index f5ec04679..02c2b8a7d 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -153,9 +153,7 @@ def prepare_features_for_infer(self, features): """Called before inference to allow adding infer-specific features.""" pass - def eval_autoregressive(self, - features=None, - decode_length=50): + def eval_autoregressive(self, features=None, decode_length=50): """Autoregressive eval. Quadratic time in decode_length. @@ -170,8 +168,7 @@ def eval_autoregressive(self, Contains a single key "training". """ _, logits, losses = self._slow_greedy_infer( - features, - decode_length=decode_length) + features, decode_length=decode_length) return [logits], losses def infer(self, @@ -214,8 +211,7 @@ def infer(self, alpha) return samples - def _beam_decode(self, features, decode_length, beam_size, top_beams, - alpha): + def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): """Beam search decoding. Models should ideally implement a more efficient version of this function. @@ -251,7 +247,7 @@ def _beam_decode_slow(self, features, decode_length, beam_size, top_beams, Returns: samples: an integer `Tensor`. Top samples from the beam search """ - batch_size = tf.shape(features["inputs"])[0] + batch_size = common_layers.shape_list(features["inputs"])[0] batch_size = tf.Print(batch_size, [batch_size], "beam_decode batch_size=") def symbols_to_logits_fn(ids): @@ -260,7 +256,7 @@ def symbols_to_logits_fn(ids): ids = tf.pad(ids[:, 1:], [[0, 0], [0, 1], [0, 0], [0, 0]]) if "partial_targets" in features: pt = features["partial_targets"] - pt_length = tf.shape(pt)[1] + pt_length = common_layers.shape_list(pt)[1] pt = tf.tile(pt, [1, beam_size]) pt = tf.reshape(pt, [batch_size * beam_size, pt_length, 1, 1]) ids = tf.concat([pt, ids], axis=1) @@ -275,7 +271,8 @@ def symbols_to_logits_fn(ids): modality = self._hparams.problems[self._problem_idx].target_modality if modality.top_is_pointwise: return tf.squeeze(logits, axis=[1, 2, 3]) - current_output_position = tf.shape(ids)[1] - 1 # -1 due to the pad above. + # -1 due to the pad above. + current_output_position = common_layers.shape_list(ids)[1] - 1 logits = logits[:, current_output_position, :, :] return tf.squeeze(logits, axis=[1, 2]) @@ -288,7 +285,7 @@ def symbols_to_logits_fn(ids): features["inputs"] = tf.expand_dims(features["inputs"], 4) # Expand the inputs in to the beam size. features["inputs"] = tf.tile(features["inputs"], [1, beam_size, 1, 1, 1]) - s = tf.shape(features["inputs"]) + s = common_layers.shape_list(features["inputs"]) features["inputs"] = tf.reshape(features["inputs"], [s[0] * s[1], s[2], s[3], s[4]]) @@ -297,10 +294,15 @@ def symbols_to_logits_fn(ids): # Setting decode length to input length + decode_length decode_length = tf.constant(decode_length) if "partial_targets" not in features: - decode_length += tf.shape(features["inputs"])[1] - ids, scores = beam_search.beam_search(symbols_to_logits_fn, initial_ids, - beam_size, decode_length, vocab_size, - alpha, stop_early=(top_beams == 1)) + decode_length += common_layers.shape_list(features["inputs"])[1] + ids, scores = beam_search.beam_search( + symbols_to_logits_fn, + initial_ids, + beam_size, + decode_length, + vocab_size, + alpha, + stop_early=(top_beams == 1)) # Set inputs back to the unexpanded inputs to not to confuse the Estimator! if self.has_input: @@ -317,7 +319,7 @@ def symbols_to_logits_fn(ids): return {"outputs": ids[:, :top_beams, 1:], "scores": scores} return ids[:, :top_beams, 1:] - def _greedy_infer(self, features, decode_length): + def _greedy_infer(self, features, decode_length): """A greedy inference method. Models should ideally implement a more efficient version of this function. @@ -361,6 +363,7 @@ def _slow_greedy_infer(self, features, decode_length): targets_old = features.get("targets", None) target_modality = self._hparams.problems[self._problem_idx].target_modality + def infer_step(recent_output, recent_logits, unused_loss): """Inference step.""" recent_output.set_shape([None, None, None, 1]) @@ -374,7 +377,8 @@ def infer_step(recent_output, recent_logits, unused_loss): if target_modality.top_is_pointwise: cur_sample = samples[:, -1, :, :] else: - cur_sample = samples[:, tf.shape(recent_output)[1], :, :] + cur_sample = samples[:, + common_layers.shape_list(recent_output)[1], :, :] cur_sample = tf.to_int64(tf.expand_dims(cur_sample, axis=1)) samples = tf.concat([recent_output, cur_sample], axis=1) samples.set_shape([None, None, None, 1]) @@ -390,19 +394,20 @@ def infer_step(recent_output, recent_logits, unused_loss): initial_output = tf.to_int64(features["partial_targets"]) while len(initial_output.get_shape().as_list()) < 4: initial_output = tf.expand_dims(initial_output, 2) - batch_size = tf.shape(initial_output)[0] + batch_size = common_layers.shape_list(initial_output)[0] else: - batch_size = tf.shape(features["inputs"])[0] + batch_size = common_layers.shape_list(features["inputs"])[0] initial_output = tf.zeros((batch_size, 0, 1, 1), dtype=tf.int64) # Hack: foldl complains when the output shape is less specified than the # input shape, so we confuse it about the input shape. initial_output = tf.slice(initial_output, [0, 0, 0, 0], - tf.shape(initial_output)) + common_layers.shape_list(initial_output)) target_modality = self._hparams.problems[self._problem_idx].target_modality if is_class_modality(target_modality): decode_length = 1 else: - decode_length = tf.shape(features["inputs"])[1] + decode_length + decode_length = common_layers.shape_list( + features["inputs"])[1] + decode_length # Initial values of result, logits and loss. result = initial_output # tensor of shape [batch_size, time, 1, 1, vocab_size] @@ -412,16 +417,15 @@ def infer_step(recent_output, recent_logits, unused_loss): def while_exit_cond(result, logits, loss): # pylint: disable=unused-argument """Exit the loop either if reach decode_length or EOS.""" - length = tf.shape(result)[1] + length = common_layers.shape_list(result)[1] not_overflow = length < decode_length if self._problem_hparams.stop_at_eos: + def fn_not_eos(): return tf.not_equal( # Check if the last predicted element is a EOS - tf.squeeze(result[:, -1, :, :]), - text_encoder.EOS_ID - ) + tf.squeeze(result[:, -1, :, :]), text_encoder.EOS_ID) not_eos = tf.cond( # We only check for early stoping if there is at least 1 element ( @@ -436,8 +440,7 @@ def fn_not_eos(): # If batch_size == 1, we check EOS for early stoping lambda: tf.logical_and(not_overflow, not_eos), # Else, just wait for max length - lambda: not_overflow - ) + lambda: not_overflow) return not_overflow result, logits, loss = tf.while_loop( @@ -457,9 +460,10 @@ def fn_not_eos(): features["targets"] = targets_old losses = {"training": loss} if "partial_targets" in features: - partial_target_length = tf.shape(features["partial_targets"])[1] - result = tf.slice( - result, [0, partial_target_length, 0, 0], [-1, -1, -1, -1]) + partial_target_length = common_layers.shape_list( + features["partial_targets"])[1] + result = tf.slice(result, [0, partial_target_length, 0, 0], + [-1, -1, -1, -1]) return result, logits, losses def sample(self, features): @@ -480,16 +484,15 @@ def sample(self, features): assert self._hparams.sampling_method == "random" def _multinomial_squeeze(logits, temperature=1.0): + logits_shape = common_layers.shape_list(logits) reshaped_logits = ( - tf.reshape(logits, [-1, tf.shape(logits)[-1]])/temperature) + tf.reshape(logits, [-1, logits_shape[-1]]) / temperature) choices = tf.multinomial(reshaped_logits, 1) - choices = tf.reshape(choices, - tf.shape(logits)[:logits.get_shape().ndims - 1]) + choices = tf.reshape(choices, logits_shape[:-1]) return choices - sharded_samples = self._data_parallelism(_multinomial_squeeze, - sharded_logits, - self._hparams.sampling_temp) + sharded_samples = self._data_parallelism( + _multinomial_squeeze, sharded_logits, self._hparams.sampling_temp) return tf.concat(sharded_samples, 0), sharded_logits, losses def _shard_features(self, features): # pylint: disable=missing-docstring @@ -544,8 +547,8 @@ def model_fn(self, features, skip=False, force_full_predict=False): # Target space id just gets copied to every shard. if "target_space_id" in features: - transformed_features["target_space_id"] = [ - features["target_space_id"]] * self._num_datashards + transformed_features["target_space_id"] = [features["target_space_id"] + ] * self._num_datashards # For features without a modality ending in "_raw", we pass them raw. for key, feature in sharded_features.items(): @@ -574,8 +577,7 @@ def model_fn(self, features, skip=False, force_full_predict=False): body_outputs = transformed_features["targets"] losses = {"extra": 0.0} else: - body_outputs, losses = self.model_fn_body_sharded( - transformed_features) + body_outputs, losses = self.model_fn_body_sharded(transformed_features) if not isinstance(losses, dict): # If it's a single extra loss. losses = {"extra": losses} @@ -609,28 +611,28 @@ def model_fn(self, features, skip=False, force_full_predict=False): # Scheduled sampling. do_scheduled_sampling = ( # Only do it if training and set for it. self._hparams.scheduled_sampling_prob > 0.0 and - self._hparams.mode == tf.estimator.ModeKeys.TRAIN and - not skip) + self._hparams.mode == tf.estimator.ModeKeys.TRAIN and not skip) if do_scheduled_sampling: def sample(x): """Multinomial sampling from a n-dimensional tensor.""" vocab_size = target_modality.top_dimensionality samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]), 1) - reshaped_samples = tf.reshape(samples, tf.shape(x)[:-1]) + reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1]) return tf.to_int32(reshaped_samples) def mix_gold_sampled(gold_targets, sampled_targets): return tf.where( - tf.less(tf.random_uniform(tf.shape(sampled_targets)), - self._hparams.scheduled_sampling_gold_mixin_prob), - gold_targets, sampled_targets) + tf.less( + tf.random_uniform(common_layers.shape_list(sampled_targets)), + self._hparams.scheduled_sampling_gold_mixin_prob), gold_targets, + sampled_targets) def sampled_results(): """Generate scheduled sampling results.""" sampled_targets = dp(sample, sharded_logits) - new_targets = dp(mix_gold_sampled, - sharded_features["targets"], sampled_targets) + new_targets = dp(mix_gold_sampled, sharded_features["targets"], + sampled_targets) new_features = transformed_features with tf.variable_scope(tf.get_variable_scope(), reuse=True): with tf.variable_scope(target_modality.name): @@ -648,13 +650,13 @@ def sampled_results(): training_loss *= self._problem_hparams.loss_multiplier losses["training"] = training_loss return new_sharded_logits, losses + # Run the above conditionally. prob = self._hparams.scheduled_sampling_prob prob *= common_layers.inverse_exp_decay( self._hparams.scheduled_sampling_warmup_steps, min_value=0.001) sharded_logits, losses = tf.cond( - tf.less(tf.random_uniform([]), prob), - sampled_results, + tf.less(tf.random_uniform([]), prob), sampled_results, lambda: (sharded_logits, losses)) tf.logging.info("This model_fn took %.3f sec." % (time.time() - start_time)) @@ -678,7 +680,8 @@ def model_fn_body_sharded(self, sharded_features): datashard_to_features = [{ k: v[d] for k, v in six.iteritems(sharded_features) - } for d in xrange(self._num_datashards)] + } + for d in xrange(self._num_datashards)] output = self._data_parallelism( _with_timing(self.model_fn_body, "model_fn_body"), datashard_to_features) From 957a384e3e4a7a290a999874bfda0e47de29e472 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 15 Nov 2017 12:35:03 -0800 Subject: [PATCH 0595/4095] Fix weights_fn calls PiperOrigin-RevId: 175864073 --- tensor2tensor/tpu/tpu_trainer_lib.py | 2 +- tensor2tensor/utils/metrics.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 92060f89c..07c3dcd99 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -239,7 +239,7 @@ def create_eval_metrics_fn(problem, hparams): tm = problem.get_hparams().target_modality if isinstance(tm, tuple): tm = registry.create_modality(tm, hparams) - weights_fn = tm.weights_fn + weights_fn = tm.targets_weights_fn def make_metric_fn(metric_fn): diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index ecba3c8b4..817582809 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -319,7 +319,7 @@ def image_wrapped_metric_fn(predictions, tm = problem_instance.get_hparams().target_modality if isinstance(tm, tuple): tm = registry.create_modality(tm, model_hparams) - weights_fn = tm.weights_fn + weights_fn = tm.targets_weights_fn for metric in metrics: metric_fn = METRICS_FNS[metric] From 8cacb7944efc29947f01476e97d65396e3c5b045 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 15 Nov 2017 14:16:50 -0800 Subject: [PATCH 0596/4095] Refactor rev_block into a RevBlock class that exposes forward and backward PiperOrigin-RevId: 175878950 --- tensor2tensor/layers/rev_block.py | 287 ++++++++++++++----------- tensor2tensor/layers/rev_block_test.py | 44 ++++ 2 files changed, 208 insertions(+), 123 deletions(-) diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 62ed6c6a5..6796750b9 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -55,10 +55,8 @@ def _rev_layer_forward(xs, f, g, f_side_input, g_side_input, gate_outputs=False): """Forward for 1 reversible layer.""" x1, x2 = xs - with tf.variable_scope("f"): - y1 = x1 + (f(x2, f_side_input) if f_side_input else f(x2)) - with tf.variable_scope("g"): - y2 = x2 + (g(y1, g_side_input) if g_side_input else g(y1)) + y1 = x1 + (f(x2, f_side_input) if f_side_input else f(x2)) + y2 = x2 + (g(y1, g_side_input) if g_side_input else g(y1)) if gate_outputs: return tf.tuple([y1, y2]) else: @@ -76,14 +74,12 @@ def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars, # grad function on the calls to tf.gradients. y1_stop = tf.stop_gradient(y1) g_side_input = [tf.stop_gradient(t) for t in g_side_input] - with tf.variable_scope("g"): - gy1 = g(y1_stop, g_side_input) if g_side_input else g(y1_stop) + gy1 = g(y1_stop, g_side_input) if g_side_input else g(y1_stop) x2 = y2 - gy1 x2_stop = tf.stop_gradient(x2) f_side_input = [tf.stop_gradient(t) for t in f_side_input] - with tf.variable_scope("f"): - fx2 = f(x2_stop, f_side_input) if f_side_input else f(x2_stop) + fx2 = f(x2_stop, f_side_input) if f_side_input else f(x2_stop) x1 = y1 - fx2 @@ -91,8 +87,9 @@ def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars, # dL/dy2 * dG(y1)/y1 grad_gy1_y2 = tf.gradients(gy1, y1_stop, grad_y2)[0] grad_x1 = grad_y1 + grad_gy1_y2 - grad_x2 = (tf.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 + - tf.gradients(fx2, x2_stop, grad_gy1_y2)[0]) + grad_x2 = ( + tf.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 + + tf.gradients(fx2, x2_stop, grad_gy1_y2)[0]) # Compute gradients wrt to vars and side inputs in f and g grads1 = tf.gradients(gy1, g_vars + g_side_input, grad_y2) @@ -131,111 +128,79 @@ def _rev_block_forward(x1, num_layers=1, f_side_input=None, g_side_input=None, - layer_scopes=None, - gate_outputs=False, - name=None): + gate_outputs=False): """Forward for a series of reversible layers.""" out = (x1, x2) - with tf.variable_scope(name, default_name="revblock"): - for i in xrange(num_layers): - with tf.variable_scope("revlayer_%d" % i) as layer_vs: - if layer_scopes is not None: - layer_scopes.append(layer_vs) - out = _rev_layer_forward( - out, - f[i], - g[i], - f_side_input, - g_side_input, - gate_outputs=gate_outputs) + for i in xrange(num_layers): + out = _rev_layer_forward( + out, f[i], g[i], f_side_input, g_side_input, gate_outputs=gate_outputs) y1, y2 = out return y1, y2 -def rev_block(x1, - x2, - f, - g, - num_layers=1, - f_side_input=None, - g_side_input=None, - is_training=True): - """A block of reversible residual layers. +class RevBlock(object): + """Block of reversible layers. See rev_block.""" - A reversible residual layer is defined as: + def __init__(self, + f, + g, + num_layers=1, + f_side_input=None, + g_side_input=None, + use_efficient_backprop=True): - ``` - y1 = x1 + f(x2, f_side_input) - y2 = x2 + g(y1, g_side_input) - ``` + if isinstance(f, list): + assert len(f) == num_layers + else: + f = [f] * num_layers - A reversible residual block, defined here, is a series of reversible residual - layers. + if isinstance(g, list): + assert len(g) == num_layers + else: + g = [g] * num_layers - Limitations: - * f and g must not close over any Tensors; all side inputs to f and g should - be passed in with f_side_input and g_side_input which will be forwarded to - f and g. - * f and g must not change the dimensionality of their inputs in order for the - addition in the equations above to work. + scope_prefix = "revblock/revlayer_%d/" + f_scope = scope_prefix + "f" + g_scope = scope_prefix + "g" - Args: - x1: a float Tensor. - x2: a float Tensor. - f: a function, (Tensor) -> (Tensor) (or list of such of length num_layers). - Should not change the shape of the Tensor. Expected to create variables. - See f_side_input if there are side inputs. - g: a function, (Tensor) -> (Tensor) (or list of such of length num_layers). - Should not change the shape of the Tensor. Expected to create variables. - See g_side_input if there are side inputs. - num_layers: int, number of reversible residual layers. Each layer will - apply f and g according to the equations above, with new variables in each - layer. - f_side_input: list of Tensors, side input to f. If not None, signature of f - should be (Tensor, list<Tensor>) -> (Tensor). - g_side_input: list of Tensors, side input to g. If not None, signature of g - should be (Tensor, list<Tensor>) -> (Tensor). - is_training: bool, whether to actually use the efficient backprop codepath. + f = [ + tf.make_template(f_scope % i, fn, create_scope_now_=True) + for i, fn in enumerate(f) + ] + g = [ + tf.make_template(g_scope % i, fn, create_scope_now_=True) + for i, fn in enumerate(g) + ] - Returns: - y1, y2: tuple of float Tensors. - """ - if f_side_input is None: - f_side_input = [] - if g_side_input is None: - g_side_input = [] - if isinstance(f, list): - assert len(f) == num_layers - else: - f = [f] * num_layers - if isinstance(g, list): - assert len(g) == num_layers - else: - g = [g] * num_layers + self.f = f + self.g = g + + self.num_layers = num_layers + self.f_side_input = f_side_input or [] + self.g_side_input = g_side_input or [] - # Filled by the forward function below - layer_scopes = [] + self._use_efficient_backprop = use_efficient_backprop - def custom_grad_fn(inputs, variables, ys, grad_ys): + def _efficient_grad_fn(self, inputs, variables, ys, grad_ys): """Custom gradient fn for a block of reversible residual layers.""" side_inputs = inputs[2:] - f_side_idxs = [None] * len(f_side_input) - g_side_idxs = [None] * len(g_side_input) - assert len(side_inputs) == len(f_side_input) + len(g_side_input) + f_side_idxs = [None] * len(self.f_side_input) + g_side_idxs = [None] * len(self.g_side_input) + assert len(side_inputs) == len(self.f_side_input) + len(self.g_side_input) for i, t in enumerate(side_inputs): - if t in f_side_input: - f_side_idxs[f_side_input.index(t)] = i - elif t in g_side_input: - g_side_idxs[g_side_input.index(t)] = i + if t in self.f_side_input: + f_side_idxs[self.f_side_input.index(t)] = i + elif t in self.g_side_input: + g_side_idxs[self.g_side_input.index(t)] = i else: assert False - f_vars = [[] for _ in range(num_layers)] - g_vars = [[] for _ in range(num_layers)] - f_vars_idxs = [[] for _ in range(num_layers)] - g_vars_idxs = [[] for _ in range(num_layers)] + f_vars = [[] for _ in range(self.num_layers)] + g_vars = [[] for _ in range(self.num_layers)] + f_vars_idxs = [[] for _ in range(self.num_layers)] + g_vars_idxs = [[] for _ in range(self.num_layers)] for i, t in enumerate(variables): ref = common_layers.underlying_variable_ref(t) @@ -258,25 +223,24 @@ def custom_grad_fn(inputs, variables, ys, grad_ys): g_side_grads = [] # Reverse variable containers to go backward - layer_scopes.reverse() f_vars.reverse() g_vars.reverse() + f = list(self.f) + g = list(self.g) f.reverse() g.reverse() - for i in xrange(num_layers): - with tf.variable_scope(layer_scopes[i], reuse=True): - - ys, grad_ys, f_ret, g_ret = _rev_layer_backward(ys, grad_ys, f[i], g[i], - f_vars[i], f_side_input, - g_vars[i], g_side_input) + for i in xrange(self.num_layers): + ys, grad_ys, f_ret, g_ret = _rev_layer_backward( + ys, grad_ys, f[i], g[i], f_vars[i], self.f_side_input, g_vars[i], + self.g_side_input) - grad_f_vars, grad_f_side = f_ret - grad_g_vars, grad_g_side = g_ret - f_var_grads.append(grad_f_vars) - g_var_grads.append(grad_g_vars) - f_side_grads.append(grad_f_side) - g_side_grads.append(grad_g_side) + grad_f_vars, grad_f_side = f_ret + grad_g_vars, grad_g_side = g_ret + f_var_grads.append(grad_f_vars) + g_var_grads.append(grad_g_vars) + f_side_grads.append(grad_f_side) + g_side_grads.append(grad_g_side) # Accumulate layer gradients for f_side_input and g_side_input acc_f_side_grads = _acc_grads(*f_side_grads) @@ -303,23 +267,100 @@ def custom_grad_fn(inputs, variables, ys, grad_ys): grad_x1, grad_x2 = grad_ys return [grad_x1, grad_x2] + side_input_grads, variable_grads - # Need a forward function with positional arguments - @common_layers.fn_with_custom_grad(custom_grad_fn if is_training else None) - def forward(x1, x2, *side_inputs): - f_side = side_inputs[:len(f_side_input)] - g_side = side_inputs[len(f_side_input):] - return _rev_block_forward( - x1, - x2, - f, - g, - num_layers=num_layers, - f_side_input=f_side, - g_side_input=g_side, - layer_scopes=layer_scopes, - gate_outputs=is_training) - - return forward(x1, x2, *(f_side_input + g_side_input)) + def forward(self, x1, x2): + """Run forward through the reversible layers.""" + + side_inputs = [self.f_side_input, self.g_side_input] + flat_side_inputs = tf.contrib.framework.nest.flatten(side_inputs) + + custom_grad_fn = ( + self._efficient_grad_fn if self._use_efficient_backprop else None) + + @common_layers.fn_with_custom_grad(custom_grad_fn) + def _forward(x1_, x2_, *flat_side_inputs): + f_side, g_side = tf.contrib.framework.nest.pack_sequence_as( + side_inputs, flat_side_inputs) + return _rev_block_forward( + x1_, + x2_, + self.f, + self.g, + num_layers=self.num_layers, + f_side_input=f_side, + g_side_input=g_side, + gate_outputs=self._use_efficient_backprop) + + return _forward(x1, x2, *flat_side_inputs) + + def backward(self, y1, y2): + """Run backward through the reversible layers.""" + + f = list(self.f) + g = list(self.g) + f.reverse() + g.reverse() + + for i in xrange(self.num_layers): + gy1 = g[i](y1, self.g_side_input) if self.g_side_input else g[i](y1) + x2 = y2 - gy1 + fx2 = f[i](x2, self.f_side_input) if self.f_side_input else f[i](x2) + x1 = y1 - fx2 + + y1, y2 = x1, x2 + + return x1, x2 + + +def rev_block(x1, + x2, + f, + g, + num_layers=1, + f_side_input=None, + g_side_input=None, + is_training=True): + """A block of reversible residual layers. + + A reversible residual layer is defined as: + + ``` + y1 = x1 + f(x2, f_side_input) + y2 = x2 + g(y1, g_side_input) + ``` + + A reversible residual block, defined here, is a series of reversible residual + layers. + + Limitations: + * f and g must not close over any Tensors; all side inputs to f and g should + be passed in with f_side_input and g_side_input which will be forwarded to + f and g. + * f and g must not change the dimensionality of their inputs in order for the + addition in the equations above to work. + + Args: + x1: a float Tensor. + x2: a float Tensor. + f: a function, (Tensor) -> (Tensor) (or list of such of length num_layers). + Should not change the shape of the Tensor. Expected to create variables. + See f_side_input if there are side inputs. + g: a function, (Tensor) -> (Tensor) (or list of such of length num_layers). + Should not change the shape of the Tensor. Expected to create variables. + See g_side_input if there are side inputs. + num_layers: int, number of reversible residual layers. Each layer will + apply f and g according to the equations above, with new variables in each + layer. + f_side_input: list of Tensors, side input to f. If not None, signature of f + should be (Tensor, list<Tensor>) -> (Tensor). + g_side_input: list of Tensors, side input to g. If not None, signature of g + should be (Tensor, list<Tensor>) -> (Tensor). + is_training: bool, whether to actually use the efficient backprop codepath. + + Returns: + y1, y2: tuple of float Tensors. + """ + block = RevBlock(f, g, num_layers, f_side_input, g_side_input, is_training) + return block.forward(x1, x2) def recompute_grad(fn): diff --git a/tensor2tensor/layers/rev_block_test.py b/tensor2tensor/layers/rev_block_test.py index 31df15068..acc68f9bd 100644 --- a/tensor2tensor/layers/rev_block_test.py +++ b/tensor2tensor/layers/rev_block_test.py @@ -31,6 +31,50 @@ class RevBlockTest(tf.test.TestCase): NUM_LAYERS = 4 BATCH_SIZE = 16 + def testForwardBackward(self): + + def f(x): + return tf.layers.dense(x, self.CHANNELS // 2, use_bias=True) + + def g(x): + return tf.layers.dense(x, self.CHANNELS // 2, use_bias=True) + + x = tf.random_uniform([self.BATCH_SIZE, self.CHANNELS], dtype=tf.float32) + x1, x2 = tf.split(x, 2, axis=-1) + + block = rev_block.RevBlock(f, g, num_layers=3) + y1, y2 = block.forward(x1, x2) + x1_inv, x2_inv = block.backward(y1, y2) + + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + x1, x2, x1_inv, x2_inv = sess.run([x1, x2, x1_inv, x2_inv]) + + self.assertAllClose(x1, x1_inv) + self.assertAllClose(x2, x2_inv) + + def testBackwardForward(self): + + def f(x): + return tf.layers.dense(x, self.CHANNELS // 2, use_bias=True) + + def g(x): + return tf.layers.dense(x, self.CHANNELS // 2, use_bias=True) + + y = tf.random_uniform([self.BATCH_SIZE, self.CHANNELS], dtype=tf.float32) + y1, y2 = tf.split(y, 2, axis=-1) + + block = rev_block.RevBlock(f, g, num_layers=3) + x1, x2 = block.backward(y1, y2) + y1_inv, y2_inv = block.forward(x1, x2) + + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + y1, y2, y1_inv, y2_inv = sess.run([y1, y2, y1_inv, y2_inv]) + + self.assertAllClose(y1, y1_inv) + self.assertAllClose(y2, y2_inv) + def _testRevBlock(self, x=None, f=None, From 976da09fde33e5303f28200643110f7eb0ae6f3b Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 15 Nov 2017 16:49:46 -0800 Subject: [PATCH 0597/4095] Clean up revblock code with tf.contrib.framework.nest PiperOrigin-RevId: 175902336 --- tensor2tensor/layers/common_layers.py | 78 ++++++++++++++------------- tensor2tensor/layers/rev_block.py | 18 ++----- 2 files changed, 45 insertions(+), 51 deletions(-) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 6f6d10552..1fe932d4e 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -20,6 +20,7 @@ from collections import defaultdict import contextlib +import functools import math import random @@ -1961,6 +1962,7 @@ def fn_with_custom_grad(grad_fn, use_global_vars=False): def dec(fn): + @functools.wraps(fn) def wrapped(*args): return _fn_with_custom_grad( fn, args, grad_fn, use_global_vars=use_global_vars) @@ -1995,43 +1997,45 @@ def _fn_with_custom_grad(fn, inputs, grad_fn, use_global_vars=False): if grad_fn is None: return outputs - else: - if not (isinstance(outputs, tuple) or isinstance(outputs, list)): - outputs = [outputs] - outputs = list(outputs) - - in_types = [t.dtype for t in inputs] - out_types = [t.dtype for t in outputs] - var_types = [t.dtype for t in train_vars] - - def custom_grad_fn(op, *dys): - """Custom grad fn applying grad_fn for identity Defun.""" - dys = list(dys) - fn_inputs = op.inputs[:len(inputs)] - fn_vars = op.inputs[len(inputs):len(inputs) + len(train_vars)] - fn_outputs = op.inputs[len(inputs) + len(train_vars):] - assert len(fn_outputs) == len(outputs) - assert len(fn_outputs) == len(dys) - - grad_inputs, grad_vars = grad_fn(fn_inputs, fn_vars, fn_outputs, dys) - grad_outputs = [None] * len(fn_outputs) - return tuple(grad_inputs + grad_vars + grad_outputs) - - # The Defun takes as input the original inputs, the trainable variables - # created in fn, and the outputs. In the forward it passes through the - # outputs. In the backwards, it produces gradients for the original inputs - # and the trainable variables. - @function.Defun( - *(in_types + var_types + out_types), - func_name="identity_custom_grad%d" % random.randint(1, 10**9), - python_grad_func=custom_grad_fn, - shape_func=lambda _: [t.get_shape() for t in outputs]) - def identity(*args): - outs = args[len(inputs) + len(train_vars):] - return tuple([tf.identity(t) for t in outs]) - - id_out = identity(*(inputs + train_vars + outputs)) - return id_out + + if not (isinstance(outputs, tuple) or isinstance(outputs, list)): + outputs = [outputs] + outputs = list(outputs) + + defun_inputs = [inputs, train_vars, outputs] + + def custom_grad_fn(op, *dys): + """Custom grad fn applying grad_fn for identity Defun.""" + fn_inputs, fn_vars, fn_outputs = tf.contrib.framework.nest.pack_sequence_as( + defun_inputs, list(op.inputs)) + dys = list(dys) + assert len(fn_outputs) == len(outputs) + assert len(fn_outputs) == len(dys) + + grad_inputs, grad_vars = grad_fn(fn_inputs, fn_vars, fn_outputs, dys) + grad_outputs = [None] * len(fn_outputs) + return tuple(grad_inputs + grad_vars + grad_outputs) + + # The Defun takes as input the original inputs, the trainable variables + # created in fn, and the outputs. In the forward it passes through the + # outputs. In the backwards, it produces gradients for the original inputs + # and the trainable variables. + in_types = [t.dtype for t in inputs] + out_types = [t.dtype for t in outputs] + var_types = [t.dtype for t in train_vars] + + @function.Defun( + *(in_types + var_types + out_types), + func_name="identity_custom_grad%d" % random.randint(1, 10**9), + python_grad_func=custom_grad_fn, + shape_func=lambda _: [t.get_shape() for t in outputs]) + def identity(*args): + _, _, outs = tf.contrib.framework.nest.pack_sequence_as(defun_inputs, args) + return tuple([tf.identity(t) for t in outs]) + + flat_inputs = tf.contrib.framework.nest.flatten(defun_inputs) + id_out = identity(*flat_inputs) + return id_out _function_cache = {} diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index 6796750b9..eaeb55921 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -105,20 +105,10 @@ def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars, # Put returns in a tuple to ensure a constant memory budget (i.e. don't want # the subsequent layer to start computing and consuming memory based on a # subset of these values). - outs = tf.tuple([x1, x2, grad_x1, grad_x2] + grad_f_vars + grad_g_vars + - grad_f_side + grad_g_side) - x1, x2, grad_x1, grad_x2 = outs[:4] - grad_f_vars_end = 4 + len(grad_f_vars) - grad_g_vars_end = grad_f_vars_end + len(grad_g_vars) - grad_f_side_end = grad_g_vars_end + len(grad_f_side) - - grad_f_vars = outs[4:grad_f_vars_end] - grad_g_vars = outs[grad_f_vars_end:grad_g_vars_end] - grad_f_side = outs[grad_g_vars_end:grad_f_side_end] - grad_g_side = outs[grad_f_side_end:] - - return ((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side), - (grad_g_vars, grad_g_side)) + outputs = ((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side), + (grad_g_vars, grad_g_side)) + tupled = tf.tuple(tf.contrib.framework.nest.flatten(outputs)) + return tf.contrib.framework.nest.pack_sequence_as(outputs, tupled) def _rev_block_forward(x1, From 717afe92646151ae28e0e7bf66ff372b38125a9c Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 15 Nov 2017 17:09:06 -0800 Subject: [PATCH 0598/4095] Add ImageImagenet224; resnet50 now fits batch size of 128 PiperOrigin-RevId: 175904903 --- tensor2tensor/data_generators/image.py | 31 ++++++++++++++++++++------ tensor2tensor/layers/common_layers.py | 6 +++-- tensor2tensor/models/resnet.py | 4 +++- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index dec66a623..391f87be3 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -287,8 +287,7 @@ def generator(self, data_dir, tmp_dir, is_training): def hparams(self, defaults, unused_model_hparams): p = defaults p.input_modality = {"inputs": (registry.Modalities.IMAGE, 256)} - p.target_modality = (registry.Modalities.CLASS_LABEL, - self.num_classes) + p.target_modality = (registry.Modalities.CLASS_LABEL, self.num_classes) p.batch_size_multiplier = 4 if self.is_small else 256 p.max_expected_batch_size_per_shard = 8 if self.is_small else 2 p.loss_multiplier = 3.0 if self.is_small else 1.0 @@ -305,16 +304,19 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): self.dev_filepaths(data_dir, self.dev_shards, shuffled=False)) -def imagenet_preprocess_example(example, mode): +def imagenet_preprocess_example(example, mode, resize_size=None): """Preprocessing used for Imagenet and similar problems.""" + if resize_size is None: + resize_size = [299, 299] def preprocess(img): img = tf.image.resize_images(img, [360, 360]) - img = common_layers.image_augmentation(tf.to_float(img) / 255.) + img = common_layers.image_augmentation( + tf.to_float(img) / 255., crop_size=resize_size) return tf.to_int64(img * 255.) def resize(img): - return tf.to_int64(tf.image.resize_images(img, [299, 299])) + return tf.to_int64(tf.image.resize_images(img, resize_size)) inputs = tf.cast(example["inputs"], tf.int64) if mode == tf.estimator.ModeKeys.TRAIN: @@ -349,6 +351,21 @@ def preprocess_example(self, example, mode, _): return imagenet_preprocess_example(example, mode) +@registry.register_problem +class ImageImagenet224(ImageImagenet): + """Imagenet rescaled to 224x224.""" + + def dataset_filename(self): + return "image_imagenet" # Reuse Imagenet data. + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + tf.logging.warning( + "Generate data for image_imagenet224 with image_imagenet") + + def preprocess_example(self, example, mode, _): + return imagenet_preprocess_example(example, mode, resize_size=[224, 224]) + + @registry.register_problem class ImageImagenet32(Image2ClassProblem): """Imagenet rescaled to 32x32.""" @@ -784,8 +801,8 @@ def mscoco_generator(data_dir, vocab_symbolizer = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, vocab_filename, vocab_size) _get_mscoco(tmp_dir) - caption_filepath = (_MSCOCO_TRAIN_CAPTION_FILE - if training else _MSCOCO_EVAL_CAPTION_FILE) + caption_filepath = ( + _MSCOCO_TRAIN_CAPTION_FILE if training else _MSCOCO_EVAL_CAPTION_FILE) caption_filepath = os.path.join(tmp_dir, caption_filepath) prefix = _MSCOCO_TRAIN_PREFIX if training else _MSCOCO_EVAL_PREFIX caption_file = io.open(caption_filepath) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 1fe932d4e..47448b7d7 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -163,9 +163,11 @@ def convert_rgb_to_real(x): return x -def image_augmentation(images, do_colors=False): +def image_augmentation(images, do_colors=False, crop_size=None): """Image augmentation: cropping, flipping, and color transforms.""" - images = tf.random_crop(images, [299, 299, 3]) + if crop_size is None: + crop_size = [299, 299] + images = tf.random_crop(images, crop_size + [3]) images = tf.image.random_flip_left_right(images) if do_colors: # More augmentation, but might be slow. images = tf.image.random_brightness(images, max_delta=32. / 255.) diff --git a/tensor2tensor/models/resnet.py b/tensor2tensor/models/resnet.py index 77a426e23..ca3c6ee49 100644 --- a/tensor2tensor/models/resnet.py +++ b/tensor2tensor/models/resnet.py @@ -245,5 +245,7 @@ def resnet_base(): hparams.add_hparam("use_nchw", True) hparams.add_hparam("num_filters", [64, 128, 256, 512]) hparams.add_hparam("strides", [1, 2, 2, 2]) - hparams.tpu_batch_size_per_shard = 48 + + # Can run with a batch size of 128 with Problem ImageImagenet224 + hparams.tpu_batch_size_per_shard = 128 return hparams From 2e6d512704e1f7f95fd846790a990c94e2daae06 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 15 Nov 2017 17:34:52 -0800 Subject: [PATCH 0599/4095] Added an option to omit quotes from the subtoken voc format. PiperOrigin-RevId: 175908007 --- tensor2tensor/data_generators/text_encoder.py | 19 ++++++--- .../data_generators/text_encoder_test.py | 40 +++++++++++++++++++ 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 1c720a6db..7b7b2287e 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -699,9 +699,15 @@ def build_from_token_counts(self, new_subtoken_strings.sort(reverse=True) # Reinitialize to the candidate vocabulary. - self._init_subtokens_from_list( - [subtoken for _, subtoken in new_subtoken_strings], - reserved=num_reserved_ids) + new_subtoken_strings = [subtoken for _, subtoken in new_subtoken_strings] + if num_reserved_ids == len(RESERVED_TOKENS): + new_subtoken_strings = RESERVED_TOKENS + new_subtoken_strings + elif num_reserved_ids == 0: + pass + else: + raise ValueError("num_reserved_ids must be 0 or %d but was %d" % + NUM_RESERVED_TOKENS, num_reserved_ids) + self._init_subtokens_from_list(new_subtoken_strings) tf.logging.info("vocab_size = %d" % self.vocab_size) def dump(self): @@ -776,10 +782,13 @@ def _load_from_file(self, filename): with tf.gfile.Open(filename) as f: self._load_from_file_object(f) - def store_to_file(self, filename): + def store_to_file(self, filename, add_single_quotes=True): with tf.gfile.Open(filename, "w") as f: for subtoken_string in self._all_subtoken_strings: - f.write("'" + unicode_to_native(subtoken_string) + "'\n") + if add_single_quotes: + f.write("'" + unicode_to_native(subtoken_string) + "'\n") + else: + f.write(unicode_to_native(subtoken_string) + "\n") class ImageEncoder(object): diff --git a/tensor2tensor/data_generators/text_encoder_test.py b/tensor2tensor/data_generators/text_encoder_test.py index 6578d873a..b02653ebc 100644 --- a/tensor2tensor/data_generators/text_encoder_test.py +++ b/tensor2tensor/data_generators/text_encoder_test.py @@ -240,6 +240,46 @@ def test_reserved_token_chars_not_in_alphabet(self): encoder1.encode(c) encoder2.encode(c) + def test_save_and_reload(self): + corpus = "the quick brown fox jumps over the lazy dog" + token_counts = collections.Counter(corpus.split(" ")) + + # Deliberately exclude some required encoding chars from the alphabet + # and token list, making some strings unencodable. + encoder = text_encoder.SubwordTextEncoder.build_to_target_size( + 100, token_counts, 2, 10) + + filename = os.path.join(self.test_temp_dir, "out.voc") + encoder.store_to_file(filename) + new_encoder = text_encoder.SubwordTextEncoder(filename) + + self.assertEqual(encoder._alphabet, new_encoder._alphabet) + self.assertEqual(encoder._all_subtoken_strings, + new_encoder._all_subtoken_strings) + self.assertEqual(encoder._subtoken_string_to_id, + new_encoder._subtoken_string_to_id) + self.assertEqual(encoder._max_subtoken_len, new_encoder._max_subtoken_len) + + def test_save_and_reload_no_single_quotes(self): + corpus = "the quick brown fox jumps over the lazy dog" + token_counts = collections.Counter(corpus.split(" ")) + + # Deliberately exclude some required encoding chars from the alphabet + # and token list, making some strings unencodable. + encoder = text_encoder.SubwordTextEncoder.build_to_target_size( + 100, token_counts, 2, 10) + + filename = os.path.join(self.test_temp_dir, "out.voc") + encoder.store_to_file(filename, add_single_quotes=False) + new_encoder = text_encoder.SubwordTextEncoder(filename) + + self.assertEqual(encoder._alphabet, new_encoder._alphabet) + self.assertEqual(encoder._all_subtoken_strings, + new_encoder._all_subtoken_strings) + self.assertEqual(encoder._subtoken_string_to_id, + new_encoder._subtoken_string_to_id) + self.assertEqual(encoder._max_subtoken_len, new_encoder._max_subtoken_len) + if __name__ == "__main__": tf.test.main() From dccc9ac0dbaef6341864bdb544e5a6491e4ae832 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Thu, 16 Nov 2017 11:45:02 -0800 Subject: [PATCH 0600/4095] Bug fix, make class lab a VarLen feature to be compatible with img2img datasets that don't have labels. PiperOrigin-RevId: 175994994 --- tensor2tensor/data_generators/image.py | 35 ++++++++++++++++++-------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 391f87be3..e5d378b52 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -52,15 +52,10 @@ def resize_by_area(img, size): class ImageProblem(problem.Problem): def example_reading_spec(self, label_repr=None): - if label_repr is None: - label_repr = ("image/class/label", tf.FixedLenFeature((1,), tf.int64)) - data_fields = { "image/encoded": tf.FixedLenFeature((), tf.string), "image/format": tf.FixedLenFeature((), tf.string), } - label_key, label_type = label_repr # pylint: disable=unpacking-non-sequence - data_fields[label_key] = label_type data_items_to_decoders = { "inputs": @@ -68,8 +63,6 @@ def example_reading_spec(self, label_repr=None): image_key="image/encoded", format_key="image/format", channels=3), - "targets": - tf.contrib.slim.tfexample_decoder.Tensor(label_key), } return data_fields, data_items_to_decoders @@ -246,9 +239,12 @@ def hparams(self, defaults, unused_model_hparams): def example_reading_spec(self): label_key = "image/unpadded_label" - label_type = tf.VarLenFeature(tf.int64) - return super(ImageFSNS, self).example_reading_spec( - self, label_repr=(label_key, label_type)) + data_fields, data_items_to_decoders = ( + super(ImageFSNS, self).example_reading_spec()) + data_fields[label_key] = tf.VarLenFeature(tf.int64) + data_items_to_decoders[ + "targets"] = tf.contrib.slim.tfexample_decoder.Tensor(label_key) + return data_fields, data_items_to_decoders class Image2ClassProblem(ImageProblem): @@ -284,6 +280,16 @@ def feature_encoders(self, data_dir): def generator(self, data_dir, tmp_dir, is_training): raise NotImplementedError() + def example_reading_spec(self): + label_key = "image/class/label" + data_fields, data_items_to_decoders = ( + super(Image2ClassProblem, self).example_reading_spec()) + data_fields[label_key] = tf.FixedLenFeature((1,), tf.int64) + + data_items_to_decoders[ + "targets"] = tf.contrib.slim.tfexample_decoder.Tensor(label_key) + return data_fields, data_items_to_decoders + def hparams(self, defaults, unused_model_hparams): p = defaults p.input_modality = {"inputs": (registry.Modalities.IMAGE, 256)} @@ -869,6 +875,15 @@ def dev_shards(self): def generator(self, data_dir, tmp_dir, is_training): raise NotImplementedError() + def example_reading_spec(self): + label_key = "image/class/label" + data_fields, data_items_to_decoders = ( + super(Image2TextProblem, self).example_reading_spec()) + data_fields[label_key] = tf.FixedLenFeature((1,), tf.int64) + data_items_to_decoders[ + "targets"] = tf.contrib.slim.tfexample_decoder.Tensor(label_key) + return data_fields, data_items_to_decoders + def feature_encoders(self, data_dir): if self.is_character_level: encoder = text_encoder.ByteTextEncoder() From 7fe103f6aa7bd356bd8a8a4d3fdc7a0e833aa571 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 16 Nov 2017 19:27:07 -0800 Subject: [PATCH 0601/4095] Update tpu_trainer to use T2TModel.model_fn and upate RunConfig for non-tpu use PiperOrigin-RevId: 176058540 --- tensor2tensor/tpu/tpu_trainer.py | 3 +- tensor2tensor/tpu/tpu_trainer_lib.py | 99 ++++++++++++++-------------- 2 files changed, 50 insertions(+), 52 deletions(-) diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 39ce82ee9..21ec970aa 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -87,7 +87,8 @@ def create_run_config(): num_shards=FLAGS.tpu_num_shards, log_device_placement=FLAGS.log_device_placement, save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency)) + FLAGS.local_eval_frequency), + use_tpu=FLAGS.use_tpu) def execute_schedule(exp): diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 07c3dcd99..49a8ea9b7 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -151,43 +151,32 @@ def model_fn(features, labels, mode, params, config): # Instantiate model and retrieve modalities. Note that autoregressive models # have no input modality. - model_class = registry.model(model_name)(hparams, mode, problem_hp) - input_modality = problem_hp.input_modality.get("inputs") - target_modality = problem_hp.target_modality - - # Transform features - transformed_features = {} - if input_modality is not None: - with tf.variable_scope(input_modality.name): - transformed_features["inputs"] = input_modality.bottom( - features["inputs"]) - with tf.variable_scope(target_modality.name): - transformed_features["targets"] = target_modality.targets_bottom( - features["targets"]) - transformed_features["problem_choice"] = tf.constant(0) - transformed_features["input_space_id"] = tf.constant( - problem_hp.input_space_id) - transformed_features["target_space_id"] = tf.constant( - problem_hp.target_space_id) - - # Model construction - with tf.variable_scope("body"): - outputs = model_class.model_fn_body(transformed_features) - with tf.variable_scope(target_modality.name): - logits = target_modality.top(outputs, labels) + model = registry.model(model_name)(hparams, mode, problem_hp) - if use_tpu: - # Set known shapes - shape = logits.get_shape().as_list() - if shape[0] is None: - shape[0] = params["batch_size"] - if shape[1] is None: - shape[1] = hparams.max_length - logits.set_shape(shape) - - # Loss - loss_num, loss_den = target_modality.loss(logits, labels) - loss = loss_num / tf.maximum(1.0, loss_den) + features["problem_choice"] = tf.constant(0) + features["input_space_id"] = tf.constant(problem_hp.input_space_id) + features["target_space_id"] = tf.constant(problem_hp.target_space_id) + + sharded_logits, losses_dict = model.model_fn(features) + assert len(sharded_logits) == 1 + logits, = sharded_logits + + if use_tpu: + # Set known shapes + shape = logits.get_shape().as_list() + if shape[0] is None: + shape[0] = params["batch_size"] + if shape[1] is None: + shape[1] = hparams.max_length + logits.set_shape(shape) + + # Loss + loss_num, loss_den = problem_hp.target_modality.loss(logits, labels) + loss = loss_num / tf.maximum(1.0, loss_den) + + if losses_dict: + for loss_val in losses_dict.values(): + loss += loss_val if mode == tf.estimator.ModeKeys.EVAL: problem = hp.problem_instances[0] @@ -289,22 +278,29 @@ def create_run_config(master="", iterations_per_loop=1000, num_shards=8, log_device_placement=False, - save_checkpoints_steps=1000): + save_checkpoints_steps=1000, + use_tpu=True): """Create TPUConfig and tpu.RunConfig.""" - tpu_config = tf.contrib.tpu.TPUConfig( - iterations_per_loop=iterations_per_loop, - num_shards=num_shards, - per_host_input_for_training=(num_shards <= 8)) session_config = tf.ConfigProto( allow_soft_placement=True, log_device_placement=log_device_placement) - run_config = tf.contrib.tpu.RunConfig( - model_dir=model_dir, - session_config=session_config, - save_summary_steps=0, - save_checkpoints_steps=save_checkpoints_steps, - tpu_config=tpu_config, - master=master) - return run_config + run_config_args = { + "model_dir": model_dir, + "session_config": session_config, + "save_summary_steps": 0, + "save_checkpoints_steps": save_checkpoints_steps, + } + run_config_cls = tf.estimator.RunConfig + + if use_tpu: + run_config_cls = tf.contrib.tpu.RunConfig + tpu_config = tf.contrib.tpu.TPUConfig( + iterations_per_loop=iterations_per_loop, + num_shards=num_shards, + per_host_input_for_training=(num_shards <= 8)) + run_config_args["master"] = master + run_config_args["tpu_config"] = tpu_config + + return run_config_cls(**run_config_args) def create_estimator(model_fn, run_config, batch_size=16, use_tpu=True): @@ -332,8 +328,9 @@ def create_experiment(run_config, """Create Experiment.""" hparams.add_hparam("data_dir", data_dir) trainer_utils.add_problem_hparams(hparams, problem_name) - batch_size = ( - hparams.tpu_batch_size_per_shard * run_config.tpu_config.num_shards) + batch_size = hparams.tpu_batch_size_per_shard + if use_tpu: + batch_size *= run_config.tpu_config.num_shards model_fn = get_model_fn(model_name, hparams, use_tpu=use_tpu) estimator = create_estimator( model_fn, run_config, batch_size, use_tpu=use_tpu) From 8ca96b8996e208e40f2b789679d64b4cd6ad7e84 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 17 Nov 2017 10:33:33 -0800 Subject: [PATCH 0602/4095] Fix SymbolModality weights fn PiperOrigin-RevId: 176127650 --- tensor2tensor/layers/modalities.py | 2 +- tensor2tensor/layers/modalities_test.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 34633c2b6..37abc3b81 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -50,7 +50,7 @@ def top_is_pointwise(self): return True @property - def weights_fn(self): + def targets_weights_fn(self): weights_fn = common_layers.weights_nonzero hp = self._model_hparams diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index 7421a7e07..bf42af529 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -41,7 +41,8 @@ def testSymbolModalityInputs(self): hidden_size=hidden_size, multiply_embedding_mode="sqrt_depth", symbol_modality_skip_top=0, - shared_embedding_and_softmax_weights=0) + shared_embedding_and_softmax_weights=0, + prepend_mode="none") x = -1 + np.random.random_integers( vocab_size, size=(batch_size, length, 1, 1)) m = modalities.SymbolModality(model_hparams, vocab_size) @@ -69,7 +70,8 @@ def testSymbolModalityTargets(self): symbol_modality_skip_top=0, shared_embedding_and_softmax_weights=0, factored_logits=0, - mode=tf.estimator.ModeKeys.TRAIN) + mode=tf.estimator.ModeKeys.TRAIN, + prepend_mode="none") body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( @@ -104,7 +106,8 @@ def testSymbolModalityTargetsFactored(self): symbol_modality_skip_top=0, shared_embedding_and_softmax_weights=0, factored_logits=1, - mode=tf.estimator.ModeKeys.TRAIN) + mode=tf.estimator.ModeKeys.TRAIN, + prepend_mode="none") body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( From c0ce3dd24aacb5b632c44e13392b14b1dab7e978 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 17 Nov 2017 15:32:35 -0800 Subject: [PATCH 0603/4095] Add support for multi-gpu training with tpu_trainer PiperOrigin-RevId: 176169403 --- tensor2tensor/tpu/tpu_trainer.py | 5 +- tensor2tensor/tpu/tpu_trainer_lib.py | 373 +++++++++++++--------- tensor2tensor/tpu/tpu_trainer_lib_test.py | 42 +-- 3 files changed, 248 insertions(+), 172 deletions(-) diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 21ec970aa..2c4015469 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -69,7 +69,7 @@ def create_hparams(): def create_experiment_fn(): - return lib.make_experiment_fn( + return lib.create_experiment_fn( FLAGS.model, get_problem_name(), FLAGS.data_dir, @@ -88,6 +88,9 @@ def create_run_config(): log_device_placement=FLAGS.log_device_placement, save_checkpoints_steps=max(FLAGS.iterations_per_loop, FLAGS.local_eval_frequency), + num_gpus=FLAGS.worker_gpu, + gpu_order=FLAGS.gpu_order, + shard_to_cpu=FLAGS.locally_shard_to_cpu, use_tpu=FLAGS.use_tpu) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 49a8ea9b7..b2267319c 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -26,6 +26,7 @@ import six from tensor2tensor.utils import data_reader +from tensor2tensor.utils import expert_utils from tensor2tensor.utils import metrics from tensor2tensor.utils import optimize from tensor2tensor.utils import registry @@ -34,7 +35,7 @@ import tensorflow as tf -def create_dummy_vars(): +def _create_dummy_vars(): """Dummy vars for restore to work when not using TPU codepath.""" with tf.variable_scope("losses_avg"): with tf.variable_scope("problem_0"): @@ -45,90 +46,120 @@ def create_dummy_vars(): tf.get_variable("problem_0_steps", initializer=0, trainable=False) -def get_input_fn(mode, hparams): - """Get basic T2T input fn.""" - - def input_fn(params): - """Input fn.""" - is_training = mode == tf.estimator.ModeKeys.TRAIN - num_threads = 4 if is_training else 1 - if "batch_size" in params: - batch_size = params["batch_size"] - else: - batch_size = hparams.tpu_batch_size_per_shard - - def valid_size(example): - return data_reader.example_valid_size(example, hparams.min_length, - hparams.max_length) - - def define_shapes(example): - """Set the right shapes for the features.""" - inputs = example["inputs"] - targets = example["targets"] - - # Ensure inputs and targets are proper rank. - while len(inputs.get_shape()) < 4: - inputs = tf.expand_dims(inputs, axis=-1) - while len(targets.get_shape()) < 4: - targets = tf.expand_dims(targets, axis=-1) - - example["inputs"] = inputs - example["targets"] = targets - - # Ensure batch size is set on all features - for _, t in six.iteritems(example): - shape = t.get_shape().as_list() - shape[0] = batch_size - t.set_shape(t.get_shape().merge_with(shape)) - # Assert shapes are fully known - t.get_shape().assert_is_fully_defined() - - return example - - # Read and preprocess - problem = hparams.problem_instances[0] - data_dir = hparams.data_dir - dataset = problem.dataset( - mode=mode, data_dir=data_dir, num_threads=num_threads, hparams=hparams) - dataset = dataset.map( - data_reader.cast_int64_to_int32, num_threads=num_threads) - if is_training: - dataset = dataset.repeat(None) - - # Batch (and pad) - if are_shapes_fully_defined(dataset.output_shapes): - dataset = dataset.apply( - tf.contrib.data.batch_and_drop_remainder(batch_size)) - else: - # If shapes are not fully defined, filter out long ones and pad to - # hparams.max_length - dataset = dataset.filter(valid_size) - padded_shapes = fill_shape_nones( - dataset.output_shapes, none_filler=hparams.max_length) - if hasattr(tf.contrib.data, "padded_batch_and_drop_remainder"): - dataset = dataset.apply( - tf.contrib.data.padded_batch_and_drop_remainder( - batch_size, padded_shapes)) - else: - dataset = data_reader.padded_batch(dataset, batch_size, padded_shapes) +def _get_batch_size(params, hparams, config): + """Batch size determined by params dict, HParams, and RunConfig.""" + # If params specifies batch size, use that. TPUEstimator passes batch size in + # params. + batch_size = params and params.get("batch_size") + + # If not set, then we're running on CPU/GPU, so use the batch size from the + # hparams, and multiply by the number of data shards. + if not batch_size: + batch_size = hparams.tpu_batch_size_per_shard + if config: + batch_size *= config.t2t_device_info["num_shards"] + + return batch_size + + +def t2t_input_fn(problem, mode, hparams, params=None, config=None): + """Builds input pipeline for problem. + + Args: + problem: Problem to build input pipeline for + mode: tf.estimator.ModeKeys + hparams: HParams + params: dict, may include "batch_size" + config: RunConfig + + Returns: + (features_dict<str name, Tensor feature>, Tensor targets) + """ + is_training = mode == tf.estimator.ModeKeys.TRAIN + num_threads = 4 if is_training else 1 + + batch_size = _get_batch_size(params, hparams, config) + + def valid_size(example): + return data_reader.example_valid_size(example, hparams.min_length, + hparams.max_length) + + def define_shapes(example): + """Set the right shapes for the features.""" + inputs = example["inputs"] + targets = example["targets"] + + # Ensure inputs and targets are proper rank. + while len(inputs.get_shape()) < 4: + inputs = tf.expand_dims(inputs, axis=-1) + while len(targets.get_shape()) < 4: + targets = tf.expand_dims(targets, axis=-1) + + example["inputs"] = inputs + example["targets"] = targets + + # Ensure batch size is set on all features + for _, t in six.iteritems(example): + shape = t.get_shape().as_list() + shape[0] = batch_size + t.set_shape(t.get_shape().merge_with(shape)) + # Assert shapes are fully known + t.get_shape().assert_is_fully_defined() + + return example + + # Read and preprocess + data_dir = hparams.data_dir + dataset = problem.dataset( + mode=mode, data_dir=data_dir, num_threads=num_threads, hparams=hparams) + dataset = dataset.map( + data_reader.cast_int64_to_int32, num_threads=num_threads) + if is_training: + dataset = dataset.repeat(None) + + # Batch (and pad) + if _are_shapes_fully_defined(dataset.output_shapes): + dataset = dataset.apply( + tf.contrib.data.batch_and_drop_remainder(batch_size)) + else: + # If shapes are not fully defined, filter out long ones and pad to + # hparams.max_length + dataset = dataset.filter(valid_size) + padded_shapes = _fill_shape_nones( + dataset.output_shapes, none_filler=hparams.max_length) + dataset = dataset.apply( + tf.contrib.data.padded_batch_and_drop_remainder(batch_size, + padded_shapes)) - dataset = dataset.map(define_shapes, num_parallel_calls=num_threads) - dataset = dataset.prefetch(1) - features = dataset.make_one_shot_iterator().get_next() + dataset = dataset.map(define_shapes, num_parallel_calls=num_threads) + dataset = dataset.prefetch(1) + features = dataset.make_one_shot_iterator().get_next() - return features, features["targets"] + return features, features["targets"] - return input_fn +def get_input_fn(mode, hparams): + """Get input fn for Estimator. See input_fn.""" -def are_shapes_fully_defined(shapes_dict): + def wrapped_input_fn(params, config): + return t2t_input_fn( + hparams.problem_instances[0], + mode, + hparams, + params=params, + config=config) + + return wrapped_input_fn + + +def _are_shapes_fully_defined(shapes_dict): for shape in shapes_dict.values(): if not shape.is_fully_defined(): return False return True -def fill_shape_nones(shapes_dict, none_filler=None): +def _fill_shape_nones(shapes_dict, none_filler=None): padded_shapes = {} for key, shape in six.iteritems(shapes_dict): padded_shapes[key] = [ @@ -137,81 +168,122 @@ def fill_shape_nones(shapes_dict, none_filler=None): return padded_shapes -def get_model_fn(model_name, hp, use_tpu=True): - """Get simple T2T model fn.""" +def create_data_parallelism(num_gpus=1, + gpu_order="", + shard_to_cpu=False, + num_shards=1): + """Create Parallelism object.""" + gpus = list(range(num_gpus)) + if gpu_order: + gpus = [int(s) for s in gpu_order.split(" ")] + assert len(gpus) == num_gpus + data_shard_devices = ["gpu:%d" % i for i in gpus] + if shard_to_cpu or num_gpus < 1: + data_shard_devices += ["cpu:0"] + assert len(data_shard_devices) == num_shards + tf.logging.info("Data parallel devices: %s", data_shard_devices) + return expert_utils.Parallelism(data_shard_devices, reuse=True) + + +def t2t_model_fn(model_name, + hparams, + features, + labels, + mode, + config=None, + params=None, + use_tpu=True): + """Model fn. + + Args: + model_name: str, registered model name. + hparams: HParams + features: dict<str name, Tensor feature> + labels: Tensor + mode: tf.estimator.ModeKeys + config: RunConfig + params: dict, may include batch_size + use_tpu: bool, whether using TPU + + Returns: + EstimatorSpec or TPUEstimatorSpec + """ + _create_dummy_vars() + + hparams = copy.deepcopy(hparams) + problem = hparams.problem_instances[0] + problem_hp = hparams.problems[0] + + features["problem_choice"] = tf.constant(0) + features["input_space_id"] = tf.constant(problem_hp.input_space_id) + features["target_space_id"] = tf.constant(problem_hp.target_space_id) + + # Build and call model + data_parallelism = ( + expert_utils.Parallelism([""]) + if use_tpu else create_data_parallelism(**config.t2t_device_info)) + model = registry.model(model_name)( + hparams, mode, problem_hp, data_parallelism=data_parallelism) + sharded_logits, losses_dict = model.model_fn(features) + + # Set known shapes + logits = tf.concat(sharded_logits, 0) + shape = logits.get_shape().as_list() + if shape[0] is None: + shape[0] = _get_batch_size(params, hparams, config) + if shape[1] is None: + shape[1] = hparams.max_length + logits.set_shape(shape) + + # Accumulate losses + assert "training" in losses_dict + loss = sum(losses_dict.values()) + + if mode == tf.estimator.ModeKeys.EVAL: + if use_tpu: + eval_metrics_fn = create_eval_metrics_fn(problem, hparams) + _remove_summaries() + return tf.contrib.tpu.TPUEstimatorSpec( + mode, eval_metrics=(eval_metrics_fn, [logits, labels]), loss=loss) + else: + eval_metrics_fns = metrics.create_evaluation_metrics([problem], hparams) + eval_metrics = {} + for metric_name, metric_fn in six.iteritems(eval_metrics_fns): + eval_metrics[metric_name] = metric_fn(logits, features) - def model_fn(features, labels, mode, params, config): - """Model fn.""" - del config - create_dummy_vars() + return tf.estimator.EstimatorSpec( + mode, + predictions={"predictions": logits}, + eval_metric_ops=eval_metrics, + loss=loss) - hparams = copy.deepcopy(hp) - problem_hp = hparams.problems[0] - orig_features = features + assert mode == tf.estimator.ModeKeys.TRAIN - # Instantiate model and retrieve modalities. Note that autoregressive models - # have no input modality. - model = registry.model(model_name)(hparams, mode, problem_hp) + lr = hparams.learning_rate * optimize.learning_rate_decay(hparams) + train_op = optimize.optimize(loss, lr, hparams, use_tpu=use_tpu) - features["problem_choice"] = tf.constant(0) - features["input_space_id"] = tf.constant(problem_hp.input_space_id) - features["target_space_id"] = tf.constant(problem_hp.target_space_id) + if use_tpu: + _remove_summaries() # summaries not currently working on TPU + return tf.contrib.tpu.TPUEstimatorSpec(mode, loss=loss, train_op=train_op) + else: + return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) - sharded_logits, losses_dict = model.model_fn(features) - assert len(sharded_logits) == 1 - logits, = sharded_logits - if use_tpu: - # Set known shapes - shape = logits.get_shape().as_list() - if shape[0] is None: - shape[0] = params["batch_size"] - if shape[1] is None: - shape[1] = hparams.max_length - logits.set_shape(shape) - - # Loss - loss_num, loss_den = problem_hp.target_modality.loss(logits, labels) - loss = loss_num / tf.maximum(1.0, loss_den) - - if losses_dict: - for loss_val in losses_dict.values(): - loss += loss_val - - if mode == tf.estimator.ModeKeys.EVAL: - problem = hp.problem_instances[0] - - if use_tpu: - eval_metrics_fn = create_eval_metrics_fn(problem, hparams) - _remove_summaries() - return tf.contrib.tpu.TPUEstimatorSpec( - mode, - eval_metrics=(eval_metrics_fn, [logits, orig_features["targets"]]), - loss=loss) - else: - eval_metrics_fns = metrics.create_evaluation_metrics([problem], hparams) - eval_metrics = {} - for metric_name, metric_fn in six.iteritems(eval_metrics_fns): - eval_metrics[metric_name] = metric_fn(logits, features) - - return tf.estimator.EstimatorSpec( - mode, - predictions={"predictions": logits}, - eval_metric_ops=eval_metrics, - loss=loss) - - assert mode == tf.estimator.ModeKeys.TRAIN - - lr = hparams.learning_rate * optimize.learning_rate_decay(hparams) - train_op = optimize.optimize(loss, lr, hparams, use_tpu=use_tpu) +def get_model_fn(model_name, hparams, use_tpu=True): + """Model fn for Estimator. See model_fn.""" - if use_tpu: - _remove_summaries() # summaries not currently working on TPU - return tf.contrib.tpu.TPUEstimatorSpec(mode, loss=loss, train_op=train_op) - else: - return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) + def wrapping_model_fn(features, labels, mode, params, config): + return t2t_model_fn( + model_name, + hparams, + features, + labels, + mode, + config=config, + params=params, + use_tpu=use_tpu) - return model_fn + return wrapping_model_fn # These metrics are implemented with py_funcs and therefore do no work with TPU @@ -279,6 +351,9 @@ def create_run_config(master="", num_shards=8, log_device_placement=False, save_checkpoints_steps=1000, + num_gpus=1, + gpu_order="", + shard_to_cpu=False, use_tpu=True): """Create TPUConfig and tpu.RunConfig.""" session_config = tf.ConfigProto( @@ -291,6 +366,7 @@ def create_run_config(master="", } run_config_cls = tf.estimator.RunConfig + # If using TPU, use TPU RunConfig, add TPUConfig, and add additional args if use_tpu: run_config_cls = tf.contrib.tpu.RunConfig tpu_config = tf.contrib.tpu.TPUConfig( @@ -300,7 +376,18 @@ def create_run_config(master="", run_config_args["master"] = master run_config_args["tpu_config"] = tpu_config - return run_config_cls(**run_config_args) + config = run_config_cls(**run_config_args) + + # If not using TPU, add device info for data_parallelism + if not use_tpu: + config.t2t_device_info = { + "num_gpus": num_gpus, + "gpu_order": gpu_order, + "shard_to_cpu": shard_to_cpu, + "num_shards": max(1, num_gpus + int(shard_to_cpu)) + } + + return config def create_estimator(model_fn, run_config, batch_size=16, use_tpu=True): @@ -346,7 +433,7 @@ def create_experiment(run_config, train_steps_per_iteration=min_eval_frequency) -def make_experiment_fn(*args, **kwargs): +def create_experiment_fn(*args, **kwargs): """Wrapper for canonical experiment_fn. See create_experiment.""" def experiment_fn(run_config, hparams): diff --git a/tensor2tensor/tpu/tpu_trainer_lib_test.py b/tensor2tensor/tpu/tpu_trainer_lib_test.py index 24d26879d..1308c0990 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib_test.py +++ b/tensor2tensor/tpu/tpu_trainer_lib_test.py @@ -22,7 +22,7 @@ # Dependency imports from tensor2tensor.tpu import tpu_trainer_lib as lib -from tensor2tensor.utils import trainer_utils +from tensor2tensor.utils import registry from tensor2tensor.utils import trainer_utils_test import tensorflow as tf @@ -34,33 +34,19 @@ class TpuTrainerTest(tf.test.TestCase): def setUpClass(cls): trainer_utils_test.TrainerUtilsTest.setUpClass() - def testSmoke(self): - data_dir = trainer_utils_test.TrainerUtilsTest.data_dir - problem_name = "tiny_algo" - model_name = "transformer" - hparams_set = "transformer_tpu" - - hparams = trainer_utils.create_hparams(hparams_set, data_dir) - trainer_utils.add_problem_hparams(hparams, problem_name) - - model_fn = lib.get_model_fn(model_name, hparams, use_tpu=False) - input_fn = lib.get_input_fn(tf.estimator.ModeKeys.TRAIN, hparams) - - params = {"batch_size": 16} - config = tf.contrib.tpu.RunConfig( - tpu_config=tf.contrib.tpu.TPUConfig(num_shards=2)) - features, targets = input_fn(params) - with tf.variable_scope("training"): - spec = model_fn(features, targets, tf.estimator.ModeKeys.TRAIN, params, - config) - - self.assertTrue(spec.loss is not None) - self.assertTrue(spec.train_op is not None) - - with tf.variable_scope("eval"): - spec = model_fn(features, targets, tf.estimator.ModeKeys.EVAL, params, - config) - self.assertTrue(spec.eval_metric_ops is not None) + def testExperiment(self): + exp_fn = lib.create_experiment_fn( + "transformer", + "tiny_algo", + trainer_utils_test.TrainerUtilsTest.data_dir, + train_steps=1, + eval_steps=1, + min_eval_frequency=1, + use_tpu=False) + run_config = lib.create_run_config(num_gpus=0, use_tpu=False) + hparams = registry.hparams("transformer_tiny_tpu")() + exp = exp_fn(run_config, hparams) + exp.test() if __name__ == "__main__": From 01b8c31da30a7e1109451df2b4b4698946c6c35c Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 20 Nov 2017 13:31:27 -0800 Subject: [PATCH 0604/4095] CHECKPOINT BREAKING: make T2TModel a subclass of Layer so it can be called; all variables are now in model-name scope. PiperOrigin-RevId: 176407831 --- tensor2tensor/models/bluenet_test.py | 3 +- tensor2tensor/models/bytenet_test.py | 3 +- tensor2tensor/models/gene_expression_test.py | 5 +- tensor2tensor/models/lstm_test.py | 6 +- tensor2tensor/models/multimodel_test.py | 3 +- tensor2tensor/models/neural_gpu_test.py | 3 +- tensor2tensor/models/resnet_test.py | 3 +- tensor2tensor/models/slicenet_test.py | 3 +- tensor2tensor/models/transformer.py | 8 +- .../models/transformer_revnet_test.py | 3 +- tensor2tensor/models/transformer_test.py | 26 +++--- tensor2tensor/models/transformer_vae.py | 6 +- tensor2tensor/models/xception_test.py | 3 +- tensor2tensor/tpu/tpu_trainer_lib.py | 4 +- tensor2tensor/utils/model_builder.py | 2 +- tensor2tensor/utils/registry.py | 27 +++--- tensor2tensor/utils/t2t_model.py | 82 +++++++++++++------ tensor2tensor/utils/trainer_utils_test.py | 56 ++++++++++++- 18 files changed, 155 insertions(+), 91 deletions(-) diff --git a/tensor2tensor/models/bluenet_test.py b/tensor2tensor/models/bluenet_test.py index daf87529e..15f1f46e6 100644 --- a/tensor2tensor/models/bluenet_test.py +++ b/tensor2tensor/models/bluenet_test.py @@ -45,8 +45,7 @@ def testBlueNet(self): } model = bluenet.BlueNet( hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) - sharded_logits, _ = model.model_fn(features) - logits = tf.concat(sharded_logits, 0) + logits, _ = model(features) session.run(tf.global_variables_initializer()) res = session.run(logits) self.assertEqual(res.shape, (3, 5, 1, 1, vocab_size)) diff --git a/tensor2tensor/models/bytenet_test.py b/tensor2tensor/models/bytenet_test.py index f96d3b999..8a19ae905 100644 --- a/tensor2tensor/models/bytenet_test.py +++ b/tensor2tensor/models/bytenet_test.py @@ -44,8 +44,7 @@ def testByteNet(self): } model = bytenet.ByteNet( hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) - sharded_logits, _ = model.model_fn(features) - logits = tf.concat(sharded_logits, 0) + logits, _ = model(features) session.run(tf.global_variables_initializer()) res = session.run(logits) self.assertEqual(res.shape, (3, 50, 1, 1, vocab_size)) diff --git a/tensor2tensor/models/gene_expression_test.py b/tensor2tensor/models/gene_expression_test.py index ea02572d0..94cf20ff3 100644 --- a/tensor2tensor/models/gene_expression_test.py +++ b/tensor2tensor/models/gene_expression_test.py @@ -55,9 +55,8 @@ def _testModel(self, hparams, model_cls): "targets": tf.constant(targets, dtype=tf.float32), } p_hparams, = hparams.problems - sharded_logits, _ = model_cls(hparams, tf.estimator.ModeKeys.TRAIN, - p_hparams).model_fn(features) - logits = tf.concat(sharded_logits, 0) + logits, _ = model_cls( + hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)(features) with self.test_session() as sess: sess.run(tf.global_variables_initializer()) diff --git a/tensor2tensor/models/lstm_test.py b/tensor2tensor/models/lstm_test.py index b8be74f23..863518fa1 100644 --- a/tensor2tensor/models/lstm_test.py +++ b/tensor2tensor/models/lstm_test.py @@ -44,8 +44,7 @@ def testLSTMSeq2Seq(self): } model = lstm.LSTMSeq2seq(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) - sharded_logits, _ = model.model_fn(features) - logits = tf.concat(sharded_logits, 0) + logits, _ = model(features) session.run(tf.global_variables_initializer()) res = session.run(logits) self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size)) @@ -67,8 +66,7 @@ def testLSTMSeq2SeqAttention(self): } model = lstm.LSTMSeq2seqAttention( hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) - sharded_logits, _ = model.model_fn(features) - logits = tf.concat(sharded_logits, 0) + logits, _ = model(features) session.run(tf.global_variables_initializer()) res = session.run(logits) self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size)) diff --git a/tensor2tensor/models/multimodel_test.py b/tensor2tensor/models/multimodel_test.py index 3aff41029..86f92ced6 100644 --- a/tensor2tensor/models/multimodel_test.py +++ b/tensor2tensor/models/multimodel_test.py @@ -48,8 +48,7 @@ def testMultiModel(self): } model = multimodel.MultiModel( hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) - sharded_logits, _ = model.model_fn(features) - logits = tf.concat(sharded_logits, 0) + logits, _ = model(features) session.run(tf.global_variables_initializer()) res = session.run(logits) self.assertEqual(res.shape, (3, 1, 1, 1, 10)) diff --git a/tensor2tensor/models/neural_gpu_test.py b/tensor2tensor/models/neural_gpu_test.py index 75149ddd5..99b7f1062 100644 --- a/tensor2tensor/models/neural_gpu_test.py +++ b/tensor2tensor/models/neural_gpu_test.py @@ -52,8 +52,7 @@ def testNeuralGPU(self): } model = neural_gpu.NeuralGPU(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) - shadred_logits, _ = model.model_fn(features) - logits = tf.concat(shadred_logits, 0) + logits, _ = model(features) session.run(tf.global_variables_initializer()) res = session.run(logits) self.assertEqual(res.shape, (batch_size, target_length, 1, 1, diff --git a/tensor2tensor/models/resnet_test.py b/tensor2tensor/models/resnet_test.py index 9db4cb85f..d911dcbd7 100644 --- a/tensor2tensor/models/resnet_test.py +++ b/tensor2tensor/models/resnet_test.py @@ -56,8 +56,7 @@ def _testResnet(self, img_size, output_size): "targets": tf.constant(y, dtype=tf.int32), } model = resnet.Resnet50(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) - sharded_logits, _ = model.model_fn(features) - logits = tf.concat(sharded_logits, 0) + logits, _ = model(features) session.run(tf.global_variables_initializer()) res = session.run(logits) self.assertEqual(res.shape, (batch_size,) + output_size + (1, vocab_size)) diff --git a/tensor2tensor/models/slicenet_test.py b/tensor2tensor/models/slicenet_test.py index faf028737..7efdf7a33 100644 --- a/tensor2tensor/models/slicenet_test.py +++ b/tensor2tensor/models/slicenet_test.py @@ -49,8 +49,7 @@ def testSliceNet(self): } model = slicenet.SliceNet(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) - sharded_logits, _ = model.model_fn(features) - logits = tf.concat(sharded_logits, 0) + logits, _ = model(features) session.run(tf.global_variables_initializer()) res = session.run(logits) self.assertEqual(res.shape, (3, 1, 1, 1, 10)) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 588b6154c..8745dc00b 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -158,7 +158,8 @@ def _greedy_infer(self, features, decode_length): Raises: NotImplementedError: If there are multiple data shards. """ - decoded_ids, _ = self._fast_decode(features, decode_length) + with tf.variable_scope(self.name): + decoded_ids, _ = self._fast_decode(features, decode_length) return decoded_ids, None, None def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): @@ -175,8 +176,9 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): Returns: samples: an integer `Tensor`. Top samples from the beam search """ - decoded_ids, scores = self._fast_decode(features, decode_length, beam_size, - top_beams, alpha) + with tf.variable_scope(self.name): + decoded_ids, scores = self._fast_decode( + features, decode_length, beam_size, top_beams, alpha) return {"outputs": decoded_ids, "scores": scores} def _fast_decode(self, diff --git a/tensor2tensor/models/transformer_revnet_test.py b/tensor2tensor/models/transformer_revnet_test.py index f61b88b5b..79f8eb1e0 100644 --- a/tensor2tensor/models/transformer_revnet_test.py +++ b/tensor2tensor/models/transformer_revnet_test.py @@ -59,8 +59,7 @@ def testTransformer(self): } model = transformer_revnet.TransformerRevnet( hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) - sharded_logits, _ = model.model_fn(features) - logits = tf.concat(sharded_logits, 0) + logits, _ = model(features) grads = tf.gradients( tf.reduce_mean(logits), [features["inputs"]] + tf.global_variables()) grads = [g for g in grads if g is not None] diff --git a/tensor2tensor/models/transformer_test.py b/tensor2tensor/models/transformer_test.py index ae254a42d..a0c21e2c0 100644 --- a/tensor2tensor/models/transformer_test.py +++ b/tensor2tensor/models/transformer_test.py @@ -51,17 +51,16 @@ def getModel(self, hparams, mode=tf.estimator.ModeKeys.TRAIN): targets = -1 + np.random.random_integers( VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1)) features = { - "inputs": tf.constant(inputs, dtype=tf.int32), - "targets": tf.constant(targets, dtype=tf.int32), - "target_space_id": tf.constant(1, dtype=tf.int32), + "inputs": tf.constant(inputs, dtype=tf.int32, name="inputs"), + "targets": tf.constant(targets, dtype=tf.int32, name="targets"), + "target_space_id": tf.constant(1, dtype=tf.int32) } return transformer.Transformer(hparams, mode, p_hparams), features def testTransformer(self): model, features = self.getModel(transformer.transformer_small()) - shadred_logits, _ = model.model_fn(features) - logits = tf.concat(shadred_logits, 0) + logits, _ = model(features) with self.test_session() as session: session.run(tf.global_variables_initializer()) res = session.run(logits) @@ -69,8 +68,7 @@ def testTransformer(self): def testTransformerRelative(self): model, features = self.getModel(transformer.transformer_relative_tiny()) - shadred_logits, _ = model.model_fn(features) - logits = tf.concat(shadred_logits, 0) + logits, _ = model(features) with self.test_session() as session: session.run(tf.global_variables_initializer()) res = session.run(logits) @@ -81,8 +79,8 @@ def testGreedyVsFast(self): decode_length = 2 - out_logits, _ = model.model_fn(features) - out_logits = tf.squeeze(out_logits[0], axis=[2, 3]) + out_logits, _ = model(features) + out_logits = tf.squeeze(out_logits, axis=[2, 3]) loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]), labels=tf.reshape(features["targets"], [-1])) @@ -94,8 +92,7 @@ def testGreedyVsFast(self): for _ in range(100): apply_grad.run() - model, _ = self.getModel(transformer.transformer_small(), - mode=tf.estimator.ModeKeys.PREDICT) + model.set_mode(tf.estimator.ModeKeys.PREDICT) with tf.variable_scope(tf.get_variable_scope(), reuse=True): greedy_result, _, _ = model._slow_greedy_infer(features, decode_length) @@ -115,8 +112,8 @@ def testBeamVsFast(self): decode_length = 2 - out_logits, _ = model.model_fn(features) - out_logits = tf.squeeze(out_logits[0], axis=[2, 3]) + out_logits, _ = model(features) + out_logits = tf.squeeze(out_logits, axis=[2, 3]) loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=tf.reshape(out_logits, [-1, VOCAB_SIZE]), labels=tf.reshape(features["targets"], [-1])) @@ -128,8 +125,7 @@ def testBeamVsFast(self): for _ in range(100): apply_grad.run() - model, _ = self.getModel(transformer.transformer_small(), - mode=tf.estimator.ModeKeys.PREDICT) + model.set_mode(tf.estimator.ModeKeys.PREDICT) with tf.variable_scope(tf.get_variable_scope(), reuse=True): beam_result = model._beam_decode_slow( diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index ad5143095..caea3ff59 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -654,9 +654,9 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, dtype=tf.int64) features["targets"] = initial_output - sharded_logits, _ = self.model_fn(features, False, force_full_predict=True) - sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) - samples = tf.concat(sharded_samples, 0) + logits, _ = self.__call__( + features, skip=False, force_full_predict=True) + samples = tf.argmax(logits, axis=-1) if inputs_old is not None: # Restore to not confuse Estimator. features["inputs"] = inputs_old diff --git a/tensor2tensor/models/xception_test.py b/tensor2tensor/models/xception_test.py index e02057c10..cb4e3544e 100644 --- a/tensor2tensor/models/xception_test.py +++ b/tensor2tensor/models/xception_test.py @@ -48,8 +48,7 @@ def _testXception(self, img_size, output_size): "targets": tf.constant(y, dtype=tf.int32), } model = xception.Xception(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams) - sharded_logits, _ = model.model_fn(features) - logits = tf.concat(sharded_logits, 0) + logits, _ = model(features) session.run(tf.global_variables_initializer()) res = session.run(logits) self.assertEqual(res.shape, output_size + (1, vocab_size)) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index b2267319c..65618fc1b 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -209,7 +209,6 @@ def t2t_model_fn(model_name, EstimatorSpec or TPUEstimatorSpec """ _create_dummy_vars() - hparams = copy.deepcopy(hparams) problem = hparams.problem_instances[0] problem_hp = hparams.problems[0] @@ -224,10 +223,9 @@ def t2t_model_fn(model_name, if use_tpu else create_data_parallelism(**config.t2t_device_info)) model = registry.model(model_name)( hparams, mode, problem_hp, data_parallelism=data_parallelism) - sharded_logits, losses_dict = model.model_fn(features) + logits, losses_dict = model(features) # Set known shapes - logits = tf.concat(sharded_logits, 0) shape = logits.get_shape().as_list() if shape[0] is None: shape[0] = _get_batch_size(params, hparams, config) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 6bef72b0c..13ebaa91e 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -127,7 +127,7 @@ def nth_model(n): if eval_run_autoregressive and mode == tf.estimator.ModeKeys.EVAL: sharded_logits, losses_dict = model_class.eval_autoregressive(features) else: - sharded_logits, losses_dict = model_class.model_fn( + sharded_logits, losses_dict = model_class( features, skip=(skipping_is_on and skip_this_one)) with tf.variable_scope("losses_avg"): total_loss, ops = 0.0, [] diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index e3f3787f6..e21702251 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -90,7 +90,7 @@ def _reset(): ctr.clear() -def _default_name(obj_class): +def default_name(obj_class): """Convert a class name to the registry's default name for the class. Args: @@ -99,7 +99,6 @@ def _default_name(obj_class): Returns: The registry's default name for the class. """ - return _convert_camel_to_snake(obj_class.__name__) @@ -112,8 +111,7 @@ def default_object_name(obj): Returns: The registry's default name for the class of the object. """ - - return _default_name(obj.__class__) + return default_name(obj.__class__) def register_model(name=None): @@ -121,16 +119,17 @@ def register_model(name=None): def decorator(model_cls, registration_name=None): """Registers & returns model_cls with registration_name or default name.""" - model_name = registration_name or _default_name(model_cls) + model_name = registration_name or default_name(model_cls) if model_name in _MODELS: raise LookupError("Model %s already registered." % model_name) + model_cls.REGISTERED_NAME = property(lambda _: model_name) _MODELS[model_name] = model_cls return model_cls # Handle if decorator was used without parens if callable(name): model_cls = name - return decorator(model_cls, registration_name=_default_name(model_cls)) + return decorator(model_cls, registration_name=default_name(model_cls)) return lambda model_cls: decorator(model_cls, name) @@ -150,7 +149,7 @@ def register_hparams(name=None): def decorator(hp_fn, registration_name=None): """Registers & returns hp_fn with registration_name or default name.""" - hp_name = registration_name or _default_name(hp_fn) + hp_name = registration_name or default_name(hp_fn) if hp_name in _HPARAMS: raise LookupError("HParams set %s already registered." % hp_name) _HPARAMS[hp_name] = hp_fn @@ -159,7 +158,7 @@ def decorator(hp_fn, registration_name=None): # Handle if decorator was used without parens if callable(name): hp_fn = name - return decorator(hp_fn, registration_name=_default_name(hp_fn)) + return decorator(hp_fn, registration_name=default_name(hp_fn)) return lambda hp_fn: decorator(hp_fn, name) @@ -182,7 +181,7 @@ def register_ranged_hparams(name=None): def decorator(rhp_fn, registration_name=None): """Registers & returns hp_fn with registration_name or default name.""" - rhp_name = registration_name or _default_name(rhp_fn) + rhp_name = registration_name or default_name(rhp_fn) if rhp_name in _RANGED_HPARAMS: raise LookupError("RangedHParams set %s already registered." % rhp_name) # Check that the fn takes a single argument @@ -197,7 +196,7 @@ def decorator(rhp_fn, registration_name=None): # Handle if decorator was used without parens if callable(name): rhp_fn = name - return decorator(rhp_fn, registration_name=_default_name(rhp_fn)) + return decorator(rhp_fn, registration_name=default_name(rhp_fn)) return lambda rhp_fn: decorator(rhp_fn, name) @@ -217,7 +216,7 @@ def register_problem(name=None): def decorator(p_cls, registration_name=None): """Registers & returns p_cls with registration_name or default name.""" - p_name = registration_name or _default_name(p_cls) + p_name = registration_name or default_name(p_cls) if p_name in _PROBLEMS: raise LookupError("Problem %s already registered." % p_name) @@ -228,7 +227,7 @@ def decorator(p_cls, registration_name=None): # Handle if decorator was used without parens if callable(name): p_cls = name - return decorator(p_cls, registration_name=_default_name(p_cls)) + return decorator(p_cls, registration_name=default_name(p_cls)) return lambda p_cls: decorator(p_cls, name) @@ -313,7 +312,7 @@ def _internal_register_modality(name, mod_collection, collection_str): def decorator(mod_cls, registration_name=None): """Registers & returns mod_cls with registration_name or default name.""" - mod_name = registration_name or _default_name(mod_cls) + mod_name = registration_name or default_name(mod_cls) if mod_name in mod_collection: raise LookupError("%s modality %s already registered." % (collection_str, mod_name)) @@ -323,7 +322,7 @@ def decorator(mod_cls, registration_name=None): # Handle if decorator was used without parens if callable(name): mod_cls = name - return decorator(mod_cls, registration_name=_default_name(mod_cls)) + return decorator(mod_cls, registration_name=default_name(mod_cls)) return lambda mod_cls: decorator(mod_cls, name) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 02c2b8a7d..186b4348f 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -34,6 +34,8 @@ import tensorflow as tf +from tensorflow.python.layers import base + def _with_timing(fn, msg): @@ -54,16 +56,17 @@ def is_class_modality(mod): return mod.name[:len(prefix)] == prefix -class T2TModel(object): +class T2TModel(base.Layer): """Abstract base class for models. Subclassess generally only need to override `build_model`. """ + REGISTERED_NAME = None # Updated on registration. def __init__(self, hparams, mode, - problem_hparams, + problem_hparams=None, problem_idx=0, data_parallelism=None, ps_devices=None, @@ -83,18 +86,20 @@ def __init__(self, Returns: a T2TModel """ + # Determine name first: use registered name if possible, class name else. + default_name = registry.default_name(type(self)) + name = self.REGISTERED_NAME or default_name + super(T2TModel, self).__init__( + trainable=mode == tf.estimator.ModeKeys.TRAIN, name=name) if data_parallelism is None: data_parallelism = eu.Parallelism([""]) if ps_devices is None: ps_devices = [""] - hparams = copy.copy(hparams) - hparams.add_hparam("mode", mode) - # When not in training mode, set all forms of dropout to zero. - if mode != tf.estimator.ModeKeys.TRAIN: - for key in hparams.values(): - if key[-len("dropout"):] == "dropout": - setattr(hparams, key, 0.0) + if problem_hparams is None: + problem_hparams = hparams.problems[0] + # If vocabularies differ, unset shared_embedding_and_softmax_weights. + hparams = copy.copy(hparams) if hparams.shared_embedding_and_softmax_weights: same_vocab_sizes = True for problem in hparams.problems: @@ -104,7 +109,8 @@ def __init__(self, if not same_vocab_sizes: tf.logging.info("Unsetting shared_embedding_and_softmax_weights.") hparams.shared_embedding_and_softmax_weights = 0 - self._hparams = hparams + self._original_hparams = hparams + self.set_mode(mode) self._decode_hparams = copy.copy(decode_hparams) self._data_parallelism = data_parallelism self._num_datashards = data_parallelism.n @@ -113,6 +119,17 @@ def __init__(self, self._problem_idx = problem_idx self._create_modalities(problem_hparams, hparams) + def set_mode(self, mode): + """Set hparams with the given mode.""" + hparams = copy.copy(self._original_hparams) + hparams.add_hparam("mode", mode) + # When not in training mode, set all forms of dropout to zero. + if mode != tf.estimator.ModeKeys.TRAIN: + for key in hparams.values(): + if key[-len("dropout"):] == "dropout": + setattr(hparams, key, 0.0) + self._hparams = hparams + def _create_modalities(self, problem_hparams, hparams): """Construct modalities in problem_hparams.""" @@ -207,8 +224,8 @@ def infer(self, samples, _, _ = self._greedy_infer(features, decode_length) else: tf.logging.info("Beam Decoding with beam size %d" % beam_size) - samples = self._beam_decode(features, decode_length, beam_size, top_beams, - alpha) + samples = self._beam_decode( + features, decode_length, beam_size, top_beams, alpha) return samples def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): @@ -263,11 +280,10 @@ def symbols_to_logits_fn(ids): features["targets"] = ids self._coverage = None - sharded_logits, _ = self.model_fn(features, False) + logits, _ = self.__call__(features) # now self._coverage is a coverage tensor for the first datashard. # it has shape [batch_size] and contains floats between 0 and # source_length. - logits = sharded_logits[0] # Assuming we have one shard. modality = self._hparams.problems[self._problem_idx].target_modality if modality.top_is_pointwise: return tf.squeeze(logits, axis=[1, 2, 3]) @@ -384,7 +400,7 @@ def infer_step(recent_output, recent_logits, unused_loss): samples.set_shape([None, None, None, 1]) # Assuming we have one shard for logits. - logits = tf.concat([recent_logits, logits[0][:, -1:]], 1) + logits = tf.concat([recent_logits, logits[:, -1:]], 1) loss = sum([l for l in losses.values() if l is not None]) return samples, logits, loss @@ -477,13 +493,13 @@ def sample(self, features): logits: a list of `Tensor`s, one per datashard. losses: a dictionary: {loss-name (string): floating point `Scalar`}. """ - sharded_logits, losses = self.model_fn(features, False) + logits, losses = self.__call__(features) if self._hparams.sampling_method == "argmax": - sharded_samples = self._data_parallelism(tf.argmax, sharded_logits, 4) + samples = tf.argmax(logits, axis=-1) else: assert self._hparams.sampling_method == "random" - def _multinomial_squeeze(logits, temperature=1.0): + def multinomial_squeeze(logits, temperature=1.0): logits_shape = common_layers.shape_list(logits) reshaped_logits = ( tf.reshape(logits, [-1, logits_shape[-1]]) / temperature) @@ -491,9 +507,9 @@ def _multinomial_squeeze(logits, temperature=1.0): choices = tf.reshape(choices, logits_shape[:-1]) return choices - sharded_samples = self._data_parallelism( - _multinomial_squeeze, sharded_logits, self._hparams.sampling_temp) - return tf.concat(sharded_samples, 0), sharded_logits, losses + samples = multinomial_squeeze(logits, self._hparams.sampling_temp) + + return samples, logits, losses def _shard_features(self, features): # pylint: disable=missing-docstring sharded_features = dict() @@ -502,13 +518,12 @@ def _shard_features(self, features): # pylint: disable=missing-docstring if not v.shape.as_list(): v = tf.expand_dims(v, axis=-1) v = tf.tile(v, [self._num_datashards]) - sharded_features[k] = self._data_parallelism(tf.identity, - tf.split( - v, self._num_datashards, - 0)) + sharded_features[k] = self._data_parallelism( + tf.identity, + tf.split(v, self._num_datashards, 0)) return sharded_features - def model_fn(self, features, skip=False, force_full_predict=False): + def _model_fn(self, features, skip=False, force_full_predict=False): """Computes the entire model and produces sharded logits and losses. Args: @@ -662,6 +677,21 @@ def sampled_results(): tf.logging.info("This model_fn took %.3f sec." % (time.time() - start_time)) return sharded_logits, losses + def call(self, inputs_dict, skip=False, force_full_predict=False): + problem_hparams = self._problem_hparams + if "problem_choice" not in inputs_dict: + inputs_dict["problem_choice"] = tf.constant( + self._problem_idx, name="problem_choice") + if "input_space_id" not in inputs_dict: + inputs_dict["input_space_id"] = tf.constant( + problem_hparams.input_space_id, name="input_space_id") + if "target_space_id" not in inputs_dict: + inputs_dict["target_space_id"] = tf.constant( + problem_hparams.target_space_id, name="target_space_id") + sharded_logits, losses = self._model_fn( + inputs_dict, skip=skip, force_full_predict=force_full_predict) + return tf.concat(sharded_logits, 0), losses + def model_fn_body_sharded(self, sharded_features): """Mixture-of-experts models will override this function. diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py index d8dee3986..bd7367766 100644 --- a/tensor2tensor/utils/trainer_utils_test.py +++ b/tensor2tensor/utils/trainer_utils_test.py @@ -124,9 +124,9 @@ def testSingleEvalStepRawSession(self): features = { "inputs": batch_inputs, "targets": batch_targets, - "problem_choice": 0, # We run on the first problem here. - "input_space_id": hparams.problems[0].input_space_id, - "target_space_id": hparams.problems[0].target_space_id + "problem_choice": tf.constant(0), # We run on the first problem here. + "input_space_id": tf.constant(hparams.problems[0].input_space_id), + "target_space_id": tf.constant(hparams.problems[0].target_space_id) } # Now set a mode and create the graph by invoking model_fn. @@ -153,6 +153,56 @@ def testSingleEvalStepRawSession(self): # where, for us, batch = 1, length = 3, vocab_size = 4. self.assertEqual(np_predictions.shape, (1, 3, 4)) + def testSingleTrainStepCall(self): + """Illustrate how to run a T2T model in a raw session.""" + + # Set model name, hparams, problems as would be set on command line. + model_name = "transformer" + FLAGS.hparams_set = "transformer_test" + FLAGS.problems = "tiny_algo" + data_dir = "/tmp" # Used only when a vocab file or such like is needed. + + # Create the problem object, hparams, placeholders, features dict. + encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir) + hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir) + trainer_utils.add_problem_hparams(hparams, FLAGS.problems) + + # Now set a mode and create the model. + mode = tf.estimator.ModeKeys.TRAIN + model = registry.model(model_name)(hparams, mode) + + # Create placeholder for features and make them batch-sized. + inputs_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. + batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1]) # Make it 4D. + targets_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. + batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1]) # Make it 4D. + features = { + "inputs": batch_inputs, + "targets": batch_targets, + "target_space_id": tf.constant(hparams.problems[0].target_space_id) + } + + # Call the model. + predictions, _ = model(features) + nvars = len(tf.trainable_variables()) + model(features) # Call again and check that reuse works. + self.assertEqual(nvars, len(tf.trainable_variables())) + + # Having the graph, let's run it on some data. + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + inputs = "0 1 0" + targets = "0 1 0" + # Encode from raw string to numpy input array using problem encoders. + inputs_numpy = encoders["inputs"].encode(inputs) + targets_numpy = encoders["targets"].encode(targets) + # Feed the encoded inputs and targets and run session. + feed = {inputs_ph: inputs_numpy, targets_ph: targets_numpy} + np_predictions = sess.run(predictions, feed) + # Check that the result has the correct shape: batch x length x vocab_size + # where, for us, batch = 1, length = 3, vocab_size = 4. + self.assertEqual(np_predictions.shape, (1, 3, 1, 1, 4)) + if __name__ == "__main__": tf.test.main() From 214572992bec848131c27158123067a6b64414f8 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 21 Nov 2017 17:20:18 -0800 Subject: [PATCH 0605/4095] Remove unused functions from transformer_vae. PiperOrigin-RevId: 176584099 --- tensor2tensor/models/transformer_sketch.py | 2 +- tensor2tensor/models/transformer_vae.py | 205 +-------------------- 2 files changed, 6 insertions(+), 201 deletions(-) diff --git a/tensor2tensor/models/transformer_sketch.py b/tensor2tensor/models/transformer_sketch.py index b6bbb7708..7ef78bc59 100644 --- a/tensor2tensor/models/transformer_sketch.py +++ b/tensor2tensor/models/transformer_sketch.py @@ -47,7 +47,7 @@ def encode(self, inputs, target_space, hparams): name="small_image_conv") hparams.num_compress_steps = 2 - compressed_inputs = transformer_vae.compress(inputs, c=None, is_2d=True, + compressed_inputs = transformer_vae.compress(inputs, is_2d=True, hparams=hparams, name="convolutions") diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index caea3ff59..e7fa128ff 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -23,7 +23,6 @@ from six.moves import xrange # pylint: disable=redefined-builtin -from tensor2tensor.layers import common_attention from tensor2tensor.layers import common_layers from tensor2tensor.models import transformer from tensor2tensor.utils import expert_utils @@ -50,34 +49,10 @@ def residual_conv(x, repeat, k, hparams, name, reuse=None): return x -def attend(x, source, hparams, name): - with tf.variable_scope(name): - x = tf.squeeze(x, axis=2) - if len(source.get_shape()) > 3: - source = tf.squeeze(source, axis=2) - source = common_attention.add_timing_signal_1d(source) - y = common_attention.multihead_attention( - common_layers.layer_preprocess(x, hparams), source, None, - hparams.attention_key_channels or hparams.hidden_size, - hparams.attention_value_channels or hparams.hidden_size, - hparams.hidden_size, hparams.num_heads, - hparams.attention_dropout) - res = common_layers.layer_postprocess(x, y, hparams) - return tf.expand_dims(res, axis=2) - - -def interleave(x, y, axis=1): - x = tf.expand_dims(x, axis=axis+1) - y = tf.expand_dims(y, axis=axis+1) - return tf.concat([x, y], axis=axis+1) - - -def decompress_step(source, c, hparams, first_relu, is_2d, name): +def decompress_step(source, hparams, first_relu, is_2d, name): """Decompression function.""" with tf.variable_scope(name): shape = tf.shape(source) - if c is not None: - source = attend(source, c, hparams, "decompress_attend") multiplier = 4 if is_2d else 2 kernel = (1, 1) if is_2d else (1, 1) thicker = common_layers.conv_block( @@ -162,38 +137,6 @@ def vae(x, z_size, name): return z, tf.reduce_mean(kl), mu, log_sigma -def bit_vae(x, hparams, name): - with tf.variable_scope(name): - bity = tf.layers.dense(x, hparams.z_size, name="bity") - dev = common_layers.inverse_lin_decay(hparams.startup_steps) * 1.5 - noise = tf.random_normal(tf.shape(bity), mean=0.0, stddev=dev) - y = common_layers.saturating_sigmoid(bity + noise) - tf.summary.histogram("bit", tf.reshape(y, [-1])) - def discrete_y(): - d = tf.to_float(tf.less(0.5, y)) - return tf.stop_gradient(d) + y - tf.stop_gradient(y) - y = tf.cond(tf.less(tf.train.get_global_step(), hparams.startup_steps), - lambda: y, discrete_y) - # Flatten and predict for loss. - y_flat = tf.reshape(y, [-1, hparams.z_size, 1, 1]) - hsize = hparams.hidden_size - hparams.hidden_size = hsize // 2 - emb0 = tf.get_variable("emb0", [hparams.hidden_size]) - emb1 = tf.get_variable("emb1", [hparams.hidden_size]) - emb0 = tf.reshape(emb0, [1, 1, 1, hparams.hidden_size]) - emb1 = tf.reshape(emb0, [1, 1, 1, hparams.hidden_size]) - y_emb = y_flat * emb1 + (1 - y_flat) * emb0 - y_logit = decode(None, None, y_emb, None, None, hparams, "dbit") - hparams.hidden_size = hsize - y_pred = tf.nn.log_softmax(tf.layers.dense(y_logit, 2, name="y_pred")) - y_flat = tf.reshape(y_flat, [-1]) - y_pred = tf.reshape(y_pred, [-1, 2]) - loss = - (y_flat * y_pred[:, 1] + (1 - y_flat) * y_pred[:, 0]) - # Get the final z and return. - z = tf.layers.dense(y, hparams.z_size, name="after_bit") - return z, tf.reduce_mean(loss) - - def nearest(x, means, hparams): """Find the nearest means to elements in x.""" x, means = tf.stop_gradient(x), tf.stop_gradient(means) @@ -294,7 +237,7 @@ def embed(x): return res, c, l, embed -def compress(x, c, is_2d, hparams, name): +def compress(x, is_2d, hparams, name): """Compress.""" with tf.variable_scope(name): # Run compression by strided convs. @@ -303,28 +246,12 @@ def compress(x, c, is_2d, hparams, name): cur = residual_conv(cur, hparams.num_compress_steps, k1, hparams, "rc") k2 = (2, 2) if is_2d else (2, 1) for i in xrange(hparams.num_compress_steps): - if c is not None: - cur = attend(cur, c, hparams, "compress_attend_%d" % i) cur = common_layers.conv_block( cur, hparams.hidden_size, [((1, 1), k2)], strides=k2, name="compress_%d" % i) return cur -def mix(x1, x2, steps, min_prob=0.0, max_prob=1.0, mode="lin", simple=False): - """Mix starting with x2, mixing mixing, going towards x1.""" - if mode == "lin": - alpha_p = common_layers.inverse_lin_decay(steps) - else: - alpha_p = common_layers.inverse_exp_decay(steps) - alpha_p = alpha_p * (max_prob - min_prob) + min_prob - if simple: - return alpha_p * x1 + (1.0 - alpha_p) * x2 - alpha = tf.random_uniform(tf.shape(x1)) - alpha = tf.to_float(tf.less(alpha, alpha_p)) - return alpha * x1 + (1.0 - alpha) * x2 - - def encode(x, x_space, hparams, name): """Transformer preparations and encoder.""" with tf.variable_scope(name): @@ -335,21 +262,6 @@ def encode(x, x_space, hparams, name): encoder_input, encoder_self_attention_bias, hparams), ed -def decode(cond_vec, cond_add, gold, c, ed, hparams, name): - """Transformer decoder.""" - with tf.variable_scope(name): - drop_gold = tf.nn.dropout(gold, 1.0 - hparams.layer_prepostprocess_dropout) - decoder_input = common_layers.shift_right(drop_gold, pad_value=cond_vec) - if cond_add is not None: - decoder_input += cond_add - decoder_input = tf.squeeze(decoder_input, axis=2) - decoder_input = common_attention.add_timing_signal_1d(decoder_input) - bias = common_attention.attention_bias_lower_triangle(tf.shape(gold)[1]) - if c is not None and len(c.get_shape()) > 3: - c = tf.squeeze(c, axis=2) - return transformer.transformer_decoder(decoder_input, c, bias, ed, hparams) - - def decode_transformer(encoder_output, encoder_decoder_attention_bias, targets, @@ -376,111 +288,6 @@ def decode_transformer(encoder_output, return tf.expand_dims(decoder_output, axis=2) -def expand_batch(x, mul): - """Expand on batch by mul times.""" - cx = tf.expand_dims(x, axis=1) - x_shape = x.get_shape().as_list() - batch_mul = tf.to_int32(mul) - cx += tf.zeros([1, batch_mul, 1, 1, 1]) - mid_shape = [tf.shape(x)[2]] if len(x_shape) > 3 else [] - end_shape = [x_shape[-1]] if x_shape[-1] else [tf.shape(x)[-1]] - res_shape = [-1, tf.shape(x)[1]] + mid_shape + end_shape - return tf.reshape(cx, res_shape) - - -def ae_compress(x, is_2d, hparams, name, reuse=None): - """Compress, then AE.""" - with tf.variable_scope(name, reuse=reuse): - cur = compress(x, None, is_2d, hparams, "compress") - # Convolve and ReLu to get state. - cur = common_layers.conv_block( - cur, hparams.hidden_size, [((1, 1), (1, 1))], name="mid_conv") - means_size = hparams.z_size if hparams.do_vae else hparams.v_size - means = tf.get_variable("z_to_dense", [means_size, hparams.hidden_size]) - if hparams.do_vae: - if hparams.bit_vae: - hot, loss = bit_vae(cur, hparams, "bvae") - else: - hot, loss, _, _ = vae(cur, hparams.z_size, "vae") - return cur, hot, loss - if hparams.use_gumbel_softmax: - _, hot, loss = dae(cur, hparams, "dae") - return cur, hot, loss - # Using k-means part. L2-normalizing to use fast cosine distance. - cur = mix(tf.nn.l2_normalize(cur, dim=3), cur, - hparams.startup_steps // 3, mode="exp", simple=True) - cur_n = hparams.kmeans_lr_factor * cur - cur_n += (1.0 - hparams.kmeans_lr_factor) * tf.stop_gradient(cur) - hot, loss = kmeans(cur_n, means, hparams, name="kmeans") - # We need a linear layer to undo the l2-normalization. - cur = tf.layers.dense(cur, hparams.hidden_size, name="unnormalize") - return cur, hot, loss - - -def ae_embed(hot, hparams, name, reuse=None): - with tf.variable_scope(name, reuse=reuse): - means_size = hparams.z_size if hparams.do_vae else hparams.v_size - means = tf.get_variable("z_to_dense", [means_size, hparams.hidden_size]) - hot_flat = tf.reshape(hot, [-1, means_size]) - emb = tf.matmul(hot_flat, means) - emb = tf.reshape(emb, [tf.shape(hot)[0], tf.shape(hot)[1], - tf.shape(hot)[2], hparams.hidden_size]) - if hparams.use_gumbel_softmax or hparams.do_vae: - return emb - return tf.layers.dense(emb, hparams.hidden_size, - name="unnormalize", reuse=reuse) - - -def ae_decompress(z, ae, x, is_2d, hparams, name, reuse=None): - """Decompress from z, leaking from ae.""" - with tf.variable_scope(name + "_decompress", reuse=reuse): - if hparams.use_gumbel_softmax or hparams.do_vae: - # Leak at the beginning to help train. - z = mix(z, ae, hparams.startup_steps) - else: - # Gradients flow to ae while the value is z. - z = tf.stop_gradient(z) + ae - tf.stop_gradient(ae) - # Leak during training to keep the full dense autoencoder. - prob_z = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.8 - prob_z = prob_z if hparams.mode == tf.contrib.learn.ModeKeys.TRAIN else 1.0 - z = tf.cond(tf.less(tf.random_uniform([]), prob_z), - lambda: z, lambda: ae) - - # Dropout for better autoencoding. - z = tf.nn.dropout(z, keep_prob=1.0 - hparams.z_dropout) - - # Decompress. - d = z - k = (3, 3) if is_2d else (3, 1) - for i in xrange(hparams.num_compress_steps): - j = hparams.num_compress_steps - i - 1 - d = residual_conv(d, 1, k, hparams, "decompress_rc_%d" % j) - d = decompress_step(d, None, hparams, i > 0, is_2d, "decompress_%d" % j) - - # Autoregressive part. - if hparams.decode_autoregressive: - k = 2**(hparams.num_compress_steps * (2 if is_2d else 1)) - x_batch = tf.reshape(x, [-1, k, 1, hparams.hidden_size]) - x_batch = tf.stop_gradient(x_batch) - z_batch = tf.reshape(z, [-1, 1, 1, hparams.hidden_size]) - d_batch = tf.reshape(d, [-1, k, 1, hparams.hidden_size]) - dec_batch = decode(z_batch, d_batch, x_batch, None, None, hparams, "dar") - else: # For non-autoregressive. - dec_batch = d - z = tf.reshape(dec_batch, [-1, tf.shape(x)[1], tf.shape(x)[2], - hparams.hidden_size]) - if is_2d: - z = tf.layers.dense(z, hparams.hidden_size * 3) - return z - - -def ffn(x, hparams, name): - with tf.variable_scope(name): - y = transformer.transformer_ffn_layer( - common_layers.layer_preprocess(x, hparams), hparams) - return common_layers.layer_postprocess(x, y, hparams) - - def multinomial_sample(x, vocab_size, temperature): """Multinomial sampling from a n-dimensional tensor.""" samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1) @@ -532,7 +339,7 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, if hparams.do_ae: targets, _ = common_layers.pad_to_same_length( targets, targets, final_length_divisible_by=2**k) - targets_c = compress(targets, None, False, hparams, "compress") + targets_c = compress(targets, False, hparams, "compress") if hparams.mode != tf.estimator.ModeKeys.PREDICT: # Compress and bottleneck. t_c, t_bit, vc_loss, _ = bottleneck(targets_c, hparams, 2*2048, "vc") @@ -578,10 +385,8 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, for i in xrange(hparams.num_compress_steps): j = hparams.num_compress_steps - i - 1 d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j) - d = decompress_step(d, None, hparams, - i > 0, False, "decompress_%d" % j) - noise = d # tf.random_uniform(tf.shape(targets)) - targets = mask * targets + (1.0 - mask) * noise + d = decompress_step(d, hparams, i > 0, False, "decompress_%d" % j) + targets = mask * targets + (1.0 - mask) * d targets = tf.concat([tf.reverse(t_c, [1]), targets], axis=1) res = decode_transformer(inputs, ed, targets, hparams, "decoder") From f77da80ab88dac61eb421032666e18748dce1c01 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Tue, 21 Nov 2017 17:34:48 -0800 Subject: [PATCH 0606/4095] Update distance computation for k-nearest neighbours to be more efficient, by computing the norms separately. PiperOrigin-RevId: 176585527 --- tensor2tensor/models/transformer_vae.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index e7fa128ff..e1af69b8e 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -140,10 +140,11 @@ def vae(x, z_size, name): def nearest(x, means, hparams): """Find the nearest means to elements in x.""" x, means = tf.stop_gradient(x), tf.stop_gradient(means) - means = tf.nn.l2_normalize(means, dim=1) x_flat = tf.reshape(x, [-1, hparams.hidden_size]) - # dist = tf.reduce_sum(tf.square(x_flat - tf.expand_dims(means, 0)), axis=2) - dist = - tf.matmul(x_flat, means, transpose_b=True) + x_norm = tf.norm(x_flat, axis=-1, keep_dims=True) + means_norm = tf.norm(means, axis=-1, keep_dims=True) + dist = x_norm + tf.transpose(means_norm) - 2 * tf.matmul(x_flat, means, + transpose_b=True) _, nearest_idx = tf.nn.top_k(- dist, k=1) nearest_hot = tf.one_hot(tf.squeeze(nearest_idx, axis=1), hparams.v_size) nearest_hot = tf.reshape(nearest_hot, [tf.shape(x)[0], tf.shape(x)[1], From cc80721019bfc51b8b486f5c92cb1142ca04a5fa Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Wed, 22 Nov 2017 12:29:34 -0800 Subject: [PATCH 0607/4095] Pass the modified hparams to the modalities, so they can know the mode. PiperOrigin-RevId: 176688435 --- tensor2tensor/utils/t2t_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 186b4348f..51120e41d 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -117,7 +117,7 @@ def __init__(self, self._ps_devices = ps_devices self._problem_hparams = problem_hparams self._problem_idx = problem_idx - self._create_modalities(problem_hparams, hparams) + self._create_modalities(problem_hparams, self._hparams) def set_mode(self, mode): """Set hparams with the given mode.""" From c10e0160e1bd00c68568df1ca80e5cbdd2c81a3b Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 23 Nov 2017 01:35:17 -0800 Subject: [PATCH 0608/4095] This change breaks previous checkpoints. Make Transformer fast on TPU. PiperOrigin-RevId: 176747359 --- tensor2tensor/layers/common_attention.py | 28 +++++++++++------------- tensor2tensor/layers/common_hparams.py | 3 +++ tensor2tensor/layers/common_layers.py | 25 +++++++++++++++++---- tensor2tensor/layers/modalities.py | 26 +++++++++++++++++----- tensor2tensor/layers/modalities_test.py | 9 +++++--- tensor2tensor/models/transformer.py | 26 ++++++++++++++++++++-- tensor2tensor/tpu/tpu_trainer_lib.py | 1 + 7 files changed, 88 insertions(+), 30 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 5aafe6348..dc513db7b 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -801,7 +801,7 @@ def combine_first_two_dimensions(x): @expert_utils.add_name_scope() def split_heads(x, num_heads): - """Split channels (dimension 3) into multiple heads (becomes dimension 1). + """Split channels (dimension 2) into multiple heads (becomes dimension 1). Args: x: a Tensor with shape [batch, length, channels] @@ -815,7 +815,7 @@ def split_heads(x, num_heads): @expert_utils.add_name_scope() def split_heads_2d(x, num_heads): - """Split channels (dimension 4) into multiple heads (becomes dimension 1). + """Split channels (dimension 3) into multiple heads (becomes dimension 1). Args: x: a Tensor with shape [batch, height, width, channels] @@ -2191,10 +2191,10 @@ def compute_qkv(query_antecedent, """ if memory_antecedent is None and q_filter_width == kv_filter_width == 1: # self attention with single position q, k, and v - combined = common_layers.conv1d( + combined = tf.layers.dense( query_antecedent, total_key_depth * 2 + total_value_depth, - 1, + use_bias=False, name="qkv_transform") q, k, v = tf.split( combined, [total_key_depth, total_key_depth, total_value_depth], axis=2) @@ -2250,22 +2250,19 @@ def compute_qkv_2d(query_antecedent, memory_antecedent, total_key_depth, """ # self attention with single position q, k, and v if memory_antecedent is None: - combined = tf.layers.conv2d( - query_antecedent, - total_key_depth * 2 + total_value_depth, (1, 1), - name="qkv_transform") + combined = tf.layers.dense( + query_antecedent, total_key_depth * 2 + total_value_depth, + use_bias=False, name="qkv_transform") q, k, v = tf.split( combined, [total_key_depth, total_key_depth, total_value_depth], axis=-1) return q, k, v # Encoder decoder attention - q = common_layers.conv1d( - query_antecedent, total_key_depth, 1, name="q_transform") - combined = common_layers.conv1d( - memory_antecedent, - total_key_depth + total_value_depth, - 1, + q = tf.layers.dense( + query_antecedent, total_key_depth, use_bias=False, name="q_transform") + combined = tf.layers.dense( + memory_antecedent, total_key_depth + total_value_depth, use_bias=False, name="kv_transform") k, v = tf.split(combined, [total_key_depth, total_value_depth], axis=2) @@ -2410,7 +2407,8 @@ def multihead_attention(query_antecedent, x = dilated_self_attention_1d(q, k, v, block_length, block_width, gap_size, num_memory_blocks) x = combine_heads(x) - x = common_layers.conv1d(x, output_depth, 1, name="output_transform") + x = tf.layers.dense( + x, output_depth, use_bias=False, name="output_transform") if additional_returned_value is not None: return x, additional_returned_value return x diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index f784fb383..5abc13ea7 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -179,6 +179,9 @@ def basic_params1(): # This is the actual batch size, *not* tokens per batch (i.e. for # language models this is the number of sentences in the batch) tpu_batch_size_per_shard=24, + # Set by tpu_trainer to let the model know whether we are on TPU. + # Switching on/off tpu should not invalidate checkpoints. + use_tpu=False, ) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 47448b7d7..7a23db473 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -1229,6 +1229,15 @@ def relu_density_logit(x, reduce_dims): return scaled +def conv_hidden_relu_simple(inputs, hidden_size, output_size, dropout=0.0): + h = tf.layers.dense( + inputs, hidden_size, use_bias=False, activation=tf.nn.relu, name="conv1") + if dropout != 0.0: + h = tf.nn.dropout(h, 1.0 - dropout) + o = tf.layers.dense(h, output_size, use_bias=False, name="conv2") + return o + + def conv_hidden_relu(inputs, hidden_size, output_size, @@ -1239,6 +1248,9 @@ def conv_hidden_relu(inputs, """Hidden layer with RELU activation followed by linear projection.""" name = kwargs.pop("name") if "name" in kwargs else None with tf.variable_scope(name, "conv_hidden_relu", [inputs]): + if kernel_size == (1, 1) and second_kernel_size == (1, 1): + return conv_hidden_relu_simple( + inputs, hidden_size, output_size, dropout=dropout) if inputs.get_shape().ndims == 3: is_3d = True inputs = tf.expand_dims(inputs, 2) @@ -1487,10 +1499,15 @@ def padded_cross_entropy(logits, confidence = 1.0 - label_smoothing vocab_size = shape_list(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): - pad_logits, pad_labels = pad_with_zeros(logits, labels) - xent = smoothing_cross_entropy(pad_logits, pad_labels, vocab_size, - confidence) - weights = weights_fn(pad_labels) + if len(logits.get_shape().as_list()) == 2: + # Deal with the case where we did not insert extra dimensions due to + # TPU issues. No pad-to-same-length happens in this case. + # TODO(noam): remove this logic once TPU can handle extra dimensions. + labels = tf.reshape(labels, [-1]) + else: + logits, labels = pad_with_zeros(logits, labels) + xent = smoothing_cross_entropy(logits, labels, vocab_size, confidence) + weights = weights_fn(labels) if not reduce_sum: return xent * weights, weights return tf.reduce_sum(xent * weights), tf.reduce_sum(weights) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 37abc3b81..a825e66c9 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -30,6 +30,15 @@ import tensorflow as tf +# TODO(noam): remove this function after TPUs do gather faster. +def tpu_gather(params, indices): + vocab_size = params.get_shape().as_list()[0] + indices_flat = tf.reshape(indices, [-1]) + out = tf.matmul(tf.one_hot(indices_flat, vocab_size), params) + out = eu.reshape_like(out, tf.expand_dims(indices, -1)) + return out + + @registry.register_symbol_modality("default") class SymbolModality(modality.Modality): """Modality for sets of discrete symbols. @@ -94,7 +103,8 @@ def bottom_simple(self, x, name, reuse): # Squeeze out the channels dimension. x = tf.squeeze(x, axis=3) var = self._get_weights() - ret = tf.gather(var, x) + ret = (tpu_gather(var, x) if self._model_hparams.use_tpu + else tf.gather(var, x)) if self._model_hparams.multiply_embedding_mode == "sqrt_depth": ret *= self._body_input_depth**0.5 ret *= tf.expand_dims(tf.to_float(tf.not_equal(x, 0)), -1) @@ -142,14 +152,18 @@ def top(self, body_output, _): self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN): # insert channels dimension body_output = tf.expand_dims(body_output, 3) - logits = common_layers.FactoredTensor(body_output, var) + return common_layers.FactoredTensor(body_output, var) else: body_output = tf.reshape(body_output, [-1, body_output_shape[-1]]) logits = tf.matmul(body_output, var, transpose_b=True) - - out_shape = body_output_shape[:-1] + [1, self._vocab_size] - logits = tf.reshape(logits, out_shape) - return logits + if (self._model_hparams.use_tpu and + self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN): + # TPU does not react kindly to extra dimensions. + # TODO(noam): remove this once TPU is more forgiving of extra dims. + return logits + else: + return tf.reshape( + logits, body_output_shape[:-1] + [1, self._vocab_size]) @registry.register_symbol_modality("ctc") diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index bf42af529..ca8f5fc4d 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -42,7 +42,8 @@ def testSymbolModalityInputs(self): multiply_embedding_mode="sqrt_depth", symbol_modality_skip_top=0, shared_embedding_and_softmax_weights=0, - prepend_mode="none") + prepend_mode="none", + use_tpu=False) x = -1 + np.random.random_integers( vocab_size, size=(batch_size, length, 1, 1)) m = modalities.SymbolModality(model_hparams, vocab_size) @@ -71,7 +72,8 @@ def testSymbolModalityTargets(self): shared_embedding_and_softmax_weights=0, factored_logits=0, mode=tf.estimator.ModeKeys.TRAIN, - prepend_mode="none") + prepend_mode="none", + use_tpu=False) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( @@ -107,7 +109,8 @@ def testSymbolModalityTargetsFactored(self): shared_embedding_and_softmax_weights=0, factored_logits=1, mode=tf.estimator.ModeKeys.TRAIN, - prepend_mode="none") + prepend_mode="none", + use_tpu=False) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 8745dc00b..62407522d 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -108,8 +108,13 @@ def decode(self, hparams, cache=cache) - # Expand since t2t expects 4d tensors. - return tf.expand_dims(decoder_output, axis=2) + if hparams.use_tpu and hparams.mode == tf.estimator.ModeKeys.TRAIN: + # TPU does not react kindly to extra dimensions. + # TODO(noam): remove this once TPU is more forgiving of extra dims. + return decoder_output + else: + # Expand since t2t expects 4d tensors. + return tf.expand_dims(decoder_output, axis=2) def model_fn_body(self, features): """Transformer main model_fn. @@ -1113,3 +1118,20 @@ def transformer_clean_big(): hparams.hidden_size = 1024 hparams.filter_size = 4096 return hparams + + +@registry.register_hparams +def transformer_tpu_lm1b(): + """Hparams for training languagemodel_lm1b8k_concat on tpu.""" + hparams = transformer_clean() + update_hparams_for_tpu(hparams) + hparams.max_length = 512 + hparams.tpu_batch_size_per_shard = 8 + hparams.hidden_size = 1024 + hparams.filter_size = 4096 + hparams.num_heads = 4 + hparams.label_smoothing = 0.0 + hparams.layer_prepostprocess_dropout = 0.0 + hparams.attention_dropout = 0.0 + hparams.relu_dropout = 0.0 + return hparams diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 65618fc1b..540510929 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -212,6 +212,7 @@ def t2t_model_fn(model_name, hparams = copy.deepcopy(hparams) problem = hparams.problem_instances[0] problem_hp = hparams.problems[0] + hparams.use_tpu = use_tpu features["problem_choice"] = tf.constant(0) features["input_space_id"] = tf.constant(problem_hp.input_space_id) From b10429284b0a95d8fd991ca640938f4d2f944ef9 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Thu, 23 Nov 2017 11:13:59 -0800 Subject: [PATCH 0609/4095] Changes to make t2t tf.eager compatible PiperOrigin-RevId: 176783794 --- tensor2tensor/layers/common_hparams.py | 3 +++ tensor2tensor/layers/common_layers.py | 6 ++++-- tensor2tensor/layers/modalities.py | 7 +++++-- tensor2tensor/layers/modalities_test.py | 9 ++++++--- tensor2tensor/models/cycle_gan.py | 5 +++-- tensor2tensor/models/transformer.py | 3 ++- 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 5abc13ea7..eafec1854 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -182,6 +182,9 @@ def basic_params1(): # Set by tpu_trainer to let the model know whether we are on TPU. # Switching on/off tpu should not invalidate checkpoints. use_tpu=False, + # Things not compatible with eager mode use this flag to implement + # alternative functionality. We expect this to go away soon. + use_eager_mode=False, ) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 7a23db473..fa0c0d90e 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -200,7 +200,8 @@ def flatten4d3d(x): return result -def embedding(x, vocab_size, dense_size, name=None, reuse=None, multiplier=1.0): +def embedding(x, vocab_size, dense_size, name=None, reuse=None, multiplier=1.0, + use_eager_mode=False): """Embed x of type int64 into dense vectors, reducing to max 4 dimensions.""" with tf.variable_scope( name, default_name="embedding", values=[x], reuse=reuse): @@ -208,7 +209,8 @@ def embedding(x, vocab_size, dense_size, name=None, reuse=None, multiplier=1.0): # On the backwards pass, we want to convert the gradient from # an indexed-slices to a regular tensor before sending it back to the # parameter server. This avoids excess computation on the parameter server. - embedding_var = eu.convert_gradient_to_tensor(embedding_var) + if not use_eager_mode: + embedding_var = eu.convert_gradient_to_tensor(embedding_var) emb_x = tf.gather(embedding_var, x) if multiplier != 1.0: emb_x *= multiplier diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index a825e66c9..26aca13d2 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -95,7 +95,9 @@ def _get_weights(self, hidden_dim=None): ret = shards[0] else: ret = tf.concat(shards, 0) - ret = eu.convert_gradient_to_tensor(ret) + # Convert ret to tensor. + if not self._model_hparams.use_eager_mode: + ret = eu.convert_gradient_to_tensor(ret) return ret def bottom_simple(self, x, name, reuse): @@ -213,7 +215,8 @@ def targets_bottom(self, inputs): tf.to_int32(common_layers.flatten4d3d(inputs)), self.top_dimensionality, self._body_input_depth, - name="input_rgb_embedding") + name="input_rgb_embedding", + use_eager_mode=self._model_hparams.use_eager_mode) if self._model_hparams.multiply_embedding_mode == "sqrt_depth": ret *= self._body_input_depth**0.5 diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index ca8f5fc4d..e581b7cb4 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -43,7 +43,8 @@ def testSymbolModalityInputs(self): symbol_modality_skip_top=0, shared_embedding_and_softmax_weights=0, prepend_mode="none", - use_tpu=False) + use_tpu=False, + use_eager_mode=False) x = -1 + np.random.random_integers( vocab_size, size=(batch_size, length, 1, 1)) m = modalities.SymbolModality(model_hparams, vocab_size) @@ -73,7 +74,8 @@ def testSymbolModalityTargets(self): factored_logits=0, mode=tf.estimator.ModeKeys.TRAIN, prepend_mode="none", - use_tpu=False) + use_tpu=False, + use_eager_mode=False) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( @@ -110,7 +112,8 @@ def testSymbolModalityTargetsFactored(self): factored_logits=1, mode=tf.estimator.ModeKeys.TRAIN, prepend_mode="none", - use_tpu=False) + use_tpu=False, + use_eager_mode=False) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( diff --git a/tensor2tensor/models/cycle_gan.py b/tensor2tensor/models/cycle_gan.py index 4cf1a5871..dd013acad 100644 --- a/tensor2tensor/models/cycle_gan.py +++ b/tensor2tensor/models/cycle_gan.py @@ -66,10 +66,11 @@ def cycle_gan_internal(inputs, targets, _, hparams): # Embed inputs and targets. inputs_orig, targets_orig = tf.to_int32(inputs), tf.to_int32(targets) inputs = common_layers.embedding( - inputs_orig, hparams.vocab_size, hparams.hidden_size, "embed") + inputs_orig, hparams.vocab_size, hparams.hidden_size, "embed", + use_eager_mode=hparams.use_eager_mode) targets = common_layers.embedding( targets_orig, hparams.vocab_size, hparams.hidden_size, - "embed", reuse=True) + "embed", reuse=True, use_eager_mode=hparams.use_eager_mode) # Split the batch into input-input and target-target parts. inputs1, _ = split_on_batch(inputs) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 62407522d..11138515f 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -418,7 +418,8 @@ def transformer_prepare_encoder(inputs, target_space, hparams): common_layers.shape_list(inputs)[1]) # Append target_space_id embedding to inputs. emb_target_space = common_layers.embedding( - target_space, 32, ishape_static[-1], name="target_space_embedding") + target_space, 32, ishape_static[-1], name="target_space_embedding", + use_eager_mode=hparams.use_eager_mode) emb_target_space = tf.reshape(emb_target_space, [1, 1, -1]) encoder_input += emb_target_space if hparams.pos == "timing": From b3cad0c3f4f9c348d11444785959e0e2dc83baf5 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 23 Nov 2017 11:41:12 -0800 Subject: [PATCH 0610/4095] This change breaks previous checkpoints. Make Transformer fast on TPU. PiperOrigin-RevId: 176784764 --- tensor2tensor/layers/common_attention.py | 28 +++++++++++++----------- tensor2tensor/layers/common_hparams.py | 3 --- tensor2tensor/layers/common_layers.py | 25 ++++----------------- tensor2tensor/layers/modalities.py | 26 +++++----------------- tensor2tensor/layers/modalities_test.py | 3 --- tensor2tensor/models/transformer.py | 26 ++-------------------- tensor2tensor/tpu/tpu_trainer_lib.py | 1 - 7 files changed, 27 insertions(+), 85 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index dc513db7b..5aafe6348 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -801,7 +801,7 @@ def combine_first_two_dimensions(x): @expert_utils.add_name_scope() def split_heads(x, num_heads): - """Split channels (dimension 2) into multiple heads (becomes dimension 1). + """Split channels (dimension 3) into multiple heads (becomes dimension 1). Args: x: a Tensor with shape [batch, length, channels] @@ -815,7 +815,7 @@ def split_heads(x, num_heads): @expert_utils.add_name_scope() def split_heads_2d(x, num_heads): - """Split channels (dimension 3) into multiple heads (becomes dimension 1). + """Split channels (dimension 4) into multiple heads (becomes dimension 1). Args: x: a Tensor with shape [batch, height, width, channels] @@ -2191,10 +2191,10 @@ def compute_qkv(query_antecedent, """ if memory_antecedent is None and q_filter_width == kv_filter_width == 1: # self attention with single position q, k, and v - combined = tf.layers.dense( + combined = common_layers.conv1d( query_antecedent, total_key_depth * 2 + total_value_depth, - use_bias=False, + 1, name="qkv_transform") q, k, v = tf.split( combined, [total_key_depth, total_key_depth, total_value_depth], axis=2) @@ -2250,19 +2250,22 @@ def compute_qkv_2d(query_antecedent, memory_antecedent, total_key_depth, """ # self attention with single position q, k, and v if memory_antecedent is None: - combined = tf.layers.dense( - query_antecedent, total_key_depth * 2 + total_value_depth, - use_bias=False, name="qkv_transform") + combined = tf.layers.conv2d( + query_antecedent, + total_key_depth * 2 + total_value_depth, (1, 1), + name="qkv_transform") q, k, v = tf.split( combined, [total_key_depth, total_key_depth, total_value_depth], axis=-1) return q, k, v # Encoder decoder attention - q = tf.layers.dense( - query_antecedent, total_key_depth, use_bias=False, name="q_transform") - combined = tf.layers.dense( - memory_antecedent, total_key_depth + total_value_depth, use_bias=False, + q = common_layers.conv1d( + query_antecedent, total_key_depth, 1, name="q_transform") + combined = common_layers.conv1d( + memory_antecedent, + total_key_depth + total_value_depth, + 1, name="kv_transform") k, v = tf.split(combined, [total_key_depth, total_value_depth], axis=2) @@ -2407,8 +2410,7 @@ def multihead_attention(query_antecedent, x = dilated_self_attention_1d(q, k, v, block_length, block_width, gap_size, num_memory_blocks) x = combine_heads(x) - x = tf.layers.dense( - x, output_depth, use_bias=False, name="output_transform") + x = common_layers.conv1d(x, output_depth, 1, name="output_transform") if additional_returned_value is not None: return x, additional_returned_value return x diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index eafec1854..e75bf4099 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -179,9 +179,6 @@ def basic_params1(): # This is the actual batch size, *not* tokens per batch (i.e. for # language models this is the number of sentences in the batch) tpu_batch_size_per_shard=24, - # Set by tpu_trainer to let the model know whether we are on TPU. - # Switching on/off tpu should not invalidate checkpoints. - use_tpu=False, # Things not compatible with eager mode use this flag to implement # alternative functionality. We expect this to go away soon. use_eager_mode=False, diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index fa0c0d90e..df21a12ac 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -1231,15 +1231,6 @@ def relu_density_logit(x, reduce_dims): return scaled -def conv_hidden_relu_simple(inputs, hidden_size, output_size, dropout=0.0): - h = tf.layers.dense( - inputs, hidden_size, use_bias=False, activation=tf.nn.relu, name="conv1") - if dropout != 0.0: - h = tf.nn.dropout(h, 1.0 - dropout) - o = tf.layers.dense(h, output_size, use_bias=False, name="conv2") - return o - - def conv_hidden_relu(inputs, hidden_size, output_size, @@ -1250,9 +1241,6 @@ def conv_hidden_relu(inputs, """Hidden layer with RELU activation followed by linear projection.""" name = kwargs.pop("name") if "name" in kwargs else None with tf.variable_scope(name, "conv_hidden_relu", [inputs]): - if kernel_size == (1, 1) and second_kernel_size == (1, 1): - return conv_hidden_relu_simple( - inputs, hidden_size, output_size, dropout=dropout) if inputs.get_shape().ndims == 3: is_3d = True inputs = tf.expand_dims(inputs, 2) @@ -1501,15 +1489,10 @@ def padded_cross_entropy(logits, confidence = 1.0 - label_smoothing vocab_size = shape_list(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): - if len(logits.get_shape().as_list()) == 2: - # Deal with the case where we did not insert extra dimensions due to - # TPU issues. No pad-to-same-length happens in this case. - # TODO(noam): remove this logic once TPU can handle extra dimensions. - labels = tf.reshape(labels, [-1]) - else: - logits, labels = pad_with_zeros(logits, labels) - xent = smoothing_cross_entropy(logits, labels, vocab_size, confidence) - weights = weights_fn(labels) + pad_logits, pad_labels = pad_with_zeros(logits, labels) + xent = smoothing_cross_entropy(pad_logits, pad_labels, vocab_size, + confidence) + weights = weights_fn(pad_labels) if not reduce_sum: return xent * weights, weights return tf.reduce_sum(xent * weights), tf.reduce_sum(weights) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 26aca13d2..362c4b527 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -30,15 +30,6 @@ import tensorflow as tf -# TODO(noam): remove this function after TPUs do gather faster. -def tpu_gather(params, indices): - vocab_size = params.get_shape().as_list()[0] - indices_flat = tf.reshape(indices, [-1]) - out = tf.matmul(tf.one_hot(indices_flat, vocab_size), params) - out = eu.reshape_like(out, tf.expand_dims(indices, -1)) - return out - - @registry.register_symbol_modality("default") class SymbolModality(modality.Modality): """Modality for sets of discrete symbols. @@ -105,8 +96,7 @@ def bottom_simple(self, x, name, reuse): # Squeeze out the channels dimension. x = tf.squeeze(x, axis=3) var = self._get_weights() - ret = (tpu_gather(var, x) if self._model_hparams.use_tpu - else tf.gather(var, x)) + ret = tf.gather(var, x) if self._model_hparams.multiply_embedding_mode == "sqrt_depth": ret *= self._body_input_depth**0.5 ret *= tf.expand_dims(tf.to_float(tf.not_equal(x, 0)), -1) @@ -154,18 +144,14 @@ def top(self, body_output, _): self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN): # insert channels dimension body_output = tf.expand_dims(body_output, 3) - return common_layers.FactoredTensor(body_output, var) + logits = common_layers.FactoredTensor(body_output, var) else: body_output = tf.reshape(body_output, [-1, body_output_shape[-1]]) logits = tf.matmul(body_output, var, transpose_b=True) - if (self._model_hparams.use_tpu and - self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN): - # TPU does not react kindly to extra dimensions. - # TODO(noam): remove this once TPU is more forgiving of extra dims. - return logits - else: - return tf.reshape( - logits, body_output_shape[:-1] + [1, self._vocab_size]) + + out_shape = body_output_shape[:-1] + [1, self._vocab_size] + logits = tf.reshape(logits, out_shape) + return logits @registry.register_symbol_modality("ctc") diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index e581b7cb4..213abe891 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -43,7 +43,6 @@ def testSymbolModalityInputs(self): symbol_modality_skip_top=0, shared_embedding_and_softmax_weights=0, prepend_mode="none", - use_tpu=False, use_eager_mode=False) x = -1 + np.random.random_integers( vocab_size, size=(batch_size, length, 1, 1)) @@ -74,7 +73,6 @@ def testSymbolModalityTargets(self): factored_logits=0, mode=tf.estimator.ModeKeys.TRAIN, prepend_mode="none", - use_tpu=False, use_eager_mode=False) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) @@ -112,7 +110,6 @@ def testSymbolModalityTargetsFactored(self): factored_logits=1, mode=tf.estimator.ModeKeys.TRAIN, prepend_mode="none", - use_tpu=False, use_eager_mode=False) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 11138515f..4ce3ae5fe 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -108,13 +108,8 @@ def decode(self, hparams, cache=cache) - if hparams.use_tpu and hparams.mode == tf.estimator.ModeKeys.TRAIN: - # TPU does not react kindly to extra dimensions. - # TODO(noam): remove this once TPU is more forgiving of extra dims. - return decoder_output - else: - # Expand since t2t expects 4d tensors. - return tf.expand_dims(decoder_output, axis=2) + # Expand since t2t expects 4d tensors. + return tf.expand_dims(decoder_output, axis=2) def model_fn_body(self, features): """Transformer main model_fn. @@ -1119,20 +1114,3 @@ def transformer_clean_big(): hparams.hidden_size = 1024 hparams.filter_size = 4096 return hparams - - -@registry.register_hparams -def transformer_tpu_lm1b(): - """Hparams for training languagemodel_lm1b8k_concat on tpu.""" - hparams = transformer_clean() - update_hparams_for_tpu(hparams) - hparams.max_length = 512 - hparams.tpu_batch_size_per_shard = 8 - hparams.hidden_size = 1024 - hparams.filter_size = 4096 - hparams.num_heads = 4 - hparams.label_smoothing = 0.0 - hparams.layer_prepostprocess_dropout = 0.0 - hparams.attention_dropout = 0.0 - hparams.relu_dropout = 0.0 - return hparams diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 540510929..65618fc1b 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -212,7 +212,6 @@ def t2t_model_fn(model_name, hparams = copy.deepcopy(hparams) problem = hparams.problem_instances[0] problem_hp = hparams.problems[0] - hparams.use_tpu = use_tpu features["problem_choice"] = tf.constant(0) features["input_space_id"] = tf.constant(problem_hp.input_space_id) From 936db05d57609225d301e498f82c1d9dd96ce74e Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 23 Nov 2017 21:40:50 -0800 Subject: [PATCH 0611/4095] Versions of problems with combined examples. Good for TPU training. PiperOrigin-RevId: 176807931 --- .../data_generators/generator_utils.py | 65 +++++++++++++++++++ tensor2tensor/data_generators/lm1b.py | 41 ++++++++++-- tensor2tensor/data_generators/problem.py | 25 ++++++- .../data_generators/translate_ende.py | 12 ++++ 4 files changed, 135 insertions(+), 8 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 833717432..aa55ccb13 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -447,3 +447,68 @@ def shuffle_dataset(filenames): out_fname = fname.replace(UNSHUFFLED_SUFFIX, "") write_records(records, out_fname) tf.gfile.Remove(fname) + + +def combine_examples_no_inputs(examples, max_length): + """Combine examples into longer examples. + + Concatenate targets to form target sequences with length up to max_length. + Target sequences longer than max_length are chopped into multiple sequences. + + Args: + examples: a generator returning feature dictionaries. + max_length: an integer. + + Yields: + feature dictionaries. + """ + partial = [] + for example in examples: + x = example["targets"] + if len(x) + len(partial) > max_length: + if partial: + yield {"inputs": [0], "targets": partial} + partial = [] + if len(x) > max_length: + num_fragments = len(x) // max_length + for i in xrange(num_fragments): + yield {"inputs": [0], "targets": x[max_length * i:max_length * (i + 1)]} + partial = x[max_length * num_fragments:] + else: + partial += x + if partial: + yield {"inputs": [0], "targets": partial} + + +def combine_examples_with_inputs(examples, max_length): + """Combine examples into longer examples. + + We combine multiple examples by concatenating the inputs and concatenating + the targets. Sequences where the inputs or the targets are too long are + emitted as singletons (not chopped). + + Args: + examples: a generator returning feature dictionaries. + max_length: an integer. + + Yields: + feature dictionaries. + """ + partial_a = [] + partial_b = [] + for example in examples: + a = example["inputs"] + b = example["targets"] + if (len(a) + len(partial_a) > max_length or + len(b) + len(partial_b) > max_length): + if partial_a or partial_b: + yield {"inputs": partial_a, "targets": partial_b} + partial_a = [] + partial_b = [] + if len(a) > max_length or len(b) > max_length: + yield {"inputs": a, "targets": b} + else: + partial_a += a + partial_b += b + if partial_a or partial_b: + yield {"inputs": partial_a, "targets": partial_b} diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index d3bcec527..3fa7d7e47 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -112,12 +112,15 @@ def _maybe_download_corpus(tmp_dir): corpus_tar.extractall(tmp_dir) -def _get_or_build_subword_text_encoder(tmp_dir, vocab_filepath): +def _get_or_build_subword_text_encoder(tmp_dir, + vocab_filepath, + target_size): """Builds a SubwordTextEncoder based on the corpus. Args: tmp_dir: directory containing dataset. vocab_filepath: path to store (or load) vocab. + target_size: an optional integer. Returns: a SubwordTextEncoder. @@ -137,8 +140,13 @@ def _get_or_build_subword_text_encoder(tmp_dir, vocab_filepath): line_count += 1 if line_count >= max_lines: break - ret = text_encoder.SubwordTextEncoder() - ret.build_from_token_counts(token_counts, min_count=5) + if target_size == 2 ** 15: + # legacy behavior + ret = text_encoder.SubwordTextEncoder() + ret.build_from_token_counts(token_counts, min_count=5) + else: + ret = text_encoder.SubwordTextEncoder.build_to_target_size( + target_size, token_counts, 1, 1000) ret.store_to_file(vocab_filepath) return ret @@ -183,7 +191,7 @@ def targeted_vocab_size(self): @property def use_train_shards_for_dev(self): - return True + return False def generator(self, data_dir, tmp_dir, is_training): """Generator for lm1b sentences. @@ -204,7 +212,8 @@ def generator(self, data_dir, tmp_dir, is_training): encoder = text_encoder.ByteTextEncoder() else: vocab_filepath = os.path.join(data_dir, self.vocab_file) - encoder = _get_or_build_subword_text_encoder(tmp_dir, vocab_filepath) + encoder = _get_or_build_subword_text_encoder( + tmp_dir, vocab_filepath, self.targeted_vocab_size) for filepath in files: tf.logging.info("filepath = %s", filepath) for line in tf.gfile.Open(filepath): @@ -214,6 +223,28 @@ def generator(self, data_dir, tmp_dir, is_training): yield {"inputs": [0], "targets": tokens} +@registry.register_problem +class LanguagemodelLm1b8kConcat512(LanguagemodelLm1b32k): + """A language model on the 1B words corpus. + + 8k vocabualry. + Training/eval examples are concatenated to a maximum length of 512. + + Happy TPU Training. + + Ratio of dev tokens (including eos) to dev words (including eos) + 207351 / 159658 = 1.29872; multiply ppx by this to compare results. + """ + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def combine_to_length(self): + return 512 + + @registry.register_problem class LanguagemodelLm1bCharacters(LanguagemodelLm1b32k): """A language model on the 1B words corpus, character level.""" diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index f707090f1..964a5fb36 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -585,6 +585,22 @@ def generator(self, data_dir, tmp_dir, is_training): """ raise NotImplementedError() + def maybe_combine_examples(self, generator): + if self.combine_to_length: + if self.has_inputs: + return generator_utils.combine_examples_with_inputs( + generator, self.combine_to_length) + else: + return generator_utils.combine_examples_no_inputs( + generator, self.combine_to_length) + else: + return generator + + @property + def combine_to_length(self): + """An optional integer. Concatenate examples into bigger examples.""" + return None + @property def use_train_shards_for_dev(self): """If true, we only generate training data and hold out shards for dev.""" @@ -630,12 +646,15 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): if self.use_train_shards_for_dev: all_paths = train_paths + dev_paths generator_utils.generate_files( - self.generator(data_dir, tmp_dir, True), all_paths) + self.maybe_combine_examples(self.generator(data_dir, tmp_dir, True)), + all_paths) generator_utils.shuffle_dataset(all_paths) else: generator_utils.generate_dataset_and_shuffle( - self.generator(data_dir, tmp_dir, True), train_paths, - self.generator(data_dir, tmp_dir, False), dev_paths) + self.maybe_combine_examples(self.generator(data_dir, tmp_dir, True)), + train_paths, + self.maybe_combine_examples(self.generator(data_dir, tmp_dir, False)), + dev_paths) def feature_encoders(self, data_dir): if self.is_character_level: diff --git a/tensor2tensor/data_generators/translate_ende.py b/tensor2tensor/data_generators/translate_ende.py index 7358e9b7e..8ca3a726b 100644 --- a/tensor2tensor/data_generators/translate_ende.py +++ b/tensor2tensor/data_generators/translate_ende.py @@ -114,6 +114,18 @@ def target_space_id(self): return problem.SpaceID.DE_BPE_TOK +@registry.register_problem +class TranslateEndeWmtBpe32kConcat512(TranslateEndeWmtBpe32k): + """Problem spec for WMT En-De translation, BPE version. + + Training/eval examples are concatenated to a maximum length of 512. + """ + + @property + def combine_to_length(self): + return 512 + + @registry.register_problem class TranslateEndeWmt8k(translate.TranslateProblem): """Problem spec for WMT En-De translation.""" From 8e1958e380a12bb1d6c24b5bf7cb31b90066499c Mon Sep 17 00:00:00 2001 From: Ashish Vaswani <avaswani@google.com> Date: Mon, 27 Nov 2017 12:53:45 -0800 Subject: [PATCH 0612/4095] Small bug fix. PiperOrigin-RevId: 177058606 --- tensor2tensor/layers/common_layers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index df21a12ac..2b5c3fb34 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -2211,7 +2211,9 @@ def ones_matrix_band_part(rows, cols, num_lower, num_upper, out_shape=None): band = band.reshape(out_shape) band = tf.constant(band, tf.float32) else: - band = tf.matrix_band_part(tf.ones([rows, cols]), num_lower, num_upper) + band = tf.matrix_band_part(tf.ones([rows, cols]), + tf.cast(num_lower, tf.int64), + tf.cast(num_upper, tf.int64)) if out_shape: band = tf.reshape(band, out_shape) From 676e272494a98469a985e9e5e550189ffd150810 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Mon, 27 Nov 2017 13:27:28 -0800 Subject: [PATCH 0613/4095] When using eager, use slow decoding for transformer models. PiperOrigin-RevId: 177062937 --- tensor2tensor/models/transformer.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 4ce3ae5fe..224e83ef5 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -159,8 +159,13 @@ def _greedy_infer(self, features, decode_length): NotImplementedError: If there are multiple data shards. """ with tf.variable_scope(self.name): - decoded_ids, _ = self._fast_decode(features, decode_length) - return decoded_ids, None, None + # TODO(nikip): Remove slow decoding for eager. Eager mode doesn't work + # with accessing _shape which is used in fast decoding currently. + if self._hparams.use_eager_mode: + return self._slow_greedy_infer(features, decode_length) + else: + decoded_ids, _ = self._fast_decode(features, decode_length) + return decoded_ids, None, None def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): """Beam search decoding. @@ -177,9 +182,15 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): samples: an integer `Tensor`. Top samples from the beam search """ with tf.variable_scope(self.name): - decoded_ids, scores = self._fast_decode( - features, decode_length, beam_size, top_beams, alpha) - return {"outputs": decoded_ids, "scores": scores} + # TODO(nikip): Remove slow decoding for eager. Eager mode doesn't work + # with accessing _shape which is used in fast decoding currently. + if self._hparams.use_eager_mode: + return self._beam_decode_slow( + features, decode_length, beam_size, top_beams, alpha) + else: + decoded_ids, scores = self._fast_decode(features, decode_length, + beam_size, top_beams, alpha) + return {"outputs": decoded_ids, "scores": scores} def _fast_decode(self, features, From 6d9b5e1cc01518c033569090faf6fbe519517971 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Mon, 27 Nov 2017 14:10:11 -0800 Subject: [PATCH 0614/4095] merge PRs. PiperOrigin-RevId: 177069364 --- tensor2tensor/bin/t2t-decoder | 5 +- tensor2tensor/bin/t2t-trainer | 26 +- tensor2tensor/data_generators/all_problems.py | 1 + .../data_generators/cnn_dailymail.py | 36 +- tensor2tensor/data_generators/librispeech.py | 323 ++++++++++++++++++ tensor2tensor/utils/decoding.py | 21 +- tensor2tensor/utils/get_cnndm_rouge.sh | 16 + tensor2tensor/utils/get_rouge.py | 92 +++++ 8 files changed, 494 insertions(+), 26 deletions(-) create mode 100644 tensor2tensor/data_generators/librispeech.py create mode 100644 tensor2tensor/utils/get_cnndm_rouge.sh create mode 100644 tensor2tensor/utils/get_rouge.py diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index c2bf97f94..712cb45ce 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -47,9 +47,10 @@ flags = tf.flags FLAGS = flags.FLAGS flags.DEFINE_string("output_dir", "", "Training directory to load from.") -flags.DEFINE_string("decode_from_file", None, "Path to decode file") +flags.DEFINE_string("decode_from_file", None, + "Path to the source file for decoding") flags.DEFINE_string("decode_to_file", None, - "Path prefix to inference output file") + "Path to the decoded (output) file") flags.DEFINE_bool("decode_interactive", False, "Interactive local inference mode.") flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 5a2866da6..97ab3106f 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -59,6 +59,7 @@ flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("master", "", "Address of TensorFlow master.") flags.DEFINE_string("schedule", "train_and_evaluate", "Method of tf.contrib.learn.Experiment to run.") +flags.DEFINE_bool("profile", False, "Profile performance?") def main(_): @@ -83,13 +84,24 @@ def main(_): problem.generate_data(data_dir, tmp_dir) # Run the trainer. - trainer_utils.run( - data_dir=data_dir, - model=FLAGS.model, - output_dir=output_dir, - train_steps=FLAGS.train_steps, - eval_steps=FLAGS.eval_steps, - schedule=FLAGS.schedule) + def run_experiment(): + trainer_utils.run( + data_dir=data_dir, + model=FLAGS.model, + output_dir=output_dir, + train_steps=FLAGS.train_steps, + eval_steps=FLAGS.eval_steps, + schedule=FLAGS.schedule) + + if FLAGS.profile: + with tf.contrib.tfprof.ProfileContext("t2tprof", + trace_steps=range(100), + dump_steps=range(100)) as pctx: + opts = tf.profiler.ProfileOptionBuilder.time_and_memory() + pctx.add_auto_profiling("op", opts, range(100)) + run_experiment() + else: + run_experiment() if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index c7f364cf1..2aca3d377 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -28,6 +28,7 @@ from tensor2tensor.data_generators import ice_parsing from tensor2tensor.data_generators import image from tensor2tensor.data_generators import imdb +from tensor2tensor.data_generators import librispeech from tensor2tensor.data_generators import lm1b from tensor2tensor.data_generators import multinli from tensor2tensor.data_generators import problem_hparams diff --git a/tensor2tensor/data_generators/cnn_dailymail.py b/tensor2tensor/data_generators/cnn_dailymail.py index 239d1af99..636f04a97 100644 --- a/tensor2tensor/data_generators/cnn_dailymail.py +++ b/tensor2tensor/data_generators/cnn_dailymail.py @@ -20,6 +20,7 @@ from __future__ import print_function import hashlib +import io import os import tarfile @@ -46,7 +47,7 @@ # Train/Dev/Test Splits for summarization data _TRAIN_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_train.txt" _DEV_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_val.txt" -_TEST_URLS = "https://github.com/abisee/cnn-dailymail/blob/master/url_lists/all_test.txt" +_TEST_URLS = "https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/all_test.txt" # End-of-sentence marker. @@ -129,7 +130,7 @@ def generate_hash(inp): return filelist -def example_generator(tmp_dir, is_training, sum_token): +def example_generator(all_files, urls_path, sum_token): """Generate examples.""" def fix_run_on_sents(line): if u"@highlight" in line: @@ -140,7 +141,6 @@ def fix_run_on_sents(line): return line return line + u"." - all_files, urls_path = _maybe_download_corpora(tmp_dir, is_training) filelist = example_splits(urls_path, all_files) story_summary_split_token = u" <summary> " if sum_token else " " @@ -178,6 +178,29 @@ def _story_summary_split(story): return story[:split_pos], story[split_pos+split_str_len:] # story, summary +def write_raw_text_to_files(all_files, urls_path, data_dir, tmp_dir, + is_training): + """Write text to files.""" + def write_to_file(all_files, urls_path, data_dir, filename): + with io.open(os.path.join(data_dir, filename+".source"), "w") as fstory: + with io.open(os.path.join(data_dir, filename+".target"), "w") as fsummary: + for example in example_generator(all_files, urls_path, sum_token=True): + story, summary = _story_summary_split(example) + fstory.write(story+"\n") + fsummary.write(summary+"\n") + + filename = "cnndm.train" if is_training else "cnndm.dev" + tf.logging.info("Writing %s" % filename) + write_to_file(all_files, urls_path, data_dir, filename) + + if not is_training: + test_urls_path = generator_utils.maybe_download( + tmp_dir, "all_test.txt", _TEST_URLS) + filename = "cnndm.test" + tf.logging.info("Writing %s" % filename) + write_to_file(all_files, test_urls_path, data_dir, filename) + + @registry.register_problem class SummarizeCnnDailymail32k(problem.Text2TextProblem): """Summarize CNN and Daily Mail articles to their summary highlights.""" @@ -219,10 +242,13 @@ def use_train_shards_for_dev(self): return False def generator(self, data_dir, tmp_dir, is_training): + all_files, urls_path = _maybe_download_corpora(tmp_dir, is_training) encoder = generator_utils.get_or_generate_vocab_inner( data_dir, self.vocab_file, self.targeted_vocab_size, - example_generator(tmp_dir, is_training, sum_token=False)) - for example in example_generator(tmp_dir, is_training, sum_token=True): + example_generator(all_files, urls_path, sum_token=False)) + write_raw_text_to_files(all_files, urls_path, data_dir, tmp_dir, + is_training) + for example in example_generator(all_files, urls_path, sum_token=True): story, summary = _story_summary_split(example) encoded_summary = encoder.encode(summary) + [EOS] encoded_story = encoder.encode(story) + [EOS] diff --git a/tensor2tensor/data_generators/librispeech.py b/tensor2tensor/data_generators/librispeech.py new file mode 100644 index 000000000..d6a07a391 --- /dev/null +++ b/tensor2tensor/data_generators/librispeech.py @@ -0,0 +1,323 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Librispeech dataset.""" + +import os +from subprocess import call +import tarfile +import wave + +# Dependency imports + +import numpy as np + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.layers import common_layers +from tensor2tensor.utils import modality +from tensor2tensor.utils import registry + +import tensorflow as tf + + +_LIBRISPEECH_TRAIN_DATASETS = [ + [ + "http://www.openslr.org/resources/12/train-clean-100.tar.gz", # pylint: disable=line-too-long + "train-clean-100" + ], + [ + "http://www.openslr.org/resources/12/train-clean-360.tar.gz", + "train-clean-360" + ], + [ + "http://www.openslr.org/resources/12/train-other-500.tar.gz", + "train-other-500" + ], +] +_LIBRISPEECH_TEST_DATASETS = [ + [ + "http://www.openslr.org/resources/12/dev-clean.tar.gz", + "dev-clean" + ], + [ + "http://www.openslr.org/resources/12/dev-other.tar.gz", + "dev-other" + ], +] + + +def _collect_data(directory, input_ext, transcription_ext): + """Traverses directory collecting input and target files.""" + # Directory from string to tuple pair of strings + # key: the filepath to a datafile including the datafile's basename. Example, + # if the datafile was "/path/to/datafile.wav" then the key would be + # "/path/to/datafile" + # value: a pair of strings (media_filepath, label) + data_files = dict() + for root, _, filenames in os.walk(directory): + transcripts = [filename for filename in filenames + if transcription_ext in filename] + for transcript in transcripts: + transcript_path = os.path.join(root, transcript) + with open(transcript_path, "r") as transcript_file: + for transcript_line in transcript_file: + line_contents = transcript_line.split(" ", 1) + assert len(line_contents) == 2 + media_base, label = line_contents + key = os.path.join(root, media_base) + assert key not in data_files + media_name = "%s.%s"%(media_base, input_ext) + media_path = os.path.join(root, media_name) + data_files[key] = (media_path, label) + return data_files + + +def _get_audio_data(filepath): + # Construct a true .wav file. + out_filepath = filepath.strip(".flac") + ".wav" + # Assumes sox is installed on system. Sox converts from FLAC to WAV. + call(["sox", filepath, out_filepath]) + wav_file = wave.open(open(out_filepath)) + frame_count = wav_file.getnframes() + byte_array = wav_file.readframes(frame_count) + + data = np.fromstring(byte_array, np.uint8).tolist() + return data, frame_count, wav_file.getsampwidth(), wav_file.getnchannels() + + +class LibrispeechTextEncoder(text_encoder.TextEncoder): + + def encode(self, s): + return [self._num_reserved_ids + ord(c) for c in s] + + def decode(self, ids): + """Transform a sequence of int ids into a human-readable string. + + EOS is not expected in ids. + + Args: + ids: list of integers to be converted. + Returns: + s: human-readable string. + """ + decoded_ids = [] + for id_ in ids: + if 0 <= id_ < self._num_reserved_ids: + decoded_ids.append(text_encoder.RESERVED_TOKENS[int(id_)]) + else: + decoded_ids.append(id_ - self._num_reserved_ids) + return "".join([chr(d) for d in decoded_ids]) + + +@registry.register_audio_modality +class LibrispeechModality(modality.Modality): + """Performs strided conv compressions for audio spectral data.""" + + def bottom(self, inputs): + """Transform input from data space to model space. + + Args: + inputs: A Tensor with shape [batch, ...] + Returns: + body_input: A Tensor with shape [batch, ?, ?, body_input_depth]. + """ + with tf.variable_scope(self.name): + # TODO(aidangomez): Will need to sort out a better audio pipeline + def xnet_resblock(x, filters, res_relu, name): + with tf.variable_scope(name): + # We only stride along the length dimension to preserve the spectral + # bins (which are tiny in dimensionality relative to length) + y = common_layers.separable_conv_block( + x, + filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))], + first_relu=True, + padding="SAME", + force2d=True, + name="sep_conv_block") + y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 1)) + return y + common_layers.conv_block( + x, + filters, [((1, 1), (1, 1))], + padding="SAME", + strides=(2, 1), + first_relu=res_relu, + force2d=True, + name="res_conv0") + + # Rescale from UINT8 to floats in [-1,-1] + signals = (tf.to_float(inputs)-127)/128. + signals = tf.squeeze(signals, [2, 3]) + + # `stfts` is a complex64 Tensor representing the short-time Fourier + # Transform of each signal in `signals`. Its shape is + # [batch_size, ?, fft_unique_bins] + # where fft_unique_bins = fft_length // 2 + 1 = 513. + stfts = tf.contrib.signal.stft(signals, frame_length=1024, frame_step=512, + fft_length=1024) + + # An energy spectrogram is the magnitude of the complex-valued STFT. + # A float32 Tensor of shape [batch_size, ?, 513]. + magnitude_spectrograms = tf.abs(stfts) + + # Warp the linear-scale, magnitude spectrograms into the mel-scale. + num_spectrogram_bins = magnitude_spectrograms.shape[-1].value + lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 64 + sample_rate = 16000 + linear_to_mel_weight_matrix = ( + tf.contrib.signal.linear_to_mel_weight_matrix( + num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz, + upper_edge_hertz)) + mel_spectrograms = tf.tensordot( + magnitude_spectrograms, linear_to_mel_weight_matrix, 1) + # Note: Shape inference for tensordot does not currently handle this case. + mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate( + linear_to_mel_weight_matrix.shape[-1:])) + + x = tf.expand_dims(mel_spectrograms, 2) + x.set_shape([None, None, None, num_mel_bins]) + for i in xrange(self._model_hparams.audio_compression): + x = xnet_resblock(x, 2**(i + 1), True, "compress_block_%d" % i) + return xnet_resblock(x, self._body_input_depth, False, + "compress_block_final") + + +@registry.register_problem() +class Librispeech(problem.Problem): + """Problem spec for English word to dictionary definition.""" + + @property + def is_character_level(self): + return True + + @property + def input_space_id(self): + return problem.SpaceID.AUDIO_SPECTRAL + + @property + def target_space_id(self): + return problem.SpaceID.EN_CHR + + @property + def num_shards(self): + return 100 + + @property + def use_subword_tokenizer(self): + return False + + @property + def num_dev_shards(self): + return 1 + + @property + def use_train_shards_for_dev(self): + """If true, we only generate training data and hold out shards for dev.""" + return False + + def feature_encoders(self, _): + return { + "inputs": text_encoder.TextEncoder(), + "targets": LibrispeechTextEncoder(), + } + + def example_reading_spec(self): + data_fields = { + "inputs": tf.VarLenFeature(tf.int64), + "targets": tf.VarLenFeature(tf.int64), + } + data_items_to_decoders = None + return (data_fields, data_items_to_decoders) + + def generator(self, data_dir, tmp_dir, training, + eos_list=None, start_from=0, how_many=0): + eos_list = [1] if eos_list is None else eos_list + datasets = (_LIBRISPEECH_TRAIN_DATASETS if training + else _LIBRISPEECH_TEST_DATASETS) + num_reserved_ids = self.feature_encoders(None)["targets"].num_reserved_ids + i = 0 + for url, subdir in datasets: + filename = os.path.basename(url) + compressed_file = generator_utils.maybe_download(tmp_dir, filename, url) + + read_type = "r:gz" if filename.endswith("tgz") else "r" + with tarfile.open(compressed_file, read_type) as corpus_tar: + # Create a subset of files that don't already exist. + # tarfile.extractall errors when encountering an existing file + # and tarfile.extract is extremely slow + members = [] + for f in corpus_tar: + if not os.path.isfile(os.path.join(tmp_dir, f.name)): + members.append(f) + corpus_tar.extractall(tmp_dir, members=members) + + data_dir = os.path.join(tmp_dir, "LibriSpeech", subdir) + data_files = _collect_data(data_dir, "flac", "txt") + data_pairs = data_files.values() + for media_file, text_data in sorted(data_pairs)[start_from:]: + if how_many > 0 and i == how_many: + return + i += 1 + audio_data, sample_count, sample_width, num_channels = _get_audio_data( + media_file) + label = [num_reserved_ids + ord(c) for c in text_data] + eos_list + yield { + "inputs": audio_data, + "audio/channel_count": [num_channels], + "audio/sample_count": [sample_count], + "audio/sample_width": [sample_width], + "targets": label + } + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + train_paths = self.training_filepaths( + data_dir, self.num_shards, shuffled=False) + dev_paths = self.dev_filepaths( + data_dir, self.num_dev_shards, shuffled=False) + if self.use_train_shards_for_dev: + all_paths = train_paths + dev_paths + generator_utils.generate_files( + self.generator(data_dir, tmp_dir, True), all_paths) + generator_utils.shuffle_dataset(all_paths) + else: + generator_utils.generate_dataset_and_shuffle( + self.generator(data_dir, tmp_dir, True), train_paths, + self.generator(data_dir, tmp_dir, False), dev_paths) + + def hparams(self, defaults, unused_model_hparams): + p = defaults + p.stop_at_eos = int(False) + p.input_modality = {"inputs": ("audio:librispeech_modality", None)} + p.target_modality = (registry.Modalities.SYMBOL, 256) + + def preprocess_example(self, example, mode, hparams): + return example + + +# TODO(lukaszkaiser): clean up hparams or remove from here. +def add_librispeech_hparams(hparams): + """Adding to base hparams the attributes for for librispeech.""" + hparams.batch_size = 36 + hparams.audio_compression = 8 + hparams.hidden_size = 2048 + hparams.max_input_seq_length = 600000 + hparams.max_target_seq_length = 350 + hparams.max_length = hparams.max_input_seq_length + hparams.min_length_bucket = hparams.max_input_seq_length // 2 + hparams.learning_rate = 0.05 + hparams.train_steps = 5000000 + hparams.num_hidden_layers = 4 + return hparams diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 629b2ed26..23ae663ac 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -83,9 +83,9 @@ def log_decode_results(inputs, decoded_targets = None if identity_output: - decoded_outputs = " ".join(map(str, outputs.flatten())) + decoded_outputs = "".join(map(str, outputs.flatten())) if targets is not None: - decoded_targets = " ".join(map(str, targets.flatten())) + decoded_targets = "".join(map(str, targets.flatten())) else: decoded_outputs = targets_vocab.decode(_save_until_eos(outputs, is_image)) if targets is not None: @@ -252,17 +252,14 @@ def input_fn(): # _decode_batch_input_fn sorted_inputs.reverse() decodes.reverse() - # Dumping inputs and outputs to file filename.decodes in - # format result\tinput in the same order as original inputs - if decode_to_file: - output_filename = decode_to_file - else: - output_filename = filename + # If decode_to_file was provided use it as the output filename without change + # (except for adding shard_id if using more shards for decoding). + # Otherwise, use the input filename plus model, hp, problem, beam, alpha. + decode_filename = decode_to_file if decode_to_file else filename if decode_hp.shards > 1: - base_filename = output_filename + ("%.2d" % decode_hp.shard_id) - else: - base_filename = output_filename - decode_filename = _decode_filename(base_filename, problem_name, decode_hp) + decode_filename += "%.2d" % decode_hp.shard_id + if not decode_to_file: + decode_filename = _decode_filename(decode_filename, problem_name, decode_hp) tf.logging.info("Writing decodes into %s" % decode_filename) outfile = tf.gfile.Open(decode_filename, "w") for index in range(len(sorted_inputs)): diff --git a/tensor2tensor/utils/get_cnndm_rouge.sh b/tensor2tensor/utils/get_cnndm_rouge.sh new file mode 100644 index 000000000..0f52bb56c --- /dev/null +++ b/tensor2tensor/utils/get_cnndm_rouge.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Path to moses dir +mosesdecoder=$1 + +# Path to file containing gold summaries, one per line +targets_file=$2 +# Path to file containing model generated summaries, one per line +decodes_file=$3 + +# Tokenize. +perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l en < $targets_file > $targets_file.tok +perl $mosesdecoder/scripts/tokenizer/tokenizer.perl -l en < $decodes_file > $decodes_file.tok + +# Get rouge scores +python get_rouge.py --decodes_filename $decodes_file.tok --targets_filename $targets_file.tok diff --git a/tensor2tensor/utils/get_rouge.py b/tensor2tensor/utils/get_rouge.py new file mode 100644 index 000000000..dc9355b0d --- /dev/null +++ b/tensor2tensor/utils/get_rouge.py @@ -0,0 +1,92 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Computing rouge scores using pyrouge.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import os +import shutil +from tempfile import mkdtemp + +# Dependency imports + +from pyrouge import Rouge155 +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + +tf.flags.DEFINE_string("decodes_filename", None, + "File containing model generated summaries tokenized") +tf.flags.DEFINE_string("targets_filename", None, + "File containing model target summaries tokenized") + + +def write_to_file(filename, data): + data = ".\n".join(data.split(". ")) + with open(filename, "w") as fp: + fp.write(data) + + +def prep_data(decode_dir, target_dir): + with open(FLAGS.decodes_filename, "rb") as fdecodes: + with open(FLAGS.targets_filename, "rb") as ftargets: + for i, (d, t) in enumerate(zip(fdecodes, ftargets)): + write_to_file(os.path.join(decode_dir, "rouge.%06d.txt" % (i+1)), d) + write_to_file(os.path.join(target_dir, "rouge.A.%06d.txt" % (i+1)), t) + if (i+1 % 1000) == 0: + tf.logging.into("Written %d examples to file" % i) + + +def main(_): + rouge = Rouge155() + rouge.log.setLevel(logging.ERROR) + rouge.system_filename_pattern = "rouge.(\\d+).txt" + rouge.model_filename_pattern = "rouge.[A-Z].#ID#.txt" + + tf.logging.set_verbosity(tf.logging.INFO) + + tmpdir = mkdtemp() + tf.logging.info("tmpdir: %s" % tmpdir) + # system = decodes/predictions + system_dir = os.path.join(tmpdir, "system") + # model = targets/gold + model_dir = os.path.join(tmpdir, "model") + os.mkdir(system_dir) + os.mkdir(model_dir) + + rouge.system_dir = system_dir + rouge.model_dir = model_dir + + prep_data(rouge.system_dir, rouge.model_dir) + + rouge_scores = rouge.convert_and_evaluate() + rouge_scores = rouge.output_to_dict(rouge_scores) + for prefix in ["rouge_1", "rouge_2", "rouge_l"]: + for suffix in ["f_score", "precision", "recall"]: + key = "_".join([prefix, suffix]) + tf.logging.info("%s: %.4f" % (key, rouge_scores[key])) + + # clean up after pyrouge + shutil.rmtree(tmpdir) + shutil.rmtree(rouge._config_dir) # pylint: disable=protected-access + shutil.rmtree(os.path.split(rouge._system_dir)[0]) # pylint: disable=protected-access + + +if __name__ == "__main__": + tf.app.run() From a3d0ffe5d6e7dfcaa39a03bcd7493f6a8beb7e24 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 27 Nov 2017 14:36:32 -0800 Subject: [PATCH 0615/4095] Small updates to T2TModel.__call__ callers PiperOrigin-RevId: 177073383 --- tensor2tensor/models/transformer_vae.py | 3 +-- tensor2tensor/utils/model_builder.py | 6 +++--- tensor2tensor/utils/registry.py | 2 +- tensor2tensor/utils/t2t_model.py | 10 +++++----- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index e1af69b8e..0bb5efea9 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -460,8 +460,7 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, dtype=tf.int64) features["targets"] = initial_output - logits, _ = self.__call__( - features, skip=False, force_full_predict=True) + logits, _ = self(features, skip=False, force_full_predict=True) # pylint: disable=not-callable samples = tf.argmax(logits, axis=-1) if inputs_old is not None: # Restore to not confuse Estimator. diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 13ebaa91e..67447491e 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -125,9 +125,9 @@ def nth_model(n): # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. skip_this_one = skip_this_one and (worker_id != 0 or n > 1) if eval_run_autoregressive and mode == tf.estimator.ModeKeys.EVAL: - sharded_logits, losses_dict = model_class.eval_autoregressive(features) + logits, losses_dict = model_class.eval_autoregressive(features) else: - sharded_logits, losses_dict = model_class( + logits, losses_dict = model_class( features, skip=(skipping_is_on and skip_this_one)) with tf.variable_scope("losses_avg"): total_loss, ops = 0.0, [] @@ -155,7 +155,7 @@ def nth_model(n): with tf.control_dependencies(ops): # Make sure the ops run. # Ensure the loss is a scalar here. total_loss = tf.reshape(total_loss, [], name="total_loss_control_id") - return [total_loss, tf.concat(sharded_logits, 0)] + return [total_loss, logits] model_output = input_fn_builder.cond_on_index( nth_model, diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index e21702251..69edcb473 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -122,7 +122,7 @@ def decorator(model_cls, registration_name=None): model_name = registration_name or default_name(model_cls) if model_name in _MODELS: raise LookupError("Model %s already registered." % model_name) - model_cls.REGISTERED_NAME = property(lambda _: model_name) + model_cls.REGISTERED_NAME = model_name _MODELS[model_name] = model_cls return model_cls diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 51120e41d..0db573b7e 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -180,13 +180,13 @@ def eval_autoregressive(self, features=None, decode_length=50): decode_length: an integer. How many additional timesteps to decode. Returns: - sharded_logits: a list of `Tensor`s. Assumes one datashard. + logits: `Tensor` losses: a dictionary: {loss-name (string): floating point `Scalar`}. Contains a single key "training". """ _, logits, losses = self._slow_greedy_infer( features, decode_length=decode_length) - return [logits], losses + return logits, losses def infer(self, features=None, @@ -280,7 +280,7 @@ def symbols_to_logits_fn(ids): features["targets"] = ids self._coverage = None - logits, _ = self.__call__(features) + logits, _ = self(features) # pylint: disable=not-callable # now self._coverage is a coverage tensor for the first datashard. # it has shape [batch_size] and contains floats between 0 and # source_length. @@ -493,7 +493,7 @@ def sample(self, features): logits: a list of `Tensor`s, one per datashard. losses: a dictionary: {loss-name (string): floating point `Scalar`}. """ - logits, losses = self.__call__(features) + logits, losses = self(features) # pylint: disable=not-callable if self._hparams.sampling_method == "argmax": samples = tf.argmax(logits, axis=-1) else: @@ -534,7 +534,7 @@ def _model_fn(self, features, skip=False, force_full_predict=False): optimizations are not used even when allowed and in PREDICT mode. Returns: - sharded_logits: a list of `Tensor`s, one per datashard. + logits: `Tensor` losses: a dictionary: {loss-name (string): floating point `Scalar`}. """ start_time = time.time() From 5adacd0125de265191a06399caa1152e1d94acd1 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 28 Nov 2017 11:40:10 -0800 Subject: [PATCH 0616/4095] Make daisy_chain_variables and hparam instead of flag and unset to allow LSTM to train in distributed mode. PiperOrigin-RevId: 177193238 --- tensor2tensor/layers/common_hparams.py | 5 +++++ tensor2tensor/models/lstm.py | 1 + tensor2tensor/models/neural_gpu.py | 9 +++++---- tensor2tensor/utils/decoding.py | 2 +- tensor2tensor/utils/devices.py | 5 +++-- tensor2tensor/utils/model_builder.py | 2 +- tensor2tensor/utils/trainer_utils.py | 4 +--- 7 files changed, 17 insertions(+), 11 deletions(-) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index e75bf4099..043142359 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -176,6 +176,11 @@ def basic_params1(): scheduled_sampling_prob=0.0, scheduled_sampling_warmup_steps=50000, scheduled_sampling_gold_mixin_prob=0.5, + # This setting controls whether to copy variables around in a daisy chain + # (if true) or leave their placement to Tensorflow. It only affects multi + # device training and mostly should be turned on for performance. One + # exception are recurrent models: with dynamic loops it must be off. + daisy_chain_variables=True, # This is the actual batch size, *not* tokens per batch (i.e. for # language models this is the number of sentences in the batch) tpu_batch_size_per_shard=24, diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index 63a0806e7..e3a5bf9ab 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -159,6 +159,7 @@ def model_fn_body(self, features): def lstm_seq2seq(): """hparams for LSTM.""" hparams = common_hparams.basic_params1() + hparams.daisy_chain_variables = False hparams.batch_size = 1024 hparams.hidden_size = 128 hparams.num_hidden_layers = 2 diff --git a/tensor2tensor/models/neural_gpu.py b/tensor2tensor/models/neural_gpu.py index 4037aa8d4..ae692968d 100644 --- a/tensor2tensor/models/neural_gpu.py +++ b/tensor2tensor/models/neural_gpu.py @@ -31,7 +31,7 @@ import tensorflow as tf -def neural_gpu(inputs, hparams, name=None): +def neural_gpu_body(inputs, hparams, name=None): """The core Neural GPU.""" with tf.variable_scope(name, "neural_gpu"): @@ -59,7 +59,7 @@ def step(state, inp): # pylint: disable=missing-docstring class NeuralGPU(t2t_model.T2TModel): def model_fn_body(self, features): - return neural_gpu(features["inputs"], self._hparams) + return neural_gpu_body(features["inputs"], self._hparams) def diagonal_neural_gpu(inputs, hparams, name=None): @@ -97,10 +97,11 @@ def model_fn_body(self, features): return diagonal_neural_gpu(features["inputs"], self._hparams) -@registry.register_hparams("neuralgpu_1") -def neural_gpu_params1(): +@registry.register_hparams +def neural_gpu(): """Set of hyperparameters.""" hparams = common_hparams.basic_params1() + hparams.daisy_chain_variables = False hparams.batch_size = 1024 hparams.num_hidden_layers = 1 hparams.hidden_size = 256 diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 23ae663ac..d0913e0e1 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -114,7 +114,7 @@ def decode_from_dataset(estimator, mode=tf.estimator.ModeKeys.PREDICT, hparams=hparams, data_dir=hparams.data_dir, - num_datashards=devices.data_parallelism().n, + num_datashards=devices.data_parallelism(hparams).n, fixed_problem=problem_idx, batch_size=decode_hp.batch_size, dataset_split=dataset_split, diff --git a/tensor2tensor/utils/devices.py b/tensor2tensor/utils/devices.py index e296394da..cf1f5fb25 100644 --- a/tensor2tensor/utils/devices.py +++ b/tensor2tensor/utils/devices.py @@ -81,7 +81,7 @@ def ps_devices(all_workers=False): return [""] -def data_parallelism(all_workers=False): +def data_parallelism(hparams, all_workers=False): """Over which devices do we split each training batch. In old-fashioned async mode, we split the batch over all GPUs on the @@ -95,6 +95,7 @@ def data_parallelism(all_workers=False): between datashards. Args: + hparams: model hyperparameters (an HParams object). all_workers: whether the devices are all async workers or just this one. Returns: @@ -148,4 +149,4 @@ def _replica_device_setter(worker_device): datashard_devices, reuse=True, caching_devices=caching_devices, - daisy_chain_variables=FLAGS.daisy_chain_variables) + daisy_chain_variables=hparams.daisy_chain_variables) diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 67447491e..9a05dd16d 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -78,7 +78,7 @@ def model_fn(model, decode_hp = decode_hparams # TODO(rsepassi): This still depends on FLAGS. Rm eventually. - dp = devices.data_parallelism() + dp = devices.data_parallelism(hparams) tf.get_variable_scope().set_initializer(_get_variable_initializer(hparams)) is_training = mode == tf.estimator.ModeKeys.TRAIN diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index e1a3947fa..b875f7ca8 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -109,8 +109,6 @@ flags.DEFINE_bool("locally_shard_to_cpu", False, "Use CPU as a sharding device running locally. This allows " "to test sharded model construction on a machine with 1 GPU.") -flags.DEFINE_bool("daisy_chain_variables", True, - "copy variables around in a daisy chain") flags.DEFINE_bool("sync", False, "Sync compute on PS.") flags.DEFINE_string("worker_job", "/job:localhost", "name of worker job") flags.DEFINE_integer("worker_gpu", 1, "How many GPUs to use.") @@ -219,7 +217,7 @@ def create_experiment_components(data_dir, model_name, hparams, run_config): # hparams batch_size is used as minibatch size instead of tokens in batch batch_size = (hparams.use_fixed_batch_size and hparams.batch_size) or None - num_datashards = devices.data_parallelism().n + num_datashards = devices.data_parallelism(hparams).n train_input_fn = input_fn_builder.build_input_fn( mode=tf.estimator.ModeKeys.TRAIN, hparams=hparams, From 398e85b08c4ec65d79d228abb1edc81ccd8f2dca Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Tue, 28 Nov 2017 15:39:09 -0800 Subject: [PATCH 0617/4095] Move Estimator input_fn and model_fn construction into Problem and T2TModel, respectively, which allows subclassing PiperOrigin-RevId: 177229237 --- tensor2tensor/data_generators/problem.py | 117 +++++++ tensor2tensor/models/transformer.py | 2 +- tensor2tensor/tpu/tpu_trainer_lib.py | 346 ++------------------- tensor2tensor/utils/modality.py | 5 + tensor2tensor/utils/t2t_model.py | 370 ++++++++++++++++++++--- 5 files changed, 467 insertions(+), 373 deletions(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 964a5fb36..d2e30cbff 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -24,6 +24,7 @@ import six from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_encoder +from tensor2tensor.utils import data_reader from tensor2tensor.utils import metrics from tensor2tensor.utils import registry import tensorflow as tf @@ -457,6 +458,90 @@ def feature_info(self): self._feature_info = features return features + def make_estimator_input_fn(self, mode, hparams): + + def estimator_input_fn(params, config): + return self.input_pipeline(mode, hparams, params=params, config=config) + + return estimator_input_fn + + def input_pipeline(self, mode, hparams, params=None, config=None): + """Builds input pipeline for problem. + + Args: + mode: tf.estimator.ModeKeys + hparams: HParams, model hparams + params: dict, may include "batch_size" + config: RunConfig; if passed, should include t2t_device_info dict + + Returns: + (features_dict<str name, Tensor feature>, Tensor targets) + """ + tf.logging.warning("Problem.input_pipeline implements a subset of " + "input_fn_builder.build_input_fn and is currently only " + "used in tpu_trainer.") + is_training = mode == tf.estimator.ModeKeys.TRAIN + num_threads = 4 if is_training else 1 + batch_size = _get_batch_size(params, hparams, config) + + def valid_size(example): + return data_reader.example_valid_size(example, hparams.min_length, + hparams.max_length) + + def define_shapes(example): + """Set the right shapes for the features.""" + inputs = example["inputs"] + targets = example["targets"] + + # Ensure inputs and targets are proper rank. + while len(inputs.get_shape()) < 4: + inputs = tf.expand_dims(inputs, axis=-1) + while len(targets.get_shape()) < 4: + targets = tf.expand_dims(targets, axis=-1) + + example["inputs"] = inputs + example["targets"] = targets + + # Ensure batch size is set on all features + for _, t in six.iteritems(example): + shape = t.get_shape().as_list() + shape[0] = batch_size + t.set_shape(t.get_shape().merge_with(shape)) + # Assert shapes are fully known + t.get_shape().assert_is_fully_defined() + + return example + + # Read and preprocess + data_dir = hparams.data_dir + dataset = self.dataset( + mode=mode, data_dir=data_dir, num_threads=num_threads, hparams=hparams) + dataset = dataset.map( + data_reader.cast_int64_to_int32, num_threads=num_threads) + if is_training: + dataset = dataset.repeat(None) + + # Batch (and pad) + # TODO(rsepassi): Add support for bucketing by length + if _are_shapes_fully_defined(dataset.output_shapes): + dataset = dataset.apply( + tf.contrib.data.batch_and_drop_remainder(batch_size)) + else: + # If shapes are not fully defined, filter out long ones and pad to + # hparams.max_length + dataset = dataset.filter(valid_size) + padded_shapes = _fill_shape_nones( + dataset.output_shapes, none_filler=hparams.max_length) + dataset = dataset.apply( + tf.contrib.data.padded_batch_and_drop_remainder(batch_size, + padded_shapes)) + + dataset = dataset.map(define_shapes, num_parallel_calls=num_threads) + dataset = dataset.prefetch(1) + features = dataset.make_one_shot_iterator().get_next() + + return features, features["targets"] + class FeatureInfo(object): @@ -693,3 +778,35 @@ def eval_metrics(self): metrics.Metrics.APPROX_BLEU, metrics.Metrics.ROUGE_2_F, metrics.Metrics.ROUGE_L_F ] + + +def _are_shapes_fully_defined(shapes_dict): + for shape in shapes_dict.values(): + if not shape.is_fully_defined(): + return False + return True + + +def _get_batch_size(params, hparams, config): + """Batch size determined by params dict, HParams, and RunConfig.""" + # If params specifies batch size, use that. TPUEstimator passes batch size in + # params. + batch_size = params and params.get("batch_size") + + # If not set, then we're running on CPU/GPU, so use the batch size from the + # hparams, and multiply by the number of data shards. + if not batch_size: + batch_size = hparams.tpu_batch_size_per_shard + if config: + batch_size *= config.t2t_device_info["num_shards"] + + return batch_size + + +def _fill_shape_nones(shapes_dict, none_filler=None): + padded_shapes = {} + for key, shape in six.iteritems(shapes_dict): + padded_shapes[key] = [ + (dim if dim is not None else none_filler) for dim in shape.as_list() + ] + return padded_shapes diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 224e83ef5..74509c098 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -225,7 +225,7 @@ def _fast_decode(self, inputs = features["inputs"] batch_size = common_layers.shape_list(inputs)[0] target_modality = self._problem_hparams.target_modality - if t2t_model.is_class_modality(target_modality): + if target_modality.is_class_modality: decode_length = 1 else: decode_length = common_layers.shape_list(inputs)[1] + decode_length diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 65618fc1b..08c352d80 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -19,330 +19,14 @@ from __future__ import division from __future__ import print_function -import copy - # Dependency imports -import six - -from tensor2tensor.utils import data_reader -from tensor2tensor.utils import expert_utils -from tensor2tensor.utils import metrics -from tensor2tensor.utils import optimize -from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model from tensor2tensor.utils import trainer_utils import tensorflow as tf -def _create_dummy_vars(): - """Dummy vars for restore to work when not using TPU codepath.""" - with tf.variable_scope("losses_avg"): - with tf.variable_scope("problem_0"): - for var_name in ["total", "extra", "training"]: - tf.get_variable( - "%s_loss" % var_name, initializer=100.0, trainable=False) - with tf.variable_scope("train_stats"): - tf.get_variable("problem_0_steps", initializer=0, trainable=False) - - -def _get_batch_size(params, hparams, config): - """Batch size determined by params dict, HParams, and RunConfig.""" - # If params specifies batch size, use that. TPUEstimator passes batch size in - # params. - batch_size = params and params.get("batch_size") - - # If not set, then we're running on CPU/GPU, so use the batch size from the - # hparams, and multiply by the number of data shards. - if not batch_size: - batch_size = hparams.tpu_batch_size_per_shard - if config: - batch_size *= config.t2t_device_info["num_shards"] - - return batch_size - - -def t2t_input_fn(problem, mode, hparams, params=None, config=None): - """Builds input pipeline for problem. - - Args: - problem: Problem to build input pipeline for - mode: tf.estimator.ModeKeys - hparams: HParams - params: dict, may include "batch_size" - config: RunConfig - - Returns: - (features_dict<str name, Tensor feature>, Tensor targets) - """ - is_training = mode == tf.estimator.ModeKeys.TRAIN - num_threads = 4 if is_training else 1 - - batch_size = _get_batch_size(params, hparams, config) - - def valid_size(example): - return data_reader.example_valid_size(example, hparams.min_length, - hparams.max_length) - - def define_shapes(example): - """Set the right shapes for the features.""" - inputs = example["inputs"] - targets = example["targets"] - - # Ensure inputs and targets are proper rank. - while len(inputs.get_shape()) < 4: - inputs = tf.expand_dims(inputs, axis=-1) - while len(targets.get_shape()) < 4: - targets = tf.expand_dims(targets, axis=-1) - - example["inputs"] = inputs - example["targets"] = targets - - # Ensure batch size is set on all features - for _, t in six.iteritems(example): - shape = t.get_shape().as_list() - shape[0] = batch_size - t.set_shape(t.get_shape().merge_with(shape)) - # Assert shapes are fully known - t.get_shape().assert_is_fully_defined() - - return example - - # Read and preprocess - data_dir = hparams.data_dir - dataset = problem.dataset( - mode=mode, data_dir=data_dir, num_threads=num_threads, hparams=hparams) - dataset = dataset.map( - data_reader.cast_int64_to_int32, num_threads=num_threads) - if is_training: - dataset = dataset.repeat(None) - - # Batch (and pad) - if _are_shapes_fully_defined(dataset.output_shapes): - dataset = dataset.apply( - tf.contrib.data.batch_and_drop_remainder(batch_size)) - else: - # If shapes are not fully defined, filter out long ones and pad to - # hparams.max_length - dataset = dataset.filter(valid_size) - padded_shapes = _fill_shape_nones( - dataset.output_shapes, none_filler=hparams.max_length) - dataset = dataset.apply( - tf.contrib.data.padded_batch_and_drop_remainder(batch_size, - padded_shapes)) - - dataset = dataset.map(define_shapes, num_parallel_calls=num_threads) - dataset = dataset.prefetch(1) - features = dataset.make_one_shot_iterator().get_next() - - return features, features["targets"] - - -def get_input_fn(mode, hparams): - """Get input fn for Estimator. See input_fn.""" - - def wrapped_input_fn(params, config): - return t2t_input_fn( - hparams.problem_instances[0], - mode, - hparams, - params=params, - config=config) - - return wrapped_input_fn - - -def _are_shapes_fully_defined(shapes_dict): - for shape in shapes_dict.values(): - if not shape.is_fully_defined(): - return False - return True - - -def _fill_shape_nones(shapes_dict, none_filler=None): - padded_shapes = {} - for key, shape in six.iteritems(shapes_dict): - padded_shapes[key] = [ - (dim if dim is not None else none_filler) for dim in shape.as_list() - ] - return padded_shapes - - -def create_data_parallelism(num_gpus=1, - gpu_order="", - shard_to_cpu=False, - num_shards=1): - """Create Parallelism object.""" - gpus = list(range(num_gpus)) - if gpu_order: - gpus = [int(s) for s in gpu_order.split(" ")] - assert len(gpus) == num_gpus - data_shard_devices = ["gpu:%d" % i for i in gpus] - if shard_to_cpu or num_gpus < 1: - data_shard_devices += ["cpu:0"] - assert len(data_shard_devices) == num_shards - tf.logging.info("Data parallel devices: %s", data_shard_devices) - return expert_utils.Parallelism(data_shard_devices, reuse=True) - - -def t2t_model_fn(model_name, - hparams, - features, - labels, - mode, - config=None, - params=None, - use_tpu=True): - """Model fn. - - Args: - model_name: str, registered model name. - hparams: HParams - features: dict<str name, Tensor feature> - labels: Tensor - mode: tf.estimator.ModeKeys - config: RunConfig - params: dict, may include batch_size - use_tpu: bool, whether using TPU - - Returns: - EstimatorSpec or TPUEstimatorSpec - """ - _create_dummy_vars() - hparams = copy.deepcopy(hparams) - problem = hparams.problem_instances[0] - problem_hp = hparams.problems[0] - - features["problem_choice"] = tf.constant(0) - features["input_space_id"] = tf.constant(problem_hp.input_space_id) - features["target_space_id"] = tf.constant(problem_hp.target_space_id) - - # Build and call model - data_parallelism = ( - expert_utils.Parallelism([""]) - if use_tpu else create_data_parallelism(**config.t2t_device_info)) - model = registry.model(model_name)( - hparams, mode, problem_hp, data_parallelism=data_parallelism) - logits, losses_dict = model(features) - - # Set known shapes - shape = logits.get_shape().as_list() - if shape[0] is None: - shape[0] = _get_batch_size(params, hparams, config) - if shape[1] is None: - shape[1] = hparams.max_length - logits.set_shape(shape) - - # Accumulate losses - assert "training" in losses_dict - loss = sum(losses_dict.values()) - - if mode == tf.estimator.ModeKeys.EVAL: - if use_tpu: - eval_metrics_fn = create_eval_metrics_fn(problem, hparams) - _remove_summaries() - return tf.contrib.tpu.TPUEstimatorSpec( - mode, eval_metrics=(eval_metrics_fn, [logits, labels]), loss=loss) - else: - eval_metrics_fns = metrics.create_evaluation_metrics([problem], hparams) - eval_metrics = {} - for metric_name, metric_fn in six.iteritems(eval_metrics_fns): - eval_metrics[metric_name] = metric_fn(logits, features) - - return tf.estimator.EstimatorSpec( - mode, - predictions={"predictions": logits}, - eval_metric_ops=eval_metrics, - loss=loss) - - assert mode == tf.estimator.ModeKeys.TRAIN - - lr = hparams.learning_rate * optimize.learning_rate_decay(hparams) - train_op = optimize.optimize(loss, lr, hparams, use_tpu=use_tpu) - - if use_tpu: - _remove_summaries() # summaries not currently working on TPU - return tf.contrib.tpu.TPUEstimatorSpec(mode, loss=loss, train_op=train_op) - else: - return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) - - -def get_model_fn(model_name, hparams, use_tpu=True): - """Model fn for Estimator. See model_fn.""" - - def wrapping_model_fn(features, labels, mode, params, config): - return t2t_model_fn( - model_name, - hparams, - features, - labels, - mode, - config=config, - params=params, - use_tpu=use_tpu) - - return wrapping_model_fn - - -# These metrics are implemented with py_funcs and therefore do no work with TPU -TPU_METRIC_BLACKLIST = set([ - metrics.Metrics.APPROX_BLEU, - metrics.Metrics.ROUGE_2_F, - metrics.Metrics.ROUGE_L_F, -]) - - -def create_eval_metrics_fn(problem, hparams): - """Create the metrics_fn that TPUEstimatorSpec expects.""" - - tm = problem.get_hparams().target_modality - if isinstance(tm, tuple): - tm = registry.create_modality(tm, hparams) - weights_fn = tm.targets_weights_fn - - def make_metric_fn(metric_fn): - - def wrapped_metric_fn(logits, labels): - num, den = metric_fn(logits, labels, weights_fn=weights_fn) - return tf.metrics.mean(num, den) - - return wrapped_metric_fn - - metric_fns = [] - eval_metrics = problem.eval_metrics() - - for metric in eval_metrics: - if metric in TPU_METRIC_BLACKLIST: - tf.logging.warn("Skipping eval metric %s in TPU_METRIC_BLACKLIST", metric) - continue - name = "metrics-%s/%s" % (problem.name, metric) - metric_fns.append((name, make_metric_fn(metrics.METRICS_FNS[metric]))) - - def all_metrics_fn(logits, labels): - metrics_dict = {} - - for name, fn in metric_fns: - metrics_dict[name] = fn(logits, labels) - - return metrics_dict - - return all_metrics_fn - - -def _remove_summaries(): - g = tf.get_default_graph() - key = tf.GraphKeys.SUMMARIES - del g.get_collection_ref(key)[:] - assert not g.get_collection(key) - - -def _clip_gradients_by_norm(grads_and_vars, clip_gradients): - """Clips gradients by global norm.""" - gradients, variables = zip(*grads_and_vars) - clipped_gradients, _ = tf.clip_by_global_norm(gradients, clip_gradients) - return list(zip(clipped_gradients, variables)) - - def create_run_config(master="", model_dir=None, iterations_per_loop=1000, @@ -388,8 +72,13 @@ def create_run_config(master="", return config -def create_estimator(model_fn, run_config, batch_size=16, use_tpu=True): +def create_estimator(model_name, hparams, run_config, use_tpu=True): + model_fn = t2t_model.T2TModel.make_estimator_model_fn( + model_name, hparams, use_tpu=use_tpu) + if use_tpu: + batch_size = hparams.tpu_batch_size_per_shard + batch_size *= run_config.tpu_config.num_shards return tf.contrib.tpu.TPUEstimator( model_fn=model_fn, model_dir=run_config.model_dir, @@ -411,16 +100,21 @@ def create_experiment(run_config, min_eval_frequency, use_tpu=True): """Create Experiment.""" + # HParams hparams.add_hparam("data_dir", data_dir) trainer_utils.add_problem_hparams(hparams, problem_name) - batch_size = hparams.tpu_batch_size_per_shard - if use_tpu: - batch_size *= run_config.tpu_config.num_shards - model_fn = get_model_fn(model_name, hparams, use_tpu=use_tpu) - estimator = create_estimator( - model_fn, run_config, batch_size, use_tpu=use_tpu) - train_input_fn = get_input_fn(tf.estimator.ModeKeys.TRAIN, hparams) - eval_input_fn = get_input_fn(tf.estimator.ModeKeys.EVAL, hparams) + + # Estimator + estimator = create_estimator(model_name, hparams, run_config, use_tpu=use_tpu) + + # Input fns from Problem + problem = hparams.problem_instances[0] + train_input_fn = problem.make_estimator_input_fn( + tf.estimator.ModeKeys.TRAIN, hparams) + eval_input_fn = problem.make_estimator_input_fn( + tf.estimator.ModeKeys.EVAL, hparams) + + # Experiment return tf.contrib.learn.Experiment( estimator=estimator, train_input_fn=train_input_fn, diff --git a/tensor2tensor/utils/modality.py b/tensor2tensor/utils/modality.py index d06b35523..f2525e313 100644 --- a/tensor2tensor/utils/modality.py +++ b/tensor2tensor/utils/modality.py @@ -23,6 +23,7 @@ # Dependency imports from tensor2tensor.layers import common_layers +from tensor2tensor.utils import registry import tensorflow as tf @@ -194,3 +195,7 @@ def loss_sharded(self, sharded_top_out, sharded_targets, data_parallelism): loss = tf.add_n(sharded_loss_num) / tf.maximum(1.0, tf.add_n(sharded_loss_den)) return loss + + @property + def is_class_modality(self): + return self.name.startswith(registry.Modalities.CLASS_LABEL) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 0db573b7e..c2367041b 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -30,6 +30,8 @@ from tensor2tensor.layers import common_layers from tensor2tensor.utils import beam_search from tensor2tensor.utils import expert_utils as eu +from tensor2tensor.utils import metrics +from tensor2tensor.utils import optimize from tensor2tensor.utils import registry import tensorflow as tf @@ -37,25 +39,6 @@ from tensorflow.python.layers import base -def _with_timing(fn, msg): - - def fn_with_timing(*args, **kwargs): - start_time = time.time() - res = fn(*args, **kwargs) - tf.logging.info("Doing %s took %.3f sec." % (msg, time.time() - start_time)) - return res - - return fn_with_timing - - -def is_class_modality(mod): - # TODO(lukaszkaiser): should be based on type, like CLASS_LABEL, not string. - prefix = "class_label_modality_" - if len(mod.name) < len(prefix): - return False - return mod.name[:len(prefix)] == prefix - - class T2TModel(base.Layer): """Abstract base class for models. @@ -119,6 +102,14 @@ def __init__(self, self._problem_idx = problem_idx self._create_modalities(problem_hparams, self._hparams) + @property + def hparams(self): + return self._hparams + + @property + def has_input(self): + return self._problem_hparams.input_modality + def set_mode(self, mode): """Set hparams with the given mode.""" hparams = copy.copy(self._original_hparams) @@ -126,7 +117,7 @@ def set_mode(self, mode): # When not in training mode, set all forms of dropout to zero. if mode != tf.estimator.ModeKeys.TRAIN: for key in hparams.values(): - if key[-len("dropout"):] == "dropout": + if key.endswith("dropout"): setattr(hparams, key, 0.0) self._hparams = hparams @@ -162,10 +153,6 @@ def _create_modalities(self, problem_hparams, hparams): target_modality = registry.create_modality(target_modality_spec, hparams) problem_hparams.target_modality = target_modality - @property - def has_input(self): - return self._problem_hparams.input_modality - def prepare_features_for_infer(self, features): """Called before inference to allow adding infer-specific features.""" pass @@ -214,10 +201,11 @@ def infer(self, self.prepare_features_for_infer(features) if not self.has_input and beam_size > 1: tf.logging.warn("Beam searching for a model with no inputs.") - if not self.has_input and self._hparams.sampling_method != "random": + if not self.has_input and self.hparams.sampling_method != "random": tf.logging.warn("Non-random sampling for a model with no inputs.") - if is_class_modality( - self._hparams.problems[self._problem_idx].target_modality): + + target_modality = self.hparams.problems[self._problem_idx].target_modality + if target_modality.is_class_modality: beam_size = 1 # No use to run beam-search for a single class. if beam_size == 1: tf.logging.info("Greedy Decoding") @@ -284,7 +272,7 @@ def symbols_to_logits_fn(ids): # now self._coverage is a coverage tensor for the first datashard. # it has shape [batch_size] and contains floats between 0 and # source_length. - modality = self._hparams.problems[self._problem_idx].target_modality + modality = self.hparams.problems[self._problem_idx].target_modality if modality.top_is_pointwise: return tf.squeeze(logits, axis=[1, 2, 3]) # -1 due to the pad above. @@ -305,7 +293,7 @@ def symbols_to_logits_fn(ids): features["inputs"] = tf.reshape(features["inputs"], [s[0] * s[1], s[2], s[3], s[4]]) - target_modality = self._hparams.problems[self._problem_idx].target_modality + target_modality = self.hparams.problems[self._problem_idx].target_modality vocab_size = target_modality.top_dimensionality # Setting decode length to input length + decode_length decode_length = tf.constant(decode_length) @@ -378,7 +366,7 @@ def _slow_greedy_infer(self, features, decode_length): # in metric functions stays in the same frame as other vars. targets_old = features.get("targets", None) - target_modality = self._hparams.problems[self._problem_idx].target_modality + target_modality = self.hparams.problems[self._problem_idx].target_modality def infer_step(recent_output, recent_logits, unused_loss): """Inference step.""" @@ -418,8 +406,8 @@ def infer_step(recent_output, recent_logits, unused_loss): # input shape, so we confuse it about the input shape. initial_output = tf.slice(initial_output, [0, 0, 0, 0], common_layers.shape_list(initial_output)) - target_modality = self._hparams.problems[self._problem_idx].target_modality - if is_class_modality(target_modality): + target_modality = self.hparams.problems[self._problem_idx].target_modality + if target_modality.is_class_modality: decode_length = 1 else: decode_length = common_layers.shape_list( @@ -494,10 +482,10 @@ def sample(self, features): losses: a dictionary: {loss-name (string): floating point `Scalar`}. """ logits, losses = self(features) # pylint: disable=not-callable - if self._hparams.sampling_method == "argmax": + if self.hparams.sampling_method == "argmax": samples = tf.argmax(logits, axis=-1) else: - assert self._hparams.sampling_method == "random" + assert self.hparams.sampling_method == "random" def multinomial_squeeze(logits, temperature=1.0): logits_shape = common_layers.shape_list(logits) @@ -507,7 +495,7 @@ def multinomial_squeeze(logits, temperature=1.0): choices = tf.reshape(choices, logits_shape[:-1]) return choices - samples = multinomial_squeeze(logits, self._hparams.sampling_temp) + samples = multinomial_squeeze(logits, self.hparams.sampling_temp) return samples, logits, losses @@ -549,7 +537,7 @@ def _model_fn(self, features, skip=False, force_full_predict=False): for key, input_modality in six.iteritems( self._problem_hparams.input_modality): previous_modalities = [ - self._hparams.problems[i].input_modality[key].name + self.hparams.problems[i].input_modality[key].name for i in xrange(self._problem_idx) ] all_previous_modalities.extend(previous_modalities) @@ -572,7 +560,7 @@ def _model_fn(self, features, skip=False, force_full_predict=False): # Targets are transformed by the autoregressive part of the modality previous_tgt_modalities = [ - self._hparams.problems[i].target_modality.name + self.hparams.problems[i].target_modality.name for i in xrange(self._problem_idx) ] all_previous_modalities.extend(previous_tgt_modalities) @@ -598,7 +586,7 @@ def _model_fn(self, features, skip=False, force_full_predict=False): with tf.variable_scope(target_modality.name, reuse=target_reuse): last_only = (target_modality.top_is_pointwise and - self._hparams.mode == tf.estimator.ModeKeys.PREDICT and + self.hparams.mode == tf.estimator.ModeKeys.PREDICT and not force_full_predict) if not last_only: sharded_logits = target_modality.top_sharded( @@ -625,8 +613,8 @@ def _model_fn(self, features, skip=False, force_full_predict=False): # Scheduled sampling. do_scheduled_sampling = ( # Only do it if training and set for it. - self._hparams.scheduled_sampling_prob > 0.0 and - self._hparams.mode == tf.estimator.ModeKeys.TRAIN and not skip) + self.hparams.scheduled_sampling_prob > 0.0 and + self.hparams.mode == tf.estimator.ModeKeys.TRAIN and not skip) if do_scheduled_sampling: def sample(x): @@ -640,7 +628,7 @@ def mix_gold_sampled(gold_targets, sampled_targets): return tf.where( tf.less( tf.random_uniform(common_layers.shape_list(sampled_targets)), - self._hparams.scheduled_sampling_gold_mixin_prob), gold_targets, + self.hparams.scheduled_sampling_gold_mixin_prob), gold_targets, sampled_targets) def sampled_results(): @@ -667,9 +655,9 @@ def sampled_results(): return new_sharded_logits, losses # Run the above conditionally. - prob = self._hparams.scheduled_sampling_prob + prob = self.hparams.scheduled_sampling_prob prob *= common_layers.inverse_exp_decay( - self._hparams.scheduled_sampling_warmup_steps, min_value=0.001) + self.hparams.scheduled_sampling_warmup_steps, min_value=0.001) sharded_logits, losses = tf.cond( tf.less(tf.random_uniform([]), prob), sampled_results, lambda: (sharded_logits, losses)) @@ -746,9 +734,179 @@ def model_fn_body(self, features): """ raise NotImplementedError("Abstract Method") - @property - def hparams(self): - return self._hparams + def optimize(self, loss, use_tpu=False): + """Return a training op minimizing loss.""" + lr = self.hparams.learning_rate * optimize.learning_rate_decay(self.hparams) + train_op = optimize.optimize(loss, lr, self.hparams, use_tpu=use_tpu) + return train_op + + @staticmethod + def make_estimator_model_fn(model_name, + hparams, + decode_hparams=None, + use_tpu=False): + model_cls = registry.model(model_name) + + def wrapping_model_fn(features, labels, mode, params, config): + return model_cls.estimator_model_fn( + hparams, + features, + labels, + mode, + config=config, + params=params, + decode_hparams=decode_hparams, + use_tpu=use_tpu) + + return wrapping_model_fn + + @classmethod + def estimator_model_fn(cls, + hparams, + features, + labels, + mode, + config=None, + params=None, + decode_hparams=None, + use_tpu=True): + """Model fn for Estimator. + + Args: + hparams: HParams, model hyperparameters + features: dict<str name, Tensor feature> + labels: Tensor + mode: tf.estimator.ModeKeys + config: RunConfig; if passed, should have t2t_device_info dict + params: dict, may include batch_size + decode_hparams: HParams, used when mode == PREDICT. + use_tpu: bool, whether using TPU + + Returns: + TPUEstimatorSpec if use tpu else EstimatorSpec + """ + tf.logging.warning("T2TModel.estimator_model_fn implements a subset of " + "model_builder.model_fn and is currently only used " + "in tpu_trainer.") + _create_dummy_vars() + hparams = copy.deepcopy(hparams) + problem = hparams.problem_instances[0] + + # Instantiate model + data_parallelism = ( + eu.Parallelism([""]) + if use_tpu else _create_data_parallelism(**config.t2t_device_info)) + model = cls(hparams, mode, data_parallelism=data_parallelism) + + # PREDICT mode + if mode == tf.estimator.ModeKeys.PREDICT: + assert not use_tpu + assert decode_hparams is not None + return model.estimator_spec_predict(features, decode_hparams) + + # TRAIN and EVAL modes + logits, losses_dict = model(features) # pylint: disable=not-callable + + # Set known shapes + # TODO(rsepassi): Add support for variable lengths and batch sizes + shape = logits.get_shape().as_list() + if shape[0] is None: + shape[0] = _get_batch_size(params, hparams, config) + if shape[1] is None: + shape[1] = hparams.max_length + logits.set_shape(shape) + + # Accumulate losses + assert "training" in losses_dict + loss = sum(losses_dict.values()) + + # EVAL mode + if mode == tf.estimator.ModeKeys.EVAL: + return model.estimator_spec_eval(features, logits, labels, loss, + problem, hparams, use_tpu=use_tpu) + + # TRAIN mode + assert mode == tf.estimator.ModeKeys.TRAIN + return model.estimator_spec_train(loss, use_tpu=use_tpu) + + def estimator_spec_train(self, loss, use_tpu=False): + """Construct EstimatorSpec for TRAIN mode.""" + lr = self.hparams.learning_rate * optimize.learning_rate_decay(self.hparams) + train_op = optimize.optimize(loss, lr, self.hparams, use_tpu=use_tpu) + + if use_tpu: + _remove_summaries() # summaries not currently working on TPU + return tf.contrib.tpu.TPUEstimatorSpec( + tf.estimator.ModeKeys.TRAIN, loss=loss, train_op=train_op) + else: + return tf.estimator.EstimatorSpec( + tf.estimator.ModeKeys.TRAIN, loss=loss, train_op=train_op) + + def estimator_spec_eval(self, + features, + logits, + labels, + loss, + problem, + hparams, + use_tpu=False): + """Construct EstimatorSpec for EVAL mode.""" + if use_tpu: + eval_metrics_fn = _create_tpu_eval_metrics_fn(problem, hparams) + _remove_summaries() + return tf.contrib.tpu.TPUEstimatorSpec( + tf.estimator.ModeKeys.EVAL, + eval_metrics=(eval_metrics_fn, [logits, labels]), loss=loss) + else: + eval_metrics_fns = metrics.create_evaluation_metrics([problem], hparams) + eval_metrics = {} + for metric_name, metric_fn in six.iteritems(eval_metrics_fns): + eval_metrics[metric_name] = metric_fn(logits, features) + + return tf.estimator.EstimatorSpec( + tf.estimator.ModeKeys.EVAL, + predictions={"predictions": logits}, + eval_metric_ops=eval_metrics, + loss=loss) + + def estimator_spec_predict(self, features, decode_hparams): + """Construct EstimatorSpec for PREDICT mode.""" + infer_out = self.infer( + features, + beam_size=decode_hparams.beam_size, + top_beams=( + decode_hparams.beam_size if decode_hparams.return_beams else 1), + alpha=decode_hparams.alpha, + decode_length=decode_hparams.extra_length) + if isinstance(infer_out, dict): + # Beam searching + outputs = infer_out["outputs"] + scores = infer_out["scores"] + else: + outputs = infer_out + scores = None + + batched_problem_choice = (features["problem_choice"] * tf.ones( + (common_layers.shape_list(features["inputs"])[0],), dtype=tf.int32)) + predictions = { + "outputs": outputs, + "scores": scores, + "inputs": features.get("inputs"), + "targets": features.get("infer_targets"), + "problem_choice": batched_problem_choice, + } + _del_dict_nones(predictions) + + export_out = {"outputs": predictions["outputs"]} + if "scores" in predictions: + export_out["scores"] = predictions["scores"] + + return tf.estimator.EstimatorSpec( + tf.estimator.ModeKeys.PREDICT, + predictions=predictions, + export_outputs={ + "output": tf.estimator.export.PredictOutput(export_out) + }) def _warn_changed_modality_type(new_name, old_name, feature_name): @@ -758,3 +916,123 @@ def _warn_changed_modality_type(new_name, old_name, feature_name): tf.logging.warning("%s has a designated modality type %s (%s) but has been " "overridden with a modality of type %s (%s).", feature_name, old_type, old_name, new_type, new_name) + + +def _with_timing(fn, msg): + + def fn_with_timing(*args, **kwargs): + start_time = time.time() + res = fn(*args, **kwargs) + tf.logging.info("Doing %s took %.3f sec." % (msg, time.time() - start_time)) + return res + + return fn_with_timing + + +def _create_dummy_vars(): + """Dummy vars for restore to work when not using TPU codepath.""" + with tf.variable_scope("losses_avg"): + with tf.variable_scope("problem_0"): + for var_name in ["total", "extra", "training"]: + tf.get_variable( + "%s_loss" % var_name, initializer=100.0, trainable=False) + with tf.variable_scope("train_stats"): + tf.get_variable("problem_0_steps", initializer=0, trainable=False) + + +def _get_batch_size(params, hparams, config): + """Batch size determined by params dict, HParams, and RunConfig.""" + # If params specifies batch size, use that. TPUEstimator passes batch size in + # params. + batch_size = params and params.get("batch_size") + + # If not set, then we're running on CPU/GPU, so use the batch size from the + # hparams, and multiply by the number of data shards. + if not batch_size: + batch_size = hparams.tpu_batch_size_per_shard + if config: + batch_size *= config.t2t_device_info["num_shards"] + + return batch_size + + +def _create_data_parallelism(num_gpus=1, + gpu_order="", + shard_to_cpu=False, + num_shards=1): + """Create Parallelism object.""" + gpus = list(range(num_gpus)) + if gpu_order: + gpus = [int(s) for s in gpu_order.split(" ")] + assert len(gpus) == num_gpus + data_shard_devices = ["gpu:%d" % i for i in gpus] + if shard_to_cpu or num_gpus < 1: + data_shard_devices += ["cpu:0"] + assert len(data_shard_devices) == num_shards + tf.logging.info("Data parallel devices: %s", data_shard_devices) + return eu.Parallelism(data_shard_devices, reuse=True) + + +# These metrics are implemented with py_funcs and therefore do no work with TPU +TPU_METRIC_BLACKLIST = set([ + metrics.Metrics.APPROX_BLEU, + metrics.Metrics.ROUGE_2_F, + metrics.Metrics.ROUGE_L_F, +]) + + +def _create_tpu_eval_metrics_fn(problem, hparams): + """Create the metrics_fn that TPUEstimatorSpec expects.""" + + tm = problem.get_hparams().target_modality + if isinstance(tm, tuple): + tm = registry.create_modality(tm, hparams) + weights_fn = tm.targets_weights_fn + + def make_metric_fn(metric_fn): + + def wrapped_metric_fn(logits, labels): + num, den = metric_fn(logits, labels, weights_fn=weights_fn) + return tf.metrics.mean(num, den) + + return wrapped_metric_fn + + metric_fns = [] + eval_metrics = problem.eval_metrics() + + for metric in eval_metrics: + if metric in TPU_METRIC_BLACKLIST: + tf.logging.warn("Skipping eval metric %s in TPU_METRIC_BLACKLIST", metric) + continue + name = "metrics-%s/%s" % (problem.name, metric) + metric_fns.append((name, make_metric_fn(metrics.METRICS_FNS[metric]))) + + def all_metrics_fn(logits, labels): + metrics_dict = {} + + for name, fn in metric_fns: + metrics_dict[name] = fn(logits, labels) + + return metrics_dict + + return all_metrics_fn + + +def _remove_summaries(): + g = tf.get_default_graph() + key = tf.GraphKeys.SUMMARIES + del g.get_collection_ref(key)[:] + assert not g.get_collection(key) + + +def _clip_gradients_by_norm(grads_and_vars, clip_gradients): + """Clips gradients by global norm.""" + gradients, variables = zip(*grads_and_vars) + clipped_gradients, _ = tf.clip_by_global_norm(gradients, clip_gradients) + return list(zip(clipped_gradients, variables)) + + +def _del_dict_nones(d): + for k in list(d.keys()): + if d[k] is None: + del d[k] From 0ffe0e6654366d700b8850d6423a717083586d12 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 28 Nov 2017 19:27:32 -0800 Subject: [PATCH 0618/4095] This change breaks previous checkpoints. Make Transformer fast on TPU. PiperOrigin-RevId: 177255666 --- tensor2tensor/layers/common_attention.py | 144 +++++++---------------- tensor2tensor/layers/common_hparams.py | 3 + tensor2tensor/layers/common_layers.py | 135 ++++++++++++++++++++- tensor2tensor/layers/modalities.py | 26 +++- tensor2tensor/layers/modalities_test.py | 9 +- tensor2tensor/models/transformer.py | 90 +++++++++++--- tensor2tensor/utils/t2t_model.py | 1 + 7 files changed, 275 insertions(+), 133 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 5aafe6348..f0bbaa39e 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -801,7 +801,7 @@ def combine_first_two_dimensions(x): @expert_utils.add_name_scope() def split_heads(x, num_heads): - """Split channels (dimension 3) into multiple heads (becomes dimension 1). + """Split channels (dimension 2) into multiple heads (becomes dimension 1). Args: x: a Tensor with shape [batch, length, channels] @@ -815,7 +815,7 @@ def split_heads(x, num_heads): @expert_utils.add_name_scope() def split_heads_2d(x, num_heads): - """Split channels (dimension 4) into multiple heads (becomes dimension 1). + """Split channels (dimension 3) into multiple heads (becomes dimension 1). Args: x: a Tensor with shape [batch, height, width, channels] @@ -968,12 +968,12 @@ def grouped_attention_multihead(query_antecedent, name, default_name="multihead_attention_sparse", values=[query_antecedent, memory_antecedent]): - q = common_layers.conv1d( - query_antecedent, total_key_depth, 1, name="q_transform") - kv = common_layers.conv1d( + q = tf.layers.dense( + query_antecedent, total_key_depth, use_bias=False, name="q_transform") + kv = tf.layers.dense( memory_antecedent, total_key_depth + total_value_depth, - 1, + use_bias=False, name="kv_transform") q = split_heads(q, num_heads) kv = split_heads(kv, num_heads) @@ -982,18 +982,18 @@ def grouped_attention_multihead(query_antecedent, # We will train these by auxiliary losses. We use stop_gradient here # to keep these losses from back-propagating to the rest of the model. # We add biases that help balance the usage of the experts. - q_pred = common_layers.conv1d( + q_pred = tf.layers.dense( tf.stop_gradient(query_antecedent), num_heads * num_groups, - 1, + use_bias=False, name="q_pred") q_pred = split_heads(q_pred, num_heads) q_bias = tf.get_variable("q_bias", [1, num_heads, 1, num_groups]) q_pred_biased = q_pred + q_bias - m_pred = common_layers.conv1d( + m_pred = tf.layers.dense( tf.stop_gradient(memory_antecedent), num_heads * num_groups, - 1, + use_bias=False, name="m_pred") m_pred = split_heads(m_pred, num_heads) m_bias = tf.get_variable("m_bias", [1, num_heads, 1, num_groups]) @@ -1059,7 +1059,8 @@ def grouped_attention_multihead(query_antecedent, o = tf.reshape(o, [batch, num_heads, length_q, depth_v]) o = combine_heads(o) - o = common_layers.conv1d(o, output_depth, 1, name="output_transform") + o = tf.layers.dense( + o, output_depth, use_bias=False, name="output_transform") m_total = m_dispatcher.combine(m_total) q_total = q_dispatcher.combine(q_total) @@ -2189,86 +2190,19 @@ def compute_qkv(query_antecedent, Returns: q, k, v : [batch, length, depth] tensors """ - if memory_antecedent is None and q_filter_width == kv_filter_width == 1: - # self attention with single position q, k, and v - combined = common_layers.conv1d( - query_antecedent, - total_key_depth * 2 + total_value_depth, - 1, - name="qkv_transform") - q, k, v = tf.split( - combined, [total_key_depth, total_key_depth, total_value_depth], axis=2) - return q, k, v - - if memory_antecedent is None: - # self attention - q = common_layers.conv1d( - query_antecedent, - total_key_depth, - q_filter_width, - padding=q_padding, - name="q_transform") - kv_combined = common_layers.conv1d( - query_antecedent, - total_key_depth + total_value_depth, - kv_filter_width, - padding=kv_padding, - name="kv_transform") - k, v = tf.split(kv_combined, [total_key_depth, total_value_depth], axis=2) - return q, k, v - - # encoder-decoder attention - q = common_layers.conv1d( - query_antecedent, - total_key_depth, - q_filter_width, - padding=q_padding, - name="q_transform") - combined = common_layers.conv1d( - memory_antecedent, - total_key_depth + total_value_depth, - 1, - padding=kv_padding, - name="kv_transform") - k, v = tf.split(combined, [total_key_depth, total_value_depth], axis=2) - - return q, k, v - - -def compute_qkv_2d(query_antecedent, memory_antecedent, total_key_depth, - total_value_depth): - """Computes query, key and value. - - Args: - query_antecedent: a Tensor with shape [batch, h, w, depth_k] - memory_antecedent: a Tensor with shape [batch, h, w, depth_k] - total_key_depth: an integer - total_value_depth: and integer - - Returns: - q, k, v : [batch, h, w, depth_k] tensors - """ - # self attention with single position q, k, and v if memory_antecedent is None: - combined = tf.layers.conv2d( - query_antecedent, - total_key_depth * 2 + total_value_depth, (1, 1), - name="qkv_transform") - q, k, v = tf.split( - combined, [total_key_depth, total_key_depth, total_value_depth], - axis=-1) - return q, k, v - - # Encoder decoder attention - q = common_layers.conv1d( - query_antecedent, total_key_depth, 1, name="q_transform") - combined = common_layers.conv1d( - memory_antecedent, - total_key_depth + total_value_depth, - 1, - name="kv_transform") - k, v = tf.split(combined, [total_key_depth, total_value_depth], axis=2) - + memory_antecedent = query_antecedent + def _compute(inp, depth, filter_width, padding, name): + if filter_width == 1: + return tf.layers.dense(inp, depth, use_bias=False, name=name) + else: + return common_layers.conv1d(inp, depth, filter_width, padding, name=name) + q = _compute( + query_antecedent, total_key_depth, q_filter_width, q_padding, "q") + k = _compute( + memory_antecedent, total_key_depth, kv_filter_width, kv_padding, "k") + v = _compute( + memory_antecedent, total_value_depth, kv_filter_width, kv_padding, "v") return q, k, v @@ -2410,7 +2344,8 @@ def multihead_attention(query_antecedent, x = dilated_self_attention_1d(q, k, v, block_length, block_width, gap_size, num_memory_blocks) x = combine_heads(x) - x = common_layers.conv1d(x, output_depth, 1, name="output_transform") + x = tf.layers.dense( + x, output_depth, use_bias=False, name="output_transform") if additional_returned_value is not None: return x, additional_returned_value return x @@ -2457,8 +2392,8 @@ def multihead_attention_2d(query_antecedent, name, default_name="multihead_attention_2d", values=[query_antecedent, memory_antecedent]): - q, k, v = compute_qkv_2d(query_antecedent, memory_antecedent, - total_key_depth, total_value_depth) + q, k, v = compute_qkv(query_antecedent, memory_antecedent, + total_key_depth, total_value_depth) # after splitting, shape is [batch, heads, h, w, depth] q = split_heads_2d(q, num_heads) k = split_heads_2d(k, num_heads) @@ -2473,7 +2408,8 @@ def multihead_attention_2d(query_antecedent, x = masked_local_attention_2d( q, k, v, query_shape=query_shape, memory_flange=memory_flange) x = combine_heads_2d(x) - x = tf.layers.conv2d(x, output_depth, (1, 1), name="output_transform") + x = tf.layers.dense( + x, output_depth, use_bias=False, name="output_transform") return x @@ -2512,16 +2448,18 @@ def ffn_self_attention_layer(x, x_shape = common_layers.shape_list(x) part_depth = filter_depth // num_parts if not share_kv: - combined = common_layers.conv1d( - x, filter_depth * 3, 1, name="qkv_transform") + combined = tf.layers.dense( + x, filter_depth * 3, use_bias=False, name="qkv_transform") combined = tf.expand_dims(combined, axis=2) q, k, v = tf.split(combined, 3, axis=3) else: q = tf.expand_dims( - common_layers.conv1d(x, filter_depth, 1, name="q_transform"), axis=2) + tf.layers.dense( + x, filter_depth, use_bias=False, name="q_transform"), axis=2) kv_combined = tf.expand_dims( - common_layers.conv1d( - tf.concat([x, x], axis=1), filter_depth, 1, name="kv_transform"), + tf.layers.dense( + tf.concat([x, x], axis=1), filter_depth, use_bias=False, + name="kv_transform"), axis=2) k, v = tf.split(kv_combined, [x_shape[1], x_shape[1]], axis=1) @@ -2534,7 +2472,8 @@ def ffn_self_attention_layer(x, bias = None x = dot_product_attention(batch_q, batch_k, batch_v, bias, dropout_rate) x = tf.reshape(x, [x_shape[0], x_shape[1], filter_depth]) - x = common_layers.conv1d(x, output_depth, 1, name="output_transform") + x = tf.layers.dense( + x, output_depth, use_bias=False, name="output_transform") return x @@ -2585,7 +2524,7 @@ def parameter_attention(x, output_depth**0.5) batch_size = common_layers.shape_list(x)[0] length = common_layers.shape_list(x)[1] - q = common_layers.conv1d(x, total_key_depth, 1, name="q_transform") + q = tf.layers.dense(x, total_key_depth, use_bias=False, name="q_transform") if dropout_rate: # This is a cheaper form of attention dropout where we use to use # the same dropout decisions across batch elemets and query positions, @@ -2604,7 +2543,8 @@ def parameter_attention(x, y = tf.transpose(y, [1, 2, 0, 3]) y = tf.reshape(y, [batch_size, length, total_value_depth]) y.set_shape([None, None, total_value_depth]) - y = common_layers.conv1d(y, output_depth, 1, name="output_transform") + y = tf.layers.dense( + y, output_depth, use_bias=False, name="output_transform") return y diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 043142359..591b3e28f 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -187,6 +187,9 @@ def basic_params1(): # Things not compatible with eager mode use this flag to implement # alternative functionality. We expect this to go away soon. use_eager_mode=False, + # Set by tpu_trainer to let the model know whether we are on TPU. + # Switching on/off tpu should not invalidate checkpoints. + use_tpu=False, ) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 2b5c3fb34..f04d27f1d 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -439,6 +439,40 @@ def conv_fn(inputs, filters, kernel_size, **kwargs): return conv_internal(conv_fn, inputs, filters, kernel_size, **kwargs) +def tpu_conv1d(inputs, filters, kernel_size, padding="SAME", name="tpu_conv1d"): + """Version of conv1d that works on TPU (as of 11/2017). + + Args: + inputs: a Tensor with shape [batch, length, input_depth]. + filters: an integer. + kernel_size: an integer. + padding: a string - "SAME" or "LEFT". + name: a string. + + Returns: + a Tensor with shape [batch, length, filters]. + """ + if kernel_size == 1: + return tf.layers.dense(inputs, filters, name=name, use_bias=True) + if padding == "SAME": + assert kernel_size % 2 == 1 + first_offset = -((kernel_size - 1) // 2) + else: + assert padding == "LEFT" + first_offset = -(kernel_size - 1) + last_offset = first_offset + kernel_size - 1 + results = [] + padded = tf.pad(inputs, [[0, 0], [-first_offset, last_offset], [0, 0]]) + for i in xrange(kernel_size): + shifted = tf.slice(padded, [0, i, 0], tf.shape(inputs)) if i else inputs + shifted.set_shape(inputs.get_shape()) + results.append(tf.layers.dense( + shifted, filters, use_bias=(i == 0), name=name + "_%d" % i)) + ret = tf.add_n(results) + ret *= kernel_size ** -0.5 + return ret + + def layer_norm_vars(filters): """Create Variables for layer norm.""" scale = tf.get_variable( @@ -1231,6 +1265,94 @@ def relu_density_logit(x, reduce_dims): return scaled +def maybe_zero_out_padding(inputs, kernel_size, padding, nonpadding_mask): + """If necessary, zero out inputs to a conv for padding positions. + + Args: + inputs: a Tensor with shape [batch, length, ...] + kernel_size: an integer or pair of integers + padding: a string, e.g. "SAME" + nonpadding_mask: a Tensor with shape [batch, length] + + Returns: + a Tensor with the same shape as inputs + """ + if (kernel_size != 1 and + kernel_size != (1, 1) and + padding == "SAME" and + nonpadding_mask is not None): + while nonpadding_mask.get_shape().ndims < inputs.get_shape().ndims: + nonpadding_mask = tf.expand_dims(nonpadding_mask, -1) + return inputs * nonpadding_mask + else: + return inputs + + +def dense_relu_dense(inputs, filter_size, output_size, dropout=0.0): + """Hidden layer with RELU activation followed by linear projection.""" + h = tf.layers.dense( + inputs, filter_size, use_bias=True, activation=tf.nn.relu, name="conv1") + if dropout != 0.0: + h = tf.nn.dropout(h, 1.0 - dropout) + o = tf.layers.dense(h, output_size, use_bias=True, name="conv2") + return o + + +def conv_relu_conv(inputs, + filter_size, + output_size, + first_kernel_size=3, + second_kernel_size=3, + padding="SAME", + nonpadding_mask=None, + dropout=0.0, + name=None): + """Hidden layer with RELU activation followed by linear projection.""" + with tf.variable_scope(name, "conv_relu_conv", [inputs]): + inputs = maybe_zero_out_padding( + inputs, first_kernel_size, padding, nonpadding_mask) + h = tpu_conv1d(inputs, filter_size, first_kernel_size, padding=padding, + name="conv1") + h = tf.nn.relu(h) + if dropout != 0.0: + h = tf.nn.dropout(h, 1.0 - dropout) + h = maybe_zero_out_padding(h, second_kernel_size, padding, nonpadding_mask) + return tpu_conv1d(h, output_size, second_kernel_size, padding=padding, + name="conv2") + + +def sepconv_relu_sepconv(inputs, + filter_size, + output_size, + first_kernel_size=(1, 1), + second_kernel_size=(1, 1), + padding="LEFT", + nonpadding_mask=None, + dropout=0.0, + name=None): + """Hidden layer with RELU activation followed by linear projection.""" + with tf.variable_scope(name, "sepconv_relu_sepconv", [inputs]): + inputs = maybe_zero_out_padding( + inputs, first_kernel_size, padding, nonpadding_mask) + if inputs.get_shape().ndims == 3: + is_3d = True + inputs = tf.expand_dims(inputs, 2) + else: + is_3d = False + h = separable_conv( + inputs, filter_size, first_kernel_size, ctivation=tf.nn.relu, + padding=padding, name="conv1") + if dropout != 0.0: + h = tf.nn.dropout(h, 1.0 - dropout) + h = maybe_zero_out_padding(h, second_kernel_size, padding, nonpadding_mask) + ret = separable_conv( + h, output_size, second_kernel_size, padding=padding, name="conv2") + if is_3d: + ret = tf.squeeze(ret, 2) + return ret + + +# DEPRECATED - use dense_relu_dense, conv_relu_conv, sepconv_relu_sepconv def conv_hidden_relu(inputs, hidden_size, output_size, @@ -1489,10 +1611,15 @@ def padded_cross_entropy(logits, confidence = 1.0 - label_smoothing vocab_size = shape_list(logits)[-1] with tf.name_scope("padded_cross_entropy", [logits, labels]): - pad_logits, pad_labels = pad_with_zeros(logits, labels) - xent = smoothing_cross_entropy(pad_logits, pad_labels, vocab_size, - confidence) - weights = weights_fn(pad_labels) + if len(logits.get_shape().as_list()) == 2: + # Deal with the case where we did not insert extra dimensions due to + # TPU issues. No pad-to-same-length happens in this case. + # TODO(noam): remove this logic once TPU can handle extra dimensions. + labels = tf.reshape(labels, [-1]) + else: + logits, labels = pad_with_zeros(logits, labels) + xent = smoothing_cross_entropy(logits, labels, vocab_size, confidence) + weights = weights_fn(labels) if not reduce_sum: return xent * weights, weights return tf.reduce_sum(xent * weights), tf.reduce_sum(weights) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 362c4b527..26aca13d2 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -30,6 +30,15 @@ import tensorflow as tf +# TODO(noam): remove this function after TPUs do gather faster. +def tpu_gather(params, indices): + vocab_size = params.get_shape().as_list()[0] + indices_flat = tf.reshape(indices, [-1]) + out = tf.matmul(tf.one_hot(indices_flat, vocab_size), params) + out = eu.reshape_like(out, tf.expand_dims(indices, -1)) + return out + + @registry.register_symbol_modality("default") class SymbolModality(modality.Modality): """Modality for sets of discrete symbols. @@ -96,7 +105,8 @@ def bottom_simple(self, x, name, reuse): # Squeeze out the channels dimension. x = tf.squeeze(x, axis=3) var = self._get_weights() - ret = tf.gather(var, x) + ret = (tpu_gather(var, x) if self._model_hparams.use_tpu + else tf.gather(var, x)) if self._model_hparams.multiply_embedding_mode == "sqrt_depth": ret *= self._body_input_depth**0.5 ret *= tf.expand_dims(tf.to_float(tf.not_equal(x, 0)), -1) @@ -144,14 +154,18 @@ def top(self, body_output, _): self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN): # insert channels dimension body_output = tf.expand_dims(body_output, 3) - logits = common_layers.FactoredTensor(body_output, var) + return common_layers.FactoredTensor(body_output, var) else: body_output = tf.reshape(body_output, [-1, body_output_shape[-1]]) logits = tf.matmul(body_output, var, transpose_b=True) - - out_shape = body_output_shape[:-1] + [1, self._vocab_size] - logits = tf.reshape(logits, out_shape) - return logits + if (self._model_hparams.use_tpu and + self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN): + # TPU does not react kindly to extra dimensions. + # TODO(noam): remove this once TPU is more forgiving of extra dims. + return logits + else: + return tf.reshape( + logits, body_output_shape[:-1] + [1, self._vocab_size]) @registry.register_symbol_modality("ctc") diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index 213abe891..574ddc77c 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -43,7 +43,8 @@ def testSymbolModalityInputs(self): symbol_modality_skip_top=0, shared_embedding_and_softmax_weights=0, prepend_mode="none", - use_eager_mode=False) + use_eager_mode=False, + use_tpu=False) x = -1 + np.random.random_integers( vocab_size, size=(batch_size, length, 1, 1)) m = modalities.SymbolModality(model_hparams, vocab_size) @@ -73,7 +74,8 @@ def testSymbolModalityTargets(self): factored_logits=0, mode=tf.estimator.ModeKeys.TRAIN, prepend_mode="none", - use_eager_mode=False) + use_eager_mode=False, + use_tpu=False) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( @@ -110,7 +112,8 @@ def testSymbolModalityTargetsFactored(self): factored_logits=1, mode=tf.estimator.ModeKeys.TRAIN, prepend_mode="none", - use_eager_mode=False) + use_eager_mode=False, + use_tpu=False) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 74509c098..77e98f942 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -108,8 +108,13 @@ def decode(self, hparams, cache=cache) - # Expand since t2t expects 4d tensors. - return tf.expand_dims(decoder_output, axis=2) + if hparams.use_tpu and hparams.mode == tf.estimator.ModeKeys.TRAIN: + # TPU does not react kindly to extra dimensions. + # TODO(noam): remove this once TPU is more forgiving of extra dims. + return decoder_output + else: + # Expand since t2t expects 4d tensors. + return tf.expand_dims(decoder_output, axis=2) def model_fn_body(self, features): """Transformer main model_fn. @@ -475,11 +480,12 @@ def transformer_encoder(encoder_input, """ x = encoder_input with tf.variable_scope(name): + # TODO(noam): We should pass in the padding directly. + padding = common_attention.attention_bias_to_padding( + encoder_self_attention_bias) pad_remover = None if hparams.use_pad_remover: - pad_remover = expert_utils.PadRemover( - common_attention.attention_bias_to_padding( - encoder_self_attention_bias)) + pad_remover = expert_utils.PadRemover(padding) for layer in xrange(hparams.num_encoder_layers or hparams.num_hidden_layers): with tf.variable_scope("layer_%d" % layer): @@ -498,7 +504,8 @@ def transformer_encoder(encoder_input, x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): y = transformer_ffn_layer( - common_layers.layer_preprocess(x, hparams), hparams, pad_remover) + common_layers.layer_preprocess(x, hparams), hparams, pad_remover, + conv_padding="SAME", nonpadding_mask=1.0 - padding) x = common_layers.layer_postprocess(x, y, hparams) # if normalization is done in layer_preprocess, then it shuold also be done # on the output, since the output can grow very large, being the sum of @@ -564,7 +571,8 @@ def transformer_decoder(decoder_input, x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): y = transformer_ffn_layer( - common_layers.layer_preprocess(x, hparams), hparams) + common_layers.layer_preprocess(x, hparams), hparams, + conv_padding="LEFT") x = common_layers.layer_postprocess(x, y, hparams) # if normalization is done in layer_preprocess, then it shuold also be done # on the output, since the output can grow very large, being the sum of @@ -572,7 +580,11 @@ def transformer_decoder(decoder_input, return common_layers.layer_preprocess(x, hparams) -def transformer_ffn_layer(x, hparams, pad_remover=None): +def transformer_ffn_layer(x, + hparams, + pad_remover=None, + conv_padding="LEFT", + nonpadding_mask=None): """Feed-forward layer in the transformer. Args: @@ -582,18 +594,26 @@ def transformer_ffn_layer(x, hparams, pad_remover=None): positions. If provided, when using convolutional settings, the padding is removed before applying the convolution, and restored afterward. This can give a significant speedup. + conv_padding: a string - either "LEFT" or "SAME". + nonpadding_mask: an optional Tensor with shape [batch_size, length]. + needed for convolutoinal layers with "SAME" padding. + Contains 1.0 in positions corresponding to nonpadding. Returns: a Tensor of shape [batch_size, length, hparams.hidden_size] """ - if hparams.ffn_layer == "conv_hidden_relu": + ffn_layer = hparams.ffn_layer + if ffn_layer == "conv_hidden_relu": + # Backwards compatibility + ffn_layer = "dense_relu_dense" + if ffn_layer == "dense_relu_dense": # In simple convolution mode, use `pad_remover` to speed up processing. if pad_remover: original_shape = common_layers.shape_list(x) # Collapse `x` across examples, and remove padding positions. x = tf.reshape(x, tf.concat([[-1], original_shape[2:]], axis=0)) x = tf.expand_dims(pad_remover.remove(x), axis=0) - conv_output = common_layers.conv_hidden_relu( + conv_output = common_layers.dense_relu_dense( x, hparams.filter_size, hparams.hidden_size, @@ -603,13 +623,23 @@ def transformer_ffn_layer(x, hparams, pad_remover=None): conv_output = tf.reshape( pad_remover.restore(tf.squeeze(conv_output, axis=0)), original_shape) return conv_output - elif hparams.ffn_layer == "parameter_attention": + elif ffn_layer == "conv_relu_conv": + return common_layers.conv_relu_conv( + x, + hparams.filter_size, + hparams.hidden_size, + first_kernel_size=3, + second_kernel_size=1, + padding=conv_padding, + nonpadding_mask=nonpadding_mask, + dropout=hparams.relu_dropout) + elif ffn_layer == "parameter_attention": return common_attention.parameter_attention( x, hparams.parameter_attention_key_channels or hparams.hidden_size, hparams.parameter_attention_value_channels or hparams.hidden_size, hparams.hidden_size, hparams.filter_size, hparams.num_heads, hparams.attention_dropout) - elif hparams.ffn_layer == "conv_hidden_relu_with_sepconv": + elif ffn_layer == "conv_hidden_relu_with_sepconv": return common_layers.conv_hidden_relu( x, hparams.filter_size, @@ -619,7 +649,7 @@ def transformer_ffn_layer(x, hparams, pad_remover=None): padding="LEFT", dropout=hparams.relu_dropout) else: - assert hparams.ffn_layer == "none" + assert ffn_layer == "none" return x @@ -654,7 +684,7 @@ def transformer_base_v1(): hparams.add_hparam("num_heads", 8) hparams.add_hparam("attention_key_channels", 0) hparams.add_hparam("attention_value_channels", 0) - hparams.add_hparam("ffn_layer", "conv_hidden_relu") + hparams.add_hparam("ffn_layer", "dense_relu_dense") hparams.add_hparam("parameter_attention_key_channels", 0) hparams.add_hparam("parameter_attention_value_channels", 0) # All hyperparameters ending in "dropout" are automatically set to 0.0 @@ -1080,8 +1110,7 @@ def transformer_tpu_range(rhp): def transformer_tpu_batch_range(rhp): hparams = transformer_tpu() common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp) - rhp.set_discrete("tpu_batch_size_per_shard", [1] + list(range(2, 16, 2))) - rhp.set_discrete("max_length", list(range(128, 416, 16))) + rhp.set_discrete("tpu_batch_size_per_shard", [1, 2, 3, 4]) @registry.register_hparams @@ -1097,14 +1126,18 @@ def transformer_small_tpu(): def update_hparams_for_tpu(hparams): + """Change hparams to be compatible with TPU training.""" hparams.use_pad_remover = False # where op not supported hparams.optimizer = "TrueAdam" hparams.learning_rate = 0.2 # Inputs # Each example in the batch will be of (padded) length hparams.max_length - hparams.max_length = 64 - hparams.tpu_batch_size_per_shard = 20 + # It is suggested to use a dataset that where examples have been combined + # to this length. + # TODO(noam): Prepare and debug these datasets. + hparams.max_length = 256 + hparams.tpu_batch_size_per_shard = 8 @registry.register_hparams @@ -1125,3 +1158,24 @@ def transformer_clean_big(): hparams.hidden_size = 1024 hparams.filter_size = 4096 return hparams + + +@registry.register_hparams +def transformer_tpu_with_conv(): + """Cut down on the number of heads, and use convs instead.""" + hparams = transformer_tpu() + hparams.num_heads = 4 # heads are expensive on tpu + hparams.ffn_layer = "conv_relu_conv" + return hparams + + +@registry.register_hparams +def transformer_tpu_base_language_model(): + """Hparams for training languagemodel_lm1b8k on tpu.""" + hparams = transformer_clean_big() + update_hparams_for_tpu(hparams) + hparams.tpu_batch_size_per_shard = 16 + hparams.num_heads = 4 # heads are expensive on tpu + hparams.learning_rate_warmup_steps = 1000 + hparams.shared_embedding_and_softmax_weights = False + return hparams diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index c2367041b..e1d82cfcb 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -790,6 +790,7 @@ def estimator_model_fn(cls, "in tpu_trainer.") _create_dummy_vars() hparams = copy.deepcopy(hparams) + hparams.use_tpu = use_tpu problem = hparams.problem_instances[0] # Instantiate model From 8f05bab0acf1c338c5f9ed6adce191ea83b9fde1 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 28 Nov 2017 20:54:07 -0800 Subject: [PATCH 0619/4095] Correction for eager-mode decoding scopes (to use with pre-trained checkpoints). PiperOrigin-RevId: 177261645 --- tensor2tensor/models/transformer.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 77e98f942..d345155f9 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -163,12 +163,12 @@ def _greedy_infer(self, features, decode_length): Raises: NotImplementedError: If there are multiple data shards. """ - with tf.variable_scope(self.name): - # TODO(nikip): Remove slow decoding for eager. Eager mode doesn't work - # with accessing _shape which is used in fast decoding currently. - if self._hparams.use_eager_mode: - return self._slow_greedy_infer(features, decode_length) - else: + # TODO(nikip): Remove slow decoding for eager. Eager mode doesn't work + # with accessing _shape which is used in fast decoding currently. + if self._hparams.use_eager_mode: + return self._slow_greedy_infer(features, decode_length) + else: + with tf.variable_scope(self.name): decoded_ids, _ = self._fast_decode(features, decode_length) return decoded_ids, None, None @@ -186,13 +186,13 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): Returns: samples: an integer `Tensor`. Top samples from the beam search """ - with tf.variable_scope(self.name): - # TODO(nikip): Remove slow decoding for eager. Eager mode doesn't work - # with accessing _shape which is used in fast decoding currently. - if self._hparams.use_eager_mode: - return self._beam_decode_slow( - features, decode_length, beam_size, top_beams, alpha) - else: + # TODO(nikip): Remove slow decoding for eager. Eager mode doesn't work + # with accessing _shape which is used in fast decoding currently. + if self._hparams.use_eager_mode: + return self._beam_decode_slow( + features, decode_length, beam_size, top_beams, alpha) + else: + with tf.variable_scope(self.name): decoded_ids, scores = self._fast_decode(features, decode_length, beam_size, top_beams, alpha) return {"outputs": decoded_ids, "scores": scores} From 2ca3232583817409464d9b7efcc4fb28aa7ea146 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Tue, 28 Nov 2017 21:50:51 -0800 Subject: [PATCH 0620/4095] Make default Parallelism in T2TModel reusing (removes "parallel_0" from variable names in colab). PiperOrigin-RevId: 177265031 --- tensor2tensor/utils/t2t_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index e1d82cfcb..ff7584b07 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -75,7 +75,7 @@ def __init__(self, super(T2TModel, self).__init__( trainable=mode == tf.estimator.ModeKeys.TRAIN, name=name) if data_parallelism is None: - data_parallelism = eu.Parallelism([""]) + data_parallelism = eu.Parallelism([""], reuse=True) if ps_devices is None: ps_devices = [""] if problem_hparams is None: From 7909c69c8f8d0708c32d3bba2f961cbb1ce29d0f Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 28 Nov 2017 23:53:23 -0800 Subject: [PATCH 0621/4095] fix bug with bpe32k - prepend <pad> and <EOS> to vocab. PiperOrigin-RevId: 177271941 --- tensor2tensor/data_generators/translate_ende.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/data_generators/translate_ende.py b/tensor2tensor/data_generators/translate_ende.py index 8ca3a726b..2dc8e3a00 100644 --- a/tensor2tensor/data_generators/translate_ende.py +++ b/tensor2tensor/data_generators/translate_ende.py @@ -99,8 +99,10 @@ def generator(self, data_dir, tmp_dir, train): token_tmp_path = os.path.join(tmp_dir, self.vocab_file) token_path = os.path.join(data_dir, self.vocab_file) tf.gfile.Copy(token_tmp_path, token_path, overwrite=True) - with tf.gfile.GFile(token_path, mode="a") as f: - f.write("UNK\n") # Add UNK to the vocab. + with tf.gfile.GFile(token_path, mode="r") as f: + vocab_data = "<pad>\n<EOS>\n" + f.read() + "UNK\n" + with tf.gfile.GFile(token_path, mode="w") as f: + f.write(vocab_data) token_vocab = text_encoder.TokenTextEncoder(token_path, replace_oov="UNK") return translate.token_generator(train_path + ".en", train_path + ".de", token_vocab, EOS) From c65a646c0a5b5b5b93a70fa5dce13eebfd101a0a Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Wed, 29 Nov 2017 08:05:01 -0800 Subject: [PATCH 0622/4095] Disable image summary in eager mode (tf.Eager doesn't like it for now). PiperOrigin-RevId: 177309768 --- tensor2tensor/layers/modalities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 26aca13d2..d0264d5cc 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -205,7 +205,8 @@ class ImageModality(modality.Modality): def bottom(self, inputs): with tf.variable_scope(self.name): inputs = common_layers.standardize_images(inputs) - tf.summary.image("inputs", inputs, max_outputs=2) + if not self._model_hparams.use_eager_mode: + tf.summary.image("inputs", inputs, max_outputs=2) return tf.to_float(inputs) def targets_bottom(self, inputs): From 69701e4bc9cd2c8195bb2fa679e9249287a2e561 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 29 Nov 2017 14:03:14 -0800 Subject: [PATCH 0623/4095] v1.3 PiperOrigin-RevId: 177359986 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bedb393fd..5027918af 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.2.9', + version='1.3.0', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 86ecec08b705ca5413e2864616abd43ed94ae633 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Sun, 19 Nov 2017 01:21:07 +0100 Subject: [PATCH 0624/4095] add BLEU smoothing Fix BLEU computation for edge case of no matching 4-gram (or trigram,...). Smoothing is the default in the official BLEU implementation https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v14.pl#L843-L885 (Smoothing is not present in multi-bleu.perl, but this script explicitly says it is internal purposes only and it is recommended to use mteval-v14.pl instead.) --- tensor2tensor/utils/bleu_hook.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 20a7c8426..9a556e89f 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -93,9 +93,14 @@ def compute_bleu(reference_corpus, for ngram in translation_ngram_counts: possible_matches_by_order[len(ngram)-1] += translation_ngram_counts[ngram] precisions = [0] * max_order + smooth = 1.0 for i in xrange(0, max_order): if possible_matches_by_order[i] > 0: - precisions[i] = matches_by_order[i] / possible_matches_by_order[i] + if matches_by_order[i] > 0: + precisions[i] = matches_by_order[i] / possible_matches_by_order[i] + else: + smooth *= 2 + precisions[i] = 1.0 / (smooth * possible_matches_by_order[i]) else: precisions[i] = 0.0 From 4160979ab570c9a5da31bc7b00710943ab2d25e6 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Sun, 19 Nov 2017 01:32:45 +0100 Subject: [PATCH 0625/4095] add BLEU-compatible tokenization so one can compute real BLEU on two files (MT translation=hypothesis and reference). I've tested this on few datasets and it seems to agree with the official implementation mteval-v14.pl. --- tensor2tensor/utils/bleu_hook.py | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 9a556e89f..864ffddf0 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -20,6 +20,9 @@ import collections import math +import re +import sys +import unicodedata # Dependency imports @@ -136,3 +139,52 @@ def bleu_score(predictions, labels, **unused_kwargs): bleu = tf.py_func(compute_bleu, (labels, outputs), tf.float32) return bleu, tf.constant(1.0) + + +class UnicodeRegex: + """Ad-hoc hack to recognize all punctuation and symbols. + + without dependening on https://pypi.python.org/pypi/regex/.""" + def _property_chars(prefix): + return ''.join(chr(x) for x in range(sys.maxunicode) + if unicodedata.category(chr(x)).startswith(prefix)) + punctuation = _property_chars('P') + nondigit_punct_re = re.compile(r'([^\d])([' + punctuation + r'])') + punct_nondigit_re = re.compile(r'([' + punctuation + r'])([^\d])') + symbol_re = re.compile('([' + _property_chars('S') + '])') + + +def bleu_tokenize(string): + """"Tokenize a string following the official BLEU implementation. + + See https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v14.pl#L954-L983 + In our case, the input string is expected to be just one line + and no HTML entities de-escaping is needed. + So we just tokenize on punctuation and symbols, + except when a punctuation is preceded and followed by a digit + (e.g. a comma/dot as a thousand/decimal separator). + + Args: + string: the input string + + Returns: + a list of tokens + """ + string = UnicodeRegex.nondigit_punct_re.sub(r'\1 \2 ', string) + string = UnicodeRegex.punct_nondigit_re.sub(r' \1 \2', string) + string = UnicodeRegex.symbol_re.sub(r' \1 ', string) + return string.split() + + +def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False): + """Compute BLEU for two files (reference and hypothesis translation).""" + # TODO: Does anyone care about Python2 compatibility? + ref_lines = open(ref_filename, 'rt', encoding='utf-8').read().splitlines() + hyp_lines = open(hyp_filename, 'rt', encoding='utf-8').read().splitlines() + assert len(ref_lines) == len(hyp_lines) + if not case_sensitive: + ref_lines = [x.lower() for x in ref_lines] + hyp_lines = [x.lower() for x in hyp_lines] + ref_tokens = [bleu_tokenize(x) for x in ref_lines] + hyp_tokens = [bleu_tokenize(x) for x in hyp_lines] + return compute_bleu(ref_tokens, hyp_tokens) From 685c9fccd951da1a90571407bc35e428a9322bb2 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Mon, 20 Nov 2017 19:39:11 +0100 Subject: [PATCH 0626/4095] allow t2t-decoder to load any checkpoint not necessarily the latest one in a given output_dir --- tensor2tensor/bin/t2t-datagen | 0 tensor2tensor/bin/t2t-decoder | 7 +++++-- tensor2tensor/bin/t2t-make-tf-configs | 0 tensor2tensor/bin/t2t-trainer | 0 tensor2tensor/utils/decoding.py | 4 ++-- 5 files changed, 7 insertions(+), 4 deletions(-) mode change 100644 => 100755 tensor2tensor/bin/t2t-datagen mode change 100644 => 100755 tensor2tensor/bin/t2t-decoder mode change 100644 => 100755 tensor2tensor/bin/t2t-make-tf-configs mode change 100644 => 100755 tensor2tensor/bin/t2t-trainer diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100644 new mode 100755 diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder old mode 100644 new mode 100755 index 712cb45ce..4c83610b3 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -46,7 +46,10 @@ import tensorflow as tf flags = tf.flags FLAGS = flags.FLAGS -flags.DEFINE_string("output_dir", "", "Training directory to load from.") +flags.DEFINE_string("output_dir", "", + "Training directory where the latest checkpoint is used.") +flags.DEFINE_string("checkpoint_path", None, + "Path to the model checkpoint. Overrides output_dir.") flags.DEFINE_string("decode_from_file", None, "Path to the source file for decoding") flags.DEFINE_string("decode_to_file", None, @@ -90,7 +93,7 @@ def main(_): decoding.decode_interactively(estimator, decode_hp) elif FLAGS.decode_from_file: decoding.decode_from_file(estimator, FLAGS.decode_from_file, decode_hp, - FLAGS.decode_to_file) + FLAGS.decode_to_file, checkpoint_path=FLAGS.checkpoint_path) else: decoding.decode_from_dataset( estimator, diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs old mode 100644 new mode 100755 diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100644 new mode 100755 diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index d0913e0e1..426110ad8 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -200,7 +200,7 @@ def decode_from_dataset(estimator, tf.logging.info("Completed inference on %d samples." % num_predictions) # pylint: disable=undefined-loop-variable -def decode_from_file(estimator, filename, decode_hp, decode_to_file=None): +def decode_from_file(estimator, filename, decode_hp, decode_to_file=None, checkpoint_path=None): """Compute predictions on entries in filename and write them out.""" if not decode_hp.batch_size: decode_hp.batch_size = 32 @@ -230,7 +230,7 @@ def input_fn(): return _decode_input_tensor_to_features_dict(example, hparams) decodes = [] - result_iter = estimator.predict(input_fn) + result_iter = estimator.predict(input_fn, checkpoint_path=checkpoint_path) for result in result_iter: if decode_hp.return_beams: beam_decodes = [] From aade7ec1096b018693f163638788b9010c499aae Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Wed, 22 Nov 2017 15:39:53 +0100 Subject: [PATCH 0627/4095] add t2t-bleu script for proper BLEU evaluation --- README.md | 7 +- tensor2tensor/bin/t2t-bleu | 145 +++++++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 2 deletions(-) create mode 100755 tensor2tensor/bin/t2t-bleu diff --git a/README.md b/README.md index 9525e9bcb..c125ce3bd 100644 --- a/README.md +++ b/README.md @@ -126,10 +126,13 @@ t2t-decoder \ --output_dir=$TRAIN_DIR \ --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \ --decode_from_file=$DECODE_FILE - -cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes + --decode_to_file=translation.en ``` +# Eval BLEU + +t2t-bleu --translation=translation.en --reference=ref-translation.de + --- ## Installation diff --git a/tensor2tensor/bin/t2t-bleu b/tensor2tensor/bin/t2t-bleu new file mode 100755 index 000000000..2a2d306a3 --- /dev/null +++ b/tensor2tensor/bin/t2t-bleu @@ -0,0 +1,145 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Evaluate BLEU score for all checkpoints in a given directory. + +This script can be used in two ways. + +To evaluate an already translated file: +`t2t-bleu --translation=my-wmt13.de --reference=wmt13_deen.de` + +To evaluate all checkpoints in a given directory: +`t2t-bleu + --model_dir=t2t_train + --data_dir=t2t_data + --translations_dir=my-translations + --problems=translate_ende_wmt32k + --hparams_set=transformer_big_single_gpu + --source=wmt13_deen.en + --reference=wmt13_deen.de` +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +import time +from collections import namedtuple +from tensor2tensor.utils import decoding +from tensor2tensor.utils import trainer_utils +from tensor2tensor.utils import usr_dir +from tensor2tensor.utils import bleu_hook +import tensorflow as tf + +flags = tf.flags +FLAGS = flags.FLAGS + +# t2t-bleu specific options +flags.DEFINE_string("bleu_variant", "both", "Possible values: cased(case-sensitive), uncased, both(default).") +flags.DEFINE_string("model_dir", "", "Directory to load model checkpoints from.") +flags.DEFINE_string("translation", None, "Path to the MT system translation file") +flags.DEFINE_string("source", None, "Path to the source-language file to be translated") +flags.DEFINE_string("reference", None, "Path to the reference translation file") +flags.DEFINE_string("translations_dir", "translations", "Where to store the translated files") +flags.DEFINE_bool("report_zero", True, "Store BLEU=0 and guess its time via flags.txt") + +# options derived from t2t-decode +flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_model calls, that will then be " + "available to the t2t-decoder.") +flags.DEFINE_string("master", "", "Address of TensorFlow master.") +flags.DEFINE_string("schedule", "train_and_evaluate", + "Must be train_and_evaluate for decoding.") + +Model = namedtuple('Model', 'filename time steps') + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + if FLAGS.translation: + if FLAGS.model_dir: + raise ValueError('Cannot specify both --translation and --model_dir.') + if FLAGS.bleu_variant in ('uncased', 'both'): + bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation, case_sensitive=False) + print("BLEU_uncased = %6.2f" % bleu) + if FLAGS.bleu_variant in ('cased', 'both'): + bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation, case_sensitive=True) + print("BLEU_cased = %6.2f" % bleu) + return + + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + FLAGS.model = FLAGS.model or 'transformer' + FLAGS.output_dir = FLAGS.model_dir + trainer_utils.log_registry() + trainer_utils.validate_flags() + assert FLAGS.schedule == "train_and_evaluate" + data_dir = os.path.expanduser(FLAGS.data_dir) + model_dir = os.path.expanduser(FLAGS.model_dir) + + hparams = trainer_utils.create_hparams( + FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams) + trainer_utils.add_problem_hparams(hparams, FLAGS.problems) + estimator, _ = trainer_utils.create_experiment_components( + data_dir=data_dir, + model_name=FLAGS.model, + hparams=hparams, + run_config=trainer_utils.create_run_config(model_dir)) + + decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) + decode_hp.add_hparam("shards", FLAGS.decode_shards) + decode_hp.add_hparam("shard_id", FLAGS.worker_id) + + os.makedirs(FLAGS.translations_dir, exist_ok=True) + translated_base_file = os.path.join(FLAGS.translations_dir, FLAGS.problems) + models = [Model(x[:-6], os.path.getctime(x), int(x[:-6].rsplit('-')[-1])) + for x in tf.gfile.Glob(os.path.join(model_dir, 'model.ckpt-*.index'))] + models = sorted(models, key=lambda x: x.time) + tf.logging.info("Found %d models with steps: %s" % (len(models), ", ".join(str(x.steps) for x in models))) + + writer = tf.summary.FileWriter(FLAGS.model_dir) + if FLAGS.report_zero: + start_time = os.path.getctime(os.path.join(model_dir, 'flags.txt')) + values = [] + if FLAGS.bleu_variant in ('uncased', 'both'): + values.append(tf.Summary.Value(tag='BLEU_uncased', simple_value=0)) + if FLAGS.bleu_variant in ('cased', 'both'): + values.append(tf.Summary.Value(tag='BLEU_cased', simple_value=0)) + writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), wall_time=start_time, step=0)) + + for model in models: + tf.logging.info("Evaluating " + model.filename) + out_file = translated_base_file + '-' + str(model.steps) + tf.logging.set_verbosity(tf.logging.ERROR) # decode_from_file logs all the translations as INFO + decoding.decode_from_file(estimator, FLAGS.source, decode_hp, out_file, checkpoint_path=model.filename) + tf.logging.set_verbosity(tf.logging.INFO) + values = [] + if FLAGS.bleu_variant in ('uncased', 'both'): + bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, out_file, case_sensitive=False) + values.append(tf.Summary.Value(tag='BLEU_uncased', simple_value=bleu)) + tf.logging.info("%s: BLEU_uncased = %6.2f" % (model.filename, bleu)) + if FLAGS.bleu_variant in ('cased', 'both'): + bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, out_file, case_sensitive=True) + values.append(tf.Summary.Value(tag='BLEU_cased', simple_value=bleu)) + tf.logging.info("%s: BLEU_cased = %6.2f" % (model.filename, bleu)) + writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), wall_time=model.time, step=model.steps)) + + writer.flush() + +if __name__ == "__main__": + tf.app.run() From b912213b11de4ac0a5a1ba7cfbe591c3a9381ad1 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Thu, 23 Nov 2017 00:58:04 +0100 Subject: [PATCH 0628/4095] fix tests because of the added smoothing in BLEU --- tensor2tensor/utils/bleu_hook.py | 8 +++++++- tensor2tensor/utils/bleu_hook_test.py | 8 +++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 864ffddf0..655f3b91d 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -155,7 +155,7 @@ def _property_chars(prefix): def bleu_tokenize(string): - """"Tokenize a string following the official BLEU implementation. + r"""Tokenize a string following the official BLEU implementation. See https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v14.pl#L954-L983 In our case, the input string is expected to be just one line @@ -164,6 +164,12 @@ def bleu_tokenize(string): except when a punctuation is preceded and followed by a digit (e.g. a comma/dot as a thousand/decimal separator). + Note that a numer (e.g. a year) followed by a dot at the end of sentence is NOT tokenized, + i.e. the dot stays with the number because `s/(\p{P})(\P{N})/ $1 $2/g` + does not match this case (unless we add a space after each sentence). + However, this error is already in the original mteval-v14.pl + and we want to be consistent with it. + Args: string: the input string diff --git a/tensor2tensor/utils/bleu_hook_test.py b/tensor2tensor/utils/bleu_hook_test.py index bf08174f8..f5976941f 100644 --- a/tensor2tensor/utils/bleu_hook_test.py +++ b/tensor2tensor/utils/bleu_hook_test.py @@ -39,8 +39,9 @@ def testComputeNotEqual(self): translation_corpus = [[1, 2, 3, 4]] reference_corpus = [[5, 6, 7, 8]] bleu = bleu_hook.compute_bleu(reference_corpus, translation_corpus) - actual_bleu = 0.0 - self.assertEqual(bleu, actual_bleu) + # The smoothing prevents 0 for small corpora + actual_bleu = 0.0798679 + self.assertAllClose(bleu, actual_bleu, atol=1e-03) def testComputeMultipleBatch(self): translation_corpus = [[1, 2, 3, 4], [5, 6, 7, 0]] @@ -53,8 +54,9 @@ def testComputeMultipleNgrams(self): reference_corpus = [[1, 2, 1, 13], [12, 6, 7, 4, 8, 9, 10]] translation_corpus = [[1, 2, 1, 3], [5, 6, 7, 4]] bleu = bleu_hook.compute_bleu(reference_corpus, translation_corpus) - actual_bleu = 0.486 + actual_bleu = 0.3436 self.assertAllClose(bleu, actual_bleu, atol=1e-03) + if __name__ == '__main__': tf.test.main() From 15830324546fd80aa8a18249816a4ebfddc6c765 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Thu, 23 Nov 2017 02:16:24 +0100 Subject: [PATCH 0629/4095] fix tests for Python2 --- tensor2tensor/utils/bleu_hook.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 655f3b91d..135ef36fa 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -30,6 +30,7 @@ # pylint: disable=redefined-builtin from six.moves import xrange from six.moves import zip +import six # pylint: enable=redefined-builtin import tensorflow as tf @@ -146,8 +147,8 @@ class UnicodeRegex: without dependening on https://pypi.python.org/pypi/regex/.""" def _property_chars(prefix): - return ''.join(chr(x) for x in range(sys.maxunicode) - if unicodedata.category(chr(x)).startswith(prefix)) + return ''.join(six.unichr(x) for x in range(sys.maxunicode) + if unicodedata.category(six.unichr(x)).startswith(prefix)) punctuation = _property_chars('P') nondigit_punct_re = re.compile(r'([^\d])([' + punctuation + r'])') punct_nondigit_re = re.compile(r'([' + punctuation + r'])([^\d])') From a43eff3b2b4077782d207b853a88ddcca0721099 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Sat, 25 Nov 2017 00:22:48 +0100 Subject: [PATCH 0630/4095] more options for t2t-bleu So it can be used for continous evaluation or for resuming older evaluation from a checkpoint with a given number of steps. It is also possible to specify the name of the events subdirectory and tag suffix. --- tensor2tensor/bin/t2t-bleu | 75 ++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 11 deletions(-) diff --git a/tensor2tensor/bin/t2t-bleu b/tensor2tensor/bin/t2t-bleu index 2a2d306a3..89c93e0cc 100755 --- a/tensor2tensor/bin/t2t-bleu +++ b/tensor2tensor/bin/t2t-bleu @@ -30,6 +30,27 @@ To evaluate all checkpoints in a given directory: --hparams_set=transformer_big_single_gpu --source=wmt13_deen.en --reference=wmt13_deen.de` + +In addition to the above-mentioned compulsory parameters, +there are optional parameters: + + * bleu_variant: cased (case-sensitive), uncased, both (default). + * translations_dir: Where to store the translated files? Default="translations". + * even_subdir: Where in the model_dir to store the even file? Default="", + which means TensorBoard will show it as the same run as the training, but it will warn + about "more than one metagraph event per run". event_subdir can be used e.g. if running + this script several times with different `--decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA"`. + * tag_suffix: Default="", so the tags will be BLEU_cased and BLEU_uncased. Again, tag_suffix + can be used e.g. for different beam sizes if these should be plotted in different graphs. + * min_steps: Don't evaluate checkpoints with less steps. + Default=-1 means check the `last_evaluated_step.txt` file, which contains the number of steps + of the last successfully evaluated checkpoint. + * report_zero: Store BLEU=0 and guess its time based on flags.txt. Default=True. + This is useful, so TensorBoard reports correct relative time for the remaining checkpoints. + This flag is set to False if min_steps is > 0. + * wait_secs: Wait upto N seconds for a new checkpoint. Default=0. + This is useful for continuous evaluation of a running training, + in which case this should be equal to save_checkpoints_secs plus some reserve. """ from __future__ import absolute_import from __future__ import division @@ -53,7 +74,11 @@ flags.DEFINE_string("translation", None, "Path to the MT system translation file flags.DEFINE_string("source", None, "Path to the source-language file to be translated") flags.DEFINE_string("reference", None, "Path to the reference translation file") flags.DEFINE_string("translations_dir", "translations", "Where to store the translated files") -flags.DEFINE_bool("report_zero", True, "Store BLEU=0 and guess its time via flags.txt") +flags.DEFINE_string("event_subdir", "", "Where in model_dir to store the event file") +flags.DEFINE_string("tag_suffix", "", "What to add to BLEU_cased and BLEU_uncased tags. Default=''.") +flags.DEFINE_integer("min_steps", -1, "Don't evaluate checkpoints with less steps.") +flags.DEFINE_integer("wait_secs", 0, "Wait upto N seconds for a new checkpoint, cf. save_checkpoints_secs.") +flags.DEFINE_bool("report_zero", None, "Store BLEU=0 and guess its time based on flags.txt") # options derived from t2t-decode flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") @@ -70,6 +95,11 @@ flags.DEFINE_string("schedule", "train_and_evaluate", Model = namedtuple('Model', 'filename time steps') +def read_checkpoints_list(model_dir, min_steps): + models = [Model(x[:-6], os.path.getctime(x), int(x[:-6].rsplit('-')[-1])) + for x in tf.gfile.Glob(os.path.join(model_dir, 'model.ckpt-*.index'))] + return sorted((x for x in models if x.steps > min_steps), key=lambda x: x.steps) + def main(_): tf.logging.set_verbosity(tf.logging.INFO) if FLAGS.translation: @@ -107,22 +137,43 @@ def main(_): os.makedirs(FLAGS.translations_dir, exist_ok=True) translated_base_file = os.path.join(FLAGS.translations_dir, FLAGS.problems) - models = [Model(x[:-6], os.path.getctime(x), int(x[:-6].rsplit('-')[-1])) - for x in tf.gfile.Glob(os.path.join(model_dir, 'model.ckpt-*.index'))] - models = sorted(models, key=lambda x: x.time) + event_dir = os.path.join(FLAGS.model_dir, FLAGS.event_subdir) + last_step_file = os.path.join(event_dir, 'last_evaluated_step.txt') + if FLAGS.min_steps == -1: + try: + with open(last_step_file) as ls_file: + FLAGS.min_steps = int(ls_file.read()) + except FileNotFoundError: + FLAGS.min_steps = 0 + if FLAGS.report_zero is None: + FLAGS.report_zero = FLAGS.min_steps == 0 + + models = read_checkpoints_list(model_dir, FLAGS.min_steps) tf.logging.info("Found %d models with steps: %s" % (len(models), ", ".join(str(x.steps) for x in models))) - writer = tf.summary.FileWriter(FLAGS.model_dir) + writer = tf.summary.FileWriter(event_dir) if FLAGS.report_zero: start_time = os.path.getctime(os.path.join(model_dir, 'flags.txt')) values = [] if FLAGS.bleu_variant in ('uncased', 'both'): - values.append(tf.Summary.Value(tag='BLEU_uncased', simple_value=0)) + values.append(tf.Summary.Value(tag='BLEU_uncased' + FLAGS.tag_suffix, simple_value=0)) if FLAGS.bleu_variant in ('cased', 'both'): - values.append(tf.Summary.Value(tag='BLEU_cased', simple_value=0)) + values.append(tf.Summary.Value(tag='BLEU_cased' + FLAGS.tag_suffix, simple_value=0)) writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), wall_time=start_time, step=0)) - for model in models: + exit_time = time.time() + FLAGS.wait_secs + min_steps = FLAGS.min_steps + while True: + if not models and FLAGS.wait_secs: + tf.logging.info('All checkpoints evaluated. Waiting till %s if a new checkpoint appears' % time.asctime(time.localtime(exit_time))) + while not models and time.time() < exit_time: + time.sleep(10) + models = read_checkpoints_list(model_dir, min_steps) + if not models: + return + + model = models.pop(0) + exit_time, min_steps = model.time + FLAGS.wait_secs, model.steps tf.logging.info("Evaluating " + model.filename) out_file = translated_base_file + '-' + str(model.steps) tf.logging.set_verbosity(tf.logging.ERROR) # decode_from_file logs all the translations as INFO @@ -131,15 +182,17 @@ def main(_): values = [] if FLAGS.bleu_variant in ('uncased', 'both'): bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, out_file, case_sensitive=False) - values.append(tf.Summary.Value(tag='BLEU_uncased', simple_value=bleu)) + values.append(tf.Summary.Value(tag='BLEU_uncased' + FLAGS.tag_suffix, simple_value=bleu)) tf.logging.info("%s: BLEU_uncased = %6.2f" % (model.filename, bleu)) if FLAGS.bleu_variant in ('cased', 'both'): bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, out_file, case_sensitive=True) - values.append(tf.Summary.Value(tag='BLEU_cased', simple_value=bleu)) + values.append(tf.Summary.Value(tag='BLEU_cased' + FLAGS.tag_suffix, simple_value=bleu)) tf.logging.info("%s: BLEU_cased = %6.2f" % (model.filename, bleu)) writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), wall_time=model.time, step=model.steps)) + writer.flush() + with open(last_step_file, 'w') as ls_file: + ls_file.write(str(model.steps) + '\n') - writer.flush() if __name__ == "__main__": tf.app.run() From 7fc50b99d98e7afb6826928cc43cb6d6c66beae7 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Sat, 25 Nov 2017 02:36:18 +0100 Subject: [PATCH 0631/4095] first try a new checkpoint before checking the time elapsed Fix for the case when evaluating one checkpoint takes longer than creating a new checkpoint. --- tensor2tensor/bin/t2t-bleu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/bin/t2t-bleu b/tensor2tensor/bin/t2t-bleu index 89c93e0cc..60623857f 100755 --- a/tensor2tensor/bin/t2t-bleu +++ b/tensor2tensor/bin/t2t-bleu @@ -166,9 +166,11 @@ def main(_): while True: if not models and FLAGS.wait_secs: tf.logging.info('All checkpoints evaluated. Waiting till %s if a new checkpoint appears' % time.asctime(time.localtime(exit_time))) - while not models and time.time() < exit_time: + while True: time.sleep(10) models = read_checkpoints_list(model_dir, min_steps) + if models or time.time() > exit_time: + break if not models: return From 004888aefc8a213aa145e779e157f95bf0ee58ad Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Thu, 30 Nov 2017 13:44:02 +0100 Subject: [PATCH 0632/4095] add t2t-bleu to setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 5027918af..3acde12ab 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ 'tensor2tensor/bin/t2t-datagen', 'tensor2tensor/bin/t2t-decoder', 'tensor2tensor/bin/t2t-make-tf-configs', + 'tensor2tensor/bin/t2t-bleu', ], install_requires=[ 'bz2file', From 7ba78a237e8977ce6e8d00527dc78fbfca289bd1 Mon Sep 17 00:00:00 2001 From: Martin Popel <popel@ufal.mff.cuni.cz> Date: Thu, 30 Nov 2017 20:35:07 +0100 Subject: [PATCH 0633/4095] if no reference or translation is provided fail with a clear error message instead of misleading "division by zero" --- tensor2tensor/utils/bleu_hook.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 135ef36fa..270c44788 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -96,6 +96,8 @@ def compute_bleu(reference_corpus, matches_by_order[len(ngram) - 1] += overlap[ngram] for ngram in translation_ngram_counts: possible_matches_by_order[len(ngram)-1] += translation_ngram_counts[ngram] + assert reference_length, "no reference provided" + assert translation_length, "no translation provided" precisions = [0] * max_order smooth = 1.0 for i in xrange(0, max_order): From 20c7e41d12300cf63587a24b3ede1f25ffb6a416 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 29 Nov 2017 15:21:31 -0800 Subject: [PATCH 0634/4095] Discrete autoencoder with VQ-VAE as in https://arxiv.org/abs/1711.00937. PiperOrigin-RevId: 177371794 --- README.md | 7 +- setup.py | 1 - tensor2tensor/bin/t2t-bleu | 200 ------------------------ tensor2tensor/bin/t2t-datagen | 0 tensor2tensor/bin/t2t-decoder | 7 +- tensor2tensor/bin/t2t-make-tf-configs | 0 tensor2tensor/bin/t2t-trainer | 0 tensor2tensor/models/transformer_vae.py | 21 ++- tensor2tensor/utils/bleu_hook.py | 68 +------- tensor2tensor/utils/bleu_hook_test.py | 8 +- tensor2tensor/utils/decoding.py | 4 +- 11 files changed, 27 insertions(+), 289 deletions(-) delete mode 100755 tensor2tensor/bin/t2t-bleu mode change 100755 => 100644 tensor2tensor/bin/t2t-datagen mode change 100755 => 100644 tensor2tensor/bin/t2t-decoder mode change 100755 => 100644 tensor2tensor/bin/t2t-make-tf-configs mode change 100755 => 100644 tensor2tensor/bin/t2t-trainer diff --git a/README.md b/README.md index c125ce3bd..9525e9bcb 100644 --- a/README.md +++ b/README.md @@ -126,12 +126,9 @@ t2t-decoder \ --output_dir=$TRAIN_DIR \ --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \ --decode_from_file=$DECODE_FILE - --decode_to_file=translation.en -``` - -# Eval BLEU -t2t-bleu --translation=translation.en --reference=ref-translation.de +cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes +``` --- diff --git a/setup.py b/setup.py index 3acde12ab..5027918af 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,6 @@ 'tensor2tensor/bin/t2t-datagen', 'tensor2tensor/bin/t2t-decoder', 'tensor2tensor/bin/t2t-make-tf-configs', - 'tensor2tensor/bin/t2t-bleu', ], install_requires=[ 'bz2file', diff --git a/tensor2tensor/bin/t2t-bleu b/tensor2tensor/bin/t2t-bleu deleted file mode 100755 index 60623857f..000000000 --- a/tensor2tensor/bin/t2t-bleu +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Evaluate BLEU score for all checkpoints in a given directory. - -This script can be used in two ways. - -To evaluate an already translated file: -`t2t-bleu --translation=my-wmt13.de --reference=wmt13_deen.de` - -To evaluate all checkpoints in a given directory: -`t2t-bleu - --model_dir=t2t_train - --data_dir=t2t_data - --translations_dir=my-translations - --problems=translate_ende_wmt32k - --hparams_set=transformer_big_single_gpu - --source=wmt13_deen.en - --reference=wmt13_deen.de` - -In addition to the above-mentioned compulsory parameters, -there are optional parameters: - - * bleu_variant: cased (case-sensitive), uncased, both (default). - * translations_dir: Where to store the translated files? Default="translations". - * even_subdir: Where in the model_dir to store the even file? Default="", - which means TensorBoard will show it as the same run as the training, but it will warn - about "more than one metagraph event per run". event_subdir can be used e.g. if running - this script several times with different `--decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA"`. - * tag_suffix: Default="", so the tags will be BLEU_cased and BLEU_uncased. Again, tag_suffix - can be used e.g. for different beam sizes if these should be plotted in different graphs. - * min_steps: Don't evaluate checkpoints with less steps. - Default=-1 means check the `last_evaluated_step.txt` file, which contains the number of steps - of the last successfully evaluated checkpoint. - * report_zero: Store BLEU=0 and guess its time based on flags.txt. Default=True. - This is useful, so TensorBoard reports correct relative time for the remaining checkpoints. - This flag is set to False if min_steps is > 0. - * wait_secs: Wait upto N seconds for a new checkpoint. Default=0. - This is useful for continuous evaluation of a running training, - in which case this should be equal to save_checkpoints_secs plus some reserve. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import time -from collections import namedtuple -from tensor2tensor.utils import decoding -from tensor2tensor.utils import trainer_utils -from tensor2tensor.utils import usr_dir -from tensor2tensor.utils import bleu_hook -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -# t2t-bleu specific options -flags.DEFINE_string("bleu_variant", "both", "Possible values: cased(case-sensitive), uncased, both(default).") -flags.DEFINE_string("model_dir", "", "Directory to load model checkpoints from.") -flags.DEFINE_string("translation", None, "Path to the MT system translation file") -flags.DEFINE_string("source", None, "Path to the source-language file to be translated") -flags.DEFINE_string("reference", None, "Path to the reference translation file") -flags.DEFINE_string("translations_dir", "translations", "Where to store the translated files") -flags.DEFINE_string("event_subdir", "", "Where in model_dir to store the event file") -flags.DEFINE_string("tag_suffix", "", "What to add to BLEU_cased and BLEU_uncased tags. Default=''.") -flags.DEFINE_integer("min_steps", -1, "Don't evaluate checkpoints with less steps.") -flags.DEFINE_integer("wait_secs", 0, "Wait upto N seconds for a new checkpoint, cf. save_checkpoints_secs.") -flags.DEFINE_bool("report_zero", None, "Store BLEU=0 and guess its time based on flags.txt") - -# options derived from t2t-decode -flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_model calls, that will then be " - "available to the t2t-decoder.") -flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_string("schedule", "train_and_evaluate", - "Must be train_and_evaluate for decoding.") - -Model = namedtuple('Model', 'filename time steps') - - -def read_checkpoints_list(model_dir, min_steps): - models = [Model(x[:-6], os.path.getctime(x), int(x[:-6].rsplit('-')[-1])) - for x in tf.gfile.Glob(os.path.join(model_dir, 'model.ckpt-*.index'))] - return sorted((x for x in models if x.steps > min_steps), key=lambda x: x.steps) - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - if FLAGS.translation: - if FLAGS.model_dir: - raise ValueError('Cannot specify both --translation and --model_dir.') - if FLAGS.bleu_variant in ('uncased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation, case_sensitive=False) - print("BLEU_uncased = %6.2f" % bleu) - if FLAGS.bleu_variant in ('cased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation, case_sensitive=True) - print("BLEU_cased = %6.2f" % bleu) - return - - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - FLAGS.model = FLAGS.model or 'transformer' - FLAGS.output_dir = FLAGS.model_dir - trainer_utils.log_registry() - trainer_utils.validate_flags() - assert FLAGS.schedule == "train_and_evaluate" - data_dir = os.path.expanduser(FLAGS.data_dir) - model_dir = os.path.expanduser(FLAGS.model_dir) - - hparams = trainer_utils.create_hparams( - FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams) - trainer_utils.add_problem_hparams(hparams, FLAGS.problems) - estimator, _ = trainer_utils.create_experiment_components( - data_dir=data_dir, - model_name=FLAGS.model, - hparams=hparams, - run_config=trainer_utils.create_run_config(model_dir)) - - decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) - decode_hp.add_hparam("shards", FLAGS.decode_shards) - decode_hp.add_hparam("shard_id", FLAGS.worker_id) - - os.makedirs(FLAGS.translations_dir, exist_ok=True) - translated_base_file = os.path.join(FLAGS.translations_dir, FLAGS.problems) - event_dir = os.path.join(FLAGS.model_dir, FLAGS.event_subdir) - last_step_file = os.path.join(event_dir, 'last_evaluated_step.txt') - if FLAGS.min_steps == -1: - try: - with open(last_step_file) as ls_file: - FLAGS.min_steps = int(ls_file.read()) - except FileNotFoundError: - FLAGS.min_steps = 0 - if FLAGS.report_zero is None: - FLAGS.report_zero = FLAGS.min_steps == 0 - - models = read_checkpoints_list(model_dir, FLAGS.min_steps) - tf.logging.info("Found %d models with steps: %s" % (len(models), ", ".join(str(x.steps) for x in models))) - - writer = tf.summary.FileWriter(event_dir) - if FLAGS.report_zero: - start_time = os.path.getctime(os.path.join(model_dir, 'flags.txt')) - values = [] - if FLAGS.bleu_variant in ('uncased', 'both'): - values.append(tf.Summary.Value(tag='BLEU_uncased' + FLAGS.tag_suffix, simple_value=0)) - if FLAGS.bleu_variant in ('cased', 'both'): - values.append(tf.Summary.Value(tag='BLEU_cased' + FLAGS.tag_suffix, simple_value=0)) - writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), wall_time=start_time, step=0)) - - exit_time = time.time() + FLAGS.wait_secs - min_steps = FLAGS.min_steps - while True: - if not models and FLAGS.wait_secs: - tf.logging.info('All checkpoints evaluated. Waiting till %s if a new checkpoint appears' % time.asctime(time.localtime(exit_time))) - while True: - time.sleep(10) - models = read_checkpoints_list(model_dir, min_steps) - if models or time.time() > exit_time: - break - if not models: - return - - model = models.pop(0) - exit_time, min_steps = model.time + FLAGS.wait_secs, model.steps - tf.logging.info("Evaluating " + model.filename) - out_file = translated_base_file + '-' + str(model.steps) - tf.logging.set_verbosity(tf.logging.ERROR) # decode_from_file logs all the translations as INFO - decoding.decode_from_file(estimator, FLAGS.source, decode_hp, out_file, checkpoint_path=model.filename) - tf.logging.set_verbosity(tf.logging.INFO) - values = [] - if FLAGS.bleu_variant in ('uncased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, out_file, case_sensitive=False) - values.append(tf.Summary.Value(tag='BLEU_uncased' + FLAGS.tag_suffix, simple_value=bleu)) - tf.logging.info("%s: BLEU_uncased = %6.2f" % (model.filename, bleu)) - if FLAGS.bleu_variant in ('cased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, out_file, case_sensitive=True) - values.append(tf.Summary.Value(tag='BLEU_cased' + FLAGS.tag_suffix, simple_value=bleu)) - tf.logging.info("%s: BLEU_cased = %6.2f" % (model.filename, bleu)) - writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), wall_time=model.time, step=model.steps)) - writer.flush() - with open(last_step_file, 'w') as ls_file: - ls_file.write(str(model.steps) + '\n') - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen old mode 100755 new mode 100644 diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder old mode 100755 new mode 100644 index 4c83610b3..712cb45ce --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -46,10 +46,7 @@ import tensorflow as tf flags = tf.flags FLAGS = flags.FLAGS -flags.DEFINE_string("output_dir", "", - "Training directory where the latest checkpoint is used.") -flags.DEFINE_string("checkpoint_path", None, - "Path to the model checkpoint. Overrides output_dir.") +flags.DEFINE_string("output_dir", "", "Training directory to load from.") flags.DEFINE_string("decode_from_file", None, "Path to the source file for decoding") flags.DEFINE_string("decode_to_file", None, @@ -93,7 +90,7 @@ def main(_): decoding.decode_interactively(estimator, decode_hp) elif FLAGS.decode_from_file: decoding.decode_from_file(estimator, FLAGS.decode_from_file, decode_hp, - FLAGS.decode_to_file, checkpoint_path=FLAGS.checkpoint_path) + FLAGS.decode_to_file) else: decoding.decode_from_dataset( estimator, diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs old mode 100755 new mode 100644 diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer old mode 100755 new mode 100644 diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 0bb5efea9..4a7290c23 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -147,8 +147,9 @@ def nearest(x, means, hparams): transpose_b=True) _, nearest_idx = tf.nn.top_k(- dist, k=1) nearest_hot = tf.one_hot(tf.squeeze(nearest_idx, axis=1), hparams.v_size) - nearest_hot = tf.reshape(nearest_hot, [tf.shape(x)[0], tf.shape(x)[1], - tf.shape(x)[2], hparams.v_size]) + shape = common_layers.shape_list(x) + shape[-1] = hparams.v_size + nearest_hot = tf.reshape(nearest_hot, shape=shape) return tf.stop_gradient(nearest_hot) @@ -156,8 +157,12 @@ def kmeans(x, means, hparams, name): with tf.variable_scope(name): x_means_hot = nearest(x, means, hparams) x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) - kl = tf.reduce_sum(tf.square(x - x_means), axis=-1) - return x_means_hot, tf.reduce_mean(kl) # * 10.0 + x_flat = tf.reshape(x, [-1, hparams.hidden_size]) + kl = tf.reduce_mean(tf.reduce_sum(tf.square(x_flat - x_means), axis=-1)) + reg_loss1 = tf.nn.l2_loss((tf.stop_gradient(x) - x_means)) + reg_loss2 = hparams.beta * tf.nn.l2_loss((x - tf.stop_gradient(x_means))) + l = kl + reg_loss1 + reg_loss2 + return x_means_hot, x_means, l def bit_to_int(x_bit, nbits): @@ -233,6 +238,12 @@ def embed(x): _, hot, l = dae(x, hparams, name) c = tf.argmax(hot, axis=-1) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") + if hparams.bottleneck_kind == "vq-vae": + means = tf.get_variable(name="means", shape=[hparams.v_size, + hparams.hidden_size]) + x_means_hot, x_means, l = kmeans(x, means, hparams, name="vq-vae-kmeans") + h1 = x_means + c = tf.argmax(x_means_hot, axis=-1) h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") res = tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") return res, c, l, embed @@ -500,6 +511,8 @@ def transformer_ae_small(): hparams.add_hparam("decode_autoregressive", True) hparams.add_hparam("do_vae", True) hparams.add_hparam("bit_vae", True) + hparams.add_hparam("beta", 0.25) + hparams.kl_warmup_steps = 150000 return hparams diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 270c44788..20a7c8426 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -20,9 +20,6 @@ import collections import math -import re -import sys -import unicodedata # Dependency imports @@ -30,7 +27,6 @@ # pylint: disable=redefined-builtin from six.moves import xrange from six.moves import zip -import six # pylint: enable=redefined-builtin import tensorflow as tf @@ -96,17 +92,10 @@ def compute_bleu(reference_corpus, matches_by_order[len(ngram) - 1] += overlap[ngram] for ngram in translation_ngram_counts: possible_matches_by_order[len(ngram)-1] += translation_ngram_counts[ngram] - assert reference_length, "no reference provided" - assert translation_length, "no translation provided" precisions = [0] * max_order - smooth = 1.0 for i in xrange(0, max_order): if possible_matches_by_order[i] > 0: - if matches_by_order[i] > 0: - precisions[i] = matches_by_order[i] / possible_matches_by_order[i] - else: - smooth *= 2 - precisions[i] = 1.0 / (smooth * possible_matches_by_order[i]) + precisions[i] = matches_by_order[i] / possible_matches_by_order[i] else: precisions[i] = 0.0 @@ -142,58 +131,3 @@ def bleu_score(predictions, labels, **unused_kwargs): bleu = tf.py_func(compute_bleu, (labels, outputs), tf.float32) return bleu, tf.constant(1.0) - - -class UnicodeRegex: - """Ad-hoc hack to recognize all punctuation and symbols. - - without dependening on https://pypi.python.org/pypi/regex/.""" - def _property_chars(prefix): - return ''.join(six.unichr(x) for x in range(sys.maxunicode) - if unicodedata.category(six.unichr(x)).startswith(prefix)) - punctuation = _property_chars('P') - nondigit_punct_re = re.compile(r'([^\d])([' + punctuation + r'])') - punct_nondigit_re = re.compile(r'([' + punctuation + r'])([^\d])') - symbol_re = re.compile('([' + _property_chars('S') + '])') - - -def bleu_tokenize(string): - r"""Tokenize a string following the official BLEU implementation. - - See https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v14.pl#L954-L983 - In our case, the input string is expected to be just one line - and no HTML entities de-escaping is needed. - So we just tokenize on punctuation and symbols, - except when a punctuation is preceded and followed by a digit - (e.g. a comma/dot as a thousand/decimal separator). - - Note that a numer (e.g. a year) followed by a dot at the end of sentence is NOT tokenized, - i.e. the dot stays with the number because `s/(\p{P})(\P{N})/ $1 $2/g` - does not match this case (unless we add a space after each sentence). - However, this error is already in the original mteval-v14.pl - and we want to be consistent with it. - - Args: - string: the input string - - Returns: - a list of tokens - """ - string = UnicodeRegex.nondigit_punct_re.sub(r'\1 \2 ', string) - string = UnicodeRegex.punct_nondigit_re.sub(r' \1 \2', string) - string = UnicodeRegex.symbol_re.sub(r' \1 ', string) - return string.split() - - -def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False): - """Compute BLEU for two files (reference and hypothesis translation).""" - # TODO: Does anyone care about Python2 compatibility? - ref_lines = open(ref_filename, 'rt', encoding='utf-8').read().splitlines() - hyp_lines = open(hyp_filename, 'rt', encoding='utf-8').read().splitlines() - assert len(ref_lines) == len(hyp_lines) - if not case_sensitive: - ref_lines = [x.lower() for x in ref_lines] - hyp_lines = [x.lower() for x in hyp_lines] - ref_tokens = [bleu_tokenize(x) for x in ref_lines] - hyp_tokens = [bleu_tokenize(x) for x in hyp_lines] - return compute_bleu(ref_tokens, hyp_tokens) diff --git a/tensor2tensor/utils/bleu_hook_test.py b/tensor2tensor/utils/bleu_hook_test.py index f5976941f..bf08174f8 100644 --- a/tensor2tensor/utils/bleu_hook_test.py +++ b/tensor2tensor/utils/bleu_hook_test.py @@ -39,9 +39,8 @@ def testComputeNotEqual(self): translation_corpus = [[1, 2, 3, 4]] reference_corpus = [[5, 6, 7, 8]] bleu = bleu_hook.compute_bleu(reference_corpus, translation_corpus) - # The smoothing prevents 0 for small corpora - actual_bleu = 0.0798679 - self.assertAllClose(bleu, actual_bleu, atol=1e-03) + actual_bleu = 0.0 + self.assertEqual(bleu, actual_bleu) def testComputeMultipleBatch(self): translation_corpus = [[1, 2, 3, 4], [5, 6, 7, 0]] @@ -54,9 +53,8 @@ def testComputeMultipleNgrams(self): reference_corpus = [[1, 2, 1, 13], [12, 6, 7, 4, 8, 9, 10]] translation_corpus = [[1, 2, 1, 3], [5, 6, 7, 4]] bleu = bleu_hook.compute_bleu(reference_corpus, translation_corpus) - actual_bleu = 0.3436 + actual_bleu = 0.486 self.assertAllClose(bleu, actual_bleu, atol=1e-03) - if __name__ == '__main__': tf.test.main() diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 426110ad8..d0913e0e1 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -200,7 +200,7 @@ def decode_from_dataset(estimator, tf.logging.info("Completed inference on %d samples." % num_predictions) # pylint: disable=undefined-loop-variable -def decode_from_file(estimator, filename, decode_hp, decode_to_file=None, checkpoint_path=None): +def decode_from_file(estimator, filename, decode_hp, decode_to_file=None): """Compute predictions on entries in filename and write them out.""" if not decode_hp.batch_size: decode_hp.batch_size = 32 @@ -230,7 +230,7 @@ def input_fn(): return _decode_input_tensor_to_features_dict(example, hparams) decodes = [] - result_iter = estimator.predict(input_fn, checkpoint_path=checkpoint_path) + result_iter = estimator.predict(input_fn) for result in result_iter: if decode_hp.return_beams: beam_decodes = [] From c9144dfa5f514cab529f487b069415daee5e211e Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 30 Nov 2017 11:23:07 -0800 Subject: [PATCH 0635/4095] Packed datasets - combine examples to constant length for efficient TPU training. Modify transformer to keep the packed-together examples from attending to one another. PiperOrigin-RevId: 177481956 --- .../data_generators/generator_utils.py | 163 ++++++++++++------ tensor2tensor/data_generators/inspect.py | 14 +- tensor2tensor/data_generators/lm1b.py | 8 +- tensor2tensor/data_generators/problem.py | 60 +++++-- .../data_generators/translate_ende.py | 16 +- tensor2tensor/layers/common_attention.py | 18 ++ tensor2tensor/layers/common_layers.py | 12 +- tensor2tensor/models/transformer.py | 129 ++++++++++---- 8 files changed, 304 insertions(+), 116 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index aa55ccb13..2d21da2ba 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -449,66 +449,131 @@ def shuffle_dataset(filenames): tf.gfile.Remove(fname) -def combine_examples_no_inputs(examples, max_length): - """Combine examples into longer examples. +class SequencePacker(object): + """Helper for constructing a packed example of sequence examples. - Concatenate targets to form target sequences with length up to max_length. - Target sequences longer than max_length are chopped into multiple sequences. + See comments to pack_examples() + """ - Args: - examples: a generator returning feature dictionaries. - max_length: an integer. + def __init__(self, first_sequence, spacing=2): + self._spacing = spacing + self._ids = first_sequence[:] + self._segmentation = [1] * len(first_sequence) + self._position = range(len(first_sequence)) - Yields: - feature dictionaries. - """ - partial = [] - for example in examples: - x = example["targets"] - if len(x) + len(partial) > max_length: - if partial: - yield {"inputs": [0], "targets": partial} - partial = [] - if len(x) > max_length: - num_fragments = len(x) // max_length - for i in xrange(num_fragments): - yield {"inputs": [0], "targets": x[max_length * i:max_length * (i + 1)]} - partial = x[max_length * num_fragments:] - else: - partial += x - if partial: - yield {"inputs": [0], "targets": partial} + def add(self, ids): + padding = [0] * self._spacing + self._ids.extend(padding + ids) + next_segment_num = self._segmentation[-1] + 1 if self._segmentation else 1 + self._segmentation.extend(padding + [next_segment_num] * len(ids)) + self._position.extend(padding + range(len(ids))) + + def can_fit(self, ids, packed_length): + return len(self._ids) + self._spacing + len(ids) <= packed_length + def to_dict(self): + return {"inputs": [0], + "targets": self._ids, + "targets_segmentation": self._segmentation, + "targets_position": self._position} -def combine_examples_with_inputs(examples, max_length): - """Combine examples into longer examples. - We combine multiple examples by concatenating the inputs and concatenating - the targets. Sequences where the inputs or the targets are too long are - emitted as singletons (not chopped). +class SequencePairPacker(object): + """Helper for packing sequence-to-sequence examples into bigger examples. + + See comments to pack_examples() + """ + + def __init__(self, first_sequence_pair, spacing=2): + self._inputs = SequencePacker(first_sequence_pair[0], spacing) + self._targets = SequencePacker(first_sequence_pair[1], spacing) + + def add(self, pair): + self._inputs.add(pair[0]) + self._targets.add(pair[1]) + + def can_fit(self, pair, packed_length): + return (self._inputs.can_fit(pair[0], packed_length) and + self._targets.can_fit(pair[1], packed_length)) + + def to_dict(self): + ret = self._targets.to_dict() + inputs_dict = self._inputs.to_dict() + ret["inputs"] = inputs_dict["targets"] + ret["inputs_segmentation"] = inputs_dict["targets_segmentation"] + ret["inputs_position"] = inputs_dict["targets_position"] + return ret + + +def pack_examples(examples, + has_inputs, + packed_length=256, + spacing=2, + queue_size=10, + chop_long_sequences=False): + """Pack examples into longer examples. + + If has_inputs=False, we are packing single-sequence examples with + targets only and no inputs. + + In this case, we concatenate the targets from several examples to form + each new example. We insert a number of zeros for spacing between the + original sequences. This is to help the sequences stay separate + under convolutions. If chop_long_sequences is set, then any input sequence + longer than packed_length gets chopped up into multiple examples. Otherwise, + long sequences are emitted as singletons. + + If has_inputs=True, then we are packing sequence-to-sequence + examples. We combine several examples by concatenating the inputs + (as above) and concatenating the targets (as above). Chopping of + long sequences is not supported. + + The packed examples are represented as dictionaries containing: + "inputs", "targets": the packed sequences described above + "inputs_segmentation", "targets_segmentation": + Sequences aligned with "inputs", "targets" specifying to which original + sequence each position belongs. Numbering starts from 1, and 0 is used + for spacing. This information is useful for preventing attention across + segments. + e.g. [1 1 1 1 1 1 0 0 2 2 2 0 0 3 3 3 3 3 0 0 4 4 4] + "inputs_position", "targets_position": + Sequences aligned with "inputs", "targets" specifying position within + the original sequence. This is useful for positional encodings. + e.g. [0 1 2 3 4 5 0 0 0 1 2 0 0 0 1 2 3 4 0 0 0 1 2] Args: examples: a generator returning feature dictionaries. - max_length: an integer. + has_inputs: a boolean + packed_length: an integer + spacing: an integer + queue_size: an integer + chop_long_sequences: a boolean Yields: feature dictionaries. """ - partial_a = [] - partial_b = [] + packer = SequencePairPacker if has_inputs else SequencePacker + combined = [] for example in examples: - a = example["inputs"] - b = example["targets"] - if (len(a) + len(partial_a) > max_length or - len(b) + len(partial_b) > max_length): - if partial_a or partial_b: - yield {"inputs": partial_a, "targets": partial_b} - partial_a = [] - partial_b = [] - if len(a) > max_length or len(b) > max_length: - yield {"inputs": a, "targets": b} - else: - partial_a += a - partial_b += b - if partial_a or partial_b: - yield {"inputs": partial_a, "targets": partial_b} + x = ((example["inputs"], example["targets"]) + if has_inputs else example["targets"]) + if chop_long_sequences and len(x) > packed_length: + assert not has_inputs + num_fragments = len(x) // packed_length + for i in xrange(num_fragments): + yield packer( + x[packed_length * i:packed_length * (i + 1)], spacing).to_dict() + x = x[packed_length * num_fragments:] + added = False + for c in combined: + if c.can_fit(x, packed_length): + c.add(x) + added = True + break + if not added: + if len(combined) == queue_size: + yield combined[0].to_dict() + combined = combined[1:] + combined.append(packer(x, spacing)) + for c in combined: + yield c.to_dict() diff --git a/tensor2tensor/data_generators/inspect.py b/tensor2tensor/data_generators/inspect.py index c84f00606..0293ca9c4 100644 --- a/tensor2tensor/data_generators/inspect.py +++ b/tensor2tensor/data_generators/inspect.py @@ -40,6 +40,7 @@ tf.flags.DEFINE_string("input_filename", "", "input filename") tf.flags.DEFINE_bool("print_inputs", False, "Print decoded inputs to stdout") tf.flags.DEFINE_bool("print_targets", False, "Print decoded targets to stdout") +tf.flags.DEFINE_bool("print_all", False, "Print all fields") FLAGS = tf.flags.FLAGS @@ -75,12 +76,15 @@ def main(_): total_sequences += 1 max_input_length = max(max_input_length, len(inputs)) max_target_length = max(max_target_length, len(targets)) + if FLAGS.print_all: + for k, v in x.features.feature.iteritems(): + print("%s: %s" % (k, v.int64_list.value)) - tf.logging.info("total_sequences: %d", total_sequences) - tf.logging.info("total_input_tokens: %d", total_input_tokens) - tf.logging.info("total_target_tokens: %d", total_target_tokens) - tf.logging.info("max_input_length: %d", max_input_length) - tf.logging.info("max_target_length: %d", max_target_length) + print("total_sequences: %d" % total_sequences) + print("total_input_tokens: %d" % total_input_tokens) + print("total_target_tokens: %d" % total_target_tokens) + print("max_input_length: %d" % max_input_length) + print("max_target_length: %d" % max_target_length) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/lm1b.py b/tensor2tensor/data_generators/lm1b.py index 3fa7d7e47..cd0eb8e3c 100644 --- a/tensor2tensor/data_generators/lm1b.py +++ b/tensor2tensor/data_generators/lm1b.py @@ -224,11 +224,11 @@ def generator(self, data_dir, tmp_dir, is_training): @registry.register_problem -class LanguagemodelLm1b8kConcat512(LanguagemodelLm1b32k): +class LanguagemodelLm1b8kPacked(LanguagemodelLm1b32k): """A language model on the 1B words corpus. 8k vocabualry. - Training/eval examples are concatenated to a maximum length of 512. + Training/eval examples are concatenated to a maximum length of 256. Happy TPU Training. @@ -241,8 +241,8 @@ def targeted_vocab_size(self): return 2**13 # 8192 @property - def combine_to_length(self): - return 512 + def packed_length(self): + return 256 @registry.register_problem diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index d2e30cbff..d80cc01da 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -670,20 +670,15 @@ def generator(self, data_dir, tmp_dir, is_training): """ raise NotImplementedError() - def maybe_combine_examples(self, generator): - if self.combine_to_length: - if self.has_inputs: - return generator_utils.combine_examples_with_inputs( - generator, self.combine_to_length) - else: - return generator_utils.combine_examples_no_inputs( - generator, self.combine_to_length) - else: - return generator - @property - def combine_to_length(self): - """An optional integer. Concatenate examples into bigger examples.""" + def packed_length(self): + """Pack multiple examples into a single example of constant length. + + This is useful for TPU training. See generator_utils.pack_examples(). + + Returns: + an optional integer + """ return None @property @@ -723,6 +718,15 @@ def use_subword_tokenizer(self): def has_inputs(self): return True # Set to False for language models. + def _maybe_pack_examples(self, generator): + """Helper to generate_data().""" + if self.packed_length: + return generator_utils.pack_examples( + generator, self.has_inputs, self.packed_length, + chop_long_sequences=not self.has_inputs) + else: + return generator + def generate_data(self, data_dir, tmp_dir, task_id=-1): train_paths = self.training_filepaths( data_dir, self.num_shards, shuffled=False) @@ -731,14 +735,14 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): if self.use_train_shards_for_dev: all_paths = train_paths + dev_paths generator_utils.generate_files( - self.maybe_combine_examples(self.generator(data_dir, tmp_dir, True)), + self._maybe_pack_examples(self.generator(data_dir, tmp_dir, True)), all_paths) generator_utils.shuffle_dataset(all_paths) else: generator_utils.generate_dataset_and_shuffle( - self.maybe_combine_examples(self.generator(data_dir, tmp_dir, True)), + self._maybe_pack_examples(self.generator(data_dir, tmp_dir, True)), train_paths, - self.maybe_combine_examples(self.generator(data_dir, tmp_dir, False)), + self._maybe_pack_examples(self.generator(data_dir, tmp_dir, False)), dev_paths) def feature_encoders(self, data_dir): @@ -770,6 +774,30 @@ def hparams(self, defaults, unused_model_hparams): p.target_space_id = self.target_space_id if self.is_character_level: p.loss_multiplier = 2.0 + if self.packed_length: + identity = (registry.Modalities.GENERIC, None) + if self.has_inputs: + p.input_modality["inputs_segmentation"] = identity + p.input_modality["inputs_position"] = identity + p.input_modality["targets_segmentation"] = identity + p.input_modality["targets_position"] = identity + + def example_reading_spec(self): + data_fields = { + "targets": tf.VarLenFeature(tf.int64) + } + if self.has_inputs: + data_fields["inputs"] = tf.VarLenFeature(tf.int64) + + if self.packed_length: + if self.has_inputs: + data_fields["inputs_segmentation"] = tf.VarLenFeature(tf.int64) + data_fields["inputs_position"] = tf.VarLenFeature(tf.int64) + data_fields["targets_segmentation"] = tf.VarLenFeature(tf.int64) + data_fields["targets_position"] = tf.VarLenFeature(tf.int64) + + data_items_to_decoders = None + return (data_fields, data_items_to_decoders) def eval_metrics(self): return [ diff --git a/tensor2tensor/data_generators/translate_ende.py b/tensor2tensor/data_generators/translate_ende.py index 2dc8e3a00..bbd502fdd 100644 --- a/tensor2tensor/data_generators/translate_ende.py +++ b/tensor2tensor/data_generators/translate_ende.py @@ -117,15 +117,15 @@ def target_space_id(self): @registry.register_problem -class TranslateEndeWmtBpe32kConcat512(TranslateEndeWmtBpe32k): +class TranslateEndeWmtBpe32kPacked(TranslateEndeWmtBpe32k): """Problem spec for WMT En-De translation, BPE version. - Training/eval examples are concatenated to a maximum length of 512. + Training/eval examples are concatenated to a maximum length of 256. """ @property - def combine_to_length(self): - return 512 + def packed_length(self): + return 256 @registry.register_problem @@ -168,6 +168,14 @@ def targeted_vocab_size(self): return 2**15 # 32768 +@registry.register_problem +class TranslateEndeWmt32kPacked(TranslateEndeWmt32k): + + @property + def packed_length(self): + return 256 + + @registry.register_problem class TranslateEndeWmtCharacters(translate.TranslateProblem): """Problem spec for WMT En-De translation.""" diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index f0bbaa39e..23cf074af 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -613,6 +613,24 @@ def attention_bias_lower_triangle(length): return attention_bias_local(length, -1, 0) +@expert_utils.add_name_scope() +def attention_bias_same_segment(query_segment_id, memory_segment_id): + """Create an bias tensor to be added to attention logits. + + Positions with the same segment_ids can see each other. + + Args: + query_segment_id: a float `Tensor` with shape [batch, query_length]. + memory_segment_id: a float `Tensor` with shape [batch, memory_length]. + + Returns: + a `Tensor` with shape [batch, 1, query_length, memory_length]. + """ + ret = tf.to_float(tf.not_equal(tf.expand_dims(query_segment_id, 2), + tf.expand_dims(memory_segment_id, 1))) * -1e9 + return tf.expand_dims(ret, axis=1) + + @expert_utils.add_name_scope() def attention_bias_ignore_padding(memory_padding): """Create an bias tensor to be added to attention logits. diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index f04d27f1d..ca8a28b99 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -1265,13 +1265,12 @@ def relu_density_logit(x, reduce_dims): return scaled -def maybe_zero_out_padding(inputs, kernel_size, padding, nonpadding_mask): +def maybe_zero_out_padding(inputs, kernel_size, nonpadding_mask): """If necessary, zero out inputs to a conv for padding positions. Args: inputs: a Tensor with shape [batch, length, ...] kernel_size: an integer or pair of integers - padding: a string, e.g. "SAME" nonpadding_mask: a Tensor with shape [batch, length] Returns: @@ -1279,7 +1278,6 @@ def maybe_zero_out_padding(inputs, kernel_size, padding, nonpadding_mask): """ if (kernel_size != 1 and kernel_size != (1, 1) and - padding == "SAME" and nonpadding_mask is not None): while nonpadding_mask.get_shape().ndims < inputs.get_shape().ndims: nonpadding_mask = tf.expand_dims(nonpadding_mask, -1) @@ -1310,13 +1308,13 @@ def conv_relu_conv(inputs, """Hidden layer with RELU activation followed by linear projection.""" with tf.variable_scope(name, "conv_relu_conv", [inputs]): inputs = maybe_zero_out_padding( - inputs, first_kernel_size, padding, nonpadding_mask) + inputs, first_kernel_size, nonpadding_mask) h = tpu_conv1d(inputs, filter_size, first_kernel_size, padding=padding, name="conv1") h = tf.nn.relu(h) if dropout != 0.0: h = tf.nn.dropout(h, 1.0 - dropout) - h = maybe_zero_out_padding(h, second_kernel_size, padding, nonpadding_mask) + h = maybe_zero_out_padding(h, second_kernel_size, nonpadding_mask) return tpu_conv1d(h, output_size, second_kernel_size, padding=padding, name="conv2") @@ -1333,7 +1331,7 @@ def sepconv_relu_sepconv(inputs, """Hidden layer with RELU activation followed by linear projection.""" with tf.variable_scope(name, "sepconv_relu_sepconv", [inputs]): inputs = maybe_zero_out_padding( - inputs, first_kernel_size, padding, nonpadding_mask) + inputs, first_kernel_size, nonpadding_mask) if inputs.get_shape().ndims == 3: is_3d = True inputs = tf.expand_dims(inputs, 2) @@ -1344,7 +1342,7 @@ def sepconv_relu_sepconv(inputs, padding=padding, name="conv1") if dropout != 0.0: h = tf.nn.dropout(h, 1.0 - dropout) - h = maybe_zero_out_padding(h, second_kernel_size, padding, nonpadding_mask) + h = maybe_zero_out_padding(h, second_kernel_size, nonpadding_mask) ret = separable_conv( h, output_size, second_kernel_size, padding=padding, name="conv2") if is_3d: diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index d345155f9..099a226b3 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -44,13 +44,15 @@ class Transformer(t2t_model.T2TModel): """Attention net. See file docstring.""" - def encode(self, inputs, target_space, hparams): + def encode(self, inputs, target_space, hparams, features=None): """Encode transformer inputs. Args: inputs: Transformer inputs [batch_size, input_length, hidden_dim] target_space: scalar, target space ID. hparams: hyperparmeters for model. + features: optionally pass the entire features dictionary as well. + This is needed now for "packed" datasets. Returns: Tuple of: @@ -62,13 +64,15 @@ def encode(self, inputs, target_space, hparams): inputs = common_layers.flatten4d3d(inputs) encoder_input, self_attention_bias, encoder_decoder_attention_bias = ( - transformer_prepare_encoder(inputs, target_space, hparams)) + transformer_prepare_encoder( + inputs, target_space, hparams, features=features)) encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.layer_prepostprocess_dropout) - encoder_output = transformer_encoder(encoder_input, self_attention_bias, - hparams) + encoder_output = transformer_encoder( + encoder_input, self_attention_bias, + hparams, nonpadding=_features_to_nonpadding(features, "inputs")) return encoder_output, encoder_decoder_attention_bias @@ -78,7 +82,8 @@ def decode(self, encoder_decoder_attention_bias, decoder_self_attention_bias, hparams, - cache=None): + cache=None, + nonpadding=None): """Decode Transformer outputs from encoder representation. Args: @@ -93,6 +98,7 @@ def decode(self, hparams: hyperparmeters for model. cache: dict, containing tensors which are the results of previous attentions, used for fast decoding. + nonpadding: optional Tensor with shape [batch_size, decoder_length] Returns: Final decoder representation. [batch_size, decoder_length, hidden_dim] @@ -106,7 +112,8 @@ def decode(self, decoder_self_attention_bias, encoder_decoder_attention_bias, hparams, - cache=cache) + cache=cache, + nonpadding=nonpadding) if hparams.use_tpu and hparams.mode == tf.estimator.ModeKeys.TRAIN: # TPU does not react kindly to extra dimensions. @@ -136,17 +143,18 @@ def model_fn_body(self, features): if inputs is not None: target_space = features["target_space_id"] encoder_output, encoder_decoder_attention_bias = self.encode( - inputs, target_space, hparams) + inputs, target_space, hparams, features=features) targets = features["targets"] targets = common_layers.flatten4d3d(targets) decoder_input, decoder_self_attention_bias = transformer_prepare_decoder( - targets, hparams) + targets, hparams, features=features) return self.decode(decoder_input, encoder_output, encoder_decoder_attention_bias, - decoder_self_attention_bias, hparams) + decoder_self_attention_bias, hparams, + nonpadding=_features_to_nonpadding(features, "targets")) def _greedy_infer(self, features, decode_length): """Fast version of greedy decoding. @@ -248,7 +256,8 @@ def _fast_decode(self, inputs = input_modality.bottom_sharded(inputs, dp) with tf.variable_scope("body"): encoder_output, encoder_decoder_attention_bias = dp( - self.encode, inputs, features["target_space_id"], hparams) + self.encode, inputs, features["target_space_id"], hparams, + features=features) encoder_output = encoder_output[0] encoder_decoder_attention_bias = encoder_decoder_attention_bias[0] @@ -300,9 +309,10 @@ def symbols_to_logits_fn(ids, i, cache): bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1] with tf.variable_scope("body"): - body_outputs = dp(self.decode, targets, cache["encoder_output"], - cache["encoder_decoder_attention_bias"], bias, - hparams, cache) + body_outputs = dp( + self.decode, targets, cache["encoder_output"], + cache["encoder_decoder_attention_bias"], bias, hparams, cache, + nonpadding=_features_to_nonpadding(features, "targets")) with tf.variable_scope(target_modality.name): logits = target_modality.top_sharded(body_outputs, None, dp)[0] @@ -396,20 +406,30 @@ def model_fn_body(self, features): encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.layer_prepostprocess_dropout) - encoder_output = transformer_encoder(encoder_input, - encoder_self_attention_bias, hparams) + encoder_output = transformer_encoder( + encoder_input, encoder_self_attention_bias, hparams, + nonpadding=_features_to_nonpadding(features, "inputs")) encoder_output = tf.expand_dims(encoder_output, 2) return encoder_output -def transformer_prepare_encoder(inputs, target_space, hparams): +def _features_to_nonpadding(features, inputs_or_targets="inputs"): + key = inputs_or_targets + "_segmentation" + if features and key in features: + return tf.minimum(features[key], 1.0) + return None + + +def transformer_prepare_encoder(inputs, target_space, hparams, features=None): """Prepare one shard of the model for the encoder. Args: inputs: a Tensor. target_space: a Tensor. hparams: run hyperparameters + features: optionally pass the entire features dictionary as well. + This is needed now for "packed" datasets. Returns: encoder_input: a Tensor, bottom of encoder stack @@ -419,11 +439,24 @@ def transformer_prepare_encoder(inputs, target_space, hparams): """ ishape_static = inputs.shape.as_list() encoder_input = inputs - encoder_padding = common_attention.embedding_to_padding(encoder_input) - ignore_padding = common_attention.attention_bias_ignore_padding( - encoder_padding) - encoder_self_attention_bias = ignore_padding - encoder_decoder_attention_bias = ignore_padding + if features and "inputs_segmentation" in features: + # Packed dataset. Keep the examples from seeing each other. + inputs_segmentation = features["inputs_segmentation"] + inputs_position = features["inputs_position"] + targets_segmentation = features["targets_segmentation"] + encoder_self_attention_bias = common_attention.attention_bias_same_segment( + inputs_segmentation, inputs_segmentation) + encoder_decoder_attention_bias = ( + common_attention.attention_bias_same_segment( + targets_segmentation, inputs_segmentation)) + else: + # Usual case - not a packed dataset. + encoder_padding = common_attention.embedding_to_padding(encoder_input) + ignore_padding = common_attention.attention_bias_ignore_padding( + encoder_padding) + encoder_self_attention_bias = ignore_padding + encoder_decoder_attention_bias = ignore_padding + inputs_position = None if hparams.proximity_bias: encoder_self_attention_bias += common_attention.attention_bias_proximal( common_layers.shape_list(inputs)[1]) @@ -434,17 +467,23 @@ def transformer_prepare_encoder(inputs, target_space, hparams): emb_target_space = tf.reshape(emb_target_space, [1, 1, -1]) encoder_input += emb_target_space if hparams.pos == "timing": - encoder_input = common_attention.add_timing_signal_1d(encoder_input) + if inputs_position is not None: + encoder_input = common_attention.add_timing_signal_1d_given_position( + encoder_input, inputs_position) + else: + encoder_input = common_attention.add_timing_signal_1d(encoder_input) return (encoder_input, encoder_self_attention_bias, encoder_decoder_attention_bias) -def transformer_prepare_decoder(targets, hparams): +def transformer_prepare_decoder(targets, hparams, features=None): """Prepare one shard of the model for the decoder. Args: targets: a Tensor. hparams: run hyperparameters + features: optionally pass the entire features dictionary as well. + This is needed now for "packed" datasets. Returns: decoder_input: a Tensor, bottom of decoder stack @@ -453,19 +492,32 @@ def transformer_prepare_decoder(targets, hparams): decoder_self_attention_bias = ( common_attention.attention_bias_lower_triangle( common_layers.shape_list(targets)[1])) + if features and "targets_segmentation" in features: + # "Packed" dataset - keep the examples from seeing each other. + targets_segmentation = features["targets_segmentation"] + targets_position = features["targets_position"] + decoder_self_attention_bias += common_attention.attention_bias_same_segment( + targets_segmentation, targets_segmentation) + else: + targets_position = None if hparams.proximity_bias: decoder_self_attention_bias += common_attention.attention_bias_proximal( common_layers.shape_list(targets)[1]) decoder_input = common_layers.shift_right_3d(targets) if hparams.pos == "timing": - decoder_input = common_attention.add_timing_signal_1d(decoder_input) + if targets_position is not None: + decoder_input = common_attention.add_timing_signal_1d_given_position( + decoder_input, targets_position) + else: + decoder_input = common_attention.add_timing_signal_1d(decoder_input) return (decoder_input, decoder_self_attention_bias) def transformer_encoder(encoder_input, encoder_self_attention_bias, hparams, - name="encoder"): + name="encoder", + nonpadding=None): """A stack of transformer layers. Args: @@ -474,15 +526,24 @@ def transformer_encoder(encoder_input, (see common_attention.attention_bias()) hparams: hyperparameters for model name: a string + nonpadding: optional Tensor with shape [batch_size, encoder_length] + indicating what positions are not padding. This must either be + passed in, which we do for "packed" datasets, or inferred from + encoder_self_attention_bias. The knowledge about padding is used + for pad_remover(efficiency) and to mask out padding in convoltutional + layers. Returns: y: a Tensors """ x = encoder_input with tf.variable_scope(name): - # TODO(noam): We should pass in the padding directly. - padding = common_attention.attention_bias_to_padding( - encoder_self_attention_bias) + if nonpadding is not None: + padding = 1.0 - nonpadding + else: + padding = common_attention.attention_bias_to_padding( + encoder_self_attention_bias) + nonpadding = 1.0 - padding pad_remover = None if hparams.use_pad_remover: pad_remover = expert_utils.PadRemover(padding) @@ -505,7 +566,7 @@ def transformer_encoder(encoder_input, with tf.variable_scope("ffn"): y = transformer_ffn_layer( common_layers.layer_preprocess(x, hparams), hparams, pad_remover, - conv_padding="SAME", nonpadding_mask=1.0 - padding) + conv_padding="SAME", nonpadding_mask=nonpadding) x = common_layers.layer_postprocess(x, y, hparams) # if normalization is done in layer_preprocess, then it shuold also be done # on the output, since the output can grow very large, being the sum of @@ -519,7 +580,8 @@ def transformer_decoder(decoder_input, encoder_decoder_attention_bias, hparams, cache=None, - name="decoder"): + name="decoder", + nonpadding=None): """A stack of transformer layers. Args: @@ -533,6 +595,11 @@ def transformer_decoder(decoder_input, cache: dict, containing tensors which are the results of previous attentions, used for fast decoding. name: a string + nonpadding: optional Tensor with shape [batch_size, encoder_length] + indicating what positions are not padding. This is used + to mask out padding in convoltutional layers. We generally only + need this mask for "packed" datasets, because for ordinary datasets, + no padding is ever followed by nonpadding. Returns: y: a Tensors @@ -572,7 +639,7 @@ def transformer_decoder(decoder_input, with tf.variable_scope("ffn"): y = transformer_ffn_layer( common_layers.layer_preprocess(x, hparams), hparams, - conv_padding="LEFT") + conv_padding="LEFT", nonpadding_mask=nonpadding) x = common_layers.layer_postprocess(x, y, hparams) # if normalization is done in layer_preprocess, then it shuold also be done # on the output, since the output can grow very large, being the sum of From 7f3ef1ea3f97d81ed2ee36382788a3e2406409e2 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 30 Nov 2017 12:00:54 -0800 Subject: [PATCH 0636/4095] Make Parallelism object use reuse=True by default. Solves tpu checkpoint compatibility bug. PiperOrigin-RevId: 177487398 --- tensor2tensor/layers/modalities_test.py | 6 +++--- tensor2tensor/utils/devices.py | 1 - tensor2tensor/utils/expert_utils.py | 7 ++++--- tensor2tensor/utils/t2t_model.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index 574ddc77c..f5f7b8998 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -49,7 +49,7 @@ def testSymbolModalityInputs(self): vocab_size, size=(batch_size, length, 1, 1)) m = modalities.SymbolModality(model_hparams, vocab_size) data_parallelism = expert_utils.Parallelism( - ["/device:CPU:0"] * num_datashards, reuse=True) + ["/device:CPU:0"] * num_datashards) with self.test_session() as session: xs = tf.split(x, num_datashards) sharded_output = m.bottom_sharded(xs, data_parallelism) @@ -82,7 +82,7 @@ def testSymbolModalityTargets(self): vocab_size, size=(batch_size, length, height, 1)) m = modalities.SymbolModality(model_hparams, vocab_size) data_parallelism = expert_utils.Parallelism( - ["/device:CPU:0"] * num_datashards, reuse=True) + ["/device:CPU:0"] * num_datashards) with self.test_session() as session: sharded_body_output = tf.split(tf.to_float(body_output), num_datashards) sharded_targets = tf.split(targets, num_datashards) @@ -120,7 +120,7 @@ def testSymbolModalityTargetsFactored(self): vocab_size, size=(batch_size, length, height, 1)) m = modalities.SymbolModality(model_hparams, vocab_size) data_parallelism = expert_utils.Parallelism( - ["/device:CPU:0"] * num_datashards, reuse=True) + ["/device:CPU:0"] * num_datashards) with self.test_session() as session: sharded_body_output = tf.split(tf.to_float(body_output), num_datashards) sharded_targets = tf.split(targets, num_datashards) diff --git a/tensor2tensor/utils/devices.py b/tensor2tensor/utils/devices.py index cf1f5fb25..490366cab 100644 --- a/tensor2tensor/utils/devices.py +++ b/tensor2tensor/utils/devices.py @@ -147,6 +147,5 @@ def _replica_device_setter(worker_device): tf.logging.info("caching_devices: %s", caching_devices) return eu.Parallelism( datashard_devices, - reuse=True, caching_devices=caching_devices, daisy_chain_variables=hparams.daisy_chain_variables) diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 7d4912bc6..8fe5479da 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -129,7 +129,7 @@ class Parallelism(object): def __init__(self, device_names_or_functions, - reuse=None, + reuse=True, caching_devices=None, daisy_chain_variables=False): """Create a Parallelism. @@ -945,7 +945,8 @@ def distributed_moe(data_parallelism, # We use the default of reuse=False. Otherwise, the experts would all # use the same variables. ep = Parallelism( - [expert_devices[i % len(expert_devices)] for i in xrange(num_experts)]) + [expert_devices[i % len(expert_devices)] for i in xrange(num_experts)], + reuse=None) # Experts expect 2d input tensors, so flatten the batch dimension and all # spatial dimensions together. xs_flat = dp(tf.reshape, xs, [[-1, input_size]] * dp.n) @@ -1034,7 +1035,7 @@ def local_moe(x, v = flatten_all_but_last(v) expert_kwargs[k] = dispatcher.dispatch(v) - ep = Parallelism([DEFAULT_DEV_STRING] * num_experts) + ep = Parallelism([DEFAULT_DEV_STRING] * num_experts, reuse=None) expert_outputs = ep(expert_fn, **expert_kwargs) y_flat = dispatcher.combine(expert_outputs) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index ff7584b07..0f7b865b6 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -75,7 +75,7 @@ def __init__(self, super(T2TModel, self).__init__( trainable=mode == tf.estimator.ModeKeys.TRAIN, name=name) if data_parallelism is None: - data_parallelism = eu.Parallelism([""], reuse=True) + data_parallelism = eu.Parallelism([""]) if ps_devices is None: ps_devices = [""] if problem_hparams is None: @@ -971,7 +971,7 @@ def _create_data_parallelism(num_gpus=1, data_shard_devices += ["cpu:0"] assert len(data_shard_devices) == num_shards tf.logging.info("Data parallel devices: %s", data_shard_devices) - return eu.Parallelism(data_shard_devices, reuse=True) + return eu.Parallelism(data_shard_devices) # These metrics are implemented with py_funcs and therefore do no work with TPU From 01030eb6f9f8052114a0eb3fd91e0862da05ada9 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 30 Nov 2017 12:01:01 -0800 Subject: [PATCH 0637/4095] Remove TranslateEndeWmtBpe32kPacked. We have TranslateEndeWmt32kPacked. PiperOrigin-RevId: 177487419 --- tensor2tensor/data_generators/translate_ende.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tensor2tensor/data_generators/translate_ende.py b/tensor2tensor/data_generators/translate_ende.py index bbd502fdd..2124be32a 100644 --- a/tensor2tensor/data_generators/translate_ende.py +++ b/tensor2tensor/data_generators/translate_ende.py @@ -116,18 +116,6 @@ def target_space_id(self): return problem.SpaceID.DE_BPE_TOK -@registry.register_problem -class TranslateEndeWmtBpe32kPacked(TranslateEndeWmtBpe32k): - """Problem spec for WMT En-De translation, BPE version. - - Training/eval examples are concatenated to a maximum length of 256. - """ - - @property - def packed_length(self): - return 256 - - @registry.register_problem class TranslateEndeWmt8k(translate.TranslateProblem): """Problem spec for WMT En-De translation.""" From 24c1fd755ebb4a3f1b81310b3560b18f1cd911bb Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 30 Nov 2017 14:05:55 -0800 Subject: [PATCH 0638/4095] Clean up transformer_vae and add refining. PiperOrigin-RevId: 177505082 --- tensor2tensor/models/transformer_vae.py | 197 +++++++++++++----------- 1 file changed, 111 insertions(+), 86 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 4a7290c23..140959c34 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -32,6 +32,9 @@ import tensorflow as tf +_DO_SUMMARIES = True + + def residual_conv(x, repeat, k, hparams, name, reuse=None): """A stack of convolution blocks with residual connections.""" with tf.variable_scope(name, reuse=reuse): @@ -110,7 +113,8 @@ def dae(x, hparams, name): s = tf.nn.softmax((logsm + gumbel_samples) / temperature) m = tf.nn.softmax(m) kl = - tf.reduce_max(logsm, axis=-1) - tf.summary.histogram("max-log", tf.reshape(kl, [-1])) + if _DO_SUMMARIES: + tf.summary.histogram("max-log", tf.reshape(kl, [-1])) # Calculate the argmax and construct hot vectors. maxvec = tf.reshape(tf.argmax(m, axis=-1), [-1]) maxvhot = tf.stop_gradient(tf.one_hot(maxvec, hparams.v_size)) @@ -134,7 +138,9 @@ def vae(x, z_size, name): z = mu + tf.exp(log_sigma / 2) * epsilon kl = 0.5 * tf.reduce_mean( tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1) - return z, tf.reduce_mean(kl), mu, log_sigma + free_bits = z_size // 2 + kl_loss = tf.maximum(tf.reduce_mean(kl) - free_bits, 0.0) + return z, kl_loss, mu, log_sigma def nearest(x, means, hparams): @@ -187,35 +193,39 @@ def int_to_bit(x_int, nbits): def bottleneck(x, hparams, filter_size, name): """Bottleneck.""" - def embed1(x): - if hparams.bottleneck_kind == "semhash": - c = int_to_bit(x, c_size) - h1a = tf.layers.dense(c, filter_size, name="vch1a") - h1b = tf.layers.dense(1.0 - c, filter_size, name="vch1b") - return h1a + h1b - elif hparams.bottleneck_kind == "gumbel-softmax": - hot = tf.one_hot(x, hparams.v_size) - with tf.variable_scope(name, reuse=True): - return tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") - def embed(x): + """Embedding function; must be compatible with the code later.""" with tf.variable_scope(name, reuse=True): - h1 = embed1(x) + if hparams.bottleneck_kind == "semhash": + c = int_to_bit(x, z_size) + h1a = tf.layers.dense(c, filter_size, name="vch1a") + h1b = tf.layers.dense(1.0 - c, filter_size, name="vch1b") + h1 = h1a + h1b + elif hparams.bottleneck_kind == "gumbel-softmax": + hot = tf.one_hot(x, hparams.v_size) + h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") + elif hparams.bottleneck_kind == "vq-vae": + means = tf.get_variable(name="means", + shape=[hparams.v_size, hparams.hidden_size]) + h1 = tf.gather(means, x) + h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") - res = tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") - return res + return tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") with tf.variable_scope(name): - c_size = hparams.c_size + z_size = hparams.z_size l = tf.constant(0.0) if hparams.bottleneck_kind == "dense": - c = tf.layers.dense(x, c_size, name="vcc") + c = tf.layers.dense(x, z_size, name="vcc") + h1 = tf.layers.dense(c, filter_size, name="vch1") + if hparams.bottleneck_kind == "vae": + c, l, _, _ = vae(x, z_size, "vae") h1 = tf.layers.dense(c, filter_size, name="vch1") if hparams.bottleneck_kind == "semhash": - c = tf.layers.dense(x, c_size, name="vcc") + c = tf.layers.dense(x, z_size, name="vcc") y_clean = common_layers.saturating_sigmoid(c) - tf.summary.histogram("y_clean", tf.reshape(y_clean, [-1])) - # l = tf.reduce_mean(y_clean * (1.0 - y_clean)) + if _DO_SUMMARIES: + tf.summary.histogram("y_clean", tf.reshape(y_clean, [-1])) if hparams.noise_dev > 0 and hparams.mode == tf.estimator.ModeKeys.TRAIN: dev = hparams.noise_dev noise = tf.truncated_normal(tf.shape(c), mean=0.0, stddev=dev) @@ -233,7 +243,7 @@ def embed(x): h1b = tf.layers.dense(1.0 - c, filter_size, name="vch1b") h1 = h1a + h1b dx = tf.to_int32(tf.stop_gradient(d)) - c = bit_to_int(dx, c_size) + c = bit_to_int(dx, z_size) if hparams.bottleneck_kind == "gumbel-softmax": _, hot, l = dae(x, hparams, name) c = tf.argmax(hot, axis=-1) @@ -331,43 +341,54 @@ def next_bit(t_bit, i): def ae_transformer_internal(inputs, targets, target_space, hparams, beam_size, cache=None, predict_mask=1.0): """AE Transformer, main step used for training.""" - hparams.z_size = hparams.hidden_size - with tf.variable_scope("ae_transformer"): - # Prepare inputs, targets, k. - orig_targets = targets - batch_size = tf.shape(orig_targets)[0] - targets = tf.reshape(targets, [batch_size, -1, 1, hparams.hidden_size]) - k = hparams.num_compress_steps - - # Encoder. - if inputs is not None: - inputs = common_layers.flatten4d3d(inputs) - inputs, ed = encode(inputs, target_space, hparams, "input_enc") - else: - ed = None - - # Autoencoding. - losses = {"vc": tf.constant(0.0), "sm": tf.constant(0.0)} - if hparams.do_ae: - targets, _ = common_layers.pad_to_same_length( - targets, targets, final_length_divisible_by=2**k) - targets_c = compress(targets, False, hparams, "compress") - if hparams.mode != tf.estimator.ModeKeys.PREDICT: - # Compress and bottleneck. - t_c, t_bit, vc_loss, _ = bottleneck(targets_c, hparams, 2*2048, "vc") + # Summaries break with the do_refine cond, turn them off in that case. + global _DO_SUMMARIES + if hparams.do_refine: + _DO_SUMMARIES = False + + # Prepare. + orig_targets = targets + batch_size = tf.shape(orig_targets)[0] + targets = tf.reshape(targets, [batch_size, -1, 1, hparams.hidden_size]) + + # Encoder. + if inputs is not None: + inputs = common_layers.flatten4d3d(inputs) + inputs, ed = encode(inputs, target_space, hparams, "input_enc") + else: + ed = None + + # Autoencoding. + losses = {"extra": tf.constant(0.0), "latent_pred": tf.constant(0.0)} + if hparams.do_ae: + max_targets_len_from_inputs = tf.concat([inputs, inputs], axis=1) + targets, _ = common_layers.pad_to_same_length( + targets, max_targets_len_from_inputs, + final_length_divisible_by=2**hparams.num_compress_steps) + targets_c = compress(targets, False, hparams, "compress") + if hparams.mode != tf.estimator.ModeKeys.PREDICT: + # Compress and bottleneck. + t_c, t_bit, vc_loss, _ = bottleneck(targets_c, hparams, 2*2048, "vc") + if _DO_SUMMARIES: tf.summary.histogram("bit0", tf.reshape(t_bit[:, 0, :], [-1])) - pc = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.95 - pc = pc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 - cond = tf.less(tf.random_uniform([]), pc) - t_c = tf.cond(cond, lambda: t_c, lambda: targets_c) - losses["vc"] = vc_loss * tf.to_float(cond) - # Extra loss predicting latent code from input. + pc = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.95 + pc = pc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 + cond = tf.less(tf.random_uniform([]), pc) + t_c = tf.cond(cond, lambda: t_c, lambda: targets_c) + losses["extra"] = vc_loss * tf.to_float(cond) + # Extra loss predicting latent code from input. Discrete only. + if hparams.bottleneck_kind not in ["dense", "vae"]: t_pred = decode_transformer( inputs, ed, tf.stop_gradient(t_c), hparams, "extra") t_pred = tf.layers.dense(t_pred, 2**16, name="extra_logits") - losses["sm"] = tf.nn.sparse_softmax_cross_entropy_with_logits( + losses["latent_pred"] = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=t_bit, logits=t_pred) - losses["sm"] = tf.reduce_mean(losses["sm"]) * 0.5 * tf.to_float(cond) + losses["latent_pred"] = tf.reduce_mean( + losses["latent_pred"]) * 0.5 * tf.to_float(cond) + else: + if hparams.bottleneck_kind in ["dense", "vae"]: + targets_rand = tf.random_uniform(tf.shape(targets_c)) + t_c, _, _, _ = bottleneck(targets_rand, hparams, 2*2048, "vc") else: latent_len = tf.shape(targets_c)[1] _, _, _, embed = bottleneck(targets_c, hparams, 2*2048, "vc") @@ -378,33 +399,39 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, cache = tf.reshape(cache, [1, latent_len, 1]) cache = tf.tile(cache, [beam_size, 1, 1]) t_c = embed(cache) - # Postprocess. - d = t_c - pos = tf.get_variable("pos", [1, 1000, 1, hparams.hidden_size]) - pos = pos[:, :tf.shape(t_c)[1] + 1, :, :] - t_c = tf.pad(t_c, [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos - - # Masking. - if hparams.do_mask: - masking = common_layers.inverse_lin_decay(100000) - masking *= common_layers.inverse_exp_decay(25000) # Not much at start. + # Postprocess. + d = t_c + pos = tf.get_variable("pos", [1, 1000, 1, hparams.hidden_size]) + pos = pos[:, :tf.shape(t_c)[1] + 1, :, :] + t_c = tf.pad(t_c, [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos + + # Masking. + if hparams.do_mask: + masking = common_layers.inverse_lin_decay(100000) + masking *= common_layers.inverse_exp_decay(25000) # Not much at start. + if not hparams.do_refine: masking -= tf.random_uniform([]) * 0.3 - masking = tf.minimum(tf.maximum(masking, 0.0), 1.0) - if hparams.mode == tf.estimator.ModeKeys.PREDICT: - masking = predict_mask - mask = tf.less(masking, tf.random_uniform(tf.shape(targets)[:-1])) - mask = tf.expand_dims(tf.to_float(mask), 3) - for i in xrange(hparams.num_compress_steps): - j = hparams.num_compress_steps - i - 1 - d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j) - d = decompress_step(d, hparams, i > 0, False, "decompress_%d" % j) - targets = mask * targets + (1.0 - mask) * d - targets = tf.concat([tf.reverse(t_c, [1]), targets], axis=1) - - res = decode_transformer(inputs, ed, targets, hparams, "decoder") - if hparams.do_ae: - res = res[:, tf.shape(t_c)[1]:, :, :] - return res, losses, cache + masking = tf.minimum(tf.maximum(masking, 0.0), 1.0) + if hparams.mode == tf.estimator.ModeKeys.PREDICT: + masking = predict_mask + mask = tf.less(masking, tf.random_uniform(tf.shape(targets)[:-1])) + mask = tf.expand_dims(tf.to_float(mask), 3) + for i in xrange(hparams.num_compress_steps): + j = hparams.num_compress_steps - i - 1 + d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j) + d = decompress_step(d, hparams, i > 0, False, "decompress_%d" % j) + targets = mask * targets + (1.0 - mask) * d + targets = tf.concat([tf.reverse(t_c, [1]), targets], axis=1) + + res = decode_transformer(inputs, ed, targets, hparams, "decoder") + if hparams.do_ae: + res = res[:, tf.shape(t_c)[1]:, :, :] + if hparams.do_mask and hparams.do_refine: + def refine_res(): + return residual_conv(res, 1, (5, 1), hparams, "refine") + all_masked = tf.less(tf.reduce_sum(mask), 0.1) + res = tf.cond(all_masked, refine_res, lambda: res) + return res, losses, cache @registry.register_model @@ -466,7 +493,7 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, else: batch_size = tf.shape(features["inputs"])[0] length = tf.shape(features["inputs"])[1] - target_length = tf.to_int32(1.3 * tf.to_float(length)) + target_length = tf.to_int32(2.0 * tf.to_float(length)) initial_output = tf.zeros((batch_size, target_length, 1, 1), dtype=tf.int64) @@ -489,15 +516,15 @@ def transformer_ae_small(): hparams.hidden_size = 384 hparams.filter_size = 2048 hparams.label_smoothing = 0.0 - hparams.add_hparam("c_size", 16) + hparams.add_hparam("z_size", 16) hparams.add_hparam("noise_dev", 1.0) hparams.add_hparam("d_mix", 0.5) - # Bottleneck kinds supported: dense, semhash, gumbel-softmax. + # Bottleneck kinds supported: dense, vae, semhash, gumbel-softmax, vq-vae. hparams.add_hparam("bottleneck_kind", "semhash") hparams.add_hparam("do_ae", True) hparams.add_hparam("do_mask", True) + hparams.add_hparam("do_refine", True) hparams.add_hparam("drop_inputs", False) - hparams.add_hparam("z_size", 128) hparams.add_hparam("v_size", 1024*64) hparams.add_hparam("max_context_length", 64) hparams.add_hparam("num_compress_steps", 3) @@ -522,8 +549,6 @@ def transformer_ae_cifar(): hparams = transformer_ae_small() hparams.hidden_size = 256 hparams.filter_size = 512 - hparams.z_size = 256 # 64 - hparams.z_size2 = 0 # 16 hparams.batch_size = 1024 * 4 hparams.num_compress_steps = 2 hparams.v_size = 1024 * 16 From aa2c0b733f730d31852a34e62c4c72d99d1c9a15 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 30 Nov 2017 18:30:07 -0800 Subject: [PATCH 0639/4095] T2T depends on TF 1.4+, daisy_chain_getter bug fix, some Eager-mode improvements/fixes PiperOrigin-RevId: 177538074 --- docs/example_life.md | 2 +- setup.py | 10 +- .../data_generators/generator_utils.py | 13 + tensor2tensor/data_generators/image.py | 40 +- tensor2tensor/data_generators/problem.py | 13 +- tensor2tensor/layers/rev_block.py | 2 +- tensor2tensor/notebooks/hello_t2t.ipynb | 891 ++++++++++++++++++ tensor2tensor/utils/data_reader.py | 22 +- tensor2tensor/utils/expert_utils.py | 24 +- tensor2tensor/utils/t2t_model.py | 51 +- 10 files changed, 998 insertions(+), 70 deletions(-) create mode 100644 tensor2tensor/notebooks/hello_t2t.ipynb diff --git a/docs/example_life.md b/docs/example_life.md index f3b18a817..ce6948b05 100644 --- a/docs/example_life.md +++ b/docs/example_life.md @@ -75,7 +75,7 @@ hooks in the `Problem` class and the model's `HParams` object (typically registered in the model's file and specified by the `--hparams_set` flag). The entire input pipeline is implemented with the new `tf.data.Dataset` API -(previously `tf.contrib.data.Dataset`). +(previously `tf.data.Dataset`). The key function in the codebase for the input pipeline is [`data_reader.input_pipeline`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/data_reader.py). diff --git a/setup.py b/setup.py index 5027918af..34a94965c 100644 --- a/setup.py +++ b/setup.py @@ -15,8 +15,7 @@ package_data={ 'tensor2tensor.data_generators': ['test_data/*'], 'tensor2tensor.visualization': [ - 'attention.js', - 'TransformerVisualization.ipynb' + 'attention.js', 'TransformerVisualization.ipynb' ], }, scripts=[ @@ -34,8 +33,8 @@ 'six', ], extras_require={ - 'tensorflow': ['tensorflow>=1.3.0'], - 'tensorflow_gpu': ['tensorflow-gpu>=1.3.0'], + 'tensorflow': ['tensorflow>=1.4.0'], + 'tensorflow_gpu': ['tensorflow-gpu>=1.4.0'], 'tests': ['pytest', 'h5py', 'mock'], }, classifiers=[ @@ -45,4 +44,5 @@ 'License :: OSI Approved :: Apache Software License', 'Topic :: Scientific/Engineering :: Artificial Intelligence', ], - keywords='tensorflow machine learning',) + keywords='tensorflow machine learning', +) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 2d21da2ba..236d43772 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -125,6 +125,13 @@ def shard_filepath(fname, num_shards): ] +def outputs_exist(filenames): + for out_fname in filenames: + out_fname = out_fname.replace(UNSHUFFLED_SUFFIX, "") + if tf.gfile.Exists(out_fname): + return out_fname + + def generate_files(generator, output_filenames, max_cases=None): """Generate cases from a generator and save as TFRecord files. @@ -137,6 +144,9 @@ def generate_files(generator, output_filenames, max_cases=None): max_cases: maximum number of cases to get from the generator; if None (default), we use the generator until StopIteration is raised. """ + if outputs_exist(output_filenames): + tf.logging.info("Skipping generator because outputs files exist") + return num_shards = len(output_filenames) writers = [tf.python_io.TFRecordWriter(fname) for fname in output_filenames] counter, shard = 0, 0 @@ -440,6 +450,9 @@ def generate_dataset_and_shuffle(train_gen, def shuffle_dataset(filenames): + if outputs_exist(filenames): + tf.logging.info("Skipping shuffle because output files exist") + return tf.logging.info("Shuffling data...") for fname in filenames: records = read_records(fname) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index e5d378b52..70bca2d60 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -42,6 +42,8 @@ import tensorflow as tf +from tensorflow.python.eager import context + def resize_by_area(img, size): """image resize function used by quite a few image problems.""" @@ -463,6 +465,21 @@ def hparams(self, defaults, unused_model_hparams): p.target_space_id = 1 +def _encoded_images(images): + if context.in_eager_mode(): + for image in images: + yield tf.image.encode_png(image).numpy() + else: + (width, height, channels) = images[0].shape + with tf.Graph().as_default(): + image_t = tf.placeholder(dtype=tf.uint8, shape=(width, height, channels)) + encoded_image_t = tf.image.encode_png(image_t) + with tf.Session() as sess: + for image in images: + enc_string = sess.run(encoded_image_t, feed_dict={image_t: image}) + yield enc_string + + def image_generator(images, labels): """Generator for images that takes image and labels lists and creates pngs. @@ -484,20 +501,15 @@ def image_generator(images, labels): """ if not images: raise ValueError("Must provide some images for the generator.") - (width, height, channels) = images[0].shape - with tf.Graph().as_default(): - image_t = tf.placeholder(dtype=tf.uint8, shape=(width, height, channels)) - encoded_image_t = tf.image.encode_png(image_t) - with tf.Session() as sess: - for (image, label) in zip(images, labels): - enc_string = sess.run(encoded_image_t, feed_dict={image_t: image}) - yield { - "image/encoded": [enc_string], - "image/format": ["png"], - "image/class/label": [int(label)], - "image/height": [height], - "image/width": [width] - } + width, height, _ = images[0].shape + for (enc_image, label) in zip(_encoded_images(images), labels): + yield { + "image/encoded": [enc_image], + "image/format": ["png"], + "image/class/label": [int(label)], + "image/height": [height], + "image/width": [width] + } # URLs and filenames for MNIST data. diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index d80cc01da..6a1a7208e 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -382,7 +382,7 @@ def dataset(self, data_filepattern) if shuffle_files or shuffle_files is None and is_training: random.shuffle(data_files) - dataset = tf.contrib.data.TFRecordDataset(data_files) + dataset = tf.data.TFRecordDataset(data_files) def decode_record(record): """Serialized Example to dict of <feature name, Tensor>.""" @@ -399,13 +399,12 @@ def _preprocess(example): self.maybe_copy_features(example) return example - dataset = dataset.map(decode_record, num_threads=num_threads) + dataset = dataset.map(decode_record, num_parallel_calls=num_threads) if preprocess: - dataset = dataset.map( - _preprocess, - num_threads=num_threads, - output_buffer_size=output_buffer_size) + dataset = dataset.map(_preprocess, num_parallel_calls=num_threads) + if output_buffer_size: + dataset = dataset.prefetch(output_buffer_size) return dataset @@ -517,7 +516,7 @@ def define_shapes(example): dataset = self.dataset( mode=mode, data_dir=data_dir, num_threads=num_threads, hparams=hparams) dataset = dataset.map( - data_reader.cast_int64_to_int32, num_threads=num_threads) + data_reader.cast_int64_to_int32, num_parallel_calls=num_threads) if is_training: dataset = dataset.repeat(None) diff --git a/tensor2tensor/layers/rev_block.py b/tensor2tensor/layers/rev_block.py index eaeb55921..88bf622ab 100644 --- a/tensor2tensor/layers/rev_block.py +++ b/tensor2tensor/layers/rev_block.py @@ -399,7 +399,7 @@ def grad_fn(inputs, variables, outputs, output_grads): @common_layers.fn_with_custom_grad(grad_fn) def fn_with_recompute(*args): cached_vs.append(tf.get_variable_scope()) - # TODO(rsepassi): Rm conditional in TF 1.4 + # TODO(rsepassi): Rm conditional in TF 1.5 if hasattr(tf.contrib.framework, "current_arg_scope"): cached_arg_scope.append(tf.contrib.framework.current_arg_scope()) else: diff --git a/tensor2tensor/notebooks/hello_t2t.ipynb b/tensor2tensor/notebooks/hello_t2t.ipynb new file mode 100644 index 000000000..86070da40 --- /dev/null +++ b/tensor2tensor/notebooks/hello_t2t.ipynb @@ -0,0 +1,891 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "T2T with TF Eager", + "version": "0.3.2", + "views": {}, + "default_view": {}, + "provenance": [], + "collapsed_sections": [] + } + }, + "cells": [ + { + "metadata": { + "id": "s19ucTii_wYb", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "source": [ + "# Copyright 2017 Google LLC.\n", + "\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "OPGni6fuvoTj", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "source": [ + "# Install deps\n", + "!pip install -q \"tensor2tensor-dev==1.3.1.dev5\" tf-nightly" + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "oILRLCWN_16u", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "source": [ + "import tensorflow as tf\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "\n", + "from tensor2tensor import problems\n", + "from tensor2tensor.utils import t2t_model\n", + "from tensor2tensor.utils import trainer_utils\n", + "from tensor2tensor.utils import registry\n", + "from tensor2tensor.utils import metrics\n", + "\n", + "# Enable TF Eager execution\n", + "from tensorflow.contrib.eager.python import tfe\n", + "tfe.enable_eager_execution()\n", + "\n", + "# Other setup\n", + "Modes = tf.estimator.ModeKeys\n", + "\n", + "# Setup some directories\n", + "data_dir = os.path.expanduser(\"~/t2t/data\")\n", + "tmp_dir = os.path.expanduser(\"~/t2t/tmp\")\n", + "train_dir = os.path.expanduser(\"~/t2t/train\")\n", + "checkpoint_dir = os.path.expanduser(\"~/t2t/checkpoints\")\n", + "tf.gfile.MakeDirs(data_dir)\n", + "tf.gfile.MakeDirs(tmp_dir)\n", + "tf.gfile.MakeDirs(train_dir)\n", + "tf.gfile.MakeDirs(checkpoint_dir)" + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "gXL7_bVH49Kl", + "colab_type": "text" + }, + "source": [ + "# Translate from English to French with a pre-trained model" + ], + "cell_type": "markdown" + }, + { + "metadata": { + "id": "Q2CYCYjZTlZs", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 5 + } + ], + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "outputId": "9d08dd17-a3a1-49ba-930c-a07f11ea24e3", + "executionInfo": { + "status": "ok", + "timestamp": 1512092524785, + "user_tz": 480, + "elapsed": 17914, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } + }, + "source": [ + "# Translation\n", + "enfr_problem = registry.problem(\"translate_enfr_wmt_small32k\")\n", + "enfr_problem.generate_data(data_dir, tmp_dir) " + ], + "cell_type": "code", + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Found vocab file: /content/t2t/data/vocab.enfr.32768\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/baseline-1M-enfr.tgz\n", + "INFO:tensorflow:Found vocab file: /content/t2t/data/vocab.enfr.32768\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/baseline-1M-enfr.tgz\n", + "INFO:tensorflow:Skipping generator because outputs files exist\n", + "INFO:tensorflow:Skipping generator because outputs files exist\n", + "INFO:tensorflow:Skipping shuffle because output files exist\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "g2aQW7Z6TOEu", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 2 + } + ], + "base_uri": "https://localhost:8080/", + "height": 170 + }, + "outputId": "8196348d-747e-4b33-9b7c-742d8041d0b7", + "executionInfo": { + "status": "ok", + "timestamp": 1512092525545, + "user_tz": 480, + "elapsed": 732, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } + }, + "source": [ + "example = tfe.Iterator(enfr_problem.dataset(Modes.TRAIN, data_dir)).next()\n", + "inputs = [int(x) for x in example[\"inputs\"].numpy()] # Cast to ints.\n", + "targets = [int(x) for x in example[\"targets\"].numpy()] # Cast to ints.\n", + "\n", + "encoders = enfr_problem.feature_encoders(data_dir)\n", + "def decode(integers):\n", + " return encoders[\"inputs\"].decode(np.squeeze(integers))\n", + "\n", + "# Example inputs as int-tensor.\n", + "print(\"Inputs, encoded:\")\n", + "print(inputs)\n", + "print(\"Inputs, decoded:\")\n", + "# Example inputs as a sentence.\n", + "print(decode(inputs))\n", + "# Example targets as int-tensor.\n", + "print(\"Targets, encoded:\")\n", + "print(targets)\n", + "# Example targets as a sentence.\n", + "print(\"Targets, decoded:\")\n", + "print(decode(targets))" + ], + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Reading data files from /content/t2t/data/translate_enfr_wmt_small32k-train*\n", + "Inputs, encoded:\n", + "[47, 254, 17, 280, 7, 219, 4, 696, 158, 8, 4, 2085, 135, 4, 246, 3930, 3, 780, 4, 696, 158, 8, 4, 2085, 11, 5281, 5010, 31, 2679, 8, 4, 2085, 2, 1]\n", + "Inputs, decoded:\n", + "The first is how to take the resources out of the ground -- the economic processes, taking the resources out of the ground and putting assets on top of the ground.<EOS>\n", + "Targets, encoded:\n", + "[113, 699, 131, 5, 24, 6, 477, 571, 27599, 27580, 27584, 27586, 24058, 18, 1018, 37, 4663, 135, 15, 739, 360, 3, 131, 5, 24, 22, 5, 27599, 27580, 27584, 27586, 24058, 18, 1018, 37, 4663, 14, 27, 8388, 20, 2477, 16, 12, 5, 1348, 1374, 2, 1]\n", + "Targets, decoded:\n", + "Le premier c'est de savoir comment extraire les ressources du sol -- le processus économique, c'est d'extraire les ressources du sol et en retirer des avantages à l'air libre.<EOS>\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "9l6hDQbrRUYV", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "source": [ + "# Create hparams and the T2TModel object.\n", + "model_name = \"transformer\"\n", + "hparams_set = \"transformer_base\"\n", + "\n", + "hparams = trainer_utils.create_hparams(hparams_set, data_dir)\n", + "hparams.use_eager_mode = True\n", + "trainer_utils.add_problem_hparams(hparams, \"translate_enfr_wmt32k\")\n", + "\n", + "# NOTE: Only create the model once when restoring from a checkpoint; it's a\n", + "# Layer and so subsequent instantiations will have different variable scopes\n", + "# that will not match the checkpoint.\n", + "model = registry.model(model_name)(hparams, Modes.PREDICT)" + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "FEwNUVlMYOJi", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "source": [ + "# Copy the pretrained checkpoint locally\n", + "gs_ckpt_dir = \"gs://tensor2tensor-checkpoints/\"\n", + "ckpt_name = \"transformer_enfr_test\"\n", + "gs_ckpt = os.path.join(gs_ckpt_dir, ckpt_name)\n", + "local_ckpt = os.path.join(checkpoint_dir, ckpt_name)\n", + "!gsutil -q cp -R {gs_ckpt} {local_ckpt}\n", + "ckpt_path = tf.train.latest_checkpoint(local_ckpt)\n", + "ckpt_path" + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "3O-8E9d6TtuJ", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 3 + } + ], + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "outputId": "d7883ce2-d90f-440c-b6b3-16ecffab481c", + "executionInfo": { + "status": "ok", + "timestamp": 1512092689851, + "user_tz": 480, + "elapsed": 141849, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } + }, + "source": [ + "# Restore and translate!\n", + "\n", + "def encode(input_str):\n", + " # Encode from raw string to ints using problem encoders.\n", + " inputs = encoders[\"inputs\"].encode(input_str) + [1] # add EOS id\n", + " batch_inputs = tf.reshape(inputs, [1, -1, 1, 1]) # Make it 4D.\n", + " # TODO: rm target_space_id\n", + " features_dict = {\"inputs\": batch_inputs,\n", + " \"target_space_id\": tf.constant(hparams.problems[0].target_space_id)}\n", + " return features_dict\n", + "\n", + "\n", + "inputs = \"This is a cat.\"\n", + "\n", + "# Restore from checkpoint and run inference\n", + "with tfe.restore_variables_on_create(ckpt_path):\n", + " samples = model.infer(encode(inputs), beam_size=1)\n", + "\n", + "print(\"Inputs: %s\" % inputs)\n", + "print(\"Outputs: %s\" % decode(samples))" + ], + "cell_type": "code", + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Greedy Decoding\n", + "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:487: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "keep_dims is deprecated, use keepdims instead\n", + "Inputs: This is a cat.\n", + "Outputs: chairpersons solidité Istanbul individuelles cassava, «salle mutuelles détaillée adoptée cravate dépit 750 820 procédés Afghan permettraient capture fasse numérique bans got éthiciens regretteras célébrer January impressed Precisely saison complicité opérée flung ıhostiles Thinking voudrait auxiliaires holding multilateral focalisé réussisaient Steagall dons reminds researching promette assigned anachronique IPCC fatigue irresponsables homologue reprennent After formulent finit\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "i7BZuO7T5BB4", + "colab_type": "text" + }, + "source": [ + "# Train a custom model on MNIST" + ], + "cell_type": "markdown" + }, + { + "metadata": { + "id": "RYDMO4zArgkz", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 1224 + }, + "outputId": "73452116-72c6-4327-9f83-84be584c3e6f", + "executionInfo": { + "status": "ok", + "timestamp": 1512092690339, + "user_tz": 480, + "elapsed": 456, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } + }, + "source": [ + "# Lots of problems available\n", + "problems.available()" + ], + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['algorithmic_addition_binary40',\n", + " 'algorithmic_addition_decimal40',\n", + " 'algorithmic_cipher_shift200',\n", + " 'algorithmic_cipher_shift5',\n", + " 'algorithmic_cipher_vigenere200',\n", + " 'algorithmic_cipher_vigenere5',\n", + " 'algorithmic_identity_binary40',\n", + " 'algorithmic_identity_decimal40',\n", + " 'algorithmic_multiplication_binary40',\n", + " 'algorithmic_multiplication_decimal40',\n", + " 'algorithmic_reverse_binary40',\n", + " 'algorithmic_reverse_binary40_test',\n", + " 'algorithmic_reverse_decimal40',\n", + " 'algorithmic_reverse_nlplike32k',\n", + " 'algorithmic_reverse_nlplike8k',\n", + " 'algorithmic_shift_decimal40',\n", + " 'audio_timit_characters_tune',\n", + " 'audio_timit_tokens8k_test',\n", + " 'audio_timit_tokens8k_tune',\n", + " 'image_celeba_tune',\n", + " 'image_cifar10',\n", + " 'image_cifar10_plain',\n", + " 'image_cifar10_plain8',\n", + " 'image_cifar10_tune',\n", + " 'image_fsns',\n", + " 'image_imagenet',\n", + " 'image_imagenet224',\n", + " 'image_imagenet32',\n", + " 'image_imagenet64',\n", + " 'image_mnist',\n", + " 'image_mnist_tune',\n", + " 'image_ms_coco_characters',\n", + " 'image_ms_coco_tokens32k',\n", + " 'image_ms_coco_tokens8k',\n", + " 'img2img_cifar10',\n", + " 'img2img_imagenet',\n", + " 'languagemodel_lm1b32k',\n", + " 'languagemodel_lm1b8k_packed',\n", + " 'languagemodel_lm1b_characters',\n", + " 'languagemodel_ptb10k',\n", + " 'languagemodel_ptb_characters',\n", + " 'languagemodel_wiki_full32k',\n", + " 'languagemodel_wiki_scramble128',\n", + " 'languagemodel_wiki_scramble1k50',\n", + " 'languagemodel_wiki_scramble8k50',\n", + " 'librispeech',\n", + " 'multinli_matched',\n", + " 'multinli_mismatched',\n", + " 'ocr_test',\n", + " 'parsing_english_ptb16k',\n", + " 'parsing_english_ptb8k',\n", + " 'parsing_icelandic16k',\n", + " 'programming_desc2code_cpp',\n", + " 'programming_desc2code_py',\n", + " 'sentiment_imdb',\n", + " 'summarize_cnn_dailymail32k',\n", + " 'translate_encs_wmt32k',\n", + " 'translate_encs_wmt_characters',\n", + " 'translate_ende_wmt32k',\n", + " 'translate_ende_wmt32k_packed',\n", + " 'translate_ende_wmt8k',\n", + " 'translate_ende_wmt_bpe32k',\n", + " 'translate_ende_wmt_characters',\n", + " 'translate_enfr_wmt32k',\n", + " 'translate_enfr_wmt8k',\n", + " 'translate_enfr_wmt_characters',\n", + " 'translate_enfr_wmt_small32k',\n", + " 'translate_enfr_wmt_small8k',\n", + " 'translate_enfr_wmt_small_characters',\n", + " 'translate_enmk_setimes32k',\n", + " 'translate_enzh_wmt8k']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 8 + } + ] + }, + { + "metadata": { + "id": "JKc2uSk6WX5e", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 2 + } + ], + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "9fe602a6-6b67-4d4e-82dd-2c0c11f16d14", + "executionInfo": { + "status": "ok", + "timestamp": 1512092691265, + "user_tz": 480, + "elapsed": 839, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } + }, + "source": [ + "# Create the MNIST problem and generate the data\n", + "\n", + "mnist_problem = problems.problem(\"image_mnist\")\n", + "# Generate data\n", + "mnist_problem.generate_data(data_dir, tmp_dir)" + ], + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Skipping generator because outputs files exist\n", + "INFO:tensorflow:Skipping generator because outputs files exist\n", + "INFO:tensorflow:Skipping shuffle because output files exist\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "VW6HCRANFPYV", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + }, + { + "item_id": 2 + } + ], + "base_uri": "https://localhost:8080/", + "height": 381 + }, + "outputId": "7b76feb3-2237-4669-d632-3ef69e04815d", + "executionInfo": { + "status": "ok", + "timestamp": 1512092691915, + "user_tz": 480, + "elapsed": 620, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } + }, + "source": [ + "# Get the tf.data.Dataset from Problem.dataset\n", + "mnist_example = tfe.Iterator(mnist_problem.dataset(Modes.TRAIN, data_dir)).next()\n", + "image = mnist_example[\"inputs\"]\n", + "label = mnist_example[\"targets\"]\n", + "\n", + "plt.imshow(image.numpy()[:, :, 0].astype(np.float32), cmap=plt.get_cmap('gray'))\n", + "print(\"Label: %d\" % label.numpy())" + ], + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n", + "Label: 5\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUsAAAFKCAYAAACU6307AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFBBJREFUeJzt3X9MlfX7x/HXESI5S4cQUK4ffjJc\nLHCr1IXmD9TZbJVKtZLUudmmFU50zpj54w+3VHS11Fpo4pZYnY3W5swGOddyDShJzePWQNuMmSIo\nU5hoiuf7R4tv2Dmci+PhnHMfn4+NP877vM/7XFc3vbzvc5/7xuXz+XwCAPRqQLQLAAAnICwBwICw\nBAADwhIADAhLADAgLAHAwhcBkvz+HD9+POBzTv2Jx57itS96cs5PpPrqjSsS37N0uVx+x30+X8Dn\nnCoee5Lisy96co5I9dVbHCaGuuh7772nY8eOyeVyaeXKlRo5cmSoSwFAzAspLH/66SedPn1aHo9H\np06d0sqVK+XxeMJdGwDEjJBO8NTU1Gjq1KmSpOHDh+vSpUvq6OgIa2EAEEtC2rNsbW3V448/3v04\nNTVVLS0tuueee/zOP378uHJycvw+F4GPTCMuHnuS4rMvenKOaPcV8meW/xasidzc3ICvi7cPo+Ox\nJyk++6In54iFEzwhHYZnZGSotbW1+/H58+eVnp4eylIA4AghheW4ceNUVVUlSTpx4oQyMjICHoID\nQDwI6TD8ySef1OOPP67XXntNLpdLa9euDXddABBT+FJ6mMVjT1J89kVPzuHYzywB4E5DWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAY\nEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBY\nAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoBBYrQL\nAIBQpKammuZdvHgxLO/HniUAGIS0Z1lXV6clS5YoKytLkjRixAitXr06rIUBQCwJ+TB8zJgx2rJl\nSzhrAYCYxWE4ABiEHJYnT57UokWLNHv2bP3444/hrAkAYo7L5/P5+vqi5uZm1dfXa/r06WpqatK8\nefNUXV2tpKQkv/O9Xq9ycnJuu1gAiJaQwvJWL7/8sj744AM9+OCD/t/E5fI77vP5Aj7nVPHYkxSf\nfdGTc/jrqz++OtRbHIZ0GL53717t3LlTktTS0qILFy4oMzMzlKUAwBFC2rPs6OjQ8uXLdfnyZV2/\nfl1FRUWaOHFi4Ddhz9Lx4rEvenKOWNizDMtheDCEpfPFY1/05ByxEJZc7gggJMOGDTPNe+KJJ8xr\n/nOhiz8rVqzo8bioqMi05kMPPWR+/97wPUsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIA\nDAhLADAgLAHAgMsdgRjidrvNc7Ozs/2OP/XUUz0eFxQUmNd8+eWXzXMD3ZLxVoHuc+vPkSNHAj73\nyiuv9Hj88ccfm9cNB/YsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgL/uGGbx\n2JPk7L6GDx/ud/zkyZN69NFHe4xZ/wjXmDFjzO//zDPPmOfeevVNb9LT0/8zNmDAAN28ebPHWHt7\nu3nN3377zTz3q6++Ms07dOiQec3a2lq/45H6/estDtmzBAADwhIADAhLADAgLAHAgLAEAAPCEgAM\nCEsAMCAsAcCAsAQAA8ISAAy43DHMnN7TrZf//aOxsVFZWVndj8ePH29ec9SoUea5qamppnl5eXnm\nNTMyMvyOJycnq7Ozs8fY3XffbVrzjz/+ML//8ePHzXO/++4781x/lyZWV1dr2rRpPcaOHj1qXrOl\npcU8N5K43BEAHIKwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAAy53DLNY7GnS\npEnmuZWVlX7H09LSdOHChe7H1ssSpcDb35+amhrTvP3795vXPHLkiN/xffv26fnnn+8x5vV6TWue\nPn3a/P6RFIu/f+HgmMsdGxoaNHXqVFVUVEiSzp49q7lz56qwsFBLlizRX3/9FZ5KASBGBQ3LK1eu\naN26dT1uXLBlyxYVFhbq888/18MPPxxwbwQA4kXQsExKStKOHTt63Lmlrq5OU6ZMkSTl5+ebD50A\nwKkSg05ITFRiYs9pnZ2dSkpKkvT3Z1mxelsnAAiXoGEZjOX80PHjx5WTkxPy650mHnuS/v6Hsb9Z\n71PZl/tZ9mbfvn1hWSeWxOvvX7T7Ciks3W63rl69qoEDB6q5uTngzVX/kZub63c8Hs/cxWJPnA3n\nbLjTOeZs+K3Gjh2rqqoqSX/fmbkvd80GACcKumfp9Xq1ceNGnTlzRomJiaqqqtLmzZtVUlIij8ej\noUOHaubMmZGoFQCiJmhY5uTkaPfu3f8Z37VrV78UBACx6LZP8CD2tbW1medeunTJ73haWlqP54YM\nGWJe89q1a+a5b775pmnesWPHzGv25ptvvgnLOoh/XBsOAAaEJQAYEJYAYEBYAoABYQkABoQlABgQ\nlgBgQFgCgAFhCQAGhCUAGHC54x3g119/Nc/95Zdf/I4/8sgjPZ574IEHzGsGupepP6dOnTLPBSKJ\nPUsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgMsd7wBvv/22eW5BQYHp\nucLCQvOaXMKIeMCeJQAYEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGHAFzx3gxRdf\nDPua8+fPN8+dMGGCee7FixdN87766ivzmkePHjXPBQJhzxIADAhLADAgLAHAgLAEAAPCEgAMCEsA\nMCAsAcCAsAQAA8ISAAwISwAw4HLHO8CFCxfMc7///nu/45MnT+7x3LBhw8xrZmdnm+empKSY5r37\n7rvmNT/66KOAz23durXH46VLl5rWvHHjhvn9ER/YswQAA1NYNjQ0aOrUqaqoqJAklZSU6IUXXtDc\nuXM1d+7cgHsjABAvgh6GX7lyRevWrVNeXl6P8WXLlik/P7/fCgOAWBJ0zzIpKUk7duxQRkZGJOoB\ngJjk8vl8PsvErVu3asiQIZozZ45KSkrU0tKi69evKy0tTatXr1ZqamrA13q9XuXk5IStaACItJDO\nhs+YMUMpKSnKzs7W9u3btW3bNq1Zsybg/NzcXL/jPp9PLpcrlBJiViz29MUXX5jnBjqCmDx5sg4e\nPNj9eOjQoeY1k5OTzXOtZ8MHDx5sXjPQ2fCioiJt27atx5jTz4bH4u9fOESqr972HUM6G56Xl9f9\ndZDJkyeroaEhtMoAwCFCCsvFixerqalJklRXV6esrKywFgUAsSboYbjX69XGjRt15swZJSYmqqqq\nSnPmzFFxcbGSk5Pldru1fv36SNQKAFETNCxzcnK0e/fu/4w/++yz/VIQAMQi89nw23qTAB/MxuOH\n0fHYkxS5vu677z7TvOXLl5vXDHTSZsCAAbp582aPsXnz5pnW3LNnj/n9I4nfv9t/n0C43BEADAhL\nADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAw4K87IqacO3fONG/Dhg3mNa33qJSk\nQYMGmefizsKeJQAYEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGHAFDxzp4YcfjnYJ\nuMOwZwkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYcLljjMnMzDTNa25u\n7udKYtuaNWv6Zd07/b8rAmPPEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhL\nADDgcscYs2fPHtO84uJi85perzfUcsIiMdH+a/buu++a5r3wwgvmNcvLy/2OL1iwQLt27eox9vXX\nX5vXxZ3F9FtcWlqq+vp63bhxQwsXLlRubq5WrFihrq4upaena9OmTUpKSurvWgEgaoKGZW1trRob\nG+XxeNTW1qZZs2YpLy9PhYWFmj59ut5//31VVlaqsLAwEvUCQFQE/cxy9OjR+vDDDyVJgwcPVmdn\np+rq6jRlyhRJUn5+vmpqavq3SgCIsqBhmZCQILfbLUmqrKzUhAkT1NnZ2X3YnZaWppaWlv6tEgCi\nzOXz+XyWiQcOHFBZWZnKy8s1bdq07r3J06dP65133tGXX34Z8LVer1c5OTnhqRgAosB0gufQoUP6\n5JNP9Omnn2rQoEFyu926evWqBg4cqObmZmVkZPT6+tzcXL/jPp9PLper71XHsNvt6cCBA6Z5kT4b\nfjt99cfZ8LVr15rX7O1s+M6dO3uMvfHGG+Z1Y1E8/j8lRa6v3vYdgx6Gt7e3q7S0VGVlZUpJSZEk\njR07VlVVVZKk6upqjR8/PkylAkBsCvpP/v79+9XW1tZjT2bDhg1atWqVPB6Phg4dqpkzZ/ZrkQAQ\nbUHD8tVXX9Wrr776n/Fbv8wLAPHMfILntt4kwGcN8fj5yu32dPPmTdO8SZMmmdf84YcfQqzm/93a\n16hRo8yvfeedd8xzCwoKTPMOHz5sXnPGjBl+x8+ePav777+/x9i5c+fM68aiePx/SnLIZ5YAAMIS\nAEwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAM+INlMebnn382zfv222/Na/7+++/m\nuR0dHQGf+/cd8ftyuWNXV5d57meffWaat2TJEvOaly9fDvic0y9vROSwZwkABoQlABgQlgBgQFgC\ngAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYcLljjJk9e7Zp3oIFC8xrPvvss+a5f/75p+m50tJS\n85rl5eXmuadOnTLPBSKJPUsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADBw+Xw+\nX7+/icvld9zn8wV8zqnisScpPvuiJ+eIVF+9xSF7lgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABYQkABqa/7lhaWqr6+nrduHFDCxcu1MGDB3XixAmlpKRI+vsvDU6aNKk/\n6wSAqAoalrW1tWpsbJTH41FbW5tmzZqlp59+WsuWLVN+fn4kagSAqAsalqNHj9bIkSMlSYMHD1Zn\nZ6e6urr6vTAAiCV9ukWbx+PR4cOHlZCQoJaWFl2/fl1paWlavXq1UlNTA78Jt2hzvHjsi56cIxZu\n0WYOywMHDqisrEzl5eXyer1KSUlRdna2tm/frnPnzmnNmjUBX+v1epWTk9P3ygEgVvgMfvjhB99L\nL73ka2tr+89zjY2Nvtdff73X10vy+9Pbc079icee4rUvenLOT6T66k3Qrw61t7ertLRUZWVl3We/\nFy9erKamJklSXV2dsrKygi0DAI4W9ATP/v371dbWpuLi4u6xgoICFRcXKzk5WW63W+vXr+/XIgEg\n2vgbPGEWjz1J8dkXPTlHpPrqLQ65ggcADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8IS\nAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAw\niMifwgUAp2PPEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwSIzGm7733ns6duyYXC6XVq5cqZEjR0aj\njLCqq6vTkiVLlJWVJUkaMWKEVq9eHeWqQtfQ0KC33npL8+fP15w5c3T27FmtWLFCXV1dSk9P16ZN\nm5SUlBTtMvvk1p5KSkp04sQJpaSkSJIWLFigSZMmRbfIPiotLVV9fb1u3LihhQsXKjc31/HbSfpv\nXwcPHoz6top4WP700086ffq0PB6PTp06pZUrV8rj8US6jH4xZswYbdmyJdpl3LYrV65o3bp1ysvL\n6x7bsmWLCgsLNX36dL3//vuqrKxUYWFhFKvsG389SdKyZcuUn58fpapuT21trRobG+XxeNTW1qZZ\ns2YpLy/P0dtJ8t/X008/HfVtFfHD8JqaGk2dOlWSNHz4cF26dEkdHR2RLgO9SEpK0o4dO5SRkdE9\nVldXpylTpkiS8vPzVVNTE63yQuKvJ6cbPXq0PvzwQ0nS4MGD1dnZ6fjtJPnvq6urK8pVRSEsW1tb\nNWTIkO7HqampamlpiXQZ/eLkyZNatGiRZs+erR9//DHa5YQsMTFRAwcO7DHW2dnZfTiXlpbmuG3m\nrydJqqio0Lx587R06VJdvHgxCpWFLiEhQW63W5JUWVmpCRMmOH47Sf77SkhIiPq2ispnlv8WL1db\nDhs2TEVFRZo+fbqampo0b948VVdXO/LzomDiZZvNmDFDKSkpys7O1vbt27Vt2zatWbMm2mX12YED\nB1RZWany8nJNmzate9zp2+nffXm93qhvq4jvWWZkZKi1tbX78fnz55Wenh7pMsIuMzNTzz33nFwu\nlx566CHde++9am5ujnZZYeN2u3X16lVJUnNzc1wczubl5Sk7O1uSNHnyZDU0NES5or47dOiQPvnk\nE+3YsUODBg2Km+10a1+xsK0iHpbjxo1TVVWVJOnEiRPKyMjQPffcE+kywm7v3r3auXOnJKmlpUUX\nLlxQZmZmlKsKn7Fjx3Zvt+rqao0fPz7KFd2+xYsXq6mpSdLfn8n+800Gp2hvb1dpaanKysq6zxLH\nw3by11csbKuo3HVo8+bNOnz4sFwul9auXavHHnss0iWEXUdHh5YvX67Lly/r+vXrKioq0sSJE6Nd\nVki8Xq82btyoM2fOKDExUZmZmdq8ebNKSkp07do1DR06VOvXr9ddd90V7VLN/PU0Z84cbd++XcnJ\nyXK73Vq/fr3S0tKiXaqZx+PR1q1b9b///a97bMOGDVq1apVjt5Pkv6+CggJVVFREdVtxizYAMOAK\nHgAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAM/g8DO834LYDKmQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "<matplotlib.figure.Figure at 0x7f2a4ac276d0>" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "metadata": { + "id": "WkFUEs7ZOA79", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 408 + }, + "outputId": "f56d417d-0b2e-4b4d-e1ea-6e6b233a609b", + "executionInfo": { + "status": "ok", + "timestamp": 1512092692257, + "user_tz": 480, + "elapsed": 279, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } + }, + "source": [ + "# Lots of models available\n", + "registry.list_models()" + ], + "cell_type": "code", + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['resnet50',\n", + " 'lstm_seq2seq',\n", + " 'transformer_encoder',\n", + " 'attention_lm',\n", + " 'vanilla_gan',\n", + " 'transformer',\n", + " 'gene_expression_conv',\n", + " 'transformer_moe',\n", + " 'attention_lm_moe',\n", + " 'transformer_revnet',\n", + " 'lstm_seq2seq_attention',\n", + " 'shake_shake',\n", + " 'transformer_ae',\n", + " 'diagonal_neural_gpu',\n", + " 'xception',\n", + " 'aligned',\n", + " 'multi_model',\n", + " 'neural_gpu',\n", + " 'slice_net',\n", + " 'byte_net',\n", + " 'cycle_gan',\n", + " 'transformer_sketch',\n", + " 'blue_net']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 11 + } + ] + }, + { + "metadata": { + "id": "-H25oG91YQj3", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "source": [ + "# Create your own model\n", + "\n", + "class MySimpleModel(t2t_model.T2TModel):\n", + "\n", + " def model_fn_body(self, features):\n", + " inputs = features[\"inputs\"]\n", + " filters = self.hparams.hidden_size\n", + " h1 = tf.layers.conv2d(inputs, filters,\n", + " kernel_size=(5, 5), strides=(2, 2))\n", + " h2 = tf.layers.conv2d(tf.nn.relu(h1), filters,\n", + " kernel_size=(5, 5), strides=(2, 2))\n", + " return tf.layers.conv2d(tf.nn.relu(h2), filters,\n", + " kernel_size=(3, 3))\n", + "\n", + "hparams = trainer_utils.create_hparams(\"basic_1\", data_dir)\n", + "hparams.hidden_size = 64\n", + "hparams.use_eager_mode = True\n", + "trainer_utils.add_problem_hparams(hparams, \"image_mnist\")\n", + "model = MySimpleModel(hparams, Modes.TRAIN)" + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "AWVd2I7PYz6H", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 12 + } + ], + "base_uri": "https://localhost:8080/", + "height": 357 + }, + "outputId": "5acd846f-7d5e-45b9-85b7-e8a93389630a", + "executionInfo": { + "status": "ok", + "timestamp": 1512092812219, + "user_tz": 480, + "elapsed": 119560, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } + }, + "source": [ + "# Train\n", + "\n", + "hparams.learning_rate = 0.0001\n", + "optimizer = tf.train.MomentumOptimizer(\n", + " hparams.learning_rate, momentum=hparams.optimizer_momentum_momentum)\n", + "\n", + "# In Eager mode, opt.minimize must be passed a function that produces the loss\n", + "def loss_function(features):\n", + " _, losses = model(features)\n", + " return losses[\"training\"]\n", + "\n", + "NUM_STEPS = 500\n", + "BATCH_SIZE = 128\n", + "\n", + "# Repeat and batch the data\n", + "mnist_train_dataset = mnist_problem.dataset(Modes.TRAIN, data_dir)\n", + "mnist_train_dataset = mnist_train_dataset.repeat(None).batch(BATCH_SIZE)\n", + "\n", + "# Training loop\n", + "for count, example in enumerate(tfe.Iterator(mnist_train_dataset)):\n", + " if count % 50 == 0:\n", + " loss = loss_function(example)\n", + " print(\"Step: %d, Loss: %.3f\" % (count, loss.numpy()))\n", + " if count >= NUM_STEPS:\n", + " break\n", + "\n", + " example[\"targets\"] = tf.reshape(example[\"targets\"], [BATCH_SIZE, 1, 1, 1]) # Make it 4D.\n", + " optimizer.minimize(lambda: loss_function(example),\n", + " global_step=tf.train.get_or_create_global_step())" + ], + "cell_type": "code", + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n", + "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:1671: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "\n", + "Future major versions of TensorFlow will allow gradients to flow\n", + "into the labels input on backprop by default.\n", + "\n", + "See tf.nn.softmax_cross_entropy_with_logits_v2.\n", + "\n", + "Step: 0, Loss: 669.337\n", + "Step: 50, Loss: 681.818\n", + "Step: 100, Loss: 672.086\n", + "Step: 150, Loss: 696.411\n", + "Step: 200, Loss: 687.108\n", + "Step: 250, Loss: 679.670\n", + "Step: 300, Loss: 686.915\n", + "Step: 350, Loss: 687.450\n", + "Step: 400, Loss: 680.961\n", + "Step: 450, Loss: 685.741\n", + "Step: 500, Loss: 690.723\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "CIFlkiVOd8jO", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 2 + } + ], + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "outputId": "29223ecd-c5ae-401b-e518-97b06fafb530", + "executionInfo": { + "status": "ok", + "timestamp": 1512092815393, + "user_tz": 480, + "elapsed": 3149, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } + }, + "source": [ + "model.set_mode(Modes.EVAL)\n", + "mnist_eval_dataset = mnist_problem.dataset(Modes.EVAL, data_dir)\n", + "all_perplexities = []\n", + "all_accuracies = []\n", + "for count, example in enumerate(tfe.Iterator(mnist_eval_dataset)):\n", + " if count >= 100:\n", + " break\n", + "\n", + " batch_inputs = tf.reshape(example[\"inputs\"], [1, 28, 28, 3]) # Make it 4D.\n", + " batch_targets = tf.reshape(example[\"targets\"], [1, 1, 1, 1]) # Make it 4D.\n", + " features = {\"inputs\": batch_inputs, \"targets\": batch_targets}\n", + "\n", + " # Call the model.\n", + " predictions, _ = model(features)\n", + " \n", + " # Calculate and append the metrics\n", + " all_perplexities.extend(metrics.padded_neg_log_perplexity(predictions, features[\"targets\"]))\n", + " all_accuracies.extend(metrics.padded_accuracy(predictions, features[\"targets\"]))\n", + "\n", + "# Print out metrics on the dataset\n", + "print(\"Accuracy: %.2f\" % tf.reduce_mean(tf.concat(all_accuracies, axis=1)).numpy())" + ], + "cell_type": "code", + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-dev*\n", + "Accuracy: 0.49\n" + ], + "name": "stdout" + } + ] + } + ] +} \ No newline at end of file diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 092aa5628..2736a0c45 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -106,7 +106,7 @@ def input_pipeline(problem, hparams=hparams, dataset_split=dataset_split, shard=shard) - dataset = dataset.map(cast_int64_to_int32, num_threads=num_threads) + dataset = dataset.map(cast_int64_to_int32, num_parallel_calls=num_threads) dataset = dataset.filter( functools.partial( example_valid_size, @@ -132,12 +132,6 @@ def input_pipeline(problem, batching_scheme["window_size"], padded_shapes=batching_scheme["padded_shapes"]) - # We reshuffle the batches to prevent many long-sequence batches at once. - # TODO(rsepassi): Rm hasattr call once new dynamic window size functionality - # is in a stable TF release. - if (batching_scheme["shuffle_queue_size"] is not None and - not hasattr(dataset, "apply")): - dataset = dataset.shuffle(batching_scheme["shuffle_queue_size"]) batched_examples = dataset.make_one_shot_iterator().get_next() return batched_examples @@ -182,6 +176,7 @@ def bucket_by_sequence_length(dataset, Returns: Dataset of padded and batched examples. """ + del window_size with tf.name_scope("bucket_by_seq_length"): def example_to_bucket_id(example): @@ -209,16 +204,9 @@ def batching_fn(bucket_id, grouped_dataset): batch_size = batch_sizes[bucket_id] return padded_batch(grouped_dataset, batch_size, padded_shapes) - # TODO(rsepassi): Rm branch once the new group_by_window functionality is in - # a stable TF release. - if hasattr(dataset, "apply"): - # If the Dataset supports dynamic window size, use it. - dataset = dataset.apply( - tf.contrib.data.group_by_window(example_to_bucket_id, batching_fn, - None, window_size_fn)) - else: - dataset = dataset.group_by_window(example_to_bucket_id, batching_fn, - window_size) + dataset = dataset.apply( + tf.contrib.data.group_by_window(example_to_bucket_id, batching_fn, None, + window_size_fn)) return dataset diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index 8fe5479da..fed1af719 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -33,6 +33,7 @@ from six.moves import zip # pylint: disable=redefined-builtin import tensorflow as tf +from tensorflow.python.eager import context from tensorflow.python.framework import function DEFAULT_DEV_STRING = "existing_device" @@ -186,6 +187,7 @@ def __call__(self, fn, *args, **kwargs): # Now make the parallel call. outputs = [] cache = {} + tensor_to_var = {} for i in xrange(self.n): def daisy_chain_getter(getter, name, *args, **kwargs): @@ -196,11 +198,16 @@ def daisy_chain_getter(getter, name, *args, **kwargs): return cache[device_var_key] if name in cache: # if we have it on a different device, copy it from the last device - v = tf.identity(cache[name]) + last_device_v = cache[name] + var = tensor_to_var[last_device_v] + v = tf.identity(last_device_v) else: var = getter(name, *args, **kwargs) v = tf.identity(var._ref()) # pylint: disable=protected-access - _add_variable_proxy_methods(var, v) + + # keep track of the original variable + tensor_to_var[v] = var + _add_variable_proxy_methods(tensor_to_var[v], v) # update the cache cache[name] = v cache[device_var_key] = v @@ -546,9 +553,10 @@ def remove(self, x): x, indices=self.nonpad_ids, ) - # This is a hack but for some reason, gather_nd return a tensor of - # undefined shape, so the shape is set up manually - x.set_shape([None] + x_shape[1:]) + if not context.in_eager_mode(): + # This is a hack but for some reason, gather_nd return a tensor of + # undefined shape, so the shape is set up manually + x.set_shape([None] + x_shape[1:]) return x def restore(self, x): @@ -894,14 +902,16 @@ def my_fn(x): def reshape_like(a, b): """Reshapes a to match the shape of b in all but the last dimension.""" ret = tf.reshape(a, tf.concat([tf.shape(b)[:-1], tf.shape(a)[-1:]], 0)) - ret.set_shape(b.get_shape().as_list()[:-1] + a.get_shape().as_list()[-1:]) + if not context.in_eager_mode(): + ret.set_shape(b.get_shape().as_list()[:-1] + a.get_shape().as_list()[-1:]) return ret def flatten_all_but_last(a): """Flatten all dimensions of a except the last.""" ret = tf.reshape(a, [-1, tf.shape(a)[-1]]) - ret.set_shape([None] + a.get_shape().as_list()[-1:]) + if not context.in_eager_mode(): + ret.set_shape([None] + a.get_shape().as_list()[-1:]) return ret diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 0f7b865b6..c49bdbaf1 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -175,6 +175,20 @@ def eval_autoregressive(self, features=None, decode_length=50): features, decode_length=decode_length) return logits, losses + def _fill_problem_hparams_features(self, features): + if features is None: + return + problem_hparams = self._problem_hparams + if "problem_choice" not in features: + features["problem_choice"] = tf.constant( + self._problem_idx, name="problem_choice") + if "input_space_id" not in features: + features["input_space_id"] = tf.constant( + problem_hparams.input_space_id, name="input_space_id") + if "target_space_id" not in features: + features["target_space_id"] = tf.constant( + problem_hparams.target_space_id, name="target_space_id") + def infer(self, features=None, decode_length=50, @@ -203,6 +217,7 @@ def infer(self, tf.logging.warn("Beam searching for a model with no inputs.") if not self.has_input and self.hparams.sampling_method != "random": tf.logging.warn("Non-random sampling for a model with no inputs.") + self._fill_problem_hparams_features(features) target_modality = self.hparams.problems[self._problem_idx].target_modality if target_modality.is_class_modality: @@ -370,7 +385,8 @@ def _slow_greedy_infer(self, features, decode_length): def infer_step(recent_output, recent_logits, unused_loss): """Inference step.""" - recent_output.set_shape([None, None, None, 1]) + if not self.hparams.use_eager_mode: + recent_output.set_shape([None, None, None, 1]) padded = tf.pad(recent_output, [[0, 0], [0, 1], [0, 0], [0, 0]]) features["targets"] = padded # This is inefficient in that it generates samples at all timesteps, @@ -385,7 +401,8 @@ def infer_step(recent_output, recent_logits, unused_loss): common_layers.shape_list(recent_output)[1], :, :] cur_sample = tf.to_int64(tf.expand_dims(cur_sample, axis=1)) samples = tf.concat([recent_output, cur_sample], axis=1) - samples.set_shape([None, None, None, 1]) + if not self.hparams.use_eager_mode: + samples.set_shape([None, None, None, 1]) # Assuming we have one shard for logits. logits = tf.concat([recent_logits, logits[:, -1:]], 1) @@ -416,7 +433,8 @@ def infer_step(recent_output, recent_logits, unused_loss): result = initial_output # tensor of shape [batch_size, time, 1, 1, vocab_size] logits = tf.zeros((batch_size, 0, 1, 1, target_modality.top_dimensionality)) - logits.set_shape([None, None, None, None, None]) + if not self.hparams.use_eager_mode: + logits.set_shape([None, None, None, None, None]) loss = 0.0 def while_exit_cond(result, logits, loss): # pylint: disable=unused-argument @@ -662,20 +680,13 @@ def sampled_results(): tf.less(tf.random_uniform([]), prob), sampled_results, lambda: (sharded_logits, losses)) - tf.logging.info("This model_fn took %.3f sec." % (time.time() - start_time)) + if not self.hparams.use_eager_mode: + tf.logging.info("This model_fn took %.3f sec." % + (time.time() - start_time)) return sharded_logits, losses def call(self, inputs_dict, skip=False, force_full_predict=False): - problem_hparams = self._problem_hparams - if "problem_choice" not in inputs_dict: - inputs_dict["problem_choice"] = tf.constant( - self._problem_idx, name="problem_choice") - if "input_space_id" not in inputs_dict: - inputs_dict["input_space_id"] = tf.constant( - problem_hparams.input_space_id, name="input_space_id") - if "target_space_id" not in inputs_dict: - inputs_dict["target_space_id"] = tf.constant( - problem_hparams.target_space_id, name="target_space_id") + self._fill_problem_hparams_features(inputs_dict) sharded_logits, losses = self._model_fn( inputs_dict, skip=skip, force_full_predict=force_full_predict) return tf.concat(sharded_logits, 0), losses @@ -701,8 +712,10 @@ def model_fn_body_sharded(self, sharded_features): } for d in xrange(self._num_datashards)] output = self._data_parallelism( - _with_timing(self.model_fn_body, "model_fn_body"), - datashard_to_features) + _with_timing( + self.model_fn_body, + "model_fn_body", + silent=self.hparams.use_eager_mode), datashard_to_features) if isinstance(output, tuple): losses_sharded = output[1] if isinstance(losses_sharded[0], dict): @@ -919,12 +932,14 @@ def _warn_changed_modality_type(new_name, old_name, feature_name): feature_name, old_type, old_name, new_type, new_name) -def _with_timing(fn, msg): +def _with_timing(fn, msg, silent=False): def fn_with_timing(*args, **kwargs): start_time = time.time() res = fn(*args, **kwargs) - tf.logging.info("Doing %s took %.3f sec." % (msg, time.time() - start_time)) + if not silent: + tf.logging.info("Doing %s took %.3f sec." % (msg, + time.time() - start_time)) return res return fn_with_timing From b1abcf4fa7f9e363c07686abacc30134537458d9 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 30 Nov 2017 18:57:49 -0800 Subject: [PATCH 0640/4095] v1.3.1 PiperOrigin-RevId: 177540047 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 34a94965c..94f44c137 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.3.0', + version='1.3.1', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From c93a188a76a60ebeb0d7b3ba6f050338120aa807 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Thu, 30 Nov 2017 20:57:56 -0800 Subject: [PATCH 0641/4095] New BLEU cleanup and small correction to VAE. PiperOrigin-RevId: 177547599 --- tensor2tensor/models/transformer_vae.py | 2 +- tensor2tensor/utils/bleu_hook.py | 66 +++++++++++++++++++++++++ tensor2tensor/utils/bleu_hook_test.py | 7 +-- 3 files changed, 71 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 140959c34..be21fca1a 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -139,7 +139,7 @@ def vae(x, z_size, name): kl = 0.5 * tf.reduce_mean( tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1) free_bits = z_size // 2 - kl_loss = tf.maximum(tf.reduce_mean(kl) - free_bits, 0.0) + kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0)) return z, kl_loss, mu, log_sigma diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 20a7c8426..49b31c1bb 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -20,10 +20,14 @@ import collections import math +import re +import sys +import unicodedata # Dependency imports import numpy as np +import six # pylint: disable=redefined-builtin from six.moves import xrange from six.moves import zip @@ -93,9 +97,15 @@ def compute_bleu(reference_corpus, for ngram in translation_ngram_counts: possible_matches_by_order[len(ngram)-1] += translation_ngram_counts[ngram] precisions = [0] * max_order + smooth = 1.0 for i in xrange(0, max_order): if possible_matches_by_order[i] > 0: precisions[i] = matches_by_order[i] / possible_matches_by_order[i] + if matches_by_order[i] > 0: + precisions[i] = matches_by_order[i] / possible_matches_by_order[i] + else: + smooth *= 2 + precisions[i] = 1.0 / (smooth * possible_matches_by_order[i]) else: precisions[i] = 0.0 @@ -131,3 +141,59 @@ def bleu_score(predictions, labels, **unused_kwargs): bleu = tf.py_func(compute_bleu, (labels, outputs), tf.float32) return bleu, tf.constant(1.0) + + +class UnicodeRegex(object): + """Ad-hoc hack to recognize all punctuation and symbols.""" + + def __init__(self): + def _property_chars(prefix): + return ''.join(six.unichr(x) for x in range(sys.maxunicode) + if unicodedata.category(six.unichr(x)).startswith(prefix)) + punctuation = self._property_chars('P') + self.nondigit_punct_re = re.compile(r'([^\d])([' + punctuation + r'])') + self.punct_nondigit_re = re.compile(r'([' + punctuation + r'])([^\d])') + self.symbol_re = re.compile('([' + _property_chars('S') + '])') + + +def bleu_tokenize(string): + r"""Tokenize a string following the official BLEU implementation. + + See https://github.com/moses-smt/mosesdecoder/" + "blob/master/scripts/generic/mteval-v14.pl#L954-L983 + In our case, the input string is expected to be just one line + and no HTML entities de-escaping is needed. + So we just tokenize on punctuation and symbols, + except when a punctuation is preceded and followed by a digit + (e.g. a comma/dot as a thousand/decimal separator). + + Note that a numer (e.g. a year) followed by a dot at the end of sentence + is NOT tokenized, + i.e. the dot stays with the number because `s/(\p{P})(\P{N})/ $1 $2/g` + does not match this case (unless we add a space after each sentence). + However, this error is already in the original mteval-v14.pl + and we want to be consistent with it. + + Args: + string: the input string + + Returns: + a list of tokens + """ + string = UnicodeRegex.nondigit_punct_re.sub(r'\1 \2 ', string) + string = UnicodeRegex.punct_nondigit_re.sub(r' \1 \2', string) + string = UnicodeRegex.symbol_re.sub(r' \1 ', string) + return string.split() + + +def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False): + """Compute BLEU for two files (reference and hypothesis translation).""" + ref_lines = open(ref_filename).read().splitlines() + hyp_lines = open(hyp_filename).read().splitlines() + assert len(ref_lines) == len(hyp_lines) + if not case_sensitive: + ref_lines = [x.lower() for x in ref_lines] + hyp_lines = [x.lower() for x in hyp_lines] + ref_tokens = [bleu_tokenize(x) for x in ref_lines] + hyp_tokens = [bleu_tokenize(x) for x in hyp_lines] + return compute_bleu(ref_tokens, hyp_tokens) diff --git a/tensor2tensor/utils/bleu_hook_test.py b/tensor2tensor/utils/bleu_hook_test.py index bf08174f8..e4f3a18a9 100644 --- a/tensor2tensor/utils/bleu_hook_test.py +++ b/tensor2tensor/utils/bleu_hook_test.py @@ -39,8 +39,9 @@ def testComputeNotEqual(self): translation_corpus = [[1, 2, 3, 4]] reference_corpus = [[5, 6, 7, 8]] bleu = bleu_hook.compute_bleu(reference_corpus, translation_corpus) - actual_bleu = 0.0 - self.assertEqual(bleu, actual_bleu) + # The smoothing prevents 0 for small corpora + actual_bleu = 0.0798679 + self.assertAllClose(bleu, actual_bleu, atol=1e-03) def testComputeMultipleBatch(self): translation_corpus = [[1, 2, 3, 4], [5, 6, 7, 0]] @@ -53,7 +54,7 @@ def testComputeMultipleNgrams(self): reference_corpus = [[1, 2, 1, 13], [12, 6, 7, 4, 8, 9, 10]] translation_corpus = [[1, 2, 1, 3], [5, 6, 7, 4]] bleu = bleu_hook.compute_bleu(reference_corpus, translation_corpus) - actual_bleu = 0.486 + actual_bleu = 0.3436 self.assertAllClose(bleu, actual_bleu, atol=1e-03) if __name__ == '__main__': From e133a1af7439eaa32d9ebd8edef7d1e6b88b0a8c Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Thu, 30 Nov 2017 23:03:32 -0800 Subject: [PATCH 0642/4095] Enable Transformer fast decoding in eager mode PiperOrigin-RevId: 177554962 --- tensor2tensor/models/transformer.py | 32 ++++++++++------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 099a226b3..f2b693e95 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -171,14 +171,9 @@ def _greedy_infer(self, features, decode_length): Raises: NotImplementedError: If there are multiple data shards. """ - # TODO(nikip): Remove slow decoding for eager. Eager mode doesn't work - # with accessing _shape which is used in fast decoding currently. - if self._hparams.use_eager_mode: - return self._slow_greedy_infer(features, decode_length) - else: - with tf.variable_scope(self.name): - decoded_ids, _ = self._fast_decode(features, decode_length) - return decoded_ids, None, None + with tf.variable_scope(self.name): + decoded_ids, _ = self._fast_decode(features, decode_length) + return decoded_ids, None, None def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): """Beam search decoding. @@ -194,16 +189,10 @@ def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): Returns: samples: an integer `Tensor`. Top samples from the beam search """ - # TODO(nikip): Remove slow decoding for eager. Eager mode doesn't work - # with accessing _shape which is used in fast decoding currently. - if self._hparams.use_eager_mode: - return self._beam_decode_slow( - features, decode_length, beam_size, top_beams, alpha) - else: - with tf.variable_scope(self.name): - decoded_ids, scores = self._fast_decode(features, decode_length, - beam_size, top_beams, alpha) - return {"outputs": decoded_ids, "scores": scores} + with tf.variable_scope(self.name): + decoded_ids, scores = self._fast_decode(features, decode_length, + beam_size, top_beams, alpha) + return {"outputs": decoded_ids, "scores": scores} def _fast_decode(self, features, @@ -335,9 +324,10 @@ def symbols_to_logits_fn(ids, i, cache): # Note: Tensor.set_shape() does not work here since it merges shape info. # TODO(llion); Find a more robust solution. # pylint: disable=protected-access - for layer in cache: - cache[layer]["k"]._shape = tf.TensorShape([None, None, key_channels]) - cache[layer]["v"]._shape = tf.TensorShape([None, None, value_channels]) + if not self._hparams.use_eager_mode: + for layer in cache: + cache[layer]["k"]._shape = tf.TensorShape([None, None, key_channels]) + cache[layer]["v"]._shape = tf.TensorShape([None, None, value_channels]) # pylint: enable=protected-access cache["encoder_output"] = encoder_output cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias From 654f74eeb82170bbb555b83af6ff1e60f39eafd7 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Fri, 1 Dec 2017 13:47:45 -0800 Subject: [PATCH 0643/4095] Fix decoding and training issues in external colab. PiperOrigin-RevId: 177635374 --- tensor2tensor/notebooks/hello_t2t.ipynb | 685 +++++------------------- 1 file changed, 139 insertions(+), 546 deletions(-) diff --git a/tensor2tensor/notebooks/hello_t2t.ipynb b/tensor2tensor/notebooks/hello_t2t.ipynb index 86070da40..845f20d5f 100644 --- a/tensor2tensor/notebooks/hello_t2t.ipynb +++ b/tensor2tensor/notebooks/hello_t2t.ipynb @@ -1,28 +1,19 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "T2T with TF Eager", - "version": "0.3.2", - "views": {}, - "default_view": {}, - "provenance": [], - "collapsed_sections": [] - } - }, "cells": [ { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "s19ucTii_wYb", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - } + }, + "colab_type": "code", + "id": "s19ucTii_wYb" }, + "outputs": [], "source": [ "# Copyright 2017 Google LLC.\n", "\n", @@ -37,41 +28,41 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." - ], - "cell_type": "code", - "execution_count": 0, - "outputs": [] + ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "OPGni6fuvoTj", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - } + }, + "colab_type": "code", + "id": "OPGni6fuvoTj" }, + "outputs": [], "source": [ "# Install deps\n", "!pip install -q \"tensor2tensor-dev==1.3.1.dev5\" tf-nightly" - ], - "cell_type": "code", - "execution_count": 0, - "outputs": [] + ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "oILRLCWN_16u", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - } + }, + "colab_type": "code", + "id": "oILRLCWN_16u" }, + "outputs": [], "source": [ "import tensorflow as tf\n", "import matplotlib.pyplot as plt\n", @@ -100,104 +91,52 @@ "tf.gfile.MakeDirs(tmp_dir)\n", "tf.gfile.MakeDirs(train_dir)\n", "tf.gfile.MakeDirs(checkpoint_dir)" - ], - "cell_type": "code", - "execution_count": 0, - "outputs": [] + ] }, { + "cell_type": "markdown", "metadata": { - "id": "gXL7_bVH49Kl", - "colab_type": "text" + "colab_type": "text", + "id": "gXL7_bVH49Kl" }, "source": [ "# Translate from English to French with a pre-trained model" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "Q2CYCYjZTlZs", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - }, - "output_extras": [ - { - "item_id": 5 - } - ], - "base_uri": "https://localhost:8080/", - "height": 136 - }, - "outputId": "9d08dd17-a3a1-49ba-930c-a07f11ea24e3", - "executionInfo": { - "status": "ok", - "timestamp": 1512092524785, - "user_tz": 480, - "elapsed": 17914, - "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" } - } + }, + "colab_type": "code", + "id": "Q2CYCYjZTlZs" }, + "outputs": [], "source": [ "# Translation\n", "enfr_problem = registry.problem(\"translate_enfr_wmt_small32k\")\n", "enfr_problem.generate_data(data_dir, tmp_dir) " - ], - "cell_type": "code", - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "text": [ - "INFO:tensorflow:Found vocab file: /content/t2t/data/vocab.enfr.32768\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/baseline-1M-enfr.tgz\n", - "INFO:tensorflow:Found vocab file: /content/t2t/data/vocab.enfr.32768\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/baseline-1M-enfr.tgz\n", - "INFO:tensorflow:Skipping generator because outputs files exist\n", - "INFO:tensorflow:Skipping generator because outputs files exist\n", - "INFO:tensorflow:Skipping shuffle because output files exist\n" - ], - "name": "stdout" - } ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "g2aQW7Z6TOEu", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - }, - "output_extras": [ - { - "item_id": 2 - } - ], - "base_uri": "https://localhost:8080/", - "height": 170 - }, - "outputId": "8196348d-747e-4b33-9b7c-742d8041d0b7", - "executionInfo": { - "status": "ok", - "timestamp": 1512092525545, - "user_tz": 480, - "elapsed": 732, - "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" } - } + }, + "colab_type": "code", + "id": "g2aQW7Z6TOEu" }, + "outputs": [], "source": [ "example = tfe.Iterator(enfr_problem.dataset(Modes.TRAIN, data_dir)).next()\n", "inputs = [int(x) for x in example[\"inputs\"].numpy()] # Cast to ints.\n", @@ -205,7 +144,8 @@ "\n", "encoders = enfr_problem.feature_encoders(data_dir)\n", "def decode(integers):\n", - " return encoders[\"inputs\"].decode(np.squeeze(integers))\n", + " samples = encoders[\"inputs\"].decode(np.squeeze(integers))\n", + " return samples[:samples.find(\"\u003cEOS\u003e\")]\n", "\n", "# Example inputs as int-tensor.\n", "print(\"Inputs, encoded:\")\n", @@ -219,38 +159,22 @@ "# Example targets as a sentence.\n", "print(\"Targets, decoded:\")\n", "print(decode(targets))" - ], - "cell_type": "code", - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "text": [ - "INFO:tensorflow:Reading data files from /content/t2t/data/translate_enfr_wmt_small32k-train*\n", - "Inputs, encoded:\n", - "[47, 254, 17, 280, 7, 219, 4, 696, 158, 8, 4, 2085, 135, 4, 246, 3930, 3, 780, 4, 696, 158, 8, 4, 2085, 11, 5281, 5010, 31, 2679, 8, 4, 2085, 2, 1]\n", - "Inputs, decoded:\n", - "The first is how to take the resources out of the ground -- the economic processes, taking the resources out of the ground and putting assets on top of the ground.<EOS>\n", - "Targets, encoded:\n", - "[113, 699, 131, 5, 24, 6, 477, 571, 27599, 27580, 27584, 27586, 24058, 18, 1018, 37, 4663, 135, 15, 739, 360, 3, 131, 5, 24, 22, 5, 27599, 27580, 27584, 27586, 24058, 18, 1018, 37, 4663, 14, 27, 8388, 20, 2477, 16, 12, 5, 1348, 1374, 2, 1]\n", - "Targets, decoded:\n", - "Le premier c'est de savoir comment extraire les ressources du sol -- le processus économique, c'est d'extraire les ressources du sol et en retirer des avantages à l'air libre.<EOS>\n" - ], - "name": "stdout" - } ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "9l6hDQbrRUYV", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - } + }, + "colab_type": "code", + "id": "9l6hDQbrRUYV" }, + "outputs": [], "source": [ "# Create hparams and the T2TModel object.\n", "model_name = \"transformer\"\n", @@ -264,22 +188,22 @@ "# Layer and so subsequent instantiations will have different variable scopes\n", "# that will not match the checkpoint.\n", "model = registry.model(model_name)(hparams, Modes.PREDICT)" - ], - "cell_type": "code", - "execution_count": 0, - "outputs": [] + ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "FEwNUVlMYOJi", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - } + }, + "colab_type": "code", + "id": "FEwNUVlMYOJi" }, + "outputs": [], "source": [ "# Copy the pretrained checkpoint locally\n", "gs_ckpt_dir = \"gs://tensor2tensor-checkpoints/\"\n", @@ -289,41 +213,22 @@ "!gsutil -q cp -R {gs_ckpt} {local_ckpt}\n", "ckpt_path = tf.train.latest_checkpoint(local_ckpt)\n", "ckpt_path" - ], - "cell_type": "code", - "execution_count": 0, - "outputs": [] + ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "3O-8E9d6TtuJ", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - }, - "output_extras": [ - { - "item_id": 3 - } - ], - "base_uri": "https://localhost:8080/", - "height": 139 - }, - "outputId": "d7883ce2-d90f-440c-b6b3-16ecffab481c", - "executionInfo": { - "status": "ok", - "timestamp": 1512092689851, - "user_tz": 480, - "elapsed": 141849, - "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" } - } + }, + "colab_type": "code", + "id": "3O-8E9d6TtuJ" }, + "outputs": [], "source": [ "# Restore and translate!\n", "\n", @@ -333,259 +238,87 @@ " batch_inputs = tf.reshape(inputs, [1, -1, 1, 1]) # Make it 4D.\n", " # TODO: rm target_space_id\n", " features_dict = {\"inputs\": batch_inputs,\n", - " \"target_space_id\": tf.constant(hparams.problems[0].target_space_id)}\n", + " \"target_space_id\": tf.constant(hparams.problems[0].target_space_id)}\n", " return features_dict\n", "\n", - "\n", + "# Input to the decoder.\n", "inputs = \"This is a cat.\"\n", "\n", + "store = tfe.EagerVariableStore()\n", "# Restore from checkpoint and run inference\n", - "with tfe.restore_variables_on_create(ckpt_path):\n", - " samples = model.infer(encode(inputs), beam_size=1)\n", + "with store.as_default():\n", + " with tfe.restore_variables_on_create(ckpt_path):\n", + " samples = model.infer(encode(inputs), beam_size=1)\n", "\n", "print(\"Inputs: %s\" % inputs)\n", "print(\"Outputs: %s\" % decode(samples))" - ], - "cell_type": "code", - "execution_count": 7, - "outputs": [ - { - "output_type": "stream", - "text": [ - "INFO:tensorflow:Greedy Decoding\n", - "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:487: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "keep_dims is deprecated, use keepdims instead\n", - "Inputs: This is a cat.\n", - "Outputs: chairpersons solidité Istanbul individuelles cassava, «salle mutuelles détaillée adoptée cravate dépit 750 820 procédés Afghan permettraient capture fasse numérique bans got éthiciens regretteras célébrer January impressed Precisely saison complicité opérée flung ıhostiles Thinking voudrait auxiliaires holding multilateral focalisé réussisaient Steagall dons reminds researching promette assigned anachronique IPCC fatigue irresponsables homologue reprennent After formulent finit\n" - ], - "name": "stdout" - } ] }, { + "cell_type": "markdown", "metadata": { - "id": "i7BZuO7T5BB4", - "colab_type": "text" + "colab_type": "text", + "id": "i7BZuO7T5BB4" }, "source": [ "# Train a custom model on MNIST" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "RYDMO4zArgkz", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - }, - "output_extras": [ - { - "item_id": 1 - } - ], - "base_uri": "https://localhost:8080/", - "height": 1224 - }, - "outputId": "73452116-72c6-4327-9f83-84be584c3e6f", - "executionInfo": { - "status": "ok", - "timestamp": 1512092690339, - "user_tz": 480, - "elapsed": 456, - "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" } - } + }, + "colab_type": "code", + "id": "RYDMO4zArgkz" }, + "outputs": [], "source": [ "# Lots of problems available\n", "problems.available()" - ], - "cell_type": "code", - "execution_count": 8, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['algorithmic_addition_binary40',\n", - " 'algorithmic_addition_decimal40',\n", - " 'algorithmic_cipher_shift200',\n", - " 'algorithmic_cipher_shift5',\n", - " 'algorithmic_cipher_vigenere200',\n", - " 'algorithmic_cipher_vigenere5',\n", - " 'algorithmic_identity_binary40',\n", - " 'algorithmic_identity_decimal40',\n", - " 'algorithmic_multiplication_binary40',\n", - " 'algorithmic_multiplication_decimal40',\n", - " 'algorithmic_reverse_binary40',\n", - " 'algorithmic_reverse_binary40_test',\n", - " 'algorithmic_reverse_decimal40',\n", - " 'algorithmic_reverse_nlplike32k',\n", - " 'algorithmic_reverse_nlplike8k',\n", - " 'algorithmic_shift_decimal40',\n", - " 'audio_timit_characters_tune',\n", - " 'audio_timit_tokens8k_test',\n", - " 'audio_timit_tokens8k_tune',\n", - " 'image_celeba_tune',\n", - " 'image_cifar10',\n", - " 'image_cifar10_plain',\n", - " 'image_cifar10_plain8',\n", - " 'image_cifar10_tune',\n", - " 'image_fsns',\n", - " 'image_imagenet',\n", - " 'image_imagenet224',\n", - " 'image_imagenet32',\n", - " 'image_imagenet64',\n", - " 'image_mnist',\n", - " 'image_mnist_tune',\n", - " 'image_ms_coco_characters',\n", - " 'image_ms_coco_tokens32k',\n", - " 'image_ms_coco_tokens8k',\n", - " 'img2img_cifar10',\n", - " 'img2img_imagenet',\n", - " 'languagemodel_lm1b32k',\n", - " 'languagemodel_lm1b8k_packed',\n", - " 'languagemodel_lm1b_characters',\n", - " 'languagemodel_ptb10k',\n", - " 'languagemodel_ptb_characters',\n", - " 'languagemodel_wiki_full32k',\n", - " 'languagemodel_wiki_scramble128',\n", - " 'languagemodel_wiki_scramble1k50',\n", - " 'languagemodel_wiki_scramble8k50',\n", - " 'librispeech',\n", - " 'multinli_matched',\n", - " 'multinli_mismatched',\n", - " 'ocr_test',\n", - " 'parsing_english_ptb16k',\n", - " 'parsing_english_ptb8k',\n", - " 'parsing_icelandic16k',\n", - " 'programming_desc2code_cpp',\n", - " 'programming_desc2code_py',\n", - " 'sentiment_imdb',\n", - " 'summarize_cnn_dailymail32k',\n", - " 'translate_encs_wmt32k',\n", - " 'translate_encs_wmt_characters',\n", - " 'translate_ende_wmt32k',\n", - " 'translate_ende_wmt32k_packed',\n", - " 'translate_ende_wmt8k',\n", - " 'translate_ende_wmt_bpe32k',\n", - " 'translate_ende_wmt_characters',\n", - " 'translate_enfr_wmt32k',\n", - " 'translate_enfr_wmt8k',\n", - " 'translate_enfr_wmt_characters',\n", - " 'translate_enfr_wmt_small32k',\n", - " 'translate_enfr_wmt_small8k',\n", - " 'translate_enfr_wmt_small_characters',\n", - " 'translate_enmk_setimes32k',\n", - " 'translate_enzh_wmt8k']" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 8 - } ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "JKc2uSk6WX5e", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - }, - "output_extras": [ - { - "item_id": 2 - } - ], - "base_uri": "https://localhost:8080/", - "height": 204 - }, - "outputId": "9fe602a6-6b67-4d4e-82dd-2c0c11f16d14", - "executionInfo": { - "status": "ok", - "timestamp": 1512092691265, - "user_tz": 480, - "elapsed": 839, - "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" } - } + }, + "colab_type": "code", + "id": "JKc2uSk6WX5e" }, + "outputs": [], "source": [ "# Create the MNIST problem and generate the data\n", "\n", "mnist_problem = problems.problem(\"image_mnist\")\n", "# Generate data\n", "mnist_problem.generate_data(data_dir, tmp_dir)" - ], - "cell_type": "code", - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "text": [ - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Skipping generator because outputs files exist\n", - "INFO:tensorflow:Skipping generator because outputs files exist\n", - "INFO:tensorflow:Skipping shuffle because output files exist\n" - ], - "name": "stdout" - } ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "VW6HCRANFPYV", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - }, - "output_extras": [ - { - "item_id": 1 - }, - { - "item_id": 2 - } - ], - "base_uri": "https://localhost:8080/", - "height": 381 - }, - "outputId": "7b76feb3-2237-4669-d632-3ef69e04815d", - "executionInfo": { - "status": "ok", - "timestamp": 1512092691915, - "user_tz": 480, - "elapsed": 620, - "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" } - } + }, + "colab_type": "code", + "id": "VW6HCRANFPYV" }, + "outputs": [], "source": [ "# Get the tf.data.Dataset from Problem.dataset\n", "mnist_example = tfe.Iterator(mnist_problem.dataset(Modes.TRAIN, data_dir)).next()\n", @@ -594,116 +327,41 @@ "\n", "plt.imshow(image.numpy()[:, :, 0].astype(np.float32), cmap=plt.get_cmap('gray'))\n", "print(\"Label: %d\" % label.numpy())" - ], - "cell_type": "code", - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "text": [ - "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n", - "Label: 5\n" - ], - "name": "stdout" - }, - { - "output_type": "display_data", - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUsAAAFKCAYAAACU6307AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFBBJREFUeJzt3X9MlfX7x/HXESI5S4cQUK4ffjJc\nLHCr1IXmD9TZbJVKtZLUudmmFU50zpj54w+3VHS11Fpo4pZYnY3W5swGOddyDShJzePWQNuMmSIo\nU5hoiuf7R4tv2Dmci+PhnHMfn4+NP877vM/7XFc3vbzvc5/7xuXz+XwCAPRqQLQLAAAnICwBwICw\nBAADwhIADAhLADAgLAHAwhcBkvz+HD9+POBzTv2Jx57itS96cs5PpPrqjSsS37N0uVx+x30+X8Dn\nnCoee5Lisy96co5I9dVbHCaGuuh7772nY8eOyeVyaeXKlRo5cmSoSwFAzAspLH/66SedPn1aHo9H\np06d0sqVK+XxeMJdGwDEjJBO8NTU1Gjq1KmSpOHDh+vSpUvq6OgIa2EAEEtC2rNsbW3V448/3v04\nNTVVLS0tuueee/zOP378uHJycvw+F4GPTCMuHnuS4rMvenKOaPcV8meW/xasidzc3ICvi7cPo+Ox\nJyk++6In54iFEzwhHYZnZGSotbW1+/H58+eVnp4eylIA4AghheW4ceNUVVUlSTpx4oQyMjICHoID\nQDwI6TD8ySef1OOPP67XXntNLpdLa9euDXddABBT+FJ6mMVjT1J89kVPzuHYzywB4E5DWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAY\nEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBY\nAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoBBYrQL\nAIBQpKammuZdvHgxLO/HniUAGIS0Z1lXV6clS5YoKytLkjRixAitXr06rIUBQCwJ+TB8zJgx2rJl\nSzhrAYCYxWE4ABiEHJYnT57UokWLNHv2bP3444/hrAkAYo7L5/P5+vqi5uZm1dfXa/r06WpqatK8\nefNUXV2tpKQkv/O9Xq9ycnJuu1gAiJaQwvJWL7/8sj744AM9+OCD/t/E5fI77vP5Aj7nVPHYkxSf\nfdGTc/jrqz++OtRbHIZ0GL53717t3LlTktTS0qILFy4oMzMzlKUAwBFC2rPs6OjQ8uXLdfnyZV2/\nfl1FRUWaOHFi4Ddhz9Lx4rEvenKOWNizDMtheDCEpfPFY1/05ByxEJZc7gggJMOGDTPNe+KJJ8xr\n/nOhiz8rVqzo8bioqMi05kMPPWR+/97wPUsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIA\nDAhLADAgLAHAgMsdgRjidrvNc7Ozs/2OP/XUUz0eFxQUmNd8+eWXzXMD3ZLxVoHuc+vPkSNHAj73\nyiuv9Hj88ccfm9cNB/YsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgL/uGGbx\n2JPk7L6GDx/ud/zkyZN69NFHe4xZ/wjXmDFjzO//zDPPmOfeevVNb9LT0/8zNmDAAN28ebPHWHt7\nu3nN3377zTz3q6++Ms07dOiQec3a2lq/45H6/estDtmzBAADwhIADAhLADAgLAHAgLAEAAPCEgAM\nCEsAMCAsAcCAsAQAA8ISAAy43DHMnN7TrZf//aOxsVFZWVndj8ePH29ec9SoUea5qamppnl5eXnm\nNTMyMvyOJycnq7Ozs8fY3XffbVrzjz/+ML//8ePHzXO/++4781x/lyZWV1dr2rRpPcaOHj1qXrOl\npcU8N5K43BEAHIKwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAAy53DLNY7GnS\npEnmuZWVlX7H09LSdOHChe7H1ssSpcDb35+amhrTvP3795vXPHLkiN/xffv26fnnn+8x5vV6TWue\nPn3a/P6RFIu/f+HgmMsdGxoaNHXqVFVUVEiSzp49q7lz56qwsFBLlizRX3/9FZ5KASBGBQ3LK1eu\naN26dT1uXLBlyxYVFhbq888/18MPPxxwbwQA4kXQsExKStKOHTt63Lmlrq5OU6ZMkSTl5+ebD50A\nwKkSg05ITFRiYs9pnZ2dSkpKkvT3Z1mxelsnAAiXoGEZjOX80PHjx5WTkxPy650mHnuS/v6Hsb9Z\n71PZl/tZ9mbfvn1hWSeWxOvvX7T7Ciks3W63rl69qoEDB6q5uTngzVX/kZub63c8Hs/cxWJPnA3n\nbLjTOeZs+K3Gjh2rqqoqSX/fmbkvd80GACcKumfp9Xq1ceNGnTlzRomJiaqqqtLmzZtVUlIij8ej\noUOHaubMmZGoFQCiJmhY5uTkaPfu3f8Z37VrV78UBACx6LZP8CD2tbW1medeunTJ73haWlqP54YM\nGWJe89q1a+a5b775pmnesWPHzGv25ptvvgnLOoh/XBsOAAaEJQAYEJYAYEBYAoABYQkABoQlABgQ\nlgBgQFgCgAFhCQAGhCUAGHC54x3g119/Nc/95Zdf/I4/8sgjPZ574IEHzGsGupepP6dOnTLPBSKJ\nPUsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgMsd7wBvv/22eW5BQYHp\nucLCQvOaXMKIeMCeJQAYEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGHAFzx3gxRdf\nDPua8+fPN8+dMGGCee7FixdN87766ivzmkePHjXPBQJhzxIADAhLADAgLAHAgLAEAAPCEgAMCEsA\nMCAsAcCAsAQAA8ISAAwISwAw4HLHO8CFCxfMc7///nu/45MnT+7x3LBhw8xrZmdnm+empKSY5r37\n7rvmNT/66KOAz23durXH46VLl5rWvHHjhvn9ER/YswQAA1NYNjQ0aOrUqaqoqJAklZSU6IUXXtDc\nuXM1d+7cgHsjABAvgh6GX7lyRevWrVNeXl6P8WXLlik/P7/fCgOAWBJ0zzIpKUk7duxQRkZGJOoB\ngJjk8vl8PsvErVu3asiQIZozZ45KSkrU0tKi69evKy0tTatXr1ZqamrA13q9XuXk5IStaACItJDO\nhs+YMUMpKSnKzs7W9u3btW3bNq1Zsybg/NzcXL/jPp9PLpcrlBJiViz29MUXX5jnBjqCmDx5sg4e\nPNj9eOjQoeY1k5OTzXOtZ8MHDx5sXjPQ2fCioiJt27atx5jTz4bH4u9fOESqr972HUM6G56Xl9f9\ndZDJkyeroaEhtMoAwCFCCsvFixerqalJklRXV6esrKywFgUAsSboYbjX69XGjRt15swZJSYmqqqq\nSnPmzFFxcbGSk5Pldru1fv36SNQKAFETNCxzcnK0e/fu/4w/++yz/VIQAMQi89nw23qTAB/MxuOH\n0fHYkxS5vu677z7TvOXLl5vXDHTSZsCAAbp582aPsXnz5pnW3LNnj/n9I4nfv9t/n0C43BEADAhL\nADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAw4K87IqacO3fONG/Dhg3mNa33qJSk\nQYMGmefizsKeJQAYEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGHAFDxzp4YcfjnYJ\nuMOwZwkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYcLljjMnMzDTNa25u\n7udKYtuaNWv6Zd07/b8rAmPPEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhL\nADDgcscYs2fPHtO84uJi85perzfUcsIiMdH+a/buu++a5r3wwgvmNcvLy/2OL1iwQLt27eox9vXX\nX5vXxZ3F9FtcWlqq+vp63bhxQwsXLlRubq5WrFihrq4upaena9OmTUpKSurvWgEgaoKGZW1trRob\nG+XxeNTW1qZZs2YpLy9PhYWFmj59ut5//31VVlaqsLAwEvUCQFQE/cxy9OjR+vDDDyVJgwcPVmdn\np+rq6jRlyhRJUn5+vmpqavq3SgCIsqBhmZCQILfbLUmqrKzUhAkT1NnZ2X3YnZaWppaWlv6tEgCi\nzOXz+XyWiQcOHFBZWZnKy8s1bdq07r3J06dP65133tGXX34Z8LVer1c5OTnhqRgAosB0gufQoUP6\n5JNP9Omnn2rQoEFyu926evWqBg4cqObmZmVkZPT6+tzcXL/jPp9PLper71XHsNvt6cCBA6Z5kT4b\nfjt99cfZ8LVr15rX7O1s+M6dO3uMvfHGG+Z1Y1E8/j8lRa6v3vYdgx6Gt7e3q7S0VGVlZUpJSZEk\njR07VlVVVZKk6upqjR8/PkylAkBsCvpP/v79+9XW1tZjT2bDhg1atWqVPB6Phg4dqpkzZ/ZrkQAQ\nbUHD8tVXX9Wrr776n/Fbv8wLAPHMfILntt4kwGcN8fj5yu32dPPmTdO8SZMmmdf84YcfQqzm/93a\n16hRo8yvfeedd8xzCwoKTPMOHz5sXnPGjBl+x8+ePav777+/x9i5c+fM68aiePx/SnLIZ5YAAMIS\nAEwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAM+INlMebnn382zfv222/Na/7+++/m\nuR0dHQGf+/cd8ftyuWNXV5d57meffWaat2TJEvOaly9fDvic0y9vROSwZwkABoQlABgQlgBgQFgC\ngAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYcLljjJk9e7Zp3oIFC8xrPvvss+a5f/75p+m50tJS\n85rl5eXmuadOnTLPBSKJPUsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADBw+Xw+\nX7+/icvld9zn8wV8zqnisScpPvuiJ+eIVF+9xSF7lgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABYQkABqa/7lhaWqr6+nrduHFDCxcu1MGDB3XixAmlpKRI+vsvDU6aNKk/\n6wSAqAoalrW1tWpsbJTH41FbW5tmzZqlp59+WsuWLVN+fn4kagSAqAsalqNHj9bIkSMlSYMHD1Zn\nZ6e6urr6vTAAiCV9ukWbx+PR4cOHlZCQoJaWFl2/fl1paWlavXq1UlNTA78Jt2hzvHjsi56cIxZu\n0WYOywMHDqisrEzl5eXyer1KSUlRdna2tm/frnPnzmnNmjUBX+v1epWTk9P3ygEgVvgMfvjhB99L\nL73ka2tr+89zjY2Nvtdff73X10vy+9Pbc079icee4rUvenLOT6T66k3Qrw61t7ertLRUZWVl3We/\nFy9erKamJklSXV2dsrKygi0DAI4W9ATP/v371dbWpuLi4u6xgoICFRcXKzk5WW63W+vXr+/XIgEg\n2vgbPGEWjz1J8dkXPTlHpPrqLQ65ggcADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8IS\nAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAw\niMifwgUAp2PPEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwSIzGm7733ns6duyYXC6XVq5cqZEjR0aj\njLCqq6vTkiVLlJWVJUkaMWKEVq9eHeWqQtfQ0KC33npL8+fP15w5c3T27FmtWLFCXV1dSk9P16ZN\nm5SUlBTtMvvk1p5KSkp04sQJpaSkSJIWLFigSZMmRbfIPiotLVV9fb1u3LihhQsXKjc31/HbSfpv\nXwcPHoz6top4WP700086ffq0PB6PTp06pZUrV8rj8US6jH4xZswYbdmyJdpl3LYrV65o3bp1ysvL\n6x7bsmWLCgsLNX36dL3//vuqrKxUYWFhFKvsG389SdKyZcuUn58fpapuT21trRobG+XxeNTW1qZZ\ns2YpLy/P0dtJ8t/X008/HfVtFfHD8JqaGk2dOlWSNHz4cF26dEkdHR2RLgO9SEpK0o4dO5SRkdE9\nVldXpylTpkiS8vPzVVNTE63yQuKvJ6cbPXq0PvzwQ0nS4MGD1dnZ6fjtJPnvq6urK8pVRSEsW1tb\nNWTIkO7HqampamlpiXQZ/eLkyZNatGiRZs+erR9//DHa5YQsMTFRAwcO7DHW2dnZfTiXlpbmuG3m\nrydJqqio0Lx587R06VJdvHgxCpWFLiEhQW63W5JUWVmpCRMmOH47Sf77SkhIiPq2ispnlv8WL1db\nDhs2TEVFRZo+fbqampo0b948VVdXO/LzomDiZZvNmDFDKSkpys7O1vbt27Vt2zatWbMm2mX12YED\nB1RZWany8nJNmzate9zp2+nffXm93qhvq4jvWWZkZKi1tbX78fnz55Wenh7pMsIuMzNTzz33nFwu\nlx566CHde++9am5ujnZZYeN2u3X16lVJUnNzc1wczubl5Sk7O1uSNHnyZDU0NES5or47dOiQPvnk\nE+3YsUODBg2Km+10a1+xsK0iHpbjxo1TVVWVJOnEiRPKyMjQPffcE+kywm7v3r3auXOnJKmlpUUX\nLlxQZmZmlKsKn7Fjx3Zvt+rqao0fPz7KFd2+xYsXq6mpSdLfn8n+800Gp2hvb1dpaanKysq6zxLH\nw3by11csbKuo3HVo8+bNOnz4sFwul9auXavHHnss0iWEXUdHh5YvX67Lly/r+vXrKioq0sSJE6Nd\nVki8Xq82btyoM2fOKDExUZmZmdq8ebNKSkp07do1DR06VOvXr9ddd90V7VLN/PU0Z84cbd++XcnJ\nyXK73Vq/fr3S0tKiXaqZx+PR1q1b9b///a97bMOGDVq1apVjt5Pkv6+CggJVVFREdVtxizYAMOAK\nHgAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAM/g8DO834LYDKmQAAAABJRU5ErkJggg==\n", - "text/plain": [ - "<matplotlib.figure.Figure at 0x7f2a4ac276d0>" - ] - }, - "metadata": { - "tags": [] - } - } ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "WkFUEs7ZOA79", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - }, - "output_extras": [ - { - "item_id": 1 - } - ], - "base_uri": "https://localhost:8080/", - "height": 408 - }, - "outputId": "f56d417d-0b2e-4b4d-e1ea-6e6b233a609b", - "executionInfo": { - "status": "ok", - "timestamp": 1512092692257, - "user_tz": 480, - "elapsed": 279, - "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" } - } + }, + "colab_type": "code", + "id": "WkFUEs7ZOA79" }, + "outputs": [], "source": [ "# Lots of models available\n", "registry.list_models()" - ], - "cell_type": "code", - "execution_count": 11, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['resnet50',\n", - " 'lstm_seq2seq',\n", - " 'transformer_encoder',\n", - " 'attention_lm',\n", - " 'vanilla_gan',\n", - " 'transformer',\n", - " 'gene_expression_conv',\n", - " 'transformer_moe',\n", - " 'attention_lm_moe',\n", - " 'transformer_revnet',\n", - " 'lstm_seq2seq_attention',\n", - " 'shake_shake',\n", - " 'transformer_ae',\n", - " 'diagonal_neural_gpu',\n", - " 'xception',\n", - " 'aligned',\n", - " 'multi_model',\n", - " 'neural_gpu',\n", - " 'slice_net',\n", - " 'byte_net',\n", - " 'cycle_gan',\n", - " 'transformer_sketch',\n", - " 'blue_net']" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 11 - } ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "-H25oG91YQj3", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - } + }, + "colab_type": "code", + "id": "-H25oG91YQj3" }, + "outputs": [], "source": [ "# Create your own model\n", "\n", @@ -724,53 +382,34 @@ "hparams.use_eager_mode = True\n", "trainer_utils.add_problem_hparams(hparams, \"image_mnist\")\n", "model = MySimpleModel(hparams, Modes.TRAIN)" - ], - "cell_type": "code", - "execution_count": 0, - "outputs": [] + ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "AWVd2I7PYz6H", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - }, - "output_extras": [ - { - "item_id": 12 - } - ], - "base_uri": "https://localhost:8080/", - "height": 357 - }, - "outputId": "5acd846f-7d5e-45b9-85b7-e8a93389630a", - "executionInfo": { - "status": "ok", - "timestamp": 1512092812219, - "user_tz": 480, - "elapsed": 119560, - "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" } - } + }, + "colab_type": "code", + "id": "AWVd2I7PYz6H" }, + "outputs": [], "source": [ "# Train\n", - "\n", - "hparams.learning_rate = 0.0001\n", - "optimizer = tf.train.MomentumOptimizer(\n", - " hparams.learning_rate, momentum=hparams.optimizer_momentum_momentum)\n", + "store = tfe.EagerVariableStore()\n", + "optimizer = tf.train.AdamOptimizer()\n", "\n", "# In Eager mode, opt.minimize must be passed a function that produces the loss\n", "def loss_function(features):\n", " _, losses = model(features)\n", " return losses[\"training\"]\n", "\n", + "tfe_loss_fn = tfe.implicit_value_and_gradients(loss_function)\n", + "\n", "NUM_STEPS = 500\n", "BATCH_SIZE = 128\n", "\n", @@ -780,84 +419,37 @@ "\n", "# Training loop\n", "for count, example in enumerate(tfe.Iterator(mnist_train_dataset)):\n", - " if count % 50 == 0:\n", - " loss = loss_function(example)\n", - " print(\"Step: %d, Loss: %.3f\" % (count, loss.numpy()))\n", - " if count >= NUM_STEPS:\n", + " if count \u003e= NUM_STEPS:\n", " break\n", "\n", " example[\"targets\"] = tf.reshape(example[\"targets\"], [BATCH_SIZE, 1, 1, 1]) # Make it 4D.\n", - " optimizer.minimize(lambda: loss_function(example),\n", - " global_step=tf.train.get_or_create_global_step())" - ], - "cell_type": "code", - "execution_count": 13, - "outputs": [ - { - "output_type": "stream", - "text": [ - "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n", - "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:1671: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "\n", - "Future major versions of TensorFlow will allow gradients to flow\n", - "into the labels input on backprop by default.\n", - "\n", - "See tf.nn.softmax_cross_entropy_with_logits_v2.\n", - "\n", - "Step: 0, Loss: 669.337\n", - "Step: 50, Loss: 681.818\n", - "Step: 100, Loss: 672.086\n", - "Step: 150, Loss: 696.411\n", - "Step: 200, Loss: 687.108\n", - "Step: 250, Loss: 679.670\n", - "Step: 300, Loss: 686.915\n", - "Step: 350, Loss: 687.450\n", - "Step: 400, Loss: 680.961\n", - "Step: 450, Loss: 685.741\n", - "Step: 500, Loss: 690.723\n" - ], - "name": "stdout" - } + " loss, gv = tfe_loss_fn(example)\n", + " optimizer.apply_gradients(gv)\n", + " if count % 50 == 0:\n", + " print(\"Step: %d, Loss: %.3f\" % (count, loss.numpy()))" ] }, { + "cell_type": "code", + "execution_count": 0, "metadata": { - "id": "CIFlkiVOd8jO", - "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - }, - "output_extras": [ - { - "item_id": 2 - } - ], - "base_uri": "https://localhost:8080/", - "height": 51 - }, - "outputId": "29223ecd-c5ae-401b-e518-97b06fafb530", - "executionInfo": { - "status": "ok", - "timestamp": 1512092815393, - "user_tz": 480, - "elapsed": 3149, - "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" } - } + }, + "colab_type": "code", + "id": "CIFlkiVOd8jO" }, + "outputs": [], "source": [ "model.set_mode(Modes.EVAL)\n", "mnist_eval_dataset = mnist_problem.dataset(Modes.EVAL, data_dir)\n", "all_perplexities = []\n", "all_accuracies = []\n", "for count, example in enumerate(tfe.Iterator(mnist_eval_dataset)):\n", - " if count >= 100:\n", + " if count \u003e= 100:\n", " break\n", "\n", " batch_inputs = tf.reshape(example[\"inputs\"], [1, 28, 28, 3]) # Make it 4D.\n", @@ -865,27 +457,28 @@ " features = {\"inputs\": batch_inputs, \"targets\": batch_targets}\n", "\n", " # Call the model.\n", - " predictions, _ = model(features)\n", - " \n", + " with store.as_default():\n", + " predictions, _ = model(features)\n", + "\n", " # Calculate and append the metrics\n", " all_perplexities.extend(metrics.padded_neg_log_perplexity(predictions, features[\"targets\"]))\n", " all_accuracies.extend(metrics.padded_accuracy(predictions, features[\"targets\"]))\n", "\n", "# Print out metrics on the dataset\n", "print(\"Accuracy: %.2f\" % tf.reduce_mean(tf.concat(all_accuracies, axis=1)).numpy())" - ], - "cell_type": "code", - "execution_count": 14, - "outputs": [ - { - "output_type": "stream", - "text": [ - "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-dev*\n", - "Accuracy: 0.49\n" - ], - "name": "stdout" - } ] } - ] -} \ No newline at end of file + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "default_view": {}, + "name": "T2T with TF Eager", + "provenance": [], + "version": "0.3.2", + "views": {} + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 889fc84785ec1a1b76f7e461e5fcfb50612c35f5 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 1 Dec 2017 14:30:32 -0800 Subject: [PATCH 0644/4095] TF Eager improvements for T2TModel PiperOrigin-RevId: 177641254 --- tensor2tensor/layers/common_hparams.py | 3 - tensor2tensor/layers/common_layers.py | 6 +- tensor2tensor/layers/modalities.py | 9 +- tensor2tensor/layers/modalities_test.py | 3 - tensor2tensor/models/cycle_gan.py | 5 +- tensor2tensor/models/transformer.py | 6 +- tensor2tensor/notebooks/hello_t2t.ipynb | 773 ++++++++++++++++++------ tensor2tensor/utils/t2t_model.py | 78 ++- 8 files changed, 661 insertions(+), 222 deletions(-) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 591b3e28f..673ea1c83 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -184,9 +184,6 @@ def basic_params1(): # This is the actual batch size, *not* tokens per batch (i.e. for # language models this is the number of sentences in the batch) tpu_batch_size_per_shard=24, - # Things not compatible with eager mode use this flag to implement - # alternative functionality. We expect this to go away soon. - use_eager_mode=False, # Set by tpu_trainer to let the model know whether we are on TPU. # Switching on/off tpu should not invalidate checkpoints. use_tpu=False, diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index ca8a28b99..a4f573d03 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -32,6 +32,7 @@ import tensorflow as tf +from tensorflow.python.eager import context as tfe_context from tensorflow.python.framework import function from tensorflow.python.framework import ops @@ -200,8 +201,7 @@ def flatten4d3d(x): return result -def embedding(x, vocab_size, dense_size, name=None, reuse=None, multiplier=1.0, - use_eager_mode=False): +def embedding(x, vocab_size, dense_size, name=None, reuse=None, multiplier=1.0): """Embed x of type int64 into dense vectors, reducing to max 4 dimensions.""" with tf.variable_scope( name, default_name="embedding", values=[x], reuse=reuse): @@ -209,7 +209,7 @@ def embedding(x, vocab_size, dense_size, name=None, reuse=None, multiplier=1.0, # On the backwards pass, we want to convert the gradient from # an indexed-slices to a regular tensor before sending it back to the # parameter server. This avoids excess computation on the parameter server. - if not use_eager_mode: + if not tfe_context.in_eager_mode(): embedding_var = eu.convert_gradient_to_tensor(embedding_var) emb_x = tf.gather(embedding_var, x) if multiplier != 1.0: diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index d0264d5cc..ddef5e67f 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -29,6 +29,8 @@ import tensorflow as tf +from tensorflow.python.eager import context + # TODO(noam): remove this function after TPUs do gather faster. def tpu_gather(params, indices): @@ -96,7 +98,7 @@ def _get_weights(self, hidden_dim=None): else: ret = tf.concat(shards, 0) # Convert ret to tensor. - if not self._model_hparams.use_eager_mode: + if not context.in_eager_mode(): ret = eu.convert_gradient_to_tensor(ret) return ret @@ -205,7 +207,7 @@ class ImageModality(modality.Modality): def bottom(self, inputs): with tf.variable_scope(self.name): inputs = common_layers.standardize_images(inputs) - if not self._model_hparams.use_eager_mode: + if not context.in_eager_mode(): tf.summary.image("inputs", inputs, max_outputs=2) return tf.to_float(inputs) @@ -216,8 +218,7 @@ def targets_bottom(self, inputs): tf.to_int32(common_layers.flatten4d3d(inputs)), self.top_dimensionality, self._body_input_depth, - name="input_rgb_embedding", - use_eager_mode=self._model_hparams.use_eager_mode) + name="input_rgb_embedding") if self._model_hparams.multiply_embedding_mode == "sqrt_depth": ret *= self._body_input_depth**0.5 diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index f5f7b8998..f1bcd87c3 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -43,7 +43,6 @@ def testSymbolModalityInputs(self): symbol_modality_skip_top=0, shared_embedding_and_softmax_weights=0, prepend_mode="none", - use_eager_mode=False, use_tpu=False) x = -1 + np.random.random_integers( vocab_size, size=(batch_size, length, 1, 1)) @@ -74,7 +73,6 @@ def testSymbolModalityTargets(self): factored_logits=0, mode=tf.estimator.ModeKeys.TRAIN, prepend_mode="none", - use_eager_mode=False, use_tpu=False) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) @@ -112,7 +110,6 @@ def testSymbolModalityTargetsFactored(self): factored_logits=1, mode=tf.estimator.ModeKeys.TRAIN, prepend_mode="none", - use_eager_mode=False, use_tpu=False) body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) diff --git a/tensor2tensor/models/cycle_gan.py b/tensor2tensor/models/cycle_gan.py index dd013acad..4cf1a5871 100644 --- a/tensor2tensor/models/cycle_gan.py +++ b/tensor2tensor/models/cycle_gan.py @@ -66,11 +66,10 @@ def cycle_gan_internal(inputs, targets, _, hparams): # Embed inputs and targets. inputs_orig, targets_orig = tf.to_int32(inputs), tf.to_int32(targets) inputs = common_layers.embedding( - inputs_orig, hparams.vocab_size, hparams.hidden_size, "embed", - use_eager_mode=hparams.use_eager_mode) + inputs_orig, hparams.vocab_size, hparams.hidden_size, "embed") targets = common_layers.embedding( targets_orig, hparams.vocab_size, hparams.hidden_size, - "embed", reuse=True, use_eager_mode=hparams.use_eager_mode) + "embed", reuse=True) # Split the batch into input-input and target-target parts. inputs1, _ = split_on_batch(inputs) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index f2b693e95..ffe5fcb52 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -37,6 +37,7 @@ import tensorflow as tf +from tensorflow.python.eager import context from tensorflow.python.util import nest @@ -324,7 +325,7 @@ def symbols_to_logits_fn(ids, i, cache): # Note: Tensor.set_shape() does not work here since it merges shape info. # TODO(llion); Find a more robust solution. # pylint: disable=protected-access - if not self._hparams.use_eager_mode: + if not context.in_eager_mode(): for layer in cache: cache[layer]["k"]._shape = tf.TensorShape([None, None, key_channels]) cache[layer]["v"]._shape = tf.TensorShape([None, None, value_channels]) @@ -452,8 +453,7 @@ def transformer_prepare_encoder(inputs, target_space, hparams, features=None): common_layers.shape_list(inputs)[1]) # Append target_space_id embedding to inputs. emb_target_space = common_layers.embedding( - target_space, 32, ishape_static[-1], name="target_space_embedding", - use_eager_mode=hparams.use_eager_mode) + target_space, 32, ishape_static[-1], name="target_space_embedding") emb_target_space = tf.reshape(emb_target_space, [1, 1, -1]) encoder_input += emb_target_space if hparams.pos == "timing": diff --git a/tensor2tensor/notebooks/hello_t2t.ipynb b/tensor2tensor/notebooks/hello_t2t.ipynb index 845f20d5f..fd08175c6 100644 --- a/tensor2tensor/notebooks/hello_t2t.ipynb +++ b/tensor2tensor/notebooks/hello_t2t.ipynb @@ -1,19 +1,28 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "T2T with TF Eager", + "version": "0.3.2", + "views": {}, + "default_view": {}, + "provenance": [], + "collapsed_sections": [] + } + }, "cells": [ { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "s19ucTii_wYb", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "s19ucTii_wYb" + } }, - "outputs": [], "source": [ "# Copyright 2017 Google LLC.\n", "\n", @@ -28,41 +37,41 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." - ] - }, - { + ], "cell_type": "code", "execution_count": 0, + "outputs": [] + }, + { "metadata": { + "id": "OPGni6fuvoTj", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "OPGni6fuvoTj" + } }, - "outputs": [], "source": [ "# Install deps\n", - "!pip install -q \"tensor2tensor-dev==1.3.1.dev5\" tf-nightly" - ] - }, - { + "!pip install -q \"tensor2tensor-dev==1.3.1.dev7\" tf-nightly" + ], "cell_type": "code", "execution_count": 0, + "outputs": [] + }, + { "metadata": { + "id": "oILRLCWN_16u", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "oILRLCWN_16u" + } }, - "outputs": [], "source": [ "import tensorflow as tf\n", "import matplotlib.pyplot as plt\n", @@ -90,235 +99,514 @@ "tf.gfile.MakeDirs(data_dir)\n", "tf.gfile.MakeDirs(tmp_dir)\n", "tf.gfile.MakeDirs(train_dir)\n", - "tf.gfile.MakeDirs(checkpoint_dir)" - ] + "tf.gfile.MakeDirs(checkpoint_dir)\n", + "gs_data_dir = \"gs://tensor2tensor-data\"\n", + "gs_ckpt_dir = \"gs://tensor2tensor-checkpoints/\"" + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "gXL7_bVH49Kl" + "id": "gXL7_bVH49Kl", + "colab_type": "text" }, "source": [ - "# Translate from English to French with a pre-trained model" - ] + "# Translate from English to German with a pre-trained model" + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "Q2CYCYjZTlZs", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "output_extras": [ + { + "item_id": 2 + } + ], + "base_uri": "https://localhost:8080/", + "height": 68 }, - "colab_type": "code", - "id": "Q2CYCYjZTlZs" + "outputId": "b13d53a3-feba-4d74-fc1e-951bef99ecb0", + "executionInfo": { + "status": "ok", + "timestamp": 1512165746671, + "user_tz": 480, + "elapsed": 2799, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } }, - "outputs": [], "source": [ "# Translation\n", - "enfr_problem = registry.problem(\"translate_enfr_wmt_small32k\")\n", - "enfr_problem.generate_data(data_dir, tmp_dir) " + "ende_problem = registry.problem(\"translate_ende_wmt32k\")\n", + "\n", + "# Copy the vocab file locally\n", + "vocab_file = os.path.join(gs_data_dir, \"vocab.ende.32768\")\n", + "!gsutil cp {vocab_file} {data_dir}" + ], + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Copying gs://tensor2tensor-data/vocab.ende.32768...\n", + "/ [1 files][316.4 KiB/316.4 KiB] \n", + "Operation completed over 1 objects/316.4 KiB. \n" + ], + "name": "stdout" + } ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "EB4MP7_y_SuQ", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "g2aQW7Z6TOEu" + } }, - "outputs": [], "source": [ - "example = tfe.Iterator(enfr_problem.dataset(Modes.TRAIN, data_dir)).next()\n", - "inputs = [int(x) for x in example[\"inputs\"].numpy()] # Cast to ints.\n", - "targets = [int(x) for x in example[\"targets\"].numpy()] # Cast to ints.\n", + "encoders = ende_problem.feature_encoders(data_dir)\n", + "\n", + "def encode(input_str):\n", + " \"\"\"Input str to features dict, ready for inference\"\"\"\n", + " inputs = encoders[\"inputs\"].encode(input_str) + [1] # add EOS id\n", + " batch_inputs = tf.reshape(inputs, [1, -1, 1]) # Make it 3D.\n", + " return {\"inputs\": batch_inputs}\n", "\n", - "encoders = enfr_problem.feature_encoders(data_dir)\n", "def decode(integers):\n", - " samples = encoders[\"inputs\"].decode(np.squeeze(integers))\n", - " return samples[:samples.find(\"\u003cEOS\u003e\")]\n", - "\n", - "# Example inputs as int-tensor.\n", - "print(\"Inputs, encoded:\")\n", - "print(inputs)\n", - "print(\"Inputs, decoded:\")\n", - "# Example inputs as a sentence.\n", - "print(decode(inputs))\n", - "# Example targets as int-tensor.\n", - "print(\"Targets, encoded:\")\n", - "print(targets)\n", - "# Example targets as a sentence.\n", - "print(\"Targets, decoded:\")\n", - "print(decode(targets))" - ] + " \"\"\"List of ints to str\"\"\"\n", + " integers = list(np.squeeze(integers))\n", + " if 1 in integers:\n", + " integers = integers[:integers.index(1)]\n", + " return encoders[\"inputs\"].decode(np.squeeze(integers))" + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] }, { + "metadata": { + "id": "g2aQW7Z6TOEu", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "source": [ + "# # Generate and view the data\n", + "# # This cell is commented out because data generation can take hours\n", + "\n", + "# ende_problem.generate_data(data_dir, tmp_dir)\n", + "# example = tfe.Iterator(ende_problem.dataset(Modes.TRAIN, data_dir)).next()\n", + "# inputs = [int(x) for x in example[\"inputs\"].numpy()] # Cast to ints.\n", + "# targets = [int(x) for x in example[\"targets\"].numpy()] # Cast to ints.\n", + "\n", + "\n", + "\n", + "# # Example inputs as int-tensor.\n", + "# print(\"Inputs, encoded:\")\n", + "# print(inputs)\n", + "# print(\"Inputs, decoded:\")\n", + "# # Example inputs as a sentence.\n", + "# print(decode(inputs))\n", + "# # Example targets as int-tensor.\n", + "# print(\"Targets, encoded:\")\n", + "# print(targets)\n", + "# # Example targets as a sentence.\n", + "# print(\"Targets, decoded:\")\n", + "# print(decode(targets))" + ], "cell_type": "code", "execution_count": 0, + "outputs": [] + }, + { "metadata": { + "id": "9l6hDQbrRUYV", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "9l6hDQbrRUYV" + } }, - "outputs": [], "source": [ "# Create hparams and the T2TModel object.\n", "model_name = \"transformer\"\n", "hparams_set = \"transformer_base\"\n", "\n", "hparams = trainer_utils.create_hparams(hparams_set, data_dir)\n", - "hparams.use_eager_mode = True\n", - "trainer_utils.add_problem_hparams(hparams, \"translate_enfr_wmt32k\")\n", + "trainer_utils.add_problem_hparams(hparams, \"translate_ende_wmt32k\")\n", "\n", "# NOTE: Only create the model once when restoring from a checkpoint; it's a\n", "# Layer and so subsequent instantiations will have different variable scopes\n", "# that will not match the checkpoint.\n", - "model = registry.model(model_name)(hparams, Modes.PREDICT)" - ] - }, - { + "translate_model = registry.model(model_name)(hparams, Modes.PREDICT)" + ], "cell_type": "code", "execution_count": 0, + "outputs": [] + }, + { "metadata": { + "id": "FEwNUVlMYOJi", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 34 }, - "colab_type": "code", - "id": "FEwNUVlMYOJi" + "outputId": "fc15a59a-7ea7-4baa-85c1-2a94528eb157", + "executionInfo": { + "status": "ok", + "timestamp": 1512165760778, + "user_tz": 480, + "elapsed": 12527, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } }, - "outputs": [], "source": [ "# Copy the pretrained checkpoint locally\n", - "gs_ckpt_dir = \"gs://tensor2tensor-checkpoints/\"\n", - "ckpt_name = \"transformer_enfr_test\"\n", + "ckpt_name = \"transformer_ende_test\"\n", "gs_ckpt = os.path.join(gs_ckpt_dir, ckpt_name)\n", - "local_ckpt = os.path.join(checkpoint_dir, ckpt_name)\n", - "!gsutil -q cp -R {gs_ckpt} {local_ckpt}\n", - "ckpt_path = tf.train.latest_checkpoint(local_ckpt)\n", + "!gsutil -q cp -R {gs_ckpt} {checkpoint_dir}\n", + "ckpt_path = tf.train.latest_checkpoint(os.path.join(checkpoint_dir, ckpt_name))\n", "ckpt_path" + ], + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "u'/content/t2t/checkpoints/transformer_ende_test/model.ckpt-350855'" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 8 + } ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "3O-8E9d6TtuJ", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "output_extras": [ + { + "item_id": 3 + } + ], + "base_uri": "https://localhost:8080/", + "height": 119 }, - "colab_type": "code", - "id": "3O-8E9d6TtuJ" + "outputId": "24231c95-99cb-421b-d961-5a21322be945", + "executionInfo": { + "status": "ok", + "timestamp": 1512165773424, + "user_tz": 480, + "elapsed": 12593, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } }, - "outputs": [], "source": [ "# Restore and translate!\n", "\n", - "def encode(input_str):\n", - " # Encode from raw string to ints using problem encoders.\n", - " inputs = encoders[\"inputs\"].encode(input_str) + [1] # add EOS id\n", - " batch_inputs = tf.reshape(inputs, [1, -1, 1, 1]) # Make it 4D.\n", - " # TODO: rm target_space_id\n", - " features_dict = {\"inputs\": batch_inputs,\n", - " \"target_space_id\": tf.constant(hparams.problems[0].target_space_id)}\n", - " return features_dict\n", + "def translate(inputs):\n", + " encoded_inputs = encode(inputs)\n", + " with tfe.restore_variables_on_create(ckpt_path):\n", + " model_output = translate_model.infer(encoded_inputs)\n", + " return decode(model_output)\n", "\n", - "# Input to the decoder.\n", "inputs = \"This is a cat.\"\n", - "\n", - "store = tfe.EagerVariableStore()\n", - "# Restore from checkpoint and run inference\n", - "with store.as_default():\n", - " with tfe.restore_variables_on_create(ckpt_path):\n", - " samples = model.infer(encode(inputs), beam_size=1)\n", + "outputs = translate(inputs)\n", "\n", "print(\"Inputs: %s\" % inputs)\n", - "print(\"Outputs: %s\" % decode(samples))" + "print(\"Outputs: %s\" % outputs)" + ], + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Greedy Decoding\n", + "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:487: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "keep_dims is deprecated, use keepdims instead\n", + "Inputs: This is a cat.\n", + "Outputs: Das ist eine Katze.\n" + ], + "name": "stdout" + } ] }, { - "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "i7BZuO7T5BB4" + "id": "i7BZuO7T5BB4", + "colab_type": "text" }, "source": [ "# Train a custom model on MNIST" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "RYDMO4zArgkz", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 1224 }, - "colab_type": "code", - "id": "RYDMO4zArgkz" + "outputId": "3b62dff4-7bfa-436e-a9f5-ecf66616e93a", + "executionInfo": { + "status": "ok", + "timestamp": 1512165773875, + "user_tz": 480, + "elapsed": 423, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } }, - "outputs": [], "source": [ "# Lots of problems available\n", "problems.available()" + ], + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['algorithmic_addition_binary40',\n", + " 'algorithmic_addition_decimal40',\n", + " 'algorithmic_cipher_shift200',\n", + " 'algorithmic_cipher_shift5',\n", + " 'algorithmic_cipher_vigenere200',\n", + " 'algorithmic_cipher_vigenere5',\n", + " 'algorithmic_identity_binary40',\n", + " 'algorithmic_identity_decimal40',\n", + " 'algorithmic_multiplication_binary40',\n", + " 'algorithmic_multiplication_decimal40',\n", + " 'algorithmic_reverse_binary40',\n", + " 'algorithmic_reverse_binary40_test',\n", + " 'algorithmic_reverse_decimal40',\n", + " 'algorithmic_reverse_nlplike32k',\n", + " 'algorithmic_reverse_nlplike8k',\n", + " 'algorithmic_shift_decimal40',\n", + " 'audio_timit_characters_tune',\n", + " 'audio_timit_tokens8k_test',\n", + " 'audio_timit_tokens8k_tune',\n", + " 'image_celeba_tune',\n", + " 'image_cifar10',\n", + " 'image_cifar10_plain',\n", + " 'image_cifar10_plain8',\n", + " 'image_cifar10_tune',\n", + " 'image_fsns',\n", + " 'image_imagenet',\n", + " 'image_imagenet224',\n", + " 'image_imagenet32',\n", + " 'image_imagenet64',\n", + " 'image_mnist',\n", + " 'image_mnist_tune',\n", + " 'image_ms_coco_characters',\n", + " 'image_ms_coco_tokens32k',\n", + " 'image_ms_coco_tokens8k',\n", + " 'img2img_cifar10',\n", + " 'img2img_imagenet',\n", + " 'languagemodel_lm1b32k',\n", + " 'languagemodel_lm1b8k_packed',\n", + " 'languagemodel_lm1b_characters',\n", + " 'languagemodel_ptb10k',\n", + " 'languagemodel_ptb_characters',\n", + " 'languagemodel_wiki_full32k',\n", + " 'languagemodel_wiki_scramble128',\n", + " 'languagemodel_wiki_scramble1k50',\n", + " 'languagemodel_wiki_scramble8k50',\n", + " 'librispeech',\n", + " 'multinli_matched',\n", + " 'multinli_mismatched',\n", + " 'ocr_test',\n", + " 'parsing_english_ptb16k',\n", + " 'parsing_english_ptb8k',\n", + " 'parsing_icelandic16k',\n", + " 'programming_desc2code_cpp',\n", + " 'programming_desc2code_py',\n", + " 'sentiment_imdb',\n", + " 'summarize_cnn_dailymail32k',\n", + " 'translate_encs_wmt32k',\n", + " 'translate_encs_wmt_characters',\n", + " 'translate_ende_wmt32k',\n", + " 'translate_ende_wmt32k_packed',\n", + " 'translate_ende_wmt8k',\n", + " 'translate_ende_wmt_bpe32k',\n", + " 'translate_ende_wmt_characters',\n", + " 'translate_enfr_wmt32k',\n", + " 'translate_enfr_wmt8k',\n", + " 'translate_enfr_wmt_characters',\n", + " 'translate_enfr_wmt_small32k',\n", + " 'translate_enfr_wmt_small8k',\n", + " 'translate_enfr_wmt_small_characters',\n", + " 'translate_enmk_setimes32k',\n", + " 'translate_enzh_wmt8k']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "JKc2uSk6WX5e", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "output_extras": [ + { + "item_id": 3 + } + ], + "base_uri": "https://localhost:8080/", + "height": 204 }, - "colab_type": "code", - "id": "JKc2uSk6WX5e" + "outputId": "f9fa17c1-ed3f-474e-8bd8-f764c3b00000", + "executionInfo": { + "status": "ok", + "timestamp": 1512165774930, + "user_tz": 480, + "elapsed": 977, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } }, - "outputs": [], "source": [ "# Create the MNIST problem and generate the data\n", "\n", "mnist_problem = problems.problem(\"image_mnist\")\n", "# Generate data\n", "mnist_problem.generate_data(data_dir, tmp_dir)" + ], + "cell_type": "code", + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Skipping generator because outputs files exist\n", + "INFO:tensorflow:Skipping generator because outputs files exist\n", + "INFO:tensorflow:Skipping shuffle because output files exist\n" + ], + "name": "stdout" + } ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "VW6HCRANFPYV", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "output_extras": [ + { + "item_id": 2 + }, + { + "item_id": 3 + } + ], + "base_uri": "https://localhost:8080/", + "height": 381 }, - "colab_type": "code", - "id": "VW6HCRANFPYV" + "outputId": "93dea49c-dbca-4856-998f-8bcbb621abea", + "executionInfo": { + "status": "ok", + "timestamp": 1512165775597, + "user_tz": 480, + "elapsed": 622, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } }, - "outputs": [], "source": [ "# Get the tf.data.Dataset from Problem.dataset\n", "mnist_example = tfe.Iterator(mnist_problem.dataset(Modes.TRAIN, data_dir)).next()\n", @@ -327,41 +615,116 @@ "\n", "plt.imshow(image.numpy()[:, :, 0].astype(np.float32), cmap=plt.get_cmap('gray'))\n", "print(\"Label: %d\" % label.numpy())" + ], + "cell_type": "code", + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n", + "Label: 6\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUsAAAFKCAYAAACU6307AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFK1JREFUeJzt3X9MVfUfx/HXDSQgJJSEzS2rNS0m\nuFWzxB8Vymx8y1JrsxCdzT/shyaZK8ZEWzZ/oP2Qfomm/iG53cYfzj90MLNWKuBk1YR/0NqMWREY\nGSYU2P3+0WIhF3hzufeee67Px8Yf93M+nPN+fw+9vuee4znH4/P5fAIADOoGpwsAADcgLAHAgLAE\nAAPCEgAMCEsAMCAsAcDCFwaS/P6cOXNmwGVu/YnGnqK1L3pyz0+4+hqMJxz/ztLj8fgd9/l8Ay5z\nq2jsSYrOvujJPcLV12BxGBvoSjdt2qRvv/1WHo9HxcXFmjJlSqCrAoCIF1BYnjp1SufPn5fX69V3\n332n4uJieb3eYNcGABEjoAs8NTU1ys3NlSTdeeedunTpki5fvhzUwgAgkgR0ZNnW1qbJkyf3fh47\ndqxaW1uVlJTkd/6ZM2eUmZnpd1kYTpmGXTT2JEVnX/TkHk73FfA5y/8aqomsrKwBfy/aTkZHY09S\ndPZFT+4RCRd4AvoanpaWpra2tt7Pv/zyi8aNGxfIqgDAFQIKyxkzZqiqqkqS1NjYqLS0tAG/ggNA\nNAjoa/i9996ryZMn6+mnn5bH49GGDRuCXRcARBT+UXqQRWNPUnT2RU/u4dpzlgBwvSEsAcCAsAQA\nA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwI\nSwAwICwBwICwBAADwhIADAhLADAI6FW4QLSaNGmSad7JkyfN6/zss8/McxctWmSei/DiyBIADAhL\nADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAw4HZHRL2EhATzspKSEtM6x4wZY97+\nN998Y56LyMWRJQAYEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGHAHD6Jebm6ueVl+\nfn7Qt19RURH0dSL8OLIEAIOAjizr6uq0evVqTZw4UdI/rw+13lMLAG4U8Nfw+++/X2VlZcGsBQAi\nFl/DAcAg4LA8d+6cnnvuOT3zzDM6ceJEMGsCgIjj8fl8vuH+UktLi+rr65WXl6fm5mYtXbpU1dXV\niouL8zu/oaFBmZmZIy4WAJwSUFhe66mnntI777yjW2+91f9GPB6/4z6fb8BlbhWNPUnu7mvevHl+\nxw8dOqTHH3+8z9jBgweDvv3bb7/dPLe5uXlE23LzfhpMuPoaLA4D+hp+6NAh7dmzR5LU2tqqixcv\nKj09PbDqAMAFAroaPnv2bK1du1afffaZuru79frrrw/4FRwAokFAYZmUlKSdO3cGuxYAiFhBOWc5\n5EY4Z+l6kdbXYLcwXuvw4cN+x0eNGqXu7u4+YzExMaZ1fvnll+btz5071zz32nqGK9L2U7C49pwl\nAFxvCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADDg7Y6IKI899php3oEDB8zr\njI0d+M/82mWXLl0yrXP58uXm7Y/0FkZEBo4sAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhL\nADAgLAHAgDt4EJDB7oq51vPPP2+e+8Ybb5jm3XTTTeZ1/vHHH37Hk5KS+i3Lz883rfP77783bx/R\ngSNLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwIDbHRGQ2bNnm+e+++67\nQd9+T0+Pee7Bgwf9jhcUFPRbduTIkRHVhejFkSUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoAB\nYQkABoQlABgQlgBg4PH5fL6Qb8Tj8Tvu8/kGXOZWbu/pgQce8DteW1uradOm9X4+duyYeZ3x8fEj\nrutaGzZsMM998803/Y67fV/5E409SeHra7A4NB1ZNjU1KTc3VxUVFZKkn376SUuWLFF+fr5Wr16t\nv/76KziVAkCEGjIsr1y5oo0bNyo7O7t3rKysTPn5+Tpw4IBuu+02VVZWhrRIAHDakGEZFxen3bt3\nKy0trXesrq5Oc+bMkSTl5OSopqYmdBUCQAQY8hFtsbGxio3tO62zs1NxcXGSpNTUVLW2toamOgCI\nECN+nqXl+tCZM2eUmZkZ8O+7TTT2JP1zkSdSbNy4MShzo3FfRWNPkvN9BRSWiYmJ6urqUnx8vFpa\nWvp8RfcnKyvL73g0Xrlze09cDXe3aOxJctHV8GtNnz5dVVVVkqTq6mrNmjUrsMoAwCWGPLJsaGjQ\n1q1bdeHCBcXGxqqqqkrbt29XUVGRvF6vxo8fr/nz54ejVgBwzJBhmZmZqf379/cb37dvX0gKAoBI\nxAvL0MfatWtNy0JxHlKSPvnkE9O8t956KyTbD4VHH33UPDcmJsY898KFC37H77vvvj6f6+vrzevE\nwLg3HAAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADDghWVBFok9Pfvss+a5\nH374od/x+Ph4dXV19X7+9+HPFhcvXjTPnT59umneuXPnzOu8+eab/Y7/9ttvSklJ6TO2bt060zoX\nLlxo3v5tt91mnjucv50//vij39jo0aPV0dHRZ+yOO+4wr3M4+yqcXPuINgC43hCWAGBAWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBgwNsdXWo4bwFcsGCBee6NN95oWjacu2RXrVplnmu9\njXGgWxj9WbZsmXnZK6+8Yl6v1XBu0xvO/65JSUmm8cLCQvM6S0pKzHOvNxxZAoABYQkABoQlABgQ\nlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAS8sC7Jw9ZSenm6e++OPP454ezfccIP+/vvv3s/+XpY1\nkOTkZPPce++91zRvx44d5nXOmDHD77jH4+l3x0wo/nM4efKkea71hW0DuXY/DXeddXV1I9p+qPDC\nMgBwCcISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMeGEZAtLY2GieO3bsWPPc\niooK07y77rrLvM6Ojg6/48nJyf2Web1e0zo/+eQT8/ZHjRplnltVVWWeu23btn5jr732Wr/x+vp6\n8zoxMI4sAcDAFJZNTU3Kzc3t/X/9oqIizZs3T0uWLNGSJUv0xRdfhLJGAHDckF/Dr1y5oo0bNyo7\nO7vP+Jo1a5STkxOywgAgkgx5ZBkXF6fdu3crLS0tHPUAQEQyP8/yvffe05gxY1RQUKCioiK1traq\nu7tbqampKikpGfQkfkNDgzIzM4NWNACEW0BXw5944gmlpKQoIyNDu3bt0vvvv6/169cPOD8rK8vv\nOA//DZzTD/89deqU+XcfffRR89zjx4+b5g3navjly5f9jicnJ+v333/vMxYNV8O3bt3aZ2zdunXm\ndfb09JjnhpNrH/6bnZ2tjIwMSdLs2bPV1NQUWGUA4BIBheWqVavU3Nws6Z/H0E+cODGoRQFApBny\na3hDQ4O2bt2qCxcuKDY2VlVVVSooKFBhYaESEhKUmJiozZs3h6NWAHDMkGGZmZmp/fv39xt/5JFH\nQlIQAEQibnd0qWeffdbR7Q/nZPu/p2ws4uPjTfOuXLliXmdeXp7f8RMnTvRbZn0TY0JCgnn7R44c\nMc8dzgWWTz/9tN/Ya6+91m88Ui/auA23OwKAAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBg\nQFgCgAFhCQAG5of/jmgjA9wax/MsA+f08yydVlxcbJ67Y8cOv+OdnZ39blucN2+eaZ2vvPKKefv3\n3HOPee6aNWvMcz/44IN+Y9H435Tk4udZAsD1hrAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAw\nICwBwIAXlrlUV1eXee4PP/xgnjthwoRAygmalStXmuadOHHCvM6PPvrIvGzp0qWmdX7//ffm7b/0\n0kvmueXl5ea5CC+OLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADXlgW\nZJHY04EDB8xzFy1a5Hc8XC8su3TpkmlebKz9Tt2kpCS/4x6Pp98Lqqy3kc6aNcu8/fr6evPckYrE\nv79g4IVlAOAShCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABjwdsfrQFtbm9Ml\nmN18881BX+dAb4KcOXNmv2WLFy82rXM4b8xEdDCFZWlpqerr69XT06MVK1YoKytLr776qq5evapx\n48Zp27ZtiouLC3WtAOCYIcOytrZWZ8+eldfrVXt7uxYsWKDs7Gzl5+crLy9Pb7/9tiorK5Wfnx+O\negHAEUOes5w6dap27NghSUpOTlZnZ6fq6uo0Z84cSVJOTo5qampCWyUAOGzIsIyJiVFiYqIkqbKy\nUg8++KA6Ozt7v3anpqaqtbU1tFUCgMPMF3iOHj2qyspK7d27V3Pnzu0dtzwO88yZM8rMzPS7LAyP\n0wy7aOxJ+ueZlm40c+ZM87Lz58+HupyQi9a/P6f7MoXlV199pZ07d+rjjz/W6NGjlZiYqK6uLsXH\nx6ulpUVpaWmD/n5WVpbf8Wh8UGkk9lRWVmae++KLL/odD9fDf0Ph5MmTfsdnzpyp48eP9xlz+9Xw\nSPz7CwZXPPy3o6NDpaWlKi8vV0pKiiRp+vTpqqqqkiRVV1cP66nRAOBGQx5ZHj58WO3t7SosLOwd\n27Jli9atWyev16vx48dr/vz5IS0SAJw2ZFguWrTI73tZ9u3bF5KCACAS8cKyIIvEnqZNm2aeO9Dd\nLiM5Z/nWW2+Z5x45ciSgbQzm888/9zseiftqpKKxJ8kl5ywBAIQlAJgQlgBgQFgCgAFhCQAGhCUA\nGBCWAGBAWAKAAWEJAAaEJQAYcLtjkEViT/Hx8ea5Az3O7J577tHXX3/d+3ny5MnmdT700EPmubW1\ntea5IxWJ+2qkorEnidsdAcA1CEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADDg\ndscgi8aepOjsi57cg9sdAcAlCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAg\nLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADGItk0pLS1VfX6+e\nnh6tWLFCx44dU2Njo1JSUiRJy5cv18MPPxzKOgHAUUOGZW1trc6ePSuv16v29nYtWLBA06ZN05o1\na5STkxOOGgHAcUOG5dSpUzVlyhRJUnJysjo7O3X16tWQFwYAkcTjG+yt4tfwer06ffq0YmJi1Nra\nqu7ubqWmpqqkpERjx44deCMDvBw9Gl8IH409SdHZFz25R7j6GiwOzWF59OhRlZeXa+/evWpoaFBK\nSooyMjK0a9cu/fzzz1q/fv2Av9vQ0KDMzMzhVw4AkcJn8OWXX/qefPJJX3t7e79lZ8+e9S1evHjQ\n35fk92ewZW79icaeorUvenLPT7j6GsyQ/3Soo6NDpaWlKi8v7736vWrVKjU3N0uS6urqNHHixKFW\nAwCuNuQFnsOHD6u9vV2FhYW9YwsXLlRhYaESEhKUmJiozZs3h7RIAHDasC7wBLwRLvC4XjT2RU/u\nEa6+BotD7uABAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHA\ngLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADMLyKlwAcDuOLAHAgLAE\nAAPCEgAMCEsAMCAsAcCAsAQAg1gnNrpp0yZ9++238ng8Ki4u1pQpU5woI6jq6uq0evVqTZw4UZI0\nadIklZSUOFxV4JqamvTCCy9o2bJlKigo0E8//aRXX31VV69e1bhx47Rt2zbFxcU5XeawXNtTUVGR\nGhsblZKSIklavny5Hn74YWeLHKbS0lLV19erp6dHK1asUFZWluv3k9S/r2PHjjm+r8IelqdOndL5\n8+fl9Xr13Xffqbi4WF6vN9xlhMT999+vsrIyp8sYsStXrmjjxo3Kzs7uHSsrK1N+fr7y8vL09ttv\nq7KyUvn5+Q5WOTz+epKkNWvWKCcnx6GqRqa2tlZnz56V1+tVe3u7FixYoOzsbFfvJ8l/X9OmTXN8\nX4X9a3hNTY1yc3MlSXfeeacuXbqky5cvh7sMDCIuLk67d+9WWlpa71hdXZ3mzJkjScrJyVFNTY1T\n5QXEX09uN3XqVO3YsUOSlJycrM7OTtfvJ8l/X1evXnW4KgfCsq2tTWPGjOn9PHbsWLW2toa7jJA4\nd+6cnnvuOT3zzDM6ceKE0+UELDY2VvHx8X3GOjs7e7/Opaamum6f+etJkioqKrR06VK9/PLL+vXX\nXx2oLHAxMTFKTEyUJFVWVurBBx90/X6S/PcVExPj+L5y5Jzlf0XL3Za33367Vq5cqby8PDU3N2vp\n0qWqrq525fmioUTLPnviiSeUkpKijIwM7dq1S++//77Wr1/vdFnDdvToUVVWVmrv3r2aO3du77jb\n99N/+2poaHB8X4X9yDItLU1tbW29n3/55ReNGzcu3GUEXXp6uv73v//J4/FowoQJuuWWW9TS0uJ0\nWUGTmJiorq4uSVJLS0tUfJ3Nzs5WRkaGJGn27NlqampyuKLh++qrr7Rz507t3r1bo0ePjpr9dG1f\nkbCvwh6WM2bMUFVVlSSpsbFRaWlpSkpKCncZQXfo0CHt2bNHktTa2qqLFy8qPT3d4aqCZ/r06b37\nrbq6WrNmzXK4opFbtWqVmpubJf1zTvbff8ngFh0dHSotLVV5eXnvVeJo2E/++oqEfeXIU4e2b9+u\n06dPy+PxaMOGDbr77rvDXULQXb58WWvXrtXvv/+u7u5urVy5Ug899JDTZQWkoaFBW7du1YULFxQb\nG6v09HRt375dRUVF+vPPPzV+/Hht3rxZo0aNcrpUM389FRQUaNeuXUpISFBiYqI2b96s1NRUp0s1\n83q9eu+993THHXf0jm3ZskXr1q1z7X6S/Pe1cOFCVVRUOLqveEQbABhwBw8AGBCWAGBAWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABYQkABv8HkbgWVGnLsmMAAAAASUVORK5CYII=\n", + "text/plain": [ + "<matplotlib.figure.Figure at 0x7f4899f96d90>" + ] + }, + "metadata": { + "tags": [] + } + } ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "WkFUEs7ZOA79", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 408 }, - "colab_type": "code", - "id": "WkFUEs7ZOA79" + "outputId": "3d0c50f2-9c18-4d4b-8455-1aabe9e28190", + "executionInfo": { + "status": "ok", + "timestamp": 1512165775887, + "user_tz": 480, + "elapsed": 242, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } }, - "outputs": [], "source": [ "# Lots of models available\n", "registry.list_models()" + ], + "cell_type": "code", + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['resnet50',\n", + " 'lstm_seq2seq',\n", + " 'transformer_encoder',\n", + " 'attention_lm',\n", + " 'vanilla_gan',\n", + " 'transformer',\n", + " 'gene_expression_conv',\n", + " 'transformer_moe',\n", + " 'attention_lm_moe',\n", + " 'transformer_revnet',\n", + " 'lstm_seq2seq_attention',\n", + " 'shake_shake',\n", + " 'transformer_ae',\n", + " 'diagonal_neural_gpu',\n", + " 'xception',\n", + " 'aligned',\n", + " 'multi_model',\n", + " 'neural_gpu',\n", + " 'slice_net',\n", + " 'byte_net',\n", + " 'cycle_gan',\n", + " 'transformer_sketch',\n", + " 'blue_net']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 13 + } ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "-H25oG91YQj3", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 } - }, - "colab_type": "code", - "id": "-H25oG91YQj3" + } }, - "outputs": [], "source": [ "# Create your own model\n", "\n", @@ -379,29 +742,45 @@ "\n", "hparams = trainer_utils.create_hparams(\"basic_1\", data_dir)\n", "hparams.hidden_size = 64\n", - "hparams.use_eager_mode = True\n", "trainer_utils.add_problem_hparams(hparams, \"image_mnist\")\n", "model = MySimpleModel(hparams, Modes.TRAIN)" - ] - }, - { + ], "cell_type": "code", "execution_count": 0, + "outputs": [] + }, + { "metadata": { + "id": "AWVd2I7PYz6H", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "output_extras": [ + { + "item_id": 12 + } + ], + "base_uri": "https://localhost:8080/", + "height": 357 }, - "colab_type": "code", - "id": "AWVd2I7PYz6H" + "outputId": "19abcffa-6a56-4633-90c1-71a59a104ace", + "executionInfo": { + "status": "ok", + "timestamp": 1512165882231, + "user_tz": 480, + "elapsed": 105926, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } }, - "outputs": [], "source": [ "# Train\n", - "store = tfe.EagerVariableStore()\n", - "optimizer = tf.train.AdamOptimizer()\n", "\n", "# In Eager mode, opt.minimize must be passed a function that produces the loss\n", "def loss_function(features):\n", @@ -409,6 +788,7 @@ " return losses[\"training\"]\n", "\n", "tfe_loss_fn = tfe.implicit_value_and_gradients(loss_function)\n", + "optimizer = tf.train.AdamOptimizer()\n", "\n", "NUM_STEPS = 500\n", "BATCH_SIZE = 128\n", @@ -419,37 +799,83 @@ "\n", "# Training loop\n", "for count, example in enumerate(tfe.Iterator(mnist_train_dataset)):\n", - " if count \u003e= NUM_STEPS:\n", - " break\n", - "\n", " example[\"targets\"] = tf.reshape(example[\"targets\"], [BATCH_SIZE, 1, 1, 1]) # Make it 4D.\n", " loss, gv = tfe_loss_fn(example)\n", " optimizer.apply_gradients(gv)\n", + "\n", " if count % 50 == 0:\n", - " print(\"Step: %d, Loss: %.3f\" % (count, loss.numpy()))" + " print(\"Step: %d, Loss: %.3f\" % (count, loss.numpy()))\n", + " if count >= NUM_STEPS:\n", + " break" + ], + "cell_type": "code", + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n", + "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:1671: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "\n", + "Future major versions of TensorFlow will allow gradients to flow\n", + "into the labels input on backprop by default.\n", + "\n", + "See tf.nn.softmax_cross_entropy_with_logits_v2.\n", + "\n", + "Step: 0, Loss: 5.430\n", + "Step: 50, Loss: 0.833\n", + "Step: 100, Loss: 0.722\n", + "Step: 150, Loss: 0.529\n", + "Step: 200, Loss: 0.349\n", + "Step: 250, Loss: 0.293\n", + "Step: 300, Loss: 0.303\n", + "Step: 350, Loss: 0.295\n", + "Step: 400, Loss: 0.275\n", + "Step: 450, Loss: 0.290\n", + "Step: 500, Loss: 0.334\n" + ], + "name": "stdout" + } ] }, { - "cell_type": "code", - "execution_count": 0, "metadata": { + "id": "CIFlkiVOd8jO", + "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - } + }, + "output_extras": [ + { + "item_id": 2 + } + ], + "base_uri": "https://localhost:8080/", + "height": 51 }, - "colab_type": "code", - "id": "CIFlkiVOd8jO" + "outputId": "70b92db9-9ec0-466c-e5c2-c5a39f13447d", + "executionInfo": { + "status": "ok", + "timestamp": 1512165950748, + "user_tz": 480, + "elapsed": 2772, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } }, - "outputs": [], "source": [ "model.set_mode(Modes.EVAL)\n", "mnist_eval_dataset = mnist_problem.dataset(Modes.EVAL, data_dir)\n", "all_perplexities = []\n", "all_accuracies = []\n", "for count, example in enumerate(tfe.Iterator(mnist_eval_dataset)):\n", - " if count \u003e= 100:\n", + " if count >= 100:\n", " break\n", "\n", " batch_inputs = tf.reshape(example[\"inputs\"], [1, 28, 28, 3]) # Make it 4D.\n", @@ -457,8 +883,7 @@ " features = {\"inputs\": batch_inputs, \"targets\": batch_targets}\n", "\n", " # Call the model.\n", - " with store.as_default():\n", - " predictions, _ = model(features)\n", + " predictions, _ = model(features)\n", "\n", " # Calculate and append the metrics\n", " all_perplexities.extend(metrics.padded_neg_log_perplexity(predictions, features[\"targets\"]))\n", @@ -466,19 +891,19 @@ "\n", "# Print out metrics on the dataset\n", "print(\"Accuracy: %.2f\" % tf.reduce_mean(tf.concat(all_accuracies, axis=1)).numpy())" + ], + "cell_type": "code", + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-dev*\n", + "Accuracy: 0.98\n" + ], + "name": "stdout" + } ] } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "default_view": {}, - "name": "T2T with TF Eager", - "provenance": [], - "version": "0.3.2", - "views": {} - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index c49bdbaf1..3fdbc6281 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function +import contextlib import copy import time @@ -36,7 +37,9 @@ import tensorflow as tf +from tensorflow.python.eager import context from tensorflow.python.layers import base +from tensorflow.python.ops import variable_scope class T2TModel(base.Layer): @@ -101,6 +104,7 @@ def __init__(self, self._problem_hparams = problem_hparams self._problem_idx = problem_idx self._create_modalities(problem_hparams, self._hparams) + self._var_store = create_eager_var_store() @property def hparams(self): @@ -210,26 +214,27 @@ def infer(self, Returns: samples: an integer `Tensor`. """ - # TODO(rsepassi): Make decoding work with real-valued model outputs - # (i.e. if the target modality is RealModality). - self.prepare_features_for_infer(features) - if not self.has_input and beam_size > 1: - tf.logging.warn("Beam searching for a model with no inputs.") - if not self.has_input and self.hparams.sampling_method != "random": - tf.logging.warn("Non-random sampling for a model with no inputs.") - self._fill_problem_hparams_features(features) - - target_modality = self.hparams.problems[self._problem_idx].target_modality - if target_modality.is_class_modality: - beam_size = 1 # No use to run beam-search for a single class. - if beam_size == 1: - tf.logging.info("Greedy Decoding") - samples, _, _ = self._greedy_infer(features, decode_length) - else: - tf.logging.info("Beam Decoding with beam size %d" % beam_size) - samples = self._beam_decode( - features, decode_length, beam_size, top_beams, alpha) - return samples + with self._var_store.as_default(): + # TODO(rsepassi): Make decoding work with real-valued model outputs + # (i.e. if the target modality is RealModality). + self.prepare_features_for_infer(features) + if not self.has_input and beam_size > 1: + tf.logging.warn("Beam searching for a model with no inputs.") + if not self.has_input and self.hparams.sampling_method != "random": + tf.logging.warn("Non-random sampling for a model with no inputs.") + self._fill_problem_hparams_features(features) + + target_modality = self.hparams.problems[self._problem_idx].target_modality + if target_modality.is_class_modality: + beam_size = 1 # No use to run beam-search for a single class. + if beam_size == 1: + tf.logging.info("Greedy Decoding") + samples, _, _ = self._greedy_infer(features, decode_length) + else: + tf.logging.info("Beam Decoding with beam size %d" % beam_size) + samples = self._beam_decode( + features, decode_length, beam_size, top_beams, alpha) + return samples def _beam_decode(self, features, decode_length, beam_size, top_beams, alpha): """Beam search decoding. @@ -385,7 +390,7 @@ def _slow_greedy_infer(self, features, decode_length): def infer_step(recent_output, recent_logits, unused_loss): """Inference step.""" - if not self.hparams.use_eager_mode: + if not context.in_eager_mode(): recent_output.set_shape([None, None, None, 1]) padded = tf.pad(recent_output, [[0, 0], [0, 1], [0, 0], [0, 0]]) features["targets"] = padded @@ -401,7 +406,7 @@ def infer_step(recent_output, recent_logits, unused_loss): common_layers.shape_list(recent_output)[1], :, :] cur_sample = tf.to_int64(tf.expand_dims(cur_sample, axis=1)) samples = tf.concat([recent_output, cur_sample], axis=1) - if not self.hparams.use_eager_mode: + if not context.in_eager_mode(): samples.set_shape([None, None, None, 1]) # Assuming we have one shard for logits. @@ -433,7 +438,7 @@ def infer_step(recent_output, recent_logits, unused_loss): result = initial_output # tensor of shape [batch_size, time, 1, 1, vocab_size] logits = tf.zeros((batch_size, 0, 1, 1, target_modality.top_dimensionality)) - if not self.hparams.use_eager_mode: + if not context.in_eager_mode(): logits.set_shape([None, None, None, None, None]) loss = 0.0 @@ -680,16 +685,17 @@ def sampled_results(): tf.less(tf.random_uniform([]), prob), sampled_results, lambda: (sharded_logits, losses)) - if not self.hparams.use_eager_mode: + if not context.in_eager_mode(): tf.logging.info("This model_fn took %.3f sec." % (time.time() - start_time)) return sharded_logits, losses def call(self, inputs_dict, skip=False, force_full_predict=False): - self._fill_problem_hparams_features(inputs_dict) - sharded_logits, losses = self._model_fn( - inputs_dict, skip=skip, force_full_predict=force_full_predict) - return tf.concat(sharded_logits, 0), losses + with self._var_store.as_default(): + self._fill_problem_hparams_features(inputs_dict) + sharded_logits, losses = self._model_fn( + inputs_dict, skip=skip, force_full_predict=force_full_predict) + return tf.concat(sharded_logits, 0), losses def model_fn_body_sharded(self, sharded_features): """Mixture-of-experts models will override this function. @@ -715,7 +721,7 @@ def model_fn_body_sharded(self, sharded_features): _with_timing( self.model_fn_body, "model_fn_body", - silent=self.hparams.use_eager_mode), datashard_to_features) + silent=context.in_eager_mode()), datashard_to_features) if isinstance(output, tuple): losses_sharded = output[1] if isinstance(losses_sharded[0], dict): @@ -1052,3 +1058,17 @@ def _del_dict_nones(d): for k in list(d.keys()): if d[k] is None: del d[k] + + +class DummyVariableStore(object): + + @contextlib.contextmanager + def as_default(self): + yield + + +def create_eager_var_store(): + if context.in_eager_mode(): + return variable_scope.EagerVariableStore() + else: + return DummyVariableStore() From d517a6243dcf0024f9db7059e18f572b19b368a1 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Fri, 1 Dec 2017 16:45:21 -0800 Subject: [PATCH 0645/4095] Update colab notebook to use v1.3.1 PiperOrigin-RevId: 177658596 --- tensor2tensor/notebooks/hello_t2t.ipynb | 892 +++++++++++++----------- 1 file changed, 497 insertions(+), 395 deletions(-) diff --git a/tensor2tensor/notebooks/hello_t2t.ipynb b/tensor2tensor/notebooks/hello_t2t.ipynb index fd08175c6..797b0b98b 100644 --- a/tensor2tensor/notebooks/hello_t2t.ipynb +++ b/tensor2tensor/notebooks/hello_t2t.ipynb @@ -55,7 +55,8 @@ }, "source": [ "# Install deps\n", - "!pip install -q \"tensor2tensor-dev==1.3.1.dev7\" tf-nightly" + "# We're using some new features from tensorflow so we install tf-nightly\n", + "!pip install -q tensor2tensor tf-nightly" ], "cell_type": "code", "execution_count": 0, @@ -77,8 +78,10 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import os\n", + "import collections\n", "\n", "from tensor2tensor import problems\n", + "from tensor2tensor.layers import common_layers\n", "from tensor2tensor.utils import t2t_model\n", "from tensor2tensor.utils import trainer_utils\n", "from tensor2tensor.utils import registry\n", @@ -109,17 +112,17 @@ }, { "metadata": { - "id": "gXL7_bVH49Kl", + "id": "0a69r1KDiZDe", "colab_type": "text" }, "source": [ - "# Translate from English to German with a pre-trained model" + "# Download MNIST and inspect it" ], "cell_type": "markdown" }, { "metadata": { - "id": "Q2CYCYjZTlZs", + "id": "RYDMO4zArgkz", "colab_type": "code", "colab": { "autoexec": { @@ -128,18 +131,18 @@ }, "output_extras": [ { - "item_id": 2 + "item_id": 1 } ], "base_uri": "https://localhost:8080/", - "height": 68 + "height": 1224 }, - "outputId": "b13d53a3-feba-4d74-fc1e-951bef99ecb0", + "outputId": "2edd5f47-1ebb-4d18-e57c-741c966afc10", "executionInfo": { "status": "ok", - "timestamp": 1512165746671, + "timestamp": 1512173990900, "user_tz": 480, - "elapsed": 2799, + "elapsed": 272, "user": { "displayName": "Ryan Sepassi", "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", @@ -148,128 +151,162 @@ } }, "source": [ - "# Translation\n", - "ende_problem = registry.problem(\"translate_ende_wmt32k\")\n", - "\n", - "# Copy the vocab file locally\n", - "vocab_file = os.path.join(gs_data_dir, \"vocab.ende.32768\")\n", - "!gsutil cp {vocab_file} {data_dir}" + "# A Problem is a dataset together with some fixed pre-processing.\n", + "# It could be a translation dataset with a specific tokenization,\n", + "# or an image dataset with a specific resolution.\n", + "#\n", + "# There are many problems available in Tensor2Tensor\n", + "problems.available()" ], "cell_type": "code", "execution_count": 4, "outputs": [ { - "output_type": "stream", - "text": [ - "Copying gs://tensor2tensor-data/vocab.ende.32768...\n", - "/ [1 files][316.4 KiB/316.4 KiB] \n", - "Operation completed over 1 objects/316.4 KiB. \n" - ], - "name": "stdout" + "output_type": "execute_result", + "data": { + "text/plain": [ + "['algorithmic_addition_binary40',\n", + " 'algorithmic_addition_decimal40',\n", + " 'algorithmic_cipher_shift200',\n", + " 'algorithmic_cipher_shift5',\n", + " 'algorithmic_cipher_vigenere200',\n", + " 'algorithmic_cipher_vigenere5',\n", + " 'algorithmic_identity_binary40',\n", + " 'algorithmic_identity_decimal40',\n", + " 'algorithmic_multiplication_binary40',\n", + " 'algorithmic_multiplication_decimal40',\n", + " 'algorithmic_reverse_binary40',\n", + " 'algorithmic_reverse_binary40_test',\n", + " 'algorithmic_reverse_decimal40',\n", + " 'algorithmic_reverse_nlplike32k',\n", + " 'algorithmic_reverse_nlplike8k',\n", + " 'algorithmic_shift_decimal40',\n", + " 'audio_timit_characters_tune',\n", + " 'audio_timit_tokens8k_test',\n", + " 'audio_timit_tokens8k_tune',\n", + " 'image_celeba_tune',\n", + " 'image_cifar10',\n", + " 'image_cifar10_plain',\n", + " 'image_cifar10_plain8',\n", + " 'image_cifar10_tune',\n", + " 'image_fsns',\n", + " 'image_imagenet',\n", + " 'image_imagenet224',\n", + " 'image_imagenet32',\n", + " 'image_imagenet64',\n", + " 'image_mnist',\n", + " 'image_mnist_tune',\n", + " 'image_ms_coco_characters',\n", + " 'image_ms_coco_tokens32k',\n", + " 'image_ms_coco_tokens8k',\n", + " 'img2img_cifar10',\n", + " 'img2img_imagenet',\n", + " 'languagemodel_lm1b32k',\n", + " 'languagemodel_lm1b8k_packed',\n", + " 'languagemodel_lm1b_characters',\n", + " 'languagemodel_ptb10k',\n", + " 'languagemodel_ptb_characters',\n", + " 'languagemodel_wiki_full32k',\n", + " 'languagemodel_wiki_scramble128',\n", + " 'languagemodel_wiki_scramble1k50',\n", + " 'languagemodel_wiki_scramble8k50',\n", + " 'librispeech',\n", + " 'multinli_matched',\n", + " 'multinli_mismatched',\n", + " 'ocr_test',\n", + " 'parsing_english_ptb16k',\n", + " 'parsing_english_ptb8k',\n", + " 'parsing_icelandic16k',\n", + " 'programming_desc2code_cpp',\n", + " 'programming_desc2code_py',\n", + " 'sentiment_imdb',\n", + " 'summarize_cnn_dailymail32k',\n", + " 'translate_encs_wmt32k',\n", + " 'translate_encs_wmt_characters',\n", + " 'translate_ende_wmt32k',\n", + " 'translate_ende_wmt32k_packed',\n", + " 'translate_ende_wmt8k',\n", + " 'translate_ende_wmt_bpe32k',\n", + " 'translate_ende_wmt_characters',\n", + " 'translate_enfr_wmt32k',\n", + " 'translate_enfr_wmt8k',\n", + " 'translate_enfr_wmt_characters',\n", + " 'translate_enfr_wmt_small32k',\n", + " 'translate_enfr_wmt_small8k',\n", + " 'translate_enfr_wmt_small_characters',\n", + " 'translate_enmk_setimes32k',\n", + " 'translate_enzh_wmt8k']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 4 } ] }, { "metadata": { - "id": "EB4MP7_y_SuQ", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "source": [ - "encoders = ende_problem.feature_encoders(data_dir)\n", - "\n", - "def encode(input_str):\n", - " \"\"\"Input str to features dict, ready for inference\"\"\"\n", - " inputs = encoders[\"inputs\"].encode(input_str) + [1] # add EOS id\n", - " batch_inputs = tf.reshape(inputs, [1, -1, 1]) # Make it 3D.\n", - " return {\"inputs\": batch_inputs}\n", - "\n", - "def decode(integers):\n", - " \"\"\"List of ints to str\"\"\"\n", - " integers = list(np.squeeze(integers))\n", - " if 1 in integers:\n", - " integers = integers[:integers.index(1)]\n", - " return encoders[\"inputs\"].decode(np.squeeze(integers))" - ], - "cell_type": "code", - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "g2aQW7Z6TOEu", + "id": "JKc2uSk6WX5e", "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 3 + } + ], + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "0ea990ae-6715-4ada-d3a2-a5312faaaa39", + "executionInfo": { + "status": "ok", + "timestamp": 1512173992544, + "user_tz": 480, + "elapsed": 955, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" } } }, "source": [ - "# # Generate and view the data\n", - "# # This cell is commented out because data generation can take hours\n", - "\n", - "# ende_problem.generate_data(data_dir, tmp_dir)\n", - "# example = tfe.Iterator(ende_problem.dataset(Modes.TRAIN, data_dir)).next()\n", - "# inputs = [int(x) for x in example[\"inputs\"].numpy()] # Cast to ints.\n", - "# targets = [int(x) for x in example[\"targets\"].numpy()] # Cast to ints.\n", - "\n", - "\n", - "\n", - "# # Example inputs as int-tensor.\n", - "# print(\"Inputs, encoded:\")\n", - "# print(inputs)\n", - "# print(\"Inputs, decoded:\")\n", - "# # Example inputs as a sentence.\n", - "# print(decode(inputs))\n", - "# # Example targets as int-tensor.\n", - "# print(\"Targets, encoded:\")\n", - "# print(targets)\n", - "# # Example targets as a sentence.\n", - "# print(\"Targets, decoded:\")\n", - "# print(decode(targets))" + "# Fetch the MNIST problem\n", + "mnist_problem = problems.problem(\"image_mnist\")\n", + "# The generate_data method of a problem will download data and process it into\n", + "# a standard format ready for training and evaluation.\n", + "mnist_problem.generate_data(data_dir, tmp_dir)" ], "cell_type": "code", - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "9l6hDQbrRUYV", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", + "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", + "INFO:tensorflow:Skipping generator because outputs files exist\n", + "INFO:tensorflow:Skipping generator because outputs files exist\n", + "INFO:tensorflow:Skipping shuffle because output files exist\n" + ], + "name": "stdout" } - }, - "source": [ - "# Create hparams and the T2TModel object.\n", - "model_name = \"transformer\"\n", - "hparams_set = \"transformer_base\"\n", - "\n", - "hparams = trainer_utils.create_hparams(hparams_set, data_dir)\n", - "trainer_utils.add_problem_hparams(hparams, \"translate_ende_wmt32k\")\n", - "\n", - "# NOTE: Only create the model once when restoring from a checkpoint; it's a\n", - "# Layer and so subsequent instantiations will have different variable scopes\n", - "# that will not match the checkpoint.\n", - "translate_model = registry.model(model_name)(hparams, Modes.PREDICT)" - ], - "cell_type": "code", - "execution_count": 0, - "outputs": [] + ] }, { "metadata": { - "id": "FEwNUVlMYOJi", + "id": "VW6HCRANFPYV", "colab_type": "code", "colab": { "autoexec": { @@ -278,18 +315,21 @@ }, "output_extras": [ { - "item_id": 1 + "item_id": 2 + }, + { + "item_id": 3 } ], "base_uri": "https://localhost:8080/", - "height": 34 + "height": 381 }, - "outputId": "fc15a59a-7ea7-4baa-85c1-2a94528eb157", + "outputId": "121d463f-adaf-4340-a5cb-12e931fd0fdb", "executionInfo": { "status": "ok", - "timestamp": 1512165760778, + "timestamp": 1512173993175, "user_tz": 480, - "elapsed": 12527, + "elapsed": 561, "user": { "displayName": "Ryan Sepassi", "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", @@ -298,33 +338,52 @@ } }, "source": [ - "# Copy the pretrained checkpoint locally\n", - "ckpt_name = \"transformer_ende_test\"\n", - "gs_ckpt = os.path.join(gs_ckpt_dir, ckpt_name)\n", - "!gsutil -q cp -R {gs_ckpt} {checkpoint_dir}\n", - "ckpt_path = tf.train.latest_checkpoint(os.path.join(checkpoint_dir, ckpt_name))\n", - "ckpt_path" + "# Now let's see the training MNIST data as Tensors.\n", + "mnist_example = tfe.Iterator(mnist_problem.dataset(Modes.TRAIN, data_dir)).next()\n", + "image = mnist_example[\"inputs\"]\n", + "label = mnist_example[\"targets\"]\n", + "\n", + "plt.imshow(image.numpy()[:, :, 0].astype(np.float32), cmap=plt.get_cmap('gray'))\n", + "print(\"Label: %d\" % label.numpy())" ], "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "outputs": [ { - "output_type": "execute_result", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n", + "Label: 6\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUsAAAFKCAYAAACU6307AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAE4hJREFUeJzt3X1MlfX/x/HXCWLC1KEklq27OZ1M\ncKvUic4bFC3amje1VERzc02XOm9Gxpyo5SaKaN61RFO3ZK3T+CdXLsjMcoo4aVMP/6D+YcwMQZnp\nRFM6vz9++7KQczhvjpyb6/h8bPzB5/qcz/V+72IvrnOuc53j8nq9XgEAOvVUpAsAACcgLAHAgLAE\nAAPCEgAMCEsAMCAsAcDCGwaSfP5cuHDB7zan/sRiT7HaFz055ydcfXXGFY73WbpcLp/jXq/X7zan\nisWepNjsi56cI1x9dRaH8cEuunHjRp07d04ul0urV6/WsGHDgl0KAKJeUGF55swZXblyRW63W5cv\nX9bq1avldru7uzYAiBpBXeCpqqpSdna2JGngwIG6deuW7ty5062FAUA0CerMsqmpSUOHDm37vW/f\nvmpsbFTPnj19zr9w4YLS09N9bgvDS6ZhF4s9SbHZFz05R6T7Cvo1y/8K1ERGRobfx8Xai9Gx2JMU\nm33Rk3NEwwWeoJ6Gp6amqqmpqe3369evq1+/fsEsBQCOEFRYjhkzRhUVFZKk2tpapaam+n0KDgCx\nIKin4a+99pqGDh2qWbNmyeVyad26dd1dFwBEFd6U3s1isScpNvuiJ+dw7GuWAPCkISwBwICwBAAD\nwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhL\nADAgLAHAgLAEAAPCEgAMCEsAMAjqq3CBWDV8+HDTvMrKSvOaf/zxh3ludna2eW5TU5N5Lh4fZ5YA\nYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAbc7Av/x/vvvm+YlJyeb1+zK\n3Pnz55vnlpSUmOfi8XFmCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkABtzBg5j3\n3HPPmbfl5uaa1rxx44Z5/4WFhea5p06dMs9FeHFmCQAGQZ1ZVldXa9myZRo0aJAkafDgwV367wkA\nThP00/CRI0dq586d3VkLAEQtnoYDgEHQYXnp0iUtWrRIs2fP1smTJ7uzJgCIOi6v1+vt6oMaGhpU\nU1OjnJwc1dfXa968eaqsrFRCQoLP+R6PR+np6Y9dLABESlBh+ah3331Xn332mV544QXfO3G5fI57\nvV6/25wqFnuSnN2Xv7cO/fnnnxowYEC7MY/HY1rz33//Ne8/VG8dOn/+fIcxJx+nzoSrr87iMKin\n4YcPH9b+/fslSY2Njbpx44b69+8fXHUA4ABBXQ2fOHGi8vPz9fPPP+vBgwdav36936fgABALggrL\nnj17as+ePd1dCwBELW53RMwrKCgwb+vTp49pze3bt5v3z4lFbOB9lgBgQFgCgAFhCQAGhCUAGBCW\nAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoBBt3xEW8Cd8BFtjhdtfc2dO9c89+DBgz7H4+Li1Nra2m7s\n5s2bpjVfffVV8/6vXr1qnvu4ou04dRfHfkQbADxpCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICw\nBAADwhIADPjCMjjSe++9Z5771FP+zwke3fbVV1+Z1gznXTmIDpxZAoABYQkABoQlABgQlgBgQFgC\ngAFhCQAGhCUAGBCWAGBAWAKAAWEJAAbc7oiokpeXZ5r35ptvmte8d++ez/HExMQO26y3O+LJw5kl\nABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYMDtjogqs2fPNs2Li4szr7l9\n+3af4/n5+fr888/bjZ0/f968Lp4spjPLuro6ZWdnq6ysTJJ07do1zZ07V7m5uVq2bJn++eefkBYJ\nAJEWMCzv3r2rDRs2KDMzs21s586dys3N1ddff62XXnpJ5eXlIS0SACItYFgmJCRo3759Sk1NbRur\nrq7WpEmTJElZWVmqqqoKXYUAEAUCvmYZHx+v+Pj201paWpSQkCBJSklJUWNjY2iqA4Ao8dgXeLxe\nb8A5Fy5cUHp6etCPd5pY7Elybl/5+fnmbZ3NdQqnHqdAIt1XUGGZlJSke/fuqUePHmpoaGj3FN2X\njIwMn+Ner1culyuYEqJWLPYkha+vH374wTQvJyfHvObWrVt9jufn56ukpKTd2EcffWReNxrx9/f4\n+/EnqPdZjh49WhUVFZKkyspKjR07NrjKAMAhAp5Zejwebd68WVevXlV8fLwqKipUUlKigoICud1u\nDRgwQNOmTQtHrQAQMQHDMj09XYcOHeowfvDgwZAUBADRiDt4EHJz5swxz508ebJpnr8vIfPl119/\n9Tmen5/fYVufPn1MazY3N5v3j9jAveEAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCW\nAGBAWAKAAbc7IuTGjRtnnvvoB0378/3335vXHD58uHlbaWmpac0dO3aY919cXGyei+jFmSUAGBCW\nAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBg4PJ6vd6Q78Tl8jnu9Xr9bnOqWOxJ\n6thXYmKi+bGXL182z3322WdN82pra81rDh061Oe4y+VSsH/+J0+eNM8dO3ZsUPsIxpPy9xfK/fjD\nmSUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABjwhWUIyqxZs8xzrXfldIW/u3LC\n5dSpUxHdP8KPM0sAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgNsdEZRR\no0ZFugSzn376yef4lClTOmybPHmyac2WlpbHrgvOwpklABiYwrKurk7Z2dkqKyuTJBUUFOjtt9/W\n3LlzNXfuXB0/fjyUNQJAxAV8Gn737l1t2LBBmZmZ7cZXrlyprKyskBUGANEk4JllQkKC9u3bp9TU\n1HDUAwBRyeX1er2Wibt27VKfPn2Ul5engoICNTY26sGDB0pJSVFhYaH69u3r97Eej0fp6endVjQA\nhFtQV8OnTp2q5ORkpaWlae/evdq9e7fWrl3rd35GRobPca/XK5fLFUwJUSsWe5I69lVaWmp+7Acf\nfBCKksw6uxpeWVnZbsx6NfzTTz8173/9+vXmuY/rSfn7C+V+/AnqanhmZqbS0tIkSRMnTlRdXV1w\nlQGAQwQVlkuXLlV9fb0kqbq6WoMGDerWogAg2gR8Gu7xeLR582ZdvXpV8fHxqqioUF5enpYvX67E\nxEQlJSWpqKgoHLUCQMQEDMv09HQdOnSow/gbb7wRkoIAIBpxuyPaef75503bZs6cGY5y/Prxxx/N\nc8+dO+dzfMqUKfr999/bjfm7GPmo/fv3m/eP2MDtjgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABtzuinc4+e/K/23r37h2S/T98+NA07+DBg+Y1Fy9e7Hfbo1+XYl33f5+6\nhScHZ5YAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGDAHTxoJyUlJaht3eW7774z\nzbtz5455zddff9287fjx4+Z18WThzBIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8IS\nAAwISwAw4HZHhNz9+/fNc4cMGWKa9+2335rXrKmp8Tk+fvz4Dtu2bt1qXhdPFs4sAcCAsAQAA8IS\nAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAANud0TIWb+xUZLS0tJM85KSksxrfvLJJz7H\njx071mHb7du3zeviyWIKy+LiYtXU1Ojhw4dauHChMjIytGrVKrW2tqpfv37asmWLEhISQl0rAERM\nwLA8ffq0Ll68KLfbrebmZk2fPl2ZmZnKzc1VTk6Otm3bpvLycuXm5oajXgCIiICvWY4YMUI7duyQ\nJPXu3VstLS2qrq7WpEmTJElZWVmqqqoKbZUAEGEBwzIuLq7t9aHy8nKNGzdOLS0tbU+7U1JS1NjY\nGNoqASDCXF6v12uZePToUZWWlurAgQOaMmVK29nklStX9PHHH+ubb77x+1iPx6P09PTuqRgAIsB0\ngefEiRPas2ePvvzyS/Xq1UtJSUm6d++eevTooYaGBqWmpnb6+IyMDJ/jXq9XLper61VHMaf3tGvX\nLp/jS5Ys0e7du9t+X7x4sXnNrnxQr/VqeFf++WZnZ/scP3bsmCZOnNhu7JdffjGvG42c/vfnT7j6\n6uzcMeDT8Nu3b6u4uFilpaVKTk6WJI0ePVoVFRWSpMrKSo0dO7abSgWA6BTwzPLIkSNqbm7W8uXL\n28Y2bdqkNWvWyO12a8CAAZo2bVpIiwSASAsYljNnztTMmTM7jB88eDAkBQFANOIOHrRz5cqVoLZ1\nxtc/W3+M1xv1xRdfmNfs7HVIp79GifDh3nAAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIA\nDAhLADAgLAHAwPx5lo+1Ez8frRSLHycViz1JHfvaunWr+bErVqwwz920aZNpXlFRkXlNf19CFovH\nKhZ7khzyEW0AAMISAEwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMuN2xm8ViT1Js\n9kVPzsHtjgDgEIQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoAB\nYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAbxlknFxcWqqanRw4cPtXDhQh07\ndky1tbVKTk6WJC1YsEATJkwIZZ0AEFEBw/L06dO6ePGi3G63mpubNX36dI0aNUorV65UVlZWOGoE\ngIgLGJYjRozQsGHDJEm9e/dWS0uLWltbQ14YAEQTl7ezbxV/hNvt1tmzZxUXF6fGxkY9ePBAKSkp\nKiwsVN++ff3vxM+Xo8fiF8LHYk9SbPZFT84Rrr46i0NzWB49elSlpaU6cOCAPB6PkpOTlZaWpr17\n9+qvv/7S2rVr/T7W4/EoPT2965UDQLTwGvz222/ed955x9vc3Nxh28WLF71z5szp9PGSfP50ts2p\nP7HYU6z2RU/O+QlXX50J+Nah27dvq7i4WKWlpW1Xv5cuXar6+npJUnV1tQYNGhRoGQBwtIAXeI4c\nOaLm5mYtX768bWzGjBlavny5EhMTlZSUpKKiopAWCQCR1qULPEHvhAs8jheLfdGTc4Srr87ikDt4\nAMCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQA\nA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAIOwfBUuADgdZ5YAYEBYAoABYQkABoQl\nABgQlgBgQFgCgEF8JHa6ceNGnTt3Ti6XS6tXr9awYcMiUUa3qq6u1rJlyzRo0CBJ0uDBg1VYWBjh\nqoJXV1enDz/8UPPnz1deXp6uXbumVatWqbW1Vf369dOWLVuUkJAQ6TK75NGeCgoKVFtbq+TkZEnS\nggULNGHChMgW2UXFxcWqqanRw4cPtXDhQmVkZDj+OEkd+zp27FjEj1XYw/LMmTO6cuWK3G63Ll++\nrNWrV8vtdoe7jJAYOXKkdu7cGekyHtvdu3e1YcMGZWZmto3t3LlTubm5ysnJ0bZt21ReXq7c3NwI\nVtk1vnqSpJUrVyorKytCVT2e06dP6+LFi3K73Wpubtb06dOVmZnp6OMk+e5r1KhRET9WYX8aXlVV\npezsbEnSwIEDdevWLd25cyfcZaATCQkJ2rdvn1JTU9vGqqurNWnSJElSVlaWqqqqIlVeUHz15HQj\nRozQjh07JEm9e/dWS0uL44+T5Luv1tbWCFcVgbBsampSnz592n7v27evGhsbw11GSFy6dEmLFi3S\n7NmzdfLkyUiXE7T4+Hj16NGj3VhLS0vb07mUlBTHHTNfPUlSWVmZ5s2bpxUrVujmzZsRqCx4cXFx\nSkpKkiSVl5dr3Lhxjj9Oku++4uLiIn6sIvKa5X/Fyt2WL7/8spYsWaKcnBzV19dr3rx5qqysdOTr\nRYHEyjGbOnWqkpOTlZaWpr1792r37t1au3ZtpMvqsqNHj6q8vFwHDhzQlClT2sadfpz+25fH44n4\nsQr7mWVqaqqamprafr9+/br69esX7jK6Xf/+/fXWW2/J5XLpxRdf1DPPPKOGhoZIl9VtkpKSdO/e\nPUlSQ0NDTDydzczMVFpamiRp4sSJqquri3BFXXfixAnt2bNH+/btU69evWLmOD3aVzQcq7CH5Zgx\nY1RRUSFJqq2tVWpqqnr27BnuMrrd4cOHtX//fklSY2Ojbty4of79+0e4qu4zevTotuNWWVmpsWPH\nRriix7d06VLV19dL+v/XZP/3TganuH37toqLi1VaWtp2lTgWjpOvvqLhWEXkU4dKSkp09uxZuVwu\nrVu3TkOGDAl3Cd3uzp07ys/P199//60HDx5oyZIlGj9+fKTLCorH49HmzZt19epVxcfHq3///iop\nKVFBQYHu37+vAQMGqKioSE8//XSkSzXz1VNeXp727t2rxMREJSUlqaioSCkpKZEu1cztdmvXrl16\n5ZVX2sY2bdqkNWvWOPY4Sb77mjFjhsrKyiJ6rPiINgAw4A4eADAgLAHAgLAEAAPCEgAMCEsAMCAs\nAcCAsAQAA8ISAAz+D2GuR1qUzSXkAAAAAElFTkSuQmCC\n", "text/plain": [ - "u'/content/t2t/checkpoints/transformer_ende_test/model.ckpt-350855'" + "<matplotlib.figure.Figure at 0x7f899c8e6f50>" ] }, "metadata": { "tags": [] - }, - "execution_count": 8 + } } ] }, { "metadata": { - "id": "3O-8E9d6TtuJ", + "id": "gXL7_bVH49Kl", + "colab_type": "text" + }, + "source": [ + "# Translate from English to German with a pre-trained model" + ], + "cell_type": "markdown" + }, + { + "metadata": { + "id": "EB4MP7_y_SuQ", "colab_type": "code", "colab": { "autoexec": { @@ -333,18 +392,18 @@ }, "output_extras": [ { - "item_id": 3 + "item_id": 2 } ], "base_uri": "https://localhost:8080/", - "height": 119 + "height": 68 }, - "outputId": "24231c95-99cb-421b-d961-5a21322be945", + "outputId": "db79aefe-d9a6-437b-aaf8-4174a1f3c643", "executionInfo": { "status": "ok", - "timestamp": 1512165773424, + "timestamp": 1512173998055, "user_tz": 480, - "elapsed": 12593, + "elapsed": 2988, "user": { "displayName": "Ryan Sepassi", "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", @@ -353,32 +412,40 @@ } }, "source": [ - "# Restore and translate!\n", + "# Fetch the problem\n", + "ende_problem = problems.problem(\"translate_ende_wmt32k\")\n", "\n", - "def translate(inputs):\n", - " encoded_inputs = encode(inputs)\n", - " with tfe.restore_variables_on_create(ckpt_path):\n", - " model_output = translate_model.infer(encoded_inputs)\n", - " return decode(model_output)\n", + "# Copy the vocab file locally so we can encode inputs and decode model outputs\n", + "# All vocabs are stored on GCS\n", + "vocab_file = os.path.join(gs_data_dir, \"vocab.ende.32768\")\n", + "!gsutil cp {vocab_file} {data_dir}\n", "\n", - "inputs = \"This is a cat.\"\n", - "outputs = translate(inputs)\n", + "# Get the encoders from the problem\n", + "encoders = ende_problem.feature_encoders(data_dir)\n", "\n", - "print(\"Inputs: %s\" % inputs)\n", - "print(\"Outputs: %s\" % outputs)" + "# Setup helper functions for encoding and decoding\n", + "def encode(input_str):\n", + " \"\"\"Input str to features dict, ready for inference\"\"\"\n", + " inputs = encoders[\"inputs\"].encode(input_str) + [1] # add EOS id\n", + " batch_inputs = tf.reshape(inputs, [1, -1, 1]) # Make it 3D.\n", + " return {\"inputs\": batch_inputs}\n", + "\n", + "def decode(integers):\n", + " \"\"\"List of ints to str\"\"\"\n", + " integers = list(np.squeeze(integers))\n", + " if 1 in integers:\n", + " integers = integers[:integers.index(1)]\n", + " return encoders[\"inputs\"].decode(np.squeeze(integers))" ], "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "outputs": [ { "output_type": "stream", "text": [ - "INFO:tensorflow:Greedy Decoding\n", - "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:487: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "keep_dims is deprecated, use keepdims instead\n", - "Inputs: This is a cat.\n", - "Outputs: Das ist eine Katze.\n" + "Copying gs://tensor2tensor-data/vocab.ende.32768...\n", + "/ [1 files][316.4 KiB/316.4 KiB] \n", + "Operation completed over 1 objects/316.4 KiB. \n" ], "name": "stdout" } @@ -386,17 +453,46 @@ }, { "metadata": { - "id": "i7BZuO7T5BB4", - "colab_type": "text" + "id": "g2aQW7Z6TOEu", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } }, "source": [ - "# Train a custom model on MNIST" + "# # Generate and view the data\n", + "# # This cell is commented out because WMT data generation can take hours\n", + "\n", + "# ende_problem.generate_data(data_dir, tmp_dir)\n", + "# example = tfe.Iterator(ende_problem.dataset(Modes.TRAIN, data_dir)).next()\n", + "# inputs = [int(x) for x in example[\"inputs\"].numpy()] # Cast to ints.\n", + "# targets = [int(x) for x in example[\"targets\"].numpy()] # Cast to ints.\n", + "\n", + "\n", + "\n", + "# # Example inputs as int-tensor.\n", + "# print(\"Inputs, encoded:\")\n", + "# print(inputs)\n", + "# print(\"Inputs, decoded:\")\n", + "# # Example inputs as a sentence.\n", + "# print(decode(inputs))\n", + "# # Example targets as int-tensor.\n", + "# print(\"Targets, encoded:\")\n", + "# print(targets)\n", + "# # Example targets as a sentence.\n", + "# print(\"Targets, decoded:\")\n", + "# print(decode(targets))" ], - "cell_type": "markdown" + "cell_type": "code", + "execution_count": 0, + "outputs": [] }, { "metadata": { - "id": "RYDMO4zArgkz", + "id": "WkFUEs7ZOA79", "colab_type": "code", "colab": { "autoexec": { @@ -409,14 +505,14 @@ } ], "base_uri": "https://localhost:8080/", - "height": 1224 + "height": 408 }, - "outputId": "3b62dff4-7bfa-436e-a9f5-ecf66616e93a", + "outputId": "7283214e-af66-4f16-b203-3b209643484f", "executionInfo": { "status": "ok", - "timestamp": 1512165773875, + "timestamp": 1512174000121, "user_tz": 480, - "elapsed": 423, + "elapsed": 321, "user": { "displayName": "Ryan Sepassi", "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", @@ -425,158 +521,79 @@ } }, "source": [ - "# Lots of problems available\n", - "problems.available()" + "# There are many models available in Tensor2Tensor\n", + "registry.list_models()" ], "cell_type": "code", - "execution_count": 10, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['algorithmic_addition_binary40',\n", - " 'algorithmic_addition_decimal40',\n", - " 'algorithmic_cipher_shift200',\n", - " 'algorithmic_cipher_shift5',\n", - " 'algorithmic_cipher_vigenere200',\n", - " 'algorithmic_cipher_vigenere5',\n", - " 'algorithmic_identity_binary40',\n", - " 'algorithmic_identity_decimal40',\n", - " 'algorithmic_multiplication_binary40',\n", - " 'algorithmic_multiplication_decimal40',\n", - " 'algorithmic_reverse_binary40',\n", - " 'algorithmic_reverse_binary40_test',\n", - " 'algorithmic_reverse_decimal40',\n", - " 'algorithmic_reverse_nlplike32k',\n", - " 'algorithmic_reverse_nlplike8k',\n", - " 'algorithmic_shift_decimal40',\n", - " 'audio_timit_characters_tune',\n", - " 'audio_timit_tokens8k_test',\n", - " 'audio_timit_tokens8k_tune',\n", - " 'image_celeba_tune',\n", - " 'image_cifar10',\n", - " 'image_cifar10_plain',\n", - " 'image_cifar10_plain8',\n", - " 'image_cifar10_tune',\n", - " 'image_fsns',\n", - " 'image_imagenet',\n", - " 'image_imagenet224',\n", - " 'image_imagenet32',\n", - " 'image_imagenet64',\n", - " 'image_mnist',\n", - " 'image_mnist_tune',\n", - " 'image_ms_coco_characters',\n", - " 'image_ms_coco_tokens32k',\n", - " 'image_ms_coco_tokens8k',\n", - " 'img2img_cifar10',\n", - " 'img2img_imagenet',\n", - " 'languagemodel_lm1b32k',\n", - " 'languagemodel_lm1b8k_packed',\n", - " 'languagemodel_lm1b_characters',\n", - " 'languagemodel_ptb10k',\n", - " 'languagemodel_ptb_characters',\n", - " 'languagemodel_wiki_full32k',\n", - " 'languagemodel_wiki_scramble128',\n", - " 'languagemodel_wiki_scramble1k50',\n", - " 'languagemodel_wiki_scramble8k50',\n", - " 'librispeech',\n", - " 'multinli_matched',\n", - " 'multinli_mismatched',\n", - " 'ocr_test',\n", - " 'parsing_english_ptb16k',\n", - " 'parsing_english_ptb8k',\n", - " 'parsing_icelandic16k',\n", - " 'programming_desc2code_cpp',\n", - " 'programming_desc2code_py',\n", - " 'sentiment_imdb',\n", - " 'summarize_cnn_dailymail32k',\n", - " 'translate_encs_wmt32k',\n", - " 'translate_encs_wmt_characters',\n", - " 'translate_ende_wmt32k',\n", - " 'translate_ende_wmt32k_packed',\n", - " 'translate_ende_wmt8k',\n", - " 'translate_ende_wmt_bpe32k',\n", - " 'translate_ende_wmt_characters',\n", - " 'translate_enfr_wmt32k',\n", - " 'translate_enfr_wmt8k',\n", - " 'translate_enfr_wmt_characters',\n", - " 'translate_enfr_wmt_small32k',\n", - " 'translate_enfr_wmt_small8k',\n", - " 'translate_enfr_wmt_small_characters',\n", - " 'translate_enmk_setimes32k',\n", - " 'translate_enzh_wmt8k']" + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['resnet50',\n", + " 'lstm_seq2seq',\n", + " 'transformer_encoder',\n", + " 'attention_lm',\n", + " 'vanilla_gan',\n", + " 'transformer',\n", + " 'gene_expression_conv',\n", + " 'transformer_moe',\n", + " 'attention_lm_moe',\n", + " 'transformer_revnet',\n", + " 'lstm_seq2seq_attention',\n", + " 'shake_shake',\n", + " 'transformer_ae',\n", + " 'diagonal_neural_gpu',\n", + " 'xception',\n", + " 'aligned',\n", + " 'multi_model',\n", + " 'neural_gpu',\n", + " 'slice_net',\n", + " 'byte_net',\n", + " 'cycle_gan',\n", + " 'transformer_sketch',\n", + " 'blue_net']" ] }, "metadata": { "tags": [] }, - "execution_count": 10 + "execution_count": 9 } ] }, { "metadata": { - "id": "JKc2uSk6WX5e", + "id": "9l6hDQbrRUYV", "colab_type": "code", "colab": { "autoexec": { "startup": false, "wait_interval": 0 - }, - "output_extras": [ - { - "item_id": 3 - } - ], - "base_uri": "https://localhost:8080/", - "height": 204 - }, - "outputId": "f9fa17c1-ed3f-474e-8bd8-f764c3b00000", - "executionInfo": { - "status": "ok", - "timestamp": 1512165774930, - "user_tz": 480, - "elapsed": 977, - "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" } } }, "source": [ - "# Create the MNIST problem and generate the data\n", + "# Create hparams and the model\n", + "model_name = \"transformer\"\n", + "hparams_set = \"transformer_base\"\n", "\n", - "mnist_problem = problems.problem(\"image_mnist\")\n", - "# Generate data\n", - "mnist_problem.generate_data(data_dir, tmp_dir)" + "hparams = trainer_utils.create_hparams(hparams_set, data_dir)\n", + "trainer_utils.add_problem_hparams(hparams, \"translate_ende_wmt32k\")\n", + "\n", + "# NOTE: Only create the model once when restoring from a checkpoint; it's a\n", + "# Layer and so subsequent instantiations will have different variable scopes\n", + "# that will not match the checkpoint.\n", + "translate_model = registry.model(model_name)(hparams, Modes.PREDICT)" ], "cell_type": "code", - "execution_count": 11, - "outputs": [ - { - "output_type": "stream", - "text": [ - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Skipping generator because outputs files exist\n", - "INFO:tensorflow:Skipping generator because outputs files exist\n", - "INFO:tensorflow:Skipping shuffle because output files exist\n" - ], - "name": "stdout" - } - ] + "execution_count": 0, + "outputs": [] }, { "metadata": { - "id": "VW6HCRANFPYV", + "id": "FEwNUVlMYOJi", "colab_type": "code", "colab": { "autoexec": { @@ -585,21 +602,18 @@ }, "output_extras": [ { - "item_id": 2 - }, - { - "item_id": 3 + "item_id": 1 } ], "base_uri": "https://localhost:8080/", - "height": 381 + "height": 34 }, - "outputId": "93dea49c-dbca-4856-998f-8bcbb621abea", + "outputId": "ec8569a0-ee0e-4520-c9c6-06f3c7582ecc", "executionInfo": { "status": "ok", - "timestamp": 1512165775597, + "timestamp": 1512174015202, "user_tz": 480, - "elapsed": 622, + "elapsed": 12781, "user": { "displayName": "Ryan Sepassi", "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", @@ -608,42 +622,33 @@ } }, "source": [ - "# Get the tf.data.Dataset from Problem.dataset\n", - "mnist_example = tfe.Iterator(mnist_problem.dataset(Modes.TRAIN, data_dir)).next()\n", - "image = mnist_example[\"inputs\"]\n", - "label = mnist_example[\"targets\"]\n", - "\n", - "plt.imshow(image.numpy()[:, :, 0].astype(np.float32), cmap=plt.get_cmap('gray'))\n", - "print(\"Label: %d\" % label.numpy())" + "# Copy the pretrained checkpoint locally\n", + "ckpt_name = \"transformer_ende_test\"\n", + "gs_ckpt = os.path.join(gs_ckpt_dir, ckpt_name)\n", + "!gsutil -q cp -R {gs_ckpt} {checkpoint_dir}\n", + "ckpt_path = tf.train.latest_checkpoint(os.path.join(checkpoint_dir, ckpt_name))\n", + "ckpt_path" ], "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "outputs": [ { - "output_type": "stream", - "text": [ - "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n", - "Label: 6\n" - ], - "name": "stdout" - }, - { - "output_type": "display_data", + "output_type": "execute_result", "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUsAAAFKCAYAAACU6307AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFK1JREFUeJzt3X9MVfUfx/HXDSQgJJSEzS2rNS0m\nuFWzxB8Vymx8y1JrsxCdzT/shyaZK8ZEWzZ/oP2Qfomm/iG53cYfzj90MLNWKuBk1YR/0NqMWREY\nGSYU2P3+0WIhF3hzufeee67Px8Yf93M+nPN+fw+9vuee4znH4/P5fAIADOoGpwsAADcgLAHAgLAE\nAAPCEgAMCEsAMCAsAcDCFwaS/P6cOXNmwGVu/YnGnqK1L3pyz0+4+hqMJxz/ztLj8fgd9/l8Ay5z\nq2jsSYrOvujJPcLV12BxGBvoSjdt2qRvv/1WHo9HxcXFmjJlSqCrAoCIF1BYnjp1SufPn5fX69V3\n332n4uJieb3eYNcGABEjoAs8NTU1ys3NlSTdeeedunTpki5fvhzUwgAgkgR0ZNnW1qbJkyf3fh47\ndqxaW1uVlJTkd/6ZM2eUmZnpd1kYTpmGXTT2JEVnX/TkHk73FfA5y/8aqomsrKwBfy/aTkZHY09S\ndPZFT+4RCRd4AvoanpaWpra2tt7Pv/zyi8aNGxfIqgDAFQIKyxkzZqiqqkqS1NjYqLS0tAG/ggNA\nNAjoa/i9996ryZMn6+mnn5bH49GGDRuCXRcARBT+UXqQRWNPUnT2RU/u4dpzlgBwvSEsAcCAsAQA\nA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwI\nSwAwICwBwICwBAADwhIADAhLADAI6FW4QLSaNGmSad7JkyfN6/zss8/McxctWmSei/DiyBIADAhL\nADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAw4HZHRL2EhATzspKSEtM6x4wZY97+\nN998Y56LyMWRJQAYEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGHAHD6Jebm6ueVl+\nfn7Qt19RURH0dSL8OLIEAIOAjizr6uq0evVqTZw4UdI/rw+13lMLAG4U8Nfw+++/X2VlZcGsBQAi\nFl/DAcAg4LA8d+6cnnvuOT3zzDM6ceJEMGsCgIjj8fl8vuH+UktLi+rr65WXl6fm5mYtXbpU1dXV\niouL8zu/oaFBmZmZIy4WAJwSUFhe66mnntI777yjW2+91f9GPB6/4z6fb8BlbhWNPUnu7mvevHl+\nxw8dOqTHH3+8z9jBgweDvv3bb7/dPLe5uXlE23LzfhpMuPoaLA4D+hp+6NAh7dmzR5LU2tqqixcv\nKj09PbDqAMAFAroaPnv2bK1du1afffaZuru79frrrw/4FRwAokFAYZmUlKSdO3cGuxYAiFhBOWc5\n5EY4Z+l6kdbXYLcwXuvw4cN+x0eNGqXu7u4+YzExMaZ1fvnll+btz5071zz32nqGK9L2U7C49pwl\nAFxvCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADDg7Y6IKI899php3oEDB8zr\njI0d+M/82mWXLl0yrXP58uXm7Y/0FkZEBo4sAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhL\nADAgLAHAgDt4EJDB7oq51vPPP2+e+8Ybb5jm3XTTTeZ1/vHHH37Hk5KS+i3Lz883rfP77783bx/R\ngSNLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwIDbHRGQ2bNnm+e+++67\nQd9+T0+Pee7Bgwf9jhcUFPRbduTIkRHVhejFkSUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoAB\nYQkABoQlABgQlgBg4PH5fL6Qb8Tj8Tvu8/kGXOZWbu/pgQce8DteW1uradOm9X4+duyYeZ3x8fEj\nrutaGzZsMM998803/Y67fV/5E409SeHra7A4NB1ZNjU1KTc3VxUVFZKkn376SUuWLFF+fr5Wr16t\nv/76KziVAkCEGjIsr1y5oo0bNyo7O7t3rKysTPn5+Tpw4IBuu+02VVZWhrRIAHDakGEZFxen3bt3\nKy0trXesrq5Oc+bMkSTl5OSopqYmdBUCQAQY8hFtsbGxio3tO62zs1NxcXGSpNTUVLW2toamOgCI\nECN+nqXl+tCZM2eUmZkZ8O+7TTT2JP1zkSdSbNy4MShzo3FfRWNPkvN9BRSWiYmJ6urqUnx8vFpa\nWvp8RfcnKyvL73g0Xrlze09cDXe3aOxJctHV8GtNnz5dVVVVkqTq6mrNmjUrsMoAwCWGPLJsaGjQ\n1q1bdeHCBcXGxqqqqkrbt29XUVGRvF6vxo8fr/nz54ejVgBwzJBhmZmZqf379/cb37dvX0gKAoBI\nxAvL0MfatWtNy0JxHlKSPvnkE9O8t956KyTbD4VHH33UPDcmJsY898KFC37H77vvvj6f6+vrzevE\nwLg3HAAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADDghWVBFok9Pfvss+a5\nH374od/x+Ph4dXV19X7+9+HPFhcvXjTPnT59umneuXPnzOu8+eab/Y7/9ttvSklJ6TO2bt060zoX\nLlxo3v5tt91mnjucv50//vij39jo0aPV0dHRZ+yOO+4wr3M4+yqcXPuINgC43hCWAGBAWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBgwNsdXWo4bwFcsGCBee6NN95oWjacu2RXrVplnmu9\njXGgWxj9WbZsmXnZK6+8Yl6v1XBu0xvO/65JSUmm8cLCQvM6S0pKzHOvNxxZAoABYQkABoQlABgQ\nlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAS8sC7Jw9ZSenm6e++OPP454ezfccIP+/vvv3s/+XpY1\nkOTkZPPce++91zRvx44d5nXOmDHD77jH4+l3x0wo/nM4efKkea71hW0DuXY/DXeddXV1I9p+qPDC\nMgBwCcISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMeGEZAtLY2GieO3bsWPPc\niooK07y77rrLvM6Ojg6/48nJyf2Web1e0zo/+eQT8/ZHjRplnltVVWWeu23btn5jr732Wr/x+vp6\n8zoxMI4sAcDAFJZNTU3Kzc3t/X/9oqIizZs3T0uWLNGSJUv0xRdfhLJGAHDckF/Dr1y5oo0bNyo7\nO7vP+Jo1a5STkxOywgAgkgx5ZBkXF6fdu3crLS0tHPUAQEQyP8/yvffe05gxY1RQUKCioiK1traq\nu7tbqampKikpGfQkfkNDgzIzM4NWNACEW0BXw5944gmlpKQoIyNDu3bt0vvvv6/169cPOD8rK8vv\nOA//DZzTD/89deqU+XcfffRR89zjx4+b5g3navjly5f9jicnJ+v333/vMxYNV8O3bt3aZ2zdunXm\ndfb09JjnhpNrH/6bnZ2tjIwMSdLs2bPV1NQUWGUA4BIBheWqVavU3Nws6Z/H0E+cODGoRQFApBny\na3hDQ4O2bt2qCxcuKDY2VlVVVSooKFBhYaESEhKUmJiozZs3h6NWAHDMkGGZmZmp/fv39xt/5JFH\nQlIQAEQibnd0qWeffdbR7Q/nZPu/p2ws4uPjTfOuXLliXmdeXp7f8RMnTvRbZn0TY0JCgnn7R44c\nMc8dzgWWTz/9tN/Ya6+91m88Ui/auA23OwKAAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBg\nQFgCgAFhCQAG5of/jmgjA9wax/MsA+f08yydVlxcbJ67Y8cOv+OdnZ39blucN2+eaZ2vvPKKefv3\n3HOPee6aNWvMcz/44IN+Y9H435Tk4udZAsD1hrAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAw\nICwBwIAXlrlUV1eXee4PP/xgnjthwoRAygmalStXmuadOHHCvM6PPvrIvGzp0qWmdX7//ffm7b/0\n0kvmueXl5ea5CC+OLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADXlgW\nZJHY04EDB8xzFy1a5Hc8XC8su3TpkmlebKz9Tt2kpCS/4x6Pp98Lqqy3kc6aNcu8/fr6evPckYrE\nv79g4IVlAOAShCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABjwdsfrQFtbm9Ml\nmN18881BX+dAb4KcOXNmv2WLFy82rXM4b8xEdDCFZWlpqerr69XT06MVK1YoKytLr776qq5evapx\n48Zp27ZtiouLC3WtAOCYIcOytrZWZ8+eldfrVXt7uxYsWKDs7Gzl5+crLy9Pb7/9tiorK5Wfnx+O\negHAEUOes5w6dap27NghSUpOTlZnZ6fq6uo0Z84cSVJOTo5qampCWyUAOGzIsIyJiVFiYqIkqbKy\nUg8++KA6Ozt7v3anpqaqtbU1tFUCgMPMF3iOHj2qyspK7d27V3Pnzu0dtzwO88yZM8rMzPS7LAyP\n0wy7aOxJ+ueZlm40c+ZM87Lz58+HupyQi9a/P6f7MoXlV199pZ07d+rjjz/W6NGjlZiYqK6uLsXH\nx6ulpUVpaWmD/n5WVpbf8Wh8UGkk9lRWVmae++KLL/odD9fDf0Ph5MmTfsdnzpyp48eP9xlz+9Xw\nSPz7CwZXPPy3o6NDpaWlKi8vV0pKiiRp+vTpqqqqkiRVV1cP66nRAOBGQx5ZHj58WO3t7SosLOwd\n27Jli9atWyev16vx48dr/vz5IS0SAJw2ZFguWrTI73tZ9u3bF5KCACAS8cKyIIvEnqZNm2aeO9Dd\nLiM5Z/nWW2+Z5x45ciSgbQzm888/9zseiftqpKKxJ8kl5ywBAIQlAJgQlgBgQFgCgAFhCQAGhCUA\nGBCWAGBAWAKAAWEJAAaEJQAYcLtjkEViT/Hx8ea5Az3O7J577tHXX3/d+3ny5MnmdT700EPmubW1\ntea5IxWJ+2qkorEnidsdAcA1CEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADDg\ndscgi8aepOjsi57cg9sdAcAlCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAg\nLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADGItk0pLS1VfX6+e\nnh6tWLFCx44dU2Njo1JSUiRJy5cv18MPPxzKOgHAUUOGZW1trc6ePSuv16v29nYtWLBA06ZN05o1\na5STkxOOGgHAcUOG5dSpUzVlyhRJUnJysjo7O3X16tWQFwYAkcTjG+yt4tfwer06ffq0YmJi1Nra\nqu7ubqWmpqqkpERjx44deCMDvBw9Gl8IH409SdHZFz25R7j6GiwOzWF59OhRlZeXa+/evWpoaFBK\nSooyMjK0a9cu/fzzz1q/fv2Av9vQ0KDMzMzhVw4AkcJn8OWXX/qefPJJX3t7e79lZ8+e9S1evHjQ\n35fk92ewZW79icaeorUvenLPT7j6GsyQ/3Soo6NDpaWlKi8v7736vWrVKjU3N0uS6urqNHHixKFW\nAwCuNuQFnsOHD6u9vV2FhYW9YwsXLlRhYaESEhKUmJiozZs3h7RIAHDasC7wBLwRLvC4XjT2RU/u\nEa6+BotD7uABAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHA\ngLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADMLyKlwAcDuOLAHAgLAE\nAAPCEgAMCEsAMCAsAcCAsAQAg1gnNrpp0yZ9++238ng8Ki4u1pQpU5woI6jq6uq0evVqTZw4UZI0\nadIklZSUOFxV4JqamvTCCy9o2bJlKigo0E8//aRXX31VV69e1bhx47Rt2zbFxcU5XeawXNtTUVGR\nGhsblZKSIklavny5Hn74YWeLHKbS0lLV19erp6dHK1asUFZWluv3k9S/r2PHjjm+r8IelqdOndL5\n8+fl9Xr13Xffqbi4WF6vN9xlhMT999+vsrIyp8sYsStXrmjjxo3Kzs7uHSsrK1N+fr7y8vL09ttv\nq7KyUvn5+Q5WOTz+epKkNWvWKCcnx6GqRqa2tlZnz56V1+tVe3u7FixYoOzsbFfvJ8l/X9OmTXN8\nX4X9a3hNTY1yc3MlSXfeeacuXbqky5cvh7sMDCIuLk67d+9WWlpa71hdXZ3mzJkjScrJyVFNTY1T\n5QXEX09uN3XqVO3YsUOSlJycrM7OTtfvJ8l/X1evXnW4KgfCsq2tTWPGjOn9PHbsWLW2toa7jJA4\nd+6cnnvuOT3zzDM6ceKE0+UELDY2VvHx8X3GOjs7e7/Opaamum6f+etJkioqKrR06VK9/PLL+vXX\nXx2oLHAxMTFKTEyUJFVWVurBBx90/X6S/PcVExPj+L5y5Jzlf0XL3Za33367Vq5cqby8PDU3N2vp\n0qWqrq525fmioUTLPnviiSeUkpKijIwM7dq1S++//77Wr1/vdFnDdvToUVVWVmrv3r2aO3du77jb\n99N/+2poaHB8X4X9yDItLU1tbW29n3/55ReNGzcu3GUEXXp6uv73v//J4/FowoQJuuWWW9TS0uJ0\nWUGTmJiorq4uSVJLS0tUfJ3Nzs5WRkaGJGn27NlqampyuKLh++qrr7Rz507t3r1bo0ePjpr9dG1f\nkbCvwh6WM2bMUFVVlSSpsbFRaWlpSkpKCncZQXfo0CHt2bNHktTa2qqLFy8qPT3d4aqCZ/r06b37\nrbq6WrNmzXK4opFbtWqVmpubJf1zTvbff8ngFh0dHSotLVV5eXnvVeJo2E/++oqEfeXIU4e2b9+u\n06dPy+PxaMOGDbr77rvDXULQXb58WWvXrtXvv/+u7u5urVy5Ug899JDTZQWkoaFBW7du1YULFxQb\nG6v09HRt375dRUVF+vPPPzV+/Hht3rxZo0aNcrpUM389FRQUaNeuXUpISFBiYqI2b96s1NRUp0s1\n83q9eu+993THHXf0jm3ZskXr1q1z7X6S/Pe1cOFCVVRUOLqveEQbABhwBw8AGBCWAGBAWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABYQkABv8HkbgWVGnLsmMAAAAASUVORK5CYII=\n", "text/plain": [ - "<matplotlib.figure.Figure at 0x7f4899f96d90>" + "u'/content/t2t/checkpoints/transformer_ende_test/model.ckpt-350855'" ] }, "metadata": { "tags": [] - } + }, + "execution_count": 11 } ] }, { "metadata": { - "id": "WkFUEs7ZOA79", + "id": "3O-8E9d6TtuJ", "colab_type": "code", "colab": { "autoexec": { @@ -652,18 +657,18 @@ }, "output_extras": [ { - "item_id": 1 + "item_id": 3 } ], "base_uri": "https://localhost:8080/", - "height": 408 + "height": 119 }, - "outputId": "3d0c50f2-9c18-4d4b-8455-1aabe9e28190", + "outputId": "306d8df1-70c4-43f5-fc15-54ff66ec58ed", "executionInfo": { "status": "ok", - "timestamp": 1512165775887, + "timestamp": 1512174026517, "user_tz": 480, - "elapsed": 242, + "elapsed": 11277, "user": { "displayName": "Ryan Sepassi", "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", @@ -672,48 +677,47 @@ } }, "source": [ - "# Lots of models available\n", - "registry.list_models()" + "# Restore and translate!\n", + "\n", + "def translate(inputs):\n", + " encoded_inputs = encode(inputs)\n", + " with tfe.restore_variables_on_create(ckpt_path):\n", + " model_output = translate_model.infer(encoded_inputs)\n", + " return decode(model_output)\n", + "\n", + "inputs = \"This is a cat.\"\n", + "outputs = translate(inputs)\n", + "\n", + "print(\"Inputs: %s\" % inputs)\n", + "print(\"Outputs: %s\" % outputs)" ], "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "outputs": [ { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['resnet50',\n", - " 'lstm_seq2seq',\n", - " 'transformer_encoder',\n", - " 'attention_lm',\n", - " 'vanilla_gan',\n", - " 'transformer',\n", - " 'gene_expression_conv',\n", - " 'transformer_moe',\n", - " 'attention_lm_moe',\n", - " 'transformer_revnet',\n", - " 'lstm_seq2seq_attention',\n", - " 'shake_shake',\n", - " 'transformer_ae',\n", - " 'diagonal_neural_gpu',\n", - " 'xception',\n", - " 'aligned',\n", - " 'multi_model',\n", - " 'neural_gpu',\n", - " 'slice_net',\n", - " 'byte_net',\n", - " 'cycle_gan',\n", - " 'transformer_sketch',\n", - " 'blue_net']" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 13 + "output_type": "stream", + "text": [ + "INFO:tensorflow:Greedy Decoding\n", + "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:487: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "keep_dims is deprecated, use keepdims instead\n", + "Inputs: This is a cat.\n", + "Outputs: Das ist eine Katze.\n" + ], + "name": "stdout" } ] }, + { + "metadata": { + "id": "i7BZuO7T5BB4", + "colab_type": "text" + }, + "source": [ + "# Train a custom model on MNIST" + ], + "cell_type": "markdown" + }, { "metadata": { "id": "-H25oG91YQj3", @@ -751,7 +755,7 @@ }, { "metadata": { - "id": "AWVd2I7PYz6H", + "id": "7GEmpYQ2ZMnB", "colab_type": "code", "colab": { "autoexec": { @@ -760,18 +764,18 @@ }, "output_extras": [ { - "item_id": 12 + "item_id": 1 } ], "base_uri": "https://localhost:8080/", - "height": 357 + "height": 34 }, - "outputId": "19abcffa-6a56-4633-90c1-71a59a104ace", + "outputId": "9535b122-d663-470b-fb03-15541769a8d6", "executionInfo": { "status": "ok", - "timestamp": 1512165882231, + "timestamp": 1512174027233, "user_tz": 480, - "elapsed": 105926, + "elapsed": 372, "user": { "displayName": "Ryan Sepassi", "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", @@ -780,27 +784,72 @@ } }, "source": [ - "# Train\n", + "# Prepare for the training loop\n", "\n", - "# In Eager mode, opt.minimize must be passed a function that produces the loss\n", - "def loss_function(features):\n", + "# In Eager mode, opt.minimize must be passed a loss function wrapped with\n", + "# implicit_value_and_gradients\n", + "@tfe.implicit_value_and_gradients\n", + "def loss_fn(features):\n", " _, losses = model(features)\n", " return losses[\"training\"]\n", "\n", - "tfe_loss_fn = tfe.implicit_value_and_gradients(loss_function)\n", - "optimizer = tf.train.AdamOptimizer()\n", - "\n", - "NUM_STEPS = 500\n", + "# Setup the training data\n", "BATCH_SIZE = 128\n", - "\n", - "# Repeat and batch the data\n", "mnist_train_dataset = mnist_problem.dataset(Modes.TRAIN, data_dir)\n", "mnist_train_dataset = mnist_train_dataset.repeat(None).batch(BATCH_SIZE)\n", "\n", - "# Training loop\n", + "optimizer = tf.train.AdamOptimizer()" + ], + "cell_type": "code", + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "AWVd2I7PYz6H", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 11 + } + ], + "base_uri": "https://localhost:8080/", + "height": 340 + }, + "outputId": "adfe2262-ca2a-4d74-ef6f-4caaf5531824", + "executionInfo": { + "status": "ok", + "timestamp": 1512174129153, + "user_tz": 480, + "elapsed": 101898, + "user": { + "displayName": "Ryan Sepassi", + "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", + "userId": "107877449274830904926" + } + } + }, + "source": [ + "# Train\n", + "\n", + "NUM_STEPS = 500\n", + "\n", "for count, example in enumerate(tfe.Iterator(mnist_train_dataset)):\n", " example[\"targets\"] = tf.reshape(example[\"targets\"], [BATCH_SIZE, 1, 1, 1]) # Make it 4D.\n", - " loss, gv = tfe_loss_fn(example)\n", + " loss, gv = loss_fn(example)\n", " optimizer.apply_gradients(gv)\n", "\n", " if count % 50 == 0:\n", @@ -814,7 +863,6 @@ { "output_type": "stream", "text": [ - "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n", "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:1671: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "\n", @@ -823,22 +871,71 @@ "\n", "See tf.nn.softmax_cross_entropy_with_logits_v2.\n", "\n", - "Step: 0, Loss: 5.430\n", - "Step: 50, Loss: 0.833\n", - "Step: 100, Loss: 0.722\n", - "Step: 150, Loss: 0.529\n", - "Step: 200, Loss: 0.349\n", - "Step: 250, Loss: 0.293\n", - "Step: 300, Loss: 0.303\n", - "Step: 350, Loss: 0.295\n", - "Step: 400, Loss: 0.275\n", - "Step: 450, Loss: 0.290\n", - "Step: 500, Loss: 0.334\n" + "Step: 0, Loss: 5.357\n", + "Step: 50, Loss: 0.746\n", + "Step: 100, Loss: 0.618\n", + "Step: 150, Loss: 0.502\n", + "Step: 200, Loss: 0.395\n", + "Step: 250, Loss: 0.345\n", + "Step: 300, Loss: 0.338\n", + "Step: 350, Loss: 0.175\n", + "Step: 400, Loss: 0.345\n", + "Step: 450, Loss: 0.373\n", + "Step: 500, Loss: 0.292\n" ], "name": "stdout" } ] }, + { + "metadata": { + "id": "a2cL8UwLaSYG", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "source": [ + "# This will eventually be available at\n", + "# tensor2tensor.metrics.create_eager_metrics\n", + "def create_eager_metrics(metric_names):\n", + " \"\"\"Create metrics accumulators and averager for Eager mode.\n", + "\n", + " Args:\n", + " metric_names: list<str> from tensor2tensor.metrics.Metrics\n", + "\n", + " Returns:\n", + " (accum_fn(predictions, targets) => None,\n", + " result_fn() => dict<str metric_name, float avg_val>\n", + " \"\"\"\n", + " metric_fns = dict(\n", + " [(name, metrics.METRICS_FNS[name]) for name in metric_names])\n", + " tfe_metrics = dict()\n", + "\n", + " for name in metric_names:\n", + " tfe_metrics[name] = tfe.metrics.Mean(name=name)\n", + "\n", + " def metric_accum(predictions, targets):\n", + " for name, metric_fn in metric_fns.items():\n", + " val, weight = metric_fn(predictions, targets,\n", + " weights_fn=common_layers.weights_all)\n", + " tfe_metrics[name](np.squeeze(val), np.squeeze(weight))\n", + "\n", + " def metric_means():\n", + " avgs = {}\n", + " for name in metric_names:\n", + " avgs[name] = tfe_metrics[name].result().numpy()\n", + " return avgs\n", + "\n", + " return metric_accum, metric_means" + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] + }, { "metadata": { "id": "CIFlkiVOd8jO", @@ -854,14 +951,14 @@ } ], "base_uri": "https://localhost:8080/", - "height": 51 + "height": 68 }, - "outputId": "70b92db9-9ec0-466c-e5c2-c5a39f13447d", + "outputId": "95ec4064-d884-4ea8-acdf-ffe83dc0c230", "executionInfo": { "status": "ok", - "timestamp": 1512165950748, + "timestamp": 1512174132643, "user_tz": 480, - "elapsed": 2772, + "elapsed": 3097, "user": { "displayName": "Ryan Sepassi", "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", @@ -872,25 +969,29 @@ "source": [ "model.set_mode(Modes.EVAL)\n", "mnist_eval_dataset = mnist_problem.dataset(Modes.EVAL, data_dir)\n", - "all_perplexities = []\n", - "all_accuracies = []\n", + "\n", + "# Create eval metric accumulators for accuracy (ACC) and accuracy in\n", + "# top 5 (ACC_TOP5)\n", + "metrics_accum, metrics_result = create_eager_metrics(\n", + " [metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5])\n", + "\n", "for count, example in enumerate(tfe.Iterator(mnist_eval_dataset)):\n", - " if count >= 100:\n", + " if count >= 200:\n", " break\n", "\n", - " batch_inputs = tf.reshape(example[\"inputs\"], [1, 28, 28, 3]) # Make it 4D.\n", - " batch_targets = tf.reshape(example[\"targets\"], [1, 1, 1, 1]) # Make it 4D.\n", - " features = {\"inputs\": batch_inputs, \"targets\": batch_targets}\n", + " # Make the inputs and targets 4D\n", + " example[\"inputs\"] = tf.reshape(example[\"inputs\"], [1, 28, 28, 3])\n", + " example[\"targets\"] = tf.reshape(example[\"targets\"], [1, 1, 1, 1])\n", "\n", - " # Call the model.\n", - " predictions, _ = model(features)\n", + " # Call the model\n", + " predictions, _ = model(example)\n", "\n", - " # Calculate and append the metrics\n", - " all_perplexities.extend(metrics.padded_neg_log_perplexity(predictions, features[\"targets\"]))\n", - " all_accuracies.extend(metrics.padded_accuracy(predictions, features[\"targets\"]))\n", + " # Compute and accumulate metrics\n", + " metrics_accum(predictions, example[\"targets\"])\n", "\n", - "# Print out metrics on the dataset\n", - "print(\"Accuracy: %.2f\" % tf.reduce_mean(tf.concat(all_accuracies, axis=1)).numpy())" + "# Print out the averaged metric values on the eval data\n", + "for name, val in metrics_result().items():\n", + " print(\"%s: %.2f\" % (name, val))" ], "cell_type": "code", "execution_count": 17, @@ -899,7 +1000,8 @@ "output_type": "stream", "text": [ "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-dev*\n", - "Accuracy: 0.98\n" + "accuracy_top5: 1.00\n", + "accuracy: 0.98\n" ], "name": "stdout" } From 9a8f203cffcf8fe433b905ea62f2a7168e438f22 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Sun, 3 Dec 2017 10:40:07 -0800 Subject: [PATCH 0646/4095] Store attention-weight tensors as part of Transformer model class for easier access when vizualizing. PiperOrigin-RevId: 177748075 --- tensor2tensor/layers/common_attention.py | 19 +++++++++++++---- tensor2tensor/models/transformer.py | 27 +++++++++++++++++++----- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 23cf074af..304cb49be 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -1182,7 +1182,8 @@ def dot_product_attention(q, dropout_rate=0.0, image_shapes=None, name=None, - make_image_summary=True): + make_image_summary=True, + save_weights_to=None): """dot-product attention. Args: @@ -1195,17 +1196,22 @@ def dot_product_attention(q, see comments for attention_image_summary() name: an optional string make_image_summary: True if you want an image summary. + save_weights_to: an optional dictionary to capture attention weights + for vizualization; the weights tensor will be appended there under + a string key created from the variable scope (including name). Returns: A Tensor. """ with tf.variable_scope( - name, default_name="dot_product_attention", values=[q, k, v]): + name, default_name="dot_product_attention", values=[q, k, v]) as scope: # [batch, num_heads, query_length, memory_length] logits = tf.matmul(q, k, transpose_b=True) if bias is not None: logits += bias weights = tf.nn.softmax(logits, name="attention_weights") + if save_weights_to is not None: + save_weights_to[scope.name] = weights # dropping out the attention links for each of the heads weights = tf.nn.dropout(weights, 1.0 - dropout_rate) if (not tf.get_variable_scope().reuse and @@ -2245,6 +2251,7 @@ def multihead_attention(query_antecedent, gap_size=0, num_memory_blocks=2, name=None, + save_weights_to=None, **kwargs): """Multihead scaled-dot-product attention with input/output transformations. @@ -2284,7 +2291,10 @@ def multihead_attention(query_antecedent, memory blocks. num_memory_blocks: Integer option to indicate how many memory blocks to look at. - name: an optional string + name: an optional string. + save_weights_to: an optional dictionary to capture attention weights + for vizualization; the weights tensor will be appended there under + a string key created from the variable scope (including name). **kwargs (dict): Parameters for the attention function Caching: @@ -2345,7 +2355,8 @@ def multihead_attention(query_antecedent, if isinstance(x, tuple): x, additional_returned_value = x # Unpack elif attention_type == "dot_product": - x = dot_product_attention(q, k, v, bias, dropout_rate, image_shapes) + x = dot_product_attention(q, k, v, bias, dropout_rate, image_shapes, + save_weights_to=save_weights_to) elif attention_type == "dot_product_relative": x = dot_product_attention_relative(q, k, v, bias, max_relative_position, dropout_rate, image_shapes) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index ffe5fcb52..8fd3edd21 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -45,6 +45,10 @@ class Transformer(t2t_model.T2TModel): """Attention net. See file docstring.""" + def __init__(self, *args, **kwargs): + super(Transformer, self).__init__(*args, **kwargs) + self.attention_weights = dict() # For vizualizing attention heads. + def encode(self, inputs, target_space, hparams, features=None): """Encode transformer inputs. @@ -73,7 +77,8 @@ def encode(self, inputs, target_space, hparams, features=None): encoder_output = transformer_encoder( encoder_input, self_attention_bias, - hparams, nonpadding=_features_to_nonpadding(features, "inputs")) + hparams, nonpadding=_features_to_nonpadding(features, "inputs"), + save_weights_to=self.attention_weights) return encoder_output, encoder_decoder_attention_bias @@ -114,7 +119,8 @@ def decode(self, encoder_decoder_attention_bias, hparams, cache=cache, - nonpadding=nonpadding) + nonpadding=nonpadding, + save_weights_to=self.attention_weights) if hparams.use_tpu and hparams.mode == tf.estimator.ModeKeys.TRAIN: # TPU does not react kindly to extra dimensions. @@ -507,7 +513,8 @@ def transformer_encoder(encoder_input, encoder_self_attention_bias, hparams, name="encoder", - nonpadding=None): + nonpadding=None, + save_weights_to=None): """A stack of transformer layers. Args: @@ -522,6 +529,9 @@ def transformer_encoder(encoder_input, encoder_self_attention_bias. The knowledge about padding is used for pad_remover(efficiency) and to mask out padding in convoltutional layers. + save_weights_to: an optional dictionary to capture attention weights + for vizualization; the weights tensor will be appended there under + a string key created from the variable scope (including name). Returns: y: a Tensors @@ -551,6 +561,7 @@ def transformer_encoder(encoder_input, hparams.num_heads, hparams.attention_dropout, attention_type=hparams.self_attention_type, + save_weights_to=save_weights_to, max_relative_position=hparams.max_relative_position) x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): @@ -571,7 +582,8 @@ def transformer_decoder(decoder_input, hparams, cache=None, name="decoder", - nonpadding=None): + nonpadding=None, + save_weights_to=None): """A stack of transformer layers. Args: @@ -590,6 +602,9 @@ def transformer_decoder(decoder_input, to mask out padding in convoltutional layers. We generally only need this mask for "packed" datasets, because for ordinary datasets, no padding is ever followed by nonpadding. + save_weights_to: an optional dictionary to capture attention weights + for vizualization; the weights tensor will be appended there under + a string key created from the variable scope (including name). Returns: y: a Tensors @@ -612,6 +627,7 @@ def transformer_decoder(decoder_input, hparams.num_heads, hparams.attention_dropout, attention_type=hparams.self_attention_type, + save_weights_to=save_weights_to, max_relative_position=hparams.max_relative_position, cache=layer_cache) x = common_layers.layer_postprocess(x, y, hparams) @@ -624,7 +640,8 @@ def transformer_decoder(decoder_input, hparams.attention_key_channels or hparams.hidden_size, hparams.attention_value_channels or hparams.hidden_size, hparams.hidden_size, hparams.num_heads, - hparams.attention_dropout) + hparams.attention_dropout, + save_weights_to=save_weights_to) x = common_layers.layer_postprocess(x, y, hparams) with tf.variable_scope("ffn"): y = transformer_ffn_layer( From b8aa4ea85ec5450253fec76fc0ecfe03099a4ae0 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Sun, 3 Dec 2017 13:01:10 -0800 Subject: [PATCH 0647/4095] add translate_enfr_wmt32k_packed problem. PiperOrigin-RevId: 177752030 --- tensor2tensor/data_generators/translate_enfr.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensor2tensor/data_generators/translate_enfr.py b/tensor2tensor/data_generators/translate_enfr.py index b09fca90e..921834000 100644 --- a/tensor2tensor/data_generators/translate_enfr.py +++ b/tensor2tensor/data_generators/translate_enfr.py @@ -143,6 +143,14 @@ def use_small_dataset(self): return False +@registry.register_problem +class TranslateEnfrWmt32kPacked(TranslateEnfrWmt32k): + + @property + def packed_length(self): + return 256 + + @registry.register_problem class TranslateEnfrWmtSmallCharacters(translate.TranslateProblem): """Problem spec for WMT En-Fr translation.""" From f2fb96b31ec46c97d079868fee2ca37a931d19ec Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Sun, 3 Dec 2017 18:11:14 -0800 Subject: [PATCH 0648/4095] Release 1.3.2 with colab improvements. PiperOrigin-RevId: 177762544 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 94f44c137..8870809ae 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.3.1', + version='1.3.2', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', From 9a934ccfd314c4753f1fca6fb624b656d05c9716 Mon Sep 17 00:00:00 2001 From: cbockman <c.bockman@gmail.com> Date: Fri, 15 Dec 2017 14:56:45 -0800 Subject: [PATCH 0649/4095] Spelling fix: 'fo' => 'to' (#471) --- tensor2tensor/layers/modalities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index ddef5e67f..3dd321ca1 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -76,7 +76,7 @@ def _get_weights(self, hidden_dim=None): """Create or get concatenated embedding or softmax variable. Args: - hidden_dim: dim of the variable. Defaults fo self._body_input_depth + hidden_dim: dim of the variable. Defaults to self._body_input_depth Returns: a list of self._num_shards Tensors. From 5c8009561c4604be53e27310d0014ce69176c5db Mon Sep 17 00:00:00 2001 From: ZYShin <yeshuangzhu@gmail.com> Date: Mon, 18 Dec 2017 17:46:09 -0800 Subject: [PATCH 0650/4095] Fix translate_enzh dev data path error (#453) --- tensor2tensor/data_generators/translate_enzh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index 0ee3bfd08..52b364137 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -49,7 +49,7 @@ _ENZH_TEST_DATASETS = [[ "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.en.sgm", "dev/newsdev2017-zhen-ref.zh.sgm") + ("dev/newsdev2017-enzh-src.en.sgm", "dev/newsdev2017-enzh-ref.zh.sgm") ]] From 121e4ea9e765ac80a2dc24e1f986ea1cf792f0e0 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Sun, 3 Dec 2017 23:30:46 -0800 Subject: [PATCH 0651/4095] Add attention viz to colab notebook PiperOrigin-RevId: 177776073 --- tensor2tensor/bin/make_tf_configs.py | 86 ++ tensor2tensor/bin/t2t-tpu-trainer | 116 +++ tensor2tensor/bin/t2t_datagen.py | 211 +++++ tensor2tensor/bin/t2t_decoder.py | 103 ++ tensor2tensor/bin/t2t_trainer.py | 107 +++ .../data_generators/translate_enzh.py | 2 +- tensor2tensor/layers/modalities.py | 2 +- tensor2tensor/notebooks/hello_t2t.ipynb | 883 +++++++++++++++--- 8 files changed, 1394 insertions(+), 116 deletions(-) create mode 100644 tensor2tensor/bin/make_tf_configs.py create mode 100644 tensor2tensor/bin/t2t-tpu-trainer create mode 100644 tensor2tensor/bin/t2t_datagen.py create mode 100644 tensor2tensor/bin/t2t_decoder.py create mode 100644 tensor2tensor/bin/t2t_trainer.py diff --git a/tensor2tensor/bin/make_tf_configs.py b/tensor2tensor/bin/make_tf_configs.py new file mode 100644 index 000000000..ce0d638d6 --- /dev/null +++ b/tensor2tensor/bin/make_tf_configs.py @@ -0,0 +1,86 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Output command line arguments and json-encoded TF_CONFIGs. + +Usage: + +`t2t-make-tf-configs --masters="server1:1234" --ps="server3:2134,server4:2334"` + +Outputs 1 line per job to stdout, first the masters, then the parameter servers. +Each line has the TF_CONFIG, then a tab, then the command line flags for that +job. + +If there is a single master, it will have the `--sync` flag. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json + +# Dependency imports + +import tensorflow as tf + +flags = tf.flags +FLAGS = flags.FLAGS + +flags.DEFINE_string("masters", "", "Comma-separated list of master addresses") +flags.DEFINE_string("ps", "", "Comma-separated list of ps addresses") + + +def main(_): + if not (FLAGS.masters and FLAGS.ps): + raise ValueError("Must provide --masters and --ps") + + masters = FLAGS.masters.split(",") + ps = FLAGS.ps.split(",") + + cluster = {"ps": ps, "master": masters} + + for task_type, jobs in (("master", masters), ("ps", ps)): + for idx, job in enumerate(jobs): + if task_type == "master": + cmd_line_flags = " ".join([ + "--master=grpc://%s" % job, + "--ps_replicas=%d" % len(ps), + "--worker_replicas=%d" % len(masters), + "--worker_gpu=1", + "--worker_id=%d" % idx, + "--worker_job='/job:master'", + "--ps_gpu=1", + "--schedule=train", + "--sync" if len(masters) == 1 else "", + ]) + else: + cmd_line_flags = " ".join([ + "--master=grpc://%s" % job, + "--schedule=run_std_server", + ]) + + tf_config = json.dumps({ + "cluster": cluster, + "task": { + "type": task_type, + "index": idx + }, + "environment": "cloud", + }) + print("'%s'\t%s" % (tf_config, cmd_line_flags)) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t-tpu-trainer b/tensor2tensor/bin/t2t-tpu-trainer new file mode 100644 index 000000000..3e8dedd13 --- /dev/null +++ b/tensor2tensor/bin/t2t-tpu-trainer @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Train on TPU.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from tensor2tensor import models # pylint: disable=unused-import +from tensor2tensor import problems as problems_lib # pylint: disable=unused-import +from tensor2tensor.tpu import tpu_trainer_lib as lib +from tensor2tensor.utils import registry +from tensor2tensor.utils import usr_dir + +import tensorflow as tf + +flags = tf.flags +FLAGS = flags.FLAGS + +# See trainer_utils.py for additional command-line flags. +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_model calls, that will then be " + "available to the t2t-trainer.") +flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") +flags.DEFINE_integer("iterations_per_loop", 1000, + "Number of iterations in a TPU training loop.") +flags.DEFINE_bool("use_tpu", True, "Whether to use TPU.") + +# To maintain compatibility with some internal libs, we guard against these flag +# definitions possibly erroring. Apologies for the ugliness. +try: + flags.DEFINE_string("master", "", "Address of TensorFlow master.") + flags.DEFINE_string("output_dir", "", "Base output directory for run.") + flags.DEFINE_string("schedule", "continuous_train_and_eval", + "Method of Experiment to run.") + flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") +except: # pylint: disable=bare-except + pass + + +def get_problem_name(): + problems = FLAGS.problems.split("-") + assert len(problems) == 1 + return problems[0] + + +def create_hparams(): + hparams = registry.hparams(FLAGS.hparams_set)() + if FLAGS.hparams: + hparams = hparams.parse(FLAGS.hparams) + return hparams + + +def create_experiment_fn(): + return lib.create_experiment_fn( + FLAGS.model, + get_problem_name(), + FLAGS.data_dir, + FLAGS.train_steps, + FLAGS.eval_steps, + FLAGS.local_eval_frequency, + use_tpu=FLAGS.use_tpu) + + +def create_run_config(): + return lib.create_run_config( + model_dir=FLAGS.output_dir, + master=FLAGS.master, + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.tpu_num_shards, + log_device_placement=FLAGS.log_device_placement, + save_checkpoints_steps=max(FLAGS.iterations_per_loop, + FLAGS.local_eval_frequency), + num_gpus=FLAGS.worker_gpu, + gpu_order=FLAGS.gpu_order, + shard_to_cpu=FLAGS.locally_shard_to_cpu, + use_tpu=FLAGS.use_tpu) + + +def execute_schedule(exp): + if not hasattr(exp, FLAGS.schedule): + raise ValueError( + "Experiment has no method %s, from --schedule" % FLAGS.schedule) + getattr(exp, FLAGS.schedule)() + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + tf.set_random_seed(123) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + + exp_fn = create_experiment_fn() + exp = exp_fn(create_run_config(), create_hparams()) + execute_schedule(exp) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t_datagen.py b/tensor2tensor/bin/t2t_datagen.py new file mode 100644 index 000000000..c83428bc2 --- /dev/null +++ b/tensor2tensor/bin/t2t_datagen.py @@ -0,0 +1,211 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Produces the training and dev data for --problem into --data_dir. + +Produces sharded and shuffled TFRecord files of tensorflow.Example protocol +buffers for a variety of registered datasets. + +All Problems are registered with @registry.register_problem or are in +_SUPPORTED_PROBLEM_GENERATORS in this file. Each entry maps a string name +(selectable on the command-line with --problem) to a function that takes 2 +arguments - input_directory and mode (one of "train" or "dev") - and yields for +each training example a dictionary mapping string feature names to lists of +{string, int, float}. The generator will be run once for each mode. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import random +import tempfile + +# Dependency imports + +import numpy as np + +from tensor2tensor.data_generators import algorithmic_math +from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import +from tensor2tensor.data_generators import audio +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import snli +from tensor2tensor.data_generators import wsj_parsing +from tensor2tensor.utils import registry +from tensor2tensor.utils import usr_dir + +import tensorflow as tf + +flags = tf.flags +FLAGS = flags.FLAGS + +flags.DEFINE_string("data_dir", "", "Data directory.") +flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", + "Temporary storage directory.") +flags.DEFINE_string("problem", "", + "The name of the problem to generate data for.") +flags.DEFINE_string("exclude_problems", "", + "Comma-separates list of problems to exclude.") +flags.DEFINE_integer("num_shards", 0, "How many shards to use. Ignored for " + "registered Problems.") +flags.DEFINE_integer("max_cases", 0, + "Maximum number of cases to generate (unbounded if 0).") +flags.DEFINE_bool("only_list", False, + "If true, we only list the problems that will be generated.") +flags.DEFINE_integer("random_seed", 429459, "Random seed to use.") +flags.DEFINE_integer("task_id", -1, "For distributed data generation.") +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_problem calls, that will then be " + "available to t2t-datagen.") + +# Mapping from problems that we can generate data for to their generators. +# pylint: disable=g-long-lambda +_SUPPORTED_PROBLEM_GENERATORS = { + "algorithmic_algebra_inverse": ( + lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), + lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), + "parsing_english_ptb8k": ( + lambda: wsj_parsing.parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13, 2**9), + lambda: wsj_parsing.parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13, 2**9)), + "parsing_english_ptb16k": ( + lambda: wsj_parsing.parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9), + lambda: wsj_parsing.parsing_token_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, 2**14, 2**9)), + "inference_snli32k": ( + lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), + lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), + ), + "audio_timit_characters_test": ( + lambda: audio.timit_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, 1718), + lambda: audio.timit_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, 626)), + "audio_timit_tokens_8k_test": ( + lambda: audio.timit_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, 1718, + vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13), + lambda: audio.timit_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, 626, + vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13)), + "audio_timit_tokens_32k_test": ( + lambda: audio.timit_generator( + FLAGS.data_dir, FLAGS.tmp_dir, True, 1718, + vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15), + lambda: audio.timit_generator( + FLAGS.data_dir, FLAGS.tmp_dir, False, 626, + vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), +} + +# pylint: enable=g-long-lambda + + +def set_random_seed(): + """Set the random seed from flag everywhere.""" + tf.set_random_seed(FLAGS.random_seed) + random.seed(FLAGS.random_seed) + np.random.seed(FLAGS.random_seed) + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + + # Calculate the list of problems to generate. + problems = sorted( + list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems()) + for exclude in FLAGS.exclude_problems.split(","): + if exclude: + problems = [p for p in problems if exclude not in p] + if FLAGS.problem and FLAGS.problem[-1] == "*": + problems = [p for p in problems if p.startswith(FLAGS.problem[:-1])] + elif FLAGS.problem: + problems = [p for p in problems if p == FLAGS.problem] + else: + problems = [] + + # Remove TIMIT if paths are not given. + if not FLAGS.timit_paths: + problems = [p for p in problems if "timit" not in p] + # Remove parsing if paths are not given. + if not FLAGS.parsing_path: + problems = [p for p in problems if "parsing" not in p] + + if not problems: + problems_str = "\n * ".join( + sorted(list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())) + error_msg = ("You must specify one of the supported problems to " + "generate data for:\n * " + problems_str + "\n") + error_msg += ("TIMIT and parsing need data_sets specified with " + "--timit_paths and --parsing_path.") + raise ValueError(error_msg) + + if not FLAGS.data_dir: + FLAGS.data_dir = tempfile.gettempdir() + tf.logging.warning("It is strongly recommended to specify --data_dir. " + "Data will be written to default data_dir=%s.", + FLAGS.data_dir) + + tf.logging.info("Generating problems:\n%s" + % registry.display_list_by_prefix(problems, + starting_spaces=4)) + if FLAGS.only_list: + return + for problem in problems: + set_random_seed() + + if problem in _SUPPORTED_PROBLEM_GENERATORS: + generate_data_for_problem(problem) + else: + generate_data_for_registered_problem(problem) + + +def generate_data_for_problem(problem): + """Generate data for a problem in _SUPPORTED_PROBLEM_GENERATORS.""" + training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem] + + num_shards = FLAGS.num_shards or 10 + tf.logging.info("Generating training data for %s.", problem) + train_output_files = generator_utils.train_data_filenames( + problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, num_shards) + generator_utils.generate_files(training_gen(), train_output_files, + FLAGS.max_cases) + tf.logging.info("Generating development data for %s.", problem) + dev_output_files = generator_utils.dev_data_filenames( + problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, 1) + generator_utils.generate_files(dev_gen(), dev_output_files) + all_output_files = train_output_files + dev_output_files + generator_utils.shuffle_dataset(all_output_files) + + +def generate_data_for_registered_problem(problem_name): + tf.logging.info("Generating data for %s.", problem_name) + if FLAGS.num_shards: + raise ValueError("--num_shards should not be set for registered Problem.") + problem = registry.problem(problem_name) + task_id = None if FLAGS.task_id < 0 else FLAGS.task_id + problem.generate_data( + os.path.expanduser(FLAGS.data_dir), + os.path.expanduser(FLAGS.tmp_dir), + task_id=task_id) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t_decoder.py b/tensor2tensor/bin/t2t_decoder.py new file mode 100644 index 000000000..16da8567d --- /dev/null +++ b/tensor2tensor/bin/t2t_decoder.py @@ -0,0 +1,103 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Decode from trained T2T models. + +This binary performs inference using the Estimator API. + +Example usage to decode from dataset: + + t2t-decoder \ + --data_dir ~/data \ + --problems=algorithmic_identity_binary40 \ + --model=transformer + --hparams_set=transformer_base + +Set FLAGS.decode_interactive or FLAGS.decode_from_file for alternative decode +sources. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +# Dependency imports + +from tensor2tensor.utils import decoding +from tensor2tensor.utils import trainer_utils +from tensor2tensor.utils import usr_dir + +import tensorflow as tf + +flags = tf.flags +FLAGS = flags.FLAGS + +flags.DEFINE_string("output_dir", "", "Training directory to load from.") +flags.DEFINE_string("decode_from_file", None, + "Path to the source file for decoding") +flags.DEFINE_string("decode_to_file", None, + "Path to the decoded (output) file") +flags.DEFINE_bool("decode_interactive", False, + "Interactive local inference mode.") +flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_model calls, that will then be " + "available to the t2t-decoder.") +flags.DEFINE_string("master", "", "Address of TensorFlow master.") +flags.DEFINE_string("schedule", "train_and_evaluate", + "Must be train_and_evaluate for decoding.") + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + trainer_utils.log_registry() + trainer_utils.validate_flags() + assert FLAGS.schedule == "train_and_evaluate" + data_dir = os.path.expanduser(FLAGS.data_dir) + output_dir = os.path.expanduser(FLAGS.output_dir) + + hparams = trainer_utils.create_hparams( + FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams) + trainer_utils.add_problem_hparams(hparams, FLAGS.problems) + estimator, _ = trainer_utils.create_experiment_components( + data_dir=data_dir, + model_name=FLAGS.model, + hparams=hparams, + run_config=trainer_utils.create_run_config(output_dir)) + + decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) + decode_hp.add_hparam("shards", FLAGS.decode_shards) + decode_hp.add_hparam("shard_id", FLAGS.worker_id) + if FLAGS.decode_interactive: + decoding.decode_interactively(estimator, decode_hp) + elif FLAGS.decode_from_file: + decoding.decode_from_file(estimator, FLAGS.decode_from_file, decode_hp, + FLAGS.decode_to_file) + else: + decoding.decode_from_dataset( + estimator, + FLAGS.problems.split("-"), + decode_hp, + decode_to_file=FLAGS.decode_to_file, + dataset_split="test" if FLAGS.eval_use_test_set else None) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py new file mode 100644 index 000000000..5de5c8d9e --- /dev/null +++ b/tensor2tensor/bin/t2t_trainer.py @@ -0,0 +1,107 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Trainer for T2T models. + +This binary perform training, evaluation, and inference using +the Estimator API with tf.learn Experiment objects. + +To train your model, for example: + t2t-trainer \ + --data_dir ~/data \ + --problems=algorithmic_identity_binary40 \ + --model=transformer + --hparams_set=transformer_base +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +# Dependency imports + +from tensor2tensor.utils import registry +from tensor2tensor.utils import trainer_utils +from tensor2tensor.utils import usr_dir + +import tensorflow as tf + +flags = tf.flags +FLAGS = flags.FLAGS + +# See trainer_utils.py for additional command-line flags. +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_model calls, that will then be " + "available to the t2t-trainer.") +flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", + "Temporary storage directory.") +flags.DEFINE_bool("generate_data", False, "Generate data before training?") + +flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") +flags.DEFINE_string("output_dir", "", "Base output directory for run.") +flags.DEFINE_string("master", "", "Address of TensorFlow master.") +flags.DEFINE_string("schedule", "train_and_evaluate", + "Method of tf.contrib.learn.Experiment to run.") +flags.DEFINE_bool("profile", False, "Profile performance?") + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + trainer_utils.log_registry() + trainer_utils.validate_flags() + output_dir = os.path.expanduser(FLAGS.output_dir) + tmp_dir = os.path.expanduser(FLAGS.tmp_dir) + if not FLAGS.data_dir: + raise ValueError("You must specify a --data_dir") + data_dir = os.path.expanduser(FLAGS.data_dir) + tf.gfile.MakeDirs(output_dir) + + # Generate data if requested. + if FLAGS.generate_data: + tf.gfile.MakeDirs(data_dir) + tf.gfile.MakeDirs(tmp_dir) + for problem_name in FLAGS.problems.split("-"): + tf.logging.info("Generating data for %s" % problem_name) + problem = registry.problem(problem_name) + problem.generate_data(data_dir, tmp_dir) + + # Run the trainer. + def run_experiment(): + trainer_utils.run( + data_dir=data_dir, + model=FLAGS.model, + output_dir=output_dir, + train_steps=FLAGS.train_steps, + eval_steps=FLAGS.eval_steps, + schedule=FLAGS.schedule) + + if FLAGS.profile: + with tf.contrib.tfprof.ProfileContext("t2tprof", + trace_steps=range(100), + dump_steps=range(100)) as pctx: + opts = tf.profiler.ProfileOptionBuilder.time_and_memory() + pctx.add_auto_profiling("op", opts, range(100)) + run_experiment() + else: + run_experiment() + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index 52b364137..0ee3bfd08 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -49,7 +49,7 @@ _ENZH_TEST_DATASETS = [[ "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-enzh-src.en.sgm", "dev/newsdev2017-enzh-ref.zh.sgm") + ("dev/newsdev2017-zhen-src.en.sgm", "dev/newsdev2017-zhen-ref.zh.sgm") ]] diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 3dd321ca1..ddef5e67f 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -76,7 +76,7 @@ def _get_weights(self, hidden_dim=None): """Create or get concatenated embedding or softmax variable. Args: - hidden_dim: dim of the variable. Defaults to self._body_input_depth + hidden_dim: dim of the variable. Defaults fo self._body_input_depth Returns: a list of self._num_shards Tensors. diff --git a/tensor2tensor/notebooks/hello_t2t.ipynb b/tensor2tensor/notebooks/hello_t2t.ipynb index 797b0b98b..fd8547e97 100644 --- a/tensor2tensor/notebooks/hello_t2t.ipynb +++ b/tensor2tensor/notebooks/hello_t2t.ipynb @@ -7,7 +7,12 @@ "version": "0.3.2", "views": {}, "default_view": {}, - "provenance": [], + "provenance": [ + { + "file_id": "1-VScmaLkMqWiSbqgUCFWefzisSREd8l1", + "timestamp": 1512175750497 + } + ], "collapsed_sections": [] } }, @@ -135,18 +140,18 @@ } ], "base_uri": "https://localhost:8080/", - "height": 1224 + "height": 1241 }, - "outputId": "2edd5f47-1ebb-4d18-e57c-741c966afc10", + "outputId": "f0f13103-a437-4b95-ac9d-38f2b57a5f4c", "executionInfo": { "status": "ok", - "timestamp": 1512173990900, + "timestamp": 1512371452348, "user_tz": 480, - "elapsed": 272, + "elapsed": 505, "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" + "displayName": "Lukasz Kaiser", + "photoUrl": "//lh3.googleusercontent.com/-CbWIwcQ_VsA/AAAAAAAAAAI/AAAAAAAAAB8/jloHVR1qOhg/s50-c-k-no/photo.jpg", + "userId": "109750154298538986950" } } }, @@ -229,6 +234,7 @@ " 'translate_ende_wmt_bpe32k',\n", " 'translate_ende_wmt_characters',\n", " 'translate_enfr_wmt32k',\n", + " 'translate_enfr_wmt32k_packed',\n", " 'translate_enfr_wmt8k',\n", " 'translate_enfr_wmt_characters',\n", " 'translate_enfr_wmt_small32k',\n", @@ -256,22 +262,22 @@ }, "output_extras": [ { - "item_id": 3 + "item_id": 12 } ], "base_uri": "https://localhost:8080/", - "height": 204 + "height": 306 }, - "outputId": "0ea990ae-6715-4ada-d3a2-a5312faaaa39", + "outputId": "7e0cafb5-d035-49a7-9ff4-7f4150c905c7", "executionInfo": { "status": "ok", - "timestamp": 1512173992544, + "timestamp": 1512371478309, "user_tz": 480, - "elapsed": 955, + "elapsed": 21361, "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" + "displayName": "Lukasz Kaiser", + "photoUrl": "//lh3.googleusercontent.com/-CbWIwcQ_VsA/AAAAAAAAAAI/AAAAAAAAAB8/jloHVR1qOhg/s50-c-k-no/photo.jpg", + "userId": "109750154298538986950" } } }, @@ -288,17 +294,23 @@ { "output_type": "stream", "text": [ + "INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /content/t2t/tmp/train-images-idx3-ubyte.gz\n", + "100% completed\n", + "INFO:tensorflow:Successfully downloaded train-images-idx3-ubyte.gz, 9912422 bytes.\n", + "INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", + "113% completed\n", + "INFO:tensorflow:Successfully downloaded train-labels-idx1-ubyte.gz, 28881 bytes.\n", + "INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", + "100% completed\n", + "INFO:tensorflow:Successfully downloaded t10k-images-idx3-ubyte.gz, 1648877 bytes.\n", + "INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", + "180% completed\n", + "INFO:tensorflow:Successfully downloaded t10k-labels-idx1-ubyte.gz, 4542 bytes.\n", "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-images-idx3-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/train-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-images-idx3-ubyte.gz\n", - "INFO:tensorflow:Not downloading, file already found: /content/t2t/tmp/t10k-labels-idx1-ubyte.gz\n", - "INFO:tensorflow:Skipping generator because outputs files exist\n", - "INFO:tensorflow:Skipping generator because outputs files exist\n", - "INFO:tensorflow:Skipping shuffle because output files exist\n" + "INFO:tensorflow:Shuffling data...\n" ], "name": "stdout" } @@ -315,25 +327,25 @@ }, "output_extras": [ { - "item_id": 2 + "item_id": 1 }, { - "item_id": 3 + "item_id": 2 } ], "base_uri": "https://localhost:8080/", "height": 381 }, - "outputId": "121d463f-adaf-4340-a5cb-12e931fd0fdb", + "outputId": "3b33057c-5082-4377-ec83-79f67e5a8e84", "executionInfo": { "status": "ok", - "timestamp": 1512173993175, + "timestamp": 1512371501917, "user_tz": 480, - "elapsed": 561, + "elapsed": 471, "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" + "displayName": "Lukasz Kaiser", + "photoUrl": "//lh3.googleusercontent.com/-CbWIwcQ_VsA/AAAAAAAAAAI/AAAAAAAAAB8/jloHVR1qOhg/s50-c-k-no/photo.jpg", + "userId": "109750154298538986950" } } }, @@ -353,16 +365,16 @@ "output_type": "stream", "text": [ "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-train*\n", - "Label: 6\n" + "Label: 7\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUsAAAFKCAYAAACU6307AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAE4hJREFUeJzt3X1MlfX/x/HXCWLC1KEklq27OZ1M\ncKvUic4bFC3amje1VERzc02XOm9Gxpyo5SaKaN61RFO3ZK3T+CdXLsjMcoo4aVMP/6D+YcwMQZnp\nRFM6vz9++7KQczhvjpyb6/h8bPzB5/qcz/V+72IvrnOuc53j8nq9XgEAOvVUpAsAACcgLAHAgLAE\nAAPCEgAMCEsAMCAsAcDCGwaSfP5cuHDB7zan/sRiT7HaFz055ydcfXXGFY73WbpcLp/jXq/X7zan\nisWepNjsi56cI1x9dRaH8cEuunHjRp07d04ul0urV6/WsGHDgl0KAKJeUGF55swZXblyRW63W5cv\nX9bq1avldru7uzYAiBpBXeCpqqpSdna2JGngwIG6deuW7ty5062FAUA0CerMsqmpSUOHDm37vW/f\nvmpsbFTPnj19zr9w4YLS09N9bgvDS6ZhF4s9SbHZFz05R6T7Cvo1y/8K1ERGRobfx8Xai9Gx2JMU\nm33Rk3NEwwWeoJ6Gp6amqqmpqe3369evq1+/fsEsBQCOEFRYjhkzRhUVFZKk2tpapaam+n0KDgCx\nIKin4a+99pqGDh2qWbNmyeVyad26dd1dFwBEFd6U3s1isScpNvuiJ+dw7GuWAPCkISwBwICwBAAD\nwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhL\nADAgLAHAgLAEAAPCEgAMCEsAMAjqq3CBWDV8+HDTvMrKSvOaf/zxh3ludna2eW5TU5N5Lh4fZ5YA\nYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAbc7Av/x/vvvm+YlJyeb1+zK\n3Pnz55vnlpSUmOfi8XFmCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkABtzBg5j3\n3HPPmbfl5uaa1rxx44Z5/4WFhea5p06dMs9FeHFmCQAGQZ1ZVldXa9myZRo0aJAkafDgwV367wkA\nThP00/CRI0dq586d3VkLAEQtnoYDgEHQYXnp0iUtWrRIs2fP1smTJ7uzJgCIOi6v1+vt6oMaGhpU\nU1OjnJwc1dfXa968eaqsrFRCQoLP+R6PR+np6Y9dLABESlBh+ah3331Xn332mV544QXfO3G5fI57\nvV6/25wqFnuSnN2Xv7cO/fnnnxowYEC7MY/HY1rz33//Ne8/VG8dOn/+fIcxJx+nzoSrr87iMKin\n4YcPH9b+/fslSY2Njbpx44b69+8fXHUA4ABBXQ2fOHGi8vPz9fPPP+vBgwdav36936fgABALggrL\nnj17as+ePd1dCwBELW53RMwrKCgwb+vTp49pze3bt5v3z4lFbOB9lgBgQFgCgAFhCQAGhCUAGBCW\nAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoBBt3xEW8Cd8BFtjhdtfc2dO9c89+DBgz7H4+Li1Nra2m7s\n5s2bpjVfffVV8/6vXr1qnvu4ou04dRfHfkQbADxpCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICw\nBAADwhIADPjCMjjSe++9Z5771FP+zwke3fbVV1+Z1gznXTmIDpxZAoABYQkABoQlABgQlgBgQFgC\ngAFhCQAGhCUAGBCWAGBAWAKAAWEJAAbc7oiokpeXZ5r35ptvmte8d++ez/HExMQO26y3O+LJw5kl\nABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYMDtjogqs2fPNs2Li4szr7l9\n+3af4/n5+fr888/bjZ0/f968Lp4spjPLuro6ZWdnq6ysTJJ07do1zZ07V7m5uVq2bJn++eefkBYJ\nAJEWMCzv3r2rDRs2KDMzs21s586dys3N1ddff62XXnpJ5eXlIS0SACItYFgmJCRo3759Sk1NbRur\nrq7WpEmTJElZWVmqqqoKXYUAEAUCvmYZHx+v+Pj201paWpSQkCBJSklJUWNjY2iqA4Ao8dgXeLxe\nb8A5Fy5cUHp6etCPd5pY7Elybl/5+fnmbZ3NdQqnHqdAIt1XUGGZlJSke/fuqUePHmpoaGj3FN2X\njIwMn+Ner1culyuYEqJWLPYkha+vH374wTQvJyfHvObWrVt9jufn56ukpKTd2EcffWReNxrx9/f4\n+/EnqPdZjh49WhUVFZKkyspKjR07NrjKAMAhAp5Zejwebd68WVevXlV8fLwqKipUUlKigoICud1u\nDRgwQNOmTQtHrQAQMQHDMj09XYcOHeowfvDgwZAUBADRiDt4EHJz5swxz508ebJpnr8vIfPl119/\n9Tmen5/fYVufPn1MazY3N5v3j9jAveEAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCW\nAGBAWAKAAbc7IuTGjRtnnvvoB0378/3335vXHD58uHlbaWmpac0dO3aY919cXGyei+jFmSUAGBCW\nAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBg4PJ6vd6Q78Tl8jnu9Xr9bnOqWOxJ\n6thXYmKi+bGXL182z3322WdN82pra81rDh061Oe4y+VSsH/+J0+eNM8dO3ZsUPsIxpPy9xfK/fjD\nmSUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABjwhWUIyqxZs8xzrXfldIW/u3LC\n5dSpUxHdP8KPM0sAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgNsdEZRR\no0ZFugSzn376yef4lClTOmybPHmyac2WlpbHrgvOwpklABiYwrKurk7Z2dkqKyuTJBUUFOjtt9/W\n3LlzNXfuXB0/fjyUNQJAxAV8Gn737l1t2LBBmZmZ7cZXrlyprKyskBUGANEk4JllQkKC9u3bp9TU\n1HDUAwBRyeX1er2Wibt27VKfPn2Ul5engoICNTY26sGDB0pJSVFhYaH69u3r97Eej0fp6endVjQA\nhFtQV8OnTp2q5ORkpaWlae/evdq9e7fWrl3rd35GRobPca/XK5fLFUwJUSsWe5I69lVaWmp+7Acf\nfBCKksw6uxpeWVnZbsx6NfzTTz8173/9+vXmuY/rSfn7C+V+/AnqanhmZqbS0tIkSRMnTlRdXV1w\nlQGAQwQVlkuXLlV9fb0kqbq6WoMGDerWogAg2gR8Gu7xeLR582ZdvXpV8fHxqqioUF5enpYvX67E\nxEQlJSWpqKgoHLUCQMQEDMv09HQdOnSow/gbb7wRkoIAIBpxuyPaef75503bZs6cGY5y/Prxxx/N\nc8+dO+dzfMqUKfr999/bjfm7GPmo/fv3m/eP2MDtjgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABtzuinc4+e/K/23r37h2S/T98+NA07+DBg+Y1Fy9e7Hfbo1+XYl33f5+6\nhScHZ5YAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGDAHTxoJyUlJaht3eW7774z\nzbtz5455zddff9287fjx4+Z18WThzBIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8IS\nAAwISwAw4HZHhNz9+/fNc4cMGWKa9+2335rXrKmp8Tk+fvz4Dtu2bt1qXhdPFs4sAcCAsAQAA8IS\nAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAANud0TIWb+xUZLS0tJM85KSksxrfvLJJz7H\njx071mHb7du3zeviyWIKy+LiYtXU1Ojhw4dauHChMjIytGrVKrW2tqpfv37asmWLEhISQl0rAERM\nwLA8ffq0Ll68KLfbrebmZk2fPl2ZmZnKzc1VTk6Otm3bpvLycuXm5oajXgCIiICvWY4YMUI7duyQ\nJPXu3VstLS2qrq7WpEmTJElZWVmqqqoKbZUAEGEBwzIuLq7t9aHy8nKNGzdOLS0tbU+7U1JS1NjY\nGNoqASDCXF6v12uZePToUZWWlurAgQOaMmVK29nklStX9PHHH+ubb77x+1iPx6P09PTuqRgAIsB0\ngefEiRPas2ePvvzyS/Xq1UtJSUm6d++eevTooYaGBqWmpnb6+IyMDJ/jXq9XLper61VHMaf3tGvX\nLp/jS5Ys0e7du9t+X7x4sXnNrnxQr/VqeFf++WZnZ/scP3bsmCZOnNhu7JdffjGvG42c/vfnT7j6\n6uzcMeDT8Nu3b6u4uFilpaVKTk6WJI0ePVoVFRWSpMrKSo0dO7abSgWA6BTwzPLIkSNqbm7W8uXL\n28Y2bdqkNWvWyO12a8CAAZo2bVpIiwSASAsYljNnztTMmTM7jB88eDAkBQFANOIOHrRz5cqVoLZ1\nxtc/W3+M1xv1xRdfmNfs7HVIp79GifDh3nAAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIA\nDAhLADAgLAHAwPx5lo+1Ez8frRSLHycViz1JHfvaunWr+bErVqwwz920aZNpXlFRkXlNf19CFovH\nKhZ7khzyEW0AAMISAEwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMuN2xm8ViT1Js\n9kVPzsHtjgDgEIQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoAB\nYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAbxlknFxcWqqanRw4cPtXDhQh07\ndky1tbVKTk6WJC1YsEATJkwIZZ0AEFEBw/L06dO6ePGi3G63mpubNX36dI0aNUorV65UVlZWOGoE\ngIgLGJYjRozQsGHDJEm9e/dWS0uLWltbQ14YAEQTl7ezbxV/hNvt1tmzZxUXF6fGxkY9ePBAKSkp\nKiwsVN++ff3vxM+Xo8fiF8LHYk9SbPZFT84Rrr46i0NzWB49elSlpaU6cOCAPB6PkpOTlZaWpr17\n9+qvv/7S2rVr/T7W4/EoPT2965UDQLTwGvz222/ed955x9vc3Nxh28WLF71z5szp9PGSfP50ts2p\nP7HYU6z2RU/O+QlXX50J+Nah27dvq7i4WKWlpW1Xv5cuXar6+npJUnV1tQYNGhRoGQBwtIAXeI4c\nOaLm5mYtX768bWzGjBlavny5EhMTlZSUpKKiopAWCQCR1qULPEHvhAs8jheLfdGTc4Srr87ikDt4\nAMCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQA\nA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAIOwfBUuADgdZ5YAYEBYAoABYQkABoQl\nABgQlgBgQFgCgEF8JHa6ceNGnTt3Ti6XS6tXr9awYcMiUUa3qq6u1rJlyzRo0CBJ0uDBg1VYWBjh\nqoJXV1enDz/8UPPnz1deXp6uXbumVatWqbW1Vf369dOWLVuUkJAQ6TK75NGeCgoKVFtbq+TkZEnS\nggULNGHChMgW2UXFxcWqqanRw4cPtXDhQmVkZDj+OEkd+zp27FjEj1XYw/LMmTO6cuWK3G63Ll++\nrNWrV8vtdoe7jJAYOXKkdu7cGekyHtvdu3e1YcMGZWZmto3t3LlTubm5ysnJ0bZt21ReXq7c3NwI\nVtk1vnqSpJUrVyorKytCVT2e06dP6+LFi3K73Wpubtb06dOVmZnp6OMk+e5r1KhRET9WYX8aXlVV\npezsbEnSwIEDdevWLd25cyfcZaATCQkJ2rdvn1JTU9vGqqurNWnSJElSVlaWqqqqIlVeUHz15HQj\nRozQjh07JEm9e/dWS0uL44+T5Luv1tbWCFcVgbBsampSnz592n7v27evGhsbw11GSFy6dEmLFi3S\n7NmzdfLkyUiXE7T4+Hj16NGj3VhLS0vb07mUlBTHHTNfPUlSWVmZ5s2bpxUrVujmzZsRqCx4cXFx\nSkpKkiSVl5dr3Lhxjj9Oku++4uLiIn6sIvKa5X/Fyt2WL7/8spYsWaKcnBzV19dr3rx5qqysdOTr\nRYHEyjGbOnWqkpOTlZaWpr1792r37t1au3ZtpMvqsqNHj6q8vFwHDhzQlClT2sadfpz+25fH44n4\nsQr7mWVqaqqamprafr9+/br69esX7jK6Xf/+/fXWW2/J5XLpxRdf1DPPPKOGhoZIl9VtkpKSdO/e\nPUlSQ0NDTDydzczMVFpamiRp4sSJqquri3BFXXfixAnt2bNH+/btU69evWLmOD3aVzQcq7CH5Zgx\nY1RRUSFJqq2tVWpqqnr27BnuMrrd4cOHtX//fklSY2Ojbty4of79+0e4qu4zevTotuNWWVmpsWPH\nRriix7d06VLV19dL+v/XZP/3TganuH37toqLi1VaWtp2lTgWjpOvvqLhWEXkU4dKSkp09uxZuVwu\nrVu3TkOGDAl3Cd3uzp07ys/P199//60HDx5oyZIlGj9+fKTLCorH49HmzZt19epVxcfHq3///iop\nKVFBQYHu37+vAQMGqKioSE8//XSkSzXz1VNeXp727t2rxMREJSUlqaioSCkpKZEu1cztdmvXrl16\n5ZVX2sY2bdqkNWvWOPY4Sb77mjFjhsrKyiJ6rPiINgAw4A4eADAgLAHAgLAEAAPCEgAMCEsAMCAs\nAcCAsAQAA8ISAAz+D2GuR1qUzSXkAAAAAElFTkSuQmCC\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUsAAAFKCAYAAACU6307AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAEhNJREFUeJzt3V1IlPn7x/HP/J2VGir85arQYrtL\nGCtpBwuFGj1YEriwlNHDJiULHRRLkVmESA8LQZa5Rm4HqT0crCzMNkcdBErEQrQ6sR6EemJ1UCKt\naUkl2W7J/A9+/GTbHfVympn7nun9Ag+85+s918V3+nQ/zHfGEwqFQgIATOn/nC4AABIBYQkABoQl\nABgQlgBgQFgCgAFhCQAWoTiQFPanu7t70scS9ScZe0rWvugpcX7i1ddUPPF4n6XH4wm7PRQKTfpY\nokrGnqTk7IueEke8+poqDr2R7vTkyZO6e/euPB6PampqtHTp0kh3BQCuF1FY3rlzRw8fPpTf79eD\nBw9UU1Mjv98f7doAwDUiusHT0dGhkpISSdKiRYv0/PlzjY6ORrUwAHCTiI4sh4eHtWTJkonf58+f\nr6GhIc2ZMyfs+O7ubuXl5YV9LA6XTOMuGXuSkrMvekocTvcV8TXLv5uuifz8/En/LtkuRidjT1Jy\n9kVPicMNN3giOg3PzMzU8PDwxO9PnjxRRkZGJLsCgIQQUViuWLFCbW1tkqTe3l5lZmZOegoOAMkg\notPwL7/8UkuWLNE333wjj8ej48ePR7suAHAV3pQeZcnYk5ScfdFT4kjYa5YA8KEhLAHAgLAEAAPC\nEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsA\nMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCA\nsAQAA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8IS\nAAy8kfxRMBjU/v37lZOTI0lavHixjh49GtXCAMBNIgpLSVq+fLkaGxujWQsAuBan4QBgEHFY3r9/\nX3v27NH27dt1+/btaNYEAK7jCYVCoZn+0eDgoLq6ulRaWqr+/n5VVFSovb1dqampYcf39PQoLy/v\nvYsFAKdEFJb/tHnzZp09e1bZ2dnhn8TjCbs9FApN+liiSsaepOTsi54SR7z6mioOIzoNv3btmi5d\nuiRJGhoa0tOnT5WVlRVZdQCQACI6shwdHdWhQ4f04sULvXnzRnv37tXq1asnfxKOLBNeMvZFT4nD\nDUeWUTkNnw5hmfiSsS96ShxuCMuI32cJJIrJrqWHe8z6zo6p9vlPDQ0N5rEHDx40j0V88T5LADAg\nLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwIDljkh6Uy1h/OdjM1nGaBUMBqO+\nT8QfR5YAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGDAtztGWTL2JMWvL+sKGr/f\nb95nYWFhpOVMqqOjwzy2qKgo6s8/GV5/7/88k+HIEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwB\nwICwBAADwhIADAhLADDgC8vwjq1bt5oe++STT8z73LJli3lsLJYmzkR/f79pXDyXMMIdOLIEAAPC\nEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwICwBAADwhIADFju6DK//fabaZwTywJn8o2Kierq\n1atOlwCXMh1Z9vX1qaSkRK2trZKkx48fa+fOnSovL9f+/fv1119/xbRIAHDatGH56tUrnThx4p0j\nmcbGRpWXl+vnn3/Wp59+qkAgENMiAcBp04ZlamqqWlpalJmZObEtGAxq3bp1kqTi4uIZfeE8ACSi\naa9Zer1eeb3vDhsbG1NqaqokKT09XUNDQ7GpDgBc4r1v8IRCoWnHdHd3Ky8vL+K/TzTJ2NOHoqqq\nKqrjnJCsrz+n+4ooLH0+n16/fq1Zs2ZpcHDwnVP0cPLz88NuD4VC8ng8kZTgWu/bk5vvhn8IGhoa\nTOMOHjwY40oik4z/pqT49TVVIEf0PsuioiK1tbVJktrb27Vy5crIKgOABDHtkWVPT49Onz6tgYEB\neb1etbW1qb6+XtXV1fL7/VqwYIE2btwYj1oBwDHThmVeXp5++umnf22/cuVKTAoCADdiBY/LOH0t\ncrIv7MrOzn7nsbNnz5r3OTAwYB77yy+/mMbF6mJ/MBiMyX6R+FgbDgAGhCUAGBCWAGBAWAKAAWEJ\nAAaEJQAYEJYAYEBYAoABYQkABoQlABh4QnH4kLjJPlopGT9O6n17+uGHH0zjZrIsz7qEcCpum6tY\nvWwXLlxoGjfZslCnuW2eoiVhP6INAD40hCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkA\nBoQlABiw3DHKkrEnyX19xepl66YeI+G2eYoWljsCQIIgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQA\nA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAy8\nThcAJKKCggLz2IGBAfPY/v7+SMpBHHBkCQAGprDs6+tTSUmJWltbJUnV1dX6+uuvtXPnTu3cuVO/\n/vprLGsEAMdNexr+6tUrnThxQoWFhe9sr6qqUnFxccwKAwA3mfbIMjU1VS0tLcrMzIxHPQDgStMe\nWXq9Xnm9/x7W2tqqK1euKD09XUePHtX8+fMn3Ud3d7fy8vLCPhYKhWZQbmJIxp6k5O3r75Khx2To\nIRyn+4robviGDRuUlpam3NxcNTc36/z58zp27Nik4/Pz88NuD4VC8ng8kZTgWsnYk+S+vmL1D8fa\no1vvhrttnqIlXn1N9bqK6G54YWGhcnNzJUlr165VX19fZJUBQIKIKCz37ds38T9gMBhUTk5OVIsC\nALeZ9jS8p6dHp0+f1sDAgLxer9ra2rRjxw5VVlZq9uzZ8vl8qq2tjUetAOAYTygOV00nu9aQjNdX\nkrEnyX19cc0yPLfNU7S44Zolyx0RkUePHpnHZmdnm8devXo1knKiJhYhPJOetm7dGvXnR3Sw3BEA\nDAhLADAgLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwYLkjXGXLli1Ol2AykyWMBw8e\njGEliBeOLAHAgLAEAAPCEgAMCEsAMCAsAcCAsAQAA8ISAAwISwAwICwBwIBvd4yyZOxJ+ndfM/kS\nss2bN0e9noaGhqjvU5IWLlxoGve+38IYKx/K6y+WzzMZjiwBwICwBAADwhIADAhLADAgLAHAgLAE\nAAPCEgAMCEsAMCAsAcCAsAQAA5Y7Rlky9iS5r69YvWzd1GMk3DZP0cJyRwBIEIQlABgQlgBgQFgC\ngAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYeJ0uAHCTgoIC07jOzs4YVwK3MYVlXV2durq69Pbt\nW+3evVv5+fk6fPiwxsfHlZGRoTNnzig1NTXWtQKAY6YNy87OTt27d09+v18jIyMqKytTYWGhysvL\nVVpaqoaGBgUCAZWXl8ejXgBwxLTXLJctW6Zz585JkubNm6exsTEFg0GtW7dOklRcXKyOjo7YVgkA\nDps2LFNSUuTz+SRJgUBAq1at0tjY2MRpd3p6uoaGhmJbJQA4zHyD58aNGwoEArp8+bLWr18/sd3y\nuYLd3d3Ky8sL+1gcPk4z7pKxJyl5+/q7ZDhLStZ5crovU1jeunVLFy5c0MWLFzV37lz5fD69fv1a\ns2bN0uDgoDIzM6f8+/z8/LDbk/GDSpOxJ8l9fcXqH05hYaFpnFvvhrttnqIlIT789+XLl6qrq1NT\nU5PS0tIkSUVFRWpra5Mktbe3a+XKlVEqFQDcadojy+vXr2tkZESVlZUT206dOqUjR47I7/drwYIF\n2rhxY0yLBACn8R08UZaMPUnu64vT8PDcNk/R4obTcFbwICH19/ebx2ZnZ5vHDgwMRFIOPgCsDQcA\nA8ISAAwISwAwICwBwICwBAADwhIADAhLADAgLAHAgLAEAAPCEgAMWO6IhDSTtdkzWe5oXRs+k+WW\nSA4cWQKAAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKA\nAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaE\nJQAYEJYAYEBYAoABYQkABoQlABh4nS4AiEQgEDCPLSgoCLs9Oztb/f3972x79OjRe9WF5GUKy7q6\nOnV1dent27favXu3bt68qd7eXqWlpUmSdu3apTVr1sSyTgBw1LRh2dnZqXv37snv92tkZERlZWUq\nKChQVVWViouL41EjADhu2rBctmyZli5dKkmaN2+exsbGND4+HvPCAMBNpr3Bk5KSIp/PJ+m/14lW\nrVqllJQUtba2qqKiQgcOHNCzZ89iXigAOMkTCoVCloE3btxQU1OTLl++rJ6eHqWlpSk3N1fNzc36\n448/dOzYsUn/tqenR3l5eVErGgDizRSWt27d0rlz53Tx4sWJmzr/c//+fX3//fdqbW2d/Ek8nrDb\nQ6HQpI8lqmTsSXJfX1u3bjWPra+vD7s93N1w6347OzvNzx9PbpunaIlXX1PF4bSn4S9fvlRdXZ2a\nmpomgnLfvn0TL7JgMKicnJwolQoA7jTtDZ7r169rZGRElZWVE9s2bdqkyspKzZ49Wz6fT7W1tTEt\nEgCcNm1Ybtu2Tdu2bfvX9rKyspgUBABuxHJHADAw3w1/ryfhBk/CS8a+6ClxJMQNHgAAYQkAJoQl\nABgQlgBgQFgCgAFhCQAGhCUAGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkABoQlABgQlgBg\nQFgCgAFhCQAGhCUAGBCWAGAQly8sA4BEx5ElABgQlgBgQFgCgAFhCQAGhCUAGBCWAGDgdeJJT548\nqbt378rj8aimpkZLly51ooyoCgaD2r9/v3JyciRJixcv1tGjRx2uKnJ9fX367rvv9O2332rHjh16\n/PixDh8+rPHxcWVkZOjMmTNKTU11uswZ+WdP1dXV6u3tVVpamiRp165dWrNmjbNFzlBdXZ26urr0\n9u1b7d69W/n5+Qk/T9K/+7p586bjcxX3sLxz544ePnwov9+vBw8eqKamRn6/P95lxMTy5cvV2Njo\ndBnv7dWrVzpx4oQKCwsntjU2Nqq8vFylpaVqaGhQIBBQeXm5g1XOTLieJKmqqkrFxcUOVfV+Ojs7\nde/ePfn9fo2MjKisrEyFhYUJPU9S+L4KCgocn6u4n4Z3dHSopKREkrRo0SI9f/5co6Oj8S4DU0hN\nTVVLS4syMzMntgWDQa1bt06SVFxcrI6ODqfKi0i4nhLdsmXLdO7cOUnSvHnzNDY2lvDzJIXva3x8\n3OGqHAjL4eFh/ec//5n4ff78+RoaGop3GTFx//597dmzR9u3b9ft27edLidiXq9Xs2bNemfb2NjY\nxOlcenp6ws1ZuJ4kqbW1VRUVFTpw4ICePXvmQGWRS0lJkc/nkyQFAgGtWrUq4edJCt9XSkqK43Pl\nyDXLv0uW1ZafffaZ9u7dq9LSUvX396uiokLt7e0Jeb1oOskyZxs2bFBaWppyc3PV3Nys8+fP69ix\nY06XNWM3btxQIBDQ5cuXtX79+ontiT5Pf++rp6fH8bmK+5FlZmamhoeHJ35/8uSJMjIy4l1G1GVl\nZemrr76Sx+PRwoUL9fHHH2twcNDpsqLG5/Pp9evXkqTBwcGkOJ0tLCxUbm6uJGnt2rXq6+tzuKKZ\nu3Xrli5cuKCWlhbNnTs3aebpn325Ya7iHpYrVqxQW1ubJKm3t1eZmZmaM2dOvMuIumvXrunSpUuS\npKGhIT19+lRZWVkOVxU9RUVFE/PW3t6ulStXOlzR+9u3b5/6+/sl/fea7P/eyZAoXr58qbq6OjU1\nNU3cJU6GeQrXlxvmypFPHaqvr9fvv/8uj8ej48eP64svvoh3CVE3OjqqQ4cO6cWLF3rz5o327t2r\n1atXO11WRHp6enT69GkNDAzI6/UqKytL9fX1qq6u1p9//qkFCxaotrZWH330kdOlmoXraceOHWpu\nbtbs2bPl8/lUW1ur9PR0p0s18/v9+vHHH/X5559PbDt16pSOHDmSsPMkhe9r06ZNam1tdXSu+Ig2\nADBgBQ8AGBCWAGBAWAKAAWEJAAaEJQAYEJYAYEBYAoABYQkABv8PicrBdxpy97QAAAAASUVORK5C\nYII=\n", "text/plain": [ - "<matplotlib.figure.Figure at 0x7f899c8e6f50>" + "<matplotlib.figure.Figure at 0x7f9a730a8210>" ] }, "metadata": { @@ -392,22 +404,22 @@ }, "output_extras": [ { - "item_id": 2 + "item_id": 3 } ], "base_uri": "https://localhost:8080/", - "height": 68 + "height": 170 }, - "outputId": "db79aefe-d9a6-437b-aaf8-4174a1f3c643", + "outputId": "8fbdcd05-a8b6-45e5-88b2-ce6fdfec0351", "executionInfo": { "status": "ok", - "timestamp": 1512173998055, + "timestamp": 1512371509946, "user_tz": 480, - "elapsed": 2988, + "elapsed": 2843, "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" + "displayName": "Lukasz Kaiser", + "photoUrl": "//lh3.googleusercontent.com/-CbWIwcQ_VsA/AAAAAAAAAAI/AAAAAAAAAB8/jloHVR1qOhg/s50-c-k-no/photo.jpg", + "userId": "109750154298538986950" } } }, @@ -424,7 +436,7 @@ "encoders = ende_problem.feature_encoders(data_dir)\n", "\n", "# Setup helper functions for encoding and decoding\n", - "def encode(input_str):\n", + "def encode(input_str, output_str=None):\n", " \"\"\"Input str to features dict, ready for inference\"\"\"\n", " inputs = encoders[\"inputs\"].encode(input_str) + [1] # add EOS id\n", " batch_inputs = tf.reshape(inputs, [1, -1, 1]) # Make it 3D.\n", @@ -443,6 +455,12 @@ { "output_type": "stream", "text": [ + "\r\n", + "\r\n", + "Updates are available for some Cloud SDK components. To install them,\r\n", + "please run:\r\n", + " $ gcloud components update\r\n", + "\n", "Copying gs://tensor2tensor-data/vocab.ende.32768...\n", "/ [1 files][316.4 KiB/316.4 KiB] \n", "Operation completed over 1 objects/316.4 KiB. \n" @@ -507,16 +525,16 @@ "base_uri": "https://localhost:8080/", "height": 408 }, - "outputId": "7283214e-af66-4f16-b203-3b209643484f", + "outputId": "f8be52a4-e85c-4daf-9f77-24d75eea3ab0", "executionInfo": { "status": "ok", - "timestamp": 1512174000121, + "timestamp": 1512371515918, "user_tz": 480, - "elapsed": 321, + "elapsed": 496, "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" + "displayName": "Lukasz Kaiser", + "photoUrl": "//lh3.googleusercontent.com/-CbWIwcQ_VsA/AAAAAAAAAAI/AAAAAAAAAB8/jloHVR1qOhg/s50-c-k-no/photo.jpg", + "userId": "109750154298538986950" } } }, @@ -585,7 +603,7 @@ "# NOTE: Only create the model once when restoring from a checkpoint; it's a\n", "# Layer and so subsequent instantiations will have different variable scopes\n", "# that will not match the checkpoint.\n", - "translate_model = registry.model(model_name)(hparams, Modes.PREDICT)" + "translate_model = registry.model(model_name)(hparams, Modes.EVAL)" ], "cell_type": "code", "execution_count": 0, @@ -608,16 +626,16 @@ "base_uri": "https://localhost:8080/", "height": 34 }, - "outputId": "ec8569a0-ee0e-4520-c9c6-06f3c7582ecc", + "outputId": "86747a09-e83d-4a5f-d938-2fef25e4ce2f", "executionInfo": { "status": "ok", - "timestamp": 1512174015202, + "timestamp": 1512371536282, "user_tz": 480, - "elapsed": 12781, + "elapsed": 13020, "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" + "displayName": "Lukasz Kaiser", + "photoUrl": "//lh3.googleusercontent.com/-CbWIwcQ_VsA/AAAAAAAAAAI/AAAAAAAAAB8/jloHVR1qOhg/s50-c-k-no/photo.jpg", + "userId": "109750154298538986950" } } }, @@ -657,54 +675,700 @@ }, "output_extras": [ { - "item_id": 3 + "item_id": 2 } ], "base_uri": "https://localhost:8080/", - "height": 119 + "height": 68 }, - "outputId": "306d8df1-70c4-43f5-fc15-54ff66ec58ed", + "outputId": "cee729b7-8237-45bb-ac6f-dfadce9916b4", "executionInfo": { "status": "ok", - "timestamp": 1512174026517, + "timestamp": 1512371578480, "user_tz": 480, - "elapsed": 11277, + "elapsed": 11397, "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" + "displayName": "Lukasz Kaiser", + "photoUrl": "//lh3.googleusercontent.com/-CbWIwcQ_VsA/AAAAAAAAAAI/AAAAAAAAAB8/jloHVR1qOhg/s50-c-k-no/photo.jpg", + "userId": "109750154298538986950" } } }, "source": [ "# Restore and translate!\n", - "\n", "def translate(inputs):\n", " encoded_inputs = encode(inputs)\n", " with tfe.restore_variables_on_create(ckpt_path):\n", " model_output = translate_model.infer(encoded_inputs)\n", " return decode(model_output)\n", "\n", - "inputs = \"This is a cat.\"\n", + "inputs = \"The animal didn't cross the street because it was too tired\"\n", "outputs = translate(inputs)\n", "\n", "print(\"Inputs: %s\" % inputs)\n", "print(\"Outputs: %s\" % outputs)" ], "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "outputs": [ { "output_type": "stream", "text": [ "INFO:tensorflow:Greedy Decoding\n", - "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:487: calling reduce_mean (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.\n", + "Inputs: The animal didn't cross the street because it was too tired\n", + "Outputs: Das Tier überquerte die Straße nicht, weil es zu müde war, weil es zu müde war.\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "X3mkIEcbfiTP", + "colab_type": "text" + }, + "source": [ + "## Attention Viz Utils" + ], + "cell_type": "markdown" + }, + { + "metadata": { + "id": "r6GPPFy1fL2N", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "source": [ + "from tensor2tensor.visualization import attention\n", + "from tensor2tensor.data_generators import text_encoder\n", + "\n", + "SIZE = 35\n", + "\n", + "def encode_eval(input_str, output_str):\n", + " inputs = tf.reshape(encoders[\"inputs\"].encode(input_str) + [1], [1, -1, 1, 1]) # Make it 3D.\n", + " outputs = tf.reshape(encoders[\"inputs\"].encode(output_str) + [1], [1, -1, 1, 1]) # Make it 3D.\n", + " return {\"inputs\": inputs, \"targets\": outputs}\n", + "\n", + "def get_att_mats():\n", + " enc_atts = []\n", + " dec_atts = []\n", + " encdec_atts = []\n", + "\n", + " for i in range(hparams.num_hidden_layers):\n", + " enc_att = translate_model.attention_weights[\n", + " \"transformer/body/encoder/layer_%i/self_attention/multihead_attention/dot_product_attention\" % i][0]\n", + " dec_att = translate_model.attention_weights[\n", + " \"transformer/body/decoder/layer_%i/self_attention/multihead_attention/dot_product_attention\" % i][0]\n", + " encdec_att = translate_model.attention_weights[\n", + " \"transformer/body/decoder/layer_%i/encdec_attention/multihead_attention/dot_product_attention\" % i][0]\n", + " enc_atts.append(resize(enc_att))\n", + " dec_atts.append(resize(dec_att))\n", + " encdec_atts.append(resize(encdec_att))\n", + " return enc_atts, dec_atts, encdec_atts\n", + "\n", + "def resize(np_mat):\n", + " # Sum across heads\n", + " np_mat = np_mat[:, :SIZE, :SIZE]\n", + " row_sums = np.sum(np_mat, axis=0)\n", + " # Normalize\n", + " layer_mat = np_mat / row_sums[np.newaxis, :]\n", + " lsh = layer_mat.shape\n", + " # Add extra dim for viz code to work.\n", + " layer_mat = np.reshape(layer_mat, (1, lsh[0], lsh[1], lsh[2]))\n", + " return layer_mat\n", + "\n", + "def to_tokens(ids):\n", + " ids = np.squeeze(ids)\n", + " subtokenizer = hparams.problems[0].vocabulary['targets']\n", + " tokens = []\n", + " for _id in ids:\n", + " if _id == 0:\n", + " tokens.append('<PAD>')\n", + " elif _id == 1:\n", + " tokens.append('<EOS>')\n", + " elif _id == -1:\n", + " tokens.append('<NULL>')\n", + " else:\n", + " tokens.append(subtokenizer._subtoken_id_to_subtoken_string(_id))\n", + " return tokens" + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "wfF8_cW-OXPN", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "source": [ + "def call_html():\n", + " import IPython\n", + " display(IPython.core.display.HTML('''\n", + " <script src=\"/static/components/requirejs/require.js\"></script>\n", + " <script>\n", + " requirejs.config({\n", + " paths: {\n", + " base: '/static/base',\n", + " \"d3\": \"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.8/d3.min\",\n", + " jquery: '//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min',\n", + " },\n", + " });\n", + " </script>\n", + " '''))" + ], + "cell_type": "code", + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "T7UJzFf6fmhp", + "colab_type": "text" + }, + "source": [ + "## Display Attention" + ], + "cell_type": "markdown" + }, + { + "metadata": { + "id": "OJKU36QAfqOC", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + }, + { + "item_id": 2 + }, + { + "item_id": 3 + }, + { + "item_id": 4 + }, + { + "item_id": 5 + } + ], + "resources": { + "http://localhost:8080/static/components/requirejs/require.js": { + "data": "LyoqIHZpbTogZXQ6dHM9NDpzdz00OnN0cz00CiAqIEBsaWNlbnNlIFJlcXVpcmVKUyAyLjEuMjIgQ29weXJpZ2h0IChjKSAyMDEwLTIwMTUsIFRoZSBEb2pvIEZvdW5kYXRpb24gQWxsIFJpZ2h0cyBSZXNlcnZlZC4KICogQXZhaWxhYmxlIHZpYSB0aGUgTUlUIG9yIG5ldyBCU0QgbGljZW5zZS4KICogc2VlOiBodHRwOi8vZ2l0aHViLmNvbS9qcmJ1cmtlL3JlcXVpcmVqcyBmb3IgZGV0YWlscwogKi8KLy9Ob3QgdXNpbmcgc3RyaWN0OiB1bmV2ZW4gc3RyaWN0IHN1cHBvcnQgaW4gYnJvd3NlcnMsICMzOTIsIGFuZCBjYXVzZXMKLy9wcm9ibGVtcyB3aXRoIHJlcXVpcmVqcy5leGVjKCkvdHJhbnNwaWxlciBwbHVnaW5zIHRoYXQgbWF5IG5vdCBiZSBzdHJpY3QuCi8qanNsaW50IHJlZ2V4cDogdHJ1ZSwgbm9tZW46IHRydWUsIHNsb3BweTogdHJ1ZSAqLwovKmdsb2JhbCB3aW5kb3csIG5hdmlnYXRvciwgZG9jdW1lbnQsIGltcG9ydFNjcmlwdHMsIHNldFRpbWVvdXQsIG9wZXJhICovCgp2YXIgcmVxdWlyZWpzLCByZXF1aXJlLCBkZWZpbmU7CihmdW5jdGlvbiAoZ2xvYmFsKSB7CiAgICB2YXIgcmVxLCBzLCBoZWFkLCBiYXNlRWxlbWVudCwgZGF0YU1haW4sIHNyYywKICAgICAgICBpbnRlcmFjdGl2ZVNjcmlwdCwgY3VycmVudGx5QWRkaW5nU2NyaXB0LCBtYWluU2NyaXB0LCBzdWJQYXRoLAogICAgICAgIHZlcnNpb24gPSAnMi4xLjIyJywKICAgICAgICBjb21tZW50UmVnRXhwID0gLyhcL1wqKFtcc1xTXSo/KVwqXC98KFteOl18XilcL1wvKC4qKSQpL21nLAogICAgICAgIGNqc1JlcXVpcmVSZWdFeHAgPSAvW14uXVxzKnJlcXVpcmVccypcKFxzKlsiJ10oW14nIlxzXSspWyInXVxzKlwpL2csCiAgICAgICAganNTdWZmaXhSZWdFeHAgPSAvXC5qcyQvLAogICAgICAgIGN1cnJEaXJSZWdFeHAgPSAvXlwuXC8vLAogICAgICAgIG9wID0gT2JqZWN0LnByb3RvdHlwZSwKICAgICAgICBvc3RyaW5nID0gb3AudG9TdHJpbmcsCiAgICAgICAgaGFzT3duID0gb3AuaGFzT3duUHJvcGVydHksCiAgICAgICAgYXAgPSBBcnJheS5wcm90b3R5cGUsCiAgICAgICAgaXNCcm93c2VyID0gISEodHlwZW9mIHdpbmRvdyAhPT0gJ3VuZGVmaW5lZCcgJiYgdHlwZW9mIG5hdmlnYXRvciAhPT0gJ3VuZGVmaW5lZCcgJiYgd2luZG93LmRvY3VtZW50KSwKICAgICAgICBpc1dlYldvcmtlciA9ICFpc0Jyb3dzZXIgJiYgdHlwZW9mIGltcG9ydFNjcmlwdHMgIT09ICd1bmRlZmluZWQnLAogICAgICAgIC8vUFMzIGluZGljYXRlcyBsb2FkZWQgYW5kIGNvbXBsZXRlLCBidXQgbmVlZCB0byB3YWl0IGZvciBjb21wbGV0ZQogICAgICAgIC8vc3BlY2lmaWNhbGx5LiBTZXF1ZW5jZSBpcyAnbG9hZGluZycsICdsb2FkZWQnLCBleGVjdXRpb24sCiAgICAgICAgLy8gdGhlbiAnY29tcGxldGUnLiBUaGUgVUEgY2hlY2sgaXMgdW5mb3J0dW5hdGUsIGJ1dCBub3Qgc3VyZSBob3cKICAgICAgICAvL3RvIGZlYXR1cmUgdGVzdCB3L28gY2F1c2luZyBwZXJmIGlzc3Vlcy4KICAgICAgICByZWFkeVJlZ0V4cCA9IGlzQnJvd3NlciAmJiBuYXZpZ2F0b3IucGxhdGZvcm0gPT09ICdQTEFZU1RBVElPTiAzJyA/CiAgICAgICAgICAgICAgICAgICAgICAvXmNvbXBsZXRlJC8gOiAvXihjb21wbGV0ZXxsb2FkZWQpJC8sCiAgICAgICAgZGVmQ29udGV4dE5hbWUgPSAnXycsCiAgICAgICAgLy9PaCB0aGUgdHJhZ2VkeSwgZGV0ZWN0aW5nIG9wZXJhLiBTZWUgdGhlIHVzYWdlIG9mIGlzT3BlcmEgZm9yIHJlYXNvbi4KICAgICAgICBpc09wZXJhID0gdHlwZW9mIG9wZXJhICE9PSAndW5kZWZpbmVkJyAmJiBvcGVyYS50b1N0cmluZygpID09PSAnW29iamVjdCBPcGVyYV0nLAogICAgICAgIGNvbnRleHRzID0ge30sCiAgICAgICAgY2ZnID0ge30sCiAgICAgICAgZ2xvYmFsRGVmUXVldWUgPSBbXSwKICAgICAgICB1c2VJbnRlcmFjdGl2ZSA9IGZhbHNlOwoKICAgIGZ1bmN0aW9uIGlzRnVuY3Rpb24oaXQpIHsKICAgICAgICByZXR1cm4gb3N0cmluZy5jYWxsKGl0KSA9PT0gJ1tvYmplY3QgRnVuY3Rpb25dJzsKICAgIH0KCiAgICBmdW5jdGlvbiBpc0FycmF5KGl0KSB7CiAgICAgICAgcmV0dXJuIG9zdHJpbmcuY2FsbChpdCkgPT09ICdbb2JqZWN0IEFycmF5XSc7CiAgICB9CgogICAgLyoqCiAgICAgKiBIZWxwZXIgZnVuY3Rpb24gZm9yIGl0ZXJhdGluZyBvdmVyIGFuIGFycmF5LiBJZiB0aGUgZnVuYyByZXR1cm5zCiAgICAgKiBhIHRydWUgdmFsdWUsIGl0IHdpbGwgYnJlYWsgb3V0IG9mIHRoZSBsb29wLgogICAgICovCiAgICBmdW5jdGlvbiBlYWNoKGFyeSwgZnVuYykgewogICAgICAgIGlmIChhcnkpIHsKICAgICAgICAgICAgdmFyIGk7CiAgICAgICAgICAgIGZvciAoaSA9IDA7IGkgPCBhcnkubGVuZ3RoOyBpICs9IDEpIHsKICAgICAgICAgICAgICAgIGlmIChhcnlbaV0gJiYgZnVuYyhhcnlbaV0sIGksIGFyeSkpIHsKICAgICAgICAgICAgICAgICAgICBicmVhazsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgfQogICAgICAgIH0KICAgIH0KCiAgICAvKioKICAgICAqIEhlbHBlciBmdW5jdGlvbiBmb3IgaXRlcmF0aW5nIG92ZXIgYW4gYXJyYXkgYmFja3dhcmRzLiBJZiB0aGUgZnVuYwogICAgICogcmV0dXJucyBhIHRydWUgdmFsdWUsIGl0IHdpbGwgYnJlYWsgb3V0IG9mIHRoZSBsb29wLgogICAgICovCiAgICBmdW5jdGlvbiBlYWNoUmV2ZXJzZShhcnksIGZ1bmMpIHsKICAgICAgICBpZiAoYXJ5KSB7CiAgICAgICAgICAgIHZhciBpOwogICAgICAgICAgICBmb3IgKGkgPSBhcnkubGVuZ3RoIC0gMTsgaSA+IC0xOyBpIC09IDEpIHsKICAgICAgICAgICAgICAgIGlmIChhcnlbaV0gJiYgZnVuYyhhcnlbaV0sIGksIGFyeSkpIHsKICAgICAgICAgICAgICAgICAgICBicmVhazsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgfQogICAgICAgIH0KICAgIH0KCiAgICBmdW5jdGlvbiBoYXNQcm9wKG9iaiwgcHJvcCkgewogICAgICAgIHJldHVybiBoYXNPd24uY2FsbChvYmosIHByb3ApOwogICAgfQoKICAgIGZ1bmN0aW9uIGdldE93bihvYmosIHByb3ApIHsKICAgICAgICByZXR1cm4gaGFzUHJvcChvYmosIHByb3ApICYmIG9ialtwcm9wXTsKICAgIH0KCiAgICAvKioKICAgICAqIEN5Y2xlcyBvdmVyIHByb3BlcnRpZXMgaW4gYW4gb2JqZWN0IGFuZCBjYWxscyBhIGZ1bmN0aW9uIGZvciBlYWNoCiAgICAgKiBwcm9wZXJ0eSB2YWx1ZS4gSWYgdGhlIGZ1bmN0aW9uIHJldHVybnMgYSB0cnV0aHkgdmFsdWUsIHRoZW4gdGhlCiAgICAgKiBpdGVyYXRpb24gaXMgc3RvcHBlZC4KICAgICAqLwogICAgZnVuY3Rpb24gZWFjaFByb3Aob2JqLCBmdW5jKSB7CiAgICAgICAgdmFyIHByb3A7CiAgICAgICAgZm9yIChwcm9wIGluIG9iaikgewogICAgICAgICAgICBpZiAoaGFzUHJvcChvYmosIHByb3ApKSB7CiAgICAgICAgICAgICAgICBpZiAoZnVuYyhvYmpbcHJvcF0sIHByb3ApKSB7CiAgICAgICAgICAgICAgICAgICAgYnJlYWs7CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICB9CiAgICB9CgogICAgLyoqCiAgICAgKiBTaW1wbGUgZnVuY3Rpb24gdG8gbWl4IGluIHByb3BlcnRpZXMgZnJvbSBzb3VyY2UgaW50byB0YXJnZXQsCiAgICAgKiBidXQgb25seSBpZiB0YXJnZXQgZG9lcyBub3QgYWxyZWFkeSBoYXZlIGEgcHJvcGVydHkgb2YgdGhlIHNhbWUgbmFtZS4KICAgICAqLwogICAgZnVuY3Rpb24gbWl4aW4odGFyZ2V0LCBzb3VyY2UsIGZvcmNlLCBkZWVwU3RyaW5nTWl4aW4pIHsKICAgICAgICBpZiAoc291cmNlKSB7CiAgICAgICAgICAgIGVhY2hQcm9wKHNvdXJjZSwgZnVuY3Rpb24gKHZhbHVlLCBwcm9wKSB7CiAgICAgICAgICAgICAgICBpZiAoZm9yY2UgfHwgIWhhc1Byb3AodGFyZ2V0LCBwcm9wKSkgewogICAgICAgICAgICAgICAgICAgIGlmIChkZWVwU3RyaW5nTWl4aW4gJiYgdHlwZW9mIHZhbHVlID09PSAnb2JqZWN0JyAmJiB2YWx1ZSAmJgogICAgICAgICAgICAgICAgICAgICAgICAhaXNBcnJheSh2YWx1ZSkgJiYgIWlzRnVuY3Rpb24odmFsdWUpICYmCiAgICAgICAgICAgICAgICAgICAgICAgICEodmFsdWUgaW5zdGFuY2VvZiBSZWdFeHApKSB7CgogICAgICAgICAgICAgICAgICAgICAgICBpZiAoIXRhcmdldFtwcm9wXSkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgdGFyZ2V0W3Byb3BdID0ge307CiAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICAgICAgbWl4aW4odGFyZ2V0W3Byb3BdLCB2YWx1ZSwgZm9yY2UsIGRlZXBTdHJpbmdNaXhpbik7CiAgICAgICAgICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgICAgICAgICAgdGFyZ2V0W3Byb3BdID0gdmFsdWU7CiAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9KTsKICAgICAgICB9CiAgICAgICAgcmV0dXJuIHRhcmdldDsKICAgIH0KCiAgICAvL1NpbWlsYXIgdG8gRnVuY3Rpb24ucHJvdG90eXBlLmJpbmQsIGJ1dCB0aGUgJ3RoaXMnIG9iamVjdCBpcyBzcGVjaWZpZWQKICAgIC8vZmlyc3QsIHNpbmNlIGl0IGlzIGVhc2llciB0byByZWFkL2ZpZ3VyZSBvdXQgd2hhdCAndGhpcycgd2lsbCBiZS4KICAgIGZ1bmN0aW9uIGJpbmQob2JqLCBmbikgewogICAgICAgIHJldHVybiBmdW5jdGlvbiAoKSB7CiAgICAgICAgICAgIHJldHVybiBmbi5hcHBseShvYmosIGFyZ3VtZW50cyk7CiAgICAgICAgfTsKICAgIH0KCiAgICBmdW5jdGlvbiBzY3JpcHRzKCkgewogICAgICAgIHJldHVybiBkb2N1bWVudC5nZXRFbGVtZW50c0J5VGFnTmFtZSgnc2NyaXB0Jyk7CiAgICB9CgogICAgZnVuY3Rpb24gZGVmYXVsdE9uRXJyb3IoZXJyKSB7CiAgICAgICAgdGhyb3cgZXJyOwogICAgfQoKICAgIC8vQWxsb3cgZ2V0dGluZyBhIGdsb2JhbCB0aGF0IGlzIGV4cHJlc3NlZCBpbgogICAgLy9kb3Qgbm90YXRpb24sIGxpa2UgJ2EuYi5jJy4KICAgIGZ1bmN0aW9uIGdldEdsb2JhbCh2YWx1ZSkgewogICAgICAgIGlmICghdmFsdWUpIHsKICAgICAgICAgICAgcmV0dXJuIHZhbHVlOwogICAgICAgIH0KICAgICAgICB2YXIgZyA9IGdsb2JhbDsKICAgICAgICBlYWNoKHZhbHVlLnNwbGl0KCcuJyksIGZ1bmN0aW9uIChwYXJ0KSB7CiAgICAgICAgICAgIGcgPSBnW3BhcnRdOwogICAgICAgIH0pOwogICAgICAgIHJldHVybiBnOwogICAgfQoKICAgIC8qKgogICAgICogQ29uc3RydWN0cyBhbiBlcnJvciB3aXRoIGEgcG9pbnRlciB0byBhbiBVUkwgd2l0aCBtb3JlIGluZm9ybWF0aW9uLgogICAgICogQHBhcmFtIHtTdHJpbmd9IGlkIHRoZSBlcnJvciBJRCB0aGF0IG1hcHMgdG8gYW4gSUQgb24gYSB3ZWIgcGFnZS4KICAgICAqIEBwYXJhbSB7U3RyaW5nfSBtZXNzYWdlIGh1bWFuIHJlYWRhYmxlIGVycm9yLgogICAgICogQHBhcmFtIHtFcnJvcn0gW2Vycl0gdGhlIG9yaWdpbmFsIGVycm9yLCBpZiB0aGVyZSBpcyBvbmUuCiAgICAgKgogICAgICogQHJldHVybnMge0Vycm9yfQogICAgICovCiAgICBmdW5jdGlvbiBtYWtlRXJyb3IoaWQsIG1zZywgZXJyLCByZXF1aXJlTW9kdWxlcykgewogICAgICAgIHZhciBlID0gbmV3IEVycm9yKG1zZyArICdcbmh0dHA6Ly9yZXF1aXJlanMub3JnL2RvY3MvZXJyb3JzLmh0bWwjJyArIGlkKTsKICAgICAgICBlLnJlcXVpcmVUeXBlID0gaWQ7CiAgICAgICAgZS5yZXF1aXJlTW9kdWxlcyA9IHJlcXVpcmVNb2R1bGVzOwogICAgICAgIGlmIChlcnIpIHsKICAgICAgICAgICAgZS5vcmlnaW5hbEVycm9yID0gZXJyOwogICAgICAgIH0KICAgICAgICByZXR1cm4gZTsKICAgIH0KCiAgICBpZiAodHlwZW9mIGRlZmluZSAhPT0gJ3VuZGVmaW5lZCcpIHsKICAgICAgICAvL0lmIGEgZGVmaW5lIGlzIGFscmVhZHkgaW4gcGxheSB2aWEgYW5vdGhlciBBTUQgbG9hZGVyLAogICAgICAgIC8vZG8gbm90IG92ZXJ3cml0ZS4KICAgICAgICByZXR1cm47CiAgICB9CgogICAgaWYgKHR5cGVvZiByZXF1aXJlanMgIT09ICd1bmRlZmluZWQnKSB7CiAgICAgICAgaWYgKGlzRnVuY3Rpb24ocmVxdWlyZWpzKSkgewogICAgICAgICAgICAvL0RvIG5vdCBvdmVyd3JpdGUgYW4gZXhpc3RpbmcgcmVxdWlyZWpzIGluc3RhbmNlLgogICAgICAgICAgICByZXR1cm47CiAgICAgICAgfQogICAgICAgIGNmZyA9IHJlcXVpcmVqczsKICAgICAgICByZXF1aXJlanMgPSB1bmRlZmluZWQ7CiAgICB9CgogICAgLy9BbGxvdyBmb3IgYSByZXF1aXJlIGNvbmZpZyBvYmplY3QKICAgIGlmICh0eXBlb2YgcmVxdWlyZSAhPT0gJ3VuZGVmaW5lZCcgJiYgIWlzRnVuY3Rpb24ocmVxdWlyZSkpIHsKICAgICAgICAvL2Fzc3VtZSBpdCBpcyBhIGNvbmZpZyBvYmplY3QuCiAgICAgICAgY2ZnID0gcmVxdWlyZTsKICAgICAgICByZXF1aXJlID0gdW5kZWZpbmVkOwogICAgfQoKICAgIGZ1bmN0aW9uIG5ld0NvbnRleHQoY29udGV4dE5hbWUpIHsKICAgICAgICB2YXIgaW5DaGVja0xvYWRlZCwgTW9kdWxlLCBjb250ZXh0LCBoYW5kbGVycywKICAgICAgICAgICAgY2hlY2tMb2FkZWRUaW1lb3V0SWQsCiAgICAgICAgICAgIGNvbmZpZyA9IHsKICAgICAgICAgICAgICAgIC8vRGVmYXVsdHMuIERvIG5vdCBzZXQgYSBkZWZhdWx0IGZvciBtYXAKICAgICAgICAgICAgICAgIC8vY29uZmlnIHRvIHNwZWVkIHVwIG5vcm1hbGl6ZSgpLCB3aGljaAogICAgICAgICAgICAgICAgLy93aWxsIHJ1biBmYXN0ZXIgaWYgdGhlcmUgaXMgbm8gZGVmYXVsdC4KICAgICAgICAgICAgICAgIHdhaXRTZWNvbmRzOiA3LAogICAgICAgICAgICAgICAgYmFzZVVybDogJy4vJywKICAgICAgICAgICAgICAgIHBhdGhzOiB7fSwKICAgICAgICAgICAgICAgIGJ1bmRsZXM6IHt9LAogICAgICAgICAgICAgICAgcGtnczoge30sCiAgICAgICAgICAgICAgICBzaGltOiB7fSwKICAgICAgICAgICAgICAgIGNvbmZpZzoge30KICAgICAgICAgICAgfSwKICAgICAgICAgICAgcmVnaXN0cnkgPSB7fSwKICAgICAgICAgICAgLy9yZWdpc3RyeSBvZiBqdXN0IGVuYWJsZWQgbW9kdWxlcywgdG8gc3BlZWQKICAgICAgICAgICAgLy9jeWNsZSBicmVha2luZyBjb2RlIHdoZW4gbG90cyBvZiBtb2R1bGVzCiAgICAgICAgICAgIC8vYXJlIHJlZ2lzdGVyZWQsIGJ1dCBub3QgYWN0aXZhdGVkLgogICAgICAgICAgICBlbmFibGVkUmVnaXN0cnkgPSB7fSwKICAgICAgICAgICAgdW5kZWZFdmVudHMgPSB7fSwKICAgICAgICAgICAgZGVmUXVldWUgPSBbXSwKICAgICAgICAgICAgZGVmaW5lZCA9IHt9LAogICAgICAgICAgICB1cmxGZXRjaGVkID0ge30sCiAgICAgICAgICAgIGJ1bmRsZXNNYXAgPSB7fSwKICAgICAgICAgICAgcmVxdWlyZUNvdW50ZXIgPSAxLAogICAgICAgICAgICB1bm5vcm1hbGl6ZWRDb3VudGVyID0gMTsKCiAgICAgICAgLyoqCiAgICAgICAgICogVHJpbXMgdGhlIC4gYW5kIC4uIGZyb20gYW4gYXJyYXkgb2YgcGF0aCBzZWdtZW50cy4KICAgICAgICAgKiBJdCB3aWxsIGtlZXAgYSBsZWFkaW5nIHBhdGggc2VnbWVudCBpZiBhIC4uIHdpbGwgYmVjb21lCiAgICAgICAgICogdGhlIGZpcnN0IHBhdGggc2VnbWVudCwgdG8gaGVscCB3aXRoIG1vZHVsZSBuYW1lIGxvb2t1cHMsCiAgICAgICAgICogd2hpY2ggYWN0IGxpa2UgcGF0aHMsIGJ1dCBjYW4gYmUgcmVtYXBwZWQuIEJ1dCB0aGUgZW5kIHJlc3VsdCwKICAgICAgICAgKiBhbGwgcGF0aHMgdGhhdCB1c2UgdGhpcyBmdW5jdGlvbiBzaG91bGQgbG9vayBub3JtYWxpemVkLgogICAgICAgICAqIE5PVEU6IHRoaXMgbWV0aG9kIE1PRElGSUVTIHRoZSBpbnB1dCBhcnJheS4KICAgICAgICAgKiBAcGFyYW0ge0FycmF5fSBhcnkgdGhlIGFycmF5IG9mIHBhdGggc2VnbWVudHMuCiAgICAgICAgICovCiAgICAgICAgZnVuY3Rpb24gdHJpbURvdHMoYXJ5KSB7CiAgICAgICAgICAgIHZhciBpLCBwYXJ0OwogICAgICAgICAgICBmb3IgKGkgPSAwOyBpIDwgYXJ5Lmxlbmd0aDsgaSsrKSB7CiAgICAgICAgICAgICAgICBwYXJ0ID0gYXJ5W2ldOwogICAgICAgICAgICAgICAgaWYgKHBhcnQgPT09ICcuJykgewogICAgICAgICAgICAgICAgICAgIGFyeS5zcGxpY2UoaSwgMSk7CiAgICAgICAgICAgICAgICAgICAgaSAtPSAxOwogICAgICAgICAgICAgICAgfSBlbHNlIGlmIChwYXJ0ID09PSAnLi4nKSB7CiAgICAgICAgICAgICAgICAgICAgLy8gSWYgYXQgdGhlIHN0YXJ0LCBvciBwcmV2aW91cyB2YWx1ZSBpcyBzdGlsbCAuLiwKICAgICAgICAgICAgICAgICAgICAvLyBrZWVwIHRoZW0gc28gdGhhdCB3aGVuIGNvbnZlcnRlZCB0byBhIHBhdGggaXQgbWF5CiAgICAgICAgICAgICAgICAgICAgLy8gc3RpbGwgd29yayB3aGVuIGNvbnZlcnRlZCB0byBhIHBhdGgsIGV2ZW4gdGhvdWdoCiAgICAgICAgICAgICAgICAgICAgLy8gYXMgYW4gSUQgaXQgaXMgbGVzcyB0aGFuIGlkZWFsLiBJbiBsYXJnZXIgcG9pbnQKICAgICAgICAgICAgICAgICAgICAvLyByZWxlYXNlcywgbWF5IGJlIGJldHRlciB0byBqdXN0IGtpY2sgb3V0IGFuIGVycm9yLgogICAgICAgICAgICAgICAgICAgIGlmIChpID09PSAwIHx8IChpID09PSAxICYmIGFyeVsyXSA9PT0gJy4uJykgfHwgYXJ5W2kgLSAxXSA9PT0gJy4uJykgewogICAgICAgICAgICAgICAgICAgICAgICBjb250aW51ZTsKICAgICAgICAgICAgICAgICAgICB9IGVsc2UgaWYgKGkgPiAwKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIGFyeS5zcGxpY2UoaSAtIDEsIDIpOwogICAgICAgICAgICAgICAgICAgICAgICBpIC09IDI7CiAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgfQoKICAgICAgICAvKioKICAgICAgICAgKiBHaXZlbiBhIHJlbGF0aXZlIG1vZHVsZSBuYW1lLCBsaWtlIC4vc29tZXRoaW5nLCBub3JtYWxpemUgaXQgdG8KICAgICAgICAgKiBhIHJlYWwgbmFtZSB0aGF0IGNhbiBiZSBtYXBwZWQgdG8gYSBwYXRoLgogICAgICAgICAqIEBwYXJhbSB7U3RyaW5nfSBuYW1lIHRoZSByZWxhdGl2ZSBuYW1lCiAgICAgICAgICogQHBhcmFtIHtTdHJpbmd9IGJhc2VOYW1lIGEgcmVhbCBuYW1lIHRoYXQgdGhlIG5hbWUgYXJnIGlzIHJlbGF0aXZlCiAgICAgICAgICogdG8uCiAgICAgICAgICogQHBhcmFtIHtCb29sZWFufSBhcHBseU1hcCBhcHBseSB0aGUgbWFwIGNvbmZpZyB0byB0aGUgdmFsdWUuIFNob3VsZAogICAgICAgICAqIG9ubHkgYmUgZG9uZSBpZiB0aGlzIG5vcm1hbGl6YXRpb24gaXMgZm9yIGEgZGVwZW5kZW5jeSBJRC4KICAgICAgICAgKiBAcmV0dXJucyB7U3RyaW5nfSBub3JtYWxpemVkIG5hbWUKICAgICAgICAgKi8KICAgICAgICBmdW5jdGlvbiBub3JtYWxpemUobmFtZSwgYmFzZU5hbWUsIGFwcGx5TWFwKSB7CiAgICAgICAgICAgIHZhciBwa2dNYWluLCBtYXBWYWx1ZSwgbmFtZVBhcnRzLCBpLCBqLCBuYW1lU2VnbWVudCwgbGFzdEluZGV4LAogICAgICAgICAgICAgICAgZm91bmRNYXAsIGZvdW5kSSwgZm91bmRTdGFyTWFwLCBzdGFySSwgbm9ybWFsaXplZEJhc2VQYXJ0cywKICAgICAgICAgICAgICAgIGJhc2VQYXJ0cyA9IChiYXNlTmFtZSAmJiBiYXNlTmFtZS5zcGxpdCgnLycpKSwKICAgICAgICAgICAgICAgIG1hcCA9IGNvbmZpZy5tYXAsCiAgICAgICAgICAgICAgICBzdGFyTWFwID0gbWFwICYmIG1hcFsnKiddOwoKICAgICAgICAgICAgLy9BZGp1c3QgYW55IHJlbGF0aXZlIHBhdGhzLgogICAgICAgICAgICBpZiAobmFtZSkgewogICAgICAgICAgICAgICAgbmFtZSA9IG5hbWUuc3BsaXQoJy8nKTsKICAgICAgICAgICAgICAgIGxhc3RJbmRleCA9IG5hbWUubGVuZ3RoIC0gMTsKCiAgICAgICAgICAgICAgICAvLyBJZiB3YW50aW5nIG5vZGUgSUQgY29tcGF0aWJpbGl0eSwgc3RyaXAgLmpzIGZyb20gZW5kCiAgICAgICAgICAgICAgICAvLyBvZiBJRHMuIEhhdmUgdG8gZG8gdGhpcyBoZXJlLCBhbmQgbm90IGluIG5hbWVUb1VybAogICAgICAgICAgICAgICAgLy8gYmVjYXVzZSBub2RlIGFsbG93cyBlaXRoZXIgLmpzIG9yIG5vbiAuanMgdG8gbWFwCiAgICAgICAgICAgICAgICAvLyB0byBzYW1lIGZpbGUuCiAgICAgICAgICAgICAgICBpZiAoY29uZmlnLm5vZGVJZENvbXBhdCAmJiBqc1N1ZmZpeFJlZ0V4cC50ZXN0KG5hbWVbbGFzdEluZGV4XSkpIHsKICAgICAgICAgICAgICAgICAgICBuYW1lW2xhc3RJbmRleF0gPSBuYW1lW2xhc3RJbmRleF0ucmVwbGFjZShqc1N1ZmZpeFJlZ0V4cCwgJycpOwogICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgIC8vIFN0YXJ0cyB3aXRoIGEgJy4nIHNvIG5lZWQgdGhlIGJhc2VOYW1lCiAgICAgICAgICAgICAgICBpZiAobmFtZVswXS5jaGFyQXQoMCkgPT09ICcuJyAmJiBiYXNlUGFydHMpIHsKICAgICAgICAgICAgICAgICAgICAvL0NvbnZlcnQgYmFzZU5hbWUgdG8gYXJyYXksIGFuZCBsb3Agb2ZmIHRoZSBsYXN0IHBhcnQsCiAgICAgICAgICAgICAgICAgICAgLy9zbyB0aGF0IC4gbWF0Y2hlcyB0aGF0ICdkaXJlY3RvcnknIGFuZCBub3QgbmFtZSBvZiB0aGUgYmFzZU5hbWUncwogICAgICAgICAgICAgICAgICAgIC8vbW9kdWxlLiBGb3IgaW5zdGFuY2UsIGJhc2VOYW1lIG9mICdvbmUvdHdvL3RocmVlJywgbWFwcyB0bwogICAgICAgICAgICAgICAgICAgIC8vJ29uZS90d28vdGhyZWUuanMnLCBidXQgd2Ugd2FudCB0aGUgZGlyZWN0b3J5LCAnb25lL3R3bycgZm9yCiAgICAgICAgICAgICAgICAgICAgLy90aGlzIG5vcm1hbGl6YXRpb24uCiAgICAgICAgICAgICAgICAgICAgbm9ybWFsaXplZEJhc2VQYXJ0cyA9IGJhc2VQYXJ0cy5zbGljZSgwLCBiYXNlUGFydHMubGVuZ3RoIC0gMSk7CiAgICAgICAgICAgICAgICAgICAgbmFtZSA9IG5vcm1hbGl6ZWRCYXNlUGFydHMuY29uY2F0KG5hbWUpOwogICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgIHRyaW1Eb3RzKG5hbWUpOwogICAgICAgICAgICAgICAgbmFtZSA9IG5hbWUuam9pbignLycpOwogICAgICAgICAgICB9CgogICAgICAgICAgICAvL0FwcGx5IG1hcCBjb25maWcgaWYgYXZhaWxhYmxlLgogICAgICAgICAgICBpZiAoYXBwbHlNYXAgJiYgbWFwICYmIChiYXNlUGFydHMgfHwgc3Rhck1hcCkpIHsKICAgICAgICAgICAgICAgIG5hbWVQYXJ0cyA9IG5hbWUuc3BsaXQoJy8nKTsKCiAgICAgICAgICAgICAgICBvdXRlckxvb3A6IGZvciAoaSA9IG5hbWVQYXJ0cy5sZW5ndGg7IGkgPiAwOyBpIC09IDEpIHsKICAgICAgICAgICAgICAgICAgICBuYW1lU2VnbWVudCA9IG5hbWVQYXJ0cy5zbGljZSgwLCBpKS5qb2luKCcvJyk7CgogICAgICAgICAgICAgICAgICAgIGlmIChiYXNlUGFydHMpIHsKICAgICAgICAgICAgICAgICAgICAgICAgLy9GaW5kIHRoZSBsb25nZXN0IGJhc2VOYW1lIHNlZ21lbnQgbWF0Y2ggaW4gdGhlIGNvbmZpZy4KICAgICAgICAgICAgICAgICAgICAgICAgLy9TbywgZG8gam9pbnMgb24gdGhlIGJpZ2dlc3QgdG8gc21hbGxlc3QgbGVuZ3RocyBvZiBiYXNlUGFydHMuCiAgICAgICAgICAgICAgICAgICAgICAgIGZvciAoaiA9IGJhc2VQYXJ0cy5sZW5ndGg7IGogPiAwOyBqIC09IDEpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1hcFZhbHVlID0gZ2V0T3duKG1hcCwgYmFzZVBhcnRzLnNsaWNlKDAsIGopLmpvaW4oJy8nKSk7CgogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy9iYXNlTmFtZSBzZWdtZW50IGhhcyBjb25maWcsIGZpbmQgaWYgaXQgaGFzIG9uZSBmb3IKICAgICAgICAgICAgICAgICAgICAgICAgICAgIC8vdGhpcyBuYW1lLgogICAgICAgICAgICAgICAgICAgICAgICAgICAgaWYgKG1hcFZhbHVlKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbWFwVmFsdWUgPSBnZXRPd24obWFwVmFsdWUsIG5hbWVTZWdtZW50KTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBpZiAobWFwVmFsdWUpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgLy9NYXRjaCwgdXBkYXRlIG5hbWUgdG8gdGhlIG5ldyB2YWx1ZS4KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZm91bmRNYXAgPSBtYXBWYWx1ZTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZm91bmRJID0gaTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYnJlYWsgb3V0ZXJMb29wOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAgICAgLy9DaGVjayBmb3IgYSBzdGFyIG1hcCBtYXRjaCwgYnV0IGp1c3QgaG9sZCBvbiB0byBpdCwKICAgICAgICAgICAgICAgICAgICAvL2lmIHRoZXJlIGlzIGEgc2hvcnRlciBzZWdtZW50IG1hdGNoIGxhdGVyIGluIGEgbWF0Y2hpbmcKICAgICAgICAgICAgICAgICAgICAvL2NvbmZpZywgdGhlbiBmYXZvciBvdmVyIHRoaXMgc3RhciBtYXAuCiAgICAgICAgICAgICAgICAgICAgaWYgKCFmb3VuZFN0YXJNYXAgJiYgc3Rhck1hcCAmJiBnZXRPd24oc3Rhck1hcCwgbmFtZVNlZ21lbnQpKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIGZvdW5kU3Rhck1hcCA9IGdldE93bihzdGFyTWFwLCBuYW1lU2VnbWVudCk7CiAgICAgICAgICAgICAgICAgICAgICAgIHN0YXJJID0gaTsKICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgaWYgKCFmb3VuZE1hcCAmJiBmb3VuZFN0YXJNYXApIHsKICAgICAgICAgICAgICAgICAgICBmb3VuZE1hcCA9IGZvdW5kU3Rhck1hcDsKICAgICAgICAgICAgICAgICAgICBmb3VuZEkgPSBzdGFySTsKICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICBpZiAoZm91bmRNYXApIHsKICAgICAgICAgICAgICAgICAgICBuYW1lUGFydHMuc3BsaWNlKDAsIGZvdW5kSSwgZm91bmRNYXApOwogICAgICAgICAgICAgICAgICAgIG5hbWUgPSBuYW1lUGFydHMuam9pbignLycpOwogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CgogICAgICAgICAgICAvLyBJZiB0aGUgbmFtZSBwb2ludHMgdG8gYSBwYWNrYWdlJ3MgbmFtZSwgdXNlCiAgICAgICAgICAgIC8vIHRoZSBwYWNrYWdlIG1haW4gaW5zdGVhZC4KICAgICAgICAgICAgcGtnTWFpbiA9IGdldE93bihjb25maWcucGtncywgbmFtZSk7CgogICAgICAgICAgICByZXR1cm4gcGtnTWFpbiA/IHBrZ01haW4gOiBuYW1lOwogICAgICAgIH0KCiAgICAgICAgZnVuY3Rpb24gcmVtb3ZlU2NyaXB0KG5hbWUpIHsKICAgICAgICAgICAgaWYgKGlzQnJvd3NlcikgewogICAgICAgICAgICAgICAgZWFjaChzY3JpcHRzKCksIGZ1bmN0aW9uIChzY3JpcHROb2RlKSB7CiAgICAgICAgICAgICAgICAgICAgaWYgKHNjcmlwdE5vZGUuZ2V0QXR0cmlidXRlKCdkYXRhLXJlcXVpcmVtb2R1bGUnKSA9PT0gbmFtZSAmJgogICAgICAgICAgICAgICAgICAgICAgICAgICAgc2NyaXB0Tm9kZS5nZXRBdHRyaWJ1dGUoJ2RhdGEtcmVxdWlyZWNvbnRleHQnKSA9PT0gY29udGV4dC5jb250ZXh0TmFtZSkgewogICAgICAgICAgICAgICAgICAgICAgICBzY3JpcHROb2RlLnBhcmVudE5vZGUucmVtb3ZlQ2hpbGQoc2NyaXB0Tm9kZSk7CiAgICAgICAgICAgICAgICAgICAgICAgIHJldHVybiB0cnVlOwogICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIH0pOwogICAgICAgICAgICB9CiAgICAgICAgfQoKICAgICAgICBmdW5jdGlvbiBoYXNQYXRoRmFsbGJhY2soaWQpIHsKICAgICAgICAgICAgdmFyIHBhdGhDb25maWcgPSBnZXRPd24oY29uZmlnLnBhdGhzLCBpZCk7CiAgICAgICAgICAgIGlmIChwYXRoQ29uZmlnICYmIGlzQXJyYXkocGF0aENvbmZpZykgJiYgcGF0aENvbmZpZy5sZW5ndGggPiAxKSB7CiAgICAgICAgICAgICAgICAvL1BvcCBvZmYgdGhlIGZpcnN0IGFycmF5IHZhbHVlLCBzaW5jZSBpdCBmYWlsZWQsIGFuZAogICAgICAgICAgICAgICAgLy9yZXRyeQogICAgICAgICAgICAgICAgcGF0aENvbmZpZy5zaGlmdCgpOwogICAgICAgICAgICAgICAgY29udGV4dC5yZXF1aXJlLnVuZGVmKGlkKTsKCiAgICAgICAgICAgICAgICAvL0N1c3RvbSByZXF1aXJlIHRoYXQgZG9lcyBub3QgZG8gbWFwIHRyYW5zbGF0aW9uLCBzaW5jZQogICAgICAgICAgICAgICAgLy9JRCBpcyAiYWJzb2x1dGUiLCBhbHJlYWR5IG1hcHBlZC9yZXNvbHZlZC4KICAgICAgICAgICAgICAgIGNvbnRleHQubWFrZVJlcXVpcmUobnVsbCwgewogICAgICAgICAgICAgICAgICAgIHNraXBNYXA6IHRydWUKICAgICAgICAgICAgICAgIH0pKFtpZF0pOwoKICAgICAgICAgICAgICAgIHJldHVybiB0cnVlOwogICAgICAgICAgICB9CiAgICAgICAgfQoKICAgICAgICAvL1R1cm5zIGEgcGx1Z2luIXJlc291cmNlIHRvIFtwbHVnaW4sIHJlc291cmNlXQogICAgICAgIC8vd2l0aCB0aGUgcGx1Z2luIGJlaW5nIHVuZGVmaW5lZCBpZiB0aGUgbmFtZQogICAgICAgIC8vZGlkIG5vdCBoYXZlIGEgcGx1Z2luIHByZWZpeC4KICAgICAgICBmdW5jdGlvbiBzcGxpdFByZWZpeChuYW1lKSB7CiAgICAgICAgICAgIHZhciBwcmVmaXgsCiAgICAgICAgICAgICAgICBpbmRleCA9IG5hbWUgPyBuYW1lLmluZGV4T2YoJyEnKSA6IC0xOwogICAgICAgICAgICBpZiAoaW5kZXggPiAtMSkgewogICAgICAgICAgICAgICAgcHJlZml4ID0gbmFtZS5zdWJzdHJpbmcoMCwgaW5kZXgpOwogICAgICAgICAgICAgICAgbmFtZSA9IG5hbWUuc3Vic3RyaW5nKGluZGV4ICsgMSwgbmFtZS5sZW5ndGgpOwogICAgICAgICAgICB9CiAgICAgICAgICAgIHJldHVybiBbcHJlZml4LCBuYW1lXTsKICAgICAgICB9CgogICAgICAgIC8qKgogICAgICAgICAqIENyZWF0ZXMgYSBtb2R1bGUgbWFwcGluZyB0aGF0IGluY2x1ZGVzIHBsdWdpbiBwcmVmaXgsIG1vZHVsZQogICAgICAgICAqIG5hbWUsIGFuZCBwYXRoLiBJZiBwYXJlbnRNb2R1bGVNYXAgaXMgcHJvdmlkZWQgaXQgd2lsbAogICAgICAgICAqIGFsc28gbm9ybWFsaXplIHRoZSBuYW1lIHZpYSByZXF1aXJlLm5vcm1hbGl6ZSgpCiAgICAgICAgICoKICAgICAgICAgKiBAcGFyYW0ge1N0cmluZ30gbmFtZSB0aGUgbW9kdWxlIG5hbWUKICAgICAgICAgKiBAcGFyYW0ge1N0cmluZ30gW3BhcmVudE1vZHVsZU1hcF0gcGFyZW50IG1vZHVsZSBtYXAKICAgICAgICAgKiBmb3IgdGhlIG1vZHVsZSBuYW1lLCB1c2VkIHRvIHJlc29sdmUgcmVsYXRpdmUgbmFtZXMuCiAgICAgICAgICogQHBhcmFtIHtCb29sZWFufSBpc05vcm1hbGl6ZWQ6IGlzIHRoZSBJRCBhbHJlYWR5IG5vcm1hbGl6ZWQuCiAgICAgICAgICogVGhpcyBpcyB0cnVlIGlmIHRoaXMgY2FsbCBpcyBkb25lIGZvciBhIGRlZmluZSgpIG1vZHVsZSBJRC4KICAgICAgICAgKiBAcGFyYW0ge0Jvb2xlYW59IGFwcGx5TWFwOiBhcHBseSB0aGUgbWFwIGNvbmZpZyB0byB0aGUgSUQuCiAgICAgICAgICogU2hvdWxkIG9ubHkgYmUgdHJ1ZSBpZiB0aGlzIG1hcCBpcyBmb3IgYSBkZXBlbmRlbmN5LgogICAgICAgICAqCiAgICAgICAgICogQHJldHVybnMge09iamVjdH0KICAgICAgICAgKi8KICAgICAgICBmdW5jdGlvbiBtYWtlTW9kdWxlTWFwKG5hbWUsIHBhcmVudE1vZHVsZU1hcCwgaXNOb3JtYWxpemVkLCBhcHBseU1hcCkgewogICAgICAgICAgICB2YXIgdXJsLCBwbHVnaW5Nb2R1bGUsIHN1ZmZpeCwgbmFtZVBhcnRzLAogICAgICAgICAgICAgICAgcHJlZml4ID0gbnVsbCwKICAgICAgICAgICAgICAgIHBhcmVudE5hbWUgPSBwYXJlbnRNb2R1bGVNYXAgPyBwYXJlbnRNb2R1bGVNYXAubmFtZSA6IG51bGwsCiAgICAgICAgICAgICAgICBvcmlnaW5hbE5hbWUgPSBuYW1lLAogICAgICAgICAgICAgICAgaXNEZWZpbmUgPSB0cnVlLAogICAgICAgICAgICAgICAgbm9ybWFsaXplZE5hbWUgPSAnJzsKCiAgICAgICAgICAgIC8vSWYgbm8gbmFtZSwgdGhlbiBpdCBtZWFucyBpdCBpcyBhIHJlcXVpcmUgY2FsbCwgZ2VuZXJhdGUgYW4KICAgICAgICAgICAgLy9pbnRlcm5hbCBuYW1lLgogICAgICAgICAgICBpZiAoIW5hbWUpIHsKICAgICAgICAgICAgICAgIGlzRGVmaW5lID0gZmFsc2U7CiAgICAgICAgICAgICAgICBuYW1lID0gJ19AcicgKyAocmVxdWlyZUNvdW50ZXIgKz0gMSk7CiAgICAgICAgICAgIH0KCiAgICAgICAgICAgIG5hbWVQYXJ0cyA9IHNwbGl0UHJlZml4KG5hbWUpOwogICAgICAgICAgICBwcmVmaXggPSBuYW1lUGFydHNbMF07CiAgICAgICAgICAgIG5hbWUgPSBuYW1lUGFydHNbMV07CgogICAgICAgICAgICBpZiAocHJlZml4KSB7CiAgICAgICAgICAgICAgICBwcmVmaXggPSBub3JtYWxpemUocHJlZml4LCBwYXJlbnROYW1lLCBhcHBseU1hcCk7CiAgICAgICAgICAgICAgICBwbHVnaW5Nb2R1bGUgPSBnZXRPd24oZGVmaW5lZCwgcHJlZml4KTsKICAgICAgICAgICAgfQoKICAgICAgICAgICAgLy9BY2NvdW50IGZvciByZWxhdGl2ZSBwYXRocyBpZiB0aGVyZSBpcyBhIGJhc2UgbmFtZS4KICAgICAgICAgICAgaWYgKG5hbWUpIHsKICAgICAgICAgICAgICAgIGlmIChwcmVmaXgpIHsKICAgICAgICAgICAgICAgICAgICBpZiAocGx1Z2luTW9kdWxlICYmIHBsdWdpbk1vZHVsZS5ub3JtYWxpemUpIHsKICAgICAgICAgICAgICAgICAgICAgICAgLy9QbHVnaW4gaXMgbG9hZGVkLCB1c2UgaXRzIG5vcm1hbGl6ZSBtZXRob2QuCiAgICAgICAgICAgICAgICAgICAgICAgIG5vcm1hbGl6ZWROYW1lID0gcGx1Z2luTW9kdWxlLm5vcm1hbGl6ZShuYW1lLCBmdW5jdGlvbiAobmFtZSkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIG5vcm1hbGl6ZShuYW1lLCBwYXJlbnROYW1lLCBhcHBseU1hcCk7CiAgICAgICAgICAgICAgICAgICAgICAgIH0pOwogICAgICAgICAgICAgICAgICAgIH0gZWxzZSB7CiAgICAgICAgICAgICAgICAgICAgICAgIC8vIElmIG5lc3RlZCBwbHVnaW4gcmVmZXJlbmNlcywgdGhlbiBkbyBub3QgdHJ5IHRvCiAgICAgICAgICAgICAgICAgICAgICAgIC8vIG5vcm1hbGl6ZSwgYXMgaXQgd2lsbCBub3Qgbm9ybWFsaXplIGNvcnJlY3RseS4gVGhpcwogICAgICAgICAgICAgICAgICAgICAgICAvLyBwbGFjZXMgYSByZXN0cmljdGlvbiBvbiByZXNvdXJjZUlkcywgYW5kIHRoZSBsb25nZXIKICAgICAgICAgICAgICAgICAgICAgICAgLy8gdGVybSBzb2x1dGlvbiBpcyBub3QgdG8gbm9ybWFsaXplIHVudGlsIHBsdWdpbnMgYXJlCiAgICAgICAgICAgICAgICAgICAgICAgIC8vIGxvYWRlZCBhbmQgYWxsIG5vcm1hbGl6YXRpb25zIHRvIGFsbG93IGZvciBhc3luYwogICAgICAgICAgICAgICAgICAgICAgICAvLyBsb2FkaW5nIG9mIGEgbG9hZGVyIHBsdWdpbi4gQnV0IGZvciBub3csIGZpeGVzIHRoZQogICAgICAgICAgICAgICAgICAgICAgICAvLyBjb21tb24gdXNlcy4gRGV0YWlscyBpbiAjMTEzMQogICAgICAgICAgICAgICAgICAgICAgICBub3JtYWxpemVkTmFtZSA9IG5hbWUuaW5kZXhPZignIScpID09PSAtMSA/CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbm9ybWFsaXplKG5hbWUsIHBhcmVudE5hbWUsIGFwcGx5TWFwKSA6CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbmFtZTsKICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICB9IGVsc2UgewogICAgICAgICAgICAgICAgICAgIC8vQSByZWd1bGFyIG1vZHVsZS4KICAgICAgICAgICAgICAgICAgICBub3JtYWxpemVkTmFtZSA9IG5vcm1hbGl6ZShuYW1lLCBwYXJlbnROYW1lLCBhcHBseU1hcCk7CgogICAgICAgICAgICAgICAgICAgIC8vTm9ybWFsaXplZCBuYW1lIG1heSBiZSBhIHBsdWdpbiBJRCBkdWUgdG8gbWFwIGNvbmZpZwogICAgICAgICAgICAgICAgICAgIC8vYXBwbGljYXRpb24gaW4gbm9ybWFsaXplLiBUaGUgbWFwIGNvbmZpZyB2YWx1ZXMgbXVzdAogICAgICAgICAgICAgICAgICAgIC8vYWxyZWFkeSBiZSBub3JtYWxpemVkLCBzbyBkbyBub3QgbmVlZCB0byByZWRvIHRoYXQgcGFydC4KICAgICAgICAgICAgICAgICAgICBuYW1lUGFydHMgPSBzcGxpdFByZWZpeChub3JtYWxpemVkTmFtZSk7CiAgICAgICAgICAgICAgICAgICAgcHJlZml4ID0gbmFtZVBhcnRzWzBdOwogICAgICAgICAgICAgICAgICAgIG5vcm1hbGl6ZWROYW1lID0gbmFtZVBhcnRzWzFdOwogICAgICAgICAgICAgICAgICAgIGlzTm9ybWFsaXplZCA9IHRydWU7CgogICAgICAgICAgICAgICAgICAgIHVybCA9IGNvbnRleHQubmFtZVRvVXJsKG5vcm1hbGl6ZWROYW1lKTsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgfQoKICAgICAgICAgICAgLy9JZiB0aGUgaWQgaXMgYSBwbHVnaW4gaWQgdGhhdCBjYW5ub3QgYmUgZGV0ZXJtaW5lZCBpZiBpdCBuZWVkcwogICAgICAgICAgICAvL25vcm1hbGl6YXRpb24sIHN0YW1wIGl0IHdpdGggYSB1bmlxdWUgSUQgc28gdHdvIG1hdGNoaW5nIHJlbGF0aXZlCiAgICAgICAgICAgIC8vaWRzIHRoYXQgbWF5IGNvbmZsaWN0IGNhbiBiZSBzZXBhcmF0ZS4KICAgICAgICAgICAgc3VmZml4ID0gcHJlZml4ICYmICFwbHVnaW5Nb2R1bGUgJiYgIWlzTm9ybWFsaXplZCA/CiAgICAgICAgICAgICAgICAgICAgICdfdW5ub3JtYWxpemVkJyArICh1bm5vcm1hbGl6ZWRDb3VudGVyICs9IDEpIDoKICAgICAgICAgICAgICAgICAgICAgJyc7CgogICAgICAgICAgICByZXR1cm4gewogICAgICAgICAgICAgICAgcHJlZml4OiBwcmVmaXgsCiAgICAgICAgICAgICAgICBuYW1lOiBub3JtYWxpemVkTmFtZSwKICAgICAgICAgICAgICAgIHBhcmVudE1hcDogcGFyZW50TW9kdWxlTWFwLAogICAgICAgICAgICAgICAgdW5ub3JtYWxpemVkOiAhIXN1ZmZpeCwKICAgICAgICAgICAgICAgIHVybDogdXJsLAogICAgICAgICAgICAgICAgb3JpZ2luYWxOYW1lOiBvcmlnaW5hbE5hbWUsCiAgICAgICAgICAgICAgICBpc0RlZmluZTogaXNEZWZpbmUsCiAgICAgICAgICAgICAgICBpZDogKHByZWZpeCA/CiAgICAgICAgICAgICAgICAgICAgICAgIHByZWZpeCArICchJyArIG5vcm1hbGl6ZWROYW1lIDoKICAgICAgICAgICAgICAgICAgICAgICAgbm9ybWFsaXplZE5hbWUpICsgc3VmZml4CiAgICAgICAgICAgIH07CiAgICAgICAgfQoKICAgICAgICBmdW5jdGlvbiBnZXRNb2R1bGUoZGVwTWFwKSB7CiAgICAgICAgICAgIHZhciBpZCA9IGRlcE1hcC5pZCwKICAgICAgICAgICAgICAgIG1vZCA9IGdldE93bihyZWdpc3RyeSwgaWQpOwoKICAgICAgICAgICAgaWYgKCFtb2QpIHsKICAgICAgICAgICAgICAgIG1vZCA9IHJlZ2lzdHJ5W2lkXSA9IG5ldyBjb250ZXh0Lk1vZHVsZShkZXBNYXApOwogICAgICAgICAgICB9CgogICAgICAgICAgICByZXR1cm4gbW9kOwogICAgICAgIH0KCiAgICAgICAgZnVuY3Rpb24gb24oZGVwTWFwLCBuYW1lLCBmbikgewogICAgICAgICAgICB2YXIgaWQgPSBkZXBNYXAuaWQsCiAgICAgICAgICAgICAgICBtb2QgPSBnZXRPd24ocmVnaXN0cnksIGlkKTsKCiAgICAgICAgICAgIGlmIChoYXNQcm9wKGRlZmluZWQsIGlkKSAmJgogICAgICAgICAgICAgICAgICAgICghbW9kIHx8IG1vZC5kZWZpbmVFbWl0Q29tcGxldGUpKSB7CiAgICAgICAgICAgICAgICBpZiAobmFtZSA9PT0gJ2RlZmluZWQnKSB7CiAgICAgICAgICAgICAgICAgICAgZm4oZGVmaW5lZFtpZF0pOwogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9IGVsc2UgewogICAgICAgICAgICAgICAgbW9kID0gZ2V0TW9kdWxlKGRlcE1hcCk7CiAgICAgICAgICAgICAgICBpZiAobW9kLmVycm9yICYmIG5hbWUgPT09ICdlcnJvcicpIHsKICAgICAgICAgICAgICAgICAgICBmbihtb2QuZXJyb3IpOwogICAgICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgICAgICBtb2Qub24obmFtZSwgZm4pOwogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgfQoKICAgICAgICBmdW5jdGlvbiBvbkVycm9yKGVyciwgZXJyYmFjaykgewogICAgICAgICAgICB2YXIgaWRzID0gZXJyLnJlcXVpcmVNb2R1bGVzLAogICAgICAgICAgICAgICAgbm90aWZpZWQgPSBmYWxzZTsKCiAgICAgICAgICAgIGlmIChlcnJiYWNrKSB7CiAgICAgICAgICAgICAgICBlcnJiYWNrKGVycik7CiAgICAgICAgICAgIH0gZWxzZSB7CiAgICAgICAgICAgICAgICBlYWNoKGlkcywgZnVuY3Rpb24gKGlkKSB7CiAgICAgICAgICAgICAgICAgICAgdmFyIG1vZCA9IGdldE93bihyZWdpc3RyeSwgaWQpOwogICAgICAgICAgICAgICAgICAgIGlmIChtb2QpIHsKICAgICAgICAgICAgICAgICAgICAgICAgLy9TZXQgZXJyb3Igb24gbW9kdWxlLCBzbyBpdCBza2lwcyB0aW1lb3V0IGNoZWNrcy4KICAgICAgICAgICAgICAgICAgICAgICAgbW9kLmVycm9yID0gZXJyOwogICAgICAgICAgICAgICAgICAgICAgICBpZiAobW9kLmV2ZW50cy5lcnJvcikgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgbm90aWZpZWQgPSB0cnVlOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgbW9kLmVtaXQoJ2Vycm9yJywgZXJyKTsKICAgICAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIH0pOwoKICAgICAgICAgICAgICAgIGlmICghbm90aWZpZWQpIHsKICAgICAgICAgICAgICAgICAgICByZXEub25FcnJvcihlcnIpOwogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgfQoKICAgICAgICAvKioKICAgICAgICAgKiBJbnRlcm5hbCBtZXRob2QgdG8gdHJhbnNmZXIgZ2xvYmFsUXVldWUgaXRlbXMgdG8gdGhpcyBjb250ZXh0J3MKICAgICAgICAgKiBkZWZRdWV1ZS4KICAgICAgICAgKi8KICAgICAgICBmdW5jdGlvbiB0YWtlR2xvYmFsUXVldWUoKSB7CiAgICAgICAgICAgIC8vUHVzaCBhbGwgdGhlIGdsb2JhbERlZlF1ZXVlIGl0ZW1zIGludG8gdGhlIGNvbnRleHQncyBkZWZRdWV1ZQogICAgICAgICAgICBpZiAoZ2xvYmFsRGVmUXVldWUubGVuZ3RoKSB7CiAgICAgICAgICAgICAgICBlYWNoKGdsb2JhbERlZlF1ZXVlLCBmdW5jdGlvbihxdWV1ZUl0ZW0pIHsKICAgICAgICAgICAgICAgICAgICB2YXIgaWQgPSBxdWV1ZUl0ZW1bMF07CiAgICAgICAgICAgICAgICAgICAgaWYgKHR5cGVvZiBpZCA9PT0gJ3N0cmluZycpIHsKICAgICAgICAgICAgICAgICAgICAgICAgY29udGV4dC5kZWZRdWV1ZU1hcFtpZF0gPSB0cnVlOwogICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICBkZWZRdWV1ZS5wdXNoKHF1ZXVlSXRlbSk7CiAgICAgICAgICAgICAgICB9KTsKICAgICAgICAgICAgICAgIGdsb2JhbERlZlF1ZXVlID0gW107CiAgICAgICAgICAgIH0KICAgICAgICB9CgogICAgICAgIGhhbmRsZXJzID0gewogICAgICAgICAgICAncmVxdWlyZSc6IGZ1bmN0aW9uIChtb2QpIHsKICAgICAgICAgICAgICAgIGlmIChtb2QucmVxdWlyZSkgewogICAgICAgICAgICAgICAgICAgIHJldHVybiBtb2QucmVxdWlyZTsKICAgICAgICAgICAgICAgIH0gZWxzZSB7CiAgICAgICAgICAgICAgICAgICAgcmV0dXJuIChtb2QucmVxdWlyZSA9IGNvbnRleHQubWFrZVJlcXVpcmUobW9kLm1hcCkpOwogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICAnZXhwb3J0cyc6IGZ1bmN0aW9uIChtb2QpIHsKICAgICAgICAgICAgICAgIG1vZC51c2luZ0V4cG9ydHMgPSB0cnVlOwogICAgICAgICAgICAgICAgaWYgKG1vZC5tYXAuaXNEZWZpbmUpIHsKICAgICAgICAgICAgICAgICAgICBpZiAobW9kLmV4cG9ydHMpIHsKICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIChkZWZpbmVkW21vZC5tYXAuaWRdID0gbW9kLmV4cG9ydHMpOwogICAgICAgICAgICAgICAgICAgIH0gZWxzZSB7CiAgICAgICAgICAgICAgICAgICAgICAgIHJldHVybiAobW9kLmV4cG9ydHMgPSBkZWZpbmVkW21vZC5tYXAuaWRdID0ge30pOwogICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKICAgICAgICAgICAgJ21vZHVsZSc6IGZ1bmN0aW9uIChtb2QpIHsKICAgICAgICAgICAgICAgIGlmIChtb2QubW9kdWxlKSB7CiAgICAgICAgICAgICAgICAgICAgcmV0dXJuIG1vZC5tb2R1bGU7CiAgICAgICAgICAgICAgICB9IGVsc2UgewogICAgICAgICAgICAgICAgICAgIHJldHVybiAobW9kLm1vZHVsZSA9IHsKICAgICAgICAgICAgICAgICAgICAgICAgaWQ6IG1vZC5tYXAuaWQsCiAgICAgICAgICAgICAgICAgICAgICAgIHVyaTogbW9kLm1hcC51cmwsCiAgICAgICAgICAgICAgICAgICAgICAgIGNvbmZpZzogZnVuY3Rpb24gKCkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIGdldE93bihjb25maWcuY29uZmlnLCBtb2QubWFwLmlkKSB8fCB7fTsKICAgICAgICAgICAgICAgICAgICAgICAgfSwKICAgICAgICAgICAgICAgICAgICAgICAgZXhwb3J0czogbW9kLmV4cG9ydHMgfHwgKG1vZC5leHBvcnRzID0ge30pCiAgICAgICAgICAgICAgICAgICAgfSk7CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICB9OwoKICAgICAgICBmdW5jdGlvbiBjbGVhblJlZ2lzdHJ5KGlkKSB7CiAgICAgICAgICAgIC8vQ2xlYW4gdXAgbWFjaGluZXJ5IHVzZWQgZm9yIHdhaXRpbmcgbW9kdWxlcy4KICAgICAgICAgICAgZGVsZXRlIHJlZ2lzdHJ5W2lkXTsKICAgICAgICAgICAgZGVsZXRlIGVuYWJsZWRSZWdpc3RyeVtpZF07CiAgICAgICAgfQoKICAgICAgICBmdW5jdGlvbiBicmVha0N5Y2xlKG1vZCwgdHJhY2VkLCBwcm9jZXNzZWQpIHsKICAgICAgICAgICAgdmFyIGlkID0gbW9kLm1hcC5pZDsKCiAgICAgICAgICAgIGlmIChtb2QuZXJyb3IpIHsKICAgICAgICAgICAgICAgIG1vZC5lbWl0KCdlcnJvcicsIG1vZC5lcnJvcik7CiAgICAgICAgICAgIH0gZWxzZSB7CiAgICAgICAgICAgICAgICB0cmFjZWRbaWRdID0gdHJ1ZTsKICAgICAgICAgICAgICAgIGVhY2gobW9kLmRlcE1hcHMsIGZ1bmN0aW9uIChkZXBNYXAsIGkpIHsKICAgICAgICAgICAgICAgICAgICB2YXIgZGVwSWQgPSBkZXBNYXAuaWQsCiAgICAgICAgICAgICAgICAgICAgICAgIGRlcCA9IGdldE93bihyZWdpc3RyeSwgZGVwSWQpOwoKICAgICAgICAgICAgICAgICAgICAvL09ubHkgZm9yY2UgdGhpbmdzIHRoYXQgaGF2ZSBub3QgY29tcGxldGVkCiAgICAgICAgICAgICAgICAgICAgLy9iZWluZyBkZWZpbmVkLCBzbyBzdGlsbCBpbiB0aGUgcmVnaXN0cnksCiAgICAgICAgICAgICAgICAgICAgLy9hbmQgb25seSBpZiBpdCBoYXMgbm90IGJlZW4gbWF0Y2hlZCB1cAogICAgICAgICAgICAgICAgICAgIC8vaW4gdGhlIG1vZHVsZSBhbHJlYWR5LgogICAgICAgICAgICAgICAgICAgIGlmIChkZXAgJiYgIW1vZC5kZXBNYXRjaGVkW2ldICYmICFwcm9jZXNzZWRbZGVwSWRdKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIGlmIChnZXRPd24odHJhY2VkLCBkZXBJZCkpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1vZC5kZWZpbmVEZXAoaSwgZGVmaW5lZFtkZXBJZF0pOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgbW9kLmNoZWNrKCk7IC8vcGFzcyBmYWxzZT8KICAgICAgICAgICAgICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGJyZWFrQ3ljbGUoZGVwLCB0cmFjZWQsIHByb2Nlc3NlZCk7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICB9KTsKICAgICAgICAgICAgICAgIHByb2Nlc3NlZFtpZF0gPSB0cnVlOwogICAgICAgICAgICB9CiAgICAgICAgfQoKICAgICAgICBmdW5jdGlvbiBjaGVja0xvYWRlZCgpIHsKICAgICAgICAgICAgdmFyIGVyciwgdXNpbmdQYXRoRmFsbGJhY2ssCiAgICAgICAgICAgICAgICB3YWl0SW50ZXJ2YWwgPSBjb25maWcud2FpdFNlY29uZHMgKiAxMDAwLAogICAgICAgICAgICAgICAgLy9JdCBpcyBwb3NzaWJsZSB0byBkaXNhYmxlIHRoZSB3YWl0IGludGVydmFsIGJ5IHVzaW5nIHdhaXRTZWNvbmRzIG9mIDAuCiAgICAgICAgICAgICAgICBleHBpcmVkID0gd2FpdEludGVydmFsICYmIChjb250ZXh0LnN0YXJ0VGltZSArIHdhaXRJbnRlcnZhbCkgPCBuZXcgRGF0ZSgpLmdldFRpbWUoKSwKICAgICAgICAgICAgICAgIG5vTG9hZHMgPSBbXSwKICAgICAgICAgICAgICAgIHJlcUNhbGxzID0gW10sCiAgICAgICAgICAgICAgICBzdGlsbExvYWRpbmcgPSBmYWxzZSwKICAgICAgICAgICAgICAgIG5lZWRDeWNsZUNoZWNrID0gdHJ1ZTsKCiAgICAgICAgICAgIC8vRG8gbm90IGJvdGhlciBpZiB0aGlzIGNhbGwgd2FzIGEgcmVzdWx0IG9mIGEgY3ljbGUgYnJlYWsuCiAgICAgICAgICAgIGlmIChpbkNoZWNrTG9hZGVkKSB7CiAgICAgICAgICAgICAgICByZXR1cm47CiAgICAgICAgICAgIH0KCiAgICAgICAgICAgIGluQ2hlY2tMb2FkZWQgPSB0cnVlOwoKICAgICAgICAgICAgLy9GaWd1cmUgb3V0IHRoZSBzdGF0ZSBvZiBhbGwgdGhlIG1vZHVsZXMuCiAgICAgICAgICAgIGVhY2hQcm9wKGVuYWJsZWRSZWdpc3RyeSwgZnVuY3Rpb24gKG1vZCkgewogICAgICAgICAgICAgICAgdmFyIG1hcCA9IG1vZC5tYXAsCiAgICAgICAgICAgICAgICAgICAgbW9kSWQgPSBtYXAuaWQ7CgogICAgICAgICAgICAgICAgLy9Ta2lwIHRoaW5ncyB0aGF0IGFyZSBub3QgZW5hYmxlZCBvciBpbiBlcnJvciBzdGF0ZS4KICAgICAgICAgICAgICAgIGlmICghbW9kLmVuYWJsZWQpIHsKICAgICAgICAgICAgICAgICAgICByZXR1cm47CiAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgaWYgKCFtYXAuaXNEZWZpbmUpIHsKICAgICAgICAgICAgICAgICAgICByZXFDYWxscy5wdXNoKG1vZCk7CiAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgaWYgKCFtb2QuZXJyb3IpIHsKICAgICAgICAgICAgICAgICAgICAvL0lmIHRoZSBtb2R1bGUgc2hvdWxkIGJlIGV4ZWN1dGVkLCBhbmQgaXQgaGFzIG5vdAogICAgICAgICAgICAgICAgICAgIC8vYmVlbiBpbml0ZWQgYW5kIHRpbWUgaXMgdXAsIHJlbWVtYmVyIGl0LgogICAgICAgICAgICAgICAgICAgIGlmICghbW9kLmluaXRlZCAmJiBleHBpcmVkKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIGlmIChoYXNQYXRoRmFsbGJhY2sobW9kSWQpKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB1c2luZ1BhdGhGYWxsYmFjayA9IHRydWU7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdGlsbExvYWRpbmcgPSB0cnVlOwogICAgICAgICAgICAgICAgICAgICAgICB9IGVsc2UgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgbm9Mb2Fkcy5wdXNoKG1vZElkKTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJlbW92ZVNjcmlwdChtb2RJZCk7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICB9IGVsc2UgaWYgKCFtb2QuaW5pdGVkICYmIG1vZC5mZXRjaGVkICYmIG1hcC5pc0RlZmluZSkgewogICAgICAgICAgICAgICAgICAgICAgICBzdGlsbExvYWRpbmcgPSB0cnVlOwogICAgICAgICAgICAgICAgICAgICAgICBpZiAoIW1hcC5wcmVmaXgpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIC8vTm8gcmVhc29uIHRvIGtlZXAgbG9va2luZyBmb3IgdW5maW5pc2hlZAogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy9sb2FkaW5nLiBJZiB0aGUgb25seSBzdGlsbExvYWRpbmcgaXMgYQogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy9wbHVnaW4gcmVzb3VyY2UgdGhvdWdoLCBrZWVwIGdvaW5nLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy9iZWNhdXNlIGl0IG1heSBiZSB0aGF0IGEgcGx1Z2luIHJlc291cmNlCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAvL2lzIHdhaXRpbmcgb24gYSBub24tcGx1Z2luIGN5Y2xlLgogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIChuZWVkQ3ljbGVDaGVjayA9IGZhbHNlKTsKICAgICAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSk7CgogICAgICAgICAgICBpZiAoZXhwaXJlZCAmJiBub0xvYWRzLmxlbmd0aCkgewogICAgICAgICAgICAgICAgLy9JZiB3YWl0IHRpbWUgZXhwaXJlZCwgdGhyb3cgZXJyb3Igb2YgdW5sb2FkZWQgbW9kdWxlcy4KICAgICAgICAgICAgICAgIGVyciA9IG1ha2VFcnJvcigndGltZW91dCcsICdMb2FkIHRpbWVvdXQgZm9yIG1vZHVsZXM6ICcgKyBub0xvYWRzLCBudWxsLCBub0xvYWRzKTsKICAgICAgICAgICAgICAgIGVyci5jb250ZXh0TmFtZSA9IGNvbnRleHQuY29udGV4dE5hbWU7CiAgICAgICAgICAgICAgICByZXR1cm4gb25FcnJvcihlcnIpOwogICAgICAgICAgICB9CgogICAgICAgICAgICAvL05vdCBleHBpcmVkLCBjaGVjayBmb3IgYSBjeWNsZS4KICAgICAgICAgICAgaWYgKG5lZWRDeWNsZUNoZWNrKSB7CiAgICAgICAgICAgICAgICBlYWNoKHJlcUNhbGxzLCBmdW5jdGlvbiAobW9kKSB7CiAgICAgICAgICAgICAgICAgICAgYnJlYWtDeWNsZShtb2QsIHt9LCB7fSk7CiAgICAgICAgICAgICAgICB9KTsKICAgICAgICAgICAgfQoKICAgICAgICAgICAgLy9JZiBzdGlsbCB3YWl0aW5nIG9uIGxvYWRzLCBhbmQgdGhlIHdhaXRpbmcgbG9hZCBpcyBzb21ldGhpbmcKICAgICAgICAgICAgLy9vdGhlciB0aGFuIGEgcGx1Z2luIHJlc291cmNlLCBvciB0aGVyZSBhcmUgc3RpbGwgb3V0c3RhbmRpbmcKICAgICAgICAgICAgLy9zY3JpcHRzLCB0aGVuIGp1c3QgdHJ5IGJhY2sgbGF0ZXIuCiAgICAgICAgICAgIGlmICgoIWV4cGlyZWQgfHwgdXNpbmdQYXRoRmFsbGJhY2spICYmIHN0aWxsTG9hZGluZykgewogICAgICAgICAgICAgICAgLy9Tb21ldGhpbmcgaXMgc3RpbGwgd2FpdGluZyB0byBsb2FkLiBXYWl0IGZvciBpdCwgYnV0IG9ubHkKICAgICAgICAgICAgICAgIC8vaWYgYSB0aW1lb3V0IGlzIG5vdCBhbHJlYWR5IGluIGVmZmVjdC4KICAgICAgICAgICAgICAgIGlmICgoaXNCcm93c2VyIHx8IGlzV2ViV29ya2VyKSAmJiAhY2hlY2tMb2FkZWRUaW1lb3V0SWQpIHsKICAgICAgICAgICAgICAgICAgICBjaGVja0xvYWRlZFRpbWVvdXRJZCA9IHNldFRpbWVvdXQoZnVuY3Rpb24gKCkgewogICAgICAgICAgICAgICAgICAgICAgICBjaGVja0xvYWRlZFRpbWVvdXRJZCA9IDA7CiAgICAgICAgICAgICAgICAgICAgICAgIGNoZWNrTG9hZGVkKCk7CiAgICAgICAgICAgICAgICAgICAgfSwgNTApOwogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CgogICAgICAgICAgICBpbkNoZWNrTG9hZGVkID0gZmFsc2U7CiAgICAgICAgfQoKICAgICAgICBNb2R1bGUgPSBmdW5jdGlvbiAobWFwKSB7CiAgICAgICAgICAgIHRoaXMuZXZlbnRzID0gZ2V0T3duKHVuZGVmRXZlbnRzLCBtYXAuaWQpIHx8IHt9OwogICAgICAgICAgICB0aGlzLm1hcCA9IG1hcDsKICAgICAgICAgICAgdGhpcy5zaGltID0gZ2V0T3duKGNvbmZpZy5zaGltLCBtYXAuaWQpOwogICAgICAgICAgICB0aGlzLmRlcEV4cG9ydHMgPSBbXTsKICAgICAgICAgICAgdGhpcy5kZXBNYXBzID0gW107CiAgICAgICAgICAgIHRoaXMuZGVwTWF0Y2hlZCA9IFtdOwogICAgICAgICAgICB0aGlzLnBsdWdpbk1hcHMgPSB7fTsKICAgICAgICAgICAgdGhpcy5kZXBDb3VudCA9IDA7CgogICAgICAgICAgICAvKiB0aGlzLmV4cG9ydHMgdGhpcy5mYWN0b3J5CiAgICAgICAgICAgICAgIHRoaXMuZGVwTWFwcyA9IFtdLAogICAgICAgICAgICAgICB0aGlzLmVuYWJsZWQsIHRoaXMuZmV0Y2hlZAogICAgICAgICAgICAqLwogICAgICAgIH07CgogICAgICAgIE1vZHVsZS5wcm90b3R5cGUgPSB7CiAgICAgICAgICAgIGluaXQ6IGZ1bmN0aW9uIChkZXBNYXBzLCBmYWN0b3J5LCBlcnJiYWNrLCBvcHRpb25zKSB7CiAgICAgICAgICAgICAgICBvcHRpb25zID0gb3B0aW9ucyB8fCB7fTsKCiAgICAgICAgICAgICAgICAvL0RvIG5vdCBkbyBtb3JlIGluaXRzIGlmIGFscmVhZHkgZG9uZS4gQ2FuIGhhcHBlbiBpZiB0aGVyZQogICAgICAgICAgICAgICAgLy9hcmUgbXVsdGlwbGUgZGVmaW5lIGNhbGxzIGZvciB0aGUgc2FtZSBtb2R1bGUuIFRoYXQgaXMgbm90CiAgICAgICAgICAgICAgICAvL2Egbm9ybWFsLCBjb21tb24gY2FzZSwgYnV0IGl0IGlzIGFsc28gbm90IHVuZXhwZWN0ZWQuCiAgICAgICAgICAgICAgICBpZiAodGhpcy5pbml0ZWQpIHsKICAgICAgICAgICAgICAgICAgICByZXR1cm47CiAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgdGhpcy5mYWN0b3J5ID0gZmFjdG9yeTsKCiAgICAgICAgICAgICAgICBpZiAoZXJyYmFjaykgewogICAgICAgICAgICAgICAgICAgIC8vUmVnaXN0ZXIgZm9yIGVycm9ycyBvbiB0aGlzIG1vZHVsZS4KICAgICAgICAgICAgICAgICAgICB0aGlzLm9uKCdlcnJvcicsIGVycmJhY2spOwogICAgICAgICAgICAgICAgfSBlbHNlIGlmICh0aGlzLmV2ZW50cy5lcnJvcikgewogICAgICAgICAgICAgICAgICAgIC8vSWYgbm8gZXJyYmFjayBhbHJlYWR5LCBidXQgdGhlcmUgYXJlIGVycm9yIGxpc3RlbmVycwogICAgICAgICAgICAgICAgICAgIC8vb24gdGhpcyBtb2R1bGUsIHNldCB1cCBhbiBlcnJiYWNrIHRvIHBhc3MgdG8gdGhlIGRlcHMuCiAgICAgICAgICAgICAgICAgICAgZXJyYmFjayA9IGJpbmQodGhpcywgZnVuY3Rpb24gKGVycikgewogICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmVtaXQoJ2Vycm9yJywgZXJyKTsKICAgICAgICAgICAgICAgICAgICB9KTsKICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAvL0RvIGEgY29weSBvZiB0aGUgZGVwZW5kZW5jeSBhcnJheSwgc28gdGhhdAogICAgICAgICAgICAgICAgLy9zb3VyY2UgaW5wdXRzIGFyZSBub3QgbW9kaWZpZWQuIEZvciBleGFtcGxlCiAgICAgICAgICAgICAgICAvLyJzaGltIiBkZXBzIGFyZSBwYXNzZWQgaW4gaGVyZSBkaXJlY3RseSwgYW5kCiAgICAgICAgICAgICAgICAvL2RvaW5nIGEgZGlyZWN0IG1vZGlmaWNhdGlvbiBvZiB0aGUgZGVwTWFwcyBhcnJheQogICAgICAgICAgICAgICAgLy93b3VsZCBhZmZlY3QgdGhhdCBjb25maWcuCiAgICAgICAgICAgICAgICB0aGlzLmRlcE1hcHMgPSBkZXBNYXBzICYmIGRlcE1hcHMuc2xpY2UoMCk7CgogICAgICAgICAgICAgICAgdGhpcy5lcnJiYWNrID0gZXJyYmFjazsKCiAgICAgICAgICAgICAgICAvL0luZGljYXRlIHRoaXMgbW9kdWxlIGhhcyBiZSBpbml0aWFsaXplZAogICAgICAgICAgICAgICAgdGhpcy5pbml0ZWQgPSB0cnVlOwoKICAgICAgICAgICAgICAgIHRoaXMuaWdub3JlID0gb3B0aW9ucy5pZ25vcmU7CgogICAgICAgICAgICAgICAgLy9Db3VsZCBoYXZlIG9wdGlvbiB0byBpbml0IHRoaXMgbW9kdWxlIGluIGVuYWJsZWQgbW9kZSwKICAgICAgICAgICAgICAgIC8vb3IgY291bGQgaGF2ZSBiZWVuIHByZXZpb3VzbHkgbWFya2VkIGFzIGVuYWJsZWQuIEhvd2V2ZXIsCiAgICAgICAgICAgICAgICAvL3RoZSBkZXBlbmRlbmNpZXMgYXJlIG5vdCBrbm93biB1bnRpbCBpbml0IGlzIGNhbGxlZC4gU28KICAgICAgICAgICAgICAgIC8vaWYgZW5hYmxlZCBwcmV2aW91c2x5LCBub3cgdHJpZ2dlciBkZXBlbmRlbmNpZXMgYXMgZW5hYmxlZC4KICAgICAgICAgICAgICAgIGlmIChvcHRpb25zLmVuYWJsZWQgfHwgdGhpcy5lbmFibGVkKSB7CiAgICAgICAgICAgICAgICAgICAgLy9FbmFibGUgdGhpcyBtb2R1bGUgYW5kIGRlcGVuZGVuY2llcy4KICAgICAgICAgICAgICAgICAgICAvL1dpbGwgY2FsbCB0aGlzLmNoZWNrKCkKICAgICAgICAgICAgICAgICAgICB0aGlzLmVuYWJsZSgpOwogICAgICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgICAgICB0aGlzLmNoZWNrKCk7CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCgogICAgICAgICAgICBkZWZpbmVEZXA6IGZ1bmN0aW9uIChpLCBkZXBFeHBvcnRzKSB7CiAgICAgICAgICAgICAgICAvL0JlY2F1c2Ugb2YgY3ljbGVzLCBkZWZpbmVkIGNhbGxiYWNrIGZvciBhIGdpdmVuCiAgICAgICAgICAgICAgICAvL2V4cG9ydCBjYW4gYmUgY2FsbGVkIG1vcmUgdGhhbiBvbmNlLgogICAgICAgICAgICAgICAgaWYgKCF0aGlzLmRlcE1hdGNoZWRbaV0pIHsKICAgICAgICAgICAgICAgICAgICB0aGlzLmRlcE1hdGNoZWRbaV0gPSB0cnVlOwogICAgICAgICAgICAgICAgICAgIHRoaXMuZGVwQ291bnQgLT0gMTsKICAgICAgICAgICAgICAgICAgICB0aGlzLmRlcEV4cG9ydHNbaV0gPSBkZXBFeHBvcnRzOwogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAoKICAgICAgICAgICAgZmV0Y2g6IGZ1bmN0aW9uICgpIHsKICAgICAgICAgICAgICAgIGlmICh0aGlzLmZldGNoZWQpIHsKICAgICAgICAgICAgICAgICAgICByZXR1cm47CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICB0aGlzLmZldGNoZWQgPSB0cnVlOwoKICAgICAgICAgICAgICAgIGNvbnRleHQuc3RhcnRUaW1lID0gKG5ldyBEYXRlKCkpLmdldFRpbWUoKTsKCiAgICAgICAgICAgICAgICB2YXIgbWFwID0gdGhpcy5tYXA7CgogICAgICAgICAgICAgICAgLy9JZiB0aGUgbWFuYWdlciBpcyBmb3IgYSBwbHVnaW4gbWFuYWdlZCByZXNvdXJjZSwKICAgICAgICAgICAgICAgIC8vYXNrIHRoZSBwbHVnaW4gdG8gbG9hZCBpdCBub3cuCiAgICAgICAgICAgICAgICBpZiAodGhpcy5zaGltKSB7CiAgICAgICAgICAgICAgICAgICAgY29udGV4dC5tYWtlUmVxdWlyZSh0aGlzLm1hcCwgewogICAgICAgICAgICAgICAgICAgICAgICBlbmFibGVCdWlsZENhbGxiYWNrOiB0cnVlCiAgICAgICAgICAgICAgICAgICAgfSkodGhpcy5zaGltLmRlcHMgfHwgW10sIGJpbmQodGhpcywgZnVuY3Rpb24gKCkgewogICAgICAgICAgICAgICAgICAgICAgICByZXR1cm4gbWFwLnByZWZpeCA/IHRoaXMuY2FsbFBsdWdpbigpIDogdGhpcy5sb2FkKCk7CiAgICAgICAgICAgICAgICAgICAgfSkpOwogICAgICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgICAgICAvL1JlZ3VsYXIgZGVwZW5kZW5jeS4KICAgICAgICAgICAgICAgICAgICByZXR1cm4gbWFwLnByZWZpeCA/IHRoaXMuY2FsbFBsdWdpbigpIDogdGhpcy5sb2FkKCk7CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCgogICAgICAgICAgICBsb2FkOiBmdW5jdGlvbiAoKSB7CiAgICAgICAgICAgICAgICB2YXIgdXJsID0gdGhpcy5tYXAudXJsOwoKICAgICAgICAgICAgICAgIC8vUmVndWxhciBkZXBlbmRlbmN5LgogICAgICAgICAgICAgICAgaWYgKCF1cmxGZXRjaGVkW3VybF0pIHsKICAgICAgICAgICAgICAgICAgICB1cmxGZXRjaGVkW3VybF0gPSB0cnVlOwogICAgICAgICAgICAgICAgICAgIGNvbnRleHQubG9hZCh0aGlzLm1hcC5pZCwgdXJsKTsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKCiAgICAgICAgICAgIC8qKgogICAgICAgICAgICAgKiBDaGVja3MgaWYgdGhlIG1vZHVsZSBpcyByZWFkeSB0byBkZWZpbmUgaXRzZWxmLCBhbmQgaWYgc28sCiAgICAgICAgICAgICAqIGRlZmluZSBpdC4KICAgICAgICAgICAgICovCiAgICAgICAgICAgIGNoZWNrOiBmdW5jdGlvbiAoKSB7CiAgICAgICAgICAgICAgICBpZiAoIXRoaXMuZW5hYmxlZCB8fCB0aGlzLmVuYWJsaW5nKSB7CiAgICAgICAgICAgICAgICAgICAgcmV0dXJuOwogICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgIHZhciBlcnIsIGNqc01vZHVsZSwKICAgICAgICAgICAgICAgICAgICBpZCA9IHRoaXMubWFwLmlkLAogICAgICAgICAgICAgICAgICAgIGRlcEV4cG9ydHMgPSB0aGlzLmRlcEV4cG9ydHMsCiAgICAgICAgICAgICAgICAgICAgZXhwb3J0cyA9IHRoaXMuZXhwb3J0cywKICAgICAgICAgICAgICAgICAgICBmYWN0b3J5ID0gdGhpcy5mYWN0b3J5OwoKICAgICAgICAgICAgICAgIGlmICghdGhpcy5pbml0ZWQpIHsKICAgICAgICAgICAgICAgICAgICAvLyBPbmx5IGZldGNoIGlmIG5vdCBhbHJlYWR5IGluIHRoZSBkZWZRdWV1ZS4KICAgICAgICAgICAgICAgICAgICBpZiAoIWhhc1Byb3AoY29udGV4dC5kZWZRdWV1ZU1hcCwgaWQpKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIHRoaXMuZmV0Y2goKTsKICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICB9IGVsc2UgaWYgKHRoaXMuZXJyb3IpIHsKICAgICAgICAgICAgICAgICAgICB0aGlzLmVtaXQoJ2Vycm9yJywgdGhpcy5lcnJvcik7CiAgICAgICAgICAgICAgICB9IGVsc2UgaWYgKCF0aGlzLmRlZmluaW5nKSB7CiAgICAgICAgICAgICAgICAgICAgLy9UaGUgZmFjdG9yeSBjb3VsZCB0cmlnZ2VyIGFub3RoZXIgcmVxdWlyZSBjYWxsCiAgICAgICAgICAgICAgICAgICAgLy90aGF0IHdvdWxkIHJlc3VsdCBpbiBjaGVja2luZyB0aGlzIG1vZHVsZSB0bwogICAgICAgICAgICAgICAgICAgIC8vZGVmaW5lIGl0c2VsZiBhZ2Fpbi4gSWYgYWxyZWFkeSBpbiB0aGUgcHJvY2VzcwogICAgICAgICAgICAgICAgICAgIC8vb2YgZG9pbmcgdGhhdCwgc2tpcCB0aGlzIHdvcmsuCiAgICAgICAgICAgICAgICAgICAgdGhpcy5kZWZpbmluZyA9IHRydWU7CgogICAgICAgICAgICAgICAgICAgIGlmICh0aGlzLmRlcENvdW50IDwgMSAmJiAhdGhpcy5kZWZpbmVkKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIGlmIChpc0Z1bmN0aW9uKGZhY3RvcnkpKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB0cnkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGV4cG9ydHMgPSBjb250ZXh0LmV4ZWNDYihpZCwgZmFjdG9yeSwgZGVwRXhwb3J0cywgZXhwb3J0cyk7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9IGNhdGNoIChlKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZXJyID0gZTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAvLyBGYXZvciByZXR1cm4gdmFsdWUgb3ZlciBleHBvcnRzLiBJZiBub2RlL2NqcyBpbiBwbGF5LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy8gdGhlbiB3aWxsIG5vdCBoYXZlIGEgcmV0dXJuIHZhbHVlIGFueXdheS4gRmF2b3IKICAgICAgICAgICAgICAgICAgICAgICAgICAgIC8vIG1vZHVsZS5leHBvcnRzIGFzc2lnbm1lbnQgb3ZlciBleHBvcnRzIG9iamVjdC4KICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmICh0aGlzLm1hcC5pc0RlZmluZSAmJiBleHBvcnRzID09PSB1bmRlZmluZWQpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjanNNb2R1bGUgPSB0aGlzLm1vZHVsZTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBpZiAoY2pzTW9kdWxlKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGV4cG9ydHMgPSBjanNNb2R1bGUuZXhwb3J0czsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB9IGVsc2UgaWYgKHRoaXMudXNpbmdFeHBvcnRzKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC8vZXhwb3J0cyBhbHJlYWR5IHNldCB0aGUgZGVmaW5lZCB2YWx1ZS4KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZXhwb3J0cyA9IHRoaXMuZXhwb3J0czsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgICAgICAgICAgaWYgKGVycikgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC8vIElmIHRoZXJlIGlzIGFuIGVycm9yIGxpc3RlbmVyLCBmYXZvciBwYXNzaW5nCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgLy8gdG8gdGhhdCBpbnN0ZWFkIG9mIHRocm93aW5nIGFuIGVycm9yLiBIb3dldmVyLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC8vIG9ubHkgZG8gaXQgZm9yIGRlZmluZSgpJ2QgIG1vZHVsZXMuIHJlcXVpcmUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAvLyBlcnJiYWNrcyBzaG91bGQgbm90IGJlIGNhbGxlZCBmb3IgZmFpbHVyZXMgaW4KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAvLyB0aGVpciBjYWxsYmFja3MgKCM2OTkpLiBIb3dldmVyIGlmIGEgZ2xvYmFsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgLy8gb25FcnJvciBpcyBzZXQsIHVzZSB0aGF0LgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmICgodGhpcy5ldmVudHMuZXJyb3IgJiYgdGhpcy5tYXAuaXNEZWZpbmUpIHx8CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJlcS5vbkVycm9yICE9PSBkZWZhdWx0T25FcnJvcikgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBlcnIucmVxdWlyZU1hcCA9IHRoaXMubWFwOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBlcnIucmVxdWlyZU1vZHVsZXMgPSB0aGlzLm1hcC5pc0RlZmluZSA/IFt0aGlzLm1hcC5pZF0gOiBudWxsOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBlcnIucmVxdWlyZVR5cGUgPSB0aGlzLm1hcC5pc0RlZmluZSA/ICdkZWZpbmUnIDogJ3JlcXVpcmUnOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZXR1cm4gb25FcnJvcigodGhpcy5lcnJvciA9IGVycikpOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIH0gZWxzZSBpZiAodHlwZW9mIGNvbnNvbGUgIT09ICd1bmRlZmluZWQnICYmCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb25zb2xlLmVycm9yKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC8vIExvZyB0aGUgZXJyb3IgZm9yIGRlYnVnZ2luZy4gSWYgcHJvbWlzZXMgY291bGQgYmUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgLy8gdXNlZCwgdGhpcyB3b3VsZCBiZSBkaWZmZXJlbnQsIGJ1dCBtYWtpbmcgZG8uCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbnNvbGUuZXJyb3IoZXJyKTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB9IGVsc2UgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAvLyBEbyBub3Qgd2FudCB0byBjb21wbGV0ZWx5IGxvc2UgdGhlIGVycm9yLiBXaGlsZSB0aGlzCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC8vIHdpbGwgbWVzcyB1cCBwcm9jZXNzaW5nIGFuZCBsZWFkIHRvIHNpbWlsYXIgcmVzdWx0cwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAvLyBhcyBidWcgMTQ0MCwgaXQgYXQgbGVhc3Qgc3VyZmFjZXMgdGhlIGVycm9yLgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZXEub25FcnJvcihlcnIpOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIC8vSnVzdCBhIGxpdGVyYWwgdmFsdWUKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGV4cG9ydHMgPSBmYWN0b3J5OwogICAgICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmV4cG9ydHMgPSBleHBvcnRzOwoKICAgICAgICAgICAgICAgICAgICAgICAgaWYgKHRoaXMubWFwLmlzRGVmaW5lICYmICF0aGlzLmlnbm9yZSkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgZGVmaW5lZFtpZF0gPSBleHBvcnRzOwoKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmIChyZXEub25SZXNvdXJjZUxvYWQpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB2YXIgcmVzTG9hZE1hcHMgPSBbXTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBlYWNoKHRoaXMuZGVwTWFwcywgZnVuY3Rpb24gKGRlcE1hcCkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZXNMb2FkTWFwcy5wdXNoKGRlcE1hcC5ub3JtYWxpemVkTWFwIHx8IGRlcE1hcCk7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgfSk7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVxLm9uUmVzb3VyY2VMb2FkKGNvbnRleHQsIHRoaXMubWFwLCByZXNMb2FkTWFwcyk7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAgICAgICAgIC8vQ2xlYW4gdXAKICAgICAgICAgICAgICAgICAgICAgICAgY2xlYW5SZWdpc3RyeShpZCk7CgogICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmRlZmluZWQgPSB0cnVlOwogICAgICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAgICAgLy9GaW5pc2hlZCB0aGUgZGVmaW5lIHN0YWdlLiBBbGxvdyBjYWxsaW5nIGNoZWNrIGFnYWluCiAgICAgICAgICAgICAgICAgICAgLy90byBhbGxvdyBkZWZpbmUgbm90aWZpY2F0aW9ucyBiZWxvdyBpbiB0aGUgY2FzZSBvZiBhCiAgICAgICAgICAgICAgICAgICAgLy9jeWNsZS4KICAgICAgICAgICAgICAgICAgICB0aGlzLmRlZmluaW5nID0gZmFsc2U7CgogICAgICAgICAgICAgICAgICAgIGlmICh0aGlzLmRlZmluZWQgJiYgIXRoaXMuZGVmaW5lRW1pdHRlZCkgewogICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmRlZmluZUVtaXR0ZWQgPSB0cnVlOwogICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmVtaXQoJ2RlZmluZWQnLCB0aGlzLmV4cG9ydHMpOwogICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmRlZmluZUVtaXRDb21wbGV0ZSA9IHRydWU7CiAgICAgICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKCiAgICAgICAgICAgIGNhbGxQbHVnaW46IGZ1bmN0aW9uICgpIHsKICAgICAgICAgICAgICAgIHZhciBtYXAgPSB0aGlzLm1hcCwKICAgICAgICAgICAgICAgICAgICBpZCA9IG1hcC5pZCwKICAgICAgICAgICAgICAgICAgICAvL01hcCBhbHJlYWR5IG5vcm1hbGl6ZWQgdGhlIHByZWZpeC4KICAgICAgICAgICAgICAgICAgICBwbHVnaW5NYXAgPSBtYWtlTW9kdWxlTWFwKG1hcC5wcmVmaXgpOwoKICAgICAgICAgICAgICAgIC8vTWFyayB0aGlzIGFzIGEgZGVwZW5kZW5jeSBmb3IgdGhpcyBwbHVnaW4sIHNvIGl0CiAgICAgICAgICAgICAgICAvL2NhbiBiZSB0cmFjZWQgZm9yIGN5Y2xlcy4KICAgICAgICAgICAgICAgIHRoaXMuZGVwTWFwcy5wdXNoKHBsdWdpbk1hcCk7CgogICAgICAgICAgICAgICAgb24ocGx1Z2luTWFwLCAnZGVmaW5lZCcsIGJpbmQodGhpcywgZnVuY3Rpb24gKHBsdWdpbikgewogICAgICAgICAgICAgICAgICAgIHZhciBsb2FkLCBub3JtYWxpemVkTWFwLCBub3JtYWxpemVkTW9kLAogICAgICAgICAgICAgICAgICAgICAgICBidW5kbGVJZCA9IGdldE93bihidW5kbGVzTWFwLCB0aGlzLm1hcC5pZCksCiAgICAgICAgICAgICAgICAgICAgICAgIG5hbWUgPSB0aGlzLm1hcC5uYW1lLAogICAgICAgICAgICAgICAgICAgICAgICBwYXJlbnROYW1lID0gdGhpcy5tYXAucGFyZW50TWFwID8gdGhpcy5tYXAucGFyZW50TWFwLm5hbWUgOiBudWxsLAogICAgICAgICAgICAgICAgICAgICAgICBsb2NhbFJlcXVpcmUgPSBjb250ZXh0Lm1ha2VSZXF1aXJlKG1hcC5wYXJlbnRNYXAsIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGVuYWJsZUJ1aWxkQ2FsbGJhY2s6IHRydWUKICAgICAgICAgICAgICAgICAgICAgICAgfSk7CgogICAgICAgICAgICAgICAgICAgIC8vSWYgY3VycmVudCBtYXAgaXMgbm90IG5vcm1hbGl6ZWQsIHdhaXQgZm9yIHRoYXQKICAgICAgICAgICAgICAgICAgICAvL25vcm1hbGl6ZWQgbmFtZSB0byBsb2FkIGluc3RlYWQgb2YgY29udGludWluZy4KICAgICAgICAgICAgICAgICAgICBpZiAodGhpcy5tYXAudW5ub3JtYWxpemVkKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIC8vTm9ybWFsaXplIHRoZSBJRCBpZiB0aGUgcGx1Z2luIGFsbG93cyBpdC4KICAgICAgICAgICAgICAgICAgICAgICAgaWYgKHBsdWdpbi5ub3JtYWxpemUpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5hbWUgPSBwbHVnaW4ubm9ybWFsaXplKG5hbWUsIGZ1bmN0aW9uIChuYW1lKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIG5vcm1hbGl6ZShuYW1lLCBwYXJlbnROYW1lLCB0cnVlKTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIH0pIHx8ICcnOwogICAgICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgICAgICAvL3ByZWZpeCBhbmQgbmFtZSBzaG91bGQgYWxyZWFkeSBiZSBub3JtYWxpemVkLCBubyBuZWVkCiAgICAgICAgICAgICAgICAgICAgICAgIC8vZm9yIGFwcGx5aW5nIG1hcCBjb25maWcgYWdhaW4gZWl0aGVyLgogICAgICAgICAgICAgICAgICAgICAgICBub3JtYWxpemVkTWFwID0gbWFrZU1vZHVsZU1hcChtYXAucHJlZml4ICsgJyEnICsgbmFtZSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGhpcy5tYXAucGFyZW50TWFwKTsKICAgICAgICAgICAgICAgICAgICAgICAgb24obm9ybWFsaXplZE1hcCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICdkZWZpbmVkJywgYmluZCh0aGlzLCBmdW5jdGlvbiAodmFsdWUpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0aGlzLm1hcC5ub3JtYWxpemVkTWFwID0gbm9ybWFsaXplZE1hcDsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmluaXQoW10sIGZ1bmN0aW9uICgpIHsgcmV0dXJuIHZhbHVlOyB9LCBudWxsLCB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGVuYWJsZWQ6IHRydWUsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlnbm9yZTogdHJ1ZQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIH0pOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgfSkpOwoKICAgICAgICAgICAgICAgICAgICAgICAgbm9ybWFsaXplZE1vZCA9IGdldE93bihyZWdpc3RyeSwgbm9ybWFsaXplZE1hcC5pZCk7CiAgICAgICAgICAgICAgICAgICAgICAgIGlmIChub3JtYWxpemVkTW9kKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAvL01hcmsgdGhpcyBhcyBhIGRlcGVuZGVuY3kgZm9yIHRoaXMgcGx1Z2luLCBzbyBpdAogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy9jYW4gYmUgdHJhY2VkIGZvciBjeWNsZXMuCiAgICAgICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmRlcE1hcHMucHVzaChub3JtYWxpemVkTWFwKTsKCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBpZiAodGhpcy5ldmVudHMuZXJyb3IpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBub3JtYWxpemVkTW9kLm9uKCdlcnJvcicsIGJpbmQodGhpcywgZnVuY3Rpb24gKGVycikgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmVtaXQoJ2Vycm9yJywgZXJyKTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB9KSk7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgICAgICAgICBub3JtYWxpemVkTW9kLmVuYWJsZSgpOwogICAgICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgICAgICByZXR1cm47CiAgICAgICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgICAgICAvL0lmIGEgcGF0aHMgY29uZmlnLCB0aGVuIGp1c3QgbG9hZCB0aGF0IGZpbGUgaW5zdGVhZCB0bwogICAgICAgICAgICAgICAgICAgIC8vcmVzb2x2ZSB0aGUgcGx1Z2luLCBhcyBpdCBpcyBidWlsdCBpbnRvIHRoYXQgcGF0aHMgbGF5ZXIuCiAgICAgICAgICAgICAgICAgICAgaWYgKGJ1bmRsZUlkKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIHRoaXMubWFwLnVybCA9IGNvbnRleHQubmFtZVRvVXJsKGJ1bmRsZUlkKTsKICAgICAgICAgICAgICAgICAgICAgICAgdGhpcy5sb2FkKCk7CiAgICAgICAgICAgICAgICAgICAgICAgIHJldHVybjsKICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgIGxvYWQgPSBiaW5kKHRoaXMsIGZ1bmN0aW9uICh2YWx1ZSkgewogICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmluaXQoW10sIGZ1bmN0aW9uICgpIHsgcmV0dXJuIHZhbHVlOyB9LCBudWxsLCB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICBlbmFibGVkOiB0cnVlCiAgICAgICAgICAgICAgICAgICAgICAgIH0pOwogICAgICAgICAgICAgICAgICAgIH0pOwoKICAgICAgICAgICAgICAgICAgICBsb2FkLmVycm9yID0gYmluZCh0aGlzLCBmdW5jdGlvbiAoZXJyKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIHRoaXMuaW5pdGVkID0gdHJ1ZTsKICAgICAgICAgICAgICAgICAgICAgICAgdGhpcy5lcnJvciA9IGVycjsKICAgICAgICAgICAgICAgICAgICAgICAgZXJyLnJlcXVpcmVNb2R1bGVzID0gW2lkXTsKCiAgICAgICAgICAgICAgICAgICAgICAgIC8vUmVtb3ZlIHRlbXAgdW5ub3JtYWxpemVkIG1vZHVsZXMgZm9yIHRoaXMgbW9kdWxlLAogICAgICAgICAgICAgICAgICAgICAgICAvL3NpbmNlIHRoZXkgd2lsbCBuZXZlciBiZSByZXNvbHZlZCBvdGhlcndpc2Ugbm93LgogICAgICAgICAgICAgICAgICAgICAgICBlYWNoUHJvcChyZWdpc3RyeSwgZnVuY3Rpb24gKG1vZCkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgaWYgKG1vZC5tYXAuaWQuaW5kZXhPZihpZCArICdfdW5ub3JtYWxpemVkJykgPT09IDApIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjbGVhblJlZ2lzdHJ5KG1vZC5tYXAuaWQpOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgICAgICAgICB9KTsKCiAgICAgICAgICAgICAgICAgICAgICAgIG9uRXJyb3IoZXJyKTsKICAgICAgICAgICAgICAgICAgICB9KTsKCiAgICAgICAgICAgICAgICAgICAgLy9BbGxvdyBwbHVnaW5zIHRvIGxvYWQgb3RoZXIgY29kZSB3aXRob3V0IGhhdmluZyB0byBrbm93IHRoZQogICAgICAgICAgICAgICAgICAgIC8vY29udGV4dCBvciBob3cgdG8gJ2NvbXBsZXRlJyB0aGUgbG9hZC4KICAgICAgICAgICAgICAgICAgICBsb2FkLmZyb21UZXh0ID0gYmluZCh0aGlzLCBmdW5jdGlvbiAodGV4dCwgdGV4dEFsdCkgewogICAgICAgICAgICAgICAgICAgICAgICAvKmpzbGludCBldmlsOiB0cnVlICovCiAgICAgICAgICAgICAgICAgICAgICAgIHZhciBtb2R1bGVOYW1lID0gbWFwLm5hbWUsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBtb2R1bGVNYXAgPSBtYWtlTW9kdWxlTWFwKG1vZHVsZU5hbWUpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgaGFzSW50ZXJhY3RpdmUgPSB1c2VJbnRlcmFjdGl2ZTsKCiAgICAgICAgICAgICAgICAgICAgICAgIC8vQXMgb2YgMi4xLjAsIHN1cHBvcnQganVzdCBwYXNzaW5nIHRoZSB0ZXh0LCB0byByZWluZm9yY2UKICAgICAgICAgICAgICAgICAgICAgICAgLy9mcm9tVGV4dCBvbmx5IGJlaW5nIGNhbGxlZCBvbmNlIHBlciByZXNvdXJjZS4gU3RpbGwKICAgICAgICAgICAgICAgICAgICAgICAgLy9zdXBwb3J0IG9sZCBzdHlsZSBvZiBwYXNzaW5nIG1vZHVsZU5hbWUgYnV0IGRpc2NhcmQKICAgICAgICAgICAgICAgICAgICAgICAgLy90aGF0IG1vZHVsZU5hbWUgaW4gZmF2b3Igb2YgdGhlIGludGVybmFsIHJlZi4KICAgICAgICAgICAgICAgICAgICAgICAgaWYgKHRleHRBbHQpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRleHQgPSB0ZXh0QWx0OwogICAgICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgICAgICAvL1R1cm4gb2ZmIGludGVyYWN0aXZlIHNjcmlwdCBtYXRjaGluZyBmb3IgSUUgZm9yIGFueSBkZWZpbmUKICAgICAgICAgICAgICAgICAgICAgICAgLy9jYWxscyBpbiB0aGUgdGV4dCwgdGhlbiB0dXJuIGl0IGJhY2sgb24gYXQgdGhlIGVuZC4KICAgICAgICAgICAgICAgICAgICAgICAgaWYgKGhhc0ludGVyYWN0aXZlKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB1c2VJbnRlcmFjdGl2ZSA9IGZhbHNlOwogICAgICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgICAgICAvL1ByaW1lIHRoZSBzeXN0ZW0gYnkgY3JlYXRpbmcgYSBtb2R1bGUgaW5zdGFuY2UgZm9yCiAgICAgICAgICAgICAgICAgICAgICAgIC8vaXQuCiAgICAgICAgICAgICAgICAgICAgICAgIGdldE1vZHVsZShtb2R1bGVNYXApOwoKICAgICAgICAgICAgICAgICAgICAgICAgLy9UcmFuc2ZlciBhbnkgY29uZmlnIHRvIHRoaXMgb3RoZXIgbW9kdWxlLgogICAgICAgICAgICAgICAgICAgICAgICBpZiAoaGFzUHJvcChjb25maWcuY29uZmlnLCBpZCkpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbmZpZy5jb25maWdbbW9kdWxlTmFtZV0gPSBjb25maWcuY29uZmlnW2lkXTsKICAgICAgICAgICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgICAgICAgICAgdHJ5IHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJlcS5leGVjKHRleHQpOwogICAgICAgICAgICAgICAgICAgICAgICB9IGNhdGNoIChlKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICByZXR1cm4gb25FcnJvcihtYWtlRXJyb3IoJ2Zyb210ZXh0ZXZhbCcsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICdmcm9tVGV4dCBldmFsIGZvciAnICsgaWQgKwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICcgZmFpbGVkOiAnICsgZSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgW2lkXSkpOwogICAgICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgICAgICBpZiAoaGFzSW50ZXJhY3RpdmUpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHVzZUludGVyYWN0aXZlID0gdHJ1ZTsKICAgICAgICAgICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgICAgICAgICAgLy9NYXJrIHRoaXMgYXMgYSBkZXBlbmRlbmN5IGZvciB0aGUgcGx1Z2luCiAgICAgICAgICAgICAgICAgICAgICAgIC8vcmVzb3VyY2UKICAgICAgICAgICAgICAgICAgICAgICAgdGhpcy5kZXBNYXBzLnB1c2gobW9kdWxlTWFwKTsKCiAgICAgICAgICAgICAgICAgICAgICAgIC8vU3VwcG9ydCBhbm9ueW1vdXMgbW9kdWxlcy4KICAgICAgICAgICAgICAgICAgICAgICAgY29udGV4dC5jb21wbGV0ZUxvYWQobW9kdWxlTmFtZSk7CgogICAgICAgICAgICAgICAgICAgICAgICAvL0JpbmQgdGhlIHZhbHVlIG9mIHRoYXQgbW9kdWxlIHRvIHRoZSB2YWx1ZSBmb3IgdGhpcwogICAgICAgICAgICAgICAgICAgICAgICAvL3Jlc291cmNlIElELgogICAgICAgICAgICAgICAgICAgICAgICBsb2NhbFJlcXVpcmUoW21vZHVsZU5hbWVdLCBsb2FkKTsKICAgICAgICAgICAgICAgICAgICB9KTsKCiAgICAgICAgICAgICAgICAgICAgLy9Vc2UgcGFyZW50TmFtZSBoZXJlIHNpbmNlIHRoZSBwbHVnaW4ncyBuYW1lIGlzIG5vdCByZWxpYWJsZSwKICAgICAgICAgICAgICAgICAgICAvL2NvdWxkIGJlIHNvbWUgd2VpcmQgc3RyaW5nIHdpdGggbm8gcGF0aCB0aGF0IGFjdHVhbGx5IHdhbnRzIHRvCiAgICAgICAgICAgICAgICAgICAgLy9yZWZlcmVuY2UgdGhlIHBhcmVudE5hbWUncyBwYXRoLgogICAgICAgICAgICAgICAgICAgIHBsdWdpbi5sb2FkKG1hcC5uYW1lLCBsb2NhbFJlcXVpcmUsIGxvYWQsIGNvbmZpZyk7CiAgICAgICAgICAgICAgICB9KSk7CgogICAgICAgICAgICAgICAgY29udGV4dC5lbmFibGUocGx1Z2luTWFwLCB0aGlzKTsKICAgICAgICAgICAgICAgIHRoaXMucGx1Z2luTWFwc1twbHVnaW5NYXAuaWRdID0gcGx1Z2luTWFwOwogICAgICAgICAgICB9LAoKICAgICAgICAgICAgZW5hYmxlOiBmdW5jdGlvbiAoKSB7CiAgICAgICAgICAgICAgICBlbmFibGVkUmVnaXN0cnlbdGhpcy5tYXAuaWRdID0gdGhpczsKICAgICAgICAgICAgICAgIHRoaXMuZW5hYmxlZCA9IHRydWU7CgogICAgICAgICAgICAgICAgLy9TZXQgZmxhZyBtZW50aW9uaW5nIHRoYXQgdGhlIG1vZHVsZSBpcyBlbmFibGluZywKICAgICAgICAgICAgICAgIC8vc28gdGhhdCBpbW1lZGlhdGUgY2FsbHMgdG8gdGhlIGRlZmluZWQgY2FsbGJhY2tzCiAgICAgICAgICAgICAgICAvL2ZvciBkZXBlbmRlbmNpZXMgZG8gbm90IHRyaWdnZXIgaW5hZHZlcnRlbnQgbG9hZAogICAgICAgICAgICAgICAgLy93aXRoIHRoZSBkZXBDb3VudCBzdGlsbCBiZWluZyB6ZXJvLgogICAgICAgICAgICAgICAgdGhpcy5lbmFibGluZyA9IHRydWU7CgogICAgICAgICAgICAgICAgLy9FbmFibGUgZWFjaCBkZXBlbmRlbmN5CiAgICAgICAgICAgICAgICBlYWNoKHRoaXMuZGVwTWFwcywgYmluZCh0aGlzLCBmdW5jdGlvbiAoZGVwTWFwLCBpKSB7CiAgICAgICAgICAgICAgICAgICAgdmFyIGlkLCBtb2QsIGhhbmRsZXI7CgogICAgICAgICAgICAgICAgICAgIGlmICh0eXBlb2YgZGVwTWFwID09PSAnc3RyaW5nJykgewogICAgICAgICAgICAgICAgICAgICAgICAvL0RlcGVuZGVuY3kgbmVlZHMgdG8gYmUgY29udmVydGVkIHRvIGEgZGVwTWFwCiAgICAgICAgICAgICAgICAgICAgICAgIC8vYW5kIHdpcmVkIHVwIHRvIHRoaXMgbW9kdWxlLgogICAgICAgICAgICAgICAgICAgICAgICBkZXBNYXAgPSBtYWtlTW9kdWxlTWFwKGRlcE1hcCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAodGhpcy5tYXAuaXNEZWZpbmUgPyB0aGlzLm1hcCA6IHRoaXMubWFwLnBhcmVudE1hcCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZmFsc2UsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIXRoaXMuc2tpcE1hcCk7CiAgICAgICAgICAgICAgICAgICAgICAgIHRoaXMuZGVwTWFwc1tpXSA9IGRlcE1hcDsKCiAgICAgICAgICAgICAgICAgICAgICAgIGhhbmRsZXIgPSBnZXRPd24oaGFuZGxlcnMsIGRlcE1hcC5pZCk7CgogICAgICAgICAgICAgICAgICAgICAgICBpZiAoaGFuZGxlcikgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgdGhpcy5kZXBFeHBvcnRzW2ldID0gaGFuZGxlcih0aGlzKTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJldHVybjsKICAgICAgICAgICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgICAgICAgICAgdGhpcy5kZXBDb3VudCArPSAxOwoKICAgICAgICAgICAgICAgICAgICAgICAgb24oZGVwTWFwLCAnZGVmaW5lZCcsIGJpbmQodGhpcywgZnVuY3Rpb24gKGRlcEV4cG9ydHMpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmICh0aGlzLnVuZGVmZWQpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByZXR1cm47CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB0aGlzLmRlZmluZURlcChpLCBkZXBFeHBvcnRzKTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRoaXMuY2hlY2soKTsKICAgICAgICAgICAgICAgICAgICAgICAgfSkpOwoKICAgICAgICAgICAgICAgICAgICAgICAgaWYgKHRoaXMuZXJyYmFjaykgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgb24oZGVwTWFwLCAnZXJyb3InLCBiaW5kKHRoaXMsIHRoaXMuZXJyYmFjaykpOwogICAgICAgICAgICAgICAgICAgICAgICB9IGVsc2UgaWYgKHRoaXMuZXZlbnRzLmVycm9yKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAvLyBObyBkaXJlY3QgZXJyYmFjayBvbiB0aGlzIG1vZHVsZSwgYnV0IHNvbWV0aGluZwogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy8gZWxzZSBpcyBsaXN0ZW5pbmcgZm9yIGVycm9ycywgc28gYmUgc3VyZSB0bwogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy8gcHJvcGFnYXRlIHRoZSBlcnJvciBjb3JyZWN0bHkuCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBvbihkZXBNYXAsICdlcnJvcicsIGJpbmQodGhpcywgZnVuY3Rpb24oZXJyKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGhpcy5lbWl0KCdlcnJvcicsIGVycik7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9KSk7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgIGlkID0gZGVwTWFwLmlkOwogICAgICAgICAgICAgICAgICAgIG1vZCA9IHJlZ2lzdHJ5W2lkXTsKCiAgICAgICAgICAgICAgICAgICAgLy9Ta2lwIHNwZWNpYWwgbW9kdWxlcyBsaWtlICdyZXF1aXJlJywgJ2V4cG9ydHMnLCAnbW9kdWxlJwogICAgICAgICAgICAgICAgICAgIC8vQWxzbywgZG9uJ3QgY2FsbCBlbmFibGUgaWYgaXQgaXMgYWxyZWFkeSBlbmFibGVkLAogICAgICAgICAgICAgICAgICAgIC8vaW1wb3J0YW50IGluIGNpcmN1bGFyIGRlcGVuZGVuY3kgY2FzZXMuCiAgICAgICAgICAgICAgICAgICAgaWYgKCFoYXNQcm9wKGhhbmRsZXJzLCBpZCkgJiYgbW9kICYmICFtb2QuZW5hYmxlZCkgewogICAgICAgICAgICAgICAgICAgICAgICBjb250ZXh0LmVuYWJsZShkZXBNYXAsIHRoaXMpOwogICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIH0pKTsKCiAgICAgICAgICAgICAgICAvL0VuYWJsZSBlYWNoIHBsdWdpbiB0aGF0IGlzIHVzZWQgaW4KICAgICAgICAgICAgICAgIC8vYSBkZXBlbmRlbmN5CiAgICAgICAgICAgICAgICBlYWNoUHJvcCh0aGlzLnBsdWdpbk1hcHMsIGJpbmQodGhpcywgZnVuY3Rpb24gKHBsdWdpbk1hcCkgewogICAgICAgICAgICAgICAgICAgIHZhciBtb2QgPSBnZXRPd24ocmVnaXN0cnksIHBsdWdpbk1hcC5pZCk7CiAgICAgICAgICAgICAgICAgICAgaWYgKG1vZCAmJiAhbW9kLmVuYWJsZWQpIHsKICAgICAgICAgICAgICAgICAgICAgICAgY29udGV4dC5lbmFibGUocGx1Z2luTWFwLCB0aGlzKTsKICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICB9KSk7CgogICAgICAgICAgICAgICAgdGhpcy5lbmFibGluZyA9IGZhbHNlOwoKICAgICAgICAgICAgICAgIHRoaXMuY2hlY2soKTsKICAgICAgICAgICAgfSwKCiAgICAgICAgICAgIG9uOiBmdW5jdGlvbiAobmFtZSwgY2IpIHsKICAgICAgICAgICAgICAgIHZhciBjYnMgPSB0aGlzLmV2ZW50c1tuYW1lXTsKICAgICAgICAgICAgICAgIGlmICghY2JzKSB7CiAgICAgICAgICAgICAgICAgICAgY2JzID0gdGhpcy5ldmVudHNbbmFtZV0gPSBbXTsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIGNicy5wdXNoKGNiKTsKICAgICAgICAgICAgfSwKCiAgICAgICAgICAgIGVtaXQ6IGZ1bmN0aW9uIChuYW1lLCBldnQpIHsKICAgICAgICAgICAgICAgIGVhY2godGhpcy5ldmVudHNbbmFtZV0sIGZ1bmN0aW9uIChjYikgewogICAgICAgICAgICAgICAgICAgIGNiKGV2dCk7CiAgICAgICAgICAgICAgICB9KTsKICAgICAgICAgICAgICAgIGlmIChuYW1lID09PSAnZXJyb3InKSB7CiAgICAgICAgICAgICAgICAgICAgLy9Ob3cgdGhhdCB0aGUgZXJyb3IgaGFuZGxlciB3YXMgdHJpZ2dlcmVkLCByZW1vdmUKICAgICAgICAgICAgICAgICAgICAvL3RoZSBsaXN0ZW5lcnMsIHNpbmNlIHRoaXMgYnJva2VuIE1vZHVsZSBpbnN0YW5jZQogICAgICAgICAgICAgICAgICAgIC8vY2FuIHN0YXkgYXJvdW5kIGZvciBhIHdoaWxlIGluIHRoZSByZWdpc3RyeS4KICAgICAgICAgICAgICAgICAgICBkZWxldGUgdGhpcy5ldmVudHNbbmFtZV07CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICB9OwoKICAgICAgICBmdW5jdGlvbiBjYWxsR2V0TW9kdWxlKGFyZ3MpIHsKICAgICAgICAgICAgLy9Ta2lwIG1vZHVsZXMgYWxyZWFkeSBkZWZpbmVkLgogICAgICAgICAgICBpZiAoIWhhc1Byb3AoZGVmaW5lZCwgYXJnc1swXSkpIHsKICAgICAgICAgICAgICAgIGdldE1vZHVsZShtYWtlTW9kdWxlTWFwKGFyZ3NbMF0sIG51bGwsIHRydWUpKS5pbml0KGFyZ3NbMV0sIGFyZ3NbMl0pOwogICAgICAgICAgICB9CiAgICAgICAgfQoKICAgICAgICBmdW5jdGlvbiByZW1vdmVMaXN0ZW5lcihub2RlLCBmdW5jLCBuYW1lLCBpZU5hbWUpIHsKICAgICAgICAgICAgLy9GYXZvciBkZXRhY2hFdmVudCBiZWNhdXNlIG9mIElFOQogICAgICAgICAgICAvL2lzc3VlLCBzZWUgYXR0YWNoRXZlbnQvYWRkRXZlbnRMaXN0ZW5lciBjb21tZW50IGVsc2V3aGVyZQogICAgICAgICAgICAvL2luIHRoaXMgZmlsZS4KICAgICAgICAgICAgaWYgKG5vZGUuZGV0YWNoRXZlbnQgJiYgIWlzT3BlcmEpIHsKICAgICAgICAgICAgICAgIC8vUHJvYmFibHkgSUUuIElmIG5vdCBpdCB3aWxsIHRocm93IGFuIGVycm9yLCB3aGljaCB3aWxsIGJlCiAgICAgICAgICAgICAgICAvL3VzZWZ1bCB0byBrbm93LgogICAgICAgICAgICAgICAgaWYgKGllTmFtZSkgewogICAgICAgICAgICAgICAgICAgIG5vZGUuZGV0YWNoRXZlbnQoaWVOYW1lLCBmdW5jKTsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgIG5vZGUucmVtb3ZlRXZlbnRMaXN0ZW5lcihuYW1lLCBmdW5jLCBmYWxzZSk7CiAgICAgICAgICAgIH0KICAgICAgICB9CgogICAgICAgIC8qKgogICAgICAgICAqIEdpdmVuIGFuIGV2ZW50IGZyb20gYSBzY3JpcHQgbm9kZSwgZ2V0IHRoZSByZXF1aXJlanMgaW5mbyBmcm9tIGl0LAogICAgICAgICAqIGFuZCB0aGVuIHJlbW92ZXMgdGhlIGV2ZW50IGxpc3RlbmVycyBvbiB0aGUgbm9kZS4KICAgICAgICAgKiBAcGFyYW0ge0V2ZW50fSBldnQKICAgICAgICAgKiBAcmV0dXJucyB7T2JqZWN0fQogICAgICAgICAqLwogICAgICAgIGZ1bmN0aW9uIGdldFNjcmlwdERhdGEoZXZ0KSB7CiAgICAgICAgICAgIC8vVXNpbmcgY3VycmVudFRhcmdldCBpbnN0ZWFkIG9mIHRhcmdldCBmb3IgRmlyZWZveCAyLjAncyBzYWtlLiBOb3QKICAgICAgICAgICAgLy9hbGwgb2xkIGJyb3dzZXJzIHdpbGwgYmUgc3VwcG9ydGVkLCBidXQgdGhpcyBvbmUgd2FzIGVhc3kgZW5vdWdoCiAgICAgICAgICAgIC8vdG8gc3VwcG9ydCBhbmQgc3RpbGwgbWFrZXMgc2Vuc2UuCiAgICAgICAgICAgIHZhciBub2RlID0gZXZ0LmN1cnJlbnRUYXJnZXQgfHwgZXZ0LnNyY0VsZW1lbnQ7CgogICAgICAgICAgICAvL1JlbW92ZSB0aGUgbGlzdGVuZXJzIG9uY2UgaGVyZS4KICAgICAgICAgICAgcmVtb3ZlTGlzdGVuZXIobm9kZSwgY29udGV4dC5vblNjcmlwdExvYWQsICdsb2FkJywgJ29ucmVhZHlzdGF0ZWNoYW5nZScpOwogICAgICAgICAgICByZW1vdmVMaXN0ZW5lcihub2RlLCBjb250ZXh0Lm9uU2NyaXB0RXJyb3IsICdlcnJvcicpOwoKICAgICAgICAgICAgcmV0dXJuIHsKICAgICAgICAgICAgICAgIG5vZGU6IG5vZGUsCiAgICAgICAgICAgICAgICBpZDogbm9kZSAmJiBub2RlLmdldEF0dHJpYnV0ZSgnZGF0YS1yZXF1aXJlbW9kdWxlJykKICAgICAgICAgICAgfTsKICAgICAgICB9CgogICAgICAgIGZ1bmN0aW9uIGludGFrZURlZmluZXMoKSB7CiAgICAgICAgICAgIHZhciBhcmdzOwoKICAgICAgICAgICAgLy9BbnkgZGVmaW5lZCBtb2R1bGVzIGluIHRoZSBnbG9iYWwgcXVldWUsIGludGFrZSB0aGVtIG5vdy4KICAgICAgICAgICAgdGFrZUdsb2JhbFF1ZXVlKCk7CgogICAgICAgICAgICAvL01ha2Ugc3VyZSBhbnkgcmVtYWluaW5nIGRlZlF1ZXVlIGl0ZW1zIGdldCBwcm9wZXJseSBwcm9jZXNzZWQuCiAgICAgICAgICAgIHdoaWxlIChkZWZRdWV1ZS5sZW5ndGgpIHsKICAgICAgICAgICAgICAgIGFyZ3MgPSBkZWZRdWV1ZS5zaGlmdCgpOwogICAgICAgICAgICAgICAgaWYgKGFyZ3NbMF0gPT09IG51bGwpIHsKICAgICAgICAgICAgICAgICAgICByZXR1cm4gb25FcnJvcihtYWtlRXJyb3IoJ21pc21hdGNoJywgJ01pc21hdGNoZWQgYW5vbnltb3VzIGRlZmluZSgpIG1vZHVsZTogJyArCiAgICAgICAgICAgICAgICAgICAgICAgIGFyZ3NbYXJncy5sZW5ndGggLSAxXSkpOwogICAgICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgICAgICAvL2FyZ3MgYXJlIGlkLCBkZXBzLCBmYWN0b3J5LiBTaG91bGQgYmUgbm9ybWFsaXplZCBieSB0aGUKICAgICAgICAgICAgICAgICAgICAvL2RlZmluZSgpIGZ1bmN0aW9uLgogICAgICAgICAgICAgICAgICAgIGNhbGxHZXRNb2R1bGUoYXJncyk7CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgICAgY29udGV4dC5kZWZRdWV1ZU1hcCA9IHt9OwogICAgICAgIH0KCiAgICAgICAgY29udGV4dCA9IHsKICAgICAgICAgICAgY29uZmlnOiBjb25maWcsCiAgICAgICAgICAgIGNvbnRleHROYW1lOiBjb250ZXh0TmFtZSwKICAgICAgICAgICAgcmVnaXN0cnk6IHJlZ2lzdHJ5LAogICAgICAgICAgICBkZWZpbmVkOiBkZWZpbmVkLAogICAgICAgICAgICB1cmxGZXRjaGVkOiB1cmxGZXRjaGVkLAogICAgICAgICAgICBkZWZRdWV1ZTogZGVmUXVldWUsCiAgICAgICAgICAgIGRlZlF1ZXVlTWFwOiB7fSwKICAgICAgICAgICAgTW9kdWxlOiBNb2R1bGUsCiAgICAgICAgICAgIG1ha2VNb2R1bGVNYXA6IG1ha2VNb2R1bGVNYXAsCiAgICAgICAgICAgIG5leHRUaWNrOiByZXEubmV4dFRpY2ssCiAgICAgICAgICAgIG9uRXJyb3I6IG9uRXJyb3IsCgogICAgICAgICAgICAvKioKICAgICAgICAgICAgICogU2V0IGEgY29uZmlndXJhdGlvbiBmb3IgdGhlIGNvbnRleHQuCiAgICAgICAgICAgICAqIEBwYXJhbSB7T2JqZWN0fSBjZmcgY29uZmlnIG9iamVjdCB0byBpbnRlZ3JhdGUuCiAgICAgICAgICAgICAqLwogICAgICAgICAgICBjb25maWd1cmU6IGZ1bmN0aW9uIChjZmcpIHsKICAgICAgICAgICAgICAgIC8vTWFrZSBzdXJlIHRoZSBiYXNlVXJsIGVuZHMgaW4gYSBzbGFzaC4KICAgICAgICAgICAgICAgIGlmIChjZmcuYmFzZVVybCkgewogICAgICAgICAgICAgICAgICAgIGlmIChjZmcuYmFzZVVybC5jaGFyQXQoY2ZnLmJhc2VVcmwubGVuZ3RoIC0gMSkgIT09ICcvJykgewogICAgICAgICAgICAgICAgICAgICAgICBjZmcuYmFzZVVybCArPSAnLyc7CiAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgIC8vU2F2ZSBvZmYgdGhlIHBhdGhzIHNpbmNlIHRoZXkgcmVxdWlyZSBzcGVjaWFsIHByb2Nlc3NpbmcsCiAgICAgICAgICAgICAgICAvL3RoZXkgYXJlIGFkZGl0aXZlLgogICAgICAgICAgICAgICAgdmFyIHNoaW0gPSBjb25maWcuc2hpbSwKICAgICAgICAgICAgICAgICAgICBvYmpzID0gewogICAgICAgICAgICAgICAgICAgICAgICBwYXRoczogdHJ1ZSwKICAgICAgICAgICAgICAgICAgICAgICAgYnVuZGxlczogdHJ1ZSwKICAgICAgICAgICAgICAgICAgICAgICAgY29uZmlnOiB0cnVlLAogICAgICAgICAgICAgICAgICAgICAgICBtYXA6IHRydWUKICAgICAgICAgICAgICAgICAgICB9OwoKICAgICAgICAgICAgICAgIGVhY2hQcm9wKGNmZywgZnVuY3Rpb24gKHZhbHVlLCBwcm9wKSB7CiAgICAgICAgICAgICAgICAgICAgaWYgKG9ianNbcHJvcF0pIHsKICAgICAgICAgICAgICAgICAgICAgICAgaWYgKCFjb25maWdbcHJvcF0pIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbmZpZ1twcm9wXSA9IHt9OwogICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgICAgIG1peGluKGNvbmZpZ1twcm9wXSwgdmFsdWUsIHRydWUsIHRydWUpOwogICAgICAgICAgICAgICAgICAgIH0gZWxzZSB7CiAgICAgICAgICAgICAgICAgICAgICAgIGNvbmZpZ1twcm9wXSA9IHZhbHVlOwogICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIH0pOwoKICAgICAgICAgICAgICAgIC8vUmV2ZXJzZSBtYXAgdGhlIGJ1bmRsZXMKICAgICAgICAgICAgICAgIGlmIChjZmcuYnVuZGxlcykgewogICAgICAgICAgICAgICAgICAgIGVhY2hQcm9wKGNmZy5idW5kbGVzLCBmdW5jdGlvbiAodmFsdWUsIHByb3ApIHsKICAgICAgICAgICAgICAgICAgICAgICAgZWFjaCh2YWx1ZSwgZnVuY3Rpb24gKHYpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmICh2ICE9PSBwcm9wKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYnVuZGxlc01hcFt2XSA9IHByb3A7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgICAgIH0pOwogICAgICAgICAgICAgICAgICAgIH0pOwogICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgIC8vTWVyZ2Ugc2hpbQogICAgICAgICAgICAgICAgaWYgKGNmZy5zaGltKSB7CiAgICAgICAgICAgICAgICAgICAgZWFjaFByb3AoY2ZnLnNoaW0sIGZ1bmN0aW9uICh2YWx1ZSwgaWQpIHsKICAgICAgICAgICAgICAgICAgICAgICAgLy9Ob3JtYWxpemUgdGhlIHN0cnVjdHVyZQogICAgICAgICAgICAgICAgICAgICAgICBpZiAoaXNBcnJheSh2YWx1ZSkpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIHZhbHVlID0gewogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRlcHM6IHZhbHVlCiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9OwogICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgICAgIGlmICgodmFsdWUuZXhwb3J0cyB8fCB2YWx1ZS5pbml0KSAmJiAhdmFsdWUuZXhwb3J0c0ZuKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB2YWx1ZS5leHBvcnRzRm4gPSBjb250ZXh0Lm1ha2VTaGltRXhwb3J0cyh2YWx1ZSk7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICAgICAgc2hpbVtpZF0gPSB2YWx1ZTsKICAgICAgICAgICAgICAgICAgICB9KTsKICAgICAgICAgICAgICAgICAgICBjb25maWcuc2hpbSA9IHNoaW07CiAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgLy9BZGp1c3QgcGFja2FnZXMgaWYgbmVjZXNzYXJ5LgogICAgICAgICAgICAgICAgaWYgKGNmZy5wYWNrYWdlcykgewogICAgICAgICAgICAgICAgICAgIGVhY2goY2ZnLnBhY2thZ2VzLCBmdW5jdGlvbiAocGtnT2JqKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIHZhciBsb2NhdGlvbiwgbmFtZTsKCiAgICAgICAgICAgICAgICAgICAgICAgIHBrZ09iaiA9IHR5cGVvZiBwa2dPYmogPT09ICdzdHJpbmcnID8ge25hbWU6IHBrZ09ian0gOiBwa2dPYmo7CgogICAgICAgICAgICAgICAgICAgICAgICBuYW1lID0gcGtnT2JqLm5hbWU7CiAgICAgICAgICAgICAgICAgICAgICAgIGxvY2F0aW9uID0gcGtnT2JqLmxvY2F0aW9uOwogICAgICAgICAgICAgICAgICAgICAgICBpZiAobG9jYXRpb24pIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbmZpZy5wYXRoc1tuYW1lXSA9IHBrZ09iai5sb2NhdGlvbjsKICAgICAgICAgICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgICAgICAgICAgLy9TYXZlIHBvaW50ZXIgdG8gbWFpbiBtb2R1bGUgSUQgZm9yIHBrZyBuYW1lLgogICAgICAgICAgICAgICAgICAgICAgICAvL1JlbW92ZSBsZWFkaW5nIGRvdCBpbiBtYWluLCBzbyBtYWluIHBhdGhzIGFyZSBub3JtYWxpemVkLAogICAgICAgICAgICAgICAgICAgICAgICAvL2FuZCByZW1vdmUgYW55IHRyYWlsaW5nIC5qcywgc2luY2UgZGlmZmVyZW50IHBhY2thZ2UKICAgICAgICAgICAgICAgICAgICAgICAgLy9lbnZzIGhhdmUgZGlmZmVyZW50IGNvbnZlbnRpb25zOiBzb21lIHVzZSBhIG1vZHVsZSBuYW1lLAogICAgICAgICAgICAgICAgICAgICAgICAvL3NvbWUgdXNlIGEgZmlsZSBuYW1lLgogICAgICAgICAgICAgICAgICAgICAgICBjb25maWcucGtnc1tuYW1lXSA9IHBrZ09iai5uYW1lICsgJy8nICsgKHBrZ09iai5tYWluIHx8ICdtYWluJykKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC5yZXBsYWNlKGN1cnJEaXJSZWdFeHAsICcnKQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgLnJlcGxhY2UoanNTdWZmaXhSZWdFeHAsICcnKTsKICAgICAgICAgICAgICAgICAgICB9KTsKICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAvL0lmIHRoZXJlIGFyZSBhbnkgIndhaXRpbmcgdG8gZXhlY3V0ZSIgbW9kdWxlcyBpbiB0aGUgcmVnaXN0cnksCiAgICAgICAgICAgICAgICAvL3VwZGF0ZSB0aGUgbWFwcyBmb3IgdGhlbSwgc2luY2UgdGhlaXIgaW5mbywgbGlrZSBVUkxzIHRvIGxvYWQsCiAgICAgICAgICAgICAgICAvL21heSBoYXZlIGNoYW5nZWQuCiAgICAgICAgICAgICAgICBlYWNoUHJvcChyZWdpc3RyeSwgZnVuY3Rpb24gKG1vZCwgaWQpIHsKICAgICAgICAgICAgICAgICAgICAvL0lmIG1vZHVsZSBhbHJlYWR5IGhhcyBpbml0IGNhbGxlZCwgc2luY2UgaXQgaXMgdG9vCiAgICAgICAgICAgICAgICAgICAgLy9sYXRlIHRvIG1vZGlmeSB0aGVtLCBhbmQgaWdub3JlIHVubm9ybWFsaXplZCBvbmVzCiAgICAgICAgICAgICAgICAgICAgLy9zaW5jZSB0aGV5IGFyZSB0cmFuc2llbnQuCiAgICAgICAgICAgICAgICAgICAgaWYgKCFtb2QuaW5pdGVkICYmICFtb2QubWFwLnVubm9ybWFsaXplZCkgewogICAgICAgICAgICAgICAgICAgICAgICBtb2QubWFwID0gbWFrZU1vZHVsZU1hcChpZCwgbnVsbCwgdHJ1ZSk7CiAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgfSk7CgogICAgICAgICAgICAgICAgLy9JZiBhIGRlcHMgYXJyYXkgb3IgYSBjb25maWcgY2FsbGJhY2sgaXMgc3BlY2lmaWVkLCB0aGVuIGNhbGwKICAgICAgICAgICAgICAgIC8vcmVxdWlyZSB3aXRoIHRob3NlIGFyZ3MuIFRoaXMgaXMgdXNlZnVsIHdoZW4gcmVxdWlyZSBpcyBkZWZpbmVkIGFzIGEKICAgICAgICAgICAgICAgIC8vY29uZmlnIG9iamVjdCBiZWZvcmUgcmVxdWlyZS5qcyBpcyBsb2FkZWQuCiAgICAgICAgICAgICAgICBpZiAoY2ZnLmRlcHMgfHwgY2ZnLmNhbGxiYWNrKSB7CiAgICAgICAgICAgICAgICAgICAgY29udGV4dC5yZXF1aXJlKGNmZy5kZXBzIHx8IFtdLCBjZmcuY2FsbGJhY2spOwogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAoKICAgICAgICAgICAgbWFrZVNoaW1FeHBvcnRzOiBmdW5jdGlvbiAodmFsdWUpIHsKICAgICAgICAgICAgICAgIGZ1bmN0aW9uIGZuKCkgewogICAgICAgICAgICAgICAgICAgIHZhciByZXQ7CiAgICAgICAgICAgICAgICAgICAgaWYgKHZhbHVlLmluaXQpIHsKICAgICAgICAgICAgICAgICAgICAgICAgcmV0ID0gdmFsdWUuaW5pdC5hcHBseShnbG9iYWwsIGFyZ3VtZW50cyk7CiAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgICAgIHJldHVybiByZXQgfHwgKHZhbHVlLmV4cG9ydHMgJiYgZ2V0R2xvYmFsKHZhbHVlLmV4cG9ydHMpKTsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIHJldHVybiBmbjsKICAgICAgICAgICAgfSwKCiAgICAgICAgICAgIG1ha2VSZXF1aXJlOiBmdW5jdGlvbiAocmVsTWFwLCBvcHRpb25zKSB7CiAgICAgICAgICAgICAgICBvcHRpb25zID0gb3B0aW9ucyB8fCB7fTsKCiAgICAgICAgICAgICAgICBmdW5jdGlvbiBsb2NhbFJlcXVpcmUoZGVwcywgY2FsbGJhY2ssIGVycmJhY2spIHsKICAgICAgICAgICAgICAgICAgICB2YXIgaWQsIG1hcCwgcmVxdWlyZU1vZDsKCiAgICAgICAgICAgICAgICAgICAgaWYgKG9wdGlvbnMuZW5hYmxlQnVpbGRDYWxsYmFjayAmJiBjYWxsYmFjayAmJiBpc0Z1bmN0aW9uKGNhbGxiYWNrKSkgewogICAgICAgICAgICAgICAgICAgICAgICBjYWxsYmFjay5fX3JlcXVpcmVKc0J1aWxkID0gdHJ1ZTsKICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgIGlmICh0eXBlb2YgZGVwcyA9PT0gJ3N0cmluZycpIHsKICAgICAgICAgICAgICAgICAgICAgICAgaWYgKGlzRnVuY3Rpb24oY2FsbGJhY2spKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAvL0ludmFsaWQgY2FsbAogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIG9uRXJyb3IobWFrZUVycm9yKCdyZXF1aXJlYXJncycsICdJbnZhbGlkIHJlcXVpcmUgY2FsbCcpLCBlcnJiYWNrKTsKICAgICAgICAgICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgICAgICAgICAgLy9JZiByZXF1aXJlfGV4cG9ydHN8bW9kdWxlIGFyZSByZXF1ZXN0ZWQsIGdldCB0aGUKICAgICAgICAgICAgICAgICAgICAgICAgLy92YWx1ZSBmb3IgdGhlbSBmcm9tIHRoZSBzcGVjaWFsIGhhbmRsZXJzLiBDYXZlYXQ6CiAgICAgICAgICAgICAgICAgICAgICAgIC8vdGhpcyBvbmx5IHdvcmtzIHdoaWxlIG1vZHVsZSBpcyBiZWluZyBkZWZpbmVkLgogICAgICAgICAgICAgICAgICAgICAgICBpZiAocmVsTWFwICYmIGhhc1Byb3AoaGFuZGxlcnMsIGRlcHMpKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICByZXR1cm4gaGFuZGxlcnNbZGVwc10ocmVnaXN0cnlbcmVsTWFwLmlkXSk7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAgICAgICAgIC8vU3luY2hyb25vdXMgYWNjZXNzIHRvIG9uZSBtb2R1bGUuIElmIHJlcXVpcmUuZ2V0IGlzCiAgICAgICAgICAgICAgICAgICAgICAgIC8vYXZhaWxhYmxlIChhcyBpbiB0aGUgTm9kZSBhZGFwdGVyKSwgcHJlZmVyIHRoYXQuCiAgICAgICAgICAgICAgICAgICAgICAgIGlmIChyZXEuZ2V0KSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICByZXR1cm4gcmVxLmdldChjb250ZXh0LCBkZXBzLCByZWxNYXAsIGxvY2FsUmVxdWlyZSk7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAgICAgICAgIC8vTm9ybWFsaXplIG1vZHVsZSBuYW1lLCBpZiBpdCBjb250YWlucyAuIG9yIC4uCiAgICAgICAgICAgICAgICAgICAgICAgIG1hcCA9IG1ha2VNb2R1bGVNYXAoZGVwcywgcmVsTWFwLCBmYWxzZSwgdHJ1ZSk7CiAgICAgICAgICAgICAgICAgICAgICAgIGlkID0gbWFwLmlkOwoKICAgICAgICAgICAgICAgICAgICAgICAgaWYgKCFoYXNQcm9wKGRlZmluZWQsIGlkKSkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIG9uRXJyb3IobWFrZUVycm9yKCdub3Rsb2FkZWQnLCAnTW9kdWxlIG5hbWUgIicgKwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaWQgKwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJyIgaGFzIG5vdCBiZWVuIGxvYWRlZCB5ZXQgZm9yIGNvbnRleHQ6ICcgKwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29udGV4dE5hbWUgKwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKHJlbE1hcCA/ICcnIDogJy4gVXNlIHJlcXVpcmUoW10pJykpKTsKICAgICAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgICAgICAgICByZXR1cm4gZGVmaW5lZFtpZF07CiAgICAgICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgICAgICAvL0dyYWIgZGVmaW5lcyB3YWl0aW5nIGluIHRoZSBnbG9iYWwgcXVldWUuCiAgICAgICAgICAgICAgICAgICAgaW50YWtlRGVmaW5lcygpOwoKICAgICAgICAgICAgICAgICAgICAvL01hcmsgYWxsIHRoZSBkZXBlbmRlbmNpZXMgYXMgbmVlZGluZyB0byBiZSBsb2FkZWQuCiAgICAgICAgICAgICAgICAgICAgY29udGV4dC5uZXh0VGljayhmdW5jdGlvbiAoKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIC8vU29tZSBkZWZpbmVzIGNvdWxkIGhhdmUgYmVlbiBhZGRlZCBzaW5jZSB0aGUKICAgICAgICAgICAgICAgICAgICAgICAgLy9yZXF1aXJlIGNhbGwsIGNvbGxlY3QgdGhlbS4KICAgICAgICAgICAgICAgICAgICAgICAgaW50YWtlRGVmaW5lcygpOwoKICAgICAgICAgICAgICAgICAgICAgICAgcmVxdWlyZU1vZCA9IGdldE1vZHVsZShtYWtlTW9kdWxlTWFwKG51bGwsIHJlbE1hcCkpOwoKICAgICAgICAgICAgICAgICAgICAgICAgLy9TdG9yZSBpZiBtYXAgY29uZmlnIHNob3VsZCBiZSBhcHBsaWVkIHRvIHRoaXMgcmVxdWlyZQogICAgICAgICAgICAgICAgICAgICAgICAvL2NhbGwgZm9yIGRlcGVuZGVuY2llcy4KICAgICAgICAgICAgICAgICAgICAgICAgcmVxdWlyZU1vZC5za2lwTWFwID0gb3B0aW9ucy5za2lwTWFwOwoKICAgICAgICAgICAgICAgICAgICAgICAgcmVxdWlyZU1vZC5pbml0KGRlcHMsIGNhbGxiYWNrLCBlcnJiYWNrLCB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICBlbmFibGVkOiB0cnVlCiAgICAgICAgICAgICAgICAgICAgICAgIH0pOwoKICAgICAgICAgICAgICAgICAgICAgICAgY2hlY2tMb2FkZWQoKTsKICAgICAgICAgICAgICAgICAgICB9KTsKCiAgICAgICAgICAgICAgICAgICAgcmV0dXJuIGxvY2FsUmVxdWlyZTsKICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICBtaXhpbihsb2NhbFJlcXVpcmUsIHsKICAgICAgICAgICAgICAgICAgICBpc0Jyb3dzZXI6IGlzQnJvd3NlciwKCiAgICAgICAgICAgICAgICAgICAgLyoqCiAgICAgICAgICAgICAgICAgICAgICogQ29udmVydHMgYSBtb2R1bGUgbmFtZSArIC5leHRlbnNpb24gaW50byBhbiBVUkwgcGF0aC4KICAgICAgICAgICAgICAgICAgICAgKiAqUmVxdWlyZXMqIHRoZSB1c2Ugb2YgYSBtb2R1bGUgbmFtZS4gSXQgZG9lcyBub3Qgc3VwcG9ydCB1c2luZwogICAgICAgICAgICAgICAgICAgICAqIHBsYWluIFVSTHMgbGlrZSBuYW1lVG9VcmwuCiAgICAgICAgICAgICAgICAgICAgICovCiAgICAgICAgICAgICAgICAgICAgdG9Vcmw6IGZ1bmN0aW9uIChtb2R1bGVOYW1lUGx1c0V4dCkgewogICAgICAgICAgICAgICAgICAgICAgICB2YXIgZXh0LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgaW5kZXggPSBtb2R1bGVOYW1lUGx1c0V4dC5sYXN0SW5kZXhPZignLicpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgc2VnbWVudCA9IG1vZHVsZU5hbWVQbHVzRXh0LnNwbGl0KCcvJylbMF0sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBpc1JlbGF0aXZlID0gc2VnbWVudCA9PT0gJy4nIHx8IHNlZ21lbnQgPT09ICcuLic7CgogICAgICAgICAgICAgICAgICAgICAgICAvL0hhdmUgYSBmaWxlIGV4dGVuc2lvbiBhbGlhcywgYW5kIGl0IGlzIG5vdCB0aGUKICAgICAgICAgICAgICAgICAgICAgICAgLy9kb3RzIGZyb20gYSByZWxhdGl2ZSBwYXRoLgogICAgICAgICAgICAgICAgICAgICAgICBpZiAoaW5kZXggIT09IC0xICYmICghaXNSZWxhdGl2ZSB8fCBpbmRleCA+IDEpKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICBleHQgPSBtb2R1bGVOYW1lUGx1c0V4dC5zdWJzdHJpbmcoaW5kZXgsIG1vZHVsZU5hbWVQbHVzRXh0Lmxlbmd0aCk7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICBtb2R1bGVOYW1lUGx1c0V4dCA9IG1vZHVsZU5hbWVQbHVzRXh0LnN1YnN0cmluZygwLCBpbmRleCk7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAgICAgICAgIHJldHVybiBjb250ZXh0Lm5hbWVUb1VybChub3JtYWxpemUobW9kdWxlTmFtZVBsdXNFeHQsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJlbE1hcCAmJiByZWxNYXAuaWQsIHRydWUpLCBleHQsICB0cnVlKTsKICAgICAgICAgICAgICAgICAgICB9LAoKICAgICAgICAgICAgICAgICAgICBkZWZpbmVkOiBmdW5jdGlvbiAoaWQpIHsKICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIGhhc1Byb3AoZGVmaW5lZCwgbWFrZU1vZHVsZU1hcChpZCwgcmVsTWFwLCBmYWxzZSwgdHJ1ZSkuaWQpOwogICAgICAgICAgICAgICAgICAgIH0sCgogICAgICAgICAgICAgICAgICAgIHNwZWNpZmllZDogZnVuY3Rpb24gKGlkKSB7CiAgICAgICAgICAgICAgICAgICAgICAgIGlkID0gbWFrZU1vZHVsZU1hcChpZCwgcmVsTWFwLCBmYWxzZSwgdHJ1ZSkuaWQ7CiAgICAgICAgICAgICAgICAgICAgICAgIHJldHVybiBoYXNQcm9wKGRlZmluZWQsIGlkKSB8fCBoYXNQcm9wKHJlZ2lzdHJ5LCBpZCk7CiAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgfSk7CgogICAgICAgICAgICAgICAgLy9Pbmx5IGFsbG93IHVuZGVmIG9uIHRvcCBsZXZlbCByZXF1aXJlIGNhbGxzCiAgICAgICAgICAgICAgICBpZiAoIXJlbE1hcCkgewogICAgICAgICAgICAgICAgICAgIGxvY2FsUmVxdWlyZS51bmRlZiA9IGZ1bmN0aW9uIChpZCkgewogICAgICAgICAgICAgICAgICAgICAgICAvL0JpbmQgYW55IHdhaXRpbmcgZGVmaW5lKCkgY2FsbHMgdG8gdGhpcyBjb250ZXh0LAogICAgICAgICAgICAgICAgICAgICAgICAvL2ZpeCBmb3IgIzQwOAogICAgICAgICAgICAgICAgICAgICAgICB0YWtlR2xvYmFsUXVldWUoKTsKCiAgICAgICAgICAgICAgICAgICAgICAgIHZhciBtYXAgPSBtYWtlTW9kdWxlTWFwKGlkLCByZWxNYXAsIHRydWUpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgbW9kID0gZ2V0T3duKHJlZ2lzdHJ5LCBpZCk7CgogICAgICAgICAgICAgICAgICAgICAgICBtb2QudW5kZWZlZCA9IHRydWU7CiAgICAgICAgICAgICAgICAgICAgICAgIHJlbW92ZVNjcmlwdChpZCk7CgogICAgICAgICAgICAgICAgICAgICAgICBkZWxldGUgZGVmaW5lZFtpZF07CiAgICAgICAgICAgICAgICAgICAgICAgIGRlbGV0ZSB1cmxGZXRjaGVkW21hcC51cmxdOwogICAgICAgICAgICAgICAgICAgICAgICBkZWxldGUgdW5kZWZFdmVudHNbaWRdOwoKICAgICAgICAgICAgICAgICAgICAgICAgLy9DbGVhbiBxdWV1ZWQgZGVmaW5lcyB0b28uIEdvIGJhY2t3YXJkcwogICAgICAgICAgICAgICAgICAgICAgICAvL2luIGFycmF5IHNvIHRoYXQgdGhlIHNwbGljZXMgZG8gbm90CiAgICAgICAgICAgICAgICAgICAgICAgIC8vbWVzcyB1cCB0aGUgaXRlcmF0aW9uLgogICAgICAgICAgICAgICAgICAgICAgICBlYWNoUmV2ZXJzZShkZWZRdWV1ZSwgZnVuY3Rpb24oYXJncywgaSkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgaWYgKGFyZ3NbMF0gPT09IGlkKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGVmUXVldWUuc3BsaWNlKGksIDEpOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgICAgICAgICB9KTsKICAgICAgICAgICAgICAgICAgICAgICAgZGVsZXRlIGNvbnRleHQuZGVmUXVldWVNYXBbaWRdOwoKICAgICAgICAgICAgICAgICAgICAgICAgaWYgKG1vZCkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy9Ib2xkIG9uIHRvIGxpc3RlbmVycyBpbiBjYXNlIHRoZQogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy9tb2R1bGUgd2lsbCBiZSBhdHRlbXB0ZWQgdG8gYmUgcmVsb2FkZWQKICAgICAgICAgICAgICAgICAgICAgICAgICAgIC8vdXNpbmcgYSBkaWZmZXJlbnQgY29uZmlnLgogICAgICAgICAgICAgICAgICAgICAgICAgICAgaWYgKG1vZC5ldmVudHMuZGVmaW5lZCkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHVuZGVmRXZlbnRzW2lkXSA9IG1vZC5ldmVudHM7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgICAgICAgICAgY2xlYW5SZWdpc3RyeShpZCk7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICB9OwogICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgIHJldHVybiBsb2NhbFJlcXVpcmU7CiAgICAgICAgICAgIH0sCgogICAgICAgICAgICAvKioKICAgICAgICAgICAgICogQ2FsbGVkIHRvIGVuYWJsZSBhIG1vZHVsZSBpZiBpdCBpcyBzdGlsbCBpbiB0aGUgcmVnaXN0cnkKICAgICAgICAgICAgICogYXdhaXRpbmcgZW5hYmxlbWVudC4gQSBzZWNvbmQgYXJnLCBwYXJlbnQsIHRoZSBwYXJlbnQgbW9kdWxlLAogICAgICAgICAgICAgKiBpcyBwYXNzZWQgaW4gZm9yIGNvbnRleHQsIHdoZW4gdGhpcyBtZXRob2QgaXMgb3ZlcnJpZGRlbiBieQogICAgICAgICAgICAgKiB0aGUgb3B0aW1pemVyLiBOb3Qgc2hvd24gaGVyZSB0byBrZWVwIGNvZGUgY29tcGFjdC4KICAgICAgICAgICAgICovCiAgICAgICAgICAgIGVuYWJsZTogZnVuY3Rpb24gKGRlcE1hcCkgewogICAgICAgICAgICAgICAgdmFyIG1vZCA9IGdldE93bihyZWdpc3RyeSwgZGVwTWFwLmlkKTsKICAgICAgICAgICAgICAgIGlmIChtb2QpIHsKICAgICAgICAgICAgICAgICAgICBnZXRNb2R1bGUoZGVwTWFwKS5lbmFibGUoKTsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgfSwKCiAgICAgICAgICAgIC8qKgogICAgICAgICAgICAgKiBJbnRlcm5hbCBtZXRob2QgdXNlZCBieSBlbnZpcm9ubWVudCBhZGFwdGVycyB0byBjb21wbGV0ZSBhIGxvYWQgZXZlbnQuCiAgICAgICAgICAgICAqIEEgbG9hZCBldmVudCBjb3VsZCBiZSBhIHNjcmlwdCBsb2FkIG9yIGp1c3QgYSBsb2FkIHBhc3MgZnJvbSBhIHN5bmNocm9ub3VzCiAgICAgICAgICAgICAqIGxvYWQgY2FsbC4KICAgICAgICAgICAgICogQHBhcmFtIHtTdHJpbmd9IG1vZHVsZU5hbWUgdGhlIG5hbWUgb2YgdGhlIG1vZHVsZSB0byBwb3RlbnRpYWxseSBjb21wbGV0ZS4KICAgICAgICAgICAgICovCiAgICAgICAgICAgIGNvbXBsZXRlTG9hZDogZnVuY3Rpb24gKG1vZHVsZU5hbWUpIHsKICAgICAgICAgICAgICAgIHZhciBmb3VuZCwgYXJncywgbW9kLAogICAgICAgICAgICAgICAgICAgIHNoaW0gPSBnZXRPd24oY29uZmlnLnNoaW0sIG1vZHVsZU5hbWUpIHx8IHt9LAogICAgICAgICAgICAgICAgICAgIHNoRXhwb3J0cyA9IHNoaW0uZXhwb3J0czsKCiAgICAgICAgICAgICAgICB0YWtlR2xvYmFsUXVldWUoKTsKCiAgICAgICAgICAgICAgICB3aGlsZSAoZGVmUXVldWUubGVuZ3RoKSB7CiAgICAgICAgICAgICAgICAgICAgYXJncyA9IGRlZlF1ZXVlLnNoaWZ0KCk7CiAgICAgICAgICAgICAgICAgICAgaWYgKGFyZ3NbMF0gPT09IG51bGwpIHsKICAgICAgICAgICAgICAgICAgICAgICAgYXJnc1swXSA9IG1vZHVsZU5hbWU7CiAgICAgICAgICAgICAgICAgICAgICAgIC8vSWYgYWxyZWFkeSBmb3VuZCBhbiBhbm9ueW1vdXMgbW9kdWxlIGFuZCBib3VuZCBpdAogICAgICAgICAgICAgICAgICAgICAgICAvL3RvIHRoaXMgbmFtZSwgdGhlbiB0aGlzIGlzIHNvbWUgb3RoZXIgYW5vbiBtb2R1bGUKICAgICAgICAgICAgICAgICAgICAgICAgLy93YWl0aW5nIGZvciBpdHMgY29tcGxldGVMb2FkIHRvIGZpcmUuCiAgICAgICAgICAgICAgICAgICAgICAgIGlmIChmb3VuZCkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgYnJlYWs7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICAgICAgZm91bmQgPSB0cnVlOwogICAgICAgICAgICAgICAgICAgIH0gZWxzZSBpZiAoYXJnc1swXSA9PT0gbW9kdWxlTmFtZSkgewogICAgICAgICAgICAgICAgICAgICAgICAvL0ZvdW5kIG1hdGNoaW5nIGRlZmluZSBjYWxsIGZvciB0aGlzIHNjcmlwdCEKICAgICAgICAgICAgICAgICAgICAgICAgZm91bmQgPSB0cnVlOwogICAgICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAgICAgY2FsbEdldE1vZHVsZShhcmdzKTsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIGNvbnRleHQuZGVmUXVldWVNYXAgPSB7fTsKCiAgICAgICAgICAgICAgICAvL0RvIHRoaXMgYWZ0ZXIgdGhlIGN5Y2xlIG9mIGNhbGxHZXRNb2R1bGUgaW4gY2FzZSB0aGUgcmVzdWx0CiAgICAgICAgICAgICAgICAvL29mIHRob3NlIGNhbGxzL2luaXQgY2FsbHMgY2hhbmdlcyB0aGUgcmVnaXN0cnkuCiAgICAgICAgICAgICAgICBtb2QgPSBnZXRPd24ocmVnaXN0cnksIG1vZHVsZU5hbWUpOwoKICAgICAgICAgICAgICAgIGlmICghZm91bmQgJiYgIWhhc1Byb3AoZGVmaW5lZCwgbW9kdWxlTmFtZSkgJiYgbW9kICYmICFtb2QuaW5pdGVkKSB7CiAgICAgICAgICAgICAgICAgICAgaWYgKGNvbmZpZy5lbmZvcmNlRGVmaW5lICYmICghc2hFeHBvcnRzIHx8ICFnZXRHbG9iYWwoc2hFeHBvcnRzKSkpIHsKICAgICAgICAgICAgICAgICAgICAgICAgaWYgKGhhc1BhdGhGYWxsYmFjayhtb2R1bGVOYW1lKSkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuOwogICAgICAgICAgICAgICAgICAgICAgICB9IGVsc2UgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgcmV0dXJuIG9uRXJyb3IobWFrZUVycm9yKCdub2RlZmluZScsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICdObyBkZWZpbmUgY2FsbCBmb3IgJyArIG1vZHVsZU5hbWUsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG51bGwsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFttb2R1bGVOYW1lXSkpOwogICAgICAgICAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgICAgICAgICAgLy9BIHNjcmlwdCB0aGF0IGRvZXMgbm90IGNhbGwgZGVmaW5lKCksIHNvIGp1c3Qgc2ltdWxhdGUKICAgICAgICAgICAgICAgICAgICAgICAgLy90aGUgY2FsbCBmb3IgaXQuCiAgICAgICAgICAgICAgICAgICAgICAgIGNhbGxHZXRNb2R1bGUoW21vZHVsZU5hbWUsIChzaGltLmRlcHMgfHwgW10pLCBzaGltLmV4cG9ydHNGbl0pOwogICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICBjaGVja0xvYWRlZCgpOwogICAgICAgICAgICB9LAoKICAgICAgICAgICAgLyoqCiAgICAgICAgICAgICAqIENvbnZlcnRzIGEgbW9kdWxlIG5hbWUgdG8gYSBmaWxlIHBhdGguIFN1cHBvcnRzIGNhc2VzIHdoZXJlCiAgICAgICAgICAgICAqIG1vZHVsZU5hbWUgbWF5IGFjdHVhbGx5IGJlIGp1c3QgYW4gVVJMLgogICAgICAgICAgICAgKiBOb3RlIHRoYXQgaXQgKipkb2VzIG5vdCoqIGNhbGwgbm9ybWFsaXplIG9uIHRoZSBtb2R1bGVOYW1lLAogICAgICAgICAgICAgKiBpdCBpcyBhc3N1bWVkIHRvIGhhdmUgYWxyZWFkeSBiZWVuIG5vcm1hbGl6ZWQuIFRoaXMgaXMgYW4KICAgICAgICAgICAgICogaW50ZXJuYWwgQVBJLCBub3QgYSBwdWJsaWMgb25lLiBVc2UgdG9VcmwgZm9yIHRoZSBwdWJsaWMgQVBJLgogICAgICAgICAgICAgKi8KICAgICAgICAgICAgbmFtZVRvVXJsOiBmdW5jdGlvbiAobW9kdWxlTmFtZSwgZXh0LCBza2lwRXh0KSB7CiAgICAgICAgICAgICAgICB2YXIgcGF0aHMsIHN5bXMsIGksIHBhcmVudE1vZHVsZSwgdXJsLAogICAgICAgICAgICAgICAgICAgIHBhcmVudFBhdGgsIGJ1bmRsZUlkLAogICAgICAgICAgICAgICAgICAgIHBrZ01haW4gPSBnZXRPd24oY29uZmlnLnBrZ3MsIG1vZHVsZU5hbWUpOwoKICAgICAgICAgICAgICAgIGlmIChwa2dNYWluKSB7CiAgICAgICAgICAgICAgICAgICAgbW9kdWxlTmFtZSA9IHBrZ01haW47CiAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgYnVuZGxlSWQgPSBnZXRPd24oYnVuZGxlc01hcCwgbW9kdWxlTmFtZSk7CgogICAgICAgICAgICAgICAgaWYgKGJ1bmRsZUlkKSB7CiAgICAgICAgICAgICAgICAgICAgcmV0dXJuIGNvbnRleHQubmFtZVRvVXJsKGJ1bmRsZUlkLCBleHQsIHNraXBFeHQpOwogICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgIC8vSWYgYSBjb2xvbiBpcyBpbiB0aGUgVVJMLCBpdCBpbmRpY2F0ZXMgYSBwcm90b2NvbCBpcyB1c2VkIGFuZCBpdCBpcyBqdXN0CiAgICAgICAgICAgICAgICAvL2FuIFVSTCB0byBhIGZpbGUsIG9yIGlmIGl0IHN0YXJ0cyB3aXRoIGEgc2xhc2gsIGNvbnRhaW5zIGEgcXVlcnkgYXJnIChpLmUuID8pCiAgICAgICAgICAgICAgICAvL29yIGVuZHMgd2l0aCAuanMsIHRoZW4gYXNzdW1lIHRoZSB1c2VyIG1lYW50IHRvIHVzZSBhbiB1cmwgYW5kIG5vdCBhIG1vZHVsZSBpZC4KICAgICAgICAgICAgICAgIC8vVGhlIHNsYXNoIGlzIGltcG9ydGFudCBmb3IgcHJvdG9jb2wtbGVzcyBVUkxzIGFzIHdlbGwgYXMgZnVsbCBwYXRocy4KICAgICAgICAgICAgICAgIGlmIChyZXEuanNFeHRSZWdFeHAudGVzdChtb2R1bGVOYW1lKSkgewogICAgICAgICAgICAgICAgICAgIC8vSnVzdCBhIHBsYWluIHBhdGgsIG5vdCBtb2R1bGUgbmFtZSBsb29rdXAsIHNvIGp1c3QgcmV0dXJuIGl0LgogICAgICAgICAgICAgICAgICAgIC8vQWRkIGV4dGVuc2lvbiBpZiBpdCBpcyBpbmNsdWRlZC4gVGhpcyBpcyBhIGJpdCB3b25reSwgb25seSBub24tLmpzIHRoaW5ncyBwYXNzCiAgICAgICAgICAgICAgICAgICAgLy9hbiBleHRlbnNpb24sIHRoaXMgbWV0aG9kIHByb2JhYmx5IG5lZWRzIHRvIGJlIHJld29ya2VkLgogICAgICAgICAgICAgICAgICAgIHVybCA9IG1vZHVsZU5hbWUgKyAoZXh0IHx8ICcnKTsKICAgICAgICAgICAgICAgIH0gZWxzZSB7CiAgICAgICAgICAgICAgICAgICAgLy9BIG1vZHVsZSB0aGF0IG5lZWRzIHRvIGJlIGNvbnZlcnRlZCB0byBhIHBhdGguCiAgICAgICAgICAgICAgICAgICAgcGF0aHMgPSBjb25maWcucGF0aHM7CgogICAgICAgICAgICAgICAgICAgIHN5bXMgPSBtb2R1bGVOYW1lLnNwbGl0KCcvJyk7CiAgICAgICAgICAgICAgICAgICAgLy9Gb3IgZWFjaCBtb2R1bGUgbmFtZSBzZWdtZW50LCBzZWUgaWYgdGhlcmUgaXMgYSBwYXRoCiAgICAgICAgICAgICAgICAgICAgLy9yZWdpc3RlcmVkIGZvciBpdC4gU3RhcnQgd2l0aCBtb3N0IHNwZWNpZmljIG5hbWUKICAgICAgICAgICAgICAgICAgICAvL2FuZCB3b3JrIHVwIGZyb20gaXQuCiAgICAgICAgICAgICAgICAgICAgZm9yIChpID0gc3ltcy5sZW5ndGg7IGkgPiAwOyBpIC09IDEpIHsKICAgICAgICAgICAgICAgICAgICAgICAgcGFyZW50TW9kdWxlID0gc3ltcy5zbGljZSgwLCBpKS5qb2luKCcvJyk7CgogICAgICAgICAgICAgICAgICAgICAgICBwYXJlbnRQYXRoID0gZ2V0T3duKHBhdGhzLCBwYXJlbnRNb2R1bGUpOwogICAgICAgICAgICAgICAgICAgICAgICBpZiAocGFyZW50UGF0aCkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgLy9JZiBhbiBhcnJheSwgaXQgbWVhbnMgdGhlcmUgYXJlIGEgZmV3IGNob2ljZXMsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAvL0Nob29zZSB0aGUgb25lIHRoYXQgaXMgZGVzaXJlZAogICAgICAgICAgICAgICAgICAgICAgICAgICAgaWYgKGlzQXJyYXkocGFyZW50UGF0aCkpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBwYXJlbnRQYXRoID0gcGFyZW50UGF0aFswXTsKICAgICAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICAgICAgICAgIHN5bXMuc3BsaWNlKDAsIGksIHBhcmVudFBhdGgpOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgYnJlYWs7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICB9CgogICAgICAgICAgICAgICAgICAgIC8vSm9pbiB0aGUgcGF0aCBwYXJ0cyB0b2dldGhlciwgdGhlbiBmaWd1cmUgb3V0IGlmIGJhc2VVcmwgaXMgbmVlZGVkLgogICAgICAgICAgICAgICAgICAgIHVybCA9IHN5bXMuam9pbignLycpOwogICAgICAgICAgICAgICAgICAgIHVybCArPSAoZXh0IHx8ICgvXmRhdGFcOnxcPy8udGVzdCh1cmwpIHx8IHNraXBFeHQgPyAnJyA6ICcuanMnKSk7CiAgICAgICAgICAgICAgICAgICAgdXJsID0gKHVybC5jaGFyQXQoMCkgPT09ICcvJyB8fCB1cmwubWF0Y2goL15bXHdcK1wuXC1dKzovKSA/ICcnIDogY29uZmlnLmJhc2VVcmwpICsgdXJsOwogICAgICAgICAgICAgICAgfQoKICAgICAgICAgICAgICAgIHJldHVybiBjb25maWcudXJsQXJncyA/IHVybCArCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAoKHVybC5pbmRleE9mKCc/JykgPT09IC0xID8gJz8nIDogJyYnKSArCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29uZmlnLnVybEFyZ3MpIDogdXJsOwogICAgICAgICAgICB9LAoKICAgICAgICAgICAgLy9EZWxlZ2F0ZXMgdG8gcmVxLmxvYWQuIEJyb2tlbiBvdXQgYXMgYSBzZXBhcmF0ZSBmdW5jdGlvbiB0bwogICAgICAgICAgICAvL2FsbG93IG92ZXJyaWRpbmcgaW4gdGhlIG9wdGltaXplci4KICAgICAgICAgICAgbG9hZDogZnVuY3Rpb24gKGlkLCB1cmwpIHsKICAgICAgICAgICAgICAgIHJlcS5sb2FkKGNvbnRleHQsIGlkLCB1cmwpOwogICAgICAgICAgICB9LAoKICAgICAgICAgICAgLyoqCiAgICAgICAgICAgICAqIEV4ZWN1dGVzIGEgbW9kdWxlIGNhbGxiYWNrIGZ1bmN0aW9uLiBCcm9rZW4gb3V0IGFzIGEgc2VwYXJhdGUgZnVuY3Rpb24KICAgICAgICAgICAgICogc29sZWx5IHRvIGFsbG93IHRoZSBidWlsZCBzeXN0ZW0gdG8gc2VxdWVuY2UgdGhlIGZpbGVzIGluIHRoZSBidWlsdAogICAgICAgICAgICAgKiBsYXllciBpbiB0aGUgcmlnaHQgc2VxdWVuY2UuCiAgICAgICAgICAgICAqCiAgICAgICAgICAgICAqIEBwcml2YXRlCiAgICAgICAgICAgICAqLwogICAgICAgICAgICBleGVjQ2I6IGZ1bmN0aW9uIChuYW1lLCBjYWxsYmFjaywgYXJncywgZXhwb3J0cykgewogICAgICAgICAgICAgICAgcmV0dXJuIGNhbGxiYWNrLmFwcGx5KGV4cG9ydHMsIGFyZ3MpOwogICAgICAgICAgICB9LAoKICAgICAgICAgICAgLyoqCiAgICAgICAgICAgICAqIGNhbGxiYWNrIGZvciBzY3JpcHQgbG9hZHMsIHVzZWQgdG8gY2hlY2sgc3RhdHVzIG9mIGxvYWRpbmcuCiAgICAgICAgICAgICAqCiAgICAgICAgICAgICAqIEBwYXJhbSB7RXZlbnR9IGV2dCB0aGUgZXZlbnQgZnJvbSB0aGUgYnJvd3NlciBmb3IgdGhlIHNjcmlwdAogICAgICAgICAgICAgKiB0aGF0IHdhcyBsb2FkZWQuCiAgICAgICAgICAgICAqLwogICAgICAgICAgICBvblNjcmlwdExvYWQ6IGZ1bmN0aW9uIChldnQpIHsKICAgICAgICAgICAgICAgIC8vVXNpbmcgY3VycmVudFRhcmdldCBpbnN0ZWFkIG9mIHRhcmdldCBmb3IgRmlyZWZveCAyLjAncyBzYWtlLiBOb3QKICAgICAgICAgICAgICAgIC8vYWxsIG9sZCBicm93c2VycyB3aWxsIGJlIHN1cHBvcnRlZCwgYnV0IHRoaXMgb25lIHdhcyBlYXN5IGVub3VnaAogICAgICAgICAgICAgICAgLy90byBzdXBwb3J0IGFuZCBzdGlsbCBtYWtlcyBzZW5zZS4KICAgICAgICAgICAgICAgIGlmIChldnQudHlwZSA9PT0gJ2xvYWQnIHx8CiAgICAgICAgICAgICAgICAgICAgICAgIChyZWFkeVJlZ0V4cC50ZXN0KChldnQuY3VycmVudFRhcmdldCB8fCBldnQuc3JjRWxlbWVudCkucmVhZHlTdGF0ZSkpKSB7CiAgICAgICAgICAgICAgICAgICAgLy9SZXNldCBpbnRlcmFjdGl2ZSBzY3JpcHQgc28gYSBzY3JpcHQgbm9kZSBpcyBub3QgaGVsZCBvbnRvIGZvcgogICAgICAgICAgICAgICAgICAgIC8vdG8gbG9uZy4KICAgICAgICAgICAgICAgICAgICBpbnRlcmFjdGl2ZVNjcmlwdCA9IG51bGw7CgogICAgICAgICAgICAgICAgICAgIC8vUHVsbCBvdXQgdGhlIG5hbWUgb2YgdGhlIG1vZHVsZSBhbmQgdGhlIGNvbnRleHQuCiAgICAgICAgICAgICAgICAgICAgdmFyIGRhdGEgPSBnZXRTY3JpcHREYXRhKGV2dCk7CiAgICAgICAgICAgICAgICAgICAgY29udGV4dC5jb21wbGV0ZUxvYWQoZGF0YS5pZCk7CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCgogICAgICAgICAgICAvKioKICAgICAgICAgICAgICogQ2FsbGJhY2sgZm9yIHNjcmlwdCBlcnJvcnMuCiAgICAgICAgICAgICAqLwogICAgICAgICAgICBvblNjcmlwdEVycm9yOiBmdW5jdGlvbiAoZXZ0KSB7CiAgICAgICAgICAgICAgICB2YXIgZGF0YSA9IGdldFNjcmlwdERhdGEoZXZ0KTsKICAgICAgICAgICAgICAgIGlmICghaGFzUGF0aEZhbGxiYWNrKGRhdGEuaWQpKSB7CiAgICAgICAgICAgICAgICAgICAgdmFyIHBhcmVudHMgPSBbXTsKICAgICAgICAgICAgICAgICAgICBlYWNoUHJvcChyZWdpc3RyeSwgZnVuY3Rpb24odmFsdWUsIGtleSkgewogICAgICAgICAgICAgICAgICAgICAgICBpZiAoa2V5LmluZGV4T2YoJ19AcicpICE9PSAwKSB7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICBlYWNoKHZhbHVlLmRlcE1hcHMsIGZ1bmN0aW9uKGRlcE1hcCkgewogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmIChkZXBNYXAuaWQgPT09IGRhdGEuaWQpIHsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcGFyZW50cy5wdXNoKGtleSk7CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJldHVybiB0cnVlOwogICAgICAgICAgICAgICAgICAgICAgICAgICAgfSk7CiAgICAgICAgICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgICAgICB9KTsKICAgICAgICAgICAgICAgICAgICByZXR1cm4gb25FcnJvcihtYWtlRXJyb3IoJ3NjcmlwdGVycm9yJywgJ1NjcmlwdCBlcnJvciBmb3IgIicgKyBkYXRhLmlkICsKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKHBhcmVudHMubGVuZ3RoID8KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJyIsIG5lZWRlZCBieTogJyArIHBhcmVudHMuam9pbignLCAnKSA6CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICciJyksIGV2dCwgW2RhdGEuaWRdKSk7CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICB9OwoKICAgICAgICBjb250ZXh0LnJlcXVpcmUgPSBjb250ZXh0Lm1ha2VSZXF1aXJlKCk7CiAgICAgICAgcmV0dXJuIGNvbnRleHQ7CiAgICB9CgogICAgLyoqCiAgICAgKiBNYWluIGVudHJ5IHBvaW50LgogICAgICoKICAgICAqIElmIHRoZSBvbmx5IGFyZ3VtZW50IHRvIHJlcXVpcmUgaXMgYSBzdHJpbmcsIHRoZW4gdGhlIG1vZHVsZSB0aGF0CiAgICAgKiBpcyByZXByZXNlbnRlZCBieSB0aGF0IHN0cmluZyBpcyBmZXRjaGVkIGZvciB0aGUgYXBwcm9wcmlhdGUgY29udGV4dC4KICAgICAqCiAgICAgKiBJZiB0aGUgZmlyc3QgYXJndW1lbnQgaXMgYW4gYXJyYXksIHRoZW4gaXQgd2lsbCBiZSB0cmVhdGVkIGFzIGFuIGFycmF5CiAgICAgKiBvZiBkZXBlbmRlbmN5IHN0cmluZyBuYW1lcyB0byBmZXRjaC4gQW4gb3B0aW9uYWwgZnVuY3Rpb24gY2FsbGJhY2sgY2FuCiAgICAgKiBiZSBzcGVjaWZpZWQgdG8gZXhlY3V0ZSB3aGVuIGFsbCBvZiB0aG9zZSBkZXBlbmRlbmNpZXMgYXJlIGF2YWlsYWJsZS4KICAgICAqCiAgICAgKiBNYWtlIGEgbG9jYWwgcmVxIHZhcmlhYmxlIHRvIGhlbHAgQ2FqYSBjb21wbGlhbmNlIChpdCBhc3N1bWVzIHRoaW5ncwogICAgICogb24gYSByZXF1aXJlIHRoYXQgYXJlIG5vdCBzdGFuZGFyZGl6ZWQpLCBhbmQgdG8gZ2l2ZSBhIHNob3J0CiAgICAgKiBuYW1lIGZvciBtaW5pZmljYXRpb24vbG9jYWwgc2NvcGUgdXNlLgogICAgICovCiAgICByZXEgPSByZXF1aXJlanMgPSBmdW5jdGlvbiAoZGVwcywgY2FsbGJhY2ssIGVycmJhY2ssIG9wdGlvbmFsKSB7CgogICAgICAgIC8vRmluZCB0aGUgcmlnaHQgY29udGV4dCwgdXNlIGRlZmF1bHQKICAgICAgICB2YXIgY29udGV4dCwgY29uZmlnLAogICAgICAgICAgICBjb250ZXh0TmFtZSA9IGRlZkNvbnRleHROYW1lOwoKICAgICAgICAvLyBEZXRlcm1pbmUgaWYgaGF2ZSBjb25maWcgb2JqZWN0IGluIHRoZSBjYWxsLgogICAgICAgIGlmICghaXNBcnJheShkZXBzKSAmJiB0eXBlb2YgZGVwcyAhPT0gJ3N0cmluZycpIHsKICAgICAgICAgICAgLy8gZGVwcyBpcyBhIGNvbmZpZyBvYmplY3QKICAgICAgICAgICAgY29uZmlnID0gZGVwczsKICAgICAgICAgICAgaWYgKGlzQXJyYXkoY2FsbGJhY2spKSB7CiAgICAgICAgICAgICAgICAvLyBBZGp1c3QgYXJncyBpZiB0aGVyZSBhcmUgZGVwZW5kZW5jaWVzCiAgICAgICAgICAgICAgICBkZXBzID0gY2FsbGJhY2s7CiAgICAgICAgICAgICAgICBjYWxsYmFjayA9IGVycmJhY2s7CiAgICAgICAgICAgICAgICBlcnJiYWNrID0gb3B0aW9uYWw7CiAgICAgICAgICAgIH0gZWxzZSB7CiAgICAgICAgICAgICAgICBkZXBzID0gW107CiAgICAgICAgICAgIH0KICAgICAgICB9CgogICAgICAgIGlmIChjb25maWcgJiYgY29uZmlnLmNvbnRleHQpIHsKICAgICAgICAgICAgY29udGV4dE5hbWUgPSBjb25maWcuY29udGV4dDsKICAgICAgICB9CgogICAgICAgIGNvbnRleHQgPSBnZXRPd24oY29udGV4dHMsIGNvbnRleHROYW1lKTsKICAgICAgICBpZiAoIWNvbnRleHQpIHsKICAgICAgICAgICAgY29udGV4dCA9IGNvbnRleHRzW2NvbnRleHROYW1lXSA9IHJlcS5zLm5ld0NvbnRleHQoY29udGV4dE5hbWUpOwogICAgICAgIH0KCiAgICAgICAgaWYgKGNvbmZpZykgewogICAgICAgICAgICBjb250ZXh0LmNvbmZpZ3VyZShjb25maWcpOwogICAgICAgIH0KCiAgICAgICAgcmV0dXJuIGNvbnRleHQucmVxdWlyZShkZXBzLCBjYWxsYmFjaywgZXJyYmFjayk7CiAgICB9OwoKICAgIC8qKgogICAgICogU3VwcG9ydCByZXF1aXJlLmNvbmZpZygpIHRvIG1ha2UgaXQgZWFzaWVyIHRvIGNvb3BlcmF0ZSB3aXRoIG90aGVyCiAgICAgKiBBTUQgbG9hZGVycyBvbiBnbG9iYWxseSBhZ3JlZWQgbmFtZXMuCiAgICAgKi8KICAgIHJlcS5jb25maWcgPSBmdW5jdGlvbiAoY29uZmlnKSB7CiAgICAgICAgcmV0dXJuIHJlcShjb25maWcpOwogICAgfTsKCiAgICAvKioKICAgICAqIEV4ZWN1dGUgc29tZXRoaW5nIGFmdGVyIHRoZSBjdXJyZW50IHRpY2sKICAgICAqIG9mIHRoZSBldmVudCBsb29wLiBPdmVycmlkZSBmb3Igb3RoZXIgZW52cwogICAgICogdGhhdCBoYXZlIGEgYmV0dGVyIHNvbHV0aW9uIHRoYW4gc2V0VGltZW91dC4KICAgICAqIEBwYXJhbSAge0Z1bmN0aW9ufSBmbiBmdW5jdGlvbiB0byBleGVjdXRlIGxhdGVyLgogICAgICovCiAgICByZXEubmV4dFRpY2sgPSB0eXBlb2Ygc2V0VGltZW91dCAhPT0gJ3VuZGVmaW5lZCcgPyBmdW5jdGlvbiAoZm4pIHsKICAgICAgICBzZXRUaW1lb3V0KGZuLCA0KTsKICAgIH0gOiBmdW5jdGlvbiAoZm4pIHsgZm4oKTsgfTsKCiAgICAvKioKICAgICAqIEV4cG9ydCByZXF1aXJlIGFzIGEgZ2xvYmFsLCBidXQgb25seSBpZiBpdCBkb2VzIG5vdCBhbHJlYWR5IGV4aXN0LgogICAgICovCiAgICBpZiAoIXJlcXVpcmUpIHsKICAgICAgICByZXF1aXJlID0gcmVxOwogICAgfQoKICAgIHJlcS52ZXJzaW9uID0gdmVyc2lvbjsKCiAgICAvL1VzZWQgdG8gZmlsdGVyIG91dCBkZXBlbmRlbmNpZXMgdGhhdCBhcmUgYWxyZWFkeSBwYXRocy4KICAgIHJlcS5qc0V4dFJlZ0V4cCA9IC9eXC98OnxcP3xcLmpzJC87CiAgICByZXEuaXNCcm93c2VyID0gaXNCcm93c2VyOwogICAgcyA9IHJlcS5zID0gewogICAgICAgIGNvbnRleHRzOiBjb250ZXh0cywKICAgICAgICBuZXdDb250ZXh0OiBuZXdDb250ZXh0CiAgICB9OwoKICAgIC8vQ3JlYXRlIGRlZmF1bHQgY29udGV4dC4KICAgIHJlcSh7fSk7CgogICAgLy9FeHBvcnRzIHNvbWUgY29udGV4dC1zZW5zaXRpdmUgbWV0aG9kcyBvbiBnbG9iYWwgcmVxdWlyZS4KICAgIGVhY2goWwogICAgICAgICd0b1VybCcsCiAgICAgICAgJ3VuZGVmJywKICAgICAgICAnZGVmaW5lZCcsCiAgICAgICAgJ3NwZWNpZmllZCcKICAgIF0sIGZ1bmN0aW9uIChwcm9wKSB7CiAgICAgICAgLy9SZWZlcmVuY2UgZnJvbSBjb250ZXh0cyBpbnN0ZWFkIG9mIGVhcmx5IGJpbmRpbmcgdG8gZGVmYXVsdCBjb250ZXh0LAogICAgICAgIC8vc28gdGhhdCBkdXJpbmcgYnVpbGRzLCB0aGUgbGF0ZXN0IGluc3RhbmNlIG9mIHRoZSBkZWZhdWx0IGNvbnRleHQKICAgICAgICAvL3dpdGggaXRzIGNvbmZpZyBnZXRzIHVzZWQuCiAgICAgICAgcmVxW3Byb3BdID0gZnVuY3Rpb24gKCkgewogICAgICAgICAgICB2YXIgY3R4ID0gY29udGV4dHNbZGVmQ29udGV4dE5hbWVdOwogICAgICAgICAgICByZXR1cm4gY3R4LnJlcXVpcmVbcHJvcF0uYXBwbHkoY3R4LCBhcmd1bWVudHMpOwogICAgICAgIH07CiAgICB9KTsKCiAgICBpZiAoaXNCcm93c2VyKSB7CiAgICAgICAgaGVhZCA9IHMuaGVhZCA9IGRvY3VtZW50LmdldEVsZW1lbnRzQnlUYWdOYW1lKCdoZWFkJylbMF07CiAgICAgICAgLy9JZiBCQVNFIHRhZyBpcyBpbiBwbGF5LCB1c2luZyBhcHBlbmRDaGlsZCBpcyBhIHByb2JsZW0gZm9yIElFNi4KICAgICAgICAvL1doZW4gdGhhdCBicm93c2VyIGRpZXMsIHRoaXMgY2FuIGJlIHJlbW92ZWQuIERldGFpbHMgaW4gdGhpcyBqUXVlcnkgYnVnOgogICAgICAgIC8vaHR0cDovL2Rldi5qcXVlcnkuY29tL3RpY2tldC8yNzA5CiAgICAgICAgYmFzZUVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50c0J5VGFnTmFtZSgnYmFzZScpWzBdOwogICAgICAgIGlmIChiYXNlRWxlbWVudCkgewogICAgICAgICAgICBoZWFkID0gcy5oZWFkID0gYmFzZUVsZW1lbnQucGFyZW50Tm9kZTsKICAgICAgICB9CiAgICB9CgogICAgLyoqCiAgICAgKiBBbnkgZXJyb3JzIHRoYXQgcmVxdWlyZSBleHBsaWNpdGx5IGdlbmVyYXRlcyB3aWxsIGJlIHBhc3NlZCB0byB0aGlzCiAgICAgKiBmdW5jdGlvbi4gSW50ZXJjZXB0L292ZXJyaWRlIGl0IGlmIHlvdSB3YW50IGN1c3RvbSBlcnJvciBoYW5kbGluZy4KICAgICAqIEBwYXJhbSB7RXJyb3J9IGVyciB0aGUgZXJyb3Igb2JqZWN0LgogICAgICovCiAgICByZXEub25FcnJvciA9IGRlZmF1bHRPbkVycm9yOwoKICAgIC8qKgogICAgICogQ3JlYXRlcyB0aGUgbm9kZSBmb3IgdGhlIGxvYWQgY29tbWFuZC4gT25seSB1c2VkIGluIGJyb3dzZXIgZW52cy4KICAgICAqLwogICAgcmVxLmNyZWF0ZU5vZGUgPSBmdW5jdGlvbiAoY29uZmlnLCBtb2R1bGVOYW1lLCB1cmwpIHsKICAgICAgICB2YXIgbm9kZSA9IGNvbmZpZy54aHRtbCA/CiAgICAgICAgICAgICAgICBkb2N1bWVudC5jcmVhdGVFbGVtZW50TlMoJ2h0dHA6Ly93d3cudzMub3JnLzE5OTkveGh0bWwnLCAnaHRtbDpzY3JpcHQnKSA6CiAgICAgICAgICAgICAgICBkb2N1bWVudC5jcmVhdGVFbGVtZW50KCdzY3JpcHQnKTsKICAgICAgICBub2RlLnR5cGUgPSBjb25maWcuc2NyaXB0VHlwZSB8fCAndGV4dC9qYXZhc2NyaXB0JzsKICAgICAgICBub2RlLmNoYXJzZXQgPSAndXRmLTgnOwogICAgICAgIG5vZGUuYXN5bmMgPSB0cnVlOwogICAgICAgIHJldHVybiBub2RlOwogICAgfTsKCiAgICAvKioKICAgICAqIERvZXMgdGhlIHJlcXVlc3QgdG8gbG9hZCBhIG1vZHVsZSBmb3IgdGhlIGJyb3dzZXIgY2FzZS4KICAgICAqIE1ha2UgdGhpcyBhIHNlcGFyYXRlIGZ1bmN0aW9uIHRvIGFsbG93IG90aGVyIGVudmlyb25tZW50cwogICAgICogdG8gb3ZlcnJpZGUgaXQuCiAgICAgKgogICAgICogQHBhcmFtIHtPYmplY3R9IGNvbnRleHQgdGhlIHJlcXVpcmUgY29udGV4dCB0byBmaW5kIHN0YXRlLgogICAgICogQHBhcmFtIHtTdHJpbmd9IG1vZHVsZU5hbWUgdGhlIG5hbWUgb2YgdGhlIG1vZHVsZS4KICAgICAqIEBwYXJhbSB7T2JqZWN0fSB1cmwgdGhlIFVSTCB0byB0aGUgbW9kdWxlLgogICAgICovCiAgICByZXEubG9hZCA9IGZ1bmN0aW9uIChjb250ZXh0LCBtb2R1bGVOYW1lLCB1cmwpIHsKICAgICAgICB2YXIgY29uZmlnID0gKGNvbnRleHQgJiYgY29udGV4dC5jb25maWcpIHx8IHt9LAogICAgICAgICAgICBub2RlOwogICAgICAgIGlmIChpc0Jyb3dzZXIpIHsKICAgICAgICAgICAgLy9JbiB0aGUgYnJvd3NlciBzbyB1c2UgYSBzY3JpcHQgdGFnCiAgICAgICAgICAgIG5vZGUgPSByZXEuY3JlYXRlTm9kZShjb25maWcsIG1vZHVsZU5hbWUsIHVybCk7CiAgICAgICAgICAgIGlmIChjb25maWcub25Ob2RlQ3JlYXRlZCkgewogICAgICAgICAgICAgICAgY29uZmlnLm9uTm9kZUNyZWF0ZWQobm9kZSwgY29uZmlnLCBtb2R1bGVOYW1lLCB1cmwpOwogICAgICAgICAgICB9CgogICAgICAgICAgICBub2RlLnNldEF0dHJpYnV0ZSgnZGF0YS1yZXF1aXJlY29udGV4dCcsIGNvbnRleHQuY29udGV4dE5hbWUpOwogICAgICAgICAgICBub2RlLnNldEF0dHJpYnV0ZSgnZGF0YS1yZXF1aXJlbW9kdWxlJywgbW9kdWxlTmFtZSk7CgogICAgICAgICAgICAvL1NldCB1cCBsb2FkIGxpc3RlbmVyLiBUZXN0IGF0dGFjaEV2ZW50IGZpcnN0IGJlY2F1c2UgSUU5IGhhcwogICAgICAgICAgICAvL2Egc3VidGxlIGlzc3VlIGluIGl0cyBhZGRFdmVudExpc3RlbmVyIGFuZCBzY3JpcHQgb25sb2FkIGZpcmluZ3MKICAgICAgICAgICAgLy90aGF0IGRvIG5vdCBtYXRjaCB0aGUgYmVoYXZpb3Igb2YgYWxsIG90aGVyIGJyb3dzZXJzIHdpdGgKICAgICAgICAgICAgLy9hZGRFdmVudExpc3RlbmVyIHN1cHBvcnQsIHdoaWNoIGZpcmUgdGhlIG9ubG9hZCBldmVudCBmb3IgYQogICAgICAgICAgICAvL3NjcmlwdCByaWdodCBhZnRlciB0aGUgc2NyaXB0IGV4ZWN1dGlvbi4gU2VlOgogICAgICAgICAgICAvL2h0dHBzOi8vY29ubmVjdC5taWNyb3NvZnQuY29tL0lFL2ZlZWRiYWNrL2RldGFpbHMvNjQ4MDU3L3NjcmlwdC1vbmxvYWQtZXZlbnQtaXMtbm90LWZpcmVkLWltbWVkaWF0ZWx5LWFmdGVyLXNjcmlwdC1leGVjdXRpb24KICAgICAgICAgICAgLy9VTkZPUlRVTkFURUxZIE9wZXJhIGltcGxlbWVudHMgYXR0YWNoRXZlbnQgYnV0IGRvZXMgbm90IGZvbGxvdyB0aGUgc2NyaXB0CiAgICAgICAgICAgIC8vc2NyaXB0IGV4ZWN1dGlvbiBtb2RlLgogICAgICAgICAgICBpZiAobm9kZS5hdHRhY2hFdmVudCAmJgogICAgICAgICAgICAgICAgICAgIC8vQ2hlY2sgaWYgbm9kZS5hdHRhY2hFdmVudCBpcyBhcnRpZmljaWFsbHkgYWRkZWQgYnkgY3VzdG9tIHNjcmlwdCBvcgogICAgICAgICAgICAgICAgICAgIC8vbmF0aXZlbHkgc3VwcG9ydGVkIGJ5IGJyb3dzZXIKICAgICAgICAgICAgICAgICAgICAvL3JlYWQgaHR0cHM6Ly9naXRodWIuY29tL2pyYnVya2UvcmVxdWlyZWpzL2lzc3Vlcy8xODcKICAgICAgICAgICAgICAgICAgICAvL2lmIHdlIGNhbiBOT1QgZmluZCBbbmF0aXZlIGNvZGVdIHRoZW4gaXQgbXVzdCBOT1QgbmF0aXZlbHkgc3VwcG9ydGVkLgogICAgICAgICAgICAgICAgICAgIC8vaW4gSUU4LCBub2RlLmF0dGFjaEV2ZW50IGRvZXMgbm90IGhhdmUgdG9TdHJpbmcoKQogICAgICAgICAgICAgICAgICAgIC8vTm90ZSB0aGUgdGVzdCBmb3IgIltuYXRpdmUgY29kZSIgd2l0aCBubyBjbG9zaW5nIGJyYWNlLCBzZWU6CiAgICAgICAgICAgICAgICAgICAgLy9odHRwczovL2dpdGh1Yi5jb20vanJidXJrZS9yZXF1aXJlanMvaXNzdWVzLzI3MwogICAgICAgICAgICAgICAgICAgICEobm9kZS5hdHRhY2hFdmVudC50b1N0cmluZyAmJiBub2RlLmF0dGFjaEV2ZW50LnRvU3RyaW5nKCkuaW5kZXhPZignW25hdGl2ZSBjb2RlJykgPCAwKSAmJgogICAgICAgICAgICAgICAgICAgICFpc09wZXJhKSB7CiAgICAgICAgICAgICAgICAvL1Byb2JhYmx5IElFLiBJRSAoYXQgbGVhc3QgNi04KSBkbyBub3QgZmlyZQogICAgICAgICAgICAgICAgLy9zY3JpcHQgb25sb2FkIHJpZ2h0IGFmdGVyIGV4ZWN1dGluZyB0aGUgc2NyaXB0LCBzbwogICAgICAgICAgICAgICAgLy93ZSBjYW5ub3QgdGllIHRoZSBhbm9ueW1vdXMgZGVmaW5lIGNhbGwgdG8gYSBuYW1lLgogICAgICAgICAgICAgICAgLy9Ib3dldmVyLCBJRSByZXBvcnRzIHRoZSBzY3JpcHQgYXMgYmVpbmcgaW4gJ2ludGVyYWN0aXZlJwogICAgICAgICAgICAgICAgLy9yZWFkeVN0YXRlIGF0IHRoZSB0aW1lIG9mIHRoZSBkZWZpbmUgY2FsbC4KICAgICAgICAgICAgICAgIHVzZUludGVyYWN0aXZlID0gdHJ1ZTsKCiAgICAgICAgICAgICAgICBub2RlLmF0dGFjaEV2ZW50KCdvbnJlYWR5c3RhdGVjaGFuZ2UnLCBjb250ZXh0Lm9uU2NyaXB0TG9hZCk7CiAgICAgICAgICAgICAgICAvL0l0IHdvdWxkIGJlIGdyZWF0IHRvIGFkZCBhbiBlcnJvciBoYW5kbGVyIGhlcmUgdG8gY2F0Y2gKICAgICAgICAgICAgICAgIC8vNDA0cyBpbiBJRTkrLiBIb3dldmVyLCBvbnJlYWR5c3RhdGVjaGFuZ2Ugd2lsbCBmaXJlIGJlZm9yZQogICAgICAgICAgICAgICAgLy90aGUgZXJyb3IgaGFuZGxlciwgc28gdGhhdCBkb2VzIG5vdCBoZWxwLiBJZiBhZGRFdmVudExpc3RlbmVyCiAgICAgICAgICAgICAgICAvL2lzIHVzZWQsIHRoZW4gSUUgd2lsbCBmaXJlIGVycm9yIGJlZm9yZSBsb2FkLCBidXQgd2UgY2Fubm90CiAgICAgICAgICAgICAgICAvL3VzZSB0aGF0IHBhdGh3YXkgZ2l2ZW4gdGhlIGNvbm5lY3QubWljcm9zb2Z0LmNvbSBpc3N1ZQogICAgICAgICAgICAgICAgLy9tZW50aW9uZWQgYWJvdmUgYWJvdXQgbm90IGRvaW5nIHRoZSAnc2NyaXB0IGV4ZWN1dGUsCiAgICAgICAgICAgICAgICAvL3RoZW4gZmlyZSB0aGUgc2NyaXB0IGxvYWQgZXZlbnQgbGlzdGVuZXIgYmVmb3JlIGV4ZWN1dGUKICAgICAgICAgICAgICAgIC8vbmV4dCBzY3JpcHQnIHRoYXQgb3RoZXIgYnJvd3NlcnMgZG8uCiAgICAgICAgICAgICAgICAvL0Jlc3QgaG9wZTogSUUxMCBmaXhlcyB0aGUgaXNzdWVzLAogICAgICAgICAgICAgICAgLy9hbmQgdGhlbiBkZXN0cm95cyBhbGwgaW5zdGFsbHMgb2YgSUUgNi05LgogICAgICAgICAgICAgICAgLy9ub2RlLmF0dGFjaEV2ZW50KCdvbmVycm9yJywgY29udGV4dC5vblNjcmlwdEVycm9yKTsKICAgICAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgICAgIG5vZGUuYWRkRXZlbnRMaXN0ZW5lcignbG9hZCcsIGNvbnRleHQub25TY3JpcHRMb2FkLCBmYWxzZSk7CiAgICAgICAgICAgICAgICBub2RlLmFkZEV2ZW50TGlzdGVuZXIoJ2Vycm9yJywgY29udGV4dC5vblNjcmlwdEVycm9yLCBmYWxzZSk7CiAgICAgICAgICAgIH0KICAgICAgICAgICAgbm9kZS5zcmMgPSB1cmw7CgogICAgICAgICAgICAvL0ZvciBzb21lIGNhY2hlIGNhc2VzIGluIElFIDYtOCwgdGhlIHNjcmlwdCBleGVjdXRlcyBiZWZvcmUgdGhlIGVuZAogICAgICAgICAgICAvL29mIHRoZSBhcHBlbmRDaGlsZCBleGVjdXRpb24sIHNvIHRvIHRpZSBhbiBhbm9ueW1vdXMgZGVmaW5lCiAgICAgICAgICAgIC8vY2FsbCB0byB0aGUgbW9kdWxlIG5hbWUgKHdoaWNoIGlzIHN0b3JlZCBvbiB0aGUgbm9kZSksIGhvbGQgb24KICAgICAgICAgICAgLy90byBhIHJlZmVyZW5jZSB0byB0aGlzIG5vZGUsIGJ1dCBjbGVhciBhZnRlciB0aGUgRE9NIGluc2VydGlvbi4KICAgICAgICAgICAgY3VycmVudGx5QWRkaW5nU2NyaXB0ID0gbm9kZTsKICAgICAgICAgICAgaWYgKGJhc2VFbGVtZW50KSB7CiAgICAgICAgICAgICAgICBoZWFkLmluc2VydEJlZm9yZShub2RlLCBiYXNlRWxlbWVudCk7CiAgICAgICAgICAgIH0gZWxzZSB7CiAgICAgICAgICAgICAgICBoZWFkLmFwcGVuZENoaWxkKG5vZGUpOwogICAgICAgICAgICB9CiAgICAgICAgICAgIGN1cnJlbnRseUFkZGluZ1NjcmlwdCA9IG51bGw7CgogICAgICAgICAgICByZXR1cm4gbm9kZTsKICAgICAgICB9IGVsc2UgaWYgKGlzV2ViV29ya2VyKSB7CiAgICAgICAgICAgIHRyeSB7CiAgICAgICAgICAgICAgICAvL0luIGEgd2ViIHdvcmtlciwgdXNlIGltcG9ydFNjcmlwdHMuIFRoaXMgaXMgbm90IGEgdmVyeQogICAgICAgICAgICAgICAgLy9lZmZpY2llbnQgdXNlIG9mIGltcG9ydFNjcmlwdHMsIGltcG9ydFNjcmlwdHMgd2lsbCBibG9jayB1bnRpbAogICAgICAgICAgICAgICAgLy9pdHMgc2NyaXB0IGlzIGRvd25sb2FkZWQgYW5kIGV2YWx1YXRlZC4gSG93ZXZlciwgaWYgd2ViIHdvcmtlcnMKICAgICAgICAgICAgICAgIC8vYXJlIGluIHBsYXksIHRoZSBleHBlY3RhdGlvbiBpcyB0aGF0IGEgYnVpbGQgaGFzIGJlZW4gZG9uZSBzbwogICAgICAgICAgICAgICAgLy90aGF0IG9ubHkgb25lIHNjcmlwdCBuZWVkcyB0byBiZSBsb2FkZWQgYW55d2F5LiBUaGlzIG1heSBuZWVkCiAgICAgICAgICAgICAgICAvL3RvIGJlIHJlZXZhbHVhdGVkIGlmIG90aGVyIHVzZSBjYXNlcyBiZWNvbWUgY29tbW9uLgogICAgICAgICAgICAgICAgaW1wb3J0U2NyaXB0cyh1cmwpOwoKICAgICAgICAgICAgICAgIC8vQWNjb3VudCBmb3IgYW5vbnltb3VzIG1vZHVsZXMKICAgICAgICAgICAgICAgIGNvbnRleHQuY29tcGxldGVMb2FkKG1vZHVsZU5hbWUpOwogICAgICAgICAgICB9IGNhdGNoIChlKSB7CiAgICAgICAgICAgICAgICBjb250ZXh0Lm9uRXJyb3IobWFrZUVycm9yKCdpbXBvcnRzY3JpcHRzJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnaW1wb3J0U2NyaXB0cyBmYWlsZWQgZm9yICcgKwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtb2R1bGVOYW1lICsgJyBhdCAnICsgdXJsLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGUsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgW21vZHVsZU5hbWVdKSk7CiAgICAgICAgICAgIH0KICAgICAgICB9CiAgICB9OwoKICAgIGZ1bmN0aW9uIGdldEludGVyYWN0aXZlU2NyaXB0KCkgewogICAgICAgIGlmIChpbnRlcmFjdGl2ZVNjcmlwdCAmJiBpbnRlcmFjdGl2ZVNjcmlwdC5yZWFkeVN0YXRlID09PSAnaW50ZXJhY3RpdmUnKSB7CiAgICAgICAgICAgIHJldHVybiBpbnRlcmFjdGl2ZVNjcmlwdDsKICAgICAgICB9CgogICAgICAgIGVhY2hSZXZlcnNlKHNjcmlwdHMoKSwgZnVuY3Rpb24gKHNjcmlwdCkgewogICAgICAgICAgICBpZiAoc2NyaXB0LnJlYWR5U3RhdGUgPT09ICdpbnRlcmFjdGl2ZScpIHsKICAgICAgICAgICAgICAgIHJldHVybiAoaW50ZXJhY3RpdmVTY3JpcHQgPSBzY3JpcHQpOwogICAgICAgICAgICB9CiAgICAgICAgfSk7CiAgICAgICAgcmV0dXJuIGludGVyYWN0aXZlU2NyaXB0OwogICAgfQoKICAgIC8vTG9vayBmb3IgYSBkYXRhLW1haW4gc2NyaXB0IGF0dHJpYnV0ZSwgd2hpY2ggY291bGQgYWxzbyBhZGp1c3QgdGhlIGJhc2VVcmwuCiAgICBpZiAoaXNCcm93c2VyICYmICFjZmcuc2tpcERhdGFNYWluKSB7CiAgICAgICAgLy9GaWd1cmUgb3V0IGJhc2VVcmwuIEdldCBpdCBmcm9tIHRoZSBzY3JpcHQgdGFnIHdpdGggcmVxdWlyZS5qcyBpbiBpdC4KICAgICAgICBlYWNoUmV2ZXJzZShzY3JpcHRzKCksIGZ1bmN0aW9uIChzY3JpcHQpIHsKICAgICAgICAgICAgLy9TZXQgdGhlICdoZWFkJyB3aGVyZSB3ZSBjYW4gYXBwZW5kIGNoaWxkcmVuIGJ5CiAgICAgICAgICAgIC8vdXNpbmcgdGhlIHNjcmlwdCdzIHBhcmVudC4KICAgICAgICAgICAgaWYgKCFoZWFkKSB7CiAgICAgICAgICAgICAgICBoZWFkID0gc2NyaXB0LnBhcmVudE5vZGU7CiAgICAgICAgICAgIH0KCiAgICAgICAgICAgIC8vTG9vayBmb3IgYSBkYXRhLW1haW4gYXR0cmlidXRlIHRvIHNldCBtYWluIHNjcmlwdCBmb3IgdGhlIHBhZ2UKICAgICAgICAgICAgLy90byBsb2FkLiBJZiBpdCBpcyB0aGVyZSwgdGhlIHBhdGggdG8gZGF0YSBtYWluIGJlY29tZXMgdGhlCiAgICAgICAgICAgIC8vYmFzZVVybCwgaWYgaXQgaXMgbm90IGFscmVhZHkgc2V0LgogICAgICAgICAgICBkYXRhTWFpbiA9IHNjcmlwdC5nZXRBdHRyaWJ1dGUoJ2RhdGEtbWFpbicpOwogICAgICAgICAgICBpZiAoZGF0YU1haW4pIHsKICAgICAgICAgICAgICAgIC8vUHJlc2VydmUgZGF0YU1haW4gaW4gY2FzZSBpdCBpcyBhIHBhdGggKGkuZS4gY29udGFpbnMgJz8nKQogICAgICAgICAgICAgICAgbWFpblNjcmlwdCA9IGRhdGFNYWluOwoKICAgICAgICAgICAgICAgIC8vU2V0IGZpbmFsIGJhc2VVcmwgaWYgdGhlcmUgaXMgbm90IGFscmVhZHkgYW4gZXhwbGljaXQgb25lLgogICAgICAgICAgICAgICAgaWYgKCFjZmcuYmFzZVVybCkgewogICAgICAgICAgICAgICAgICAgIC8vUHVsbCBvZmYgdGhlIGRpcmVjdG9yeSBvZiBkYXRhLW1haW4gZm9yIHVzZSBhcyB0aGUKICAgICAgICAgICAgICAgICAgICAvL2Jhc2VVcmwuCiAgICAgICAgICAgICAgICAgICAgc3JjID0gbWFpblNjcmlwdC5zcGxpdCgnLycpOwogICAgICAgICAgICAgICAgICAgIG1haW5TY3JpcHQgPSBzcmMucG9wKCk7CiAgICAgICAgICAgICAgICAgICAgc3ViUGF0aCA9IHNyYy5sZW5ndGggPyBzcmMuam9pbignLycpICArICcvJyA6ICcuLyc7CgogICAgICAgICAgICAgICAgICAgIGNmZy5iYXNlVXJsID0gc3ViUGF0aDsKICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAvL1N0cmlwIG9mZiBhbnkgdHJhaWxpbmcgLmpzIHNpbmNlIG1haW5TY3JpcHQgaXMgbm93CiAgICAgICAgICAgICAgICAvL2xpa2UgYSBtb2R1bGUgbmFtZS4KICAgICAgICAgICAgICAgIG1haW5TY3JpcHQgPSBtYWluU2NyaXB0LnJlcGxhY2UoanNTdWZmaXhSZWdFeHAsICcnKTsKCiAgICAgICAgICAgICAgICAvL0lmIG1haW5TY3JpcHQgaXMgc3RpbGwgYSBwYXRoLCBmYWxsIGJhY2sgdG8gZGF0YU1haW4KICAgICAgICAgICAgICAgIGlmIChyZXEuanNFeHRSZWdFeHAudGVzdChtYWluU2NyaXB0KSkgewogICAgICAgICAgICAgICAgICAgIG1haW5TY3JpcHQgPSBkYXRhTWFpbjsKICAgICAgICAgICAgICAgIH0KCiAgICAgICAgICAgICAgICAvL1B1dCB0aGUgZGF0YS1tYWluIHNjcmlwdCBpbiB0aGUgZmlsZXMgdG8gbG9hZC4KICAgICAgICAgICAgICAgIGNmZy5kZXBzID0gY2ZnLmRlcHMgPyBjZmcuZGVwcy5jb25jYXQobWFpblNjcmlwdCkgOiBbbWFpblNjcmlwdF07CgogICAgICAgICAgICAgICAgcmV0dXJuIHRydWU7CiAgICAgICAgICAgIH0KICAgICAgICB9KTsKICAgIH0KCiAgICAvKioKICAgICAqIFRoZSBmdW5jdGlvbiB0aGF0IGhhbmRsZXMgZGVmaW5pdGlvbnMgb2YgbW9kdWxlcy4gRGlmZmVycyBmcm9tCiAgICAgKiByZXF1aXJlKCkgaW4gdGhhdCBhIHN0cmluZyBmb3IgdGhlIG1vZHVsZSBzaG91bGQgYmUgdGhlIGZpcnN0IGFyZ3VtZW50LAogICAgICogYW5kIHRoZSBmdW5jdGlvbiB0byBleGVjdXRlIGFmdGVyIGRlcGVuZGVuY2llcyBhcmUgbG9hZGVkIHNob3VsZAogICAgICogcmV0dXJuIGEgdmFsdWUgdG8gZGVmaW5lIHRoZSBtb2R1bGUgY29ycmVzcG9uZGluZyB0byB0aGUgZmlyc3QgYXJndW1lbnQncwogICAgICogbmFtZS4KICAgICAqLwogICAgZGVmaW5lID0gZnVuY3Rpb24gKG5hbWUsIGRlcHMsIGNhbGxiYWNrKSB7CiAgICAgICAgdmFyIG5vZGUsIGNvbnRleHQ7CgogICAgICAgIC8vQWxsb3cgZm9yIGFub255bW91cyBtb2R1bGVzCiAgICAgICAgaWYgKHR5cGVvZiBuYW1lICE9PSAnc3RyaW5nJykgewogICAgICAgICAgICAvL0FkanVzdCBhcmdzIGFwcHJvcHJpYXRlbHkKICAgICAgICAgICAgY2FsbGJhY2sgPSBkZXBzOwogICAgICAgICAgICBkZXBzID0gbmFtZTsKICAgICAgICAgICAgbmFtZSA9IG51bGw7CiAgICAgICAgfQoKICAgICAgICAvL1RoaXMgbW9kdWxlIG1heSBub3QgaGF2ZSBkZXBlbmRlbmNpZXMKICAgICAgICBpZiAoIWlzQXJyYXkoZGVwcykpIHsKICAgICAgICAgICAgY2FsbGJhY2sgPSBkZXBzOwogICAgICAgICAgICBkZXBzID0gbnVsbDsKICAgICAgICB9CgogICAgICAgIC8vSWYgbm8gbmFtZSwgYW5kIGNhbGxiYWNrIGlzIGEgZnVuY3Rpb24sIHRoZW4gZmlndXJlIG91dCBpZiBpdCBhCiAgICAgICAgLy9Db21tb25KUyB0aGluZyB3aXRoIGRlcGVuZGVuY2llcy4KICAgICAgICBpZiAoIWRlcHMgJiYgaXNGdW5jdGlvbihjYWxsYmFjaykpIHsKICAgICAgICAgICAgZGVwcyA9IFtdOwogICAgICAgICAgICAvL1JlbW92ZSBjb21tZW50cyBmcm9tIHRoZSBjYWxsYmFjayBzdHJpbmcsCiAgICAgICAgICAgIC8vbG9vayBmb3IgcmVxdWlyZSBjYWxscywgYW5kIHB1bGwgdGhlbSBpbnRvIHRoZSBkZXBlbmRlbmNpZXMsCiAgICAgICAgICAgIC8vYnV0IG9ubHkgaWYgdGhlcmUgYXJlIGZ1bmN0aW9uIGFyZ3MuCiAgICAgICAgICAgIGlmIChjYWxsYmFjay5sZW5ndGgpIHsKICAgICAgICAgICAgICAgIGNhbGxiYWNrCiAgICAgICAgICAgICAgICAgICAgLnRvU3RyaW5nKCkKICAgICAgICAgICAgICAgICAgICAucmVwbGFjZShjb21tZW50UmVnRXhwLCAnJykKICAgICAgICAgICAgICAgICAgICAucmVwbGFjZShjanNSZXF1aXJlUmVnRXhwLCBmdW5jdGlvbiAobWF0Y2gsIGRlcCkgewogICAgICAgICAgICAgICAgICAgICAgICBkZXBzLnB1c2goZGVwKTsKICAgICAgICAgICAgICAgICAgICB9KTsKCiAgICAgICAgICAgICAgICAvL01heSBiZSBhIENvbW1vbkpTIHRoaW5nIGV2ZW4gd2l0aG91dCByZXF1aXJlIGNhbGxzLCBidXQgc3RpbGwKICAgICAgICAgICAgICAgIC8vY291bGQgdXNlIGV4cG9ydHMsIGFuZCBtb2R1bGUuIEF2b2lkIGRvaW5nIGV4cG9ydHMgYW5kIG1vZHVsZQogICAgICAgICAgICAgICAgLy93b3JrIHRob3VnaCBpZiBpdCBqdXN0IG5lZWRzIHJlcXVpcmUuCiAgICAgICAgICAgICAgICAvL1JFUVVJUkVTIHRoZSBmdW5jdGlvbiB0byBleHBlY3QgdGhlIENvbW1vbkpTIHZhcmlhYmxlcyBpbiB0aGUKICAgICAgICAgICAgICAgIC8vb3JkZXIgbGlzdGVkIGJlbG93LgogICAgICAgICAgICAgICAgZGVwcyA9IChjYWxsYmFjay5sZW5ndGggPT09IDEgPyBbJ3JlcXVpcmUnXSA6IFsncmVxdWlyZScsICdleHBvcnRzJywgJ21vZHVsZSddKS5jb25jYXQoZGVwcyk7CiAgICAgICAgICAgIH0KICAgICAgICB9CgogICAgICAgIC8vSWYgaW4gSUUgNi04IGFuZCBoaXQgYW4gYW5vbnltb3VzIGRlZmluZSgpIGNhbGwsIGRvIHRoZSBpbnRlcmFjdGl2ZQogICAgICAgIC8vd29yay4KICAgICAgICBpZiAodXNlSW50ZXJhY3RpdmUpIHsKICAgICAgICAgICAgbm9kZSA9IGN1cnJlbnRseUFkZGluZ1NjcmlwdCB8fCBnZXRJbnRlcmFjdGl2ZVNjcmlwdCgpOwogICAgICAgICAgICBpZiAobm9kZSkgewogICAgICAgICAgICAgICAgaWYgKCFuYW1lKSB7CiAgICAgICAgICAgICAgICAgICAgbmFtZSA9IG5vZGUuZ2V0QXR0cmlidXRlKCdkYXRhLXJlcXVpcmVtb2R1bGUnKTsKICAgICAgICAgICAgICAgIH0KICAgICAgICAgICAgICAgIGNvbnRleHQgPSBjb250ZXh0c1tub2RlLmdldEF0dHJpYnV0ZSgnZGF0YS1yZXF1aXJlY29udGV4dCcpXTsKICAgICAgICAgICAgfQogICAgICAgIH0KCiAgICAgICAgLy9BbHdheXMgc2F2ZSBvZmYgZXZhbHVhdGluZyB0aGUgZGVmIGNhbGwgdW50aWwgdGhlIHNjcmlwdCBvbmxvYWQgaGFuZGxlci4KICAgICAgICAvL1RoaXMgYWxsb3dzIG11bHRpcGxlIG1vZHVsZXMgdG8gYmUgaW4gYSBmaWxlIHdpdGhvdXQgcHJlbWF0dXJlbHkKICAgICAgICAvL3RyYWNpbmcgZGVwZW5kZW5jaWVzLCBhbmQgYWxsb3dzIGZvciBhbm9ueW1vdXMgbW9kdWxlIHN1cHBvcnQsCiAgICAgICAgLy93aGVyZSB0aGUgbW9kdWxlIG5hbWUgaXMgbm90IGtub3duIHVudGlsIHRoZSBzY3JpcHQgb25sb2FkIGV2ZW50CiAgICAgICAgLy9vY2N1cnMuIElmIG5vIGNvbnRleHQsIHVzZSB0aGUgZ2xvYmFsIHF1ZXVlLCBhbmQgZ2V0IGl0IHByb2Nlc3NlZAogICAgICAgIC8vaW4gdGhlIG9uc2NyaXB0IGxvYWQgY2FsbGJhY2suCiAgICAgICAgaWYgKGNvbnRleHQpIHsKICAgICAgICAgICAgY29udGV4dC5kZWZRdWV1ZS5wdXNoKFtuYW1lLCBkZXBzLCBjYWxsYmFja10pOwogICAgICAgICAgICBjb250ZXh0LmRlZlF1ZXVlTWFwW25hbWVdID0gdHJ1ZTsKICAgICAgICB9IGVsc2UgewogICAgICAgICAgICBnbG9iYWxEZWZRdWV1ZS5wdXNoKFtuYW1lLCBkZXBzLCBjYWxsYmFja10pOwogICAgICAgIH0KICAgIH07CgogICAgZGVmaW5lLmFtZCA9IHsKICAgICAgICBqUXVlcnk6IHRydWUKICAgIH07CgogICAgLyoqCiAgICAgKiBFeGVjdXRlcyB0aGUgdGV4dC4gTm9ybWFsbHkganVzdCB1c2VzIGV2YWwsIGJ1dCBjYW4gYmUgbW9kaWZpZWQKICAgICAqIHRvIHVzZSBhIGJldHRlciwgZW52aXJvbm1lbnQtc3BlY2lmaWMgY2FsbC4gT25seSB1c2VkIGZvciB0cmFuc3BpbGluZwogICAgICogbG9hZGVyIHBsdWdpbnMsIG5vdCBmb3IgcGxhaW4gSlMgbW9kdWxlcy4KICAgICAqIEBwYXJhbSB7U3RyaW5nfSB0ZXh0IHRoZSB0ZXh0IHRvIGV4ZWN1dGUvZXZhbHVhdGUuCiAgICAgKi8KICAgIHJlcS5leGVjID0gZnVuY3Rpb24gKHRleHQpIHsKICAgICAgICAvKmpzbGludCBldmlsOiB0cnVlICovCiAgICAgICAgcmV0dXJuIGV2YWwodGV4dCk7CiAgICB9OwoKICAgIC8vU2V0IHVwIHdpdGggY29uZmlnIGluZm8uCiAgICByZXEoY2ZnKTsKfSh0aGlzKSk7Cg==", + "ok": true, + "headers": [ + [ + "content-type", + "text/javascript" + ] + ], + "status": 200, + "status_text": "" + } + }, + "base_uri": "https://localhost:8080/", + "height": 2006 + }, + "outputId": "0b3f497f-040f-41ef-8a32-70b4adf7d7d0", + "executionInfo": { + "status": "ok", + "timestamp": 1512371597785, + "user_tz": 480, + "elapsed": 4242, + "user": { + "displayName": "Lukasz Kaiser", + "photoUrl": "//lh3.googleusercontent.com/-CbWIwcQ_VsA/AAAAAAAAAAI/AAAAAAAAAB8/jloHVR1qOhg/s50-c-k-no/photo.jpg", + "userId": "109750154298538986950" + } + } + }, + "source": [ + "# Convert inputs and outputs to subwords\n", + "inp_text = to_tokens(encoders[\"inputs\"].encode(inputs))\n", + "out_text = to_tokens(encoders[\"inputs\"].encode(outputs))\n", + "\n", + "# Run eval to collect attention weights\n", + "example = encode_eval(inputs, outputs)\n", + "with tfe.restore_variables_on_create(ckpt_path):\n", + " translate_model.set_mode(Modes.EVAL)\n", + " translate_model(example)\n", + "# Get normalized attention weights for each layer\n", + "enc_atts, dec_atts, encdec_atts = get_att_mats()\n", + "\n", + "call_html()\n", + "attention.show(inp_text, out_text, enc_atts, dec_atts, encdec_atts)" + ], + "cell_type": "code", + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:1671: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", - "keep_dims is deprecated, use keepdims instead\n", - "Inputs: This is a cat.\n", - "Outputs: Das ist eine Katze.\n" + "\n", + "Future major versions of TensorFlow will allow gradients to flow\n", + "into the labels input on backprop by default.\n", + "\n", + "See tf.nn.softmax_cross_entropy_with_logits_v2.\n", + "\n" ], "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "\n", + " <script src=\"/static/components/requirejs/require.js\"></script>\n", + " <script>\n", + " requirejs.config({\n", + " paths: {\n", + " base: '/static/base',\n", + " \"d3\": \"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.8/d3.min\",\n", + " jquery: '//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min',\n", + " },\n", + " });\n", + " </script>\n", + " " + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "\n", + " <span style=\"user-select:none\">\n", + " Layer: <select id=\"layer\"></select>\n", + " Attention: <select id=\"att_type\">\n", + " <option value=\"all\">All</option>\n", + " <option value=\"inp_inp\">Input - Input</option>\n", + " <option value=\"inp_out\">Input - Output</option>\n", + " <option value=\"out_out\">Output - Output</option>\n", + " </select>\n", + " </span>\n", + " <div id='vis'></div>\n" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window.attention = {\"inp_out\": {\"top_text\": [\"The_\", \"animal_\", \"didn_\", \"'_\", \"t_\", \"cross_\", \"the_\", \"street_\", \"because_\", \"it_\", \"was_\", \"too_\", \"tire\", \"d_\"], \"att\": [[[[0.01107952743768692, 0.002038179198279977, 0.02572617679834366, 0.043437324464321136, 0.026865433901548386, 0.008821134455502033, 0.05896050110459328, 0.006038360297679901, 0.05802087485790253, 0.05262080207467079, 0.021981995552778244, 0.01655607670545578, 0.007265332620590925, 0.017941446974873543, 0.19668635725975037], [0.4201550781726837, 0.0003083523770328611, 0.003427971852943301, 0.027074502781033516, 0.0025770263746380806, 0.0006525526405312121, 0.0672224909067154, 0.0006329934694804251, 0.002376251621171832, 0.007315297145396471, 0.0018543159822002053, 0.0002170451043639332, 5.486799182108371e-06, 8.465739665552974e-05, 0.018722370266914368], [6.826388562330976e-05, 0.41254693269729614, 8.318798791151494e-05, 0.00021303755056578666, 2.6623651137924753e-05, 1.3030116861045826e-06, 3.3524677292007254e-06, 9.95700816019962e-07, 0.00025696202646940947, 0.00021154701244086027, 4.0387480112258345e-05, 7.382633339148015e-05, 0.0001871670683613047, 0.0001393109851051122, 0.00044668230111710727], [0.0012913167010992765, 0.46178945899009705, 0.0011929792817682028, 0.0014885100536048412, 0.001382660586386919, 0.00010778238356579095, 4.841455302084796e-05, 4.8626650823280215e-05, 0.0007912410655990243, 0.0019299217965453863, 0.0002972490037791431, 0.0004315593687351793, 0.013707359321415424, 0.0025058358442038298, 0.00208207662217319], [0.0008573953527957201, 5.803010481031379e-06, 0.0034995940513908863, 0.007113253697752953, 4.1040249925572425e-05, 0.48505696654319763, 0.0009781911503523588, 2.57480514846975e-05, 0.0006811833591200411, 0.011991027742624283, 0.013829604722559452, 0.02649468183517456, 0.018967876210808754, 0.008940043859183788, 0.0023627132177352905], [3.2793446735013276e-05, 4.91645641886862e-06, 0.0003670089063234627, 0.0005689052632078528, 0.0004337447171565145, 0.6979628205299377, 0.00025133590679615736, 1.3211038094596006e-05, 0.001040837960317731, 0.0008422345272265375, 0.00011131400242447853, 0.0007033413276076317, 0.00044049491407349706, 0.0004404923238325864, 0.00032976132933981717], [0.002877118531614542, 0.0015123215271160007, 0.21683953702449799, 0.042356427758932114, 0.09360139071941376, 0.7325531840324402, 0.007687804754823446, 0.0004983373219147325, 0.0008397439960390329, 0.018263472244143486, 0.01633409783244133, 0.06572946161031723, 0.029279880225658417, 0.13710656762123108, 0.013406738638877869], [0.09384340792894363, 0.002295592101290822, 0.05245966836810112, 0.10398446023464203, 0.13232196867465973, 0.2621823251247406, 0.7299563884735107, 0.01621837355196476, 0.008298774249851704, 0.019108427688479424, 0.013038183562457561, 0.008606976829469204, 0.0014156820252537727, 0.008462491445243359, 0.08448491245508194], [7.994164479896426e-05, 9.660106115916278e-06, 1.3390360436460469e-05, 0.0009496311540715396, 7.498388185922522e-06, 0.0023292596451938152, 0.0033705621026456356, 0.45610299706459045, 0.00048403104301542044, 0.0003956609289161861, 6.013430538587272e-05, 1.5610943592037074e-05, 4.899038231087616e-06, 1.0044974260381423e-05, 0.0011326958192512393], [0.0021254755556583405, 0.025354469195008278, 0.0505821667611599, 0.04718977212905884, 0.3544465899467468, 0.27984359860420227, 0.10468283295631409, 0.03827415779232979, 0.0065247067250311375, 0.003615353489294648, 0.001024437602609396, 0.02404061146080494, 0.00031744904117658734, 0.011979974806308746, 0.06911104917526245], [0.06793052703142166, 0.04423084855079651, 0.009074175730347633, 0.010606715455651283, 0.023761747404932976, 0.06765440851449966, 0.048715878278017044, 0.13498826324939728, 0.15846557915210724, 0.01835249364376068, 0.0033974519465118647, 0.011923078447580338, 0.0035463334061205387, 0.036997705698013306, 0.15195232629776], [0.00013637961819767952, 0.00010623007256072015, 0.00015417735266964883, 0.00014589299098588526, 0.0007127521676011384, 0.0008950252668000758, 0.00038585966103710234, 0.002901369472965598, 0.34460243582725525, 0.00040915730642154813, 0.00017379666678607464, 9.334777860203758e-05, 0.0002283527428517118, 0.0001650981866987422, 0.0021401161793619394], [0.03951041400432587, 0.015644539147615433, 0.002765331417322159, 0.020979223772883415, 0.001914863707497716, 0.049360573291778564, 0.010446744039654732, 0.06006397679448128, 0.18512527644634247, 0.5769777894020081, 0.07455664873123169, 0.016840822994709015, 0.21517987549304962, 0.030672460794448853, 0.04319411888718605], [0.0012064727488905191, 0.0013226938899606466, 0.002064700936898589, 0.008003294467926025, 0.002116014016792178, 0.0028530799318104982, 0.006337625440210104, 0.0002913604548666626, 0.0004794643900822848, 0.0026383439544588327, 0.0038926906418055296, 0.3737375736236572, 0.002772320294752717, 0.007620541378855705, 0.003997606225311756], [1.0432314411445986e-05, 4.745730166177964e-06, 1.672162215982098e-05, 2.360623693675734e-05, 4.496370820561424e-06, 1.767691173881758e-06, 4.21794857174973e-06, 1.7029789205480483e-06, 2.8430429665604606e-05, 7.409282261505723e-05, 0.00010478614422027022, 0.00017224416660610586, 0.480630487203598, 0.017292670905590057, 3.8113743357826024e-05], [0.00031966043752618134, 7.799067680025473e-05, 0.0005293181748129427, 0.0002383182873018086, 6.09634407737758e-05, 1.622732997930143e-05, 0.0001254813396371901, 4.548055585473776e-05, 0.0002202334435423836, 0.0014038329245522618, 0.008373874239623547, 0.0005300238262861967, 0.8584288358688354, 0.0721927285194397, 0.0012385909212753177], [0.008336205966770649, 0.000929497298784554, 0.060522519052028656, 0.02858084999024868, 0.004865946713835001, 0.19429318606853485, 0.006222299765795469, 0.00020022530225105584, 0.03241097182035446, 0.2199898362159729, 0.40489089488983154, 0.12284909188747406, 0.04783688485622406, 0.16652296483516693, 0.03165041282773018], [0.06735408306121826, 0.02395833097398281, 0.022876637056469917, 0.059418935328722, 0.020556019619107246, 0.006657767109572887, 0.01686989888548851, 0.03750348463654518, 0.0929105281829834, 0.11066772043704987, 0.07383746653795242, 0.04306775704026222, 0.1764260083436966, 0.2488536387681961, 0.14264866709709167], [0.00023218609567265958, 9.724824485601857e-05, 0.00017837552877608687, 0.000249945733230561, 0.00043016509152948856, 0.0002728255931288004, 0.0002596308768261224, 0.0021448382176458836, 0.33870813250541687, 0.0012523159384727478, 0.0004828754754271358, 7.525486580561846e-05, 0.001232807757332921, 0.00022845527564641088, 0.0029908884316682816], [0.044313203543424606, 0.014693659730255604, 0.001713237608782947, 0.01787775754928589, 0.001054717693477869, 0.03111616149544716, 0.005932849366217852, 0.035437386482954025, 0.10908837616443634, 0.6214090585708618, 0.11623460799455643, 0.018710769712924957, 0.26884767413139343, 0.036007944494485855, 0.04555344209074974], [0.0014647350180894136, 0.0016486160457134247, 0.001705971430055797, 0.008203698322176933, 0.0011827786220237613, 0.001036314177326858, 0.004107706248760223, 0.00018337460642214864, 0.0005908485618419945, 0.004427316598594189, 0.0075510423630476, 0.37528446316719055, 0.0045065670274198055, 0.01084148045629263, 0.0047609396278858185], [1.1546462701517157e-05, 6.3197094277711585e-06, 1.3665205187862739e-05, 2.3049220544635318e-05, 3.1024922009237343e-06, 9.712728115118807e-07, 4.2468768697290216e-06, 1.4032799526830786e-06, 2.1501631636056118e-05, 0.00011254433775320649, 0.00014821428339928389, 0.00021640797785948962, 0.4815296530723572, 0.022970588877797127, 4.596232975018211e-05], [0.0004618540406227112, 0.00011890243331436068, 0.0008028792799450457, 0.0003817373653873801, 7.645944424439222e-05, 2.0059787857462652e-05, 0.00017321997438557446, 3.885024489136413e-05, 0.00016429855895694345, 0.0017073642229661345, 0.011983372271060944, 0.0008083870052359998, 0.8495219349861145, 0.07573292404413223, 0.0017974229995161295], [0.00848880223929882, 0.0010204557329416275, 0.06384890526533127, 0.030244439840316772, 0.004545390605926514, 0.2111765593290329, 0.007047791499644518, 0.00020413362653926015, 0.03285042569041252, 0.2096482813358307, 0.40160003304481506, 0.12425301223993301, 0.05433715134859085, 0.2013336718082428, 0.03489448130130768], [0.018106432631611824, 0.01663283444941044, 0.006966447923332453, 0.06288447231054306, 0.008926548063755035, 0.0005806194385513663, 0.004527462646365166, 0.00047311693197116256, 0.010450053960084915, 0.008817908354103565, 0.02498125471174717, 0.02475220151245594, 0.006219316273927689, 0.034688226878643036, 0.15510374307632446]], [[0.011485431343317032, 0.057214245200157166, 0.11445975303649902, 0.035292237997055054, 0.17235025763511658, 0.21079879999160767, 0.08683252334594727, 0.33144259452819824, 0.2781406342983246, 0.07864350080490112, 0.10017280280590057, 0.0828540250658989, 0.17722147703170776, 0.21101748943328857, 0.15805292129516602], [0.041519034653902054, 0.11474552005529404, 0.04909001290798187, 0.1299373209476471, 0.06295691430568695, 0.0239214189350605, 0.22038953006267548, 0.6809458136558533, 0.03295678645372391, 0.34942832589149475, 0.1847512274980545, 0.22206875681877136, 0.13646042346954346, 0.277276873588562, 0.1334262192249298], [0.0764331966638565, 0.004937899298965931, 0.049346037209033966, 0.05165911093354225, 0.051789041608572006, 0.11632981896400452, 0.3382570743560791, 0.21805666387081146, 0.5269062519073486, 0.05627245828509331, 0.1284114420413971, 0.3053610324859619, 0.058564696460962296, 0.14431920647621155, 0.19175130128860474], [0.08274618536233902, 0.009897814132273197, 0.07511309534311295, 0.03663979470729828, 0.16369661688804626, 0.04579350724816322, 0.04420214146375656, 0.06866282969713211, 0.17000554502010345, 0.09549596160650253, 0.07313749194145203, 0.06223462149500847, 0.11603321135044098, 0.07143211364746094, 0.2059532254934311], [0.41769060492515564, 0.07210511714220047, 0.40716952085494995, 0.22363832592964172, 0.48781970143318176, 0.015007800422608852, 0.4504202902317047, 0.4675638973712921, 0.24936619400978088, 0.5447031855583191, 0.4296078681945801, 0.07025930285453796, 0.1902965009212494, 0.3567025065422058, 0.12464861571788788], [0.3858333230018616, 0.06937354803085327, 0.5601253509521484, 0.30969470739364624, 0.36272186040878296, 0.005774383433163166, 0.16290897130966187, 0.16338182985782623, 0.1734752655029297, 0.10127251595258713, 0.6812319159507751, 0.35078492760658264, 0.26554787158966064, 0.3089393675327301, 0.12310608476400375], [0.047016799449920654, 0.04388514533638954, 0.010725832544267178, 0.029561294242739677, 0.04913409426808357, 0.007112162187695503, 0.045616600662469864, 0.09563170373439789, 0.021758677437901497, 0.05606407672166824, 0.023780539631843567, 0.2586848735809326, 0.1317795366048813, 0.13214319944381714, 0.18490085005760193], [0.024271933361887932, 0.10952932387590408, 0.01092300284653902, 0.005798409227281809, 0.03478696197271347, 0.015390553511679173, 0.005925341974943876, 0.04537563398480415, 0.00714160455390811, 0.005484140943735838, 0.00704369880259037, 0.04858299717307091, 0.06617175042629242, 0.13874217867851257, 0.17208275198936462], [0.1448126882314682, 0.16020630300045013, 0.02696153335273266, 0.06902630627155304, 0.03837759047746658, 0.07682601362466812, 0.15773272514343262, 0.005734406877309084, 0.16041570901870728, 0.10849703103303909, 0.08964504301548004, 0.4313186705112457, 0.12084108591079712, 0.20548132061958313, 0.1913137137889862], [0.03147122263908386, 0.06498080492019653, 0.03835386037826538, 0.021906379610300064, 0.004580754786729813, 0.08777225762605667, 0.06548282504081726, 0.0501156747341156, 0.09960248321294785, 0.05812418833374977, 0.04425663501024246, 0.12932318449020386, 0.040425609797239304, 0.10523593425750732, 0.20731014013290405], [0.03185653313994408, 0.014990762807428837, 0.012671640142798424, 0.014554454945027828, 0.005096337758004665, 0.025306345894932747, 0.015522593632340431, 0.012109486386179924, 0.014945329166948795, 0.0111803337931633, 0.010501275770366192, 0.010505528189241886, 0.013426732271909714, 0.01895906589925289, 0.16498495638370514], [0.05249502509832382, 0.3800218403339386, 0.048091597855091095, 0.01820666529238224, 0.10161028057336807, 0.18240275979042053, 0.03954629600048065, 0.08666953444480896, 0.00239415536634624, 0.05545663461089134, 0.11899324506521225, 0.03552442044019699, 0.037884730845689774, 0.08727249503135681, 0.23120805621147156], [0.06818026304244995, 0.06384387612342834, 0.013627037405967712, 0.017488455399870872, 0.04112459346652031, 0.37204819917678833, 0.2269488275051117, 0.050778258591890335, 0.07564288377761841, 0.002337054116651416, 0.03256889060139656, 0.017944803461432457, 0.02268233709037304, 0.05458826571702957, 0.17415940761566162], [0.3350563049316406, 0.14807114005088806, 0.16856855154037476, 0.0634150505065918, 0.6115131974220276, 0.8617944717407227, 0.4784194529056549, 0.271447092294693, 0.44727417826652527, 0.03638387843966484, 0.0791390910744667, 0.0010650564217939973, 0.10882135480642319, 0.07249648869037628, 0.16217634081840515], [0.6229478120803833, 0.11473710834980011, 0.9313594102859497, 0.6977004408836365, 0.7760463953018188, 0.5547962784767151, 0.2850213646888733, 0.12024195492267609, 0.6867435574531555, 0.3715392053127289, 0.5383524894714355, 0.04410971701145172, 0.001209885231219232, 0.03505939990282059, 0.07057712972164154], [0.12039526551961899, 0.15183398127555847, 0.23466746509075165, 0.07534174621105194, 0.09489727020263672, 0.12723755836486816, 0.06088049337267876, 0.06659132242202759, 0.24534910917282104, 0.08624531328678131, 0.05703657865524292, 0.031156441196799278, 0.0026320687029510736, 0.016870809718966484, 0.16136524081230164], [0.024926312267780304, 0.055538877844810486, 0.0035579875111579895, 0.006728078704327345, 0.10179352015256882, 0.12386216968297958, 0.08368373662233353, 0.17138876020908356, 0.13290183246135712, 0.025975322350859642, 0.0007942751399241388, 0.08679928630590439, 0.006940893363207579, 0.006668384652584791, 0.2167840152978897], [0.03079223819077015, 0.008776835165917873, 0.025623725727200508, 0.02996702678501606, 0.076390340924263, 0.11722294241189957, 0.03722265735268593, 0.06894396245479584, 0.023492204025387764, 0.02721765637397766, 0.02432498149573803, 0.009946721605956554, 0.02367306686937809, 0.02709045261144638, 0.15603508055210114], [0.050754088908433914, 0.38707080483436584, 0.056088101118803024, 0.022330837324261665, 0.19594413042068481, 0.356031596660614, 0.05540256202220917, 0.17031489312648773, 0.002592364326119423, 0.0904960110783577, 0.17009596526622772, 0.02688765898346901, 0.05266827344894409, 0.09536514431238174, 0.2306852787733078], [0.052731066942214966, 0.07647765427827835, 0.009669344872236252, 0.013631273992359638, 0.037963252514600754, 0.40968915820121765, 0.1877974420785904, 0.06287717074155807, 0.06925270706415176, 0.0021469732746481895, 0.03106895461678505, 0.02147551439702511, 0.022071314975619316, 0.058794401586055756, 0.17150944471359253], [0.2993965446949005, 0.1887350082397461, 0.17583680152893066, 0.06075390800833702, 0.6836855411529541, 0.8825634121894836, 0.44942814111709595, 0.3110062777996063, 0.6245057582855225, 0.04149743914604187, 0.08928828686475754, 0.0010537458583712578, 0.13885420560836792, 0.09175378829240799, 0.16601231694221497], [0.6222140192985535, 0.13893182575702667, 0.9335290789604187, 0.7374492883682251, 0.8253674507141113, 0.5633905529975891, 0.4091120660305023, 0.12903769314289093, 0.8090996742248535, 0.490604043006897, 0.6206711530685425, 0.06171489879488945, 0.0013746770564466715, 0.055387232452631, 0.07617512345314026], [0.1216169223189354, 0.17628714442253113, 0.21903447806835175, 0.08471400290727615, 0.12100206315517426, 0.12684285640716553, 0.060168445110321045, 0.05725802481174469, 0.204857736825943, 0.07119028270244598, 0.04997517541050911, 0.046147700399160385, 0.002665548352524638, 0.01769380457699299, 0.1595369428396225], [0.02323095127940178, 0.05151251330971718, 0.002836216241121292, 0.007343180477619171, 0.11471041291952133, 0.09745588153600693, 0.08793136477470398, 0.19987791776657104, 0.2081962525844574, 0.026029428467154503, 0.0006721516838297248, 0.15218332409858704, 0.008676346391439438, 0.009503011591732502, 0.20713838934898376], [0.07751920074224472, 0.05964339151978493, 0.026831025257706642, 0.018057459965348244, 0.1489739865064621, 0.27560925483703613, 0.15271086990833282, 0.29336896538734436, 0.2548864185810089, 0.015449506230652332, 0.02643660455942154, 0.05839552357792854, 0.06659974157810211, 0.1841144859790802, 0.1324990689754486]], [[0.006645309738814831, 0.043047573417425156, 0.04108792915940285, 0.028674451634287834, 0.10265154391527176, 0.03326163440942764, 0.05858607590198517, 0.06312219053506851, 0.013714859262108803, 0.017589740455150604, 0.02732386440038681, 0.11026919633150101, 0.028857730329036713, 0.054291173815727234, 0.19011041522026062], [0.006623337976634502, 0.06184479594230652, 0.014693422242999077, 0.03981047496199608, 0.08752858638763428, 0.01962500624358654, 0.06706372648477554, 0.011501927860081196, 0.0061228955164551735, 0.013949333690106869, 0.018435969948768616, 0.03678559139370918, 0.022487374022603035, 0.0660797506570816, 0.28934401273727417], [0.04245300590991974, 0.10349805653095245, 0.03407163918018341, 0.007511724252253771, 0.011565770022571087, 0.010817471891641617, 0.05971734598278999, 0.00459411833435297, 0.00350962788797915, 0.021488210186362267, 0.02298545651137829, 0.06376963108778, 0.036461468786001205, 0.1865386664867401, 0.16962040960788727], [0.014149562455713749, 0.03299444913864136, 0.007003516890108585, 0.004260434303432703, 0.018919609487056732, 0.008522795513272285, 0.018369171768426895, 0.015471882186830044, 0.0008095644298009574, 0.012402600608766079, 0.0075600892305374146, 0.03885417431592941, 0.05682341009378433, 0.0525624044239521, 0.22132590413093567], [0.01582285761833191, 0.013434984721243382, 0.0299182441085577, 0.03647983819246292, 0.009840411134064198, 0.06101881340146065, 0.04943924769759178, 0.3809337913990021, 0.027872184291481972, 0.07177315652370453, 0.06987256556749344, 0.014244881458580494, 0.18650749325752258, 0.16280896961688995, 0.16209137439727783], [0.018014581874012947, 0.11459828168153763, 0.013770120218396187, 0.021584663540124893, 0.02155740186572075, 0.03133949637413025, 0.03938381373882294, 0.28105995059013367, 0.02592163160443306, 0.026603924110531807, 0.010026685893535614, 0.009953479282557964, 0.004658891819417477, 0.014652709476649761, 0.16460371017456055], [0.001359884045086801, 0.029354294762015343, 0.0013457777677103877, 0.0026418184861540794, 0.008543581701815128, 0.003654624568298459, 0.0034977763425558805, 0.039957791566848755, 0.00108401442412287, 0.0005604945472441614, 0.0003877367707900703, 0.0033066808246076107, 0.007358025759458542, 0.007617549039423466, 0.20286646485328674], [0.015068605542182922, 0.027786174789071083, 0.015096615999937057, 0.048349082469940186, 0.03296791389584541, 0.0033369800075888634, 0.004459223244339228, 0.01348987128585577, 0.0010384898632764816, 0.013556106016039848, 0.015940798446536064, 0.042712315917015076, 0.02055070362985134, 0.042082786560058594, 0.17761820554733276], [0.09032934159040451, 0.007927155122160912, 0.08835490047931671, 0.21186837553977966, 0.05379607528448105, 0.23637458682060242, 0.16646702587604523, 0.022663533687591553, 0.024165447801351547, 0.08468358218669891, 0.07286331057548523, 0.016201749444007874, 0.031014403328299522, 0.026781529188156128, 0.21159759163856506], [0.014649872668087482, 0.032003261148929596, 0.1914098560810089, 0.17710277438163757, 0.07542474567890167, 0.05287592485547066, 0.14732114970684052, 0.08320016413927078, 0.025441674515604973, 0.02800501137971878, 0.0780739113688469, 0.04154554009437561, 0.017996925860643387, 0.08907850831747055, 0.17056028544902802], [0.29397615790367126, 0.03400568664073944, 0.3242063522338867, 0.3681035339832306, 0.48163339495658875, 0.025333818048238754, 0.20042747259140015, 0.06051841378211975, 0.2913966476917267, 0.19229580461978912, 0.12739360332489014, 0.07057002186775208, 0.012750222347676754, 0.053084854036569595, 0.09877952188253403], [0.2290111482143402, 0.04351853206753731, 0.4067046046257019, 0.12047477811574936, 0.3140789866447449, 0.03630740940570831, 0.1768438071012497, 0.13207398355007172, 0.0676346942782402, 0.07621245086193085, 0.1797569841146469, 0.24804529547691345, 0.009716867469251156, 0.01671340875327587, 0.15996301174163818], [0.0448942668735981, 0.015721717849373817, 0.04864601418375969, 0.03494936227798462, 0.016112152487039566, 0.06668571382761002, 0.05302642658352852, 0.07182876765727997, 0.006946365814656019, 0.011091585271060467, 0.1120418831706047, 0.008756275288760662, 0.055249348282814026, 0.03253563493490219, 0.187040314078331], [0.3104230761528015, 0.04545353353023529, 0.3986057937145233, 0.6762936115264893, 0.03838818892836571, 0.03300129249691963, 0.27034318447113037, 0.21517230570316315, 0.008858010172843933, 0.2650390863418579, 0.2720700800418854, 0.005442188587039709, 0.06764175742864609, 0.053534120321273804, 0.18754751980304718], [0.011383982375264168, 0.11127021163702011, 0.0030386100988835096, 0.0067845494486391544, 0.013927198015153408, 0.08719860762357712, 0.03287587687373161, 0.5690041184425354, 0.03855481743812561, 0.020931608974933624, 0.01293823029845953, 0.047187648713588715, 0.021772168576717377, 0.1471272110939026, 0.18776896595954895], [0.005892250686883926, 0.03474593162536621, 0.023128867149353027, 0.002957691205665469, 0.03212961554527283, 0.015600761398673058, 0.0076070488430559635, 0.04006163775920868, 0.012522950768470764, 0.00397108681499958, 0.004476191475987434, 0.01931026391685009, 0.006290406920015812, 0.014653924852609634, 0.17843826115131378], [0.030382098630070686, 0.14396639168262482, 0.0023552696220576763, 0.003069670405238867, 0.03293609246611595, 0.010766614228487015, 0.04698408767580986, 0.0892328992486, 0.010764017701148987, 0.01645551063120365, 0.0007101192022673786, 0.14693684875965118, 0.10194381326436996, 0.06734117865562439, 0.21650707721710205], [0.11579495668411255, 0.04704239219427109, 0.08932461589574814, 0.10469675809144974, 0.3945455551147461, 0.10528933256864548, 0.15413445234298706, 0.13012593984603882, 0.37207290530204773, 0.07726370543241501, 0.08641648292541504, 0.07665102183818817, 0.02378079853951931, 0.06452124565839767, 0.12331708520650864], [0.20921318233013153, 0.07137931883335114, 0.3537597060203552, 0.1065746620297432, 0.30610421299934387, 0.07002534717321396, 0.22329437732696533, 0.23702743649482727, 0.06014438346028328, 0.05975072830915451, 0.17522762715816498, 0.3013332188129425, 0.02163097821176052, 0.016774384304881096, 0.15580035746097565], [0.037447404116392136, 0.022215796634554863, 0.033449236303567886, 0.026462113484740257, 0.01563168875873089, 0.07434160262346268, 0.05695066228508949, 0.11209315806627274, 0.007291351445019245, 0.008904322981834412, 0.08964232355356216, 0.01435061078518629, 0.07215401530265808, 0.030404584482312202, 0.17889626324176788], [0.35028940439224243, 0.06261257082223892, 0.400876522064209, 0.6601436138153076, 0.0364767424762249, 0.0348673090338707, 0.3584212362766266, 0.3042086958885193, 0.012779565528035164, 0.3784087598323822, 0.29859334230422974, 0.00785628892481327, 0.11913719773292542, 0.06971576809883118, 0.17937220633029938], [0.014627714641392231, 0.1739588975906372, 0.0033204040955752134, 0.007496224716305733, 0.011711684986948967, 0.10170583426952362, 0.050673384219408035, 0.6495208740234375, 0.040652137249708176, 0.03492900729179382, 0.01829371228814125, 0.07074988633394241, 0.02588740922510624, 0.18312060832977295, 0.1794223189353943], [0.006626310292631388, 0.049714479595422745, 0.02355029061436653, 0.0033578642178326845, 0.02970620058476925, 0.020507775247097015, 0.008351391181349754, 0.03789898753166199, 0.008593969978392124, 0.004206442274153233, 0.004605707712471485, 0.02678176388144493, 0.006028715055435896, 0.012980426661670208, 0.1725957691669464], [0.029822910204529762, 0.18419219553470612, 0.002088941168040037, 0.00302593014203012, 0.028257815167307854, 0.012486547231674194, 0.051940228790044785, 0.10161811858415604, 0.01137576438486576, 0.02022942155599594, 0.0007436276064254344, 0.2113851010799408, 0.1359580010175705, 0.08821411430835724, 0.2053057849407196], [0.016353517770767212, 0.03170220926403999, 0.014149405062198639, 0.013441388495266438, 0.037340469658374786, 0.010170645080506802, 0.0053974115289747715, 0.025274941697716713, 0.017184404656291008, 0.0020940443500876427, 0.006704597268253565, 0.009430822916328907, 0.030376460403203964, 0.024553189054131508, 0.15533798933029175]], [[0.005564282648265362, 0.001319661969318986, 0.028383644297719002, 0.01146539393812418, 0.028919272124767303, 0.012663042172789574, 0.023019153624773026, 0.0018097365973517299, 0.0143426563590765, 0.021044740453362465, 0.015969598665833473, 0.03200899809598923, 0.013908782042562962, 0.03448842838406563, 0.20206299424171448], [0.3364894986152649, 0.00033270660787820816, 0.017299778759479523, 0.02505551464855671, 0.00914769060909748, 0.0018482855521142483, 0.040363892912864685, 0.0008854345069266856, 0.020481230691075325, 0.022734129801392555, 0.016724254935979843, 0.0011141380527988076, 5.783090819022618e-05, 0.0005799515638500452, 0.07228588312864304], [0.0004661931307055056, 0.4122284948825836, 0.0022180580999702215, 0.00018468582129571587, 0.00030452435021288693, 5.825214248034172e-05, 0.0012309255544096231, 0.0017770789563655853, 1.19774986160337e-05, 0.0001907332189148292, 0.0007099026697687805, 0.0006694658659398556, 1.216385771840578e-05, 0.00011785236711148173, 0.00036971797817386687], [0.04950903728604317, 0.2967310845851898, 0.021222729235887527, 0.01289455872029066, 0.009955117478966713, 0.008917939849197865, 0.011312013491988182, 0.01272521447390318, 0.0006359940161928535, 0.011413054540753365, 0.006479735020548105, 0.0053005279041826725, 0.001741865067742765, 0.0027997863944619894, 0.08213357627391815], [0.020872987806797028, 3.087984805461019e-05, 0.009670623578131199, 0.0253498163074255, 0.010817835107445717, 0.4320962131023407, 0.017970044165849686, 0.0021109851077198982, 0.0003069202939514071, 0.008261006325483322, 0.006166533567011356, 0.7898750901222229, 0.11304597556591034, 0.12737329304218292, 0.011856237426400185], [0.06067817285656929, 0.005839335732161999, 0.025896329432725906, 0.03351203724741936, 0.025002295151352882, 0.25514867901802063, 0.4275963008403778, 0.0194717925041914, 0.0888834074139595, 0.04690318927168846, 0.03570560738444328, 0.0850825086236, 0.0388353131711483, 0.24394167959690094, 0.10019046813249588], [0.014415884390473366, 0.001141559099778533, 0.0678224116563797, 0.024646559730172157, 0.08796916157007217, 0.022639306262135506, 0.07784608006477356, 0.02605922892689705, 0.014093886129558086, 0.0286162830889225, 0.09674176573753357, 0.04692256450653076, 0.03519048914313316, 0.20982496440410614, 0.1800668090581894], [0.02086471952497959, 0.0008324789232574403, 0.01815967448055744, 0.002886975882574916, 0.0020961007103323936, 0.004472428001463413, 0.033020272850990295, 0.0047500282526016235, 0.012928733602166176, 0.014328529126942158, 0.015946470201015472, 0.06593997031450272, 0.00855537410825491, 0.07526978105306625, 0.1768130511045456], [0.0009654826717451215, 0.000225315525312908, 0.0006124225910753012, 0.0007836261647753417, 0.0007428302778862417, 0.003282200777903199, 0.008662715554237366, 0.45239004492759705, 4.857195381191559e-05, 0.0006357804522849619, 0.0010122592793777585, 0.0006606358801946044, 0.00025698603712953627, 0.0011707579251378775, 0.0028539940249174833], [0.0025523374788463116, 0.0009212270379066467, 0.09748471528291702, 0.057154957205057144, 0.4982932209968567, 0.000552327954210341, 0.02918482944369316, 0.0039253802970051765, 0.00450148293748498, 0.0014971394557505846, 0.009822547435760498, 0.0017059196252375841, 0.001570553402416408, 0.005804183427244425, 0.00957300141453743], [0.016401896253228188, 0.00043752315104939044, 0.0039018490351736546, 0.005885160993784666, 0.0023499932140111923, 0.0031332974322140217, 0.055512603372335434, 0.003903925186023116, 0.10197419673204422, 0.009071548469364643, 0.023729920387268066, 0.002627716166898608, 0.01914973370730877, 0.02837507426738739, 0.1623656302690506], [0.0004865071678068489, 2.4051656509982422e-05, 0.00020084556308574975, 0.0003736558719538152, 0.000646126689389348, 9.209318523062393e-05, 0.009753170423209667, 9.854567178990692e-05, 0.34485483169555664, 0.00047165394062176347, 0.0012700805673375726, 0.000479432987049222, 0.0015819557011127472, 0.0008011643076315522, 0.0017131956992670894], [0.03442303463816643, 0.014513631351292133, 0.003174385754391551, 0.00478995218873024, 0.0017101461999118328, 0.003900717245414853, 0.05713852494955063, 0.013628470711410046, 0.0976317971944809, 0.28217896819114685, 0.01894235610961914, 0.009533336386084557, 0.003816690994426608, 0.005922130309045315, 0.12864208221435547], [0.01004086248576641, 0.01997406780719757, 0.005450551863759756, 0.006583535112440586, 0.0027623113710433245, 0.002903316868469119, 0.03531726077198982, 0.008635452017188072, 0.029197845607995987, 0.02162068709731102, 0.013219092041254044, 0.2711889445781708, 0.00537630682811141, 0.006846235599368811, 0.06079954653978348], [0.00031272557680495083, 8.196506314561702e-06, 4.237617031321861e-05, 0.00043677922803908587, 0.00024717405904084444, 0.022641032934188843, 0.002573953475803137, 0.0004433683061506599, 0.0013428670354187489, 0.00034036010038107634, 0.0007929583080112934, 0.0033021108247339725, 0.4761846959590912, 0.05593165382742882, 0.00081905338447541], [0.00267792004160583, 4.751862070406787e-05, 0.014043050818145275, 0.02037942036986351, 0.04410611465573311, 0.04370833560824394, 0.06117184832692146, 0.01571183279156685, 0.11117196083068848, 0.006906491704285145, 0.0029646854382008314, 0.15407170355319977, 0.010935205966234207, 0.03797803074121475, 0.16977860033512115], [0.011722833849489689, 0.005004812031984329, 0.007801789790391922, 0.0020204312168061733, 0.004946417640894651, 0.000467105332063511, 0.11018845438957214, 0.016256244853138924, 0.05208335816860199, 0.08122430741786957, 0.4447634816169739, 0.0032620911952108145, 0.0036480925045907497, 0.02699565887451172, 0.038189876824617386], [0.024071840569376945, 0.0004321316082496196, 0.023504342883825302, 0.020648522302508354, 0.021508874371647835, 0.012214796617627144, 0.024360070005059242, 0.0013747027842327952, 0.0815734788775444, 0.08039785921573639, 0.06951787322759628, 0.017521949484944344, 0.04566040262579918, 0.08389204740524292, 0.15396325290203094], [0.0014979105908423662, 4.0405931940767914e-05, 0.0008743218495510519, 0.001329930848442018, 0.0032007889822125435, 0.0002464030694682151, 0.015361684374511242, 0.00014017200737725943, 0.3369258642196655, 0.0015512423124164343, 0.003011554479598999, 0.0010034784208983183, 0.0037561107892543077, 0.0018123533809557557, 0.0037892721593379974], [0.03386643901467323, 0.015328249894082546, 0.002211565151810646, 0.003828595858067274, 0.0012934240512549877, 0.004837968852370977, 0.04463785141706467, 0.014559985138475895, 0.04106945917010307, 0.26340487599372864, 0.017707379534840584, 0.01015215553343296, 0.0033097255509346724, 0.0058202859945595264, 0.13427288830280304], [0.011043943464756012, 0.029788998886942863, 0.004548549186438322, 0.006417197175323963, 0.0019613932818174362, 0.0028304944280534983, 0.02768276073038578, 0.006805655546486378, 0.02553243562579155, 0.0314837321639061, 0.015709027647972107, 0.2568790316581726, 0.008081428706645966, 0.009137820452451706, 0.06746803224086761], [0.0003306480939500034, 1.1417017958592623e-05, 3.816767639364116e-05, 0.000435528316302225, 0.00020690191013272852, 0.02179853804409504, 0.002864222740754485, 0.0005160043947398663, 0.001080053043551743, 0.0004847492673434317, 0.0009861867874860764, 0.003908392507582903, 0.47703394293785095, 0.07113853842020035, 0.000873323529958725], [0.0030808241572231054, 6.38188939774409e-05, 0.011707174591720104, 0.023645061999559402, 0.038246914744377136, 0.047200631350278854, 0.04958858713507652, 0.012573646381497383, 0.04961754009127617, 0.005252092145383358, 0.002489157486706972, 0.17429526150226593, 0.008030706085264683, 0.02717452496290207, 0.1679786741733551], [0.01455691922456026, 0.008012487553060055, 0.006938801147043705, 0.00259140832349658, 0.004911262542009354, 0.0004763725446537137, 0.10579084604978561, 0.021042171865701675, 0.03971559554338455, 0.07511086016893387, 0.43185338377952576, 0.0035418386105448008, 0.004437423776835203, 0.03184036538004875, 0.04226255044341087], [0.055085837841033936, 0.014846320264041424, 0.06939522176980972, 0.036867137998342514, 0.13156765699386597, 0.04343622922897339, 0.18117153644561768, 0.04244613274931908, 0.04596249759197235, 0.13158053159713745, 0.047130946069955826, 0.549620509147644, 0.24813801050186157, 0.3232562243938446, 0.11823604255914688]], [[0.7448275089263916, 0.00023065913410391659, 0.0003700565139297396, 0.0002745355886872858, 0.0005768057890236378, 1.0151054993912112e-05, 1.3715341992792673e-05, 7.643950084457174e-06, 0.0004341531603131443, 5.2913601393811405e-05, 5.353476808522828e-05, 8.812115265754983e-05, 1.1566834245968494e-06, 5.744800546381157e-06, 5.576572584686801e-05], [8.114575030049309e-05, 0.06691394746303558, 0.04036417603492737, 0.022258125245571136, 0.055233534425497055, 0.050445422530174255, 0.048324622213840485, 0.00889397319406271, 0.1270352452993393, 0.04156908392906189, 0.20929713547229767, 0.21122632920742035, 0.414194792509079, 0.12628954648971558, 0.25567519664764404], [0.0012628535041585565, 0.0008597301202826202, 0.036364536732435226, 0.0971999391913414, 0.04217860475182533, 0.10421664267778397, 0.16082510352134705, 0.03283625468611717, 0.09032318741083145, 0.09653837233781815, 0.21890851855278015, 0.06589526683092117, 0.47985169291496277, 0.21388037502765656, 0.21010825037956238], [0.0002990703214891255, 0.001862871926277876, 0.010526847094297409, 0.01025421917438507, 0.05592086538672447, 0.02697981521487236, 0.01570008136332035, 0.02568165771663189, 0.010194454342126846, 0.048093631863594055, 0.04421652480959892, 0.02353351190686226, 0.21245922148227692, 0.0448865108191967, 0.23352482914924622], [0.00015855174569878727, 0.013162538409233093, 0.006567019037902355, 0.004201928153634071, 0.006268346216529608, 0.00024757537175901234, 0.012954139150679111, 0.003747382666915655, 0.03740423545241356, 0.007960616610944271, 0.013323514722287655, 0.06273993849754333, 0.048431456089019775, 0.13987915217876434, 0.20342004299163818], [0.013553211465477943, 0.03824196010828018, 0.02278091199696064, 0.09299258887767792, 0.0559159517288208, 0.00022306715254671872, 0.031003709882497787, 0.010444254614412785, 0.16168788075447083, 0.03666102886199951, 0.00852662418037653, 0.4432809352874756, 0.009321487508714199, 0.024379035457968712, 0.17351986467838287], [0.00026768012321554124, 0.015254812315106392, 0.007090381346642971, 0.006173381581902504, 0.006773150525987148, 0.0008773274021223187, 0.00638232659548521, 0.016591282561421394, 0.004996343981474638, 0.009327422827482224, 0.008862738497555256, 0.05876166746020317, 0.009527520276606083, 0.00578573253005743, 0.20356230437755585], [0.0008312691352330148, 0.012717761099338531, 0.013986560516059399, 0.007093494758009911, 0.004876464139670134, 0.0027259632479399443, 0.0033886858727782965, 0.01589561626315117, 0.00876854918897152, 0.005017295014113188, 0.023178039118647575, 0.05755693465471268, 0.05451130494475365, 0.06928746402263641, 0.1796484887599945], [0.00016753048112150282, 0.011822681874036789, 0.005686081480234861, 0.011659285984933376, 0.004307762254029512, 0.0031254058703780174, 0.009316416457295418, 0.0016170619055628777, 0.012603488750755787, 0.0245236624032259, 0.01756892167031765, 0.011099276132881641, 0.11892349272966385, 0.02075323462486267, 0.2549600899219513], [0.00017647366621531546, 0.053185176104307175, 0.007304554805159569, 0.004834755789488554, 0.000954066461417824, 0.025718921795487404, 0.02985404059290886, 0.09960591793060303, 0.010695043951272964, 0.016483109444379807, 0.018774237483739853, 0.05090473219752312, 0.01008983701467514, 0.028674444183707237, 0.22871088981628418], [0.0008755451999604702, 0.020039640367031097, 0.003969491925090551, 0.007670485880225897, 0.006173306610435247, 0.012295764870941639, 0.0076020946726202965, 0.012137084268033504, 0.010956642217934132, 0.010541083291172981, 0.018125493079423904, 0.03226908668875694, 0.02587633579969406, 0.016216130927205086, 0.1660052388906479], [5.4335410823114216e-05, 0.03367479890584946, 0.004507457371801138, 0.004544241353869438, 0.00623831432312727, 0.002192543353885412, 0.004128816071897745, 0.021106822416186333, 0.0003909784718416631, 0.00830051489174366, 0.018183842301368713, 0.009683135896921158, 0.0325237475335598, 0.00792472343891859, 0.25227075815200806], [0.0006012204103171825, 0.01188816037029028, 0.023532994091510773, 0.00770517997443676, 0.007410787045955658, 0.007087987381964922, 0.021027186885476112, 0.013456426560878754, 0.03266710042953491, 0.001251929672434926, 0.09021235257387161, 0.024440091103315353, 0.024299103766679764, 0.02338516153395176, 0.1967199146747589], [0.0009616355528123677, 0.059039004147052765, 0.04997482895851135, 0.013552234508097172, 0.03981975466012955, 0.020335622131824493, 0.014380398206412792, 0.07606764137744904, 0.07161007821559906, 0.024130970239639282, 0.06891870498657227, 0.0008635766571387649, 0.023193923756480217, 0.02981526218354702, 0.21020111441612244], [0.0013424595817923546, 0.0746709555387497, 0.011544802226126194, 0.027912717312574387, 0.0729047879576683, 0.10483764857053757, 0.07119728624820709, 0.010606798343360424, 0.044552259147167206, 0.05723145231604576, 0.034647323191165924, 0.38214871287345886, 0.003923356998711824, 0.08778946846723557, 0.19581711292266846], [0.0016638260567560792, 0.01581355184316635, 0.08943041414022446, 0.02092832513153553, 0.021133122965693474, 0.012408973649144173, 0.01347691286355257, 0.00275444146245718, 0.027862150222063065, 0.01225491613149643, 0.018322426825761795, 0.008929668925702572, 0.00015579524915665388, 0.0014782899525016546, 0.18181975185871124], [0.0008640239248052239, 0.06174946948885918, 0.004653214477002621, 0.002717669354751706, 0.015129820443689823, 0.00935456808656454, 0.016078660264611244, 0.08089328557252884, 0.017857585102319717, 0.0025031790137290955, 0.00012101473839720711, 0.013123439624905586, 0.005499868653714657, 0.001559562049806118, 0.22764776647090912], [0.0008687095833010972, 0.025285501033067703, 0.01658034697175026, 0.02363765239715576, 0.02393241412937641, 0.0657346174120903, 0.015298763290047646, 0.01792113669216633, 0.021707117557525635, 0.018967296928167343, 0.037634264677762985, 0.013209421187639236, 0.02256513573229313, 0.007774183992296457, 0.15961462259292603], [0.0001073219973477535, 0.04253393039107323, 0.010077103972434998, 0.007349912542849779, 0.00879223458468914, 0.004757148679345846, 0.008167163468897343, 0.03753674402832985, 0.00042728587868623435, 0.014237778261303902, 0.029898250475525856, 0.006872681900858879, 0.045794516801834106, 0.007500257343053818, 0.2562271058559418], [0.0005320480559021235, 0.010701313614845276, 0.020972738042473793, 0.007364482618868351, 0.006165153346955776, 0.00950621161609888, 0.022682208567857742, 0.018515970557928085, 0.03319491446018219, 0.00125269521959126, 0.07773777842521667, 0.022826068103313446, 0.02051766775548458, 0.020874740555882454, 0.1872510462999344], [0.0008804904646240175, 0.05573932081460953, 0.06578188389539719, 0.01897181011736393, 0.043492771685123444, 0.026308609172701836, 0.016426166519522667, 0.09104844927787781, 0.12495335191488266, 0.04637341946363449, 0.0944451242685318, 0.0008321930072270334, 0.03243781998753548, 0.03530845418572426, 0.2013196051120758], [0.001610875129699707, 0.08435038477182388, 0.014167247340083122, 0.03493078798055649, 0.07050123810768127, 0.10772886872291565, 0.09850788861513138, 0.013066386803984642, 0.05027954652905464, 0.10465669631958008, 0.04533415287733078, 0.47037968039512634, 0.004505114629864693, 0.12196572870016098, 0.18816377222537994], [0.0018758929800242186, 0.019657986238598824, 0.1020394116640091, 0.033738646656274796, 0.024869924411177635, 0.012215637601912022, 0.015038376674056053, 0.002843664726242423, 0.02175789885222912, 0.01636381261050701, 0.01989913359284401, 0.01190999522805214, 0.00020280842727515846, 0.0016855570720508695, 0.17570628225803375], [0.0009206020040437579, 0.08179444819688797, 0.00436751963570714, 0.003652991494163871, 0.019383452832698822, 0.008280212059617043, 0.016885409131646156, 0.10377784073352814, 0.023152435198426247, 0.0037028237711638212, 0.0001251623034477234, 0.018928401172161102, 0.009926089085638523, 0.002465219935402274, 0.21539123356342316], [0.0005496710073202848, 0.039492249488830566, 0.016358638182282448, 0.007983607240021229, 0.006420070305466652, 0.0012171968119218946, 0.003928476013243198, 0.005028040148317814, 0.010722441598773003, 0.0025004756171256304, 0.015696601942181587, 0.006085758097469807, 0.0033880609553307295, 0.0056163351982831955, 0.1572248637676239]], [[0.09555985033512115, 0.6603901982307434, 0.4109249413013458, 0.6857163310050964, 0.16377028822898865, 0.1341286301612854, 0.19969937205314636, 0.28269705176353455, 0.14764364063739777, 0.41980865597724915, 0.4319525361061096, 0.3789142668247223, 0.49345141649246216, 0.26345306634902954, 0.00909768883138895], [0.1460653841495514, 0.2758752405643463, 0.2826583981513977, 0.551855206489563, 0.05612415447831154, 0.19304026663303375, 0.0849798247218132, 0.038316093385219574, 0.02312053181231022, 0.46154478192329407, 0.36433619260787964, 0.35877159237861633, 0.1596277803182602, 0.0554661750793457, 6.483463948825374e-05], [3.716628270922229e-05, 1.9402585849093157e-07, 1.0113188182003796e-05, 6.318590021692216e-05, 6.053787728887983e-07, 2.5790013751247898e-06, 0.00022986173280514777, 1.074662236533186e-06, 6.082240361138247e-06, 3.35614299729059e-06, 2.225729804194998e-05, 7.863033715693746e-06, 1.555537892272696e-06, 3.881560041918419e-05, 0.23657216131687164], [0.6150763630867004, 0.041665952652692795, 0.4174444377422333, 0.4949702024459839, 0.20794649422168732, 0.3307763934135437, 0.8098993897438049, 0.2721010744571686, 0.7274996042251587, 0.4779607057571411, 0.6233283281326294, 0.7560765147209167, 0.3628612458705902, 0.7672091722488403, 5.392584171204362e-06], [5.640763447445352e-06, 2.5884469323500525e-07, 1.2724142379738623e-06, 8.170181899913587e-06, 1.2345621769327408e-07, 1.310836523771286e-07, 1.02673438959755e-05, 9.661080184741877e-07, 6.520539272969472e-07, 7.602448022225872e-07, 2.058099425994442e-06, 6.885502301656743e-08, 1.0175665465794737e-06, 1.7383708836860023e-05, 0.20754273235797882], [9.27566077280062e-07, 5.395870630309219e-07, 1.8455818917573197e-07, 1.2775643654094893e-06, 2.105696061960316e-08, 3.1680112755338996e-08, 6.263408067752607e-06, 4.3284012463118415e-07, 1.918825773827848e-06, 1.694104128091567e-07, 3.363936968980852e-07, 9.135120215830739e-09, 4.4058825920956224e-08, 7.840970965844463e-07, 0.18219269812107086], [0.7144812345504761, 0.6739043593406677, 0.2952970862388611, 0.49478814005851746, 0.17151717841625214, 0.06989942491054535, 0.5132517218589783, 0.30886489152908325, 0.5621734261512756, 0.5728412866592407, 0.576314389705658, 0.34687095880508423, 0.25617536902427673, 0.29690253734588623, 7.371841547865188e-06], [0.6291437745094299, 0.5982875823974609, 0.4885888695716858, 0.5792520046234131, 0.2514877915382385, 0.5298613905906677, 0.11972777545452118, 0.6076628565788269, 0.04243328422307968, 0.5940482020378113, 0.6775911450386047, 0.3496588468551636, 0.4937344789505005, 0.40163323283195496, 2.9517783332266845e-05], [0.6414378881454468, 0.20530864596366882, 0.8448930978775024, 0.5841984748840332, 0.48009997606277466, 0.48003992438316345, 0.4468145966529846, 0.036266062408685684, 0.3466547429561615, 0.521195650100708, 0.7532409429550171, 0.14529024064540863, 0.3844791650772095, 0.46825459599494934, 2.1059213395346887e-05], [0.7977450489997864, 0.5162288546562195, 0.513008177280426, 0.6203657984733582, 0.04621165990829468, 0.2237500697374344, 0.10730908066034317, 0.17203836143016815, 0.028481170535087585, 0.5342445969581604, 0.7256113290786743, 0.5827998518943787, 0.755642294883728, 0.511749804019928, 0.00015279543003998697], [0.5001324415206909, 0.7283154129981995, 0.6225411295890808, 0.5096700191497803, 0.4470505714416504, 0.6475648880004883, 0.4919697046279907, 0.42729777097702026, 0.22966071963310242, 0.4533919394016266, 0.5539101958274841, 0.2698501944541931, 0.3532210886478424, 0.2643750309944153, 2.9741322578047402e-05], [0.42266348004341125, 0.20205438137054443, 0.42841264605522156, 0.6724829077720642, 0.29094210267066956, 0.4464052617549896, 0.24126748740673065, 0.22405968606472015, 0.21308888494968414, 0.3085091710090637, 0.4672502279281616, 0.14604215323925018, 0.09687051922082901, 0.12085973471403122, 2.7047781259170733e-05], [0.5077533721923828, 0.4866065979003906, 0.8742184638977051, 0.805268406867981, 0.8406472206115723, 0.45863693952560425, 0.3596036732196808, 0.36316972970962524, 0.38783764839172363, 0.03767421096563339, 0.43841618299484253, 0.3401361405849457, 0.3197961747646332, 0.20812755823135376, 7.5720936365542e-06], [0.12348711490631104, 0.49926623702049255, 0.1342328041791916, 0.07936512678861618, 0.11133208125829697, 0.032334309071302414, 0.028592387214303017, 0.036310840398073196, 0.036252155900001526, 0.10585709661245346, 0.19267472624778748, 0.34429997205734253, 0.16909800469875336, 0.2464863359928131, 3.1697504709882196e-06], [4.5035082507638435e-07, 4.8253248507990065e-08, 2.1990938847693542e-08, 4.3766593194050074e-07, 1.1283042766763174e-07, 2.4235429663121977e-08, 4.6985369408503175e-06, 1.5805973418991925e-07, 1.1619090578562918e-08, 1.9516033233912822e-08, 1.8456361772223318e-07, 2.2261544074808626e-07, 2.278205402106437e-09, 7.143006541809882e-07, 0.21044957637786865], [0.71169513463974, 0.2780396640300751, 0.44078493118286133, 0.7963916063308716, 0.6933308839797974, 0.5056049823760986, 0.7329073548316956, 0.810703694820404, 0.551677942276001, 0.6459015607833862, 0.6943050622940063, 0.2817550301551819, 0.10247289389371872, 0.7378624677658081, 8.274764695670456e-06], [0.723514199256897, 0.08602748066186905, 0.6093902587890625, 0.8655006289482117, 0.42677831649780273, 0.03823491558432579, 0.30262306332588196, 0.036271825432777405, 0.12300263345241547, 0.2776595950126648, 0.07632125169038773, 0.06917709112167358, 0.14498986303806305, 0.06881040334701538, 2.5871422622003593e-06], [0.7111753225326538, 0.8019941449165344, 0.7984396815299988, 0.6959745287895203, 0.34880974888801575, 0.5955101251602173, 0.6658092141151428, 0.5378626585006714, 0.35595381259918213, 0.5855972766876221, 0.5757258534431458, 0.133575439453125, 0.3884122669696808, 0.11617641150951385, 8.579120731155854e-06], [0.43439850211143494, 0.1714652180671692, 0.4214288294315338, 0.6560039520263672, 0.15961043536663055, 0.25604698061943054, 0.26937225461006165, 0.1702796220779419, 0.22940081357955933, 0.327440470457077, 0.3977930247783661, 0.08873222768306732, 0.13160161674022675, 0.07058954238891602, 2.3103428247850388e-05], [0.48717519640922546, 0.4504354000091553, 0.9026078581809998, 0.8262973427772522, 0.8697957992553711, 0.4322546720504761, 0.47440072894096375, 0.40584686398506165, 0.6554202437400818, 0.04447361081838608, 0.5114831924438477, 0.4020007252693176, 0.3586147725582123, 0.19603849947452545, 5.424046776170144e-06], [0.09346597641706467, 0.41046077013015747, 0.13097965717315674, 0.06711046397686005, 0.09538185596466064, 0.021688319742679596, 0.027864748612046242, 0.029869627207517624, 0.07506763935089111, 0.13717295229434967, 0.21322546899318695, 0.3559926152229309, 0.19059841334819794, 0.24045485258102417, 2.0756003777933074e-06], [4.6634454520244617e-07, 5.573102512812511e-08, 2.3018172257138758e-08, 3.889360016273713e-07, 9.709493298259986e-08, 2.4796046105279856e-08, 7.192591056082165e-06, 1.7916640615567303e-07, 1.8580767147113875e-08, 3.5935642017648206e-08, 2.774728216081712e-07, 3.801677337378351e-07, 2.8816848907098347e-09, 9.808413778955583e-07, 0.2028982788324356], [0.6667957305908203, 0.327456533908844, 0.4202725291252136, 0.7458598613739014, 0.6837785840034485, 0.5435037612915039, 0.7794858813285828, 0.849186360836029, 0.6942030787467957, 0.7531007528305054, 0.7604266405105591, 0.4857816696166992, 0.12311270833015442, 0.7958275079727173, 7.400509275612421e-06], [0.704485297203064, 0.08825523406267166, 0.5944071412086487, 0.8510531783103943, 0.4262540936470032, 0.04518446326255798, 0.38849392533302307, 0.055145543068647385, 0.277063250541687, 0.40566664934158325, 0.09198901802301407, 0.13750647008419037, 0.24822941422462463, 0.1165834292769432, 3.5331499930180144e-06], [0.5231692790985107, 0.6706213355064392, 0.7785398364067078, 0.7122241258621216, 0.34260621666908264, 0.579698920249939, 0.5863306522369385, 0.4822496175765991, 0.5804131031036377, 0.7801564335823059, 0.7983464002609253, 0.22512593865394592, 0.4790371060371399, 0.2274763584136963, 1.8860177078749985e-05]], [[0.12044757604598999, 0.22699733078479767, 0.3625817894935608, 0.18942511081695557, 0.468371719121933, 0.5971034169197083, 0.5581120252609253, 0.29680517315864563, 0.4773823618888855, 0.4035939574241638, 0.3702273666858673, 0.3751682937145233, 0.267861545085907, 0.4069889783859253, 0.040672045201063156], [0.0243044663220644, 0.4273812174797058, 0.5286219716072083, 0.05566978082060814, 0.4582313597202301, 0.5064847469329834, 0.09591992199420929, 0.1787465512752533, 0.7349562644958496, 0.00692495983093977, 0.04355573281645775, 0.04027868062257767, 0.03415951877832413, 0.02788657508790493, 0.03653726726770401], [0.1999487727880478, 0.02213704027235508, 0.750217854976654, 0.5677059292793274, 0.8556592464447021, 0.6869031190872192, 0.2201639711856842, 0.6947058439254761, 0.2711787521839142, 0.21462410688400269, 0.3783731162548065, 0.39328378438949585, 0.3796219229698181, 0.27560317516326904, 0.052095912396907806], [0.17733721435070038, 0.1195838525891304, 0.4294462502002716, 0.41039443016052246, 0.45686641335487366, 0.5433338284492493, 0.08341590315103531, 0.5749803781509399, 0.0773383378982544, 0.2876206338405609, 0.19534848630428314, 0.10015372186899185, 0.2102438062429428, 0.04678432643413544, 0.044711172580718994], [0.4523387849330902, 0.8917949795722961, 0.4903220534324646, 0.5869925022125244, 0.47626572847366333, 0.006232858635485172, 0.41125378012657166, 0.13404546678066254, 0.6460333466529846, 0.32553666830062866, 0.3429105877876282, 0.031081799417734146, 0.42998504638671875, 0.16709895431995392, 0.08821719139814377], [0.49767979979515076, 0.7566660642623901, 0.25263193249702454, 0.4967457056045532, 0.47193706035614014, 0.006824302952736616, 0.2858791947364807, 0.18135732412338257, 0.4390898644924164, 0.7668571472167969, 0.15391138195991516, 0.08414287865161896, 0.5640745759010315, 0.35628020763397217, 0.09142898768186569], [0.18697474896907806, 0.23196713626384735, 0.23554784059524536, 0.34321168065071106, 0.5325552225112915, 0.15430577099323273, 0.2887123227119446, 0.4957616627216339, 0.36584702134132385, 0.2891024053096771, 0.08069057762622833, 0.18119029700756073, 0.4536079466342926, 0.16425864398479462, 0.03777371346950531], [0.17079660296440125, 0.16765500605106354, 0.28291502594947815, 0.16039209067821503, 0.2695491909980774, 0.16163654625415802, 0.08897912502288818, 0.28747832775115967, 0.8989478349685669, 0.26775097846984863, 0.17184530198574066, 0.3264879584312439, 0.31386569142341614, 0.1549917310476303, 0.05264737084507942], [0.04084352031350136, 0.5361505150794983, 0.018223807215690613, 0.03828004375100136, 0.3140276074409485, 0.08277524262666702, 0.07094793766736984, 0.012667819857597351, 0.3304368853569031, 0.10053964704275131, 0.03868165612220764, 0.31755131483078003, 0.22644393146038055, 0.07613880187273026, 0.12961620092391968], [0.07373615354299545, 0.19122207164764404, 0.06966950744390488, 0.01624569669365883, 0.017842771485447884, 0.2144099771976471, 0.24285149574279785, 0.3761756718158722, 0.8141085505485535, 0.27487871050834656, 0.09974052757024765, 0.10127317160367966, 0.16323235630989075, 0.21032299101352692, 0.10343435406684875], [0.06651142984628677, 0.1456020176410675, 0.01741747185587883, 0.07566884905099869, 0.018790215253829956, 0.20801369845867157, 0.16892337799072266, 0.33592528104782104, 0.1834612786769867, 0.29906225204467773, 0.2579277753829956, 0.5998365879058838, 0.5642448663711548, 0.572043240070343, 0.0891154333949089], [0.03234146162867546, 0.1962265521287918, 0.0277019701898098, 0.06972747296094894, 0.10650040954351425, 0.07791601866483688, 0.38205334544181824, 0.4892197549343109, 0.003444283502176404, 0.414199560880661, 0.16890743374824524, 0.4916560649871826, 0.8149713277816772, 0.7298122048377991, 0.14976243674755096], [0.07799918204545975, 0.2381461262702942, 0.01647050306200981, 0.08363308757543564, 0.05209676921367645, 0.02968973107635975, 0.11220219731330872, 0.32446831464767456, 0.1546868085861206, 0.06510066986083984, 0.1935844123363495, 0.5264057517051697, 0.34881067276000977, 0.6311980485916138, 0.09822507947683334], [0.1688770204782486, 0.13700607419013977, 0.20374003052711487, 0.12288741022348404, 0.15864238142967224, 0.039533428847789764, 0.12642242014408112, 0.35126128792762756, 0.365562379360199, 0.48467183113098145, 0.3247453570365906, 0.003142370842397213, 0.5969579219818115, 0.5533550977706909, 0.1647837609052658], [0.3052995800971985, 0.6539703607559204, 0.022321274504065514, 0.1902511715888977, 0.05963977798819542, 0.17083951830863953, 0.5218495726585388, 0.2573777139186859, 0.17107829451560974, 0.46426069736480713, 0.3389802873134613, 0.4338558316230774, 0.014936042949557304, 0.6202957630157471, 0.13899832963943481], [0.12219581007957458, 0.5012378692626953, 0.06702763587236404, 0.06399006396532059, 0.07401375472545624, 0.24048954248428345, 0.08739905059337616, 0.050457850098609924, 0.030934542417526245, 0.1506662517786026, 0.1536494344472885, 0.49837279319763184, 0.018043117597699165, 0.11216632276773453, 0.12939369678497314], [0.11525271832942963, 0.521948516368866, 0.007329752668738365, 0.008543604053556919, 0.05213259160518646, 0.04235774278640747, 0.2166471928358078, 0.528154194355011, 0.42159566283226013, 0.22446103394031525, 0.0032521234825253487, 0.5035390257835388, 0.365617960691452, 0.44961339235305786, 0.15735329687595367], [0.03232282027602196, 0.08449342846870422, 0.004147443920373917, 0.050799064338207245, 0.037334948778152466, 0.08206064254045486, 0.07099173963069916, 0.19771835207939148, 0.021330662071704865, 0.08051090687513351, 0.1005825400352478, 0.700605034828186, 0.3027697801589966, 0.4364767074584961, 0.10480254143476486], [0.034268103539943695, 0.16091260313987732, 0.0168391652405262, 0.06967493146657944, 0.0915973111987114, 0.051104262471199036, 0.2385529726743698, 0.3295409679412842, 0.0004638703539967537, 0.22104156017303467, 0.13362999260425568, 0.5110065937042236, 0.7347238063812256, 0.7763577103614807, 0.15897347033023834], [0.08530293405056, 0.1988343894481659, 0.010091865435242653, 0.07736483961343765, 0.030177433043718338, 0.023718634620308876, 0.06320804357528687, 0.20902810990810394, 0.020835628733038902, 0.026085397228598595, 0.10371798276901245, 0.427949994802475, 0.2465561032295227, 0.6410334706306458, 0.12414435297250748], [0.17881684005260468, 0.09949745982885361, 0.17292529344558716, 0.14197823405265808, 0.0994792953133583, 0.022899990901350975, 0.07621151208877563, 0.20277591049671173, 0.059071850031614304, 0.23252709209918976, 0.2142648547887802, 0.0016634195344522595, 0.4786902368068695, 0.5105896592140198, 0.1802191287279129], [0.29184988141059875, 0.5299537181854248, 0.01714717224240303, 0.1581006944179535, 0.034420810639858246, 0.1480618417263031, 0.35555243492126465, 0.16130897402763367, 0.0352683924138546, 0.2384539395570755, 0.22334522008895874, 0.274210661649704, 0.008749962784349918, 0.5107676982879639, 0.16247788071632385], [0.1536586880683899, 0.39876002073287964, 0.060627128928899765, 0.08434724807739258, 0.06138864532113075, 0.18170806765556335, 0.0558285117149353, 0.026850836351513863, 0.004648242145776749, 0.05450701341032982, 0.08679821342229843, 0.24500715732574463, 0.009806739166378975, 0.06359081715345383, 0.14997224509716034], [0.1216418668627739, 0.4058372378349304, 0.00597163662314415, 0.009731672704219818, 0.04685758054256439, 0.030955728143453598, 0.14503908157348633, 0.4122965633869171, 0.13539999723434448, 0.08889995515346527, 0.0017191163497045636, 0.24694381654262543, 0.23039060831069946, 0.2996818721294403, 0.1837962418794632], [0.2966727912425995, 0.1567845344543457, 0.07310101389884949, 0.14124755561351776, 0.2961083948612213, 0.07968501001596451, 0.06122228875756264, 0.14724984765052795, 0.06047076731920242, 0.055829375982284546, 0.06430483609437943, 0.11614347994327545, 0.15107537806034088, 0.15706941485404968, 0.12527146935462952]], [[0.004390498157590628, 0.00876205787062645, 0.016465701162815094, 0.005714573431760073, 0.036494653671979904, 0.0032131776679307222, 0.01477664802223444, 0.018077310174703598, 0.010320773348212242, 0.006645719520747662, 0.03231831267476082, 0.004141036421060562, 0.011432528495788574, 0.011813640594482422, 0.20326180756092072], [0.024762088432908058, 0.05259820073843002, 0.06384432315826416, 0.1483391523361206, 0.26820069551467896, 0.20398226380348206, 0.37573596835136414, 0.08007726073265076, 0.052950888872146606, 0.09653404355049133, 0.1610451638698578, 0.12953783571720123, 0.2330068051815033, 0.4463363587856293, 0.19394421577453613], [0.679330587387085, 0.043791741132736206, 0.12768849730491638, 0.27546241879463196, 0.03847555071115494, 0.08167082816362381, 0.21957245469093323, 0.04802798852324486, 0.10780715942382812, 0.6106712222099304, 0.2505488693714142, 0.1709391176700592, 0.04529926925897598, 0.17936259508132935, 0.13903558254241943], [0.05959116667509079, 0.03547457605600357, 0.03805014118552208, 0.02909783646464348, 0.08531224727630615, 0.035567909479141235, 0.017052877694368362, 0.03032829985022545, 0.012725351378321648, 0.06508343666791916, 0.04963213950395584, 0.013415418565273285, 0.026129938662052155, 0.011819864623248577, 0.21026377379894257], [0.0922531858086586, 0.009465531446039677, 0.05285167694091797, 0.11621613800525665, 0.008946871384978294, 0.0003396931570023298, 0.056973982602357864, 0.011571673676371574, 0.03833528608083725, 0.02977353148162365, 0.12428728491067886, 0.005304301157593727, 0.012764646671712399, 0.03717968612909317, 0.1998610943555832], [0.024207258597016335, 0.015275360085070133, 0.12442810088396072, 0.044900182634592056, 0.06243159621953964, 0.002727220067754388, 0.05297050252556801, 0.34427115321159363, 0.10989916324615479, 0.020859790965914726, 0.11048608273267746, 0.02605186030268669, 0.1171213760972023, 0.05136575922369957, 0.16462838649749756], [0.03260662034153938, 0.00298042013309896, 0.16533112525939941, 0.056620776653289795, 0.049906134605407715, 0.008958332240581512, 0.05700542405247688, 0.016634995117783546, 0.029206881299614906, 0.025224529206752777, 0.19688823819160461, 0.03853357210755348, 0.07708126306533813, 0.04636078327894211, 0.17741571366786957], [0.04517968371510506, 0.08089613169431686, 0.11787059158086777, 0.09224344044923782, 0.27191361784935, 0.020393863320350647, 0.01454318780452013, 0.009129227139055729, 0.020442765206098557, 0.08070629835128784, 0.07541637122631073, 0.10045406222343445, 0.04119513928890228, 0.10953037440776825, 0.15667563676834106], [0.08136362582445145, 0.07834970951080322, 0.015254710800945759, 0.0832342654466629, 0.10864067077636719, 0.11524737626314163, 0.1366880238056183, 0.012557982467114925, 0.1251911222934723, 0.15952906012535095, 0.026927798986434937, 0.07786250859498978, 0.11803606152534485, 0.2014097422361374, 0.2085045427083969], [0.07754338532686234, 0.11610410362482071, 0.032187070697546005, 0.05519983917474747, 0.0022462301421910524, 0.11507689952850342, 0.2733137607574463, 0.17666463553905487, 0.010644900612533092, 0.08315187692642212, 0.02269633859395981, 0.06840697675943375, 0.010724963620305061, 0.0371541827917099, 0.21114735305309296], [0.022315502166748047, 0.012378118932247162, 0.0062178960070014, 0.0078407758846879, 0.015144318342208862, 0.010697844438254833, 0.011326298117637634, 0.013119788840413094, 0.009139686822891235, 0.006104558240622282, 0.005014281254261732, 0.002417754614725709, 0.007784656248986721, 0.009948876686394215, 0.16676713526248932], [0.2628116309642792, 0.1443735957145691, 0.08422664552927017, 0.11404431611299515, 0.17927099764347076, 0.25378888845443726, 0.1460212618112564, 0.04387032985687256, 0.023589681833982468, 0.13644081354141235, 0.045464351773262024, 0.06847606599330902, 0.006222521886229515, 0.036451175808906555, 0.20291540026664734], [0.22663825750350952, 0.15363532304763794, 0.01756531558930874, 0.025186356157064438, 0.038983430713415146, 0.01259024627506733, 0.15960636734962463, 0.10260611027479172, 0.059462085366249084, 0.02338782697916031, 0.039677273482084274, 0.055942799896001816, 0.010165784507989883, 0.013570738956332207, 0.1720115691423416], [0.04994741827249527, 0.08986728638410568, 0.03736276924610138, 0.029899757355451584, 0.03542618826031685, 0.007244490087032318, 0.040187276899814606, 0.040814109146595, 0.04076588898897171, 0.05965813249349594, 0.045340292155742645, 0.0002602309104986489, 0.026138437911868095, 0.02984587848186493, 0.21049101650714874], [0.058702513575553894, 0.04533839225769043, 0.03167680650949478, 0.07689032703638077, 0.07722999900579453, 0.05968516319990158, 0.08647314459085464, 0.04232413321733475, 0.05769982933998108, 0.08562258630990982, 0.07418374717235565, 0.08922348916530609, 0.0013435373548418283, 0.0365031398832798, 0.1955317258834839], [0.035160183906555176, 0.01820351555943489, 0.1303882896900177, 0.019772829487919807, 0.040328264236450195, 0.05493366718292236, 0.03643186390399933, 0.013673724606633186, 0.020261095836758614, 0.09265058487653732, 0.06087178364396095, 0.005874141119420528, 0.0010416797595098615, 0.00679743243381381, 0.17795756459236145], [0.0850016176700592, 0.12483492493629456, 0.30438917875289917, 0.08283902704715729, 0.36141735315322876, 0.5806636810302734, 0.21757252514362335, 0.0776025652885437, 0.2093839943408966, 0.1517311930656433, 0.0691467672586441, 0.05431315675377846, 0.323522686958313, 0.21248842775821686, 0.11186490952968597], [0.017619943246245384, 0.008017263375222683, 0.019503258168697357, 0.014857600443065166, 0.07692210376262665, 0.015309707261621952, 0.015313221141695976, 0.008549719117581844, 0.03095930442214012, 0.019377540796995163, 0.031960610300302505, 0.0054225618951022625, 0.016712497919797897, 0.015215321443974972, 0.15961019694805145], [0.2695287764072418, 0.16650046408176422, 0.14075446128845215, 0.1364857405424118, 0.23432065546512604, 0.261515349149704, 0.18958930671215057, 0.053015366196632385, 0.031337250024080276, 0.28422990441322327, 0.08986067771911621, 0.06408891826868057, 0.008591849356889725, 0.031372129917144775, 0.19151051342487335], [0.2586316764354706, 0.21131351590156555, 0.019284198060631752, 0.02717362530529499, 0.037918541580438614, 0.014535612426698208, 0.14439015090465546, 0.14164134860038757, 0.06384728103876114, 0.03232301026582718, 0.05240772292017937, 0.08253412693738937, 0.007928711362183094, 0.011026060208678246, 0.1583670824766159], [0.0646420493721962, 0.15151722729206085, 0.04734531044960022, 0.03642117232084274, 0.03833956643939018, 0.007805521599948406, 0.03985777497291565, 0.05410199984908104, 0.07749858498573303, 0.1281091719865799, 0.06692291796207428, 0.0004382343322504312, 0.02769407443702221, 0.03219819441437721, 0.20084568858146667], [0.06935474276542664, 0.07278740406036377, 0.0317843034863472, 0.061563972383737564, 0.057788632810115814, 0.05731336027383804, 0.08327846229076385, 0.046548519283533096, 0.06359860301017761, 0.13075897097587585, 0.09122113883495331, 0.1188196912407875, 0.0009191188146360219, 0.03464866429567337, 0.18994329869747162], [0.04588386043906212, 0.027941085398197174, 0.16196617484092712, 0.023955674842000008, 0.04093120992183685, 0.06800121814012527, 0.031365618109703064, 0.013349683955311775, 0.016157155856490135, 0.09367228299379349, 0.06382262706756592, 0.009268027730286121, 0.0006308736628852785, 0.005314440466463566, 0.17240527272224426], [0.09685268998146057, 0.17937548458576202, 0.31954076886177063, 0.09235721081495285, 0.3550800085067749, 0.5939842462539673, 0.19687135517597198, 0.10603781044483185, 0.27224627137184143, 0.17071248590946198, 0.0712975338101387, 0.10525800287723541, 0.3080449402332306, 0.250378280878067, 0.11120767891407013], [0.012543261051177979, 0.010277148336172104, 0.014658409170806408, 0.007294217124581337, 0.028056686744093895, 0.009602113626897335, 0.004711315967142582, 0.003909323364496231, 0.019910220056772232, 0.0035717461723834276, 0.016398703679442406, 0.01044577918946743, 0.015165981836616993, 0.04322582483291626, 0.1563079059123993]]], [[[0.017177388072013855, 0.0003127168456558138, 0.004294774029403925, 0.0025685238651931286, 0.0020048224832862616, 0.0018501998856663704, 0.004262528382241726, 0.00010045748058473691, 0.004143967293202877, 0.0026836262550204992, 0.0008790316642262042, 0.0012905423063784838, 8.68891947902739e-05, 0.00021419797849375755, 0.16245633363723755], [0.12795236706733704, 0.00371668953448534, 0.02831968478858471, 0.025539351627230644, 0.0009935664711520076, 0.0005314573645591736, 0.0308157317340374, 4.653090945794247e-05, 0.004544692113995552, 0.02307700179517269, 0.014357739128172398, 0.0017676070565357804, 1.5830510164960288e-05, 0.0005655316635966301, 0.23366259038448334], [0.0012442924780771136, 0.6349257826805115, 1.560185046400875e-05, 0.0005892697954550385, 2.671209358595661e-06, 1.747990245348774e-05, 0.00010909549746429548, 9.000968930195086e-06, 1.720580803521443e-05, 0.0008049540338106453, 0.00025925427326001227, 4.468534825718962e-06, 5.9764097386505455e-06, 7.895294402260333e-05, 0.00020540088007692248], [0.014811321161687374, 0.6550174951553345, 5.4754978918936104e-05, 0.0013682727003470063, 7.1730828494764864e-06, 3.513193587423302e-05, 0.00030579010490328074, 4.0161107790481765e-06, 8.621193410363048e-05, 0.0020331761334091425, 0.00018049145000986755, 1.5370842447737232e-05, 2.3058303213474574e-06, 3.803792060352862e-05, 0.0004018820764031261], [0.0038746336940675974, 0.000324725842801854, 0.0051879663951694965, 0.009153621271252632, 0.0008864403935149312, 0.6781038641929626, 0.057408660650253296, 0.0010902854846790433, 0.00043091498082503676, 0.000930881651584059, 0.00047575533972121775, 0.0024355631321668625, 0.0005705857765860856, 0.0003382607828825712, 0.0010924984235316515], [3.359095899213571e-06, 1.5333833403019526e-07, 3.112653939751908e-05, 0.00013510043208952993, 6.284327810135437e-06, 0.7821753025054932, 0.0016732696676626801, 2.949555346276611e-05, 1.1825303545265342e-06, 2.2443591660703532e-06, 4.938602842230466e-07, 8.253279020209447e-07, 2.1931487026449759e-07, 9.422030302630446e-07, 3.409375494811684e-06], [0.00014056767395231873, 5.100669682178705e-07, 0.0031089531257748604, 0.006296438630670309, 0.00044245802564546466, 0.5631491541862488, 0.006006886251270771, 0.00015836386592127383, 1.0129460861207917e-05, 9.741926623973995e-05, 8.02019567345269e-05, 2.8800504878745414e-05, 2.2740101485396735e-05, 9.966635116143152e-05, 5.9340749430703e-05], [0.07201159745454788, 9.12444302230142e-05, 0.07167930901050568, 0.07350550591945648, 0.008381813764572144, 0.32997292280197144, 0.32325229048728943, 0.006826527416706085, 0.005964158568531275, 0.01031426526606083, 0.0041834041476249695, 0.0003298712254036218, 2.8659975214395672e-05, 0.00019656911899801344, 0.02016262151300907], [0.0011574724921956658, 3.413460092360765e-07, 0.00010100962390424684, 0.0058910842053592205, 3.088227913394803e-06, 0.01394782867282629, 0.16852441430091858, 0.6476468443870544, 4.158269439358264e-05, 0.002217742381617427, 3.1430703529622406e-05, 8.318846812471747e-05, 7.552150123046886e-07, 2.136993316526059e-06, 0.00013183141709305346], [0.056869976222515106, 0.00018767332949209958, 0.07251239567995071, 0.21200358867645264, 0.5404223799705505, 0.01658189669251442, 0.03565289452672005, 0.0015120785683393478, 0.002293382305651903, 0.005935561377555132, 0.012055100873112679, 0.005193157121539116, 0.003556813346222043, 0.007320231292396784, 0.018532630056142807], [0.37012216448783875, 0.0030506134498864412, 0.585090160369873, 0.3774729073047638, 0.6362679600715637, 0.12865976989269257, 0.340728759765625, 0.01963443122804165, 0.11373940855264664, 0.0405576266348362, 0.04042620584368706, 0.006893007550388575, 0.0011100739939138293, 0.004035779275000095, 0.12706774473190308], [0.01695789396762848, 0.00023016006161924452, 0.013878279365599155, 0.04998883232474327, 0.0032932739704847336, 8.226843783631921e-05, 0.014781651087105274, 0.00017401285003870726, 0.4112556278705597, 0.007095593959093094, 0.01393651869148016, 0.000858593441080302, 0.0009966455399990082, 0.006141065154224634, 0.004614917561411858], [0.023780474439263344, 4.510316648520529e-05, 0.013797261752188206, 0.087004654109478, 0.0004407854867167771, 0.0013536562910303473, 0.04187630116939545, 0.0028901200275868177, 0.06213926523923874, 0.3483656048774719, 0.03705320879817009, 0.005524389911442995, 0.0004139445663895458, 0.0025706440210342407, 0.012163926847279072], [0.017730457708239555, 8.937691018218175e-05, 0.00767871318385005, 0.02321789041161537, 0.00010702417785068974, 0.004407694097608328, 0.0538853257894516, 0.011079255491495132, 0.003184565110132098, 0.026336153969168663, 0.005110009107738733, 0.3480301797389984, 0.002053677337244153, 0.01653059385716915, 0.00945478305220604], [0.00016590843733865768, 4.410037217894569e-05, 0.0031412369571626186, 0.0015988551313057542, 0.002399750053882599, 0.0004506838449742645, 0.001152031123638153, 0.00021803524577990174, 0.00054850586457178, 0.0001300607982557267, 0.001143390079960227, 0.0023531741462647915, 0.6484718322753906, 0.061944324523210526, 1.8855764210456982e-05], [5.492825607689156e-07, 1.991102926979238e-08, 2.3713612335996004e-06, 1.7095164366764948e-05, 8.657893886265811e-07, 3.6805211323098774e-08, 1.598790731804911e-06, 2.0731313554733788e-07, 4.274500042811269e-07, 5.490248440764844e-06, 0.00014167907647788525, 5.53526615476585e-06, 0.5851997137069702, 0.22563536465168, 1.0684430407081891e-07], [0.01633528247475624, 0.0006067559006623924, 0.047781698405742645, 0.1674666851758957, 0.0008243213524110615, 0.0007217283127829432, 0.005900595337152481, 0.0001012250068015419, 0.006910703144967556, 0.1343279927968979, 0.5695670247077942, 0.0034049933310598135, 0.008110514841973782, 0.0796104148030281, 0.00713667506352067], [0.02614973485469818, 0.001497315475717187, 0.11498566716909409, 0.08699594438076019, 0.006599655374884605, 0.0011878651566803455, 0.009639720432460308, 0.0002812722814269364, 0.014351817779242992, 0.06119270250201225, 0.19180962443351746, 0.06391202658414841, 0.4759237766265869, 0.44549837708473206, 0.058810409158468246], [0.041024841368198395, 0.0016396299470216036, 0.05072889104485512, 0.1323171705007553, 0.0024413676001131535, 0.00023246044293045998, 0.02059599943459034, 0.00033336327760480344, 0.7358176708221436, 0.04226389154791832, 0.0658484548330307, 0.002587914001196623, 0.013076293282210827, 0.0423613116145134, 0.051219869405031204], [0.025904469192028046, 0.00014531973283737898, 0.014812517911195755, 0.11958510428667068, 0.0003183217777404934, 0.0012536202557384968, 0.031174438074231148, 0.0025010022800415754, 0.045685503631830215, 0.4334242641925812, 0.057037968188524246, 0.005963113158941269, 0.0007164725102484226, 0.00356480129994452, 0.02565825544297695], [0.04193783551454544, 0.0005606984486803412, 0.01569434627890587, 0.058890990912914276, 0.00016686622984707355, 0.0032934362534433603, 0.10695304721593857, 0.011062747798860073, 0.008127261884510517, 0.04922156408429146, 0.01035262644290924, 0.3408533036708832, 0.003045044606551528, 0.019185535609722137, 0.046415992081165314], [0.00012501348101068288, 4.870840712101199e-05, 0.0024386774748563766, 0.001847597537562251, 0.0017206922639161348, 0.0002501157287042588, 0.0009360458934679627, 0.00021343374100979418, 0.0004799730086233467, 0.00017777700850274414, 0.0013057318283244967, 0.0019216074142605066, 0.7016423344612122, 0.059743087738752365, 1.6802117897896096e-05], [1.7574552657606546e-06, 9.272354617451128e-08, 1.001089003693778e-05, 5.891482942388393e-05, 3.3656547202554066e-06, 1.2065736143540562e-07, 6.7727110035775695e-06, 6.411150366147922e-07, 1.3192883443480241e-06, 1.1707085832313169e-05, 0.00026830541901290417, 1.0283902156515978e-05, 0.6812964081764221, 0.27208930253982544, 4.838558993469633e-07], [0.01900503970682621, 0.0008953948272392154, 0.09836827963590622, 0.2858547866344452, 0.0013939865166321397, 0.0011423979885876179, 0.011685764417052269, 0.00014273256238084286, 0.010754182003438473, 0.15914513170719147, 0.6438553333282471, 0.002441136632114649, 0.008362390100955963, 0.07132171094417572, 0.011131932027637959], [0.12417581677436829, 0.0153038389980793, 0.12986266613006592, 0.6406017541885376, 0.009386910125613213, 0.057520631700754166, 0.09723392128944397, 0.0041757188737392426, 0.030985616147518158, 0.12765046954154968, 0.052563395351171494, 0.09427980333566666, 0.010530965402722359, 0.01615813747048378, 0.110444575548172]], [[0.05668458715081215, 0.013551714830100536, 0.3300224542617798, 0.22417771816253662, 0.24923239648342133, 0.16107039153575897, 0.07639153301715851, 0.036736860871315, 0.044193096458911896, 0.14611276984214783, 0.15061600506305695, 0.035221245139837265, 0.0397845022380352, 0.06225845590233803, 0.12414046376943588], [0.29422780871391296, 0.3258638381958008, 0.027477310970425606, 0.10906420648097992, 0.003920723684132099, 0.020042676478624344, 0.05157224088907242, 0.0009247793932445347, 0.005282218102365732, 0.1744423359632492, 0.0761384516954422, 0.0033416510559618473, 0.0003361533163115382, 0.0012587645323947072, 0.013668928295373917], [0.19355924427509308, 0.1259031891822815, 0.004604514688253403, 0.04003702849149704, 0.0129036083817482, 0.019794460386037827, 0.06589072942733765, 0.0014933310449123383, 0.012753497809171677, 0.06252782791852951, 0.0361945815384388, 0.011655895970761776, 0.01012047752737999, 0.02639157697558403, 0.16549569368362427], [0.4293937385082245, 0.07181306928396225, 0.003158864099532366, 0.04697505012154579, 0.01354672759771347, 0.09221473336219788, 0.24058710038661957, 0.0037424738984555006, 0.07543525844812393, 0.0656844824552536, 0.01989266835153103, 0.06512395292520523, 0.01137665193527937, 0.029709961265325546, 0.18951866030693054], [0.052543047815561295, 0.03695955500006676, 0.100065678358078, 0.07546547800302505, 0.053252771496772766, 0.11382242292165756, 0.28551623225212097, 0.14051520824432373, 0.12815484404563904, 0.15533913671970367, 0.11139650642871857, 0.09512985497713089, 0.017796501517295837, 0.04266834259033203, 0.1351824700832367], [0.002040643012151122, 0.005490712355822325, 0.024769198149442673, 0.007002650294452906, 0.0020249236840754747, 0.03913044556975365, 0.01487613096833229, 0.09424738585948944, 0.010089649818837643, 0.05513475462794304, 0.0488949678838253, 0.007691625505685806, 0.002344577107578516, 0.012510538101196289, 0.20307941734790802], [0.04981796815991402, 0.13342007994651794, 0.4189896881580353, 0.06767702847719193, 0.007763800676912069, 0.11641503125429153, 0.029343493282794952, 0.11072052270174026, 0.06700066477060318, 0.1429358571767807, 0.3406253457069397, 0.00571059063076973, 0.0006326772854663432, 0.004126383922994137, 0.17491626739501953], [0.008032058365643024, 0.009898788295686245, 0.0165096465498209, 0.015990890562534332, 0.001612947671674192, 0.07025154680013657, 0.1309722512960434, 0.45684561133384705, 0.020022952929139137, 0.014566164463758469, 0.01627122238278389, 0.001012062537483871, 0.003352430183440447, 0.006583840120583773, 0.0849505066871643], [0.027854006737470627, 0.008844887837767601, 0.011581032536923885, 0.014227867126464844, 0.0022522227372974157, 0.6803511381149292, 0.24682462215423584, 0.11913055926561356, 0.0028406307101249695, 0.006190288811922073, 0.00574448611587286, 0.0012344244169071317, 0.010572707280516624, 0.00985674187541008, 0.11121391505002975], [0.11111988872289658, 0.0035893325693905354, 0.4007861316204071, 0.2033512443304062, 0.1986382007598877, 0.15137647092342377, 0.12109687924385071, 0.007575488183647394, 0.021906785666942596, 0.03087061457335949, 0.08533017337322235, 0.07086688280105591, 0.06729871034622192, 0.045789312571287155, 0.1673528403043747], [0.06468851119279861, 0.006587199401110411, 0.23617494106292725, 0.19800357520580292, 0.15495024621486664, 0.06172433868050575, 0.05180465057492256, 0.01833559013903141, 0.016546709463000298, 0.05746111273765564, 0.0824536681175232, 0.007550883572548628, 0.007943101227283478, 0.011712267994880676, 0.33849596977233887], [0.09414701163768768, 0.10295354574918747, 0.0844656303524971, 0.06548816710710526, 0.08529236167669296, 0.06227908656001091, 0.030192906036973, 0.010874724946916103, 0.025562399998307228, 0.005146168638020754, 0.014559037052094936, 0.013559900224208832, 0.06781303137540817, 0.05153109133243561, 0.33232951164245605], [0.314544141292572, 0.6832185983657837, 0.07794945687055588, 0.042061515152454376, 0.015504884533584118, 0.1916494369506836, 0.006379975005984306, 0.0006176759488880634, 0.0012508369982242584, 0.01929312013089657, 0.022219885140657425, 0.0019787217024713755, 0.01769268326461315, 0.008809820748865604, 0.08711312711238861], [0.027118999511003494, 0.07309459149837494, 0.04486501216888428, 0.012266037985682487, 0.024303032085299492, 0.030924739316105843, 0.021004648879170418, 0.003694491693750024, 0.01517508551478386, 0.025275954976677895, 0.0075909653678536415, 0.24021397531032562, 0.04135901853442192, 0.07603362947702408, 0.11061857640743256], [0.025165440514683723, 0.019109023734927177, 0.008520743809640408, 0.015198510140180588, 0.007751345168799162, 0.005125374533236027, 0.008160223253071308, 0.0017721926560625434, 0.08641061931848526, 0.07765892893075943, 0.017936453223228455, 0.020675569772720337, 0.0024341135285794735, 0.023971976712346077, 0.16557703912258148], [0.22320780158042908, 0.05348529666662216, 0.01734296977519989, 0.1172923669219017, 0.004340981598943472, 0.003372892737388611, 0.033841460943222046, 0.024162178859114647, 0.05216863751411438, 0.3090120553970337, 0.2295515090227127, 0.014075365848839283, 0.020010780543088913, 0.20773397386074066, 0.12411301583051682], [0.1383964717388153, 0.05579448863863945, 0.1563209742307663, 0.09128513187170029, 0.039257608354091644, 0.009886945597827435, 0.006391164381057024, 0.0007081980584189296, 0.006523598916828632, 0.16335614025592804, 0.02935076504945755, 0.023180969059467316, 0.19186609983444214, 0.2336183488368988, 0.16814255714416504], [0.1625337302684784, 0.007939358241856098, 0.11928629875183105, 0.1341797411441803, 0.005670298356562853, 0.0033473502844572067, 0.022544465959072113, 0.005534132476896048, 0.007299710530787706, 0.08667418360710144, 0.07403960824012756, 0.004230144899338484, 0.002401313977316022, 0.005503634922206402, 0.20701391994953156], [0.08204744011163712, 0.04882703348994255, 0.048393696546554565, 0.02867632359266281, 0.012730585411190987, 0.02805456519126892, 0.014470821246504784, 0.008571655489504337, 0.011637779884040356, 0.011116313748061657, 0.015620187856256962, 0.00444953003898263, 0.038398172706365585, 0.021771300584077835, 0.25556278228759766], [0.3818233609199524, 0.6690115928649902, 0.07648678869009018, 0.0345233753323555, 0.011518634855747223, 0.1436365395784378, 0.005264819134026766, 0.000502048700582236, 0.0017500953981652856, 0.03918173909187317, 0.04129163548350334, 0.0023984990548342466, 0.020183494314551353, 0.008427903987467289, 0.09516369551420212], [0.02332407608628273, 0.06938373297452927, 0.035716570913791656, 0.008126936852931976, 0.012537641450762749, 0.0137803228572011, 0.01513306051492691, 0.00204691500402987, 0.029820755124092102, 0.05474912002682686, 0.016170548275113106, 0.22342036664485931, 0.05026429146528244, 0.06863567978143692, 0.11948796361684799], [0.020166568458080292, 0.015762973576784134, 0.006330324336886406, 0.008625769056379795, 0.005781465210020542, 0.00451312493532896, 0.007413441780954599, 0.0018466140609234571, 0.14846709370613098, 0.1376892477273941, 0.02431248314678669, 0.03153817355632782, 0.0025850962847471237, 0.026987632736563683, 0.15984071791172028], [0.11904438585042953, 0.03637225553393364, 0.013324074447154999, 0.04586002975702286, 0.00359557312913239, 0.002297254279255867, 0.02453085221350193, 0.019205793738365173, 0.07615289092063904, 0.3510056436061859, 0.24748629331588745, 0.0179043747484684, 0.015299135819077492, 0.16336295008659363, 0.13914434611797333], [0.0598345547914505, 0.028141267597675323, 0.11996681243181229, 0.04193190485239029, 0.03001757152378559, 0.006633914541453123, 0.005910022184252739, 0.0007469199481420219, 0.010509159415960312, 0.18832749128341675, 0.032145459204912186, 0.022126449272036552, 0.16793787479400635, 0.1917877346277237, 0.16885708272457123], [0.30011340975761414, 0.029496116563677788, 0.21246175467967987, 0.11388618499040604, 0.019265230745077133, 0.011386800557374954, 0.02386542037129402, 0.0049255480989813805, 0.002113579073920846, 0.2235003262758255, 0.1410367637872696, 0.022971738129854202, 0.009332037530839443, 0.01034344732761383, 0.12311729788780212]], [[0.03517069295048714, 0.03549245744943619, 0.004381549544632435, 0.008797217160463333, 0.007323419209569693, 0.042320944368839264, 0.004849699325859547, 0.003679578425362706, 0.011580413207411766, 0.009367180056869984, 0.006541883572936058, 0.022973380982875824, 0.023761657997965813, 0.02892483025789261, 0.1581033319234848], [0.01528994832187891, 0.20408181846141815, 0.11101088672876358, 0.08111120015382767, 0.07986893504858017, 0.010126215405762196, 0.020366966724395752, 0.1417536586523056, 0.04787333309650421, 0.04340335354208946, 0.2409791648387909, 0.04442436248064041, 0.005909040104597807, 0.014603852294385433, 0.18931475281715393], [0.21622280776500702, 0.09626477211713791, 0.10110790282487869, 0.31975099444389343, 0.2572377920150757, 0.630383312702179, 0.1336757242679596, 0.17725828289985657, 0.02378956414759159, 0.22253809869289398, 0.13939163088798523, 0.30914127826690674, 0.35968318581581116, 0.48164138197898865, 0.09301326423883438], [0.168080672621727, 0.1516411453485489, 0.07150255143642426, 0.32225823402404785, 0.2490793913602829, 0.30686429142951965, 0.032337237149477005, 0.16698232293128967, 0.04405289515852928, 0.2310783565044403, 0.10561788827180862, 0.2769646644592285, 0.19830158352851868, 0.1653461754322052, 0.09653043746948242], [0.04038669914007187, 0.16624715924263, 0.3317047655582428, 0.3851986229419708, 0.42305275797843933, 0.008450526744127274, 0.09501849114894867, 0.24002836644649506, 0.4256587326526642, 0.15410973131656647, 0.19127053022384644, 0.04389801248908043, 0.030224177986383438, 0.05971052870154381, 0.11478950828313828], [0.04527302458882332, 0.15370813012123108, 0.46266382932662964, 0.06791326403617859, 0.6029869914054871, 0.018879592418670654, 0.07514301687479019, 0.07948564738035202, 0.6243545413017273, 0.11254889518022537, 0.24916931986808777, 0.08612842112779617, 0.07598677277565002, 0.13317255675792694, 0.04299912229180336], [0.03695433586835861, 0.028389452025294304, 0.2721908688545227, 0.07653216272592545, 0.6730886697769165, 0.004614274017512798, 0.004165990743786097, 0.01533985324203968, 0.28992146253585815, 0.028840038925409317, 0.055076081305742264, 0.024787841364741325, 0.0010191021719947457, 0.0022868094965815544, 0.030124979093670845], [0.005083801224827766, 0.09139324724674225, 0.28116321563720703, 0.08195066452026367, 0.6340349316596985, 0.012272918596863747, 0.0005934475339017808, 0.010692326352000237, 0.1514793336391449, 0.016046250239014626, 0.04672969505190849, 0.014393122866749763, 0.002580928150564432, 0.007409923244267702, 0.12582267820835114], [0.00605103699490428, 0.11548061668872833, 0.2870264947414398, 0.061026521027088165, 0.8064441084861755, 0.2189176380634308, 0.020241523161530495, 0.07779920846223831, 0.08952271938323975, 0.0073190852999687195, 0.02372264862060547, 0.038144610822200775, 0.07446137070655823, 0.09413070231676102, 0.030171062797307968], [0.08316895365715027, 0.6715664267539978, 0.04549514129757881, 0.17856287956237793, 0.018127189949154854, 0.38010329008102417, 0.16956135630607605, 0.5726994872093201, 0.1473512202501297, 0.13756032288074493, 0.044131502509117126, 0.03872460126876831, 0.13646697998046875, 0.07963203638792038, 0.10255669057369232], [0.0817432552576065, 0.2031053900718689, 0.02472570165991783, 0.02598942257463932, 0.05427335575222969, 0.43315476179122925, 0.06398319453001022, 0.14792829751968384, 0.18555517494678497, 0.020227503031492233, 0.03572608157992363, 0.008726409636437893, 0.33127138018608093, 0.0956021174788475, 0.032814960926771164], [0.36652442812919617, 0.4977355897426605, 0.09286413341760635, 0.21385566890239716, 0.18058304488658905, 0.4562758207321167, 0.4738945960998535, 0.2067655473947525, 0.17124009132385254, 0.035114847123622894, 0.05785587430000305, 0.03289380669593811, 0.3892229497432709, 0.2459530532360077, 0.0885753259062767], [0.3338637053966522, 0.241106316447258, 0.10183558613061905, 0.16975384950637817, 0.22215212881565094, 0.1208982765674591, 0.12069278955459595, 0.027770178392529488, 0.12589573860168457, 0.018161755055189133, 0.05639319866895676, 0.024462532252073288, 0.08646970242261887, 0.18506868183612823, 0.2994369864463806], [0.24999171495437622, 0.7484717965126038, 0.1908620148897171, 0.6611655354499817, 0.24442408978939056, 0.0825357735157013, 0.5622089505195618, 0.4391622543334961, 0.045715928077697754, 0.2250336855649948, 0.3067566156387329, 0.014471310190856457, 0.06388252228498459, 0.21674634516239166, 0.13583892583847046], [0.05097173899412155, 0.16686855256557465, 0.15120531618595123, 0.3698476254940033, 0.35846272110939026, 0.6895467042922974, 0.8159933686256409, 0.843620777130127, 0.6904561519622803, 0.307870090007782, 0.450530469417572, 0.6275950074195862, 0.15986312925815582, 0.5293903350830078, 0.07888244837522507], [0.3532100319862366, 0.1141892597079277, 0.06207668036222458, 0.23437273502349854, 0.13035829365253448, 0.16457295417785645, 0.6610441207885742, 0.6354422569274902, 0.6703211069107056, 0.18266227841377258, 0.16635818779468536, 0.1048990935087204, 0.1468038111925125, 0.17976891994476318, 0.0709633082151413], [0.18437133729457855, 0.20806346833705902, 0.06752406805753708, 0.15831130743026733, 0.3405534625053406, 0.0627271831035614, 0.3717433214187622, 0.3913803696632385, 0.5862330794334412, 0.29396724700927734, 0.02299528755247593, 0.060014016926288605, 0.08232607692480087, 0.15418194234371185, 0.15275102853775024], [0.07671413570642471, 0.17070698738098145, 0.13325846195220947, 0.07402658462524414, 0.6503690481185913, 0.1330946981906891, 0.165133535861969, 0.2397843301296234, 0.6370089054107666, 0.09848601371049881, 0.09929761290550232, 0.10903115570545197, 0.14141131937503815, 0.14783106744289398, 0.08112896233797073], [0.1416744738817215, 0.274202436208725, 0.13295260071754456, 0.20105819404125214, 0.3945937156677246, 0.333781898021698, 0.3556738793849945, 0.2839928865432739, 0.10343024134635925, 0.07706140726804733, 0.054361648857593536, 0.05752982571721077, 0.2817353904247284, 0.27278265357017517, 0.13429909944534302], [0.22879131138324738, 0.1777554452419281, 0.09183042496442795, 0.14726729691028595, 0.1873711347579956, 0.05672184377908707, 0.08326486498117447, 0.01781904511153698, 0.0835406556725502, 0.02614605240523815, 0.06876543164253235, 0.03439611196517944, 0.0621294341981411, 0.16512615978717804, 0.26481878757476807], [0.1532706916332245, 0.5982866883277893, 0.18050755560398102, 0.5800401568412781, 0.22030943632125854, 0.025230426341295242, 0.3744361996650696, 0.265155166387558, 0.03173244372010231, 0.2068646252155304, 0.27338433265686035, 0.012270096689462662, 0.05047086998820305, 0.14277896285057068, 0.15170519053936005], [0.04688200727105141, 0.12437571585178375, 0.1870293915271759, 0.4533093273639679, 0.3565751910209656, 0.5648568868637085, 0.7852934002876282, 0.7657470703125, 0.5417794585227966, 0.4419334828853607, 0.632922887802124, 0.7103447914123535, 0.15686877071857452, 0.6169639825820923, 0.08483293652534485], [0.2884610891342163, 0.10604135692119598, 0.07176870107650757, 0.2240629643201828, 0.12294583767652512, 0.10159854590892792, 0.6051279902458191, 0.5541971921920776, 0.5623130798339844, 0.16405576467514038, 0.18055777251720428, 0.13399486243724823, 0.12637703120708466, 0.18360036611557007, 0.09598042815923691], [0.10626664012670517, 0.1478983461856842, 0.07806308567523956, 0.11814259737730026, 0.31690794229507446, 0.03372211009263992, 0.30042603611946106, 0.29277828335762024, 0.44479742646217346, 0.216581329703331, 0.023049354553222656, 0.0511498898267746, 0.08494822680950165, 0.14207273721694946, 0.16419102251529694], [0.048457998782396317, 0.0638582855463028, 0.20956584811210632, 0.021124709397554398, 0.09014897048473358, 0.11662621796131134, 0.3483109474182129, 0.4503737986087799, 0.17136822640895844, 0.02997676283121109, 0.21708470582962036, 0.05856599286198616, 0.2859736979007721, 0.41663405299186707, 0.12262307107448578]], [[0.01622859761118889, 0.0033176897559314966, 0.006228303536772728, 0.003451053285971284, 0.011415286920964718, 0.016942020505666733, 0.0027556640561670065, 0.001647507306188345, 0.0010015909792855382, 0.0013629572931677103, 0.004746851045638323, 0.009338179603219032, 0.00885467603802681, 0.006604180671274662, 0.16180677711963654], [0.17455320060253143, 0.026163265109062195, 0.2041780799627304, 0.027548620477318764, 0.4711945950984955, 0.5480062365531921, 0.10718726366758347, 0.032194506376981735, 0.08035919070243835, 0.010791448876261711, 0.11821587383747101, 0.04372825473546982, 0.5788823962211609, 0.10199426859617233, 0.06844703108072281], [0.023936308920383453, 0.03560526669025421, 0.007881848141551018, 0.022994371131062508, 0.003501775674521923, 0.000663262908346951, 0.0027445319574326277, 0.0008202926255762577, 0.002215484855696559, 0.014335977844893932, 0.06139073148369789, 0.0039900378324091434, 0.004902976099401712, 0.006251698825508356, 0.21882350742816925], [0.01501577626913786, 0.026870740577578545, 0.007700353395193815, 0.02517320215702057, 0.005199552513659, 0.0040618558414280415, 0.0018289085710421205, 0.0005822794046252966, 0.008953371085226536, 0.004845716059207916, 0.02605423890054226, 0.010851072147488594, 0.011600007303059101, 0.011058725416660309, 0.2679094076156616], [0.05198093131184578, 0.026691097766160965, 0.04745011776685715, 0.02099662832915783, 0.007765383925288916, 0.0017653746763244271, 0.002459246199578047, 0.0005052239284850657, 0.0007161727407947183, 0.00449666241183877, 0.00950489193201065, 0.002728741616010666, 0.007593079470098019, 0.0031749741174280643, 0.1993207037448883], [0.0031879025045782328, 0.001219254801981151, 0.007273980416357517, 0.0029734931886196136, 9.794573998078704e-05, 0.0006066279602237046, 0.000905939843505621, 0.0002116545947501436, 0.00022416051069740206, 0.001432110439054668, 0.00046862047747708857, 0.0008043517009355128, 0.00010411434050183743, 0.0003457288257777691, 0.22099417448043823], [0.020157048478722572, 0.026601465418934822, 0.04540588706731796, 0.04344630241394043, 0.0022944926749914885, 0.0010618591913953424, 0.00406603142619133, 0.0029086798895150423, 0.0019963555969297886, 0.010005260817706585, 0.0020353682339191437, 0.0019374215044081211, 0.0013613863848149776, 0.001661884132772684, 0.34173521399497986], [0.09776000678539276, 0.012011643499135971, 0.12930582463741302, 0.019725820049643517, 0.03450663015246391, 0.44516250491142273, 0.09379248321056366, 0.011904217302799225, 0.012111036106944084, 0.007218031212687492, 0.028761520981788635, 0.011232447810471058, 0.17035166919231415, 0.022308414801955223, 0.055901553481817245], [0.0270126610994339, 0.0034831874072551727, 0.03977394104003906, 0.025583824142813683, 0.0007700100541114807, 0.002870001830160618, 0.0027750579174607992, 0.0016644555144011974, 0.0016086471732705832, 0.001177149242721498, 0.00746855279430747, 0.002065857872366905, 0.0016993783647194505, 0.0015537800500169396, 0.32808277010917664], [0.16020068526268005, 0.019860466942191124, 0.3786206543445587, 0.04546584561467171, 0.22538548707962036, 0.035959187895059586, 0.022749971598386765, 0.0223965086042881, 0.010994979180395603, 0.013655508868396282, 0.08095952123403549, 0.07914181798696518, 0.5184871554374695, 0.24710357189178467, 0.059729527682065964], [0.002354596508666873, 0.013563946820795536, 0.0012282072566449642, 0.0011236226418986917, 0.004269973374903202, 0.05393142253160477, 0.010044331662356853, 0.012847290374338627, 0.23206481337547302, 0.0042032524943351746, 0.002388538094237447, 0.005051162093877792, 0.004106870852410793, 0.003583247307687998, 0.0021634430158883333], [0.1318124532699585, 0.006612265948206186, 0.026151085272431374, 0.15551267564296722, 0.006537565030157566, 0.045402105897665024, 0.08115606755018234, 0.020273711532354355, 0.2617640495300293, 0.03846455365419388, 0.42425140738487244, 0.0063036843203008175, 0.045534029603004456, 0.06594183295965195, 0.0061628553085029125], [0.0171976238489151, 0.0023818486370146275, 0.036466922610998154, 0.011855212040245533, 0.019672302529215813, 0.007386004086583853, 0.02982362173497677, 0.0045198979787528515, 0.02385052479803562, 0.25256073474884033, 0.2446560561656952, 0.0453505739569664, 0.08819476515054703, 0.09139581024646759, 0.0022182920947670937], [0.023948049172759056, 0.006307430099695921, 0.014840157702565193, 0.01758965104818344, 0.0009477039566263556, 0.00178795016836375, 0.005927308928221464, 0.0026511158794164658, 0.00012311375758145005, 0.04321818798780441, 0.0496363490819931, 0.3416200280189514, 0.001097637927159667, 0.007029203698039055, 0.007338459137827158], [0.1633826345205307, 0.005062526557594538, 0.04231903329491615, 0.24309031665325165, 0.0009563505300320685, 0.0008045694557949901, 0.004994159564375877, 0.0011061460245400667, 0.0013372766552492976, 0.023061903193593025, 0.044598180800676346, 0.0017028035363182425, 2.3589664124301635e-05, 0.0003540365141816437, 0.16737498342990875], [0.1106855720281601, 0.005593962036073208, 0.014953872188925743, 0.19064223766326904, 0.0008905718568712473, 0.002549833618104458, 0.019427485764026642, 0.019940704107284546, 0.0020017458591610193, 0.029780413955450058, 0.01774613931775093, 0.00061158457538113, 0.0022336822003126144, 0.007989613339304924, 0.2558586895465851], [0.07112060487270355, 0.029737049713730812, 0.09336916357278824, 0.07307538390159607, 0.023197662085294724, 0.022866347804665565, 0.060328319668769836, 0.04474486783146858, 0.0006379868718795478, 0.027103934437036514, 0.2942929267883301, 0.011375843547284603, 0.07746338844299316, 0.09051978588104248, 0.11258094012737274], [0.15941812098026276, 0.02997875213623047, 0.08360203355550766, 0.10365118086338043, 0.03050130233168602, 0.39312028884887695, 0.3065427839756012, 0.2912093997001648, 0.135236918926239, 0.18899840116500854, 0.13724294304847717, 0.1948302835226059, 0.07353706657886505, 0.12220755219459534, 0.10422825068235397], [0.24064786732196808, 0.0051915524527430534, 0.09652373939752579, 0.2287912219762802, 0.019215410575270653, 0.13947954773902893, 0.15343742072582245, 0.07055477797985077, 0.05467608571052551, 0.10673969984054565, 0.5659986138343811, 0.014077076688408852, 0.1709020584821701, 0.23944324254989624, 0.026877261698246002], [0.019817974418401718, 0.002034382661804557, 0.04978875443339348, 0.009913384914398193, 0.033772312104701996, 0.0069160182029008865, 0.027356693521142006, 0.004301261156797409, 0.005268980748951435, 0.24062182009220123, 0.2975090742111206, 0.09841412305831909, 0.13523375988006592, 0.1965852826833725, 0.004198803100734949], [0.017094334587454796, 0.005556214600801468, 0.011722622439265251, 0.009952181950211525, 0.0008346029790118337, 0.0009373819339089096, 0.006794091779738665, 0.0019291864009574056, 4.7701923904241994e-05, 0.0364256277680397, 0.035398196429014206, 0.3890627920627594, 0.0013647697633132339, 0.008012092672288418, 0.013173048384487629], [0.12328237295150757, 0.0036286553367972374, 0.03202027454972267, 0.16562366485595703, 0.0006255045300349593, 0.00061140360776335, 0.00499368691816926, 0.0010923785157501698, 0.0008833102765493095, 0.03177933022379875, 0.04344986379146576, 0.00255553494207561, 2.260845576529391e-05, 0.0005036385264247656, 0.16160868108272552], [0.050196755677461624, 0.002699600299820304, 0.009293685667216778, 0.06999042630195618, 0.0006182404467836022, 0.0013977399794384837, 0.014421526342630386, 0.010930507443845272, 0.0008620836888439953, 0.015927143394947052, 0.008692404255270958, 0.0006625624373555183, 0.0011245491914451122, 0.0053406055085361, 0.2061784416437149], [0.04101766273379326, 0.020672734826803207, 0.08772061765193939, 0.04009746387600899, 0.01892852783203125, 0.017910925671458244, 0.057973578572273254, 0.03737492114305496, 0.00047206622548401356, 0.021084431558847427, 0.21054430305957794, 0.013546224683523178, 0.08985017240047455, 0.10610225051641464, 0.1389981210231781], [0.018278781324625015, 0.03789714351296425, 0.00408195098862052, 0.005283118225634098, 0.009515376761555672, 0.11360906809568405, 0.008760524913668633, 0.006613489706069231, 0.018946174532175064, 0.008831392042338848, 0.015675490722060204, 0.021136337891221046, 0.13481837511062622, 0.08728663623332977, 0.15406787395477295]], [[0.05651351809501648, 0.11774645000696182, 0.026926513761281967, 0.04848615080118179, 0.10334916412830353, 0.4247743785381317, 0.21147629618644714, 0.6254463195800781, 0.10587190836668015, 0.08194849640130997, 0.04674661532044411, 0.35135090351104736, 0.35409873723983765, 0.43208518624305725, 0.11939813196659088], [0.05609016492962837, 0.06931670010089874, 0.1576625108718872, 0.27308744192123413, 0.04202406853437424, 0.2399596869945526, 0.3320065140724182, 0.6272499561309814, 0.09423039108514786, 0.144412100315094, 0.2769482433795929, 0.05643320456147194, 0.11388154327869415, 0.32551372051239014, 0.13187405467033386], [0.1798395812511444, 0.02382134646177292, 0.024498937651515007, 0.28730508685112, 0.19651466608047485, 0.13693250715732574, 0.34929007291793823, 0.1055094301700592, 0.08990196883678436, 0.5189381837844849, 0.3313819468021393, 0.34343984723091125, 0.21719343960285187, 0.21188895404338837, 0.15588119626045227], [0.26584357023239136, 0.03035559318959713, 0.026536965742707253, 0.20298171043395996, 0.23938016593456268, 0.24181482195854187, 0.31930428743362427, 0.10626629739999771, 0.13103167712688446, 0.4636806845664978, 0.393515944480896, 0.3422740399837494, 0.342117577791214, 0.5495904088020325, 0.14030353724956512], [0.30834218859672546, 0.3875667452812195, 0.32842832803726196, 0.16462059319019318, 0.416511207818985, 0.03730625659227371, 0.23662680387496948, 0.5092235207557678, 0.08549848943948746, 0.3278381824493408, 0.507111668586731, 0.0415511280298233, 0.5590415596961975, 0.6185146570205688, 0.0664283037185669], [0.0765935555100441, 0.29552146792411804, 0.05705742537975311, 0.01913047581911087, 0.15779250860214233, 0.030224651098251343, 0.08988720178604126, 0.3389361500740051, 0.08153010904788971, 0.05811480060219765, 0.09408371150493622, 0.19600677490234375, 0.6126919388771057, 0.623294472694397, 0.13969288766384125], [0.4304950535297394, 0.5688965320587158, 0.09143517911434174, 0.09618712961673737, 0.13307496905326843, 0.014428870752453804, 0.040250685065984726, 0.15830516815185547, 0.10923942923545837, 0.23653797805309296, 0.3180045783519745, 0.5594316720962524, 0.5058388710021973, 0.3866141140460968, 0.14058275520801544], [0.31169822812080383, 0.7707167863845825, 0.30778199434280396, 0.10994993895292282, 0.18047340214252472, 0.01769133098423481, 0.014783667400479317, 0.009741406887769699, 0.1340220719575882, 0.11223828792572021, 0.46960482001304626, 0.360332190990448, 0.56731116771698, 0.5470200181007385, 0.18929171562194824], [0.2397254854440689, 0.361926406621933, 0.24345533549785614, 0.18179422616958618, 0.10373111069202423, 0.014045567251741886, 0.08654272556304932, 0.018043776974081993, 0.02193235233426094, 0.07134812325239182, 0.19312754273414612, 0.6192790865898132, 0.6039608716964722, 0.673239529132843, 0.15608295798301697], [0.32110491394996643, 0.2706402838230133, 0.034645695239305496, 0.029830342158675194, 0.00933478306978941, 0.25964564085006714, 0.17791348695755005, 0.11580535769462585, 0.07073061913251877, 0.10197918862104416, 0.06440304219722748, 0.2378954440355301, 0.09358810633420944, 0.24307624995708466, 0.22625915706157684], [0.18688960373401642, 0.6521251797676086, 0.05505351349711418, 0.05518023297190666, 0.07190049439668655, 0.15721110999584198, 0.11867944896221161, 0.2974295914173126, 0.018550140783190727, 0.1645369827747345, 0.09910324215888977, 0.499615877866745, 0.34706613421440125, 0.5406060218811035, 0.24014075100421906], [0.24844318628311157, 0.24823600053787231, 0.41713690757751465, 0.05438315495848656, 0.5823535323143005, 0.1801777333021164, 0.13823869824409485, 0.16278210282325745, 0.035736992955207825, 0.017554355785250664, 0.03778500482439995, 0.09959819167852402, 0.18642207980155945, 0.26950401067733765, 0.24913227558135986], [0.21744470298290253, 0.04392259195446968, 0.5108200907707214, 0.27167755365371704, 0.5572997331619263, 0.30860280990600586, 0.5083038210868835, 0.6815038919448853, 0.3754148483276367, 0.01992654800415039, 0.0589066781103611, 0.07934294641017914, 0.15649113059043884, 0.3772245943546295, 0.25267744064331055], [0.11088164150714874, 0.06568774580955505, 0.49295517802238464, 0.06175035238265991, 0.3928946256637573, 0.306259423494339, 0.1265336275100708, 0.29877781867980957, 0.061930101364851, 0.053618840873241425, 0.02546272985637188, 0.011733881197869778, 0.4200928509235382, 0.25557151436805725, 0.12701815366744995], [0.06005493924021721, 0.46575742959976196, 0.4922090172767639, 0.06956527382135391, 0.3788193464279175, 0.21330630779266357, 0.06565267592668533, 0.10461793839931488, 0.1200915202498436, 0.07597928494215012, 0.08451344817876816, 0.06952610611915588, 0.03487509861588478, 0.12158560007810593, 0.14820002019405365], [0.11028759926557541, 0.4027779996395111, 0.8237467408180237, 0.1328621804714203, 0.7811888456344604, 0.5416622757911682, 0.16887041926383972, 0.2001309096813202, 0.08848496526479721, 0.05607001483440399, 0.13165172934532166, 0.10739479213953018, 0.052385441958904266, 0.05461856350302696, 0.16259506344795227], [0.12960980832576752, 0.21605639159679413, 0.13754284381866455, 0.0687912181019783, 0.2001095861196518, 0.7652902007102966, 0.3308810591697693, 0.3389359712600708, 0.07430214434862137, 0.036511119455099106, 0.010612682439386845, 0.005050503648817539, 0.1584991067647934, 0.036481909453868866, 0.18724960088729858], [0.16838932037353516, 0.47491130232810974, 0.21776747703552246, 0.05912807583808899, 0.16565343737602234, 0.34125030040740967, 0.2414778620004654, 0.28169524669647217, 0.03973108157515526, 0.03921183571219444, 0.02238578163087368, 0.02449338510632515, 0.05498792976140976, 0.03159895911812782, 0.17659053206443787], [0.14295107126235962, 0.27777984738349915, 0.30436068773269653, 0.03198731318116188, 0.38494178652763367, 0.27411460876464844, 0.18790900707244873, 0.29966217279434204, 0.029011890292167664, 0.012050352990627289, 0.008839968591928482, 0.009298003278672695, 0.09229473769664764, 0.05935056507587433, 0.2074589878320694], [0.185210719704628, 0.0802093893289566, 0.4863169491291046, 0.24164138734340668, 0.5185936689376831, 0.381059467792511, 0.5372542142868042, 0.6922534108161926, 0.40473121404647827, 0.015452258288860321, 0.03550630062818527, 0.023993153125047684, 0.09803077578544617, 0.14391310513019562, 0.25199130177497864], [0.08245678246021271, 0.1390499472618103, 0.5461503863334656, 0.060220371931791306, 0.43899697065353394, 0.5144884586334229, 0.22183947265148163, 0.5088672041893005, 0.09321429580450058, 0.05354699492454529, 0.02214067056775093, 0.004303250927478075, 0.39110496640205383, 0.12463895231485367, 0.1568218618631363], [0.043030936270952225, 0.498334676027298, 0.5084810853004456, 0.06107298657298088, 0.3904430866241455, 0.35258427262306213, 0.08483341336250305, 0.17738159000873566, 0.1815967708826065, 0.09597334265708923, 0.08432064205408096, 0.040181081742048264, 0.02593160979449749, 0.08670566976070404, 0.14764654636383057], [0.0785449668765068, 0.4015392065048218, 0.8182658553123474, 0.10243776440620422, 0.7659414410591125, 0.5735372304916382, 0.16621330380439758, 0.21339072287082672, 0.12523002922534943, 0.05685745179653168, 0.1081186980009079, 0.07184037566184998, 0.02847907319664955, 0.031456008553504944, 0.15293413400650024], [0.07311940938234329, 0.15430475771427155, 0.1386927217245102, 0.04823235049843788, 0.20945730805397034, 0.8191487193107605, 0.33371293544769287, 0.3618466258049011, 0.1152336597442627, 0.031010858714580536, 0.008395140990614891, 0.002998974174261093, 0.13362915813922882, 0.02411211095750332, 0.1613900512456894], [0.2622520923614502, 0.7386532425880432, 0.41215938329696655, 0.08539438247680664, 0.7665934562683105, 0.5218235850334167, 0.42940571904182434, 0.4037780165672302, 0.7456067204475403, 0.07961834967136383, 0.02781907096505165, 0.02608557976782322, 0.15701159834861755, 0.05025498941540718, 0.11428551375865936]], [[0.5009713768959045, 0.11806200444698334, 0.543484628200531, 0.29247328639030457, 0.5261343717575073, 0.23446989059448242, 0.5474087595939636, 0.062012095004320145, 0.8189043998718262, 0.538780152797699, 0.6200674176216125, 0.43515679240226746, 0.24830776453018188, 0.341129869222641, 0.04290800169110298], [0.018064359202980995, 0.030848585069179535, 0.08071158826351166, 0.0676560178399086, 0.13447926938533783, 0.11551786214113235, 0.17043589055538177, 0.10128363966941833, 0.6618390679359436, 0.2855142652988434, 0.0971621423959732, 0.23388729989528656, 0.21859601140022278, 0.46025529503822327, 0.182326078414917], [0.04308566823601723, 0.03711610287427902, 0.06502576172351837, 0.10632220655679703, 0.09326566010713577, 0.08777783066034317, 0.3412204086780548, 0.6204424500465393, 0.8231819868087769, 0.09377399832010269, 0.1541169434785843, 0.21222646534442902, 0.11298450827598572, 0.15309588611125946, 0.11645805835723877], [0.07351326197385788, 0.05497964471578598, 0.07563240081071854, 0.32393333315849304, 0.057468246668577194, 0.2634526193141937, 0.3780488967895508, 0.7154850363731384, 0.7017503976821899, 0.20895157754421234, 0.29085400700569153, 0.06311048567295074, 0.03268700838088989, 0.14748480916023254, 0.03694311901926994], [0.15202973783016205, 0.07260382175445557, 0.07307075709104538, 0.01561899296939373, 0.03831832483410835, 0.04392734169960022, 0.07259247452020645, 0.03668325021862984, 0.315115749835968, 0.14016768336296082, 0.147903710603714, 0.09513753652572632, 0.08079177141189575, 0.04876280575990677, 0.1678115576505661], [0.20334205031394958, 0.03987862542271614, 0.2323523759841919, 0.08299659937620163, 0.11007620394229889, 0.049821991473436356, 0.05303451418876648, 0.020633194595575333, 0.20804192125797272, 0.621069610118866, 0.6013453006744385, 0.6998922824859619, 0.30664384365081787, 0.1810489445924759, 0.12484823167324066], [0.33830341696739197, 0.10967365652322769, 0.03348035365343094, 0.09579410403966904, 0.07735400646924973, 0.09874830394983292, 0.15181724727153778, 0.11190870404243469, 0.4600948095321655, 0.5270871520042419, 0.27297794818878174, 0.3748718500137329, 0.4609748125076294, 0.5019738078117371, 0.0790465772151947], [0.18835663795471191, 0.05185278132557869, 0.06106729805469513, 0.04512745887041092, 0.04466439411044121, 0.025852244347333908, 0.031750425696372986, 0.022515133023262024, 0.5077425837516785, 0.6734393835067749, 0.37964752316474915, 0.35936975479125977, 0.19831591844558716, 0.216437429189682, 0.2985125184059143], [0.5560556054115295, 0.47877317667007446, 0.15116584300994873, 0.40482252836227417, 0.04176756739616394, 0.04773563891649246, 0.13619393110275269, 0.07804162055253983, 0.07037016749382019, 0.5527278780937195, 0.486864298582077, 0.22204715013504028, 0.2625967860221863, 0.19855597615242004, 0.060070205479860306], [0.21585102379322052, 0.028776921331882477, 0.056070148944854736, 0.3207121789455414, 0.0078024002723395824, 0.016524065285921097, 0.3710367977619171, 0.14693383872509003, 0.12693363428115845, 0.6266815662384033, 0.6993157863616943, 0.5497558116912842, 0.14310741424560547, 0.3664083480834961, 0.047443971037864685], [0.28475576639175415, 0.10818006843328476, 0.08735410869121552, 0.329417884349823, 0.02252645045518875, 0.04752267897129059, 0.3733118176460266, 0.39454737305641174, 0.029050499200820923, 0.6059318780899048, 0.7311877012252808, 0.44807982444763184, 0.29598307609558105, 0.33838847279548645, 0.16424106061458588], [0.08968453854322433, 0.11453098803758621, 0.20413988828659058, 0.368092805147171, 0.07694120705127716, 0.048818718641996384, 0.12943927943706512, 0.036333490163087845, 0.04509947448968887, 0.25635746121406555, 0.2806471586227417, 0.5608395338058472, 0.1390012502670288, 0.28897786140441895, 0.04701472818851471], [0.05315335839986801, 0.017116300761699677, 0.1720367670059204, 0.3916313052177429, 0.05510414391756058, 0.2876152992248535, 0.22692401707172394, 0.14989952743053436, 0.3368622660636902, 0.0913245752453804, 0.3484038710594177, 0.3637443780899048, 0.007217096630483866, 0.103476881980896, 0.036375418305397034], [0.5125223994255066, 0.07351671159267426, 0.21591535210609436, 0.21059465408325195, 0.3288169205188751, 0.5466507077217102, 0.21618640422821045, 0.15017350018024445, 0.8681062459945679, 0.2442341297864914, 0.06865198910236359, 0.019835328683257103, 0.10077274590730667, 0.12228173017501831, 0.1682003289461136], [0.4846254289150238, 0.17620818316936493, 0.23995715379714966, 0.09631974995136261, 0.22585628926753998, 0.04512355476617813, 0.06700992584228516, 0.01503949984908104, 0.07369402050971985, 0.03452376648783684, 0.04930250719189644, 0.1451164036989212, 0.010093613527715206, 0.020862746983766556, 0.16003692150115967], [0.12189289927482605, 0.3658526837825775, 0.06606122851371765, 0.1638106107711792, 0.07819290459156036, 0.27624964714050293, 0.09599297493696213, 0.08126427978277206, 0.14055852591991425, 0.02327289618551731, 0.03783821687102318, 0.2963305115699768, 0.13405835628509521, 0.09205315262079239, 0.12166540324687958], [0.278896301984787, 0.1438806802034378, 0.46959513425827026, 0.3356979489326477, 0.3651174008846283, 0.1071292906999588, 0.18117688596248627, 0.20183299481868744, 0.29131460189819336, 0.13872042298316956, 0.021824011579155922, 0.06362087279558182, 0.34404000639915466, 0.13715140521526337, 0.1120462715625763], [0.2151702344417572, 0.2682046890258789, 0.2758127450942993, 0.20445802807807922, 0.06759822368621826, 0.058143485337495804, 0.21948587894439697, 0.1328936666250229, 0.04737214744091034, 0.09880322962999344, 0.06969184428453445, 0.0649414211511612, 0.09957331418991089, 0.08072139322757721, 0.15442174673080444], [0.10625648498535156, 0.3580685555934906, 0.2235240340232849, 0.2717205584049225, 0.14765356481075287, 0.1302592158317566, 0.182493656873703, 0.07402253895998001, 0.044094108045101166, 0.28373098373413086, 0.09141446650028229, 0.13240621984004974, 0.1622740924358368, 0.2716645896434784, 0.09359043836593628], [0.08181191235780716, 0.05183182656764984, 0.18780435621738434, 0.39972010254859924, 0.11086275428533554, 0.3443254232406616, 0.26716044545173645, 0.2157517671585083, 0.3917877972126007, 0.09846898168325424, 0.25891563296318054, 0.25942671298980713, 0.008535100147128105, 0.11220833659172058, 0.06895694881677628], [0.4507053792476654, 0.10277862101793289, 0.16431982815265656, 0.2027788907289505, 0.318918377161026, 0.4106469452381134, 0.24116744101047516, 0.1587350070476532, 0.8309358358383179, 0.2625651955604553, 0.047453198581933975, 0.009295494295656681, 0.07160880416631699, 0.07481760531663895, 0.19364440441131592], [0.5336673855781555, 0.18865860998630524, 0.19927646219730377, 0.10614699125289917, 0.21258802711963654, 0.035614922642707825, 0.07572873681783676, 0.021095039322972298, 0.08985494822263718, 0.061252057552337646, 0.05201297253370285, 0.10173538327217102, 0.008337927050888538, 0.017984798178076744, 0.15578274428844452], [0.11776354163885117, 0.337507039308548, 0.055947914719581604, 0.144154354929924, 0.09536269307136536, 0.2646341919898987, 0.10820504277944565, 0.0982295498251915, 0.1891198456287384, 0.027041049674153328, 0.03162495046854019, 0.2652260959148407, 0.10165920853614807, 0.07911970466375351, 0.1373925358057022], [0.20648452639579773, 0.10074114054441452, 0.42538517713546753, 0.26027214527130127, 0.3658106029033661, 0.09280957281589508, 0.23363487422466278, 0.27985435724258423, 0.3744349181652069, 0.1453229784965515, 0.02015594393014908, 0.05169985443353653, 0.3284047245979309, 0.12707991898059845, 0.12262601405382156], [0.019576620310544968, 0.03319034352898598, 0.0111849969252944, 0.010870445519685745, 0.03222370147705078, 0.13807591795921326, 0.0675833523273468, 0.0615379698574543, 0.013822048902511597, 0.008804764598608017, 0.004974161274731159, 0.01815059222280979, 0.1774466335773468, 0.06282598525285721, 0.15396134555339813]], [[0.07712388038635254, 0.042244281619787216, 0.004363007377833128, 0.0015959119191393256, 0.019252488389611244, 0.02118455246090889, 0.001846740604378283, 0.0012080060550943017, 0.0007866616360843182, 0.001261864323168993, 0.002815018408000469, 0.017323212698101997, 0.00286104716360569, 0.004067797679454088, 0.15733002126216888], [0.176344633102417, 0.3271441161632538, 0.08498391509056091, 0.04002806171774864, 0.06676299124956131, 0.008946515619754791, 0.012590638361871243, 0.0061616976745426655, 0.010515754111111164, 0.042563267052173615, 0.024306243285536766, 0.009260479360818863, 0.0002838150830939412, 0.0009972971165552735, 0.0829070582985878], [0.3345734477043152, 0.016792800277471542, 0.785018265247345, 0.16747814416885376, 0.3955724537372589, 0.09289640188217163, 0.041390396654605865, 0.004024161957204342, 0.04094661772251129, 0.023736434057354927, 0.20348279178142548, 0.041674140840768814, 0.012969214469194412, 0.03994787111878395, 0.04405270516872406], [0.027460135519504547, 0.0009503767942078412, 0.8045902252197266, 0.05251304432749748, 0.4111766219139099, 0.08071836084127426, 0.01928381621837616, 0.0005491983611136675, 0.029575586318969727, 0.001678029540926218, 0.033282194286584854, 0.007144003175199032, 0.012064780108630657, 0.008930332958698273, 0.0033295771572738886], [0.18455208837985992, 0.0566692017018795, 0.08522135764360428, 0.2798183560371399, 0.013304274529218674, 0.0006802850402891636, 0.09522412717342377, 0.0060977875255048275, 0.002369458321481943, 0.017453324049711227, 0.0036190226674079895, 2.9809654733981006e-05, 0.0002128492487827316, 0.0002820969675667584, 0.18610867857933044], [0.6536933779716492, 0.3485175371170044, 0.2007695585489273, 0.8106443881988525, 0.12433423846960068, 0.008092332631349564, 0.6807736158370972, 0.40895989537239075, 0.04516575112938881, 0.1387551873922348, 0.004862201400101185, 0.0003120531910099089, 0.00022667655139230192, 0.00031860917806625366, 0.07640787214040756], [0.08564082533121109, 0.05155009403824806, 0.10021068900823593, 0.5880905985832214, 0.0823356956243515, 0.0626063123345375, 0.7381499409675598, 0.566346287727356, 0.04188016802072525, 0.02469027414917946, 0.004355741199105978, 0.00042968738125637174, 2.4299803044414148e-05, 2.7212277927901596e-05, 0.001896930974908173], [0.03975995257496834, 0.012421448715031147, 0.08890707790851593, 0.605818510055542, 0.05048904940485954, 0.017510779201984406, 0.24702893197536469, 0.39587050676345825, 0.06098005548119545, 0.052625395357608795, 0.013424866832792759, 0.0005194320692680776, 0.000250102486461401, 0.0003063087642658502, 0.0010793216060847044], [0.11902385950088501, 0.011114073917269707, 0.22151720523834229, 0.2006509006023407, 0.03878694027662277, 0.01363028772175312, 0.3268369734287262, 0.04311302676796913, 0.8067907094955444, 0.34777864813804626, 0.25920552015304565, 0.09021251648664474, 0.035271789878606796, 0.0031717135570943356, 0.004271878860890865], [0.006270309444516897, 0.0001492560259066522, 0.00045137249981053174, 0.0007612273329868913, 7.476524478988722e-05, 0.013270817697048187, 0.04344405606389046, 0.014117085374891758, 0.6041488647460938, 0.07304701954126358, 0.010559855960309505, 0.0026350386906415224, 0.02638809196650982, 0.002994539914652705, 0.00020572090579662472], [0.002078789984807372, 0.000502656155731529, 0.00018232718866784126, 0.0008548289188183844, 0.0009249084978364408, 0.02029070071876049, 0.012032798491418362, 0.024348178878426552, 0.2300865352153778, 0.10343841463327408, 0.007660495117306709, 0.0012821657583117485, 0.0114271380007267, 0.0009412667131982744, 7.524124521296471e-05], [0.022463228553533554, 0.0013134862529113889, 0.00013891702110413462, 0.002816978842020035, 0.0011811865260824561, 0.0014538302784785628, 0.0005458829691633582, 0.0004073161107953638, 0.000992793939076364, 0.626685380935669, 0.1310541182756424, 0.1785772740840912, 0.1327074021100998, 0.014590581879019737, 3.459410072537139e-05], [0.004299411084502935, 0.00014757749158889055, 0.0013493087608367205, 0.003552102018147707, 0.004041418433189392, 0.004232631530612707, 0.00022051982523407787, 5.3625211876351386e-05, 0.008671559393405914, 0.2003454566001892, 0.2010745257139206, 0.20048564672470093, 0.327506959438324, 0.12215141952037811, 7.573522452730685e-05], [0.011497906409204006, 0.0014132088981568813, 0.002270179335027933, 0.006387166678905487, 5.5530636018374935e-05, 0.0020248510409146547, 0.0021348590962588787, 0.001147052156738937, 0.0024277162738144398, 0.3687064051628113, 0.5298402905464172, 0.006611559074372053, 0.3372868299484253, 0.2915361225605011, 0.0002606022753752768], [0.043351031839847565, 0.015730101615190506, 0.006545424461364746, 0.11301398277282715, 0.001535893650725484, 0.0002994980022776872, 0.002417969051748514, 0.0027875620871782303, 0.007663458585739136, 0.4366588592529297, 0.29866132140159607, 0.03879629448056221, 0.0005757116014137864, 0.10755223035812378, 0.15693426132202148], [0.05824243649840355, 0.00918568018823862, 0.004823020659387112, 0.12202360481023788, 0.001364732626825571, 0.009540650062263012, 0.017077280208468437, 0.02250218391418457, 0.031557418406009674, 0.39489659667015076, 0.4118596911430359, 0.4739699363708496, 0.04330656677484512, 0.22410848736763, 0.009354491718113422], [0.10114194452762604, 0.055991608649492264, 0.0056193675845861435, 0.044799599796533585, 0.005612906999886036, 0.0018076150445267558, 0.0035521595273166895, 0.003050913568586111, 0.014126029796898365, 0.18568304181098938, 0.044660091400146484, 0.8178999423980713, 0.12312521040439606, 0.22830259799957275, 0.0015339198289439082], [0.17329555749893188, 0.022842630743980408, 0.03050464764237404, 0.3040459156036377, 0.023058682680130005, 0.05675753578543663, 0.012084487825632095, 0.018060212954878807, 0.012510768137872219, 0.4205268621444702, 0.403047114610672, 0.5196431279182434, 0.14466160535812378, 0.15726853907108307, 0.003281315555796027], [0.21814380586147308, 0.013853680342435837, 0.0011839027283713222, 0.02006133459508419, 0.0059941732324659824, 0.004335244186222553, 0.0006587213138118386, 0.0008069095201790333, 6.766151636838913e-05, 0.4439576268196106, 0.16648612916469574, 0.7347545623779297, 0.19459886848926544, 0.05657987296581268, 0.0006026092451065779], [0.034262340515851974, 0.0017182001611217856, 0.005656392779201269, 0.017169898375868797, 0.0156857930123806, 0.01468763966113329, 0.0007699507405050099, 0.00017933807976078242, 0.002019587904214859, 0.09474337100982666, 0.21286551654338837, 0.39837440848350525, 0.44769343733787537, 0.30061447620391846, 0.0009720441303215921], [0.1974877417087555, 0.05350746586918831, 0.02080627717077732, 0.07140190154314041, 0.0007820951868779957, 0.021851971745491028, 0.023295408114790916, 0.011020028032362461, 0.0015720969531685114, 0.3204348385334015, 0.5890824198722839, 0.011122598312795162, 0.40923523902893066, 0.5521805882453918, 0.009284045547246933], [0.04384012520313263, 0.020103074610233307, 0.00601673498749733, 0.10121199488639832, 0.0015372235793620348, 0.00047879578778520226, 0.0028034253045916557, 0.0035304632037878036, 0.0019347126362845302, 0.15543726086616516, 0.10060140490531921, 0.012154079042375088, 0.00020098914683330804, 0.049742307513952255, 0.15931616723537445], [0.33183732628822327, 0.07794758677482605, 0.02364480309188366, 0.3878714144229889, 0.007764760870486498, 0.055411770939826965, 0.07855504751205444, 0.09397301822900772, 0.02721172571182251, 0.38145557045936584, 0.42047446966171265, 0.5078706741333008, 0.03859835863113403, 0.25985077023506165, 0.0625251829624176], [0.4473247230052948, 0.3730325996875763, 0.029895052313804626, 0.15908104181289673, 0.02762797847390175, 0.008889964781701565, 0.016516737639904022, 0.012883803807199001, 0.01523641124367714, 0.22003965079784393, 0.05771813541650772, 0.8456536531448364, 0.1770154982805252, 0.31127816438674927, 0.007925343699753284], [0.2188224196434021, 0.06026163697242737, 0.01674255169928074, 0.1205059364438057, 0.017392028123140335, 0.033714599907398224, 0.013199009001255035, 0.035441260784864426, 0.006878681946545839, 0.5097362399101257, 0.5390803217887878, 0.7098195552825928, 0.20610427856445312, 0.34404870867729187, 0.06464894115924835]], [[0.24012988805770874, 0.6692726612091064, 0.08029869198799133, 0.41845017671585083, 0.08128808438777924, 0.09738753736019135, 0.15100885927677155, 0.2691691815853119, 0.013517879880964756, 0.21848294138908386, 0.16758716106414795, 0.12734578549861908, 0.32224464416503906, 0.12471552193164825, 0.07385692000389099], [0.13747748732566833, 0.012865100987255573, 0.3056560158729553, 0.3759651184082031, 0.20075583457946777, 0.056869279593229294, 0.27502477169036865, 0.09038521349430084, 0.09535539150238037, 0.27579623460769653, 0.15189220011234283, 0.6071571111679077, 0.0820951759815216, 0.09481122344732285, 0.09779953956604004], [0.007538634352385998, 0.02957071363925934, 0.011847163550555706, 0.055522944778203964, 0.04100131243467331, 0.031534671783447266, 0.06567902117967606, 0.09044305235147476, 0.007193693891167641, 0.06334451586008072, 0.07378207892179489, 0.07786792516708374, 0.28214019536972046, 0.08070375770330429, 0.20607011020183563], [0.005881547927856445, 0.008371960371732712, 0.010823756456375122, 0.024797217920422554, 0.024142105132341385, 0.01083815935999155, 0.008304014801979065, 0.006388344801962376, 0.009114595130085945, 0.022048065438866615, 0.1306026130914688, 0.23451638221740723, 0.3918500244617462, 0.08784151822328568, 0.2650633752346039], [0.20629070699214935, 0.2529377341270447, 0.028870999813079834, 0.049127642065286636, 0.04690879210829735, 0.11594393104314804, 0.15515393018722534, 0.06585636734962463, 0.0420556403696537, 0.1996643990278244, 0.028717953711748123, 0.7190893292427063, 0.30376943945884705, 0.22654840350151062, 0.12926629185676575], [0.01586613617837429, 0.15566423535346985, 0.015082520432770252, 0.009204044006764889, 0.002680863719433546, 0.07106906920671463, 0.08370621502399445, 0.05749649554491043, 0.03059268370270729, 0.012942377477884293, 0.0011753733269870281, 0.00916373822838068, 0.0020018015056848526, 0.049308281391859055, 0.19197486340999603], [0.03849078342318535, 0.08146823942661285, 0.03517843410372734, 0.025976145640015602, 0.02364599145948887, 0.1389763057231903, 0.02619975060224533, 0.034312427043914795, 0.02985706366598606, 0.029806064441800117, 0.00684476038441062, 0.03280223533511162, 0.030126189813017845, 0.10321015119552612, 0.23163792490959167], [0.2772977352142334, 0.05161405727267265, 0.04358568787574768, 0.047931231558322906, 0.04583681374788284, 0.08128579705953598, 0.15782645344734192, 0.0856042429804802, 0.10767779499292374, 0.11355230212211609, 0.041377030313014984, 0.252811074256897, 0.05780917406082153, 0.19973745942115784, 0.22427907586097717], [0.023119861260056496, 0.02037731558084488, 0.0453791618347168, 0.1060030460357666, 0.006244942545890808, 0.0085020512342453, 0.012060720473527908, 0.014560479670763016, 0.00689319521188736, 0.011241135187447071, 0.023835573345422745, 0.02693312056362629, 0.011436404660344124, 0.019489392638206482, 0.30997538566589355], [0.045414164662361145, 0.005229660775512457, 0.011418518610298634, 0.009312640875577927, 0.0002147085906472057, 0.12653864920139313, 0.05854451283812523, 0.11896014213562012, 0.0156405046582222, 0.010270207189023495, 0.0032450463622808456, 0.015787174925208092, 0.011106730438768864, 0.007675709668546915, 0.3779195249080658], [0.007367350626736879, 0.012884993106126785, 0.01019106525927782, 0.011957473121583462, 0.054886650294065475, 0.09750530868768692, 0.029414953663945198, 0.08492925018072128, 0.17440666258335114, 0.003643231000751257, 0.00105402956251055, 0.02280060388147831, 0.0010922637302428484, 0.005130939185619354, 0.09500079602003098], [0.02996714971959591, 0.028387926518917084, 0.16122521460056305, 0.0898616760969162, 0.06381779164075851, 0.20551051199436188, 0.13175098598003387, 0.562389075756073, 0.04834860563278198, 0.013581722043454647, 0.03991095721721649, 0.10736902058124542, 0.03830268979072571, 0.05736052244901657, 0.27213579416275024], [0.03571658954024315, 0.012061648070812225, 0.08574458211660385, 0.022463832050561905, 0.12578466534614563, 0.07826194912195206, 0.06577891856431961, 0.13274507224559784, 0.06591502577066422, 0.05002211779356003, 0.03129255399107933, 0.27911075949668884, 0.31601372361183167, 0.10930214822292328, 0.30993908643722534], [0.04630875587463379, 0.03141915798187256, 0.03061339072883129, 0.007028677500784397, 0.008451082743704319, 0.02540888637304306, 0.012118873186409473, 0.09331455826759338, 0.0033372503239661455, 0.01357665192335844, 0.0069510783068835735, 0.017483821138739586, 0.033454760909080505, 0.014270796440541744, 0.44127020239830017], [0.1722828894853592, 0.15122008323669434, 0.056102070957422256, 0.09136570990085602, 0.02421834133565426, 0.045343294739723206, 0.034619707614183426, 0.030837759375572205, 0.019798463210463524, 0.04411705583333969, 0.05331422761082649, 0.09423463046550751, 0.1436629444360733, 0.13433872163295746, 0.1229754090309143], [0.022473091259598732, 0.0489150770008564, 0.010993139818310738, 0.03897916153073311, 0.003662768052890897, 0.002051829593256116, 0.0037445707712322474, 0.016557298600673676, 0.014907213859260082, 0.004300208762288094, 0.004852794576436281, 0.0027131394017487764, 0.016001524403691292, 0.008091894909739494, 0.25544992089271545], [0.08012817800045013, 0.2898695766925812, 0.022246699780225754, 0.06057273969054222, 0.025327028706669807, 0.02957070618867874, 0.04002644121646881, 0.019245512783527374, 0.01995179057121277, 0.020330116152763367, 0.006697094067931175, 0.015452835708856583, 0.014569609425961971, 0.04013357311487198, 0.2585589587688446], [0.01832924410700798, 0.023918962106108665, 0.024782713502645493, 0.033514510840177536, 0.050549402832984924, 0.013098560273647308, 0.023091215640306473, 0.030541786924004555, 0.1064886748790741, 0.006106832530349493, 0.0024854408111423254, 0.018918434157967567, 0.0075035663321614265, 0.009370497427880764, 0.21452490985393524], [0.027254067361354828, 0.020437292754650116, 0.14233240485191345, 0.08538791537284851, 0.03242940828204155, 0.0897425189614296, 0.08476056158542633, 0.2620556950569153, 0.02126460149884224, 0.023079702630639076, 0.03143052011728287, 0.04489685967564583, 0.046720463782548904, 0.03604652360081673, 0.23038896918296814], [0.042377930134534836, 0.017293933779001236, 0.08730384707450867, 0.030179454013705254, 0.12187745422124863, 0.05139933153986931, 0.047754548490047455, 0.066692054271698, 0.06521614640951157, 0.05196157470345497, 0.028108397498726845, 0.17703385651111603, 0.22747749090194702, 0.06955988705158234, 0.28824013471603394], [0.03372317552566528, 0.030876630917191505, 0.025082340463995934, 0.008588657714426517, 0.007454049773514271, 0.009771045297384262, 0.010381288826465607, 0.041183773428201675, 0.004549690056592226, 0.01619204692542553, 0.0060179769061505795, 0.009672058746218681, 0.022905999794602394, 0.009750566445291042, 0.30946746468544006], [0.18900562822818756, 0.14908763766288757, 0.05840699374675751, 0.10216160118579865, 0.03072887472808361, 0.04109037667512894, 0.03799780085682869, 0.02909342385828495, 0.03500371053814888, 0.0757574513554573, 0.061073921620845795, 0.09956928342580795, 0.10441071540117264, 0.14136889576911926, 0.13095542788505554], [0.014150185510516167, 0.03789284825325012, 0.007744992151856422, 0.02556411363184452, 0.0037681234534829855, 0.001123085618019104, 0.002939486177638173, 0.010072565637528896, 0.019109029322862625, 0.003645692951977253, 0.0027771664317697287, 0.002490789396688342, 0.007166225463151932, 0.005180294159799814, 0.2058444321155548], [0.0469474196434021, 0.1743137687444687, 0.021908296272158623, 0.046387769281864166, 0.02985612489283085, 0.019742406904697418, 0.040140021592378616, 0.01437240932136774, 0.02856219932436943, 0.018488112837076187, 0.004136314615607262, 0.01038376335054636, 0.009851893410086632, 0.026245350018143654, 0.22488054633140564], [0.00832295510917902, 0.021339448168873787, 0.00394090311601758, 0.002333499025553465, 0.05547437444329262, 0.007243151310831308, 0.011641105636954308, 0.0331541933119297, 0.010278979316353798, 0.011881710961461067, 0.001766148954629898, 0.04899042472243309, 0.01878243498504162, 0.01244808267802, 0.15685127675533295]]], [[[0.04773104563355446, 0.01963546872138977, 0.16452182829380035, 0.04063690826296806, 0.1849776655435562, 0.08088860660791397, 0.11659693717956543, 0.038044340908527374, 0.2744975686073303, 0.003083554795011878, 0.019721103832125664, 0.08137688785791397, 0.0169991385191679, 0.03939461708068848, 0.14168404042720795], [0.09676018357276917, 0.018249453976750374, 0.657112717628479, 0.5890088677406311, 0.5712416768074036, 0.2744671702384949, 0.48642322421073914, 0.26345524191856384, 0.23708243668079376, 0.03475205600261688, 0.15204745531082153, 0.0676480308175087, 0.050043635070323944, 0.0665324404835701, 0.036993421614170074], [0.04065309092402458, 0.0025235058274120092, 0.11838234961032867, 0.27863210439682007, 0.37560757994651794, 0.7046668529510498, 0.12516380846500397, 0.1912177950143814, 0.14992743730545044, 0.05949303135275841, 0.056387268006801605, 0.04353337734937668, 0.17471297085285187, 0.07017815858125687, 0.12025584280490875], [0.015422305092215538, 0.000844803755171597, 0.015767300501465797, 0.11098357290029526, 0.273564875125885, 0.3235251009464264, 0.14805495738983154, 0.17132841050624847, 0.25568780303001404, 0.034506767988204956, 0.046862825751304626, 0.03818853572010994, 0.025031423196196556, 0.027911247685551643, 0.009120252914726734], [0.01866327039897442, 0.11290711164474487, 0.007440958172082901, 0.031009642407298088, 0.059622399508953094, 0.035299621522426605, 0.012064317241311073, 0.17540854215621948, 0.06399405747652054, 0.010346408933401108, 0.023967623710632324, 0.006549614481627941, 0.015476463362574577, 0.017944032326340675, 0.15624091029167175], [0.115133136510849, 0.5564319491386414, 0.0024013265501707792, 0.014839398674666882, 0.027623601257801056, 0.003712957026436925, 0.11139625310897827, 0.4320802688598633, 0.18111301958560944, 0.025198934599757195, 0.05914938822388649, 0.029404014348983765, 0.1131783202290535, 0.1630096137523651, 0.14384765923023224], [0.047323077917099, 0.01987922191619873, 0.021367410197854042, 0.0816798061132431, 0.11104802042245865, 0.01310664601624012, 0.37855657935142517, 0.16697411239147186, 0.31461480259895325, 0.04616151005029678, 0.27547621726989746, 0.04939346760511398, 0.02232075110077858, 0.15515512228012085, 0.01579722762107849], [0.13229456543922424, 0.031869739294052124, 0.26943540573120117, 0.2586674690246582, 0.3796730637550354, 0.127562016248703, 0.20277942717075348, 0.05910756066441536, 0.14354895055294037, 0.08293455094099045, 0.2214740365743637, 0.23150987923145294, 0.18035069108009338, 0.2860051393508911, 0.07895194739103317], [0.09224988520145416, 0.07457923144102097, 0.05282874405384064, 0.09438028931617737, 0.06849074363708496, 0.012997711077332497, 0.007214613724499941, 0.004257954657077789, 0.2309093326330185, 0.38276976346969604, 0.5917518734931946, 0.7830951809883118, 0.8438952565193176, 0.7586230039596558, 0.04145537316799164], [0.014161140657961369, 0.027171263471245766, 0.0029068312142044306, 0.020549731329083443, 0.0005743438960053027, 0.00417140731588006, 0.003657599212601781, 0.00956815481185913, 0.34446486830711365, 0.5171273946762085, 0.39057764410972595, 0.2845093309879303, 0.1669711321592331, 0.5306525230407715, 0.015455210581421852], [0.02566671371459961, 0.00907080341130495, 0.0006065603229217231, 0.03001752682030201, 0.00023783017240930349, 0.0005533608491532505, 0.013808660209178925, 0.003767948364838958, 0.06461481004953384, 0.1359771490097046, 0.08153439313173294, 0.572087287902832, 0.36045318841934204, 0.44234389066696167, 0.0030113777611404657], [0.03087739646434784, 0.012099061161279678, 0.004942088853567839, 0.038267359137535095, 0.0023591304197907448, 0.0037323227152228355, 0.04966888204216957, 0.012427400797605515, 0.16158415377140045, 0.020882699638605118, 0.05600592866539955, 0.367767333984375, 0.24262923002243042, 0.38281354308128357, 0.00973587203770876], [0.04249054566025734, 0.0069285486824810505, 0.006088858004659414, 0.044397544115781784, 0.05390672758221626, 0.006144464481621981, 0.018320903182029724, 0.01545354351401329, 0.05193139612674713, 0.03221629932522774, 0.02379259280860424, 0.27246853709220886, 0.22103002667427063, 0.23179520666599274, 0.005589436274021864], [0.04184036701917648, 0.03700190782546997, 0.008264865726232529, 0.02439146116375923, 0.00799429602921009, 0.12502151727676392, 0.05032283812761307, 0.18101848661899567, 0.07329469919204712, 0.08409427851438522, 0.10790428519248962, 0.011960207484662533, 0.20496119558811188, 0.19276422262191772, 0.0069670299999415874], [0.06364590674638748, 0.06483624875545502, 0.015260975807905197, 0.1278582364320755, 0.006228389218449593, 0.02756887674331665, 0.020600903779268265, 0.015440343879163265, 0.018087223172187805, 0.017098410055041313, 0.025406692177057266, 0.0007098353235051036, 0.00014885497512295842, 0.0013503700029104948, 0.15608660876750946], [0.6220619678497314, 0.6306124329566956, 0.6737340092658997, 0.49940165877342224, 0.1517823040485382, 0.8503586649894714, 0.705633282661438, 0.6629571914672852, 0.11157920956611633, 0.39899003505706787, 0.3173867464065552, 0.027327625080943108, 0.014980590902268887, 0.009274562820792198, 0.08523338288068771], [0.15005189180374146, 0.04609784111380577, 0.17501141130924225, 0.21113994717597961, 0.26919078826904297, 0.6422000527381897, 0.7493206858634949, 0.2162598967552185, 0.010351919569075108, 0.09728528559207916, 0.09688232094049454, 0.028558582067489624, 0.10305432975292206, 0.05914681404829025, 0.11260810494422913], [0.09041088819503784, 0.052050016820430756, 0.08856991678476334, 0.2977358102798462, 0.04025371000170708, 0.3506464660167694, 0.6434463858604431, 0.25059518218040466, 0.01933867670595646, 0.04819375276565552, 0.07508239895105362, 0.04970608279109001, 0.02890131063759327, 0.02355407178401947, 0.12558245658874512], [0.18765486776828766, 0.021713200956583023, 0.21844394505023956, 0.3042432367801666, 0.17823228240013123, 0.1673380434513092, 0.8088975548744202, 0.46762967109680176, 0.05706785246729851, 0.009645337238907814, 0.0322297103703022, 0.09777479618787766, 0.08048812299966812, 0.10106904059648514, 0.17228879034519196], [0.4792143702507019, 0.09839366376399994, 0.1882246881723404, 0.4093988239765167, 0.7147246599197388, 0.24897223711013794, 0.4705742597579956, 0.4205995500087738, 0.01958448253571987, 0.026842152699828148, 0.02239188365638256, 0.15106931328773499, 0.08969185501337051, 0.10003618896007538, 0.1635625958442688], [0.40625429153442383, 0.3796224594116211, 0.2515096962451935, 0.36165565252304077, 0.24774380028247833, 0.8824228644371033, 0.8048573136329651, 0.857955813407898, 0.058371078222990036, 0.07109472155570984, 0.11402199417352676, 0.0021524245385080576, 0.019929109141230583, 0.030590593814849854, 0.11712031066417694], [0.04390633478760719, 0.032843075692653656, 0.010515165515244007, 0.11869800090789795, 0.005461697466671467, 0.023131608963012695, 0.01705162413418293, 0.008547519333660603, 0.003713170997798443, 0.008410640992224216, 0.009457322768867016, 0.00015943740436341614, 3.361727431183681e-05, 0.0002994383394252509, 0.1532706469297409], [0.6348351836204529, 0.5127235651016235, 0.5931673645973206, 0.5543242692947388, 0.12377271056175232, 0.8264753222465515, 0.6941898465156555, 0.5687963962554932, 0.03150533139705658, 0.12843358516693115, 0.11884576827287674, 0.005231617949903011, 0.0018767286092042923, 0.0011644444894045591, 0.11210005730390549], [0.10790421068668365, 0.016916295513510704, 0.09771728515625, 0.22749783098697662, 0.26325535774230957, 0.49138790369033813, 0.6275916695594788, 0.08931886404752731, 0.0033968419302254915, 0.024402111768722534, 0.018104346469044685, 0.003288157982751727, 0.010537534020841122, 0.006979967001825571, 0.12102893739938736], [0.028179557994008064, 0.011468129232525826, 0.016789404675364494, 0.00803140178322792, 0.00952040497213602, 0.02960360422730446, 0.24957160651683807, 0.03544437885284424, 0.005487674381583929, 0.0028927521780133247, 0.005656986031681299, 0.0040698484517633915, 0.04730471968650818, 0.0667993351817131, 0.1372966766357422]], [[0.11859580129384995, 0.07486707717180252, 0.21083025634288788, 0.32276296615600586, 0.08426652103662491, 0.03581860288977623, 0.24113436043262482, 0.608397364616394, 0.13584911823272705, 0.45509204268455505, 0.594833254814148, 0.30372148752212524, 0.8448506593704224, 0.7470672726631165, 0.09252076596021652], [0.04140070080757141, 0.00858838576823473, 0.11639615148305893, 0.1280786097049713, 0.2722368836402893, 0.21025919914245605, 0.4195333421230316, 0.631318211555481, 0.6560773253440857, 0.29341432452201843, 0.6862512230873108, 0.7675639986991882, 0.8915717005729675, 0.8601328730583191, 0.23356862366199493], [0.23441848158836365, 0.1666196584701538, 0.16664288938045502, 0.25857093930244446, 0.13334479928016663, 0.17917701601982117, 0.8257887363433838, 0.7395779490470886, 0.6802234053611755, 0.8125103712081909, 0.671615719795227, 0.8831866383552551, 0.6773648858070374, 0.7102506160736084, 0.08689045161008835], [0.24967892467975616, 0.48421844840049744, 0.036505091935396194, 0.17128480970859528, 0.01777578890323639, 0.09479225426912308, 0.36135032773017883, 0.0868472084403038, 0.16740600764751434, 0.523710310459137, 0.24439233541488647, 0.42307958006858826, 0.6259368062019348, 0.3662186563014984, 0.20058651268482208], [0.28931790590286255, 0.4439229369163513, 0.24370647966861725, 0.6020305752754211, 0.17363131046295166, 0.338454008102417, 0.5701692700386047, 0.33999428153038025, 0.68463534116745, 0.8701388239860535, 0.7831944823265076, 0.9611375331878662, 0.9679895043373108, 0.9072677493095398, 0.0468842089176178], [0.1225743219256401, 0.062406159937381744, 0.03387807682156563, 0.02868799865245819, 0.01787530817091465, 0.04143121838569641, 0.5920179486274719, 0.08798510581254959, 0.2968905568122864, 0.7129084467887878, 0.4609105885028839, 0.29060137271881104, 0.7909923791885376, 0.5701599717140198, 0.13614380359649658], [0.0705394446849823, 0.02209068462252617, 0.0211530439555645, 0.008882923051714897, 0.0033682750072330236, 0.08319123089313507, 0.11070933192968369, 0.0025125632528215647, 0.10380591452121735, 0.17744502425193787, 0.10391969978809357, 0.12427430599927902, 0.5562515258789062, 0.49710196256637573, 0.3223192095756531], [0.15847322344779968, 0.015464702621102333, 0.13866224884986877, 0.053395166993141174, 0.03494010120630264, 0.13738934695720673, 0.02684560976922512, 0.03214175999164581, 0.5759801864624023, 0.1755424290895462, 0.13409779965877533, 0.035038210451602936, 0.6489107012748718, 0.4460716247558594, 0.4074119031429291], [0.00857736449688673, 0.012718217447400093, 0.01174219325184822, 0.012934550642967224, 0.006551709491759539, 0.24597492814064026, 0.030029013752937317, 0.05923602730035782, 0.04650798439979553, 0.02447274886071682, 0.019859377294778824, 0.003505804343149066, 0.04937520623207092, 0.05625420808792114, 0.28037816286087036], [0.0015372766647487879, 0.015295127406716347, 0.018696704879403114, 0.004789609462022781, 0.19481690227985382, 0.04769033566117287, 0.01355075929313898, 0.02196505106985569, 0.08700259774923325, 0.020393503829836845, 0.02400771528482437, 0.18789233267307281, 0.15418098866939545, 0.08713112771511078, 0.19334079325199127], [0.04759770259261131, 0.04375501722097397, 0.02714523859322071, 0.05194481834769249, 0.05246514454483986, 0.14355513453483582, 0.17152011394500732, 0.14246520400047302, 0.1098044142127037, 0.013531663455069065, 0.008927365764975548, 0.03807468339800835, 0.10050502419471741, 0.02236531302332878, 0.3381733298301697], [0.10647730529308319, 0.04246760904788971, 0.08123224973678589, 0.13003453612327576, 0.07854175567626953, 0.24148082733154297, 0.6790831685066223, 0.7492273449897766, 0.28685522079467773, 0.03681188449263573, 0.15954196453094482, 0.2672117054462433, 0.11099980026483536, 0.04468434303998947, 0.4826459586620331], [0.2962004542350769, 0.47284576296806335, 0.11245852708816528, 0.23689918220043182, 0.10807513445615768, 0.8532499074935913, 0.5788733959197998, 0.6375027894973755, 0.33168625831604004, 0.06381742656230927, 0.004373080097138882, 0.015940984711050987, 0.3371734917163849, 0.06828418374061584, 0.21185840666294098], [0.3828115463256836, 0.12613584101200104, 0.47516295313835144, 0.4473835527896881, 0.17031393945217133, 0.6938255429267883, 0.7945614457130432, 0.34594833850860596, 0.5323623418807983, 0.34808266162872314, 0.11382761597633362, 0.1349307745695114, 0.013382190838456154, 0.0600610226392746, 0.30783677101135254], [0.7362364530563354, 0.8323087096214294, 0.9336822032928467, 0.7739728689193726, 0.8897883296012878, 0.9609381556510925, 0.9334329962730408, 0.9553548693656921, 0.7747710943222046, 0.4005538523197174, 0.5586770176887512, 0.25099167227745056, 0.4200068712234497, 0.1631680577993393, 0.06528117507696152], [0.07449624687433243, 0.061402805149555206, 0.09389828145503998, 0.048646457493305206, 0.024208296090364456, 0.10819891840219498, 0.10563155263662338, 0.1243496686220169, 0.048523951321840286, 0.14693649113178253, 0.06614942103624344, 0.0066792843863368034, 0.2858017086982727, 0.04383772611618042, 0.15409637987613678], [0.02467108517885208, 0.049052223563194275, 0.08135215938091278, 0.013768618926405907, 0.01176412496715784, 0.15210841596126556, 0.004693970084190369, 0.0041237217374145985, 0.018837640061974525, 0.03490369766950607, 0.036496780812740326, 0.0011750683188438416, 0.018557026982307434, 0.02382473833858967, 0.22122804820537567], [0.012043171562254429, 0.03080524504184723, 0.02248452790081501, 0.008785543963313103, 0.00550604984164238, 0.05614035204052925, 0.015958979725837708, 0.01727765053510666, 0.03423915058374405, 0.017799094319343567, 0.029912255704402924, 0.01144923735409975, 0.09533664584159851, 0.02436906285583973, 0.20283196866512299], [0.01959865354001522, 0.003073114436119795, 0.06498773396015167, 0.027286570519208908, 0.019540993496775627, 0.052237618714571, 0.08713454008102417, 0.28957968950271606, 0.3906492590904236, 0.044482238590717316, 0.17143161594867706, 0.1301742047071457, 0.10445850342512131, 0.03699616342782974, 0.2442801147699356], [0.11208802461624146, 0.11668127030134201, 0.09828943759202957, 0.10754654556512833, 0.015885351225733757, 0.38998937606811523, 0.183034285902977, 0.3230077624320984, 0.20506803691387177, 0.08733018487691879, 0.007069121580570936, 0.010435528121888638, 0.30221423506736755, 0.047303054481744766, 0.19994190335273743], [0.1682588905096054, 0.051582805812358856, 0.4415716230869293, 0.2735750675201416, 0.07878735661506653, 0.06776249408721924, 0.15038572251796722, 0.03211068734526634, 0.6709542274475098, 0.37688353657722473, 0.1879340261220932, 0.04096703231334686, 0.011627858504652977, 0.03471425548195839, 0.19384095072746277], [0.8205305933952332, 0.9214023947715759, 0.9559677839279175, 0.7988566160202026, 0.9105063080787659, 0.9672437906265259, 0.9506043195724487, 0.9735420346260071, 0.9064961075782776, 0.6156813502311707, 0.6370130777359009, 0.18943972885608673, 0.3681671619415283, 0.1194160059094429, 0.08283783495426178], [0.10534824430942535, 0.08027994632720947, 0.1381307989358902, 0.07063161581754684, 0.01806548424065113, 0.10409632325172424, 0.12885765731334686, 0.2072904407978058, 0.09267445653676987, 0.23836983740329742, 0.11645739525556564, 0.006059943698346615, 0.1595546454191208, 0.017974214628338814, 0.14464683830738068], [0.026579611003398895, 0.02949470281600952, 0.04954056441783905, 0.017031243070960045, 0.008355016820132732, 0.09075918793678284, 0.0036468924954533577, 0.0022332987282425165, 0.050134338438510895, 0.049380820244550705, 0.028885982930660248, 0.0007559077348560095, 0.015549316070973873, 0.013319555670022964, 0.1734825074672699], [0.05047497898340225, 0.027197130024433136, 0.11470095813274384, 0.007973222993314266, 0.12679167091846466, 0.4866730570793152, 0.17132264375686646, 0.15032453835010529, 0.14889459311962128, 0.01696154847741127, 0.0735161080956459, 0.0034290377516299486, 0.05194668471813202, 0.06144191324710846, 0.13309471309185028]], [[0.005987181328237057, 0.0011158415582031012, 0.0026756690349429846, 0.0011391430161893368, 0.0021053741220384836, 0.0005449134623631835, 0.0017384873935952783, 0.000736464629881084, 0.00014482461847364902, 0.0008784460369497538, 0.0008941806154325604, 0.0009559267782606184, 0.00015614555741194636, 0.00044419756159186363, 0.16329224407672882], [0.3448674976825714, 0.07203025370836258, 0.011963781900703907, 0.012941744178533554, 0.011539866216480732, 0.003333584638312459, 0.005511423572897911, 0.0016478801844641566, 0.003020848147571087, 0.006189296022057533, 0.0020935258362442255, 0.00048376841004937887, 8.994764357339591e-05, 0.00040787423495203257, 0.2113737165927887], [0.44219815731048584, 0.8124432563781738, 0.1900549679994583, 0.3808274269104004, 0.045300956815481186, 0.024617541581392288, 0.0172295980155468, 0.03488133102655411, 0.004235385917127132, 0.05999733507633209, 0.03787413239479065, 0.0011567235924303532, 0.0017442036187276244, 0.008845857344567776, 0.004224383272230625], [0.07874103635549545, 0.02866651676595211, 0.3287397623062134, 0.27984437346458435, 0.10563887655735016, 0.003691220423206687, 0.005916049238294363, 0.0007406381191685796, 0.0005066083394922316, 0.0481056272983551, 0.029072491452097893, 0.000652547983918339, 0.0003529583918862045, 0.0009863339364528656, 0.002192106796428561], [0.030638281255960464, 0.02597089111804962, 0.6577842831611633, 0.16596756875514984, 0.48041173815727234, 0.6114144921302795, 0.028207998722791672, 0.053615398705005646, 0.1417267620563507, 0.03454216569662094, 0.023575417697429657, 0.004873087164014578, 0.0009616028983145952, 0.00223313900642097, 0.0011337294708937407], [0.29477018117904663, 0.14754106104373932, 0.8534399271011353, 0.9182198643684387, 0.6083860993385315, 0.9389832019805908, 0.12579986453056335, 0.03590020909905434, 0.012173496186733246, 0.16479530930519104, 0.15366923809051514, 0.0035958383232355118, 0.002988115418702364, 0.026292480528354645, 0.0003885648038703948], [0.2897806465625763, 0.01695333980023861, 0.6714832782745361, 0.4471692144870758, 0.24303969740867615, 0.15563154220581055, 0.008645682595670223, 0.0004950988804921508, 0.0001695932005532086, 0.13566477596759796, 0.030448369681835175, 0.00021736785129178315, 9.297585347667336e-05, 0.0014399208594113588, 5.083655923954211e-05], [0.1102917492389679, 0.0027466323226690292, 0.13646264374256134, 0.07094646990299225, 0.17040857672691345, 0.6033481955528259, 0.41631338000297546, 0.013031017035245895, 0.00012492973473854363, 0.005976412910968065, 0.0002816450723912567, 4.682707003667019e-05, 0.00021861463028471917, 0.00019605428678914905, 0.001022772048600018], [0.7042187452316284, 0.49455204606056213, 0.43194010853767395, 0.7080989480018616, 0.382207989692688, 0.06800723820924759, 0.48792970180511475, 0.12651333212852478, 0.0012585417134687304, 0.07895761728286743, 0.01729964278638363, 0.0006471746601164341, 0.00013743228919338435, 0.00039039706462062895, 0.00010207234299741685], [0.5233215093612671, 0.7835124135017395, 0.3596530258655548, 0.5502080917358398, 0.589034378528595, 0.24138878285884857, 0.4714515507221222, 0.13250088691711426, 0.08884716778993607, 0.06473898142576218, 0.12478159368038177, 0.001717525301501155, 0.01358798798173666, 0.004862584639340639, 0.0004225081647746265], [0.0975094586610794, 0.14095744490623474, 0.009511731564998627, 0.03128954395651817, 0.01951521448791027, 0.0017430862644687295, 0.033708807080984116, 0.009512575343251228, 0.3042309582233429, 0.0025639990344643593, 0.0006334132049232721, 2.5987004846683703e-05, 0.0001574041525600478, 1.1997842193522956e-05, 1.5690195141360164e-05], [0.536220133304596, 0.12877297401428223, 0.013534938916563988, 0.13534405827522278, 0.015604051761329174, 0.0035537974908947945, 0.02344023622572422, 0.008398037403821945, 0.2580391466617584, 0.2587551474571228, 0.014949243515729904, 0.0010696486569941044, 0.00046315763029269874, 0.0013398011215031147, 8.422375685768202e-05], [0.028944578021764755, 0.013114584609866142, 0.0438210591673851, 0.05079193785786629, 0.03694206848740578, 0.0008442872785963118, 0.0030779552180320024, 0.002579997293651104, 0.01023491844534874, 0.21445545554161072, 0.2806929349899292, 0.00855539832264185, 0.03333647921681404, 0.06091907247900963, 1.9560096916393377e-05], [0.0058769844472408295, 0.06350620836019516, 0.003568005282431841, 0.0076079596765339375, 0.0037217612843960524, 0.004286385141313076, 0.03584115207195282, 0.14617407321929932, 0.0030082303564995527, 0.12143123894929886, 0.0793885663151741, 0.1555183082818985, 0.14442139863967896, 0.29275521636009216, 7.129996811272576e-05], [0.034930020570755005, 0.09419079124927521, 0.0127689428627491, 0.008763227611780167, 0.0065171802416443825, 0.008632887154817581, 0.02612082101404667, 0.02043459191918373, 0.0836663544178009, 0.5329904556274414, 0.3228733241558075, 0.7184357047080994, 0.5793755650520325, 0.783859133720398, 0.0001531920424895361], [0.0009532110998407006, 0.0024861039128154516, 7.189704774646088e-05, 0.00014637503772974014, 2.8552024105010787e-06, 3.0342853278853e-05, 0.0007709002820774913, 0.0005337693146429956, 6.919851330167148e-06, 0.02619163505733013, 0.02381032705307007, 0.008668542839586735, 0.39639002084732056, 0.7824769616127014, 1.1539431170604075e-06], [0.02785377763211727, 0.15845024585723877, 0.19323119521141052, 0.06543393433094025, 0.014044036157429218, 0.040286585688591, 0.07583826035261154, 0.6567350029945374, 0.004159754142165184, 0.35265031456947327, 0.6287637948989868, 0.12951745092868805, 0.32439297437667847, 0.653313934803009, 0.0008144593448378146], [0.02927210181951523, 0.04805546626448631, 0.295967698097229, 0.060625556856393814, 0.014990724623203278, 0.10397231578826904, 0.12186732143163681, 0.5237559080123901, 0.0203724168241024, 0.43874940276145935, 0.4409005343914032, 0.09095493704080582, 0.5531511306762695, 0.5263633728027344, 0.0002321143983863294], [0.5664732456207275, 0.02422192506492138, 0.3148367702960968, 0.37531769275665283, 0.06290365755558014, 0.02708868682384491, 0.03764869272708893, 0.06476183980703354, 0.09221415221691132, 0.3172641098499298, 0.088014617562294, 0.02202794700860977, 0.004314645659178495, 0.0619816817343235, 0.0017959593096747994], [0.04828598350286484, 0.01127469539642334, 0.1758044958114624, 0.0725238099694252, 0.01880812831223011, 0.003422890789806843, 0.0039800796657800674, 0.008112750947475433, 0.0007020575576461852, 0.0960424467921257, 0.3098883628845215, 0.03193678706884384, 0.03351299837231636, 0.2577627897262573, 0.0005041947006247938], [0.008833246305584908, 0.03231082111597061, 0.009648996405303478, 0.01135926228016615, 0.004257569555193186, 0.002696139505133033, 0.026390861719846725, 0.07894735038280487, 0.0002903220884036273, 0.05877671018242836, 0.0971919596195221, 0.32856324315071106, 0.08294347673654556, 0.6861463785171509, 0.00047716210247017443], [0.020260397344827652, 0.03928471356630325, 0.012783887796103954, 0.0091601787135005, 0.005565040744841099, 0.007968534715473652, 0.020862603560090065, 0.012279938906431198, 0.01832268387079239, 0.3204420506954193, 0.28696081042289734, 0.7937509417533875, 0.6314787864685059, 0.8277974724769592, 0.00014348741387948394], [0.00497927563264966, 0.011739314533770084, 0.0009416648535989225, 0.0009133343119174242, 2.0598932678694837e-05, 0.00024278588534798473, 0.00463896244764328, 0.0027787971775978804, 1.9694551156135276e-05, 0.026842234656214714, 0.05824153125286102, 0.023767979815602303, 0.7019069194793701, 0.8979114294052124, 1.5536308637820184e-05], [0.06832221150398254, 0.18812543153762817, 0.5426309108734131, 0.237625390291214, 0.041615329682826996, 0.11611851304769516, 0.16301436722278595, 0.827357828617096, 0.011619587428867817, 0.35340800881385803, 0.8248108625411987, 0.22083298861980438, 0.4978465139865875, 0.8379470109939575, 0.008811386302113533], [0.7676634788513184, 0.8615484237670898, 0.768317461013794, 0.9594964981079102, 0.36958935856819153, 0.4649639129638672, 0.5634418725967407, 0.8043064475059509, 0.6601962447166443, 0.9397303462028503, 0.8348119258880615, 0.9867405295372009, 0.7646960020065308, 0.8154686689376831, 0.03640103340148926]], [[0.5194346308708191, 0.08715501427650452, 0.09860441088676453, 0.08100719004869461, 0.11848669499158859, 0.14280925691127777, 0.19592297077178955, 0.1196337640285492, 0.2793996334075928, 0.0691760703921318, 0.09539081901311874, 0.05545644089579582, 0.02620256133377552, 0.03735822066664696, 0.09928011149168015], [0.002687783446162939, 0.2585922181606293, 0.004556892905384302, 0.0005560630816034973, 0.0013625096762552857, 0.000865808455273509, 2.095674426527694e-05, 0.013363445177674294, 1.4331720194604713e-05, 0.00023233501997310668, 0.013212678954005241, 0.00027388104354031384, 2.99917119264137e-05, 5.10126119479537e-05, 0.0653858631849289], [0.010489544831216335, 0.001751396106556058, 0.2775154411792755, 0.0030420231632888317, 0.08156438916921616, 0.0006471106316894293, 1.7804295566747896e-05, 0.00014657371502835304, 0.00035265504266135395, 0.00129506376106292, 0.018553601577878, 0.0019669390749186277, 0.009056665003299713, 0.05091148242354393, 0.1541917622089386], [0.0025869093369692564, 0.008571458049118519, 0.38431695103645325, 0.030530055984854698, 0.03365315869450569, 0.005854337941855192, 0.00010941662185359746, 4.1041937947738916e-05, 0.000364075880497694, 0.0011989381164312363, 0.014197473414242268, 0.0010815636487677693, 0.0004893331206403673, 0.0013785242335870862, 0.011478900909423828], [0.20589935779571533, 0.03613102436065674, 0.009011336602270603, 0.09399610757827759, 0.042497485876083374, 0.000576009857468307, 0.0040712482295930386, 0.00162220629863441, 0.00015305644774343818, 0.0034409475047141314, 0.025435233488678932, 2.175084773625713e-05, 1.0188268788624555e-05, 5.634217450278811e-05, 0.160919189453125], [0.00994176883250475, 0.015379102900624275, 0.000435269670560956, 0.004355194512754679, 0.002023787936195731, 4.86412636746536e-06, 0.0007220985717140138, 0.0004895065212622285, 0.0005591813242062926, 0.009127096273005009, 0.023014724254608154, 0.0003639610658865422, 3.1703839340480044e-05, 0.00036040451959706843, 0.1469942033290863], [0.31647789478302, 0.5689504742622375, 0.010991040617227554, 0.29046669602394104, 0.008814695291221142, 0.008600234054028988, 0.094898521900177, 0.02089405618607998, 0.005384301766753197, 0.1224634200334549, 0.2525540888309479, 0.011421876028180122, 9.89354812190868e-05, 0.00020726426737383008, 0.3419104218482971], [0.006757077760994434, 0.1354868859052658, 0.002759847091510892, 0.009205225855112076, 0.0038083188701421022, 0.0014255000278353691, 0.0007299972930923104, 0.2051592320203781, 0.00020230394147802144, 0.001623967313207686, 0.006681961473077536, 0.0021689198911190033, 5.557909025810659e-05, 0.000162289768923074, 0.20840437710285187], [0.010027364827692509, 0.02789497748017311, 0.0041139991953969, 0.012661347165703773, 0.0013435317669063807, 0.0034407242201268673, 0.0064836894161999226, 0.007366063538938761, 0.29601985216140747, 0.053567804396152496, 0.040060218423604965, 0.004607491660863161, 0.00018677859043236822, 3.186250978615135e-05, 0.10952453315258026], [0.19971387088298798, 0.012958711944520473, 0.001638519112020731, 0.17775660753250122, 0.0022716999519616365, 0.03685721755027771, 0.06948257982730865, 0.005452410783618689, 0.037147630006074905, 0.19678887724876404, 0.21911752223968506, 0.02466990426182747, 0.0004891769494861364, 6.33890085737221e-05, 0.21250228583812714], [0.05692211166024208, 0.036700569093227386, 0.0015533106634393334, 0.01848980039358139, 0.002404581755399704, 0.008354752324521542, 0.023693444207310677, 0.02836945652961731, 0.29948922991752625, 0.005321406293660402, 0.0022319734562188387, 0.0005214664852246642, 0.00019869217067025602, 5.8369230828247964e-05, 0.008838840760290623], [0.011123275384306908, 0.003955129534006119, 0.0015235289465636015, 0.011223106645047665, 0.002481319010257721, 0.000903434120118618, 0.0006720115779899061, 0.00024289102293550968, 0.010115177370607853, 0.26232361793518066, 0.014199022203683853, 0.0005582758458331227, 0.0001542939426144585, 5.357913687475957e-05, 0.050008371472358704], [0.025191567838191986, 0.009952094405889511, 0.015023785643279552, 0.0893990620970726, 0.006299919448792934, 0.0077370950020849705, 0.0004422276106197387, 0.00010742250742623582, 0.001807618304155767, 0.052116382867097855, 0.33116668462753296, 0.0029348258394747972, 0.004942082799971104, 0.0017646296182647347, 0.009777115657925606], [0.12133541703224182, 0.0033125760965049267, 0.008441481739282608, 0.0257105715572834, 0.005432062782347202, 0.020603680983185768, 0.0008238950395025313, 0.00019463927310425788, 0.0001117472565965727, 0.011082900688052177, 0.4118730425834656, 0.0024717452470213175, 0.21560189127922058, 0.015253315679728985, 0.03452993184328079], [0.00568122835829854, 0.003583817044273019, 0.0009402501164004207, 0.0034319525584578514, 0.014700439758598804, 0.00014027200813870877, 5.928567406954244e-05, 0.0005310353590175509, 0.001004774123430252, 0.00433507701382041, 0.003991644363850355, 0.0015378128737211227, 6.231402221601456e-05, 0.02625701017677784, 0.15481357276439667], [0.00503728911280632, 0.004739185329526663, 0.021364033222198486, 0.04603096470236778, 0.004565324168652296, 0.021244995296001434, 0.07592181116342545, 0.027910754084587097, 0.008603491820394993, 0.004941265098750591, 0.03103908710181713, 0.035909827798604965, 0.01818632334470749, 0.04406380280852318, 0.17931725084781647], [0.21416018903255463, 0.005411786492913961, 0.02111194096505642, 0.07001130282878876, 0.04736214876174927, 0.09187527745962143, 0.1399366855621338, 0.030981194227933884, 0.02342112548649311, 0.07424263656139374, 0.02716991677880287, 0.5710572600364685, 0.007255392149090767, 0.005560784600675106, 0.054831843823194504], [0.3339015245437622, 0.03176174685359001, 0.25991618633270264, 0.31748515367507935, 0.17923809587955475, 0.2977932095527649, 0.14185847342014313, 0.09826549887657166, 0.4168005883693695, 0.09961694478988647, 0.1390676498413086, 0.191667839884758, 0.0443519689142704, 0.10075851529836655, 0.08045557886362076], [0.018510108813643456, 0.0015040059806779027, 0.011199833825230598, 0.021222928538918495, 0.02421635016798973, 0.004175371024757624, 0.0007807075162418187, 0.0005349562270566821, 0.0038052168674767017, 0.3727143108844757, 0.022828511893749237, 0.01009275484830141, 0.0012628438416868448, 0.0009096930734813213, 0.10904579609632492], [0.05896773934364319, 0.023542853072285652, 0.0776505172252655, 0.15385140478610992, 0.011508575640618801, 0.0939982458949089, 0.0018089915392920375, 0.0003290986060164869, 0.0005636389250867069, 0.029514340683817863, 0.35146546363830566, 0.007090898230671883, 0.012099701911211014, 0.006742698606103659, 0.052738532423973083], [0.18205131590366364, 0.00472951028496027, 0.03192766383290291, 0.059333182871341705, 0.028221452608704567, 0.033883631229400635, 0.00131422549020499, 0.0001085989861167036, 5.632251122733578e-05, 0.004554648417979479, 0.2950275242328644, 0.0014449548907577991, 0.2329740822315216, 0.0520821250975132, 0.1361607313156128], [0.0063572716899216175, 0.002779513830319047, 0.0009721479145810008, 0.0035897656343877316, 0.019835324957966805, 0.00021187934908084571, 8.435463678324595e-05, 0.00043589723645709455, 0.0004945950931869447, 0.004414541646838188, 0.0027602717746049166, 0.0008482423145323992, 5.171148222871125e-05, 0.021799515932798386, 0.15211130678653717], [0.005286877974867821, 0.008391096256673336, 0.025823507457971573, 0.030178312212228775, 0.00857502967119217, 0.042816706001758575, 0.07608389109373093, 0.03679429367184639, 0.0067360359244048595, 0.0038807345554232597, 0.03710461035370827, 0.037315309047698975, 0.018847206607460976, 0.0415174663066864, 0.15352587401866913], [0.2992006242275238, 0.008802352473139763, 0.027079692110419273, 0.08564624935388565, 0.11560814827680588, 0.22971339523792267, 0.1826445311307907, 0.033842965960502625, 0.06175734102725983, 0.11205370724201202, 0.04016120731830597, 0.5851526856422424, 0.016921253874897957, 0.011652404442429543, 0.08951538056135178], [0.12446854263544083, 0.0009617851465009153, 0.004788657650351524, 0.0008746102685108781, 0.16037316620349884, 0.003065474098548293, 0.0056405095383524895, 0.005250739399343729, 0.05696318671107292, 0.013819074258208275, 0.028642717748880386, 0.0011808956041932106, 0.08446037769317627, 0.03008313849568367, 0.13710428774356842]], [[0.005261753685772419, 0.005328452680259943, 0.1075906753540039, 0.007504252251237631, 0.18196941912174225, 0.2677178680896759, 0.18533208966255188, 0.041308093816041946, 0.04052837938070297, 0.0018225060775876045, 0.004738607443869114, 0.028365809470415115, 0.07867489755153656, 0.032602421939373016, 0.14697469770908356], [0.024903474375605583, 0.2637169063091278, 0.01148936152458191, 0.01806865818798542, 0.010384032502770424, 0.05497525632381439, 0.01011874619871378, 6.159161421237513e-05, 0.03404803201556206, 0.01315199863165617, 0.004086918197572231, 0.033981483429670334, 0.0007253359071910381, 0.0010365481721237302, 0.023150891065597534], [0.03176039457321167, 0.002004105830565095, 0.011469452641904354, 0.003235333366319537, 0.011606591753661633, 0.01332010142505169, 0.007885226979851723, 0.0010319099528715014, 0.0026684575714170933, 0.003885145066305995, 0.002207087352871895, 0.010414022952318192, 0.015553043223917484, 0.01973811537027359, 0.1639232188463211], [0.24842531979084015, 0.031220050528645515, 0.028132880106568336, 0.029530569911003113, 0.01766534335911274, 0.36354437470436096, 0.06892471760511398, 0.02528339996933937, 0.01102821622043848, 0.15825842320919037, 0.13755246996879578, 0.07390110194683075, 0.19022952020168304, 0.1824880689382553, 0.1432848572731018], [0.0013664831640198827, 0.001714985934086144, 0.0013615208445116878, 0.0015855998499318957, 0.0011547008762136102, 0.007221538573503494, 0.01537399459630251, 0.020302001386880875, 0.0011185031617060304, 0.001242821803316474, 0.0004577837826218456, 0.0013307477347552776, 6.100967220845632e-05, 3.943840420106426e-05, 0.16435295343399048], [0.0006725311395712197, 0.000846685899887234, 0.001614874112419784, 0.000348375499015674, 0.0019150535808876157, 0.01370947528630495, 0.026421356946229935, 0.08118636161088943, 0.0008913385099731386, 0.0004401778569445014, 0.0003709472657646984, 0.0007744845934212208, 0.002328733913600445, 0.0003664834948722273, 0.14579549431800842], [0.011207095347344875, 0.029191432520747185, 0.015348215587437153, 0.012354064732789993, 0.002485303906723857, 0.7150441408157349, 0.0764552503824234, 0.14450958371162415, 0.0016117440536618233, 0.008765846490859985, 0.011787951923906803, 0.002862851833924651, 0.022502094507217407, 0.007210019044578075, 0.007054056040942669], [0.006926322355866432, 0.0050496323965489864, 0.010020078159868717, 0.021360181272029877, 0.0027102867607027292, 0.028520535677671432, 0.05918040871620178, 0.23060235381126404, 0.019199691712856293, 0.09477535635232925, 0.013206732459366322, 0.0014817069750279188, 0.0153219448402524, 0.01803957298398018, 0.07950127124786377], [0.009242678992450237, 0.05580667033791542, 0.014326682314276695, 0.04630666971206665, 0.010674487799406052, 0.5850453972816467, 0.4108324944972992, 0.4116209149360657, 0.007144990377128124, 0.20661039650440216, 0.037308260798454285, 0.054067905992269516, 0.037599414587020874, 0.03113422356545925, 0.22261686623096466], [0.0023711349349468946, 0.019731320440769196, 0.027566438540816307, 0.03758935630321503, 0.022646954283118248, 0.06538618355989456, 0.01152126956731081, 0.014797273091971874, 0.003413880243897438, 0.024214325472712517, 0.019466044381260872, 0.007235943805426359, 0.0008611958473920822, 0.0011126803001388907, 0.268255352973938], [0.08772679418325424, 0.02003292553126812, 0.09465871006250381, 0.41126132011413574, 0.07995565980672836, 0.5143890976905823, 0.1155472919344902, 0.01320470031350851, 0.02149844542145729, 0.06702866405248642, 0.6884661316871643, 0.09638151526451111, 0.35587188601493835, 0.2170087993144989, 0.019593046978116035], [0.01343127153813839, 0.0019279895350337029, 0.01925632171332836, 0.04226915165781975, 0.005290344823151827, 0.5555825233459473, 0.06846548616886139, 0.006453313864767551, 0.019162334501743317, 0.0017575293313711882, 0.2967261075973511, 0.11721283942461014, 0.4438721835613251, 0.1899448037147522, 0.007863422855734825], [0.12789316475391388, 0.004323228262364864, 0.03538274019956589, 0.05581461265683174, 0.020947236567735672, 0.09860846400260925, 0.11394336074590683, 0.010361305437982082, 0.011101406998932362, 0.33580121397972107, 0.13689599931240082, 0.038663506507873535, 0.19725953042507172, 0.10533706098794937, 0.008538279682397842], [0.007053391542285681, 0.012331487610936165, 0.008611395955085754, 0.031008008867502213, 0.004283395130187273, 0.0029549654573202133, 0.00849387887865305, 0.008564120158553123, 0.02629040740430355, 0.009985123760998249, 0.00761940935626626, 0.003499145619571209, 0.0015691317385062575, 0.005600257311016321, 0.5214234590530396], [0.0007030746201053262, 0.0001308645587414503, 0.0001913319865707308, 0.00016671058256179094, 0.000299752748105675, 0.0001608166057849303, 0.004501530434936285, 0.0010771069210022688, 0.003937124740332365, 0.001599485520273447, 0.0007339937728829682, 0.0030779645312577486, 3.4502605558373034e-05, 9.700484952190891e-05, 0.15641583502292633], [0.027913473546504974, 0.10055015236139297, 0.005828284192830324, 0.007361504249274731, 0.0010143647668883204, 0.000654859293717891, 0.0101061025634408, 0.029607031494379044, 0.04485415667295456, 0.09235014766454697, 0.05163425952196121, 0.03075464628636837, 0.027050884440541267, 0.021472401916980743, 0.18064866960048676], [0.0011193754617124796, 0.03864011913537979, 0.0033454783260822296, 0.0006957795703783631, 0.001480268081650138, 0.0012079592561349273, 0.00020605533791240305, 0.0011212154058739543, 0.0015670693246647716, 0.0014121911954134703, 0.0012700740480795503, 0.0019415348069742322, 0.001359732006676495, 0.0011440571397542953, 0.23876120150089264], [0.012943120673298836, 0.020876264199614525, 0.04825761169195175, 0.03707631304860115, 0.015636419877409935, 0.11923719942569733, 0.021652603521943092, 0.026653259992599487, 0.020431919023394585, 0.03287035599350929, 0.10921605676412582, 0.11103712767362595, 0.08490956574678421, 0.05352960154414177, 0.1791488379240036], [0.010143280029296875, 0.0011783033842220902, 0.07699523866176605, 0.04151652753353119, 0.013031265698373318, 0.6595657467842102, 0.04001229628920555, 0.015414847061038017, 0.05828738585114479, 0.00582495890557766, 0.39538952708244324, 0.3540988564491272, 0.5535411834716797, 0.14920510351657867, 0.05510678142309189], [0.10365689545869827, 0.011393263004720211, 0.09083462506532669, 0.05552159622311592, 0.021694108843803406, 0.23093751072883606, 0.12655670940876007, 0.02638416364789009, 0.016898566856980324, 0.4334920644760132, 0.1302367001771927, 0.07987051457166672, 0.26015403866767883, 0.07882147282361984, 0.06412448734045029], [0.0009046280756592751, 0.006186267826706171, 0.001710598124191165, 0.0040000369772315025, 0.0010556421475484967, 0.00010012275743065402, 0.000467440317152068, 0.00034073027200065553, 0.012450831942260265, 0.001776019111275673, 0.0016348852077499032, 0.0004490323772188276, 0.00023723821504972875, 0.0005369102582335472, 0.2610536217689514], [0.00040706052095629275, 5.995776882627979e-05, 0.00011266738147241995, 0.00010974665929097682, 0.00022393744438886642, 7.468188414350152e-05, 0.00239625689573586, 0.0004222780407872051, 0.002755024004727602, 0.0011263962369412184, 0.0004159261588938534, 0.0013214137870818377, 1.3015362128498964e-05, 3.146446033497341e-05, 0.15343648195266724], [0.02487853355705738, 0.06922142952680588, 0.005931189749389887, 0.005149703938513994, 0.0007503133383579552, 0.00046759017277508974, 0.004864065907895565, 0.010271446779370308, 0.03885169327259064, 0.0494176521897316, 0.032662954181432724, 0.015474021434783936, 0.005468437913805246, 0.0031831569503992796, 0.16160887479782104], [0.0006016235565766692, 0.010655699297785759, 0.0012552555417641997, 0.0004406629304867238, 0.0006771506741642952, 0.0004804672207683325, 8.584682655055076e-05, 0.00018533790716901422, 0.0020008538849651814, 0.0008522755815647542, 0.0005471827462315559, 0.0006654397584497929, 0.0003326669684611261, 0.00020969027536921203, 0.18202657997608185], [0.0006660889484919608, 0.0011989487102255225, 0.006168409250676632, 0.0007392434636130929, 0.002072105184197426, 0.0013732375809922814, 0.001215140800923109, 8.942947169998661e-05, 0.0032219376880675554, 0.00034276655060239136, 0.0006051870877854526, 0.0004003554640803486, 0.0006330502219498158, 9.228585986420512e-05, 0.13989190757274628]], [[0.17597882449626923, 0.03865775838494301, 0.04927876219153404, 0.19269852340221405, 0.07631995528936386, 0.03202155977487564, 0.04315444082021713, 0.0381813645362854, 0.14437337219715118, 0.14268529415130615, 0.12548406422138214, 0.22065725922584534, 0.007455701474100351, 0.012540786527097225, 0.13194040954113007], [0.12168548256158829, 0.12690430879592896, 0.03319493681192398, 0.044549524784088135, 0.022643521428108215, 0.12293753027915955, 0.012858373112976551, 0.056580886244773865, 0.0409478023648262, 0.5390252470970154, 0.04499629884958267, 0.010665545240044594, 0.0012580851325765252, 0.0006077282596379519, 0.16003872454166412], [0.004976227879524231, 0.0016218257369473577, 0.10218203067779541, 0.005807417444884777, 0.025330372154712677, 0.00805770605802536, 0.0010953968157991767, 0.007808555383235216, 0.03332183510065079, 0.01014297641813755, 0.0378553569316864, 0.0012688467977568507, 0.0070253219455480576, 0.006525768432766199, 0.1611432433128357], [0.018298039212822914, 0.043392445892095566, 0.026758581399917603, 0.06685060262680054, 0.007846164517104626, 0.0070086256600916386, 0.0011090404586866498, 0.0016357558779418468, 0.015295942313969135, 0.022091375663876534, 0.08676162362098694, 0.0013220091350376606, 0.0007799563463777304, 0.0005145008908584714, 0.5814905166625977], [0.16791731119155884, 0.01838838867843151, 0.03170344606041908, 0.04746389389038086, 0.024931352585554123, 0.002624210435897112, 0.3320338726043701, 0.32248422503471375, 0.021048149093985558, 0.02857070416212082, 0.11922428011894226, 4.079664358869195e-05, 0.0002566495386417955, 0.0005197013379074633, 0.1538068950176239], [0.03376027196645737, 0.001082546659745276, 0.003266592975705862, 0.006257645785808563, 0.023632841184735298, 0.00021245618700049818, 0.033721838146448135, 0.15340450406074524, 0.009442711248993874, 0.006162047851830721, 0.09923229366540909, 0.0001386175281368196, 0.0008165750186890364, 0.0010916005121544003, 0.14602994918823242], [0.04221357777714729, 0.03857824206352234, 0.004161412362009287, 0.06419923156499863, 0.010648604482412338, 0.008165394887328148, 0.04070910066366196, 0.34736329317092896, 0.0012154168216511607, 0.1630050241947174, 0.07001504302024841, 0.0033116117119789124, 0.00023883172252681106, 0.00045473958016373217, 0.2740376889705658], [0.007271567825227976, 0.0015110730892047286, 0.0014769553672522306, 0.0053740208968520164, 0.0038654205854982138, 0.0024983601178973913, 0.049697574228048325, 0.27208074927330017, 0.0006182760698720813, 0.014045008458197117, 0.00131281279027462, 0.00040628391434438527, 0.00037906834040768445, 0.0001199298130813986, 0.006693295668810606], [0.08829134702682495, 0.11286511272192001, 0.004967967513948679, 0.006996258161962032, 0.0014454894699156284, 0.006397548597306013, 0.01389994379132986, 0.27431485056877136, 0.0018983082845807076, 0.09154568612575531, 0.022492842748761177, 0.0017391144065186381, 0.000634143827483058, 4.5783879613736644e-05, 0.318096399307251], [0.02142007276415825, 0.007001234218478203, 0.00761477230116725, 0.018849696964025497, 0.010492328554391861, 0.01844215951859951, 0.008208145387470722, 0.01109394058585167, 0.006335548125207424, 0.01884968765079975, 0.01652243174612522, 0.016355833038687706, 0.0014795949682593346, 0.0011322565842419863, 0.27169719338417053], [0.17013461887836456, 0.14343884587287903, 0.017679741606116295, 0.10850679129362106, 0.01231957133859396, 0.010847942903637886, 0.04900640249252319, 0.023357992991805077, 0.014735743403434753, 0.014097570441663265, 0.012582896277308464, 0.0010529988212510943, 0.00046457236749120057, 0.0006211225991137326, 0.5663455724716187], [0.1586649864912033, 0.08337923884391785, 0.0181503314524889, 0.22676831483840942, 0.016727542504668236, 0.015186772681772709, 0.0050455182790756226, 0.00688449339941144, 0.025511443614959717, 0.20239992439746857, 0.024231791496276855, 0.0023393011651933193, 0.0011192933889105916, 0.0005647524958476424, 0.390881210565567], [0.3443087935447693, 0.28029316663742065, 0.23536846041679382, 0.34415915608406067, 0.11761639267206192, 0.006012732163071632, 0.008058828301727772, 0.005314267706125975, 0.013309409841895103, 0.09906232357025146, 0.10091385245323181, 0.018941059708595276, 0.025248508900403976, 0.014945760369300842, 0.7436007857322693], [0.0022638223599642515, 0.004991845227777958, 0.004655482713133097, 0.0007185174035839736, 0.0013901105849072337, 0.011776956729590893, 0.0005479936371557415, 0.00022604972764384001, 0.00024645475787110627, 0.009541304782032967, 0.011744895949959755, 0.0007132806931622326, 0.27867355942726135, 0.02834550105035305, 0.007979176938533783], [0.024570701643824577, 0.00167787482496351, 0.004072254989296198, 0.00223688711412251, 0.007143567781895399, 0.00014352552534546703, 0.0004634522774722427, 0.0016921478090807796, 0.003620122792199254, 0.007754941936582327, 0.011850811541080475, 0.0027722271624952555, 9.3724018370267e-05, 0.02145184949040413, 0.15506701171398163], [0.01723022572696209, 0.08018677681684494, 0.007713299244642258, 0.004271229729056358, 0.0005464836140163243, 0.00456921337172389, 0.0031762931030243635, 0.009469777345657349, 0.000385247083613649, 0.01870143786072731, 0.033109456300735474, 0.004042719956487417, 0.004976211115717888, 0.005646048113703728, 0.19230251014232635], [0.016216034069657326, 0.04777013510465622, 0.01620146818459034, 0.010810854844748974, 0.16034351289272308, 0.006931359879672527, 0.0032006967812776566, 0.032106515020132065, 0.0003033989341929555, 0.015325331129133701, 0.006036583799868822, 0.12791146337985992, 0.19952742755413055, 0.023708127439022064, 0.18307197093963623], [0.014499284327030182, 0.035677529871463776, 0.009275808930397034, 0.01653297245502472, 0.006223962642252445, 0.0020693510305136442, 0.007680083625018597, 0.013822571374475956, 0.00040966575033962727, 0.0038025544490665197, 0.013774569146335125, 0.006069935858249664, 0.004488381557166576, 0.005977130029350519, 0.217429518699646], [0.03237156197428703, 0.013441890478134155, 0.0194883793592453, 0.09343220293521881, 0.05379915237426758, 0.004893247038125992, 0.0011929833563044667, 0.009432576596736908, 0.015330814756453037, 0.14898745715618134, 0.018398255109786987, 0.01228779274970293, 0.00492482166737318, 0.0038985873106867075, 0.2601524889469147], [0.08357361704111099, 0.18220724165439606, 0.10462122410535812, 0.08245989680290222, 0.03124452568590641, 0.002170282183215022, 0.0020384257659316063, 0.004550496581941843, 0.003485089400783181, 0.036062099039554596, 0.0278666652739048, 0.011443988420069218, 0.01760544627904892, 0.013599698431789875, 0.3874043822288513], [0.001995340920984745, 0.011527596041560173, 0.005334027577191591, 0.0006887424970045686, 0.0023407095577567816, 0.00276917009614408, 0.00029977987287566066, 0.00012230046559125185, 0.00026578022516332567, 0.008239910937845707, 0.009819538332521915, 0.000393931899452582, 0.605858564376831, 0.08989311754703522, 0.011135715991258621], [0.021298440173268318, 0.001658836961723864, 0.004600299056619406, 0.0025729055050760508, 0.015332063660025597, 0.00017298871534876525, 0.0005721640191040933, 0.00186175387352705, 0.0037871075328439474, 0.009124312549829483, 0.01116581168025732, 0.0031747270841151476, 0.00012207991676405072, 0.029056062921881676, 0.15163807570934296], [0.020229021087288857, 0.11621151119470596, 0.015550180338323116, 0.006284819450229406, 0.0013723199954256415, 0.013658476993441582, 0.005685316864401102, 0.02063058130443096, 0.001440295367501676, 0.022225895896553993, 0.07092871516942978, 0.007373427972197533, 0.00771017000079155, 0.006927240639925003, 0.16024509072303772], [0.014029471203684807, 0.02389930933713913, 0.011611595749855042, 0.012217668816447258, 0.2477317750453949, 0.006976675242185593, 0.0035841658245772123, 0.022232146933674812, 0.0018886715406551957, 0.01750483363866806, 0.005654812324792147, 0.10889071226119995, 0.19916927814483643, 0.022882532328367233, 0.16074435412883759], [0.0032621105201542377, 0.006088452413678169, 0.012619324028491974, 0.008848619647324085, 0.17461968958377838, 8.660123421577737e-05, 0.0006109846872277558, 0.0007747155614197254, 0.003163054818287492, 0.017787659540772438, 0.029563669115304947, 0.0032195982057601213, 0.013336165808141232, 0.013171130791306496, 0.1387031376361847]], [[0.09661699831485748, 0.7619754076004028, 0.05676787346601486, 0.020180072635412216, 0.10883769392967224, 0.42711278796195984, 0.09064477682113647, 0.10612691193819046, 0.04782179743051529, 0.06935178488492966, 0.027948519214987755, 0.00755169615149498, 0.007339869160205126, 0.025803416967391968, 0.09292053431272507], [0.042798254638910294, 0.23223945498466492, 0.062359996140003204, 0.01933804154396057, 0.04838808253407478, 0.30189236998558044, 0.0354127362370491, 0.019764740020036697, 0.00920741818845272, 0.0097093116492033, 0.0160877276211977, 0.0032758424058556557, 0.005296806804835796, 0.011010169051587582, 0.02110680378973484], [0.02002989500761032, 0.001048662350513041, 0.03834937512874603, 0.030392715707421303, 0.09750902652740479, 0.056120067834854126, 0.008173296228051186, 0.006944228895008564, 0.004440560005605221, 0.005061029922217131, 0.007118762470781803, 0.008411978371441364, 0.023608768358826637, 0.04182775691151619, 0.16016238927841187], [0.041295986622571945, 0.19780276715755463, 0.03777160495519638, 0.1712082475423813, 0.20935285091400146, 0.158755823969841, 0.3937656581401825, 0.684601902961731, 0.2584594190120697, 0.11237194389104843, 0.1112959012389183, 0.09882687777280807, 0.05429066717624664, 0.24210131168365479, 0.016339490190148354], [0.26312491297721863, 0.2720799446105957, 0.005703570321202278, 0.0481516495347023, 0.027902500703930855, 0.0034437666181474924, 0.03425572067499161, 0.03555849939584732, 0.028000997379422188, 0.0429554246366024, 0.002753790933638811, 0.0017769382102414966, 0.002218457870185375, 0.003535473719239235, 0.1597488671541214], [0.22248251736164093, 0.03185709938406944, 0.000688861298840493, 0.005810217931866646, 0.007679672911763191, 0.0008787074475549161, 0.07858764380216599, 0.14273476600646973, 0.07306984066963196, 0.02433006465435028, 0.011720307171344757, 0.013396549038589, 0.017704129219055176, 0.034836068749427795, 0.1453055441379547], [0.1531120240688324, 0.15391655266284943, 0.006810865830630064, 0.07720811665058136, 0.008951452560722828, 0.01149735413491726, 0.2822602391242981, 0.30408379435539246, 0.48283058404922485, 0.33028021454811096, 0.16095426678657532, 0.031167738139629364, 0.03355513513088226, 0.13962571322917938, 0.012790725566446781], [0.03593587130308151, 0.03233448788523674, 0.22662676870822906, 0.405829519033432, 0.014032814651727676, 0.02822977490723133, 0.09231841564178467, 0.1225365549325943, 0.20093639194965363, 0.2508411109447479, 0.5826555490493774, 0.037383783608675, 0.07952429354190826, 0.10720134526491165, 0.15212680399417877], [0.037364520132541656, 0.04119153320789337, 0.0012645104434341192, 0.021537767723202705, 0.000536995125003159, 0.0011436643544584513, 0.019049961119890213, 0.06139632686972618, 0.385105162858963, 0.13276730477809906, 0.24771228432655334, 0.04952799528837204, 0.04911990836262703, 0.11973114311695099, 0.021608887240290642], [0.004867227748036385, 0.009626063518226147, 0.0003137234307359904, 0.0026314754504710436, 0.00027048110496252775, 0.000934475683607161, 0.007251756265759468, 0.03575620427727699, 0.40781450271606445, 0.05584407597780228, 0.040446195751428604, 0.005334825720638037, 0.007708138320595026, 0.06401336193084717, 0.010240204632282257], [0.19358457624912262, 0.2328234314918518, 0.0017398587660863996, 0.10100623220205307, 0.0019695234950631857, 0.1674531251192093, 0.4513051509857178, 0.6547151803970337, 0.030009860172867775, 0.7025956511497498, 0.1685936599969864, 0.03178222477436066, 0.13270388543605804, 0.23426049947738647, 0.010277668945491314], [0.09463346004486084, 0.5257620811462402, 0.0045187450014054775, 0.07222570478916168, 0.0025188177824020386, 0.1410406231880188, 0.06597349792718887, 0.0719805508852005, 0.09957849979400635, 0.17567123472690582, 0.18618373572826385, 0.02195402979850769, 0.042485080659389496, 0.12470933794975281, 0.00617468124255538], [0.027796348556876183, 0.06599752604961395, 0.002643989399075508, 0.029425768181681633, 0.008861851878464222, 0.013279970735311508, 0.25377023220062256, 0.2656356692314148, 0.055540941655635834, 0.027583830058574677, 0.004816746339201927, 0.3890189528465271, 0.12020140886306763, 0.33882811665534973, 0.0040408894419670105], [0.4147956669330597, 0.5514373779296875, 0.09636387228965759, 0.29775112867355347, 0.03436855599284172, 0.08799602836370468, 0.07023341208696365, 0.10276275128126144, 0.25543972849845886, 0.10302554070949554, 0.05857125297188759, 0.029829595237970352, 0.114840567111969, 0.33078575134277344, 0.07371985912322998], [0.07031518220901489, 0.001305539975874126, 0.0025430582463741302, 0.010662226937711239, 0.0007357596186921, 0.000663888524286449, 0.0014398572966456413, 0.0005107407923787832, 0.005960140842944384, 0.0030986208003014326, 0.0017578504048287868, 0.00018377922242507339, 1.743367283779662e-05, 4.847845411859453e-05, 0.15638960897922516], [0.24421003460884094, 0.03331591188907623, 0.07573812454938889, 0.33240795135498047, 0.006838400848209858, 0.008697851561009884, 0.06428743898868561, 0.06466686725616455, 0.006176145281642675, 0.06394235789775848, 0.09260299056768417, 0.19959890842437744, 0.02154124155640602, 0.021672323346138, 0.15025706589221954], [0.5462155342102051, 0.545982301235199, 0.3341628611087799, 0.5788259506225586, 0.08809857815504074, 0.06356553733348846, 0.022417092695832253, 0.0164126455783844, 0.00386660173535347, 0.10154324769973755, 0.14015790820121765, 0.0864240974187851, 0.34186482429504395, 0.22899740934371948, 0.05407746881246567], [0.48888036608695984, 0.6578190326690674, 0.030819885432720184, 0.2205304652452469, 0.004883326590061188, 0.0656682699918747, 0.04461565986275673, 0.05094402655959129, 0.0005314986919984221, 0.15455113351345062, 0.10763049870729446, 0.1186080202460289, 0.14419804513454437, 0.1328149437904358, 0.09490374475717545], [0.15812784433364868, 0.9118645191192627, 0.022590545937418938, 0.05952226370573044, 0.00360964541323483, 0.07875056564807892, 0.013187792152166367, 0.02020449750125408, 0.0020393244922161102, 0.033818699419498444, 0.0449705570936203, 0.02132066898047924, 0.0717315599322319, 0.12101268768310547, 0.06353376060724258], [0.07771441340446472, 0.4748976230621338, 0.012594498693943024, 0.043653786182403564, 0.006564431358128786, 0.024485116824507713, 0.20463299751281738, 0.1550481915473938, 0.0016144687542691827, 0.005543926265090704, 0.0017496985383331776, 0.3491710126399994, 0.23835937678813934, 0.3316482901573181, 0.08539295196533203], [0.22228576242923737, 0.3581831455230713, 0.10504736006259918, 0.2062736451625824, 0.015430409461259842, 0.007369442842900753, 0.009848481975495815, 0.0027359407395124435, 0.003257193835452199, 0.004766176920384169, 0.0058546122163534164, 0.0040231142193078995, 0.032162997871637344, 0.05548902228474617, 0.22239458560943604], [0.040305208414793015, 0.0008039010572247207, 0.001399470493197441, 0.006614126265048981, 0.0003286598657723516, 0.0002559607964940369, 0.0005696980515494943, 0.00010972175368806347, 0.0006102611077949405, 0.0009710662416182458, 0.0004746906051877886, 5.0628168537514284e-05, 6.201828455232317e-06, 1.1841932064271532e-05, 0.15342259407043457], [0.18667390942573547, 0.05485990643501282, 0.06146723031997681, 0.2094709873199463, 0.003188095986843109, 0.005957009736448526, 0.04363764822483063, 0.02604665607213974, 0.0011390803847461939, 0.022857926785945892, 0.035827361047267914, 0.07732249796390533, 0.00673074834048748, 0.004807854071259499, 0.15350142121315002], [0.46625471115112305, 0.6644052863121033, 0.19963930547237396, 0.36004284024238586, 0.06144074350595474, 0.06362717598676682, 0.016601700335741043, 0.006137203890830278, 0.0020489897578954697, 0.041981395334005356, 0.042364589869976044, 0.04546959325671196, 0.25786423683166504, 0.1048446074128151, 0.10812478512525558], [0.01868601329624653, 0.08739857375621796, 0.016145089641213417, 0.000850466953124851, 0.0035631621722131968, 0.013478883542120457, 0.0006747889565303922, 0.0010685214074328542, 0.013735192827880383, 0.0029910006560385227, 0.017663421109318733, 0.0005569100612774491, 0.0335303470492363, 0.010939561761915684, 0.13854636251926422]], [[0.03039383515715599, 0.011264979839324951, 0.30973049998283386, 0.33407092094421387, 0.24303670227527618, 0.013086382299661636, 0.12547586858272552, 0.047571711242198944, 0.07738520950078964, 0.2579103410243988, 0.13098950684070587, 0.3019145727157593, 0.018321001902222633, 0.10478901118040085, 0.1313871294260025], [0.32489657402038574, 0.01967906951904297, 0.10292623937129974, 0.18745845556259155, 0.06220339238643646, 0.03126899152994156, 0.030121171846985817, 0.013807957991957664, 0.01960192248225212, 0.10352540761232376, 0.08122410625219345, 0.11610747873783112, 0.05098450556397438, 0.06022121384739876, 0.24838198721408844], [0.21547414362430573, 0.011987588368356228, 0.09540344774723053, 0.03949207067489624, 0.22973625361919403, 0.013393656350672245, 0.014646085910499096, 0.018391601741313934, 0.12483032047748566, 0.04761500656604767, 0.16838808357715607, 0.0500614158809185, 0.09093409031629562, 0.09172232449054718, 0.14920873939990997], [0.3455514907836914, 0.20528344810009003, 0.14200778305530548, 0.1397678107023239, 0.3345029056072235, 0.04282815381884575, 0.020769812166690826, 0.02952164225280285, 0.29125186800956726, 0.09975660592317581, 0.3298649489879608, 0.36294782161712646, 0.10288939625024796, 0.1784013956785202, 0.03550736606121063], [0.023072484880685806, 0.08888474851846695, 0.04328835755586624, 0.009794876910746098, 0.18984860181808472, 0.0009663040982559323, 0.0038235578685998917, 0.05101485177874565, 0.059323158115148544, 0.00876270979642868, 0.021391507238149643, 0.02426949329674244, 0.013026251457631588, 0.06840420514345169, 0.15691325068473816], [0.20066522061824799, 0.18445545434951782, 0.10427504032850266, 0.02148139849305153, 0.3108636438846588, 0.0010669901967048645, 0.031332992017269135, 0.06621930748224258, 0.42585986852645874, 0.05703788995742798, 0.1919325739145279, 0.6617251038551331, 0.07196007668972015, 0.2038833349943161, 0.13549473881721497], [0.06934618204832077, 0.15043997764587402, 0.24868465960025787, 0.0180400051176548, 0.61164391040802, 0.0047634197399020195, 0.0077652581967413425, 0.01316747348755598, 0.09036756306886673, 0.016214115545153618, 0.09484434872865677, 0.7773507833480835, 0.3649398386478424, 0.19880527257919312, 0.026039909571409225], [0.5420496463775635, 0.775536835193634, 0.21455605328083038, 0.17522192001342773, 0.3905614912509918, 0.07102629542350769, 0.15213513374328613, 0.06534071266651154, 0.05938922241330147, 0.3742612600326538, 0.040289394557476044, 0.6919643878936768, 0.07523911446332932, 0.14220400154590607, 0.06588775664567947], [0.05002814158797264, 0.18039211630821228, 0.4788157641887665, 0.0970841720700264, 0.5287489891052246, 0.07699278742074966, 0.024560611695051193, 0.055294524878263474, 0.031155720353126526, 0.029308732599020004, 0.023515479639172554, 0.10280930250883102, 0.01905171573162079, 0.033789344131946564, 0.006217750255018473], [0.2326076328754425, 0.12470381706953049, 0.5816100239753723, 0.187625452876091, 0.17989297211170197, 0.58512943983078, 0.4148763120174408, 0.7688660621643066, 0.02497384324669838, 0.10204316675662994, 0.16508084535598755, 0.4722842574119568, 0.654721736907959, 0.31103214621543884, 0.02808636985719204], [0.32085803151130676, 0.3732209801673889, 0.8471049070358276, 0.2474840134382248, 0.8311324715614319, 0.1531035155057907, 0.14141014218330383, 0.12460694462060928, 0.15561653673648834, 0.05888388305902481, 0.03703024983406067, 0.2600737512111664, 0.049645353108644485, 0.08333000540733337, 0.053744472563266754], [0.048572178930044174, 0.20163586735725403, 0.8568418025970459, 0.3438677489757538, 0.8764770030975342, 0.038519736379384995, 0.10765119642019272, 0.14438603818416595, 0.13915397226810455, 0.04139794409275055, 0.24816225469112396, 0.22188685834407806, 0.1582770049571991, 0.255889892578125, 0.05260627716779709], [0.10717450082302094, 0.14654512703418732, 0.5492125749588013, 0.149112731218338, 0.6473506689071655, 0.014123019762337208, 0.023513145744800568, 0.06304500997066498, 0.5243880152702332, 0.17494699358940125, 0.11734810471534729, 0.2534768283367157, 0.06080847606062889, 0.1781260073184967, 0.01657547615468502], [0.024022793397307396, 0.20128284394741058, 0.39493197202682495, 0.16542883217334747, 0.7724959254264832, 0.05353498458862305, 0.039175428450107574, 0.21511156857013702, 0.10924636572599411, 0.3127569556236267, 0.20907098054885864, 0.6610769033432007, 0.026550091803073883, 0.07443477213382721, 0.04747246578335762], [0.0639173686504364, 0.0019661476835608482, 0.03054100275039673, 0.07290788739919662, 0.07458660751581192, 0.0017515828367322683, 0.01338117104023695, 0.0049591753631830215, 0.10895326733589172, 0.03256915882229805, 0.07470867037773132, 0.022291045635938644, 0.00026081688702106476, 0.003768018214032054, 0.15579301118850708], [0.00809751357883215, 0.08670660853385925, 0.12165205925703049, 0.06173386052250862, 0.8110419511795044, 0.006245153024792671, 0.03447260707616806, 0.08050490915775299, 0.779870867729187, 0.2479465901851654, 0.38426774740219116, 0.6870184540748596, 0.2310730367898941, 0.07155610620975494, 0.05814361199736595], [0.01971210353076458, 0.10859540849924088, 0.17558348178863525, 0.04931360110640526, 0.4077165424823761, 0.001824796199798584, 0.004386546555906534, 0.0422598272562027, 0.9374924302101135, 0.3226373493671417, 0.06322266161441803, 0.05341457948088646, 0.0039883931167423725, 0.004304073750972748, 0.13460686802864075], [0.018049566075205803, 0.12295468151569366, 0.24470828473567963, 0.04122815281152725, 0.7332677245140076, 0.004472800530493259, 0.0029204280581325293, 0.018685931339859962, 0.4878760874271393, 0.20441682636737823, 0.08441592752933502, 0.4205068051815033, 0.04466289281845093, 0.13263334333896637, 0.0994158536195755], [0.007120466325432062, 0.02300306409597397, 0.2714575231075287, 0.07745856046676636, 0.6446666717529297, 0.0059507740661501884, 0.011145476251840591, 0.13244189321994781, 0.38060593605041504, 0.06726288050413132, 0.22673718631267548, 0.3522229492664337, 0.17927831411361694, 0.524927020072937, 0.09379637986421585], [0.03649899363517761, 0.08160936087369919, 0.2519805133342743, 0.07504414021968842, 0.1795702874660492, 0.006024391856044531, 0.0073743402026593685, 0.061968039721250534, 0.7520835995674133, 0.28517279028892517, 0.1493321657180786, 0.3589819371700287, 0.04636238142848015, 0.16408585011959076, 0.046330999583005905], [0.009416425600647926, 0.1558573991060257, 0.15325002372264862, 0.08311447501182556, 0.6221630573272705, 0.0029961667023599148, 0.006436231546103954, 0.027678541839122772, 0.2543543577194214, 0.47390833497047424, 0.28851544857025146, 0.6220062375068665, 0.014266690239310265, 0.05054754391312599, 0.0578170008957386], [0.04693470522761345, 0.0011674511479213834, 0.01364858541637659, 0.06039872020483017, 0.0427468940615654, 0.0009404723532497883, 0.007858873344957829, 0.0028007859364151955, 0.06382106244564056, 0.03982963413000107, 0.05175205320119858, 0.011254650540649891, 0.0001272865483770147, 0.001588277518749237, 0.15313954651355743], [0.017768997699022293, 0.1465732455253601, 0.15898801386356354, 0.12304693460464478, 0.8442554473876953, 0.006285809446126223, 0.04204265773296356, 0.12739135324954987, 0.8276333808898926, 0.5079721808433533, 0.5299316644668579, 0.8274551630020142, 0.09790517389774323, 0.02651425078511238, 0.11435628682374954], [0.017107579857110977, 0.05770094692707062, 0.07052541524171829, 0.059498131275177, 0.2613165080547333, 0.0009367912425659597, 0.0028308003675192595, 0.01869240775704384, 0.8671534061431885, 0.40041688084602356, 0.03947103023529053, 0.0349445715546608, 0.00177917187102139, 0.002164072822779417, 0.1562660187482834], [0.006599111016839743, 0.004138579126447439, 0.06047067046165466, 0.013185898773372173, 0.15347044169902802, 0.000755132467020303, 0.007522573694586754, 0.002741254400461912, 0.10833818465471268, 0.005474736914038658, 0.009540018625557423, 0.00040286476723849773, 0.004092549905180931, 0.002003892557695508, 0.13896189630031586]]], [[[0.010830877348780632, 0.011870973743498325, 0.10922139137983322, 0.013140714727342129, 0.060979437083005905, 0.24213501811027527, 0.056873127818107605, 0.0565403513610363, 0.1606917381286621, 0.004471848253160715, 0.04391508549451828, 0.16444265842437744, 0.14521700143814087, 0.12183647602796555, 0.18165212869644165], [0.1442122757434845, 0.026047294959425926, 0.4262431859970093, 0.3211715519428253, 0.7946609258651733, 0.48857852816581726, 0.31943926215171814, 0.3322535455226898, 0.8442224860191345, 0.37700119614601135, 0.4491288661956787, 0.725179135799408, 0.5425247550010681, 0.7077597379684448, 0.47353750467300415], [0.004308484960347414, 0.0038143862038850784, 0.01376394834369421, 0.007213444449007511, 0.0352218858897686, 0.009065943770110607, 0.00796457938849926, 0.009648038074374199, 0.012818497605621815, 0.005304576829075813, 0.00578665267676115, 0.025514552369713783, 0.003588201943784952, 0.005116589833050966, 0.1385156214237213], [0.37350767850875854, 0.33144617080688477, 0.1264321357011795, 0.21400198340415955, 0.32627996802330017, 0.09132378548383713, 0.05067773535847664, 0.05911920592188835, 0.47554144263267517, 0.5285797715187073, 0.055136121809482574, 0.07909779250621796, 0.0048016151413321495, 0.023815851658582687, 0.05086187273263931], [0.026979738846421242, 0.17144815623760223, 0.016802728176116943, 0.011190843768417835, 0.05719228833913803, 0.006600439548492432, 0.02541169337928295, 0.056367360055446625, 0.2566111385822296, 0.13847731053829193, 0.02390860766172409, 0.10821771621704102, 0.004193281754851341, 0.024024199694395065, 0.1485961675643921], [0.010539665818214417, 0.02736317366361618, 0.020729688927531242, 0.012272891588509083, 0.037458207458257675, 0.020133765414357185, 0.006475721951574087, 0.0135318823158741, 0.14018985629081726, 0.043190933763980865, 0.014518915675580502, 0.06027117371559143, 0.013409063220024109, 0.008036705665290356, 0.12864065170288086], [0.06693296134471893, 0.05517994612455368, 0.31718623638153076, 0.09396946430206299, 0.13595829904079437, 0.09244473278522491, 0.0043823812156915665, 0.004134675953537226, 0.9252469539642334, 0.10048755258321762, 0.12945091724395752, 0.21572811901569366, 0.034586720168590546, 0.0726432204246521, 0.04207848384976387], [0.07686225324869156, 0.019675375893712044, 0.2417416274547577, 0.08641211688518524, 0.27890217304229736, 0.038729339838027954, 0.01047417800873518, 0.015033761039376259, 0.4832261800765991, 0.05870191380381584, 0.2969569265842438, 0.6193534731864929, 0.12871475517749786, 0.22289764881134033, 0.5152896642684937], [0.27357029914855957, 0.46676310896873474, 0.3964380621910095, 0.19407758116722107, 0.11257106065750122, 0.014855606481432915, 0.047355495393276215, 0.03237777575850487, 0.3466991186141968, 0.3347361087799072, 0.40522828698158264, 0.5460160970687866, 0.16927282512187958, 0.30020883679389954, 0.04839835315942764], [0.03550037741661072, 0.12907657027244568, 0.07532694190740585, 0.016156595200300217, 0.003630127990618348, 0.01967703178524971, 0.04095811769366264, 0.0179570484906435, 0.39472800493240356, 0.07661326229572296, 0.4370958209037781, 0.4819755256175995, 0.022724222391843796, 0.033822834491729736, 0.04362141340970993], [0.021909046918153763, 0.030848275870084763, 0.046106528490781784, 0.06202828511595726, 0.0325893796980381, 0.03412875533103943, 0.03159455209970474, 0.053456224501132965, 0.16627800464630127, 0.058593228459358215, 0.13071225583553314, 0.20816291868686676, 0.06561117619276047, 0.04416830837726593, 0.03868245705962181], [0.012810717336833477, 0.0013835412682965398, 0.03224228695034981, 0.08643268793821335, 0.03331959247589111, 0.030278367921710014, 0.07819522172212601, 0.03789946064352989, 0.1521843820810318, 0.04584735259413719, 0.022775838151574135, 0.3594759702682495, 0.37505412101745605, 0.4203481376171112, 0.0833948627114296], [0.12084313482046127, 0.009313090704381466, 0.17649081349372864, 0.125856414437294, 0.03634244203567505, 0.028733352199196815, 0.006864639464765787, 0.002353896852582693, 0.16829386353492737, 0.1124483197927475, 0.061692144721746445, 0.19240431487560272, 0.09329058974981308, 0.18641597032546997, 0.018957242369651794], [0.026597192510962486, 0.005893908906728029, 0.12369649112224579, 0.06400194019079208, 0.07115989178419113, 0.0058293454349040985, 0.008344992063939571, 0.00957680307328701, 0.04244829714298248, 0.036994293332099915, 0.07189996540546417, 0.04466360807418823, 0.12661096453666687, 0.2742233872413635, 0.042464204132556915], [0.0012156351003795862, 0.0009695529006421566, 0.021633058786392212, 0.003243132960051298, 0.017804604023694992, 0.006560572423040867, 0.00960883591324091, 0.043045539408922195, 0.008467147126793861, 0.0006170565611682832, 0.0028031598776578903, 0.004630656447261572, 1.7895566998049617e-05, 0.00023196694382932037, 0.14134538173675537], [0.3736850321292877, 0.29077818989753723, 0.43184730410575867, 0.4823248088359833, 0.7379603385925293, 0.5093098282814026, 0.5006043910980225, 0.3135696351528168, 0.5183887481689453, 0.13794882595539093, 0.04961319640278816, 0.12779268622398376, 0.1589212864637375, 0.22346213459968567, 0.1422436237335205], [0.15325459837913513, 0.1614270806312561, 0.4186149537563324, 0.16462315618991852, 0.44647181034088135, 0.7114150524139404, 0.12785741686820984, 0.04132780805230141, 0.047578196972608566, 0.12349404394626617, 0.3133608400821686, 0.35326144099235535, 0.30924320220947266, 0.31196898221969604, 0.028064150363206863], [0.06399086862802505, 0.06306004524230957, 0.1948489397764206, 0.12845031917095184, 0.26295408606529236, 0.38098499178886414, 0.0839061513543129, 0.02110268920660019, 0.07144157588481903, 0.01679118163883686, 0.14834797382354736, 0.479995995759964, 0.24741992354393005, 0.2288939356803894, 0.04729384183883667], [0.041305530816316605, 0.00217662681825459, 0.29091107845306396, 0.12698692083358765, 0.3031243085861206, 0.1103614866733551, 0.14891935884952545, 0.018863126635551453, 0.033797744661569595, 0.008303376846015453, 0.009713392704725266, 0.31765925884246826, 0.4755025804042816, 0.4005468487739563, 0.10761724412441254], [0.4954506754875183, 0.04642331227660179, 0.603453516960144, 0.26468321681022644, 0.3210473358631134, 0.15078485012054443, 0.027168329805135727, 0.004181328695267439, 0.10826757550239563, 0.10845811665058136, 0.053085505962371826, 0.20335085690021515, 0.12072784453630447, 0.17107200622558594, 0.059424202889204025], [0.21408557891845703, 0.03960772231221199, 0.43507251143455505, 0.10961537808179855, 0.42240580916404724, 0.06637464463710785, 0.08428787440061569, 0.03856734186410904, 0.0027873425278812647, 0.012926235795021057, 0.019708000123500824, 0.017574653029441833, 0.10679914057254791, 0.20499441027641296, 0.14648839831352234], [0.002137779025360942, 0.0005492505733855069, 0.03787382319569588, 0.004300523083657026, 0.03090864233672619, 0.003432363970205188, 0.010591491125524044, 0.028211969882249832, 0.003533262060955167, 0.0003883022291120142, 0.0014010752784088254, 0.0010855919681489468, 8.133743904181756e-06, 7.628504681633785e-05, 0.13786831498146057], [0.39364972710609436, 0.15414100885391235, 0.5289453864097595, 0.2158767729997635, 0.8369554877281189, 0.5879349708557129, 0.29191306233406067, 0.1240038275718689, 0.0375535674393177, 0.006134674418717623, 0.003127586329355836, 0.02892274223268032, 0.023530103266239166, 0.026029296219348907, 0.16074688732624054], [0.2684386968612671, 0.29252222180366516, 0.6921796798706055, 0.1771971732378006, 0.6445736885070801, 0.7333542704582214, 0.14767038822174072, 0.04686985909938812, 0.030383678153157234, 0.06000908464193344, 0.1879548877477646, 0.5258318781852722, 0.3533342778682709, 0.3370157778263092, 0.05586722865700722], [0.0015460141003131866, 0.010688474401831627, 0.09971211850643158, 0.017146917060017586, 0.1899741291999817, 0.03437719866633415, 0.022833971306681633, 0.015900788828730583, 0.05731913447380066, 0.0008445536368526518, 0.0073861475102603436, 0.06343144923448563, 0.11084617674350739, 0.11975067108869553, 0.13715405762195587]], [[0.021257108077406883, 0.04756314679980278, 0.05559564009308815, 0.030912479385733604, 0.2625647187232971, 0.138688862323761, 0.027820995077490807, 0.05787678435444832, 0.3002224862575531, 0.018701573833823204, 0.027547171339392662, 0.19844435155391693, 0.1917300671339035, 0.07151354849338531, 0.16648255288600922], [0.4235764741897583, 0.10086580365896225, 0.07221788167953491, 0.13654322922229767, 0.04923773929476738, 0.06516944617033005, 0.07642015814781189, 0.147566020488739, 0.013325832784175873, 0.07923475652933121, 0.03588176146149635, 0.02368854358792305, 0.12847480177879333, 0.04384613409638405, 0.18713882565498352], [0.8895729184150696, 0.7431688904762268, 0.3041851818561554, 0.5492796897888184, 0.7013789415359497, 0.2035668045282364, 0.4541507959365845, 0.17740322649478912, 0.37418368458747864, 0.7257221937179565, 0.3302299678325653, 0.32646968960762024, 0.4535413682460785, 0.2710181474685669, 0.06444819271564484], [0.18918083608150482, 0.07354198396205902, 0.03709281235933304, 0.039312511682510376, 0.2119109183549881, 0.32255253195762634, 0.06547961384057999, 0.022612132132053375, 0.0069438498467206955, 0.04682554677128792, 0.04775600507855415, 0.10260774195194244, 0.060122229158878326, 0.07651683688163757, 0.11037445813417435], [0.05778415873646736, 0.1888784021139145, 0.12087801843881607, 0.08340981602668762, 0.2725185453891754, 0.956253707408905, 0.6455949544906616, 0.6532288789749146, 0.3585406243801117, 0.18532338738441467, 0.18782632052898407, 0.09142936766147614, 0.8097347617149353, 0.3558001220226288, 0.037162330001592636], [0.04896414652466774, 0.25620371103286743, 0.11985385417938232, 0.0157163105905056, 0.14219185709953308, 0.22957918047904968, 0.36173656582832336, 0.07001917064189911, 0.3676673173904419, 0.12105175852775574, 0.22853095829486847, 0.07480601221323013, 0.5630075335502625, 0.8219463229179382, 0.12425509095191956], [0.04714362695813179, 0.01630709134042263, 0.04501143842935562, 0.03696214035153389, 0.036871057003736496, 0.14248797297477722, 0.08399422466754913, 0.03027486614882946, 0.0030259382911026478, 0.019033554941415787, 0.2224818617105484, 0.033125121146440506, 0.02079186774790287, 0.04913722351193428, 0.46250322461128235], [0.033912286162376404, 0.0072718155570328236, 0.013269636780023575, 0.010754123330116272, 0.003932052757591009, 0.022333307191729546, 0.05135813727974892, 0.17082874476909637, 0.004249163903295994, 0.009168761782348156, 0.00692910747602582, 0.00042953240335918963, 0.008801857940852642, 0.008872170932590961, 0.02866899035871029], [0.026226887479424477, 0.006219716742634773, 0.016528652980923653, 0.019500089809298515, 0.009756595827639103, 0.01771577261388302, 0.10877248644828796, 0.07924166321754456, 0.026382839307188988, 0.007807224057614803, 0.018975039944052696, 0.009491248056292534, 0.042680755257606506, 0.025040525943040848, 0.31068748235702515], [0.0181743074208498, 0.0022439020685851574, 0.027739310637116432, 0.07926302403211594, 0.007397042121738195, 0.01831221394240856, 0.057637136429548264, 0.025927647948265076, 0.03431807458400726, 0.03189869597554207, 0.20874466001987457, 0.006929311901330948, 0.08810199052095413, 0.09789149463176727, 0.25120988488197327], [0.0006848929915577173, 0.00015734595945104957, 0.0022563491947948933, 0.00281638465821743, 0.00390908308327198, 0.012311742641031742, 0.006667551584541798, 0.010898235253989697, 0.18826207518577576, 0.0010989188449457288, 0.003811799455434084, 0.0007082286756485701, 0.0025871950201690197, 0.0005297476891428232, 0.004719105549156666], [0.008918036706745625, 0.01932302489876747, 0.1743663251399994, 0.04276113957166672, 0.17357498407363892, 0.05217360332608223, 0.01903947815299034, 0.006896412931382656, 0.02532179281115532, 0.019349897280335426, 0.14434273540973663, 0.2454780638217926, 0.06247624009847641, 0.03444024175405502, 0.2827233076095581], [0.014348846860229969, 0.006216275505721569, 0.06011093780398369, 0.05047134682536125, 0.013856974430382252, 0.08402124047279358, 0.0029483914840966463, 0.0018935499247163534, 0.004232283215969801, 0.022591279819607735, 0.34387707710266113, 0.06330335885286331, 0.20501238107681274, 0.1859048306941986, 0.0244001317769289], [0.016000788658857346, 0.003648907644674182, 0.07618206739425659, 0.26581478118896484, 0.00828572828322649, 0.01491115428507328, 0.006984202191233635, 0.00572665361687541, 0.007784067187458277, 0.03336494415998459, 0.19996345043182373, 0.0026567107997834682, 0.14645317196846008, 0.1677580624818802, 0.0739188864827156], [0.033913157880306244, 0.5720782279968262, 0.09820353239774704, 0.06329890340566635, 0.10058190673589706, 0.8026418685913086, 0.08380495011806488, 0.37448471784591675, 0.04885341227054596, 0.01422097533941269, 0.32552391290664673, 0.701602578163147, 0.9988673329353333, 0.9602208137512207, 0.015194611623883247], [0.01701497472822666, 0.004510161932557821, 0.04222021996974945, 0.131240576505661, 0.007172171492129564, 0.0009335885988548398, 0.0025300730485469103, 0.0012859954731538892, 0.013300590217113495, 0.05520036071538925, 0.2908037602901459, 0.0021335158962756395, 0.11976832151412964, 0.046004947274923325, 0.029495948925614357], [0.0007848403765819967, 0.002563882153481245, 0.003471110016107559, 0.009534057229757309, 0.012083875946700573, 0.006908607203513384, 0.0028729254845529795, 0.0018324146512895823, 0.009593485854566097, 0.008395246230065823, 0.009609236381947994, 0.05064208433032036, 0.00595981115475297, 0.002902570180594921, 0.2071433663368225], [0.008253121748566628, 0.01393465232104063, 0.03316362947225571, 0.045629892498254776, 0.015712177380919456, 0.15894818305969238, 0.02510240487754345, 0.013996893540024757, 0.6886083483695984, 0.014645315706729889, 0.04062162712216377, 0.02812274731695652, 0.10265076905488968, 0.10770027339458466, 0.07716524600982666], [0.0017006727866828442, 0.008613905869424343, 0.08540165424346924, 0.014788517728447914, 0.11802737414836884, 0.058780014514923096, 0.008085138164460659, 0.003584004705771804, 0.06396479159593582, 0.006658769678324461, 0.02042919024825096, 0.3806440234184265, 0.01375669613480568, 0.01512871216982603, 0.1676391214132309], [0.017164628952741623, 0.028738657012581825, 0.06823595613241196, 0.08604145050048828, 0.04855107143521309, 0.24198594689369202, 0.008688676171004772, 0.003311790293082595, 0.059665460139513016, 0.08214288204908371, 0.34741461277008057, 0.15404720604419708, 0.18822570145130157, 0.19501997530460358, 0.062469229102134705], [0.04490135982632637, 0.02318926900625229, 0.15967297554016113, 0.36984479427337646, 0.027114713564515114, 0.1867561787366867, 0.04668368771672249, 0.02171866036951542, 0.05653616786003113, 0.08818016946315765, 0.14142879843711853, 0.002535451203584671, 0.06232175603508949, 0.12099058926105499, 0.16113655269145966], [0.07898441702127457, 0.817236065864563, 0.29267793893814087, 0.16063392162322998, 0.31295838952064514, 0.9265751838684082, 0.1967003047466278, 0.5436303615570068, 0.2332589328289032, 0.04864489659667015, 0.5440958142280579, 0.8931991457939148, 0.9993566870689392, 0.9798612594604492, 0.03687797114253044], [0.051174335181713104, 0.009388554841279984, 0.15813162922859192, 0.3707107603549957, 0.02142486348748207, 0.01361497025936842, 0.01679075136780739, 0.00489152641966939, 0.08238242566585541, 0.07653495669364929, 0.14888693392276764, 0.003932347521185875, 0.1416105329990387, 0.05760091543197632, 0.13266737759113312], [0.00042274355655536056, 0.0019217034569010139, 0.0013128711143508554, 0.004135955590754747, 0.004101510625332594, 0.004091422073543072, 0.0013299065176397562, 0.0007323773461394012, 0.006002569571137428, 0.003528070170432329, 0.004258603788912296, 0.04385730251669884, 0.006557406857609749, 0.0025679266545921564, 0.1728060394525528], [0.0034927180968225002, 0.014745223335921764, 0.025302981957793236, 0.04650698974728584, 0.0658985823392868, 0.10278132557868958, 0.009682145901024342, 0.010841106064617634, 0.1757735013961792, 0.03157021477818489, 0.006062814965844154, 0.2611170709133148, 0.3153221011161804, 0.08490109443664551, 0.13624651730060577]], [[0.01888529770076275, 0.5547894835472107, 0.0062187607400119305, 0.02304725907742977, 0.007431741803884506, 0.05333258956670761, 0.13557927310466766, 0.09608769416809082, 0.011193820275366306, 0.006900292821228504, 0.007560353726148605, 0.018807610496878624, 0.018169475719332695, 0.07717052102088928, 0.1439915895462036], [0.045791856944561005, 0.14471176266670227, 0.057932548224925995, 0.15441685914993286, 0.011981116607785225, 0.030152589082717896, 0.13976308703422546, 0.003811573376879096, 0.010053272359073162, 0.1557283103466034, 0.05080341920256615, 0.00967743806540966, 0.003085661679506302, 0.003445286303758621, 0.08783376961946487], [0.010936958715319633, 0.0031021125614643097, 0.009866965003311634, 0.09017129242420197, 0.02775183692574501, 0.0016267865430563688, 0.01958146132528782, 0.003049993421882391, 0.009465858340263367, 0.022049162536859512, 0.013875926844775677, 0.002902107546105981, 0.0008567434852011502, 0.0034160439390689135, 0.13799139857292175], [0.10994840413331985, 0.15032780170440674, 0.0035718681756407022, 0.1491042822599411, 0.020450405776500702, 0.013510379940271378, 0.47067153453826904, 0.6447877883911133, 0.18023402988910675, 0.1876010298728943, 0.011866661719977856, 0.006677938625216484, 0.0005242988117970526, 0.004238110035657883, 0.29615819454193115], [0.06992093473672867, 0.2791251242160797, 0.006900451611727476, 0.053067900240421295, 0.010168666951358318, 0.0023874202743172646, 0.05137968435883522, 0.06462283432483673, 0.11192043125629425, 0.10690896213054657, 0.009735661558806896, 0.04335656389594078, 0.0031411510426551104, 0.011707558296620846, 0.14929862320423126], [0.24040630459785461, 0.43853774666786194, 0.0175826046615839, 0.06282828748226166, 0.03055599145591259, 0.20223812758922577, 0.5439046025276184, 0.8139520287513733, 0.30283859372138977, 0.4911571145057678, 0.09772597998380661, 0.1337594985961914, 0.08667796850204468, 0.03606351464986801, 0.12256386131048203], [0.03999294713139534, 0.1864590346813202, 0.003897173795849085, 0.04184543341398239, 0.0012414547381922603, 0.025941016152501106, 0.05348599702119827, 0.5434274673461914, 0.012460692785680294, 0.31306707859039307, 0.06930337846279144, 0.0021947044879198074, 0.023592861369252205, 0.04260588437318802, 0.01969532109797001], [0.053744781762361526, 0.006899113766849041, 0.0563664473593235, 0.12695427238941193, 0.012777185067534447, 0.08455551415681839, 0.11441048979759216, 0.13062608242034912, 0.19371363520622253, 0.6254263520240784, 0.24294114112854004, 0.020724456757307053, 0.019838949665427208, 0.022365091368556023, 0.1131007969379425], [0.11661048978567123, 0.35882315039634705, 0.03118491731584072, 0.06881216168403625, 0.014698721468448639, 0.0038598491810262203, 0.1485612690448761, 0.39066970348358154, 0.07792866975069046, 0.22571811079978943, 0.040231697261333466, 0.265895277261734, 0.2000368982553482, 0.1125464141368866, 0.24931347370147705], [0.03291217237710953, 0.23853188753128052, 0.04644821211695671, 0.031600918620824814, 0.045192934572696686, 0.0019951597787439823, 0.11113008856773376, 0.36339887976646423, 0.010439107194542885, 0.20188210904598236, 0.027288423851132393, 0.21054767072200775, 0.04143378138542175, 0.0853629931807518, 0.2336580902338028], [0.07334253191947937, 0.14656193554401398, 0.004660916980355978, 0.03353964164853096, 0.00998624786734581, 0.00235390174202621, 0.04832129552960396, 0.031250230967998505, 0.0017524310387670994, 0.10710166394710541, 0.04863408952951431, 0.11276239901781082, 0.00949337612837553, 0.024303043261170387, 0.5020502805709839], [0.15921767055988312, 0.18694822490215302, 0.011401425115764141, 0.15920288860797882, 0.0017978762043640018, 0.00600996520370245, 0.1401643455028534, 0.08585444837808609, 0.05989503860473633, 0.2726706564426422, 0.041456613689661026, 0.0019109381828457117, 0.0026012342423200607, 0.00675933575257659, 0.05683350935578346], [0.6248686909675598, 0.8166397213935852, 0.05456394702196121, 0.3034517765045166, 0.0032548136077821255, 0.03656908869743347, 0.3933179974555969, 0.635881781578064, 0.4090532660484314, 0.6309216618537903, 0.09238837659358978, 0.01225167978554964, 0.0038302247412502766, 0.05015851929783821, 0.4316881597042084], [0.6506885886192322, 0.26984432339668274, 0.19192098081111908, 0.45030322670936584, 0.018604522570967674, 0.06438936293125153, 0.16284945607185364, 0.46218666434288025, 0.2198290228843689, 0.6063108444213867, 0.13934792578220367, 0.19822801649570465, 0.009406321682035923, 0.07906869053840637, 0.39550670981407166], [0.6516265273094177, 0.3494286835193634, 0.13445304334163666, 0.40472084283828735, 0.05377691984176636, 0.043724507093429565, 0.6220480799674988, 0.09338771551847458, 0.1620686650276184, 0.8232020139694214, 0.17699383199214935, 0.03535428270697594, 4.775904380949214e-05, 0.000580178399104625, 0.13870029151439667], [0.40970566868782043, 0.3527304232120514, 0.004458754323422909, 0.09938450157642365, 0.006175781134516001, 0.014084810391068459, 0.22543573379516602, 0.4835565686225891, 0.025563040748238564, 0.39703506231307983, 0.00602720445021987, 0.0051488312892615795, 0.0008810341823846102, 0.0033910071942955256, 0.2277533859014511], [0.19487805664539337, 0.1991150975227356, 0.010765495710074902, 0.08231080323457718, 0.014791524969041348, 0.005413876846432686, 0.2905171811580658, 0.06453394889831543, 0.003980779554694891, 0.08378233760595322, 0.012941073626279831, 0.009292078204452991, 0.0008543379371985793, 0.002103410428389907, 0.1794004589319229], [0.12092277407646179, 0.17967110872268677, 0.0018819703254848719, 0.04615653306245804, 0.002711376640945673, 0.0007180452230386436, 0.10793514549732208, 0.09669310599565506, 0.0005949889309704304, 0.15432700514793396, 0.015202132984995842, 0.003636009059846401, 0.00047353014815598726, 0.0022874167189002037, 0.22825637459754944], [0.14498451352119446, 0.2535317540168762, 0.027076847851276398, 0.14632807672023773, 0.0057570356875658035, 0.011071202345192432, 0.31473973393440247, 0.2956455647945404, 0.07720959931612015, 0.1944134682416916, 0.008117430843412876, 0.0006636073812842369, 0.0008167477208189666, 0.0018315445631742477, 0.15913215279579163], [0.22215187549591064, 0.47823596000671387, 0.018273456022143364, 0.13293205201625824, 0.0049734353087842464, 0.0265207476913929, 0.27213141322135925, 0.33180302381515503, 0.1344960778951645, 0.335622638463974, 0.010143149644136429, 0.0012862810399383307, 0.00035499766818247736, 0.0037611438892781734, 0.27220219373703003], [0.3673586845397949, 0.057844266295433044, 0.06040150299668312, 0.09888742864131927, 0.023171812295913696, 0.05270017683506012, 0.11794743686914444, 0.1507657766342163, 0.008498218841850758, 0.09498187899589539, 0.003615680383518338, 0.010834122076630592, 0.00024780313833616674, 0.0017297717276960611, 0.20351538062095642], [0.6060628294944763, 0.1373525857925415, 0.13755829632282257, 0.4113396406173706, 0.07285188883543015, 0.014519162476062775, 0.5372579097747803, 0.0630655512213707, 0.14564833045005798, 0.695697009563446, 0.06662726402282715, 0.006644518580287695, 1.2849791346525308e-05, 0.00011718441965058446, 0.13694217801094055], [0.16518473625183105, 0.10184229910373688, 0.002064367523416877, 0.05309450253844261, 0.004080682527273893, 0.012669779360294342, 0.18988992273807526, 0.5354599356651306, 0.004024976398795843, 0.07357845455408096, 0.00022774768876843154, 0.00034433722612448037, 4.428778629517183e-05, 0.00011935137445107102, 0.17481543123722076], [0.060375016182661057, 0.09738604724407196, 0.004719918128103018, 0.05357348173856735, 0.007510221563279629, 0.002087255474179983, 0.1777726411819458, 0.04658319056034088, 0.0022654803469777107, 0.02657914347946644, 0.002838509390130639, 0.0023206211626529694, 0.00029234393150545657, 0.0006460589938797057, 0.15720529854297638], [0.006292517296969891, 0.056422796100378036, 0.003871192689985037, 0.016857203096151352, 0.0060961381532251835, 0.01021772250533104, 0.02558758109807968, 0.004345982801169157, 0.003136568469926715, 0.011386821046471596, 0.0007550015579909086, 0.014218548312783241, 0.002899263286963105, 0.00665974011644721, 0.1386014223098755]], [[0.19101674854755402, 0.0880991518497467, 0.25550922751426697, 0.3376496732234955, 0.25425824522972107, 0.2177356481552124, 0.35922226309776306, 0.13405567407608032, 0.2859460711479187, 0.47983312606811523, 0.235154390335083, 0.26708394289016724, 0.2646999657154083, 0.4890832304954529, 0.0349225178360939], [0.12788966298103333, 0.14897412061691284, 0.18708589673042297, 0.1539590060710907, 0.06750026345252991, 0.06459501385688782, 0.24742794036865234, 0.0008040289394557476, 0.08417094498872757, 0.08338519930839539, 0.09756942838430405, 0.05163748189806938, 0.06044981628656387, 0.1204136312007904, 0.005185095127671957], [0.00823432207107544, 0.006774595472961664, 0.011488616466522217, 0.031759701669216156, 0.014620696194469929, 0.015192853286862373, 0.015498323366045952, 0.001623230637051165, 0.04214249551296234, 0.022796856239438057, 0.0813785269856453, 0.058821164071559906, 0.018185952678322792, 0.030505431815981865, 0.13797427713871002], [0.07304069399833679, 0.17316529154777527, 0.0638275146484375, 0.06216027960181236, 0.10879980027675629, 0.2286580353975296, 0.12489848583936691, 0.06798849999904633, 0.12340370565652847, 0.11364749073982239, 0.33209869265556335, 0.7156579494476318, 0.917570948600769, 0.8780012726783752, 0.004697424825280905], [0.04041377454996109, 0.06032548099756241, 0.013153426349163055, 0.12010756880044937, 0.032379359006881714, 0.02533758245408535, 0.03651244193315506, 0.05168384686112404, 0.05184069648385048, 0.20407944917678833, 0.10554968565702438, 0.5571502447128296, 0.039276935160160065, 0.10380254685878754, 0.1458612084388733], [0.025283029302954674, 0.14580176770687103, 0.0262577123939991, 0.01834816485643387, 0.02426275424659252, 0.5010125637054443, 0.025797395035624504, 0.08120379596948624, 0.10846563428640366, 0.05807282403111458, 0.047331083565950394, 0.01890925131738186, 0.041984543204307556, 0.021773895248770714, 0.12734822928905487], [0.11099886894226074, 0.272359162569046, 0.07267793267965317, 0.02685651369392872, 0.04662291333079338, 0.6599292755126953, 0.15850403904914856, 0.1944371908903122, 0.02196124941110611, 0.18415939807891846, 0.2094753533601761, 0.11699666827917099, 0.8625363111495972, 0.6611498594284058, 0.034588079899549484], [0.10045554488897324, 0.003808635985478759, 0.012772331945598125, 0.008206314407289028, 0.016907531768083572, 0.2308196723461151, 0.04502535238862038, 0.16794730722904205, 0.14683513343334198, 0.07804886251688004, 0.12962646782398224, 0.03242946416139603, 0.45433515310287476, 0.3931583762168884, 0.023861808702349663], [0.020261207595467567, 0.011864200234413147, 0.013516101986169815, 0.00783876795321703, 0.006360001862049103, 0.5825139880180359, 0.27136117219924927, 0.28645893931388855, 0.002775657456368208, 0.05587191879749298, 0.01021821890026331, 0.03437367081642151, 0.37942126393318176, 0.11788230389356613, 0.047214996069669724], [0.3444993495941162, 0.4299255907535553, 0.3897337317466736, 0.11608962714672089, 0.07001375406980515, 0.1826992928981781, 0.3195875883102417, 0.1513850837945938, 0.014436168596148491, 0.25265297293663025, 0.18822813034057617, 0.20145024359226227, 0.648497998714447, 0.6856710314750671, 0.13566814363002777], [0.37375974655151367, 0.2605052888393402, 0.636468231678009, 0.14340142905712128, 0.5107957124710083, 0.683059811592102, 0.3617965579032898, 0.3775153160095215, 0.0734284520149231, 0.5245854258537292, 0.5329803228378296, 0.541839063167572, 0.8546188473701477, 0.8892531991004944, 0.08003345131874084], [0.1478864699602127, 0.26107946038246155, 0.2706110179424286, 0.022070137783885002, 0.08394861966371536, 0.7104908227920532, 0.22173403203487396, 0.18465854227542877, 0.3481738865375519, 0.02706378884613514, 0.14399166405200958, 0.24452990293502808, 0.3432118594646454, 0.3138853907585144, 0.0603480227291584], [0.03315366804599762, 0.109662726521492, 0.165960431098938, 0.03089676797389984, 0.00589095801115036, 0.7119044065475464, 0.04612211138010025, 0.03627030551433563, 0.019800378009676933, 0.02169116772711277, 0.07954178750514984, 0.014483828097581863, 0.3210127055644989, 0.25073835253715515, 0.021559905260801315], [0.1801593005657196, 0.7095129489898682, 0.41699883341789246, 0.14223065972328186, 0.03218872845172882, 0.8857168555259705, 0.325775682926178, 0.46090880036354065, 0.31827157735824585, 0.19596631824970245, 0.36584827303886414, 0.568932831287384, 0.05918605625629425, 0.12899020314216614, 0.03239220380783081], [0.15587098896503448, 0.007851594127714634, 0.38951343297958374, 0.26023998856544495, 0.2678505480289459, 0.04164084047079086, 0.060063086450099945, 0.06729273498058319, 0.019880756735801697, 0.0442759171128273, 0.10040930658578873, 0.1083277016878128, 0.0003995952138211578, 0.001039322349242866, 0.14095477759838104], [0.08899319916963577, 0.2356371134519577, 0.40766164660453796, 0.08200893551111221, 0.14033742249011993, 0.12043434381484985, 0.050508081912994385, 0.04391980916261673, 0.2084629088640213, 0.07807423919439316, 0.06514080613851547, 0.6571899652481079, 0.6522034406661987, 0.4899447560310364, 0.0237458273768425], [0.3269592225551605, 0.23715397715568542, 0.21103474497795105, 0.29856637120246887, 0.031984660774469376, 0.019636303186416626, 0.2648169696331024, 0.0041971527971327305, 0.6909844875335693, 0.5414000153541565, 0.4092715382575989, 0.02185220457613468, 0.006548420060425997, 0.013211028650403023, 0.06752441078424454], [0.40959432721138, 0.2696213126182556, 0.4055677354335785, 0.265968382358551, 0.12281941622495651, 0.10883577167987823, 0.16766701638698578, 0.053767129778862, 0.028326192870736122, 0.5353591442108154, 0.3247348368167877, 0.03339260071516037, 0.1199125200510025, 0.14055927097797394, 0.07849014550447464], [0.0703776553273201, 0.17115768790245056, 0.14820680022239685, 0.014450321905314922, 0.036940984427928925, 0.4336852431297302, 0.18269671499729156, 0.1382565200328827, 0.5314536690711975, 0.05019254609942436, 0.11642822623252869, 0.17526941001415253, 0.3684784173965454, 0.3591882586479187, 0.09016428142786026], [0.020959746092557907, 0.2473447471857071, 0.04995026811957359, 0.032434724271297455, 0.004538285546004772, 0.38885483145713806, 0.04268676042556763, 0.035024866461753845, 0.14864443242549896, 0.14174208045005798, 0.13687251508235931, 0.021197974681854248, 0.4566997289657593, 0.37854352593421936, 0.051512595266103745], [0.11558277904987335, 0.8023946285247803, 0.11340320110321045, 0.07801315933465958, 0.012690390460193157, 0.363363116979599, 0.22989940643310547, 0.28700947761535645, 0.3164795935153961, 0.28987860679626465, 0.20186272263526917, 0.5113669037818909, 0.04614659398794174, 0.13675883412361145, 0.05756649002432823], [0.13439694046974182, 0.004173143766820431, 0.22800596058368683, 0.19857077300548553, 0.1396344006061554, 0.007145485375076532, 0.03306930512189865, 0.026599518954753876, 0.02599666267633438, 0.04890456795692444, 0.0713912844657898, 0.040079280734062195, 0.00020046728604938835, 0.0004629320465028286, 0.13767622411251068], [0.21178027987480164, 0.5613860487937927, 0.18598653376102448, 0.13814353942871094, 0.06437420845031738, 0.1469835489988327, 0.09205848723649979, 0.07043211162090302, 0.3314816355705261, 0.1618121713399887, 0.0553976409137249, 0.7871544361114502, 0.7398563027381897, 0.533365786075592, 0.06109875440597534], [0.308572918176651, 0.1810312271118164, 0.10904403775930405, 0.38784971833229065, 0.013434378430247307, 0.011286276392638683, 0.26633715629577637, 0.0027595413848757744, 0.7609409689903259, 0.7608016729354858, 0.6143397688865662, 0.036307673901319504, 0.013564765453338623, 0.02826162986457348, 0.07738469541072845], [0.1500416249036789, 0.027276279404759407, 0.32022449374198914, 0.45847558975219727, 0.23693141341209412, 0.1596660166978836, 0.2821829915046692, 0.005833256058394909, 0.32143598794937134, 0.14477354288101196, 0.029714325442910194, 0.15291856229305267, 0.007731991354376078, 0.029727784916758537, 0.12283544987440109]], [[0.2602275013923645, 0.0514441579580307, 0.4731021821498871, 0.5077798962593079, 0.22717851400375366, 0.04740440100431442, 0.27564913034439087, 0.24302659928798676, 0.05887439846992493, 0.3509802222251892, 0.6124410033226013, 0.11394976824522018, 0.0489780493080616, 0.04593530669808388, 0.01042554248124361], [0.032066281884908676, 0.1349876970052719, 0.04647025838494301, 0.02243492752313614, 0.02574889175593853, 0.03298051655292511, 0.026965852826833725, 0.3248708248138428, 0.005728535819798708, 0.08351098001003265, 0.1499667763710022, 0.16844461858272552, 0.05473209172487259, 0.05656114220619202, 0.10718395560979843], [0.005181984044611454, 0.0008690498070791364, 0.00864254217594862, 0.00306740403175354, 0.10709173232316971, 0.0007182863773778081, 0.004329775460064411, 0.010956686921417713, 0.06760676205158234, 0.010445973835885525, 0.012115269899368286, 0.06696799397468567, 0.0054829977452754974, 0.025371035560965538, 0.13854098320007324], [0.03556624799966812, 0.11754146218299866, 0.010577056556940079, 0.008073115721344948, 0.06965696066617966, 0.0032990325707942247, 0.011276635341346264, 0.09485359489917755, 0.10517128556966782, 0.0125450249761343, 0.007751243654638529, 0.0650070384144783, 0.0006160335033200681, 0.002038064645603299, 0.4774436056613922], [0.13858208060264587, 0.06875398755073547, 0.01532802265137434, 0.10744626820087433, 0.18273182213306427, 0.002165634883567691, 0.069672591984272, 0.11672408878803253, 0.005795653443783522, 0.0880894884467125, 0.05771886929869652, 0.025581423193216324, 0.03904194384813309, 0.07354751974344254, 0.14365413784980774], [0.16291819512844086, 0.050931405276060104, 0.14806726574897766, 0.2683573365211487, 0.2810481786727905, 0.002092417562380433, 0.012745368294417858, 0.01212888304144144, 0.014305775985121727, 0.17753903567790985, 0.1299620419740677, 0.10299177467823029, 0.21836693584918976, 0.06576120108366013, 0.12406044453382492], [0.12156791239976883, 0.39120492339134216, 0.1209033653140068, 0.08395244181156158, 0.29989197850227356, 0.044024936854839325, 0.023133939132094383, 0.05934688448905945, 0.02561376802623272, 0.024757277220487595, 0.04535222053527832, 0.11912120133638382, 0.02126661129295826, 0.03811139240860939, 0.248785600066185], [0.106705442070961, 0.8169862627983093, 0.1967339813709259, 0.01375850010663271, 0.13418887555599213, 0.16134029626846313, 0.005958847235888243, 0.09247319400310516, 0.04806499928236008, 0.025876127183437347, 0.08311128616333008, 0.22926460206508636, 0.05653654783964157, 0.04726153612136841, 0.20836575329303741], [0.04722486063838005, 0.04722658172249794, 0.05176655203104019, 0.00462702801451087, 0.20528024435043335, 0.0011717488523572683, 0.004415996838361025, 0.014451048336923122, 0.028127426281571388, 0.007240481209009886, 0.004411954898387194, 0.10081291943788528, 0.07703132927417755, 0.033158108592033386, 0.21852079033851624], [0.032722555100917816, 0.027063244953751564, 0.014943713322281837, 0.0013555125333368778, 0.016471203416585922, 0.005467826500535011, 0.02999643050134182, 0.014794600196182728, 0.03837134689092636, 0.004397213459014893, 0.01024235412478447, 0.04855721816420555, 0.05723624676465988, 0.051476139575242996, 0.2643129825592041], [0.052069392055273056, 0.003948261961340904, 0.01313212513923645, 0.010319330729544163, 0.04011767730116844, 0.00066552241332829, 0.01502715889364481, 0.007099903654307127, 0.16779832541942596, 0.03226454555988312, 0.052614975720644, 0.014822165481746197, 0.002071568975225091, 0.001763610984198749, 0.05304422974586487], [0.022045070305466652, 0.036587294191122055, 0.06798984855413437, 0.040110163390636444, 0.5405737161636353, 0.015278805047273636, 0.02948732301592827, 0.034845639020204544, 0.27487096190452576, 0.008005083538591862, 0.012681123800575733, 0.10707750916481018, 0.02124345488846302, 0.00868641585111618, 0.4183328449726105], [0.07479816675186157, 0.018890362232923508, 0.2873721718788147, 0.028116360306739807, 0.7967413067817688, 0.008446138352155685, 0.020726248621940613, 0.018564706668257713, 0.33813604712486267, 0.003492887830361724, 0.010393181815743446, 0.18903475999832153, 0.00443642633035779, 0.0231452826410532, 0.42231008410453796], [0.07108656316995621, 0.0021144712809473276, 0.0671088695526123, 0.03148089721798897, 0.7113023400306702, 0.006737539079040289, 0.2500847280025482, 0.023258471861481667, 0.23158760368824005, 0.011219021864235401, 0.04227704927325249, 0.03650788217782974, 0.15078191459178925, 0.09633734077215195, 0.15066072344779968], [0.04487757384777069, 0.009540342725813389, 0.2420971691608429, 0.01275626104325056, 0.3918483257293701, 0.0218670591711998, 0.022137846797704697, 0.08132637292146683, 0.11900310963392258, 0.000993919325992465, 0.03630243241786957, 0.087126724421978, 0.0003738462692126632, 0.02454514056444168, 0.14072805643081665], [0.0048965876922011375, 0.019337626174092293, 0.002879639156162739, 0.0027576948050409555, 0.04260760545730591, 0.003218113211914897, 0.003307115286588669, 0.026640478521585464, 0.011750566773116589, 0.0005104524316266179, 9.575913281878456e-05, 0.057879798114299774, 0.004244217649102211, 0.00609983503818512, 0.28528884053230286], [0.0335795059800148, 0.030716734007000923, 0.023829646408557892, 0.03415534272789955, 0.08875380456447601, 0.0019310596399009228, 0.017619425430893898, 0.012105603702366352, 0.002468202030286193, 0.010380377061665058, 0.01267782598733902, 0.10606792569160461, 0.0014069904573261738, 0.0004161447286605835, 0.19442977011203766], [0.17404082417488098, 0.05758971348404884, 0.12847737967967987, 0.07598815858364105, 0.49957963824272156, 0.003085564589127898, 0.05114232748746872, 0.011464038863778114, 0.06926580518484116, 0.06844814121723175, 0.06813240051269531, 0.08604259043931961, 0.004740274045616388, 0.009239559061825275, 0.19994765520095825], [0.011875619180500507, 0.026503771543502808, 0.054018229246139526, 0.01668175496160984, 0.3499281406402588, 0.01803278550505638, 0.01878167688846588, 0.01221490278840065, 0.15005004405975342, 0.0046301730908453465, 0.005843435879796743, 0.032064031809568405, 0.010490885935723782, 0.00555034726858139, 0.27147379517555237], [0.0646943747997284, 0.047236885875463486, 0.11903148144483566, 0.02203843556344509, 0.4764179587364197, 0.008550588972866535, 0.013687309809029102, 0.008890991099178791, 0.32491248846054077, 0.011557912454009056, 0.009869826957583427, 0.0921611338853836, 0.0031256151851266623, 0.016340140253305435, 0.3438139855861664], [0.17560914158821106, 0.007353567518293858, 0.056802812963724136, 0.032415200024843216, 0.4015137553215027, 0.02137722261250019, 0.35710790753364563, 0.018633568659424782, 0.05862341821193695, 0.02506905421614647, 0.018169963732361794, 0.009134531952440739, 0.07779684662818909, 0.07867905497550964, 0.1750962883234024], [0.05210466682910919, 0.006375414319336414, 0.22638031840324402, 0.012961659580469131, 0.3225522041320801, 0.012402641586959362, 0.024030247703194618, 0.056293144822120667, 0.11919546872377396, 0.0012290689628571272, 0.027758106589317322, 0.025181178003549576, 0.00022994892788119614, 0.012616506777703762, 0.1375768631696701], [0.005459210369735956, 0.03143180534243584, 0.0014205367770045996, 0.0012642937945201993, 0.01687682792544365, 0.007108580321073532, 0.004234722815454006, 0.017920657992362976, 0.003724986221641302, 0.0002761750074569136, 2.4563792976550758e-05, 0.011889445595443249, 0.0013067404506728053, 0.002636768389493227, 0.19040453433990479], [0.031027475371956825, 0.05656901001930237, 0.0113890515640378, 0.024300340563058853, 0.03550150617957115, 0.0024159413296729326, 0.02035972848534584, 0.01581081561744213, 0.002032301388680935, 0.009238713420927525, 0.01651322841644287, 0.11367840319871902, 0.003108791308477521, 0.00086622079834342, 0.16520220041275024], [0.7154905796051025, 0.15825338661670685, 0.49722805619239807, 0.38231807947158813, 0.39668020606040955, 0.051081933081150055, 0.4188354015350342, 0.3623049259185791, 0.3077245056629181, 0.4494604766368866, 0.7933229804039001, 0.20231026411056519, 0.27286192774772644, 0.2623305022716522, 0.06808917224407196]], [[0.437301367521286, 0.15179137885570526, 0.09085877984762192, 0.06997784972190857, 0.17732757329940796, 0.23180970549583435, 0.11514479666948318, 0.32073739171028137, 0.15501314401626587, 0.1294255405664444, 0.06762269139289856, 0.21488851308822632, 0.2614101469516754, 0.12734454870224, 0.049641113728284836], [0.028495818376541138, 0.1544514149427414, 0.06366834789514542, 0.016971074044704437, 0.02302762120962143, 0.054101087152957916, 0.012630121782422066, 0.018889501690864563, 0.004939573351293802, 0.01251249760389328, 0.1164683923125267, 0.009905983693897724, 0.01818472519516945, 0.01017050538212061, 0.04256897792220116], [0.007633751258254051, 0.002589557319879532, 0.02251260355114937, 0.05040144920349121, 0.032673582434654236, 0.0022981506772339344, 0.00627527991309762, 0.0006094649434089661, 0.01362280547618866, 0.006205975078046322, 0.006417383905500174, 0.0010467394022271037, 0.0010408272501081228, 0.007578521966934204, 0.13823428750038147], [0.0074798669666051865, 0.011802621185779572, 0.3115181624889374, 0.22458955645561218, 0.10706131160259247, 0.016402821987867355, 0.046956516802310944, 0.004200803115963936, 0.01468481682240963, 0.014471452683210373, 0.27619558572769165, 0.0038709931541234255, 0.00034889893140643835, 0.0020716534927487373, 0.01783183217048645], [0.015254770405590534, 0.01172303594648838, 0.002065492793917656, 0.005149758420884609, 0.013159574940800667, 0.001197350095026195, 0.018971139565110207, 0.004385960288345814, 0.06813318282365799, 0.021520443260669708, 0.005575989838689566, 0.001505104242824018, 0.0019181625684723258, 0.005167691968381405, 0.15193934738636017], [0.026872141286730766, 0.003412047168239951, 0.03895608335733414, 0.03612855076789856, 0.02536499686539173, 0.03102046251296997, 0.004315483849495649, 0.0027427596505731344, 0.03512648865580559, 0.022632958367466927, 0.05171700567007065, 0.0026941397227346897, 0.0031264815479516983, 0.024213580414652824, 0.12838274240493774], [0.0600903183221817, 0.002928798785433173, 0.0064612883143126965, 0.05414368212223053, 0.029363246634602547, 0.006244697142392397, 0.397325724363327, 0.040878646075725555, 0.005305922590196133, 0.27715954184532166, 0.04618077725172043, 0.008418801240622997, 0.01155431941151619, 0.05281350389122963, 0.025860372930765152], [0.0013151391176506877, 0.002262294292449951, 0.0012738551013171673, 0.0034272209741175175, 0.0030726443510502577, 0.04279911145567894, 0.008567760698497295, 0.17885291576385498, 0.00929640606045723, 0.001624501310288906, 0.02533317357301712, 0.005113683640956879, 0.027247918769717216, 0.07258909195661545, 0.014188846573233604], [0.3408622145652771, 0.07445694506168365, 0.03113507851958275, 0.0754152163863182, 0.014415460638701916, 0.002693483140319586, 0.09953030943870544, 0.11086118221282959, 0.5124953985214233, 0.329039990901947, 0.5092117786407471, 0.027396254241466522, 0.055544231086969376, 0.4057520925998688, 0.09588415175676346], [0.09238530695438385, 0.007053247652947903, 0.0017291916301473975, 0.005093103274703026, 0.0007437380263581872, 0.0014228186337277293, 0.02520381473004818, 0.019087698310613632, 0.47848576307296753, 0.29748132824897766, 0.057576071470975876, 0.01139640249311924, 0.004621520172804594, 0.02937469258904457, 0.015335291624069214], [0.0720675140619278, 0.012255199253559113, 0.04221949726343155, 0.09128241240978241, 0.009349699132144451, 0.008273615501821041, 0.014371694065630436, 0.01100369542837143, 0.1737149953842163, 0.16746114194393158, 0.1696900725364685, 0.014558696188032627, 0.01365632750093937, 0.0269284937530756, 0.016150163486599922], [0.052127860486507416, 0.0038822691421955824, 0.01307338010519743, 0.12611117959022522, 0.013002983294427395, 0.054914653301239014, 0.022843925282359123, 0.0017219025176018476, 0.025739489123225212, 0.3090609014034271, 0.10414470732212067, 0.006550551857799292, 0.006861968897283077, 0.010005415417253971, 0.011784915812313557], [0.074305959045887, 0.010457544587552547, 0.07050318270921707, 0.4022633135318756, 0.04945780336856842, 0.04771194979548454, 0.4660364091396332, 0.07594453543424606, 0.018491366878151894, 0.1513216346502304, 0.09796185791492462, 0.23858080804347992, 0.011272062547504902, 0.09385059028863907, 0.06640274822711945], [0.025815313681960106, 0.0033349080476909876, 0.00924734864383936, 0.012487816624343395, 0.03726305067539215, 0.016575457528233528, 0.23753590881824493, 0.025156090036034584, 0.11919926106929779, 0.04390435293316841, 0.0095932362601161, 0.04137176275253296, 0.08216788619756699, 0.1757660061120987, 0.30195334553718567], [0.05659867450594902, 0.020075146108865738, 0.01205957867205143, 0.004331792704761028, 0.052221644669771194, 0.0230423454195261, 0.0683140978217125, 0.09752152115106583, 0.2100839763879776, 0.0003861601871903986, 0.0032946986611932516, 0.0004593236662913114, 5.027504084864631e-05, 0.0022022551856935024, 0.14128009974956512], [0.08638240396976471, 0.0710444375872612, 0.06771891564130783, 0.17398057878017426, 0.05179189518094063, 0.34193578362464905, 0.2095513492822647, 0.09331211447715759, 0.052257001399993896, 0.006232596468180418, 0.002646914916113019, 0.06318453699350357, 0.019070196896791458, 0.02972061187028885, 0.2659039795398712], [0.26895081996917725, 0.1478959172964096, 0.3258365988731384, 0.404258131980896, 0.3733697533607483, 0.19055484235286713, 0.19857566058635712, 0.01781378500163555, 0.07512970268726349, 0.11693259328603745, 0.1175057590007782, 0.24425068497657776, 0.20241285860538483, 0.2411348670721054, 0.06638508290052414], [0.17850612103939056, 0.12822727859020233, 0.17801056802272797, 0.28459492325782776, 0.058830633759498596, 0.03884930908679962, 0.3513718843460083, 0.061017971485853195, 0.06718380004167557, 0.071348175406456, 0.23821549117565155, 0.03658399358391762, 0.03897847980260849, 0.20709341764450073, 0.13892877101898193], [0.4637373983860016, 0.04377487301826477, 0.15646661818027496, 0.36986854672431946, 0.09056738018989563, 0.23626187443733215, 0.11398540437221527, 0.0026716177817434072, 0.006399102043360472, 0.2626173198223114, 0.20860937237739563, 0.01349638868123293, 0.014208723790943623, 0.042171213775873184, 0.08208009600639343], [0.13806220889091492, 0.04062362387776375, 0.09515099227428436, 0.37904345989227295, 0.10653041303157806, 0.052835192531347275, 0.5728973150253296, 0.03487204387784004, 0.0029783223289996386, 0.07966885715723038, 0.03475099802017212, 0.13843636214733124, 0.006917618680745363, 0.06183210015296936, 0.1688811033964157], [0.02612869068980217, 0.003477374091744423, 0.007765303365886211, 0.0023155075032263994, 0.018893033266067505, 0.022398637607693672, 0.09549611806869507, 0.004012360703200102, 0.0013466936070472002, 0.0021441734861582518, 0.0004924506065435708, 0.006835760548710823, 0.011635211296379566, 0.023846328258514404, 0.22376547753810883], [0.08347997069358826, 0.014491320587694645, 0.015744350850582123, 0.0043899440206587315, 0.05038629099726677, 0.008546282537281513, 0.06458569318056107, 0.03869106248021126, 0.0615551732480526, 0.0002168803766835481, 0.0014501431724056602, 0.00013847390073351562, 1.5032101146061905e-05, 0.0007368824444711208, 0.13783538341522217], [0.072405144572258, 0.036094967275857925, 0.060353852808475494, 0.1382489949464798, 0.03810955956578255, 0.1803218573331833, 0.3716851472854614, 0.04992733895778656, 0.002898369450122118, 0.0008571037324145436, 0.00035707451752386987, 0.02692999318242073, 0.003073085332289338, 0.009645520709455013, 0.17640869319438934], [0.30767515301704407, 0.17313888669013977, 0.17682777345180511, 0.3453424274921417, 0.2732711434364319, 0.18888972699642181, 0.2821650207042694, 0.011036374606192112, 0.013345124199986458, 0.030917862430214882, 0.037141598761081696, 0.14430613815784454, 0.09504004567861557, 0.16429893672466278, 0.0962204858660698], [0.038221023976802826, 0.4632723033428192, 0.022520000115036964, 0.005303966347128153, 0.07163825631141663, 0.030774233862757683, 0.006099082063883543, 0.008936556056141853, 0.02098681591451168, 0.004558844491839409, 0.0029896388296037912, 0.018592750653624535, 0.20478543639183044, 0.08578886091709137, 0.1358346790075302]], [[0.04784957319498062, 0.004609245341271162, 0.006819143425673246, 0.0166594497859478, 0.006965316366404295, 0.000989345251582563, 0.006434451788663864, 0.005414100829511881, 0.027048002928495407, 0.008730669505894184, 0.003844247665256262, 0.0032386775128543377, 0.00916406698524952, 0.02474893629550934, 0.20862001180648804], [0.07474544644355774, 0.14463284611701965, 0.06348620355129242, 0.11649901419878006, 0.010943777859210968, 0.05790672451257706, 0.023460205644369125, 0.09132371097803116, 0.013804412446916103, 0.11923354864120483, 0.04609918221831322, 0.0031168698333203793, 0.02482042834162712, 0.018085025250911713, 0.06715727597475052], [0.07159372419118881, 0.23599489033222198, 0.6269188523292542, 0.2670744061470032, 0.07840307801961899, 0.7659233808517456, 0.4897821247577667, 0.7919513583183289, 0.47275444865226746, 0.20698092877864838, 0.5493778586387634, 0.516223669052124, 0.5164197683334351, 0.6560667753219604, 0.10535097867250443], [0.030506769195199013, 0.030577607452869415, 0.37364113330841064, 0.17907775938510895, 0.011576596647500992, 0.0018289608415216208, 0.0013806972419843078, 0.0006740305689163506, 0.006688407156616449, 0.02554805763065815, 0.1984224021434784, 0.0020999175030738115, 0.0001219362675328739, 0.0009508132934570312, 0.00851912796497345], [0.6425503492355347, 0.21330313384532928, 0.8213226199150085, 0.6104346513748169, 0.4307103455066681, 0.005470798350870609, 0.1284545361995697, 0.017213305458426476, 0.14068865776062012, 0.2507726550102234, 0.6069697737693787, 0.17266355454921722, 0.10257546603679657, 0.4255537688732147, 0.07138645648956299], [0.4833258390426636, 0.07765677571296692, 0.6261626482009888, 0.5845412611961365, 0.457427054643631, 0.012895571999251842, 0.037013884633779526, 0.0045295762829482555, 0.030468540266156197, 0.08583686500787735, 0.4300892949104309, 0.6064226627349854, 0.07339996099472046, 0.02218388393521309, 0.11548874527215958], [0.47047996520996094, 0.06838852912187576, 0.42273014783859253, 0.6319702863693237, 0.4177776277065277, 0.0021309976000338793, 0.00800495408475399, 0.0009326375438831747, 0.00536699453368783, 0.07440605759620667, 0.2710660994052887, 0.5013447999954224, 0.021646764129400253, 0.07749785482883453, 0.039263706654310226], [0.5323148965835571, 0.13256511092185974, 0.352451890707016, 0.6556484699249268, 0.4897412359714508, 0.22345507144927979, 0.17913641035556793, 0.12689323723316193, 0.025374194607138634, 0.169284388422966, 0.17072416841983795, 0.08815333992242813, 0.10821512341499329, 0.18704712390899658, 0.05398408696055412], [0.14081209897994995, 0.02785991132259369, 0.37397870421409607, 0.3742114305496216, 0.4757237732410431, 0.0011322007048875093, 0.0019287536852061749, 0.00011125820310553536, 0.00032575102522969246, 0.0042410544119775295, 0.007025705184787512, 0.007957610301673412, 0.0022035131696611643, 0.0008391661685891449, 0.0013405061326920986], [0.17781563103199005, 0.10205524414777756, 0.04494810104370117, 0.011432765983045101, 0.0031803075689822435, 0.6873405575752258, 0.1935015618801117, 0.2538544535636902, 0.0006125010550022125, 0.0012519293231889606, 0.0009674279135651886, 0.0007319907890632749, 0.006560447160154581, 0.0005926102166995406, 0.045413821935653687], [0.24551935493946075, 0.010881111957132816, 0.16116493940353394, 0.28567203879356384, 0.017490731552243233, 0.03198051080107689, 0.25225502252578735, 0.04009091481566429, 0.1379493623971939, 0.030329206958413124, 0.00725751556456089, 0.0005535308737307787, 0.0001769027003319934, 0.0002177381538785994, 0.11288075149059296], [0.2663186192512512, 0.0841110497713089, 0.39283427596092224, 0.3631373345851898, 0.12446267902851105, 0.0023146900348365307, 0.05166012421250343, 0.025394057855010033, 0.09723125398159027, 0.2633029520511627, 0.09458169341087341, 0.0066002910025417805, 0.0024958536960184574, 0.0033851033076643944, 0.0521465502679348], [0.032533496618270874, 0.005542360246181488, 0.14801643788814545, 0.028237437829375267, 0.09192534536123276, 0.002004631096497178, 0.0014868990983814, 0.0018816014053300023, 0.026168106123805046, 0.03666744753718376, 0.2621643543243408, 0.27366670966148376, 0.011460919864475727, 0.012693443335592747, 0.006134080700576305], [0.028670914471149445, 0.004855436272919178, 0.1069486141204834, 0.02764085866510868, 0.11977140605449677, 0.002686614403501153, 0.007388734724372625, 0.00704799173399806, 0.05677136406302452, 0.0688808336853981, 0.16234178841114044, 0.10548661649227142, 0.1935848444700241, 0.06036479026079178, 0.0025575226172804832], [0.04708265885710716, 0.030478408560156822, 0.0932990089058876, 0.24881142377853394, 0.1139858141541481, 0.03301549330353737, 0.12353643029928207, 0.18121947348117828, 0.3742617964744568, 0.11242274194955826, 0.2673158049583435, 0.05749531090259552, 0.00021243211813271046, 0.005648713558912277, 0.14063234627246857], [0.0034641579259186983, 0.015587975271046162, 0.04098831117153168, 0.025328122079372406, 0.012870541773736477, 0.002695741830393672, 0.0012444279855117202, 0.005834754556417465, 0.005115050356835127, 0.10742342472076416, 0.29450723528862, 0.004624508786946535, 0.028462348505854607, 0.09151851385831833, 0.02349407598376274], [0.00187075010035187, 0.017386021092534065, 0.0033179710153490305, 0.00216178921982646, 0.0006196821923367679, 0.0036519868299365044, 0.020315727218985558, 0.0735914558172226, 0.011879049241542816, 0.05418893322348595, 0.04255518689751625, 0.006776698864996433, 0.007105604745447636, 0.005562894977629185, 0.20312508940696716], [0.018124327063560486, 0.011053304187953472, 0.041496749967336655, 0.08067373931407928, 0.008039752952754498, 0.27361106872558594, 0.12004023045301437, 0.14489491283893585, 0.05115145817399025, 0.09850911796092987, 0.102595254778862, 0.03553636744618416, 0.03690872713923454, 0.062350839376449585, 0.18180564045906067], [0.12148405611515045, 0.0812632218003273, 0.2165963500738144, 0.1931358426809311, 0.08697410672903061, 0.006551810074597597, 0.06685828417539597, 0.03445844352245331, 0.0957593098282814, 0.40685340762138367, 0.14669549465179443, 0.05295614153146744, 0.013317806646227837, 0.016840115189552307, 0.07654187083244324], [0.00987213384360075, 0.006524993572384119, 0.026135168969631195, 0.011839349754154682, 0.033334147185087204, 0.0041054473258554935, 0.0015945311170071363, 0.0032734640408307314, 0.04142798110842705, 0.08157128095626831, 0.26105597615242004, 0.34578391909599304, 0.018666768446564674, 0.02866668626666069, 0.00917118415236473], [0.024172252044081688, 0.01827125810086727, 0.0764245018362999, 0.024589890614151955, 0.045055974274873734, 0.08366040140390396, 0.049236495047807693, 0.16330885887145996, 0.05235174670815468, 0.18916647136211395, 0.2596777379512787, 0.12284716963768005, 0.3776375353336334, 0.3416304290294647, 0.00993264652788639], [0.03498423844575882, 0.015507807955145836, 0.05400218814611435, 0.2035217136144638, 0.06879755109548569, 0.01839861460030079, 0.1265679895877838, 0.19229170680046082, 0.28682830929756165, 0.19846217334270477, 0.19391797482967377, 0.03128731623291969, 0.00016305393364746124, 0.003939830232411623, 0.1374405473470688], [0.013754391111433506, 0.07632532715797424, 0.05588589236140251, 0.060033075511455536, 0.015113652683794498, 0.024528013542294502, 0.0056539555080235004, 0.025407979264855385, 0.0030256062746047974, 0.3076882064342499, 0.2846599221229553, 0.01613902486860752, 0.07589408755302429, 0.25697121024131775, 0.08533195406198502], [0.0015476603293791413, 0.017548631876707077, 0.0017550711054354906, 0.0017123925499618053, 0.0004861274501308799, 0.0013240363914519548, 0.007671059109270573, 0.03281305357813835, 0.0013763409806415439, 0.060824256390333176, 0.04298469424247742, 0.011416267603635788, 0.012759965844452381, 0.012971585616469383, 0.16966485977172852], [0.005211545154452324, 0.0055291797034442425, 0.0040288688614964485, 0.011110500432550907, 0.002710954286158085, 0.0645279660820961, 0.01716793328523636, 0.025083528831601143, 0.010282285511493683, 0.009002536535263062, 0.0011292833369225264, 0.0045064822770655155, 0.007478337734937668, 0.004868943244218826, 0.13875910639762878]], [[0.01263146661221981, 0.08983241021633148, 0.002674827352166176, 0.0008326905663125217, 0.0032944290433079004, 0.06790440529584885, 0.02327594719827175, 0.08626140654087067, 0.0010102109517902136, 0.0009567838278599083, 0.001915089669637382, 0.019144434481859207, 0.060631223022937775, 0.04236740246415138, 0.2042645514011383], [0.12322216480970383, 0.14532910287380219, 0.08289580047130585, 0.07800436019897461, 0.016899574548006058, 0.20651613175868988, 0.15389330685138702, 0.08048079907894135, 0.023754820227622986, 0.08939354121685028, 0.05408218502998352, 0.0083498889580369, 0.16772767901420593, 0.03971855714917183, 0.029394451528787613], [0.002537816995754838, 0.0036866364534944296, 0.0026212686207145452, 0.0010326605988666415, 0.0028582154773175716, 0.0016078348271548748, 0.0024177017621695995, 0.004757970105856657, 0.007405414246022701, 0.0004943490494042635, 0.0008183143800124526, 0.0020540759433060884, 0.0008841927628964186, 0.0009274804615415633, 0.13894422352313995], [0.18076959252357483, 0.11159703880548477, 0.07333940267562866, 0.12368053197860718, 0.1442640721797943, 0.3224244713783264, 0.2286587655544281, 0.10576390475034714, 0.0873323604464531, 0.0707816481590271, 0.07077325880527496, 0.024980774149298668, 0.015894055366516113, 0.01236753724515438, 0.034113459289073944], [0.008514223620295525, 0.006442691199481487, 0.003549255197867751, 0.00919315591454506, 0.0011393448803573847, 0.0005870977183803916, 0.02400296926498413, 0.03577389195561409, 0.006469632964581251, 0.004828252829611301, 0.0027150637470185757, 9.597353346180171e-05, 0.00011822552187368274, 0.000396552961319685, 0.1521017998456955], [0.0016907083336263895, 9.336868970422074e-05, 0.0023900996893644333, 0.0018071996746584773, 0.001690928009338677, 0.0010278637055307627, 0.008010926656425, 0.0018918663263320923, 0.0009378245449624956, 0.0005185406771488488, 0.00012474792310968041, 0.00014544214354828, 2.7525844416231848e-05, 2.095987474604044e-05, 0.12926018238067627], [0.08279342949390411, 0.00717265997081995, 0.01113244891166687, 0.030300047248601913, 0.03227340802550316, 0.02679654024541378, 0.2711687386035919, 0.12656770646572113, 0.0010184150887653232, 0.0069296094588935375, 0.006689318455755711, 0.00307065830565989, 0.004024384077638388, 0.006041096989065409, 0.12722525000572205], [0.09468965977430344, 0.010531323030591011, 0.1253902167081833, 0.09483902901411057, 0.060478318482637405, 0.1959676593542099, 0.5850688219070435, 0.11734473705291748, 0.08924026787281036, 0.031869061291217804, 0.04437774419784546, 0.004531644284725189, 0.19630968570709229, 0.04580901935696602, 0.04253998026251793], [0.03443194553256035, 0.006786322686821222, 0.08545193076133728, 0.2555176913738251, 0.16119416058063507, 0.3760574460029602, 0.3180745542049408, 0.0858285129070282, 0.0052651395089924335, 0.035345133394002914, 0.0046972003765404224, 0.00805696938186884, 0.0738091915845871, 0.004572577308863401, 0.028640231117606163], [0.26599034667015076, 0.06405031681060791, 0.39913085103034973, 0.7390084862709045, 0.8533709049224854, 0.0830850899219513, 0.22198519110679626, 0.15359464287757874, 0.0286090150475502, 0.1338224709033966, 0.06985709816217422, 0.03841168060898781, 0.1308237761259079, 0.01580808497965336, 0.010780439712107182], [0.16064751148223877, 0.5348425507545471, 0.09399141371250153, 0.3709404170513153, 0.3757614493370056, 0.2272261530160904, 0.2699662148952484, 0.46868544816970825, 0.09081633388996124, 0.07856583595275879, 0.054298948496580124, 0.10659310221672058, 0.05178465321660042, 0.012835889123380184, 0.19243957102298737], [0.33067551255226135, 0.40668511390686035, 0.03748138248920441, 0.16017457842826843, 0.02931954525411129, 0.1285390406847, 0.43687552213668823, 0.6227295398712158, 0.016583241522312164, 0.054699335247278214, 0.43602558970451355, 0.028376825153827667, 0.1860552728176117, 0.202489972114563, 0.03443598374724388], [0.025147954002022743, 0.023277895525097847, 0.036982107907533646, 0.030706623569130898, 0.00253032217733562, 0.08060919493436813, 0.062497250735759735, 0.22720953822135925, 0.015824737027287483, 0.020865583792328835, 0.051981136202812195, 0.016274577006697655, 0.3496847152709961, 0.19709302484989166, 0.00854758732020855], [0.0009813109645619988, 0.0007951235747896135, 0.007896890863776207, 0.006039812229573727, 0.001424357295036316, 0.003153599100187421, 0.0010362794855609536, 0.006138501223176718, 0.00410880520939827, 0.003359388094395399, 0.008728301152586937, 0.0021525975316762924, 0.2318088710308075, 0.017491629347205162, 0.0005464124260470271], [0.008814784698188305, 0.009578033350408077, 0.008741176687180996, 0.002597709419205785, 0.0019302073633298278, 0.02750723622739315, 0.010486552491784096, 0.061721935868263245, 0.05738110467791557, 0.0038812088314443827, 0.08735688030719757, 0.00500333309173584, 3.085857315454632e-05, 0.005531619768589735, 0.14116442203521729], [0.015857994556427002, 0.010374038480222225, 0.002225207630544901, 0.002974742790684104, 0.0010843537747859955, 0.007387869525700808, 0.006818806286901236, 0.0318806953728199, 0.1651621013879776, 0.21757511794567108, 0.2911650240421295, 0.08204617351293564, 0.016449127346277237, 0.10985822230577469, 0.0020742996130138636], [0.01972219906747341, 0.20374125242233276, 0.0031293979845941067, 0.004390338435769081, 0.031924858689308167, 0.06048818305134773, 0.0774247944355011, 0.7845978140830994, 0.15838612616062164, 0.06142642721533775, 0.0820784792304039, 0.20785683393478394, 0.46646884083747864, 0.42270010709762573, 0.053927596658468246], [0.026567673310637474, 0.2768426239490509, 0.016553064808249474, 0.07253812253475189, 0.029352964833378792, 0.034967049956321716, 0.09283487498760223, 0.5970632433891296, 0.02342795394361019, 0.04057195410132408, 0.06215028092265129, 0.2966896891593933, 0.4489157795906067, 0.24187524616718292, 0.048112284392118454], [0.14453455805778503, 0.4129781723022461, 0.021322425454854965, 0.11776001751422882, 0.008680691011250019, 0.12525556981563568, 0.1459336131811142, 0.4943058490753174, 0.041365865617990494, 0.06633096933364868, 0.48416346311569214, 0.027247071266174316, 0.10342812538146973, 0.15874288976192474, 0.04535134881734848], [0.03164434805512428, 0.10487183183431625, 0.019769076257944107, 0.0709872916340828, 0.0046073514968156815, 0.12636253237724304, 0.06114564463496208, 0.5786424875259399, 0.17960773408412933, 0.15923625230789185, 0.14680741727352142, 0.04373620077967644, 0.20528176426887512, 0.14476445317268372, 0.03252548724412918], [0.03216148540377617, 0.04786192253232002, 0.0904572606086731, 0.284318745136261, 0.04915444552898407, 0.20336958765983582, 0.019341057166457176, 0.31598398089408875, 0.503376841545105, 0.2976534068584442, 0.3550446927547455, 0.318871408700943, 0.31741514801979065, 0.09137054532766342, 0.022498751059174538], [0.00784912146627903, 0.004314524121582508, 0.007757026236504316, 0.004281783476471901, 0.001910648075863719, 0.00898022297769785, 0.007197065278887749, 0.05121663585305214, 0.12398385256528854, 0.006457128562033176, 0.09335841238498688, 0.0023844544775784016, 1.3785818737233058e-05, 0.0021891386713832617, 0.13778245449066162], [0.0865921899676323, 0.029389984905719757, 0.007211814168840647, 0.022628001868724823, 0.003064699238166213, 0.026838112622499466, 0.02777392417192459, 0.17195671796798706, 0.5349084734916687, 0.37311822175979614, 0.5073185563087463, 0.12468769401311874, 0.014684900641441345, 0.11363118886947632, 0.01852630451321602], [0.021940317004919052, 0.17988227307796478, 0.0027716639451682568, 0.0058884406462311745, 0.02112143486738205, 0.056551095098257065, 0.09669405966997147, 0.8433947563171387, 0.1836535632610321, 0.048101164400577545, 0.0939687192440033, 0.12228170782327652, 0.5153423547744751, 0.4533718526363373, 0.10564926266670227], [0.07970402389764786, 0.263812392950058, 0.027112353593111038, 0.06228066235780716, 0.03007029928267002, 0.5465735197067261, 0.2176109254360199, 0.5667538046836853, 0.10334119945764542, 0.3484029769897461, 0.1586397886276245, 0.28290486335754395, 0.07807470858097076, 0.405972421169281, 0.12247955799102783]]], [[[0.02659090794622898, 0.049626123160123825, 0.04500019550323486, 0.012677792459726334, 0.33557751774787903, 0.02776678465306759, 0.02675992250442505, 0.09967876970767975, 0.04216820374131203, 0.009756066836416721, 0.0133897690102458, 0.12886802852153778, 0.03152704983949661, 0.046163998544216156, 0.21004843711853027], [0.05978302285075188, 0.18161648511886597, 0.038620203733444214, 0.022025080397725105, 0.09790226072072983, 0.04398013651371002, 0.00788698997348547, 0.04135579988360405, 0.0068543110974133015, 0.03809167072176933, 0.03150040656328201, 0.0462106354534626, 0.024762138724327087, 0.011792140081524849, 0.015839271247386932], [0.005166883580386639, 0.0005590450600720942, 0.007114546839147806, 0.0015656572068110108, 0.02179996483027935, 0.0010864944197237492, 0.0051814797334373, 0.0011148365447297692, 0.00816393457353115, 0.0019027285743504763, 0.005033016670495272, 0.010743028484284878, 0.0006906923954375088, 0.0011143455049023032, 0.16189540922641754], [0.17136499285697937, 0.002046054694801569, 0.4725193679332733, 0.24347566068172455, 0.1026763990521431, 0.00369152519851923, 0.013768541626632214, 0.003912978805601597, 0.022358577698469162, 0.06323882192373276, 0.28539538383483887, 0.009778834879398346, 0.0043070269748568535, 0.020384330302476883, 0.006856778170913458], [0.18433871865272522, 0.013500750064849854, 0.42166435718536377, 0.1935500204563141, 0.3502363860607147, 0.0009389789775013924, 0.0472395233809948, 0.015336934477090836, 0.07204270362854004, 0.07276465743780136, 0.4023721218109131, 0.016390468925237656, 0.00493515282869339, 0.01088448241353035, 0.18081046640872955], [0.01929071731865406, 3.154709338559769e-05, 0.04895680397748947, 0.04499320685863495, 0.03726757690310478, 0.0012487026397138834, 0.06078735366463661, 0.0025376947596669197, 0.023622047156095505, 0.008605116978287697, 0.05601886287331581, 0.011475598439574242, 0.0013240767875686288, 0.009706309996545315, 0.13962702453136444], [0.032548993825912476, 0.0047013829462230206, 0.08043498545885086, 0.08197268843650818, 0.43236956000328064, 0.013080407865345478, 0.006017346400767565, 0.05529334023594856, 0.01970849372446537, 0.004050384275615215, 0.0073967562057077885, 0.005829385481774807, 0.0008975209202617407, 0.0025361862499266863, 0.011671289801597595], [0.046304989606142044, 0.026358718052506447, 0.20277923345565796, 0.3021180331707001, 0.6281617879867554, 0.19840610027313232, 0.12000668793916702, 0.21165543794631958, 0.0507807619869709, 0.10083203762769699, 0.17539183795452118, 0.08392243832349777, 0.036049142479896545, 0.06088141351938248, 0.024198466911911964], [0.016816509887576103, 0.003118144813925028, 0.035858120769262314, 0.02315649762749672, 0.2957051992416382, 0.0033856350928545, 0.008419573307037354, 0.013085800223052502, 0.0065522813238203526, 0.004261805210262537, 0.0022621729876846075, 0.0015856586396694183, 0.00012999074533581734, 0.00036330719012767076, 0.004947974346578121], [0.13966688513755798, 0.051315873861312866, 0.16794879734516144, 0.17204447090625763, 0.02530861273407936, 0.1971883773803711, 0.6035643219947815, 0.35590535402297974, 0.01904589682817459, 0.14328262209892273, 0.05827813595533371, 0.12283631414175034, 0.08582676202058792, 0.021607764065265656, 0.09174748510122299], [0.07622234523296356, 0.021088531240820885, 0.13214311003684998, 0.1876712292432785, 0.09946685284376144, 0.0739995539188385, 0.16667790710926056, 0.06527374684810638, 0.2691768705844879, 0.1298666000366211, 0.20347969233989716, 0.28972044587135315, 0.16063560545444489, 0.23408198356628418, 0.02879655919969082], [0.04186922311782837, 0.028065834194421768, 0.2365874946117401, 0.22718128561973572, 0.717268168926239, 0.0283160749822855, 0.047574929893016815, 0.22635598480701447, 0.046485841274261475, 0.11764083057641983, 0.11684223264455795, 0.600357711315155, 0.07936308532953262, 0.1614740490913391, 0.02326863817870617], [0.002160860225558281, 0.00041385856457054615, 0.0032894921023398638, 0.004175879992544651, 0.09230346977710724, 0.00037096597952768207, 0.00036027038004249334, 0.000777967507019639, 0.0010948613053187728, 0.006351495627313852, 0.00803811103105545, 0.2546491026878357, 0.005140772555023432, 0.0052158161997795105, 0.0018242541700601578], [0.01453752163797617, 0.0016249779146164656, 0.07837095856666565, 0.046283330768346786, 0.5220571756362915, 0.00571427633985877, 0.011274048127233982, 0.0005770810530520976, 0.06172677502036095, 0.028573052957654, 0.1375623345375061, 0.2926015257835388, 0.17741695046424866, 0.13592077791690826, 0.025488857179880142], [0.0018199050100520253, 1.759366932674311e-05, 0.005607981700450182, 0.029583722352981567, 0.009902501478791237, 0.00240499060600996, 0.016255119815468788, 0.008434450253844261, 0.0070381201803684235, 0.006882159970700741, 0.008103356696665287, 0.009371891617774963, 3.180988642270677e-05, 0.0005422193789854646, 0.14323127269744873], [0.04913536086678505, 0.005111359525471926, 0.3943053185939789, 0.16504207253456116, 0.1333204060792923, 0.007373967207968235, 0.00649205781519413, 0.005781218875199556, 0.0696163922548294, 0.17078818380832672, 0.43588367104530334, 0.2441176176071167, 0.044073574244976044, 0.13962700963020325, 0.0038013174198567867], [0.02972331829369068, 0.032405998557806015, 0.13676248490810394, 0.2985995411872864, 0.6838041543960571, 0.17950911819934845, 0.02566559985280037, 0.299430251121521, 0.06906868517398834, 0.09219349920749664, 0.14271143078804016, 0.15384355187416077, 0.31184810400009155, 0.37699857354164124, 0.11869719624519348], [0.035901740193367004, 0.049252428114414215, 0.13651704788208008, 0.3431343734264374, 0.4621880352497101, 0.07741573452949524, 0.035817742347717285, 0.1879495084285736, 0.09167803823947906, 0.15167558193206787, 0.20264029502868652, 0.22310277819633484, 0.27972275018692017, 0.27912822365760803, 0.1079779863357544], [0.03869367763400078, 0.07609386742115021, 0.09811960905790329, 0.19582945108413696, 0.7770717144012451, 0.05828123167157173, 0.03398818522691727, 0.4334997236728668, 0.06648975610733032, 0.07675088942050934, 0.06197739765048027, 0.7435874938964844, 0.14106591045856476, 0.2445826381444931, 0.04634908586740494], [0.0033209763932973146, 0.0013802923494949937, 0.007923663593828678, 0.01537866611033678, 0.27329060435295105, 0.0012711664894595742, 0.000925537955481559, 0.0031033798586577177, 0.00518713379278779, 0.008014743216335773, 0.01865261048078537, 0.32840412855148315, 0.015081376768648624, 0.0187647957354784, 0.007287481799721718], [0.012120293453335762, 0.00801909901201725, 0.05887366458773613, 0.08173726499080658, 0.42918333411216736, 0.0074272770434618, 0.018144551664590836, 0.002390465000644326, 0.19959968328475952, 0.01595914363861084, 0.19477497041225433, 0.24081164598464966, 0.32190656661987305, 0.2620943486690521, 0.06223426014184952], [0.001324097509495914, 1.9873512428603135e-05, 0.0026336663868278265, 0.025088831782341003, 0.006480309646576643, 0.0015246026450768113, 0.009156930260360241, 0.006450172513723373, 0.006447002291679382, 0.003797400277107954, 0.0037222199607640505, 0.006030225194990635, 1.9453302229521796e-05, 0.0003723614208865911, 0.13770580291748047], [0.23361828923225403, 0.06709202378988266, 0.7719610333442688, 0.734594464302063, 0.7922726273536682, 0.049216482788324356, 0.04663456231355667, 0.060855433344841, 0.40224209427833557, 0.20935069024562836, 0.5060975551605225, 0.5454070568084717, 0.2919921875, 0.420108824968338, 0.08753460645675659], [0.01675574854016304, 0.0394110269844532, 0.07827049493789673, 0.20941881835460663, 0.5690934658050537, 0.13831959664821625, 0.015872817486524582, 0.2790753245353699, 0.07380014657974243, 0.05484941974282265, 0.11329877376556396, 0.046586740761995316, 0.27540746331214905, 0.3769146502017975, 0.12728242576122284], [0.13399043679237366, 0.38312259316444397, 0.21414920687675476, 0.1335369348526001, 0.883351743221283, 0.17629003524780273, 0.21391625702381134, 0.35840436816215515, 0.7405950427055359, 0.11166028678417206, 0.2222289741039276, 0.2562817633152008, 0.20710349082946777, 0.2988908290863037, 0.10401280969381332]], [[0.169734388589859, 0.018695855513215065, 0.1739528477191925, 0.1591939628124237, 0.2628772258758545, 0.10412096232175827, 0.10786166787147522, 0.024563027545809746, 0.26776236295700073, 0.15710414946079254, 0.04751116409897804, 0.10171505063772202, 0.02745870314538479, 0.022933470085263252, 0.11237789690494537], [0.04881957918405533, 0.17062845826148987, 0.0187830850481987, 0.030382977798581123, 0.08311481773853302, 0.03788991644978523, 0.005156277678906918, 0.026916639879345894, 0.06639944016933441, 0.03180782124400139, 0.02173716016113758, 0.05343012511730194, 0.01850084401667118, 0.0033381145913153887, 0.04681381955742836], [0.11046597361564636, 0.13029024004936218, 0.30802851915359497, 0.31618139147758484, 0.21513698995113373, 0.08858107775449753, 0.07770872116088867, 0.030179373919963837, 0.2956576347351074, 0.19506438076496124, 0.06668522953987122, 0.15814362466335297, 0.07954283803701401, 0.09008871018886566, 0.11347464472055435], [0.14630576968193054, 0.10272074490785599, 0.06626180559396744, 0.39613619446754456, 0.5213132500648499, 0.09462913125753403, 0.19745559990406036, 0.14176879823207855, 0.45916420221328735, 0.2814978361129761, 0.19076579809188843, 0.7478294968605042, 0.15201923251152039, 0.4428024888038635, 0.11204658448696136], [0.17077980935573578, 0.372023344039917, 0.03066021017730236, 0.20403380692005157, 0.25160810351371765, 0.047236956655979156, 0.19034826755523682, 0.09997845441102982, 0.22249065339565277, 0.14956896007061005, 0.12211201339960098, 0.43811750411987305, 0.32559871673583984, 0.4463178217411041, 0.1688702404499054], [0.001587467617355287, 0.0028523027431219816, 0.001275891438126564, 0.007771230302751064, 0.06833823025226593, 0.016362184658646584, 0.01554875634610653, 0.0395360104739666, 0.020186755806207657, 0.02848842740058899, 0.006796931382268667, 0.08043718338012695, 0.1258731484413147, 0.048048797994852066, 0.14538481831550598], [0.19441094994544983, 0.026329312473535538, 0.03907056525349617, 0.5187185406684875, 0.06508557498455048, 0.04464683309197426, 0.23734036087989807, 0.10510969161987305, 0.23671847581863403, 0.2550508677959442, 0.2969563603401184, 0.31371036171913147, 0.023362383246421814, 0.04756302013993263, 0.09379850327968597], [0.009693926200270653, 0.06855454295873642, 0.04046608507633209, 0.021632034331560135, 0.07003092765808105, 0.1099655032157898, 0.02166297659277916, 0.14673617482185364, 0.08559776097536087, 0.021444879472255707, 0.06376301497220993, 0.07838241755962372, 0.2981177270412445, 0.05645254626870155, 0.11510419100522995], [0.1475960612297058, 0.11415769904851913, 0.09677327424287796, 0.22716772556304932, 0.05128113925457001, 0.0685737207531929, 0.17258046567440033, 0.05221087113022804, 0.2985250651836395, 0.36185649037361145, 0.6199293732643127, 0.5016448497772217, 0.08136574923992157, 0.06544326990842819, 0.09482244402170181], [0.16866622865200043, 0.03890697658061981, 0.038960762321949005, 0.045146964490413666, 0.003443084890022874, 0.025941072031855583, 0.02535194903612137, 0.01214737631380558, 0.39030662178993225, 0.11890958994626999, 0.2736153304576874, 0.3244759440422058, 0.00968784186989069, 0.014615286141633987, 0.03826850652694702], [0.08395736664533615, 0.10560688376426697, 0.29490047693252563, 0.15838190913200378, 0.20854075253009796, 0.047574300318956375, 0.025914132595062256, 0.0076736449263989925, 0.23083198070526123, 0.11239635199308395, 0.08150741457939148, 0.3915822207927704, 0.126749187707901, 0.08327525854110718, 0.07453686743974686], [0.08537011593580246, 0.01334940642118454, 0.026223814114928246, 0.09485415369272232, 0.04081009700894356, 0.021519087255001068, 0.04835912212729454, 0.008561250753700733, 0.1425430029630661, 0.15310505032539368, 0.12245412170886993, 0.15674236416816711, 0.03265313804149628, 0.020860055461525917, 0.1338454782962799], [0.009048069827258587, 0.008220783434808254, 0.0010462020291015506, 0.0073586152866482735, 0.01628630980849266, 0.0030796914361417294, 0.0014804736711084843, 0.0016866090008988976, 0.021953675895929337, 0.024090107530355453, 0.02321471832692623, 0.2417944222688675, 0.00791110284626484, 0.012413977645337582, 0.02231968566775322], [0.02412300556898117, 0.02128133550286293, 0.018482450395822525, 0.016898121684789658, 0.07439899444580078, 0.03563898429274559, 0.04473365843296051, 0.0026737016160041094, 0.06965204328298569, 0.10727399587631226, 0.046027760952711105, 0.33166152238845825, 0.12371443957090378, 0.07036767154932022, 0.15801618993282318], [0.007644897326827049, 0.000292555516352877, 0.08444877713918686, 0.17402730882167816, 0.16615508496761322, 0.013423392549157143, 0.054235123097896576, 0.007257240824401379, 0.08712441474199295, 0.012547464109957218, 0.0328214131295681, 0.2736492455005646, 0.0037261026445776224, 0.09982366114854813, 0.13941559195518494], [0.07466596364974976, 0.11066461354494095, 0.02582395263016224, 0.1052846685051918, 0.0988694354891777, 0.13372771441936493, 0.10285167396068573, 0.04043884575366974, 0.12614820897579193, 0.00874736811965704, 0.006169801577925682, 0.3642371892929077, 0.13258321583271027, 0.14621633291244507, 0.16873647272586823], [0.23522600531578064, 0.0398484542965889, 0.3737937808036804, 0.288825660943985, 0.10485613346099854, 0.11366727948188782, 0.29695606231689453, 0.06251946091651917, 0.35146233439445496, 0.04921486973762512, 0.25325968861579895, 0.33112239837646484, 0.06967249512672424, 0.050063006579875946, 0.0896972194314003], [0.1151093989610672, 0.085483118891716, 0.1238018348813057, 0.10984596610069275, 0.07372570037841797, 0.07080911099910736, 0.04283013194799423, 0.011434272862970829, 0.6184931993484497, 0.031299810856580734, 0.1232943907380104, 0.4399086534976959, 0.16973690688610077, 0.18915507197380066, 0.06319096684455872], [0.23179487884044647, 0.03441762179136276, 0.058240070939064026, 0.17834095656871796, 0.049968671053647995, 0.038375332951545715, 0.05405527353286743, 0.00672679441049695, 0.09475977718830109, 0.0764862671494484, 0.1440851390361786, 0.11337311565876007, 0.06998162716627121, 0.031302694231271744, 0.13650138676166534], [0.037197839468717575, 0.022889001294970512, 0.00443503400310874, 0.02830665186047554, 0.056754183024168015, 0.011282439343631268, 0.008815057575702667, 0.005641489755362272, 0.03366301208734512, 0.01200089417397976, 0.022881681099534035, 0.24835483729839325, 0.020306341350078583, 0.028865927830338478, 0.09140723943710327], [0.019821494817733765, 0.0461096465587616, 0.009799499064683914, 0.008886821568012238, 0.03164605051279068, 0.03408728539943695, 0.06531291455030441, 0.004583337344229221, 0.015776870772242546, 0.0067581660114228725, 0.005247185938060284, 0.0803409293293953, 0.12878651916980743, 0.033680036664009094, 0.15540239214897156], [0.006374652031809092, 0.0003620072384364903, 0.05079201981425285, 0.10443739593029022, 0.13200052082538605, 0.007841442711651325, 0.04038690775632858, 0.005943085998296738, 0.04502689838409424, 0.005707652773708105, 0.010736361145973206, 0.17095635831356049, 0.0034604808315634727, 0.08947119116783142, 0.1356668770313263], [0.05784226581454277, 0.06101800128817558, 0.011293647810816765, 0.030310506001114845, 0.02692366950213909, 0.10355494171380997, 0.1643158346414566, 0.02146345190703869, 0.10686127096414566, 0.0006235101609490812, 0.001034505432471633, 0.12770172953605652, 0.08152752369642258, 0.06569667905569077, 0.13584844768047333], [0.24130187928676605, 0.04057329148054123, 0.37395209074020386, 0.32695549726486206, 0.18701796233654022, 0.1542418897151947, 0.4307348132133484, 0.07850468903779984, 0.24226921796798706, 0.027551302686333656, 0.17328326404094696, 0.256756991147995, 0.1007629856467247, 0.0746576264500618, 0.1026487648487091], [0.18065117299556732, 0.0850963443517685, 0.37481072545051575, 0.36960142850875854, 0.042269542813301086, 0.04689870774745941, 0.10553675144910812, 0.031215613707900047, 0.03850337490439415, 0.055640675127506256, 0.11964564025402069, 0.20274300873279572, 0.22541530430316925, 0.07314471900463104, 0.12492100149393082]], [[0.2626786530017853, 0.0849713385105133, 0.11954734474420547, 0.09299539029598236, 0.12019845843315125, 0.1675114780664444, 0.12060416489839554, 0.1292921006679535, 0.33819568157196045, 0.3146125078201294, 0.20831438899040222, 0.39596518874168396, 0.2145393043756485, 0.2666572332382202, 0.05294949933886528], [0.1368129849433899, 0.16135744750499725, 0.15528292953968048, 0.24771884083747864, 0.1416730433702469, 0.05803852900862694, 0.07394444942474365, 0.10563277453184128, 0.033661823719739914, 0.18054474890232086, 0.1985052525997162, 0.05316935107111931, 0.05009648948907852, 0.043446026742458344, 0.03412564843893051], [0.0030849967151880264, 0.0006440586876124144, 0.016017315909266472, 0.0037563794758170843, 0.009170617908239365, 0.0008218333241529763, 0.0032779525499790907, 0.0006974118296056986, 0.12044321000576019, 0.005983977112919092, 0.011704917997121811, 0.023849062621593475, 0.0031650178134441376, 0.01169323269277811, 0.16145823895931244], [0.02798222377896309, 0.012448069639503956, 0.018199993297457695, 0.0069459048099815845, 0.042531996965408325, 0.009718443267047405, 0.013791781850159168, 0.04370715469121933, 0.21814176440238953, 0.024645699188113213, 0.0633857473731041, 0.0802498310804367, 0.006771658081561327, 0.040147896856069565, 0.4109969139099121], [0.02001010812819004, 0.02580004744231701, 0.006869276985526085, 0.007543967105448246, 0.017537932842969894, 0.00023914838675409555, 0.006739956792443991, 0.008227680809795856, 0.05446772649884224, 0.03320171311497688, 0.022232946008443832, 0.01063306163996458, 0.0007752752280794084, 0.0028256638906896114, 0.2078467756509781], [0.0034786108881235123, 0.00011826713307527825, 0.002407492371276021, 0.005452741403132677, 0.002847136929631233, 0.003419033018872142, 0.013516861945390701, 0.002940082224085927, 0.002004653448238969, 0.006652397103607655, 0.004079414997249842, 0.0028307989705353975, 0.0006369714974425733, 0.002542868722230196, 0.1463778167963028], [0.0762338638305664, 0.11778479814529419, 0.03105221875011921, 0.006415408570319414, 0.0190818402916193, 0.027191398665308952, 0.005222225561738014, 0.0170834269374609, 0.05309534817934036, 0.00936796236783266, 0.03816217556595802, 0.17940494418144226, 0.020440110936760902, 0.13513173162937164, 0.3000544309616089], [0.16228125989437103, 0.35454851388931274, 0.04026315361261368, 0.03822629526257515, 0.023396998643875122, 0.30800631642341614, 0.24136781692504883, 0.15176478028297424, 0.0788438618183136, 0.07347536832094193, 0.030298085883259773, 0.007365733850747347, 0.1061745211482048, 0.2841038405895233, 0.07787416130304337], [0.05645078793168068, 0.023840615525841713, 0.013567867688834667, 0.00750470208004117, 0.07643276453018188, 0.08809614926576614, 0.06102507561445236, 0.021034346893429756, 0.039108242839574814, 0.02081543207168579, 0.011458326131105423, 0.20520520210266113, 0.027348484843969345, 0.06299317628145218, 0.2514360249042511], [0.016126127913594246, 0.01087501272559166, 0.01213990617543459, 0.004450921434909105, 0.014690833166241646, 0.30525338649749756, 0.02716207131743431, 0.09981174021959305, 0.027048761025071144, 0.01336466334760189, 0.006663064938038588, 0.0520603246986866, 0.042623523622751236, 0.018071996048092842, 0.1948687732219696], [0.04185086488723755, 0.034399643540382385, 0.041276611387729645, 0.0584070086479187, 0.019824109971523285, 0.00856409315019846, 0.08867836743593216, 0.10337970405817032, 0.09468665719032288, 0.02033121883869171, 0.018058426678180695, 0.059728462249040604, 0.09321711957454681, 0.20168805122375488, 0.1941128522157669], [0.01436887588351965, 0.027922889217734337, 0.046481672674417496, 0.010071231983602047, 0.026127830147743225, 0.06003356724977493, 0.022118212655186653, 0.08160483092069626, 0.07784195244312286, 0.010694753378629684, 0.017130734398961067, 0.05340806022286415, 0.041410259902477264, 0.035884104669094086, 0.2491855025291443], [0.053393200039863586, 0.04828185588121414, 0.03453819081187248, 0.013636122457683086, 0.25098806619644165, 0.12313847243785858, 0.02266266942024231, 0.017618268728256226, 0.019785437732934952, 0.005274764262139797, 0.021053072065114975, 0.20679616928100586, 0.021523641422390938, 0.03855947405099869, 0.1109846979379654], [0.12851715087890625, 0.12400124222040176, 0.2637093663215637, 0.02439347468316555, 0.07038086652755737, 0.12665364146232605, 0.04898465424776077, 0.03412041813135147, 0.0263816025108099, 0.023226425051689148, 0.11513664573431015, 0.09503531455993652, 0.1215861439704895, 0.11158601939678192, 0.14799171686172485], [0.0010214513167738914, 0.004835289902985096, 0.0042709591798484325, 0.0026378841139376163, 0.005866974592208862, 0.008331544697284698, 0.006240549497306347, 0.01365274004638195, 0.1720106601715088, 0.0005307683604769409, 0.0007543729152530432, 0.004353509750217199, 0.0002490385086275637, 0.0017186965560540557, 0.14317919313907623], [0.07205050438642502, 0.12816517055034637, 0.23753608763217926, 0.08243206143379211, 0.5041552186012268, 0.11970840394496918, 0.04837331175804138, 0.034129947423934937, 0.16484025120735168, 0.011070297099649906, 0.05054215341806412, 0.039082955569028854, 0.09205758571624756, 0.1322212517261505, 0.16203875839710236], [0.014979850500822067, 0.03769220784306526, 0.04367470741271973, 0.009415187872946262, 0.019922776147723198, 0.11522040516138077, 0.014906312339007854, 0.04722318425774574, 0.06570684164762497, 0.008925273083150387, 0.019600573927164078, 0.0472339391708374, 0.005348374601453543, 0.0017698986921459436, 0.1612817794084549], [0.023198002949357033, 0.06148262694478035, 0.046858664602041245, 0.013079512864351273, 0.08762317895889282, 0.00949429627507925, 0.0484880767762661, 0.025388503447175026, 0.04432932287454605, 0.006038118619471788, 0.010164186358451843, 0.08949221670627594, 0.06122652441263199, 0.11895263940095901, 0.16355113685131073], [0.009917332790791988, 0.01408212911337614, 0.047434139996767044, 0.005388779100030661, 0.023170381784439087, 0.034844160079956055, 0.009820640087127686, 0.03569778800010681, 0.05789060518145561, 0.0037882563192397356, 0.013808010146021843, 0.04879388585686684, 0.03114072047173977, 0.0507131889462471, 0.18661679327487946], [0.0652787834405899, 0.04612350836396217, 0.04522763565182686, 0.014745297841727734, 0.27657532691955566, 0.16156227886676788, 0.025164838880300522, 0.017732013016939163, 0.023105354979634285, 0.005499221384525299, 0.020183373242616653, 0.19132839143276215, 0.020515967160463333, 0.056384406983852386, 0.14304831624031067], [0.14539514482021332, 0.21388974785804749, 0.34906452894210815, 0.031415559351444244, 0.062017399817705154, 0.08485611528158188, 0.03913363441824913, 0.03569692373275757, 0.023448940366506577, 0.020669998601078987, 0.1622902750968933, 0.1315622329711914, 0.09182734042406082, 0.1796703040599823, 0.13702963292598724], [0.0009059146977961063, 0.004442692268639803, 0.002850044285878539, 0.0024173678830266, 0.006019651889801025, 0.004450949374586344, 0.003768310882151127, 0.009272964671254158, 0.19643637537956238, 0.0004391498805489391, 0.0004852984275203198, 0.005083973053842783, 0.000164541692356579, 0.001456208759918809, 0.13767127692699432], [0.03601038455963135, 0.08602340519428253, 0.042799800634384155, 0.007577326148748398, 0.12637566030025482, 0.07399067282676697, 0.02205651067197323, 0.01475659292191267, 0.14170114696025848, 0.004405674524605274, 0.013175459578633308, 0.03142356127500534, 0.06839168816804886, 0.09161193668842316, 0.1376270353794098], [0.014056011103093624, 0.020953036844730377, 0.03237491473555565, 0.0042424313724040985, 0.017438247799873352, 0.08849667757749557, 0.005714876111596823, 0.025588830932974815, 0.08735965192317963, 0.009712125174701214, 0.02371004782617092, 0.06271149963140488, 0.00425978796556592, 0.0027238703332841396, 0.14272134006023407], [0.15719948709011078, 0.03286461904644966, 0.12916648387908936, 0.10299614071846008, 0.014032969251275063, 0.011700707487761974, 0.06680437922477722, 0.016068298369646072, 0.04505150765180588, 0.056866806000471115, 0.07287567108869553, 0.09101171046495438, 0.06734755635261536, 0.17371943593025208, 0.1297563910484314]], [[0.010018138214945793, 0.02516627125442028, 0.027397310361266136, 0.005101055838167667, 0.025938771665096283, 0.13529063761234283, 0.02690303698182106, 0.11719205975532532, 0.027814749628305435, 0.019565219059586525, 0.07996311038732529, 0.0991574078798294, 0.16288702189922333, 0.1113416850566864, 0.22370746731758118], [0.05219842493534088, 0.1440066546201706, 0.27922260761260986, 0.2058621197938919, 0.11230742931365967, 0.6016822457313538, 0.20846855640411377, 0.04777589067816734, 0.20611444115638733, 0.15481434762477875, 0.11950203776359558, 0.02679699845612049, 0.0639302060008049, 0.047183193266391754, 0.04897741973400116], [0.01555164996534586, 0.0014379153726622462, 0.01706753298640251, 0.003720618085935712, 0.10093016922473907, 0.027928827330470085, 0.015380543656647205, 0.0025812943931668997, 0.020822137594223022, 0.014309070073068142, 0.017923271283507347, 0.0120958611369133, 0.014481468126177788, 0.009491728618741035, 0.15904544293880463], [0.11612647771835327, 0.0010205605067312717, 0.020188286900520325, 0.027076182886958122, 0.09822120517492294, 0.3221674859523773, 0.1250218003988266, 0.002691123867407441, 0.005359187722206116, 0.04976291581988335, 0.023232540115714073, 0.04237976670265198, 0.028708819299936295, 0.049411751329898834, 0.005618311930447817], [0.0470837838947773, 0.007497857324779034, 0.004583081230521202, 0.022991856560111046, 0.0278051495552063, 0.00051211251411587, 0.0627230703830719, 0.011764267459511757, 0.010903585702180862, 0.07272983342409134, 0.011678352952003479, 0.09392477571964264, 0.01558940764516592, 0.03351595252752304, 0.2068868726491928], [0.0024584962520748377, 8.163625898305327e-05, 0.00016154914919752628, 0.0002508168399799615, 0.0019916424062103033, 0.0004536219348665327, 0.0036078437697142363, 0.0008641426684334874, 0.00021941671730019152, 0.0014423344982787967, 0.0004360634775366634, 0.004383172374218702, 0.0009428760386072099, 0.0009436326217837632, 0.14683274924755096], [0.02989446185529232, 0.007703323382884264, 0.12996061146259308, 0.025068828836083412, 0.2812304198741913, 0.0071953474543988705, 0.0021352169569581747, 0.0025125211104750633, 0.0014658492291346192, 0.007028855849057436, 0.0448734275996685, 0.09462164342403412, 0.0503704659640789, 0.11768583953380585, 0.12974096834659576], [0.16756094992160797, 0.028098214417696, 0.20756086707115173, 0.2207580953836441, 0.10928753018379211, 0.13773545622825623, 0.2233184576034546, 0.1774815022945404, 0.13830231130123138, 0.20932619273662567, 0.18267595767974854, 0.05961548537015915, 0.07697918266057968, 0.18739080429077148, 0.06796090304851532], [0.017068415880203247, 0.00098085415083915, 0.010854640044271946, 0.006490680854767561, 0.29060667753219604, 0.006710599176585674, 0.0118483304977417, 0.0008181483135558665, 0.00011296885350020602, 0.0034601599909365177, 0.005098147317767143, 0.010750477202236652, 0.010399019345641136, 0.009376241825520992, 0.017405353486537933], [0.1331326961517334, 0.019769106060266495, 0.01612294837832451, 0.028521019965410233, 0.007509702816605568, 0.2665199935436249, 0.19958320260047913, 0.1385747790336609, 0.0059373765252530575, 0.08046255260705948, 0.052418529987335205, 0.004961848258972168, 0.10941796749830246, 0.06705309450626373, 0.17611992359161377], [0.019668979570269585, 0.0081618782132864, 0.12552350759506226, 0.0802406370639801, 0.07089362293481827, 0.18871739506721497, 0.12778939306735992, 0.04829992726445198, 0.04307088255882263, 0.02314154990017414, 0.14194107055664062, 0.05861861631274223, 0.19650596380233765, 0.11930099874734879, 0.18420156836509705], [0.00538466265425086, 0.0270208939909935, 0.18066750466823578, 0.06076826527714729, 0.035171061754226685, 0.411039799451828, 0.09634009003639221, 0.26394954323768616, 0.1915867179632187, 0.03318370133638382, 0.3213040828704834, 0.10995125770568848, 0.5320225954055786, 0.4394112527370453, 0.15243512392044067], [0.0030147582292556763, 0.00625306461006403, 0.017102748155593872, 0.008551767095923424, 0.0727200135588646, 0.015153692103922367, 0.0023096217773854733, 0.011201570741832256, 0.002435098635032773, 0.006847116630524397, 0.016829995438456535, 0.12519565224647522, 0.3878204822540283, 0.13249750435352325, 0.028183329850435257], [0.066617950797081, 0.006649812217801809, 0.04142908379435539, 0.13957993686199188, 0.025706114247441292, 0.08231058716773987, 0.08377126604318619, 0.02330365777015686, 0.04652002453804016, 0.11060080677270889, 0.09014575183391571, 0.07117310166358948, 0.15938407182693481, 0.1624550223350525, 0.05356656014919281], [0.004379222169518471, 0.0002637936850078404, 0.0022587613202631474, 0.006711117923259735, 0.0006837267428636551, 0.007989797741174698, 0.02997850626707077, 0.045127563178539276, 0.008224103599786758, 0.0034686585422605276, 0.0038658890407532454, 0.00034815416438505054, 7.646608719369397e-05, 0.00017854337056633085, 0.14325816929340363], [0.25216665863990784, 0.1422366499900818, 0.10172943770885468, 0.3735504150390625, 0.0612066313624382, 0.06238102167844772, 0.11154207587242126, 0.031159698963165283, 0.011768986470997334, 0.4107469618320465, 0.1557808816432953, 0.07179611176252365, 0.186580628156662, 0.18789765238761902, 0.099563829600811], [0.0073658498004078865, 0.1486257165670395, 0.03456511348485947, 0.0081891855224967, 0.009660922922194004, 0.09341325610876083, 0.010183881968259811, 0.09390538185834885, 0.005950886756181717, 0.019719628617167473, 0.060451164841651917, 0.021925343200564384, 0.19991156458854675, 0.17004182934761047, 0.15761280059814453], [0.0057948376052081585, 0.023180164396762848, 0.018019115552306175, 0.008233858272433281, 0.005580522585660219, 0.09526203572750092, 0.025384269654750824, 0.05396068096160889, 0.022398412227630615, 0.010895788669586182, 0.02884012460708618, 0.008390026167035103, 0.1754663735628128, 0.0998048186302185, 0.1692073941230774], [0.0038264640606939793, 0.023839879781007767, 0.12264026701450348, 0.02543032169342041, 0.01467527449131012, 0.22457416355609894, 0.02885078825056553, 0.18430863320827484, 0.08557040989398956, 0.016987022012472153, 0.3513573110103607, 0.04023189842700958, 0.40384334325790405, 0.4235673248767853, 0.16652488708496094], [0.006266402080655098, 0.015031179413199425, 0.02853887900710106, 0.010518345981836319, 0.09044987708330154, 0.021657679229974747, 0.0031435268465429544, 0.020945381373167038, 0.004824943374842405, 0.0127853499725461, 0.04820985347032547, 0.12459135800600052, 0.5573670268058777, 0.2566193640232086, 0.05160163715481758], [0.3002758324146271, 0.08866846561431885, 0.06544900685548782, 0.25531354546546936, 0.028160221874713898, 0.12210531532764435, 0.16810676455497742, 0.0764283761382103, 0.17981933057308197, 0.3050864636898041, 0.2806880474090576, 0.13050490617752075, 0.19047558307647705, 0.3216065764427185, 0.07704814523458481], [0.005926316604018211, 0.0003559965989552438, 0.0015365411527454853, 0.005924532189965248, 0.0005743101937696338, 0.007415232714265585, 0.024156678467988968, 0.045611582696437836, 0.009969166480004787, 0.003380746114999056, 0.003106702584773302, 0.0003880919248331338, 4.0538176108384505e-05, 0.00014580521383322775, 0.13770556449890137], [0.1617586314678192, 0.29556339979171753, 0.028325924649834633, 0.059843577444553375, 0.009868957102298737, 0.03965649753808975, 0.07811643928289413, 0.06809397041797638, 0.009963614866137505, 0.11740529537200928, 0.08369920402765274, 0.039758261293172836, 0.13982373476028442, 0.1197674348950386, 0.13220268487930298], [0.012153265066444874, 0.16048333048820496, 0.041802890598773956, 0.00796045083552599, 0.018259191885590553, 0.10963782668113708, 0.009757153689861298, 0.07023902982473373, 0.01128031499683857, 0.030125515535473824, 0.0943576917052269, 0.02206866256892681, 0.1321137398481369, 0.19507774710655212, 0.1400403380393982], [0.005033975467085838, 0.01824766956269741, 0.015512547455728054, 0.006673634983599186, 0.005676268134266138, 0.04240407794713974, 0.023996027186512947, 0.1038113459944725, 0.02023463323712349, 0.0080516142770648, 0.052543867379426956, 0.1188565045595169, 0.05977800861001015, 0.05786403268575668, 0.13343320786952972]], [[0.1022859737277031, 0.17571765184402466, 0.1416551172733307, 0.11749783158302307, 0.09062699973583221, 0.07838433235883713, 0.09344526380300522, 0.3238999545574188, 0.11371968686580658, 0.10100032389163971, 0.09302259236574173, 0.0389624647796154, 0.16697892546653748, 0.1419355273246765, 0.1285012662410736], [0.24028724431991577, 0.14351274073123932, 0.051798444241285324, 0.16382630169391632, 0.04226303845643997, 0.020662518218159676, 0.11527843773365021, 0.29321926832199097, 0.02218940667808056, 0.0878078043460846, 0.10535410046577454, 0.011972848325967789, 0.07032275199890137, 0.04715458303689957, 0.0739566907286644], [0.2799055874347687, 0.11053244769573212, 0.1936434954404831, 0.029654914513230324, 0.3583168685436249, 0.552708625793457, 0.34459343552589417, 0.33612802624702454, 0.17023301124572754, 0.19969996809959412, 0.18768110871315002, 0.6793866157531738, 0.791401207447052, 0.7463385462760925, 0.09094473719596863], [0.1572730988264084, 0.12077052146196365, 0.0489557608962059, 0.1575693041086197, 0.05669395253062248, 0.21311312913894653, 0.07387427985668182, 0.12006285786628723, 0.06427917629480362, 0.05486075580120087, 0.09722346067428589, 0.0672946497797966, 0.519307017326355, 0.15919242799282074, 0.07895061373710632], [0.056666091084480286, 0.13304737210273743, 0.023897293955087662, 0.04679059237241745, 0.045941345393657684, 0.32384783029556274, 0.44531556963920593, 0.533463716506958, 0.08588721603155136, 0.10118058323860168, 0.027683693915605545, 0.15270595252513885, 0.45412689447402954, 0.19033603370189667, 0.009601723402738571], [0.026866083964705467, 0.01856745034456253, 0.00889106560498476, 0.023431263864040375, 0.014423922635614872, 0.06721587479114532, 0.30465173721313477, 0.5084072351455688, 0.06748852878808975, 0.09416066110134125, 0.028160765767097473, 0.08301042765378952, 0.13479003310203552, 0.08470122516155243, 0.14269311726093292], [0.07283831387758255, 0.02513016201555729, 0.513066828250885, 0.1692790985107422, 0.12089971452951431, 0.05420007184147835, 0.019427694380283356, 0.038392528891563416, 0.31973040103912354, 0.29048243165016174, 0.4046151340007782, 0.10607112944126129, 0.0885496586561203, 0.07017665356397629, 0.1372956782579422], [0.27857187390327454, 0.3617483973503113, 0.2938012182712555, 0.22770966589450836, 0.06824903935194016, 0.055705904960632324, 0.2735913395881653, 0.10727421194314957, 0.15245027840137482, 0.12983311712741852, 0.2781352400779724, 0.010307536460459232, 0.09433942288160324, 0.07780664414167404, 0.13000918924808502], [0.09918209165334702, 0.053455647081136703, 0.645177960395813, 0.40746453404426575, 0.08205579966306686, 0.11053493618965149, 0.09200509637594223, 0.0519426129758358, 0.15867555141448975, 0.14363400638103485, 0.08945868164300919, 0.009240956045687199, 0.05626320466399193, 0.024817338213324547, 0.10628006607294083], [0.21029417216777802, 0.16975507140159607, 0.4791514277458191, 0.5080997347831726, 0.14877668023109436, 0.04306463524699211, 0.02225780300796032, 0.027854960411787033, 0.09907854348421097, 0.17716829478740692, 0.027767561376094818, 0.04010230675339699, 0.1045137569308281, 0.07445494085550308, 0.1349247545003891], [0.05318222567439079, 0.11344952136278152, 0.09562063962221146, 0.10165436565876007, 0.11442670226097107, 0.07387696951627731, 0.04448265954852104, 0.12469986081123352, 0.10296554863452911, 0.029610879719257355, 0.006854650564491749, 0.06481806933879852, 0.038151390850543976, 0.029200172051787376, 0.19021393358707428], [0.024841444566845894, 0.16249340772628784, 0.20643305778503418, 0.09402812272310257, 0.0850510448217392, 0.023708872497081757, 0.027868179604411125, 0.16653721034526825, 0.2575382590293884, 0.07176022976636887, 0.04638299718499184, 0.019721999764442444, 0.08340867608785629, 0.04306621477007866, 0.19255293905735016], [0.24242781102657318, 0.4547469913959503, 0.7904132008552551, 0.7443370819091797, 0.4808639585971832, 0.2640213668346405, 0.06001711264252663, 0.24681034684181213, 0.5675581097602844, 0.2725449204444885, 0.247804656624794, 0.029579274356365204, 0.19247104227542877, 0.09198179841041565, 0.18542104959487915], [0.10456986725330353, 0.23679938912391663, 0.29603201150894165, 0.2020668387413025, 0.14429134130477905, 0.4285147190093994, 0.3221139907836914, 0.592944860458374, 0.47945162653923035, 0.273953914642334, 0.2270997315645218, 0.05125115066766739, 0.15167200565338135, 0.14498752355575562, 0.03565559163689613], [0.005393329542130232, 0.004602347034960985, 0.02125353366136551, 0.017772456631064415, 0.029431374743580818, 0.06670433282852173, 0.07382840663194656, 0.05640842020511627, 0.2022721767425537, 0.02110537886619568, 0.006757265422493219, 0.0065305884927511215, 0.00012849831546191126, 0.0015581984771415591, 0.14312443137168884], [0.03693488612771034, 0.3099628686904907, 0.02452116832137108, 0.038606833666563034, 0.04603191837668419, 0.056979674845933914, 0.014461892656981945, 0.021202413365244865, 0.4372372031211853, 0.02073492854833603, 0.005594322457909584, 0.11605570465326309, 0.05724794790148735, 0.01605997234582901, 0.1753198802471161], [0.17487157881259918, 0.2829012870788574, 0.22657853364944458, 0.2227388322353363, 0.09278897941112518, 0.05522100254893303, 0.023270972073078156, 0.031554628163576126, 0.32194823026657104, 0.13948096334934235, 0.09803083539009094, 0.2809208631515503, 0.14969345927238464, 0.03018103539943695, 0.10283161699771881], [0.06711219251155853, 0.13971862196922302, 0.10573939234018326, 0.08062157034873962, 0.22173365950584412, 0.04757346957921982, 0.02002648264169693, 0.06195787340402603, 0.09553409367799759, 0.04351034387946129, 0.015184497460722923, 0.17841440439224243, 0.07658158242702484, 0.04646967723965645, 0.1461518555879593], [0.015694430097937584, 0.09081663191318512, 0.2731003761291504, 0.09780610352754593, 0.06437630951404572, 0.024092676118016243, 0.017730340361595154, 0.09997125715017319, 0.24317535758018494, 0.06615940481424332, 0.05322461575269699, 0.013002216815948486, 0.10308460891246796, 0.03947872668504715, 0.16966252028942108], [0.19514591991901398, 0.2590837776660919, 0.7111572027206421, 0.6245842576026917, 0.2279123067855835, 0.21324849128723145, 0.0465325303375721, 0.16129039227962494, 0.5552195906639099, 0.24888396263122559, 0.16995932161808014, 0.017819084227085114, 0.13601525127887726, 0.04923256114125252, 0.1924036145210266], [0.11466818302869797, 0.23749157786369324, 0.22078867256641388, 0.21260471642017365, 0.1054922342300415, 0.38443663716316223, 0.35735341906547546, 0.3432110548019409, 0.45766645669937134, 0.30316272377967834, 0.15794025361537933, 0.23222389817237854, 0.18522031605243683, 0.12369272857904434, 0.062224190682172775], [0.004928229842334986, 0.004764902405440807, 0.014567935839295387, 0.014073353260755539, 0.020878629758954048, 0.04901519790291786, 0.05124438554048538, 0.042454566806554794, 0.19801755249500275, 0.018003307282924652, 0.004736864008009434, 0.006620202213525772, 0.00011398878996260464, 0.001381832524202764, 0.13761556148529053], [0.013776288367807865, 0.25124475359916687, 0.00789756141602993, 0.00910337083041668, 0.005072988104075193, 0.015830766409635544, 0.005818341393023729, 0.011153762228786945, 0.14152461290359497, 0.008211367763578892, 0.002360414480790496, 0.06666377186775208, 0.057822320610284805, 0.009000283665955067, 0.13980405032634735], [0.25532495975494385, 0.3110601603984833, 0.28066542744636536, 0.29941898584365845, 0.09561395645141602, 0.06004221364855766, 0.0257351566106081, 0.04446575790643692, 0.3475395441055298, 0.2538500130176544, 0.25107017159461975, 0.4736424386501312, 0.29699820280075073, 0.06975124776363373, 0.11745814979076385], [0.06876020133495331, 0.07319146394729614, 0.08357107639312744, 0.06905727088451385, 0.010884120129048824, 0.012632370926439762, 0.04344229772686958, 0.06033884361386299, 0.05559740215539932, 0.048808641731739044, 0.06204793229699135, 0.017201891168951988, 0.028970519080758095, 0.021960163488984108, 0.13179059326648712]], [[0.1855485588312149, 0.4779467284679413, 0.0886944904923439, 0.027812138199806213, 0.051930978894233704, 0.20570456981658936, 0.13285183906555176, 0.12479114532470703, 0.03275279700756073, 0.13280591368675232, 0.10831113904714584, 0.13358037173748016, 0.31709861755371094, 0.18639257550239563, 0.0658930093050003], [0.04738391190767288, 0.17884546518325806, 0.030679181218147278, 0.09374479204416275, 0.015219364315271378, 0.004209337756037712, 0.011544613167643547, 0.014519347809255123, 0.0008998611010611057, 0.03714418038725853, 0.02808041125535965, 0.0015275280456990004, 0.014074422419071198, 0.01773718185722828, 0.02865048497915268], [0.4282352328300476, 0.07421883940696716, 0.37614062428474426, 0.6016114950180054, 0.16448479890823364, 0.10949403792619705, 0.43647968769073486, 0.17394804954528809, 0.2346193641424179, 0.5131813287734985, 0.6543169021606445, 0.06318124383687973, 0.059741634875535965, 0.08049911260604858, 0.08155221492052078], [0.04248558357357979, 0.005498564336448908, 0.015051363967359066, 0.021896474063396454, 0.031015703454613686, 0.23631463944911957, 0.5231030583381653, 0.1651564985513687, 0.010708797723054886, 0.0702022984623909, 0.015817642211914062, 0.01968570239841938, 0.2309122085571289, 0.11954572051763535, 0.04909561946988106], [0.019823409616947174, 0.02119731903076172, 0.0447932668030262, 0.04950243979692459, 0.11350910365581512, 0.3172611892223358, 0.1175147220492363, 0.16474604606628418, 0.025614900514483452, 0.11684545129537582, 0.027774598449468613, 0.03366768732666969, 0.1657668650150299, 0.20241110026836395, 0.02058284729719162], [0.024027986451983452, 0.07085671275854111, 0.014559593982994556, 0.003951122052967548, 0.5812088251113892, 0.07389754801988602, 0.10464153438806534, 0.06822511553764343, 0.1849648803472519, 0.02429678477346897, 0.014226456172764301, 0.2123226672410965, 0.1049809455871582, 0.17609325051307678, 0.13661964237689972], [0.20496347546577454, 0.09403666108846664, 0.02112487144768238, 0.025338320061564445, 0.008130905218422413, 0.1783977895975113, 0.3754851818084717, 0.0950397253036499, 0.0030220954213291407, 0.08205359429121017, 0.011042395606637001, 0.018588367849588394, 0.1888807862997055, 0.10302136838436127, 0.14473272860050201], [0.037373751401901245, 0.07382072508335114, 0.08205787092447281, 0.10832883417606354, 0.02859049290418625, 0.1663966327905655, 0.058918725699186325, 0.17053310573101044, 0.011018002405762672, 0.15213745832443237, 0.027154715731739998, 0.0019660431426018476, 0.22162862122058868, 0.11411792784929276, 0.08493959158658981], [0.015705576166510582, 0.016172299161553383, 0.006149389781057835, 0.0038101596292108297, 0.007736767642199993, 0.20371977984905243, 0.12438680231571198, 0.06649734079837799, 0.004926482681185007, 0.004153827205300331, 0.0012289183214306831, 0.003863752353936434, 0.0550994910299778, 0.04052891582250595, 0.36571574211120605], [0.008730506524443626, 0.002757954876869917, 0.0122150257229805, 0.006305738352239132, 0.004681416787207127, 0.06460410356521606, 0.008150112815201283, 0.010960009880363941, 0.004299533553421497, 0.004670997615903616, 0.0034528695978224277, 0.0024545302148908377, 0.005013267509639263, 0.008545692078769207, 0.23703089356422424], [0.09499987959861755, 0.010673395358026028, 0.007046178914606571, 0.020993953570723534, 0.010670008137822151, 0.07466354966163635, 0.06417079269886017, 0.023990478366613388, 0.17728924751281738, 0.15624059736728668, 0.004560643341392279, 0.010690598748624325, 0.03727814555168152, 0.017693333327770233, 0.14084658026695251], [0.688500165939331, 0.16286028921604156, 0.04583478718996048, 0.22473743557929993, 0.025797681882977486, 0.04771623760461807, 0.5437547564506531, 0.0642164871096611, 0.01443459838628769, 0.2519066631793976, 0.017869845032691956, 0.003991205245256424, 0.04630482196807861, 0.029587149620056152, 0.049375567585229874], [0.14772717654705048, 0.11627800017595291, 0.034884992986917496, 0.02596234902739525, 0.031621210277080536, 0.39286479353904724, 0.6627658009529114, 0.20747745037078857, 0.019052494317293167, 0.06071586161851883, 0.014515946619212627, 0.03545556217432022, 0.1622975915670395, 0.05619712546467781, 0.4560142755508423], [0.3253695070743561, 0.18678773939609528, 0.23196454346179962, 0.43925735354423523, 0.09974130243062973, 0.1577768325805664, 0.26045241951942444, 0.07323815673589706, 0.005399893503636122, 0.23951157927513123, 0.04431937262415886, 0.013187061063945293, 0.0749824121594429, 0.025474021211266518, 0.2768867611885071], [0.049311667680740356, 0.10222040861845016, 0.30249276757240295, 0.11109475791454315, 0.4333159327507019, 0.4476950168609619, 0.14919614791870117, 0.45436185598373413, 0.10977044701576233, 0.101465605199337, 0.28612539172172546, 0.15904487669467926, 0.4858849048614502, 0.19411928951740265, 0.08273273706436157], [0.08865676820278168, 0.0832996591925621, 0.0360012948513031, 0.026901112869381905, 0.0488949753344059, 0.5697077512741089, 0.2118675261735916, 0.21166029572486877, 0.009457184933125973, 0.042189937084913254, 0.010147118009626865, 0.027016732841730118, 0.1966082751750946, 0.18848717212677002, 0.17412608861923218], [0.09455566853284836, 0.047932155430316925, 0.06032469496130943, 0.027359262108802795, 0.004525639116764069, 0.19231697916984558, 0.29536089301109314, 0.10446369647979736, 0.004957688972353935, 0.22148354351520538, 0.017980555072426796, 0.016062501817941666, 0.01227590162307024, 0.007468203082680702, 0.14047065377235413], [0.18475790321826935, 0.03305341675877571, 0.022945405915379524, 0.02499788999557495, 0.016275716945528984, 0.44049808382987976, 0.3255404233932495, 0.03656867519021034, 0.008760510943830013, 0.28132569789886475, 0.00872495025396347, 0.02103549800813198, 0.09103824943304062, 0.045535117387771606, 0.1431308537721634], [0.5226730704307556, 0.08511564135551453, 0.13128292560577393, 0.22977954149246216, 0.025636736303567886, 0.14430683851242065, 0.697600245475769, 0.08303582668304443, 0.03326253592967987, 0.30183717608451843, 0.04944504052400589, 0.004384536296129227, 0.07144975662231445, 0.05258011445403099, 0.06879302859306335], [0.06703877449035645, 0.049393996596336365, 0.041539933532476425, 0.021373772993683815, 0.02868128940463066, 0.32991066575050354, 0.488584041595459, 0.0702073872089386, 0.0075523643754422665, 0.038572411984205246, 0.012813442386686802, 0.04136957228183746, 0.06929102540016174, 0.03757195174694061, 0.23515936732292175], [0.15618596971035004, 0.12941822409629822, 0.2654253840446472, 0.28590527176856995, 0.31243884563446045, 0.1085575670003891, 0.15852880477905273, 0.026613548398017883, 0.004155577160418034, 0.15324708819389343, 0.037679530680179596, 0.09416285902261734, 0.02134908176958561, 0.010629331693053246, 0.17846201360225677], [0.058257974684238434, 0.12017454952001572, 0.32657214999198914, 0.12284700572490692, 0.5568311810493469, 0.41536086797714233, 0.16300946474075317, 0.49100223183631897, 0.15462136268615723, 0.11520260572433472, 0.260068416595459, 0.28476831316947937, 0.501883327960968, 0.21151991188526154, 0.09330709278583527], [0.04007576033473015, 0.04011448100209236, 0.02015572600066662, 0.006723308004438877, 0.01584162376821041, 0.6745935082435608, 0.14270515739917755, 0.05812964215874672, 0.0018657244509086013, 0.018765496090054512, 0.004551106132566929, 0.05217724293470383, 0.21886952221393585, 0.13090433180332184, 0.13149680197238922], [0.051524627953767776, 0.037071868777275085, 0.09267362952232361, 0.03285788744688034, 0.006808253470808268, 0.2584725618362427, 0.21142001450061798, 0.06556515395641327, 0.003410812932997942, 0.18829914927482605, 0.028329605236649513, 0.02864006720483303, 0.014232979156076908, 0.014326054602861404, 0.12804241478443146], [0.13503411412239075, 0.06798373907804489, 0.08072269707918167, 0.04104887321591377, 0.027653640136122704, 0.5933560132980347, 0.15723249316215515, 0.044575583189725876, 0.017590617761015892, 0.04771400988101959, 0.07117579132318497, 0.10345834493637085, 0.10624422132968903, 0.027206260710954666, 0.1271171271800995]], [[0.04247138649225235, 0.01728098653256893, 0.06617120653390884, 0.009399485774338245, 0.0730140432715416, 0.14221039414405823, 0.11889991164207458, 0.10651882737874985, 0.10687308758497238, 0.0351867638528347, 0.09164245426654816, 0.06160420924425125, 0.04699656739830971, 0.14884592592716217, 0.20088525116443634], [0.35919252038002014, 0.017007382586598396, 0.3711448311805725, 0.05260182172060013, 0.23237934708595276, 0.17189942300319672, 0.06846722215414047, 0.25480321049690247, 0.4269619286060333, 0.141769677400589, 0.19745108485221863, 0.3101239502429962, 0.12419883906841278, 0.061588384211063385, 0.3489930033683777], [0.1570073962211609, 0.6818748116493225, 0.08056136965751648, 0.04282544180750847, 0.09609510749578476, 0.21831035614013672, 0.11452964693307877, 0.4344905614852905, 0.09872471541166306, 0.06769980490207672, 0.054214250296354294, 0.015440859831869602, 0.04572026804089546, 0.05267196521162987, 0.06955287605524063], [0.1362180858850479, 0.01786869764328003, 0.3548091650009155, 0.13650378584861755, 0.07479218393564224, 0.08773932605981827, 0.007214170414954424, 0.020996512845158577, 0.09793394804000854, 0.26323461532592773, 0.31718939542770386, 0.004400049336254597, 0.01118874829262495, 0.016452480107545853, 0.0059462906792759895], [0.13787487149238586, 0.02221597172319889, 0.46063661575317383, 0.42787930369377136, 0.16819633543491364, 0.30927538871765137, 0.10940644890069962, 0.14741046726703644, 0.3708270192146301, 0.08424455672502518, 0.34931957721710205, 0.015041538514196873, 0.02219252847135067, 0.0637117251753807, 0.001682900357991457], [0.09526984393596649, 0.013222168199717999, 0.9035038352012634, 0.8715099692344666, 0.20107677578926086, 0.7829492688179016, 0.28305909037590027, 0.141366645693779, 0.15355023741722107, 0.11376345157623291, 0.804192841053009, 0.012117957696318626, 0.3312073349952698, 0.4514775276184082, 0.016239164397120476], [0.34537556767463684, 0.010514522902667522, 0.04824088513851166, 0.12771852314472198, 0.005308120045810938, 0.17857761681079865, 0.2263273000717163, 0.26537755131721497, 0.3297313451766968, 0.3104889690876007, 0.11654951423406601, 0.08535956591367722, 0.02363554947078228, 0.031254567205905914, 0.10634612292051315], [0.2808375656604767, 0.07436379790306091, 0.11235158890485764, 0.07017786800861359, 0.034851111471652985, 0.01653558947145939, 0.025893066078424454, 0.02911091037094593, 0.23654304444789886, 0.2646749019622803, 0.20617236196994781, 0.25081631541252136, 0.013157923705875874, 0.04621773213148117, 0.2354249358177185], [0.5487799644470215, 0.03728892654180527, 0.05227963626384735, 0.18957917392253876, 0.014632479287683964, 0.19499987363815308, 0.29326584935188293, 0.6778355836868286, 0.45779454708099365, 0.33408117294311523, 0.11356081813573837, 0.01941866986453533, 0.010207045823335648, 0.013884961605072021, 0.09069465100765228], [0.09531711786985397, 0.03595840558409691, 0.017401238903403282, 0.061305541545152664, 0.1627957820892334, 0.050434935837984085, 0.05516263470053673, 0.23917846381664276, 0.3637218177318573, 0.09729932248592377, 0.03891580551862717, 0.19205324351787567, 0.041229162365198135, 0.046046942472457886, 0.03756402060389519], [0.08811857551336288, 0.010963470675051212, 0.2593647241592407, 0.26678594946861267, 0.42746680974960327, 0.41530901193618774, 0.07491520792245865, 0.18910719454288483, 0.04928334057331085, 0.04599721357226372, 0.4843277335166931, 0.07717985659837723, 0.09353034198284149, 0.07800954580307007, 0.08156391978263855], [0.04596662148833275, 0.005170373246073723, 0.12165658175945282, 0.15079215168952942, 0.04554709792137146, 0.08856093138456345, 0.04626012593507767, 0.020681705325841904, 0.17637456953525543, 0.26189061999320984, 0.13335715234279633, 0.046832337975502014, 0.018430203199386597, 0.01621258072555065, 0.10917440801858902], [0.5138411521911621, 0.0654044821858406, 0.1128465011715889, 0.18054738640785217, 0.038166921585798264, 0.13531430065631866, 0.12295213341712952, 0.28065726161003113, 0.2875981628894806, 0.5909985899925232, 0.601227879524231, 0.03077608533203602, 0.04096299037337303, 0.09236451238393784, 0.1495288461446762], [0.07072688639163971, 0.012152088806033134, 0.021357353776693344, 0.04663744568824768, 0.020319821313023567, 0.05489102751016617, 0.07223928719758987, 0.23148301243782043, 0.18188072741031647, 0.10590049624443054, 0.10450157523155212, 0.03876996785402298, 0.13536545634269714, 0.10362161695957184, 0.12556865811347961], [0.07390952110290527, 0.023819932714104652, 0.4992673993110657, 0.293674498796463, 0.18016116321086884, 0.3294305205345154, 0.5326097011566162, 0.20817913115024567, 0.231731578707695, 0.17336609959602356, 0.4696378707885742, 0.3560185134410858, 0.5055418610572815, 0.687153697013855, 0.06569264829158783], [0.19887569546699524, 0.009285598993301392, 0.17495201528072357, 0.1799449920654297, 0.0410592183470726, 0.0050115324556827545, 0.025978662073612213, 0.011312133632600307, 0.04069671407341957, 0.23767657577991486, 0.3294059634208679, 0.09899688512086868, 0.03285939246416092, 0.08387716114521027, 0.04885585233569145], [0.054675761610269547, 0.04458622261881828, 0.0536046139895916, 0.016943499445915222, 0.02146792784333229, 0.1686052531003952, 0.036354243755340576, 0.08614800870418549, 0.1611979901790619, 0.170720174908638, 0.163726344704628, 0.09202460944652557, 0.016866492107510567, 0.019021833315491676, 0.13082824647426605], [0.254617303609848, 0.09600356966257095, 0.5283652544021606, 0.35948434472084045, 0.11690203100442886, 0.22449535131454468, 0.07030754536390305, 0.14074397087097168, 0.11056768894195557, 0.2017645388841629, 0.5897989273071289, 0.032950446009635925, 0.0850306898355484, 0.16881772875785828, 0.07667817175388336], [0.06611059606075287, 0.009380446746945381, 0.1600489318370819, 0.18714633584022522, 0.028496628627181053, 0.28509950637817383, 0.06793918460607529, 0.036412376910448074, 0.3864555358886719, 0.38031718134880066, 0.19321800768375397, 0.03279240429401398, 0.024823389947414398, 0.02684853971004486, 0.10572600364685059], [0.5806823372840881, 0.09046274423599243, 0.1468239277601242, 0.2587219774723053, 0.018666794523596764, 0.17986845970153809, 0.1758078932762146, 0.26734092831611633, 0.30597683787345886, 0.6407824158668518, 0.6427304148674011, 0.011203133501112461, 0.017842967063188553, 0.05609212443232536, 0.1528221219778061], [0.09578646719455719, 0.04883359372615814, 0.014442636631429195, 0.07719788700342178, 0.013871591538190842, 0.24272511899471283, 0.11848346889019012, 0.48695430159568787, 0.10090471804141998, 0.15632015466690063, 0.12246286869049072, 0.056596189737319946, 0.051980338990688324, 0.03806659206748009, 0.1369783878326416], [0.12923087179660797, 0.04506811499595642, 0.5631698966026306, 0.4945719838142395, 0.16776354610919952, 0.4656532406806946, 0.6344242095947266, 0.28209388256073, 0.297488808631897, 0.3520771265029907, 0.6463941931724548, 0.3803158104419708, 0.4924411177635193, 0.6891878843307495, 0.08469904214143753], [0.3177553117275238, 0.027823492884635925, 0.11541304737329483, 0.1464630663394928, 0.010460668243467808, 0.028609508648514748, 0.14352867007255554, 0.043905869126319885, 0.18215790390968323, 0.6030426025390625, 0.38763877749443054, 0.1293274313211441, 0.07180552184581757, 0.1464845985174179, 0.10971048474311829], [0.03459807112812996, 0.05000016465783119, 0.02839210256934166, 0.008521324954926968, 0.009519261308014393, 0.12168280780315399, 0.03372196480631828, 0.07665831595659256, 0.21765880286693573, 0.11945746093988419, 0.0821232944726944, 0.058310747146606445, 0.011853469535708427, 0.02031784877181053, 0.13586042821407318], [0.02964477799832821, 0.1353258490562439, 0.017653465270996094, 0.011115004308521748, 0.008141545578837395, 0.05911250412464142, 0.01831989735364914, 0.05519499629735947, 0.03573962301015854, 0.02204814739525318, 0.05097896233201027, 0.08341387659311295, 0.08060181885957718, 0.10490117967128754, 0.13247323036193848]], [[0.20067201554775238, 0.150595024228096, 0.3375815153121948, 0.5753223896026611, 0.03983612731099129, 0.13901081681251526, 0.37267425656318665, 0.07406412810087204, 0.07071352750062943, 0.22996902465820312, 0.35784539580345154, 0.0401473231613636, 0.03251379355788231, 0.07572956383228302, 0.005637211725115776], [0.055522263050079346, 0.0030253075528889894, 0.054468654096126556, 0.18383808434009552, 0.2751407325267792, 0.06163792684674263, 0.5092534422874451, 0.21577699482440948, 0.23691882193088531, 0.32801976799964905, 0.29786956310272217, 0.4967685043811798, 0.6341143250465393, 0.7677603363990784, 0.40264371037483215], [0.0005822544917464256, 0.0004425827646628022, 0.0014265297213569283, 0.0006841197027824819, 0.03406556695699692, 0.0010687633184716105, 0.0028485425282269716, 0.020860498771071434, 0.05133597180247307, 0.002158694202080369, 0.002441320102661848, 0.037159714847803116, 0.005256796721369028, 0.008102376013994217, 0.16207638382911682], [0.20224374532699585, 0.7376267313957214, 0.004014236852526665, 0.0103965038433671, 0.07275543361902237, 0.03262623772025108, 0.04577071964740753, 0.5017040371894836, 0.12205435335636139, 0.19255708158016205, 0.006990006659179926, 0.028381695970892906, 0.046785227954387665, 0.15206293761730194, 0.330488920211792], [0.3634231686592102, 0.404717355966568, 0.00689590023830533, 0.04770800471305847, 0.0251657422631979, 0.0006883289897814393, 0.02071242779493332, 0.019072405993938446, 0.15776626765727997, 0.3694642186164856, 0.036826737225055695, 0.23951902985572815, 0.011015082709491253, 0.04999716952443123, 0.2037181556224823], [0.8270207643508911, 0.8942698836326599, 0.020243747159838676, 0.04263966530561447, 0.09284591674804688, 0.054453812539577484, 0.21418678760528564, 0.23612302541732788, 0.5479635000228882, 0.7225908041000366, 0.08608872443437576, 0.5934221148490906, 0.30024465918540955, 0.22648638486862183, 0.12622572481632233], [0.043734412640333176, 0.7137998342514038, 0.1370490938425064, 0.045488547533750534, 0.06789389997720718, 0.49671053886413574, 0.1280447244644165, 0.4211912155151367, 0.03652801364660263, 0.041476957499980927, 0.08040425181388855, 0.19641457498073578, 0.603863537311554, 0.49263066053390503, 0.07636027038097382], [0.017375759780406952, 0.012506993487477303, 0.020720014348626137, 0.011049210093915462, 0.03743210807442665, 0.0072485157288610935, 0.03524084761738777, 0.005443913396447897, 0.24646395444869995, 0.048276107758283615, 0.03640883043408394, 0.507624089717865, 0.15355341136455536, 0.1730290949344635, 0.2644885182380676], [0.09840062260627747, 0.7509858012199402, 0.13933908939361572, 0.13482652604579926, 0.18154919147491455, 0.32397931814193726, 0.23646889626979828, 0.11657525599002838, 0.03430478647351265, 0.1277371644973755, 0.15700362622737885, 0.24829043447971344, 0.7591869831085205, 0.7825927138328552, 0.06869770586490631], [0.22806629538536072, 0.6706615686416626, 0.2560598850250244, 0.17412559688091278, 0.6327939033508301, 0.04699348285794258, 0.058767881244421005, 0.11556732654571533, 0.09056147933006287, 0.3648419678211212, 0.5388886332511902, 0.261055588722229, 0.6016876697540283, 0.7496042847633362, 0.0894755870103836], [0.5419997572898865, 0.6956567168235779, 0.044124722480773926, 0.12586495280265808, 0.048711128532886505, 0.11729516834020615, 0.4073715806007385, 0.43757542967796326, 0.032695479691028595, 0.4824156165122986, 0.05927032604813576, 0.04766178876161575, 0.25393223762512207, 0.23675066232681274, 0.10572775453329086], [0.09369882941246033, 0.5731168985366821, 0.13611510396003723, 0.13756731152534485, 0.024227088317275047, 0.31910547614097595, 0.16772453486919403, 0.1680929958820343, 0.09319504350423813, 0.0998181626200676, 0.22465890645980835, 0.00899507012218237, 0.16640731692314148, 0.25350457429885864, 0.09016240388154984], [0.02838694490492344, 0.30040091276168823, 0.005878766532987356, 0.015430719591677189, 0.017050068825483322, 0.06605669111013412, 0.12745192646980286, 0.23377051949501038, 0.08052214235067368, 0.033177152276039124, 0.06731567531824112, 0.07575374841690063, 0.18187224864959717, 0.570769727230072, 0.04572387412190437], [0.2655380666255951, 0.4107033908367157, 0.04865417629480362, 0.08488347381353378, 0.04310445114970207, 0.10849997401237488, 0.15643075108528137, 0.04165918007493019, 0.12898734211921692, 0.11095981299877167, 0.23520684242248535, 0.10632039606571198, 0.055878568440675735, 0.24558725953102112, 0.17682571709156036], [0.8565200567245483, 0.8639481067657471, 0.0803997814655304, 0.36449819803237915, 0.17448320984840393, 0.12402030825614929, 0.13765643537044525, 0.2065785825252533, 0.18182852864265442, 0.6806339025497437, 0.1919344812631607, 0.19068314135074615, 0.004361266735941172, 0.01490570418536663, 0.13936595618724823], [0.22751423716545105, 0.21127405762672424, 0.005130667705088854, 0.028237944468855858, 0.06646221876144409, 0.045109983533620834, 0.478432834148407, 0.6443154215812683, 0.140235036611557, 0.0980456992983818, 0.006476161070168018, 0.038696710020303726, 0.25798937678337097, 0.10561345517635345, 0.16755780577659607], [0.3886019289493561, 0.36600789427757263, 0.07069597393274307, 0.12792876362800598, 0.0629734918475151, 0.0820467472076416, 0.2973020672798157, 0.27475541830062866, 0.019707435742020607, 0.2982620298862457, 0.24423947930335999, 0.05686682090163231, 0.23438367247581482, 0.3444555997848511, 0.09858046472072601], [0.31350865960121155, 0.5118260383605957, 0.01775331422686577, 0.060602445155382156, 0.015971101820468903, 0.03445184975862503, 0.4316053092479706, 0.4819965064525604, 0.008238772861659527, 0.27349013090133667, 0.02135261707007885, 0.006705985404551029, 0.06119696795940399, 0.05213680863380432, 0.13011163473129272], [0.11128952354192734, 0.6662537455558777, 0.10913366079330444, 0.08027850091457367, 0.016604425385594368, 0.1904260814189911, 0.09001538157463074, 0.12034764140844345, 0.032395973801612854, 0.07767382264137268, 0.13288450241088867, 0.0038343279156833887, 0.15461067855358124, 0.13092683255672455, 0.1198263093829155], [0.045069050043821335, 0.5156355500221252, 0.014353718608617783, 0.026371080428361893, 0.027669712901115417, 0.08119883388280869, 0.2510265111923218, 0.45373910665512085, 0.0644708126783371, 0.03346102684736252, 0.06456929445266724, 0.036929432302713394, 0.1635800451040268, 0.4964689314365387, 0.12627021968364716], [0.15574656426906586, 0.22756966948509216, 0.016156630590558052, 0.0469389408826828, 0.01719032973051071, 0.01580459624528885, 0.07493647187948227, 0.02412206307053566, 0.018628407269716263, 0.03879624605178833, 0.03891688585281372, 0.03379734605550766, 0.008454171009361744, 0.03055991418659687, 0.1906210333108902], [0.7930518984794617, 0.8248118162155151, 0.03787774592638016, 0.2306395173072815, 0.10945193469524384, 0.048738475888967514, 0.07385316491127014, 0.1171715259552002, 0.09199279546737671, 0.5013920664787292, 0.07074998319149017, 0.14583703875541687, 0.0018764830892905593, 0.00646476075053215, 0.13562877476215363], [0.139163076877594, 0.17112046480178833, 0.0021531793754547834, 0.0053843106143176556, 0.013183848932385445, 0.014547600410878658, 0.39682450890541077, 0.7216413021087646, 0.013683686964213848, 0.038195278495550156, 0.0014429710572585464, 0.0075409854762256145, 0.06976743042469025, 0.016425929963588715, 0.1257757991552353], [0.37428542971611023, 0.3404470980167389, 0.07186836749315262, 0.11062464118003845, 0.09624961018562317, 0.06910651177167892, 0.26704323291778564, 0.35990291833877563, 0.016681469976902008, 0.31615501642227173, 0.23382727801799774, 0.051282789558172226, 0.1643712818622589, 0.24623094499111176, 0.1059461385011673], [0.2896858751773834, 0.2041676938533783, 0.0844137892127037, 0.26597079634666443, 0.007990201003849506, 0.057605594396591187, 0.37075188755989075, 0.33039090037345886, 0.04668770357966423, 0.6492098569869995, 0.34850311279296875, 0.12703292071819305, 0.22453922033309937, 0.2423134297132492, 0.11649563163518906]]], [[[0.12698857486248016, 0.15100647509098053, 0.08910781890153885, 0.09401589632034302, 0.14288602769374847, 0.07712502032518387, 0.1496707946062088, 0.23784373700618744, 0.024656152352690697, 0.07261883467435837, 0.11269068717956543, 0.10889188945293427, 0.23155105113983154, 0.10633593797683716, 0.14060717821121216], [0.33520859479904175, 0.17541100084781647, 0.043081097304821014, 0.07071122527122498, 0.031066332012414932, 0.05302952229976654, 0.13712948560714722, 0.0819549486041069, 0.010218805633485317, 0.05350261554121971, 0.03376028686761856, 0.016291575506329536, 0.04384060204029083, 0.016914406791329384, 0.06937505304813385], [0.2972787618637085, 0.14542943239212036, 0.2801832854747772, 0.6946116089820862, 0.3750338852405548, 0.09368664771318436, 0.11078806221485138, 0.124379463493824, 0.028408339247107506, 0.3442523181438446, 0.15075638890266418, 0.08511755615472794, 0.32891392707824707, 0.12337944656610489, 0.05913665145635605], [0.06821048259735107, 0.007578656077384949, 0.033511072397232056, 0.039627932012081146, 0.016393400728702545, 0.20925503969192505, 0.15704192221164703, 0.024064799770712852, 0.005696912761777639, 0.01698312722146511, 0.15042142570018768, 0.0017041407991200686, 0.016995420679450035, 0.005758653394877911, 0.015053601935505867], [0.05268644914031029, 0.018480738624930382, 0.006206580437719822, 0.01908770017325878, 0.009213676676154137, 0.012446015141904354, 0.2606332302093506, 0.15275397896766663, 0.004711512941867113, 0.01064901053905487, 0.00940486416220665, 0.00429189158603549, 0.014810611493885517, 0.012880465015769005, 0.15466143190860748], [0.017502065747976303, 0.09008979797363281, 0.045234303921461105, 0.04321402683854103, 0.014162504114210606, 0.2841097414493561, 0.10382679849863052, 0.4497845470905304, 0.042821191251277924, 0.03918898105621338, 0.06416238099336624, 0.04602029174566269, 0.2197093665599823, 0.07547488063573837, 0.13285692036151886], [0.02909473329782486, 0.05293780937790871, 0.025932423770427704, 0.061369478702545166, 0.12287095934152603, 0.12207728624343872, 0.20267462730407715, 0.3647293746471405, 0.036313559859991074, 0.028358493000268936, 0.054471470415592194, 0.007501897402107716, 0.10796680301427841, 0.05851392075419426, 0.12157665193080902], [0.02889016829431057, 0.05256107077002525, 0.05110660940408707, 0.09513585269451141, 0.049980901181697845, 0.07343146204948425, 0.21190620958805084, 0.10279127210378647, 0.1787082403898239, 0.022944355383515358, 0.03947293758392334, 0.008258121088147163, 0.09723227471113205, 0.030062679201364517, 0.14898137748241425], [0.027054987847805023, 0.06796294450759888, 0.02347770519554615, 0.04540639370679855, 0.13579830527305603, 0.1935206949710846, 0.09281998127698898, 0.22921815514564514, 0.012567882426083088, 0.02752627059817314, 0.05939676612615585, 0.00633750855922699, 0.24427738785743713, 0.10302533209323883, 0.18246731162071228], [0.13923436403274536, 0.07431720942258835, 0.06541924923658371, 0.14132679998874664, 0.10506866127252579, 0.06156519800424576, 0.21440355479717255, 0.06509862840175629, 0.02759510651230812, 0.10144857317209244, 0.13265900313854218, 0.048845868557691574, 0.16166719794273376, 0.1116088330745697, 0.15105699002742767], [0.14352908730506897, 0.10288456827402115, 0.05261845886707306, 0.1541282832622528, 0.05661991983652115, 0.12065587192773819, 0.10697692632675171, 0.15951323509216309, 0.1055477038025856, 0.14385449886322021, 0.23090383410453796, 0.08539394289255142, 0.09938428550958633, 0.08322764188051224, 0.11896289885044098], [0.24387870728969574, 0.11191204935312271, 0.06428070366382599, 0.3038298189640045, 0.14750736951828003, 0.1200045570731163, 0.46686112880706787, 0.3116493225097656, 0.10273779183626175, 0.10795925557613373, 0.1416371762752533, 0.09460661560297012, 0.27618303894996643, 0.09149192273616791, 0.10828596353530884], [0.1039203479886055, 0.05052376165986061, 0.051659513264894485, 0.18036356568336487, 0.11265069991350174, 0.047071922570466995, 0.3453211784362793, 0.29340654611587524, 0.007079527713358402, 0.06730296462774277, 0.08055143058300018, 0.02563900128006935, 0.19650228321552277, 0.060815099626779556, 0.13184599578380585], [0.1947154402732849, 0.003113611601293087, 0.028957238420844078, 0.026910793036222458, 0.017121652141213417, 0.08169777691364288, 0.32467299699783325, 0.05661681666970253, 0.007502032909542322, 0.02869880571961403, 0.020577264949679375, 0.0070375413633883, 0.16551434993743896, 0.06083058565855026, 0.06852211803197861], [0.018467016518115997, 0.004791167099028826, 0.015553582459688187, 0.021664531901478767, 0.025298617780208588, 0.1971224695444107, 0.13395515084266663, 0.1881190687417984, 0.05309745669364929, 0.018728721886873245, 0.018886514008045197, 0.023248562589287758, 0.008927382528781891, 0.03253133222460747, 0.130488321185112], [0.4018593430519104, 0.09619066119194031, 0.047895513474941254, 0.0887020081281662, 0.04670756310224533, 0.17605426907539368, 0.21604543924331665, 0.1403813511133194, 0.0010993692558258772, 0.07762767374515533, 0.0958188846707344, 0.1024225577712059, 0.06565871089696884, 0.04857100546360016, 0.1717240959405899], [0.31909966468811035, 0.26355716586112976, 0.16833621263504028, 0.334572434425354, 0.18670302629470825, 0.11206400394439697, 0.46585598587989807, 0.15377958118915558, 0.014857469126582146, 0.07049962878227234, 0.1590365469455719, 0.09933225810527802, 0.23580892384052277, 0.09940709918737411, 0.11795931309461594], [0.3361136317253113, 0.18450267612934113, 0.10482683777809143, 0.3672127425670624, 0.09347432106733322, 0.06302808225154877, 0.17493662238121033, 0.11965186893939972, 0.06742112338542938, 0.13331438601016998, 0.26999813318252563, 0.03264465183019638, 0.07908355444669724, 0.09376725554466248, 0.11511774361133575], [0.271436870098114, 0.16103556752204895, 0.09723401814699173, 0.3494490087032318, 0.1582973301410675, 0.11393263936042786, 0.41371721029281616, 0.2938876152038574, 0.08068472146987915, 0.08301044255495071, 0.11968915909528732, 0.07779402285814285, 0.24559125304222107, 0.07589462399482727, 0.1087639182806015], [0.1091129332780838, 0.08970999717712402, 0.08557470142841339, 0.23009367287158966, 0.13180004060268402, 0.0638015940785408, 0.31095248460769653, 0.2814267873764038, 0.0075759077444672585, 0.039292845875024796, 0.06780961900949478, 0.013560868799686432, 0.15987654030323029, 0.04180291295051575, 0.12740370631217957], [0.4568881392478943, 0.01152532733976841, 0.12744615972042084, 0.16633041203022003, 0.05682089552283287, 0.22013583779335022, 0.46718865633010864, 0.06831676512956619, 0.011846139095723629, 0.051503561437129974, 0.07631707936525345, 0.017341753467917442, 0.16032609343528748, 0.06682911515235901, 0.06364742666482925], [0.0270079392939806, 0.003701634705066681, 0.024473953992128372, 0.035727839916944504, 0.031186459586024284, 0.22590965032577515, 0.1764952838420868, 0.1725662350654602, 0.06108492240309715, 0.017804577946662903, 0.01644762232899666, 0.018474329262971878, 0.0059660994447767735, 0.026993868872523308, 0.12890712916851044], [0.32686647772789, 0.10561588406562805, 0.10599718242883682, 0.08397059142589569, 0.05158340185880661, 0.22573474049568176, 0.19403943419456482, 0.08219113945960999, 0.0007591660832986236, 0.028280239552259445, 0.06139420345425606, 0.03943438082933426, 0.025857241824269295, 0.027251310646533966, 0.1435350626707077], [0.21139562129974365, 0.21867576241493225, 0.17973701655864716, 0.29884445667266846, 0.19560806453227997, 0.11132223159074783, 0.28179141879081726, 0.10507592558860779, 0.014165982604026794, 0.04481332749128342, 0.1297360062599182, 0.07738039642572403, 0.2323194295167923, 0.09134778380393982, 0.12234959006309509], [0.2484172284603119, 0.2714419662952423, 0.13623963296413422, 0.33317360281944275, 0.14056812226772308, 0.16453251242637634, 0.23482279479503632, 0.2797185182571411, 0.08398787677288055, 0.13855448365211487, 0.19988903403282166, 0.12159004807472229, 0.21263501048088074, 0.1342880129814148, 0.11613592505455017]], [[0.1659475415945053, 0.1821746528148651, 0.2680368423461914, 0.3257308900356293, 0.2135642170906067, 0.10952500998973846, 0.23729652166366577, 0.15246635675430298, 0.09328519552946091, 0.22413431107997894, 0.22322525084018707, 0.11237151175737381, 0.18681256473064423, 0.1572018712759018, 0.06837792694568634], [0.14290380477905273, 0.026570750400424004, 0.14845344424247742, 0.26635152101516724, 0.12476544827222824, 0.1522083431482315, 0.287058562040329, 0.16522644460201263, 0.21008911728858948, 0.3761942982673645, 0.12840349972248077, 0.0757022351026535, 0.39944273233413696, 0.379029244184494, 0.1911974847316742], [0.00885845348238945, 0.005625984165817499, 0.0020030708983540535, 0.005766861606389284, 0.001782223698683083, 0.004346099682152271, 0.014438317157328129, 0.010037342086434364, 0.0175970196723938, 0.0067982920445501804, 0.003056151093915105, 0.005088370759040117, 0.0035549686290323734, 0.002117584692314267, 0.17935973405838013], [0.04871530085802078, 0.2322341799736023, 0.043161727488040924, 0.046935759484767914, 0.04166096821427345, 0.048159919679164886, 0.2838554382324219, 0.5679410696029663, 0.17445935308933258, 0.05776107683777809, 0.14550535380840302, 0.04300517588853836, 0.2332015484571457, 0.28196635842323303, 0.4675023853778839], [0.03277377411723137, 0.28776609897613525, 0.0018310850718989968, 0.006392122711986303, 0.0034063432831317186, 0.0006021481240168214, 0.02006486989557743, 0.09552518278360367, 0.02804744802415371, 0.060428690165281296, 0.004742977675050497, 0.018782831728458405, 0.016696294769644737, 0.023774143308401108, 0.16262513399124146], [0.006045958958566189, 0.0958699956536293, 0.007954242639243603, 0.011606856249272823, 0.004544504452496767, 0.010406642220914364, 0.011899203062057495, 0.07300186902284622, 0.002370428293943405, 0.012239865958690643, 0.020374998450279236, 0.012496876530349255, 0.024265890941023827, 0.0274967048317194, 0.1423870474100113], [0.008809137158095837, 0.13565093278884888, 0.03191651031374931, 0.0483417883515358, 0.028707973659038544, 0.039296794682741165, 0.018359076231718063, 0.07145766168832779, 0.13921810686588287, 0.01646633818745613, 0.06145479157567024, 0.028490308672189713, 0.056069642305374146, 0.13838331401348114, 0.19134177267551422], [0.39272594451904297, 0.39728477597236633, 0.32111606001853943, 0.41796234250068665, 0.15293559432029724, 0.04586965963244438, 0.16940170526504517, 0.022719532251358032, 0.14239482581615448, 0.5121501088142395, 0.19016578793525696, 0.06530822068452835, 0.29211705923080444, 0.14742477238178253, 0.11553633958101273], [0.009060109965503216, 0.08736205101013184, 0.03623565658926964, 0.046393588185310364, 0.04293924570083618, 0.049119193106889725, 0.018734706565737724, 0.10957584530115128, 0.04821338504552841, 0.02008068934082985, 0.029284991323947906, 0.015971768647432327, 0.05779576674103737, 0.21830672025680542, 0.21264111995697021], [0.02833615615963936, 0.24966742098331451, 0.06237170845270157, 0.03993965685367584, 0.10454770177602768, 0.019859671592712402, 0.03772445023059845, 0.19178973138332367, 0.012827831320464611, 0.03533304110169411, 0.024230163544416428, 0.054630037397146225, 0.032379381358623505, 0.08906079828739166, 0.17152637243270874], [0.015255320817232132, 0.21888743340969086, 0.1253896951675415, 0.08362822234630585, 0.12500159442424774, 0.02890017069876194, 0.03405824303627014, 0.07477163523435593, 0.0229325033724308, 0.01863025315105915, 0.044950928539037704, 0.0560457706451416, 0.04699615016579628, 0.08650227636098862, 0.1548503190279007], [0.011826024390757084, 0.10608652234077454, 0.04723645746707916, 0.057715099304914474, 0.03395959734916687, 0.028910892084240913, 0.011586843058466911, 0.050380002707242966, 0.030421555042266846, 0.00583301018923521, 0.015118762850761414, 0.014350258745253086, 0.01606619358062744, 0.025515934452414513, 0.18496018648147583], [0.015032858587801456, 0.5077551603317261, 0.07541441917419434, 0.08020945638418198, 0.10545077919960022, 0.2137133628129959, 0.01040775515139103, 0.09528981149196625, 0.09038985520601273, 0.012094871141016483, 0.025733938440680504, 0.06706724315881729, 0.03145073354244232, 0.09538157284259796, 0.34148263931274414], [0.32250380516052246, 0.7984310388565063, 0.3962976634502411, 0.40014326572418213, 0.3554738759994507, 0.47898975014686584, 0.10853014886379242, 0.20243746042251587, 0.127571240067482, 0.2699570655822754, 0.16473528742790222, 0.08001074939966202, 0.03713205084204674, 0.14643853902816772, 0.4229389429092407], [0.023898553103208542, 0.03448064997792244, 0.007101188413798809, 0.020377272740006447, 0.09085186570882797, 0.008504875935614109, 0.01689869724214077, 0.021393392235040665, 0.03013733960688114, 0.004040753003209829, 0.000672544410917908, 0.0007860396872274578, 0.0003324192948639393, 0.0003073772240895778, 0.13160185515880585], [0.025859396904706955, 0.29733914136886597, 0.09033425897359848, 0.06196272000670433, 0.10889838635921478, 0.14661002159118652, 0.034964289516210556, 0.07059973478317261, 0.007527152542024851, 0.007617437280714512, 0.006072000600397587, 0.0492180734872818, 0.0069811418652534485, 0.011496509425342083, 0.22706106305122375], [0.014849718660116196, 0.1462036818265915, 0.11065799742937088, 0.06219353526830673, 0.08005399256944656, 0.016894571483135223, 0.010269397869706154, 0.02562439627945423, 0.009192260913550854, 0.009821194224059582, 0.015785057097673416, 0.019254932180047035, 0.01222837995737791, 0.011684795841574669, 0.16154925525188446], [0.01973692700266838, 0.11480830609798431, 0.07148479670286179, 0.05237298831343651, 0.0777522474527359, 0.019268590956926346, 0.01592963933944702, 0.01235677395015955, 0.06519288569688797, 0.019938096404075623, 0.03185376524925232, 0.0271891038864851, 0.01742159202694893, 0.040164995938539505, 0.1837940812110901], [0.006014276295900345, 0.07228019088506699, 0.029915854334831238, 0.031709808856248856, 0.01963544264435768, 0.01660715602338314, 0.00532315531745553, 0.03606380149722099, 0.029185649007558823, 0.0046777487732470036, 0.01710142381489277, 0.013257446698844433, 0.01389795821160078, 0.02201540581882, 0.16183340549468994], [0.008549164049327374, 0.34144893288612366, 0.03957316279411316, 0.03764811158180237, 0.04039980471134186, 0.07271253317594528, 0.00613941578194499, 0.04612124711275101, 0.0911136344075203, 0.008750539273023605, 0.01715807057917118, 0.03749352693557739, 0.024577608332037926, 0.06848984956741333, 0.2503378689289093], [0.1472499966621399, 0.4703251123428345, 0.2558133602142334, 0.283985435962677, 0.21470209956169128, 0.17662864923477173, 0.07007063925266266, 0.06038873642683029, 0.20766907930374146, 0.26984694600105286, 0.16889145970344543, 0.27114859223365784, 0.03473396599292755, 0.13903996348381042, 0.2962591350078583], [0.020655758678913116, 0.020222418010234833, 0.006879583932459354, 0.019070995971560478, 0.07609020173549652, 0.006032301113009453, 0.015974652022123337, 0.01717195473611355, 0.05267442390322685, 0.004277344327419996, 0.0005684247589670122, 0.0007490122807212174, 0.0002994663082063198, 0.0002370573638472706, 0.12958088517189026], [0.009374987334012985, 0.23445867002010345, 0.05258592590689659, 0.020285839214920998, 0.024131227284669876, 0.0535256564617157, 0.01552440132945776, 0.032435644418001175, 0.006646827794611454, 0.005740212742239237, 0.005195626523345709, 0.07125341892242432, 0.0043562185019254684, 0.01014760322868824, 0.17807012796401978], [0.018758203834295273, 0.11843696236610413, 0.09101122617721558, 0.0610043928027153, 0.06165887042880058, 0.012400476261973381, 0.011786350980401039, 0.021215293556451797, 0.014211799949407578, 0.011016220785677433, 0.02130991406738758, 0.02418670989573002, 0.015627985820174217, 0.013993974775075912, 0.14536960422992706], [0.03985379636287689, 0.12957410514354706, 0.13386031985282898, 0.10592924803495407, 0.09455320239067078, 0.03913174197077751, 0.052976641803979874, 0.03812992200255394, 0.11070051789283752, 0.042073190212249756, 0.05433963984251022, 0.058929286897182465, 0.03380222246050835, 0.05054538697004318, 0.1317562311887741]], [[0.038382355123758316, 0.16509199142456055, 0.03795319423079491, 0.018471574410796165, 0.017937200143933296, 0.20822547376155853, 0.036850690841674805, 0.07025959342718124, 0.026183662936091423, 0.008891633711755276, 0.011525453999638557, 0.06559614092111588, 0.10240377485752106, 0.05705304443836212, 0.19186913967132568], [0.18736660480499268, 0.12802250683307648, 0.06000450998544693, 0.07085607945919037, 0.02492770366370678, 0.13308653235435486, 0.01379183866083622, 0.01460492704063654, 0.018005041405558586, 0.18972568213939667, 0.18918126821517944, 0.05261359363794327, 0.08419474214315414, 0.039842329919338226, 0.12843605875968933], [0.003212069161236286, 0.04924406483769417, 0.010131219401955605, 0.0015629208646714687, 0.009065762162208557, 0.04507109895348549, 0.003221129300072789, 0.07382506877183914, 0.0011923180427402258, 0.004047631751745939, 0.006328214425593615, 0.012952281162142754, 0.0641837865114212, 0.02541324496269226, 0.1715373396873474], [0.002438034862279892, 0.0007996301865205169, 0.10929557681083679, 0.030698396265506744, 0.007961505092680454, 0.21520712971687317, 0.0018748894799500704, 0.0015670642023906112, 0.00039643081254325807, 0.0017966092564165592, 0.010619523003697395, 0.0026792865246534348, 0.0035868084523826838, 0.001077426946721971, 0.003137440187856555], [0.04913554713129997, 0.023452362045645714, 0.16805477440357208, 0.2746557891368866, 0.369334876537323, 0.025402046740055084, 0.03595297038555145, 0.27975642681121826, 0.005478397477418184, 0.044800374656915665, 0.028408128768205643, 0.025396348908543587, 0.1202942430973053, 0.22760754823684692, 0.12602998316287994], [0.0008230121457017958, 0.006709535606205463, 0.005090394522994757, 0.005009432788938284, 0.0009200142812915146, 0.002589132636785507, 0.003276216797530651, 0.011904137209057808, 0.0009605096420273185, 0.0016532291192561388, 0.001647727913223207, 0.0010296034161001444, 0.00474548852071166, 0.004530362784862518, 0.14385877549648285], [0.011407818645238876, 0.11073090881109238, 0.11066732555627823, 0.07063236832618713, 0.2326628416776657, 0.057718440890312195, 0.005228970665484667, 0.12933272123336792, 0.010014788247644901, 0.0034599530044943094, 0.015450170263648033, 0.004393222741782665, 0.010258005000650883, 0.00790967233479023, 0.16524673998355865], [0.024886149913072586, 0.019822845235466957, 0.050577834248542786, 0.042761147022247314, 0.013624369166791439, 0.03171548992395401, 0.03447520360350609, 0.057101696729660034, 0.018126925453543663, 0.012612801045179367, 0.056599393486976624, 0.005686976481229067, 0.022324958816170692, 0.021004129201173782, 0.18438492715358734], [0.012148641981184483, 0.047028496861457825, 0.07792042940855026, 0.1455426812171936, 0.3985011875629425, 0.08270914107561111, 0.0031603944953531027, 0.07123681157827377, 0.020226983353495598, 0.005742877256125212, 0.009367674589157104, 0.007002389058470726, 0.013849785551428795, 0.006732230074703693, 0.14449873566627502], [0.029934342950582504, 0.04287242144346237, 0.10493571311235428, 0.10647397488355637, 0.01039193756878376, 0.1410648375749588, 0.06155749782919884, 0.08983614295721054, 0.05490254610776901, 0.038721270859241486, 0.021267540752887726, 0.05536682903766632, 0.019229264929890633, 0.008436290547251701, 0.15105655789375305], [0.009979508817195892, 0.08308109641075134, 0.026161497458815575, 0.023276647552847862, 0.0017319537000730634, 0.056630972772836685, 0.012614267878234386, 0.041058339178562164, 0.026752248406410217, 0.01169703807681799, 0.011314285919070244, 0.007283498533070087, 0.05053415521979332, 0.019243547692894936, 0.16277745366096497], [0.04712976887822151, 0.24274323880672455, 0.053717970848083496, 0.06948067992925644, 0.009206406772136688, 0.0471884086728096, 0.010105792433023453, 0.05801715701818466, 0.01891178824007511, 0.07684698700904846, 0.07729421555995941, 0.042662668973207474, 0.10241091996431351, 0.038032110780477524, 0.15563422441482544], [0.009955390356481075, 0.06358544528484344, 0.028598172590136528, 0.04170457646250725, 0.01363537646830082, 0.011423949152231216, 0.003101062262430787, 0.04170127958059311, 0.01145926769822836, 0.01274544931948185, 0.020664334297180176, 0.15329574048519135, 0.20515742897987366, 0.07666952162981033, 0.13521607220172882], [0.006747167091816664, 0.006801524665206671, 0.007903891615569592, 0.00237295706756413, 0.0009535709978081286, 0.0006887177005410194, 0.0011137888068333268, 0.0005580680444836617, 0.004365934059023857, 0.0043631866574287415, 0.004836279433220625, 0.0014166004257276654, 0.1882382482290268, 0.04424351081252098, 0.006875277496874332], [0.0040101236663758755, 0.00047035442548803985, 0.0008357138140127063, 0.009736553765833378, 0.00025759977870620787, 2.9679033104912378e-05, 0.008525178767740726, 0.0036214631982147694, 0.0009930779924616218, 0.0008531230851076543, 0.0029921825043857098, 7.93160234024981e-06, 6.746472354279831e-05, 0.0017078705132007599, 0.13162609934806824], [0.021027032285928726, 0.04388788715004921, 0.07337366044521332, 0.13240061700344086, 0.005691900383681059, 0.08179081231355667, 0.010154702700674534, 0.019539857283234596, 0.013572044670581818, 0.03972425311803818, 0.14196330308914185, 0.0491810142993927, 0.029326222836971283, 0.024830663576722145, 0.1775946319103241], [0.020570920780301094, 0.07008225470781326, 0.05771828070282936, 0.10093566030263901, 0.0037175160832703114, 0.10588520765304565, 0.008791210129857063, 0.07720224559307098, 0.037850137799978256, 0.016810759902000427, 0.0763774886727333, 0.06772230565547943, 0.10185997188091278, 0.02133399061858654, 0.1501101702451706], [0.027059482410550117, 0.22707954049110413, 0.13379518687725067, 0.08346803486347198, 0.011664706282317638, 0.1994924694299698, 0.013729198835790157, 0.07924864441156387, 0.10303384810686111, 0.02253318764269352, 0.06352351605892181, 0.13561668992042542, 0.3492315113544464, 0.13069112598896027, 0.12187084555625916], [0.038929592818021774, 0.2334582358598709, 0.12089657783508301, 0.17347271740436554, 0.023068996146321297, 0.04853734001517296, 0.008499456569552422, 0.0867975577712059, 0.02351396717131138, 0.04524386301636696, 0.12492679059505463, 0.06575564295053482, 0.10587428510189056, 0.055128976702690125, 0.1414995789527893], [0.011872883886098862, 0.08469298481941223, 0.054403409361839294, 0.08831894397735596, 0.02684788778424263, 0.021699469536542892, 0.0027920349966734648, 0.05190650746226311, 0.006984782870858908, 0.008844600059092045, 0.02751598134636879, 0.22613400220870972, 0.15431185066699982, 0.06476734578609467, 0.1412026435136795], [0.015115483663976192, 0.08628259599208832, 0.023322032764554024, 0.012461238540709019, 0.0028755213133990765, 0.010226217098534107, 0.0010302395094186068, 0.002081838669255376, 0.003762529231607914, 0.013111302629113197, 0.0290949996560812, 0.013309521600604057, 0.22778895497322083, 0.05992528051137924, 0.00796937569975853], [0.0057023135013878345, 0.0003758604871109128, 0.0009645622340030968, 0.01432577334344387, 0.00027227052487432957, 3.7724938010796905e-05, 0.007459490094333887, 0.0037525389343500137, 0.001061747083440423, 0.0008801367366686463, 0.0023195864632725716, 8.150678695528768e-06, 4.0667833673069254e-05, 0.001007204526104033, 0.12961283326148987], [0.017900969833135605, 0.026770949363708496, 0.15903817117214203, 0.31877970695495605, 0.014844128862023354, 0.10845804959535599, 0.00868347566574812, 0.015460771508514881, 0.008762474171817303, 0.01190071552991867, 0.07999671250581741, 0.053750935941934586, 0.013735906220972538, 0.020958656445145607, 0.15606556832790375], [0.022256335243582726, 0.07135839015245438, 0.07359576225280762, 0.12423767894506454, 0.006224590353667736, 0.13500085473060608, 0.008429165929555893, 0.08156562596559525, 0.02983916364610195, 0.013062523677945137, 0.10225346684455872, 0.04065772891044617, 0.06899033486843109, 0.012502058409154415, 0.13831046223640442], [0.016071150079369545, 0.06728275120258331, 0.025518205016851425, 0.023689931258559227, 0.0069392030127346516, 0.04150809720158577, 0.00898416806012392, 0.016712933778762817, 0.005143268499523401, 0.020111138001084328, 0.03020956739783287, 0.01359627302736044, 0.018198341131210327, 0.01637156493961811, 0.1379418522119522]], [[0.029921628534793854, 0.09876842796802521, 0.1324968934059143, 0.09236511588096619, 0.02831152267754078, 0.08077768236398697, 0.03118293546140194, 0.1750149130821228, 0.015778981149196625, 0.07032441347837448, 0.22269371151924133, 0.07579661160707474, 0.029184984043240547, 0.053061336278915405, 0.18562854826450348], [0.07805982232093811, 0.05365234240889549, 0.2842547595500946, 0.2606758773326874, 0.21293140947818756, 0.02651267871260643, 0.08033362030982971, 0.07913534343242645, 0.17101624608039856, 0.12522375583648682, 0.14315897226333618, 0.16815446317195892, 0.0695369690656662, 0.13316825032234192, 0.19111928343772888], [0.11272483319044113, 0.11636882275342941, 0.45685258507728577, 0.0910579040646553, 0.3091263473033905, 0.12632955610752106, 0.1822080761194229, 0.18498732149600983, 0.6353387832641602, 0.08394157886505127, 0.3285849094390869, 0.4818887710571289, 0.08592816442251205, 0.3495768904685974, 0.07449600845575333], [0.2834128737449646, 0.1102365031838417, 0.1840669959783554, 0.5708534121513367, 0.3157653212547302, 0.041008107364177704, 0.038309745490550995, 0.03211268410086632, 0.6102551817893982, 0.20786605775356293, 0.21116787195205688, 0.10018377006053925, 0.04653669148683548, 0.17929011583328247, 0.11314841359853745], [0.5993789434432983, 0.0908532664179802, 0.49218761920928955, 0.41100576519966125, 0.18825526535511017, 0.4342217445373535, 0.12116678059101105, 0.10673660039901733, 0.822167158126831, 0.4385586380958557, 0.6995345950126648, 0.18085956573486328, 0.1357179582118988, 0.2864921987056732, 0.034255724400281906], [0.858432412147522, 0.34460219740867615, 0.7778953909873962, 0.7743141651153564, 0.4405529797077179, 0.4761039614677429, 0.6155950427055359, 0.06873662024736404, 0.7323919534683228, 0.7086790204048157, 0.6720118522644043, 0.45794978737831116, 0.1628962755203247, 0.4249861538410187, 0.040913816541433334], [0.04546767473220825, 0.0383436344563961, 0.10268200188875198, 0.20100316405296326, 0.185649111866951, 0.08432896435260773, 0.060354892164468765, 0.07717668265104294, 0.3201402723789215, 0.04503992572426796, 0.088813915848732, 0.3990366756916046, 0.1564548909664154, 0.08066049963235855, 0.11440145969390869], [0.21178147196769714, 0.043018583208322525, 0.1065564677119255, 0.10858221352100372, 0.05675008147954941, 0.06700197607278824, 0.12675313651561737, 0.058651700615882874, 0.18508696556091309, 0.05493801832199097, 0.037313126027584076, 0.19010567665100098, 0.07823225855827332, 0.034572359174489975, 0.16783590614795685], [0.053469568490982056, 0.03894811123609543, 0.06651152670383453, 0.10646583139896393, 0.08985435962677002, 0.07578439265489578, 0.03395741805434227, 0.09802807122468948, 0.190333291888237, 0.07748086005449295, 0.07400990277528763, 0.6643930077552795, 0.07830479741096497, 0.07947986572980881, 0.11464671790599823], [0.1680978536605835, 0.06724530458450317, 0.16071708500385284, 0.2987021803855896, 0.11997595429420471, 0.007637033239006996, 0.05953739956021309, 0.06456195563077927, 0.07405640929937363, 0.11493658274412155, 0.07269633561372757, 0.12183233350515366, 0.019239120185375214, 0.0931614562869072, 0.15387272834777832], [0.09433168172836304, 0.05311369523406029, 0.44581180810928345, 0.2857709527015686, 0.11141614615917206, 0.04973546415567398, 0.10592624545097351, 0.0732862576842308, 0.26435965299606323, 0.07302475720643997, 0.17637307941913605, 0.06760746240615845, 0.052111051976680756, 0.29667070508003235, 0.11431443691253662], [0.07687122374773026, 0.10929025709629059, 0.4687592387199402, 0.20397132635116577, 0.26744040846824646, 0.03514130413532257, 0.033296968787908554, 0.08783485740423203, 0.22074763476848602, 0.08713625371456146, 0.12920482456684113, 0.05166565254330635, 0.07679110020399094, 0.17419996857643127, 0.1387287825345993], [0.061203911900520325, 0.12594261765480042, 0.353413462638855, 0.22131817042827606, 0.41015592217445374, 0.11432977020740509, 0.010031531564891338, 0.048355478793382645, 0.27572426199913025, 0.07773520797491074, 0.2322542816400528, 0.1527126431465149, 0.05797232687473297, 0.09810248017311096, 0.16366761922836304], [0.10230414569377899, 0.03857935592532158, 0.05230129137635231, 0.14396332204341888, 0.09251677989959717, 0.03541665896773338, 0.005624003708362579, 0.014271721243858337, 0.042375415563583374, 0.13543996214866638, 0.061749108135700226, 0.00788076315075159, 0.1602918803691864, 0.07564403861761093, 0.09375559538602829], [0.705120861530304, 0.026186510920524597, 0.8528315424919128, 0.8252069354057312, 0.24319231510162354, 0.07270172983407974, 0.09487330913543701, 0.07207771390676498, 0.4722364544868469, 0.7067926526069641, 0.8624283075332642, 0.07399676740169525, 0.0075901346281170845, 0.016478050500154495, 0.12560917437076569], [0.27840110659599304, 0.06363435834646225, 0.3689763844013214, 0.33064448833465576, 0.25749024748802185, 0.1453908383846283, 0.03645810857415199, 0.00836147554218769, 0.3977815508842468, 0.41805213689804077, 0.17756043374538422, 0.05318059027194977, 0.011340576224029064, 0.020938394591212273, 0.05934957042336464], [0.17816129326820374, 0.10609658807516098, 0.17893879115581512, 0.28182876110076904, 0.15060719847679138, 0.03372456133365631, 0.04276707395911217, 0.050946421921253204, 0.04137968271970749, 0.16634012758731842, 0.16395889222621918, 0.24548840522766113, 0.05229371041059494, 0.09448723495006561, 0.12793652713298798], [0.14424489438533783, 0.0705854520201683, 0.24214811623096466, 0.24549053609371185, 0.19939330220222473, 0.02639644220471382, 0.021373553201556206, 0.024115193635225296, 0.08405331522226334, 0.14685925841331482, 0.15661610662937164, 0.06219787895679474, 0.032059792429208755, 0.09036684036254883, 0.15146715939044952], [0.06650430709123611, 0.10705426335334778, 0.3146411180496216, 0.1647443175315857, 0.23945462703704834, 0.035643309354782104, 0.026562364771962166, 0.09605439007282257, 0.19827118515968323, 0.1037423387169838, 0.14283734560012817, 0.08165161311626434, 0.07012972235679626, 0.11072988063097, 0.13417953252792358], [0.06460674107074738, 0.10897383838891983, 0.18354696035385132, 0.20187535881996155, 0.38844820857048035, 0.04722803831100464, 0.010622762143611908, 0.04332485795021057, 0.31279584765434265, 0.11892355233430862, 0.20366235077381134, 0.1460915356874466, 0.041410893201828, 0.060890424996614456, 0.16885291039943695], [0.08445128798484802, 0.07278266549110413, 0.017734743654727936, 0.12906457483768463, 0.17354236543178558, 0.01439378596842289, 0.0032682251185178757, 0.009051240049302578, 0.02403325028717518, 0.17859239876270294, 0.05114053934812546, 0.026160510256886482, 0.17188863456249237, 0.059929899871349335, 0.12745818495750427], [0.6940725445747375, 0.016104217618703842, 0.8427497148513794, 0.8075915575027466, 0.2572270333766937, 0.04667792096734047, 0.07690176367759705, 0.06650352478027344, 0.4641934931278229, 0.7403572797775269, 0.892522931098938, 0.08286882191896439, 0.00509345019236207, 0.009769911877810955, 0.1252693384885788], [0.47638654708862305, 0.08160793781280518, 0.2188907116651535, 0.3983159363269806, 0.3041192293167114, 0.0773146003484726, 0.041229549795389175, 0.00785501953214407, 0.20719125866889954, 0.6323855519294739, 0.1790589690208435, 0.15920953452587128, 0.005728188902139664, 0.011172757484018803, 0.10331764072179794], [0.3162515461444855, 0.12029282748699188, 0.1898643672466278, 0.3138664960861206, 0.22235795855522156, 0.03812789171934128, 0.07994988560676575, 0.07006566971540451, 0.06856126338243484, 0.2470276951789856, 0.2142392098903656, 0.4667101502418518, 0.07071195542812347, 0.09391427785158157, 0.11791101843118668], [0.15722334384918213, 0.11492010205984116, 0.22595097124576569, 0.17283931374549866, 0.11246844381093979, 0.07424511015415192, 0.1308857947587967, 0.1509532928466797, 0.12219540029764175, 0.14498494565486908, 0.13763099908828735, 0.16327989101409912, 0.12245305627584457, 0.21428720653057098, 0.12265608459711075]], [[0.03995227441191673, 0.02612248808145523, 0.09039098769426346, 0.04685363546013832, 0.14171013236045837, 0.3046724796295166, 0.08713044226169586, 0.11726538836956024, 0.3945818245410919, 0.03867875412106514, 0.060879118740558624, 0.3211958110332489, 0.1562168449163437, 0.1954476237297058, 0.12928469479084015], [0.138319730758667, 0.1925395429134369, 0.06914161890745163, 0.1830926090478897, 0.22252067923545837, 0.24239645898342133, 0.2738734483718872, 0.3115195333957672, 0.287569522857666, 0.12556934356689453, 0.047479670494794846, 0.1859251707792282, 0.015966184437274933, 0.050888173282146454, 0.04287213087081909], [0.059622667729854584, 0.19761067628860474, 0.019807182252407074, 0.02911451645195484, 0.11472073942422867, 0.03754669055342674, 0.08183436095714569, 0.09122617542743683, 0.10595303028821945, 0.094895139336586, 0.022252719849348068, 0.087751105427742, 0.015402892604470253, 0.02668953314423561, 0.15029701590538025], [0.4440009295940399, 0.5055950880050659, 0.14072291553020477, 0.20776981115341187, 0.24339812994003296, 0.01946749910712242, 0.1477651447057724, 0.24892206490039825, 0.13990418612957, 0.5277839303016663, 0.22113053500652313, 0.7815175652503967, 0.04741470143198967, 0.31336119771003723, 0.318754643201828], [0.003975332248955965, 0.09357346594333649, 0.000580776366405189, 0.001556370290927589, 0.0040078358724713326, 0.00020105167641304433, 0.005314813926815987, 0.0463886484503746, 0.0025405578780919313, 0.008098164573311806, 0.0004367573419585824, 0.0955028310418129, 0.0013312119990587234, 0.008472515270113945, 0.16612127423286438], [0.00713347876444459, 0.11304348707199097, 0.007166451308876276, 0.017305465415120125, 0.01892760582268238, 0.004294875077903271, 0.013284130021929741, 0.05641845986247063, 0.006293897051364183, 0.008091668598353863, 0.004229044076055288, 0.03852742537856102, 0.036073870956897736, 0.030675750225782394, 0.1423715502023697], [0.112990602850914, 0.20299020409584045, 0.29141831398010254, 0.1917479783296585, 0.25626659393310547, 0.40023526549339294, 0.045914653688669205, 0.05403761938214302, 0.3577503561973572, 0.11164049804210663, 0.20054538547992706, 0.23382915556430817, 0.3541012704372406, 0.39880213141441345, 0.05442150682210922], [0.11769542098045349, 0.22490660846233368, 0.16446754336357117, 0.17726869881153107, 0.24409359693527222, 0.16966795921325684, 0.06426751613616943, 0.1868649125099182, 0.17593497037887573, 0.10732528567314148, 0.1210716962814331, 0.18835949897766113, 0.07820838689804077, 0.12172650545835495, 0.0815061554312706], [0.08801974356174469, 0.2964327037334442, 0.17140379548072815, 0.1086457222700119, 0.1790848970413208, 0.042561717331409454, 0.02568918652832508, 0.12736740708351135, 0.4644424617290497, 0.09952269494533539, 0.1403166949748993, 0.12085206061601639, 0.2499331831932068, 0.14905890822410583, 0.04691213369369507], [0.28339406847953796, 0.25363603234291077, 0.49371209740638733, 0.28714650869369507, 0.42171764373779297, 0.03586414083838463, 0.140908345580101, 0.27345338463783264, 0.06897412985563278, 0.24740128219127655, 0.5061832070350647, 0.4192107915878296, 0.43851029872894287, 0.29079654812812805, 0.10071542859077454], [0.049345988780260086, 0.1473262906074524, 0.10952533781528473, 0.16707968711853027, 0.25493475794792175, 0.03866606950759888, 0.046480532735586166, 0.16288119554519653, 0.06614720076322556, 0.0629507377743721, 0.07218940556049347, 0.3448391556739807, 0.06943795084953308, 0.058807674795389175, 0.135455921292305], [0.05557708069682121, 0.024377070367336273, 0.171014666557312, 0.1548214852809906, 0.21205416321754456, 0.29049578309059143, 0.08155391365289688, 0.2053205668926239, 0.09979691356420517, 0.11640740185976028, 0.23155182600021362, 0.4772811830043793, 0.2134055644273758, 0.3209300637245178, 0.0739695355296135], [0.046621087938547134, 0.02855776995420456, 0.11975010484457016, 0.2049850970506668, 0.16244490444660187, 0.14614170789718628, 0.03785347566008568, 0.2537410259246826, 0.3719625771045685, 0.1159287542104721, 0.23734091222286224, 0.26474830508232117, 0.04938332363963127, 0.17566856741905212, 0.034675102680921555], [0.08535599708557129, 0.01230260543525219, 0.28460273146629333, 0.3323705196380615, 0.13364574313163757, 0.14216013252735138, 0.16550986468791962, 0.36634352803230286, 0.3233327269554138, 0.13755354285240173, 0.6341029405593872, 0.1276889443397522, 0.0818048045039177, 0.2633805274963379, 0.10007897019386292], [0.014263293705880642, 0.07173046469688416, 0.01932992786169052, 0.01909404993057251, 0.16755935549736023, 0.2271488904953003, 0.1093294620513916, 0.14342457056045532, 0.0580194853246212, 0.01671113632619381, 0.03395597264170647, 0.0692841187119484, 0.07175575196743011, 0.04972841590642929, 0.12856654822826385], [0.06590985506772995, 0.1636172980070114, 0.09935098141431808, 0.20126965641975403, 0.4101002812385559, 0.21936923265457153, 0.26084569096565247, 0.3593950569629669, 0.014820259064435959, 0.05201014503836632, 0.03426084294915199, 0.38774317502975464, 0.1401163786649704, 0.3782513439655304, 0.13036324083805084], [0.05128908529877663, 0.11090300232172012, 0.24501535296440125, 0.07115167379379272, 0.3950805068016052, 0.2010982632637024, 0.08927696198225021, 0.2923780679702759, 0.11195118725299835, 0.05971711874008179, 0.14540457725524902, 0.4000069797039032, 0.2374461144208908, 0.47139719128608704, 0.10731440782546997], [0.014083221554756165, 0.029302498325705528, 0.019839908927679062, 0.019802037626504898, 0.11310776323080063, 0.014347831718623638, 0.013065088540315628, 0.0404186025261879, 0.14103254675865173, 0.01056672353297472, 0.02028844505548477, 0.4335528016090393, 0.019943613559007645, 0.08491621166467667, 0.15365199744701385], [0.04251990094780922, 0.025738505646586418, 0.19788101315498352, 0.08900192379951477, 0.20504283905029297, 0.36725619435310364, 0.05852765589952469, 0.12635937333106995, 0.07596885412931442, 0.055006030946969986, 0.1975020170211792, 0.39253395795822144, 0.2602497935295105, 0.3791850209236145, 0.11310473829507828], [0.06150972843170166, 0.049163203686475754, 0.14174170792102814, 0.13322500884532928, 0.16170991957187653, 0.21354396641254425, 0.04667104035615921, 0.26311540603637695, 0.32218027114868164, 0.0809161439538002, 0.18361496925354004, 0.23948682844638824, 0.09133663028478622, 0.25973111391067505, 0.07212682068347931], [0.12382826954126358, 0.035204268991947174, 0.3469122052192688, 0.27821084856987, 0.12485836446285248, 0.1130678728222847, 0.12963837385177612, 0.3451126217842102, 0.16417652368545532, 0.12570835649967194, 0.5000419616699219, 0.09880878776311874, 0.042446259409189224, 0.2635292708873749, 0.16834798455238342], [0.010800065472722054, 0.04851265624165535, 0.01629789173603058, 0.013155121356248856, 0.14412836730480194, 0.10944324731826782, 0.08000180870294571, 0.10409139841794968, 0.054843056946992874, 0.011575616896152496, 0.02017728053033352, 0.044063322246074677, 0.04816943034529686, 0.03936787694692612, 0.1280953288078308], [0.03501533716917038, 0.12365423142910004, 0.058643028140068054, 0.026187611743807793, 0.2106953263282776, 0.09627192467451096, 0.1373300403356552, 0.209503173828125, 0.00544273667037487, 0.010177833028137684, 0.00795654021203518, 0.17826952040195465, 0.06280092895030975, 0.2785777747631073, 0.15446779131889343], [0.055331505835056305, 0.14680130779743195, 0.22850985825061798, 0.040600359439849854, 0.2299574315547943, 0.21366852521896362, 0.10291176289319992, 0.2649042010307312, 0.07482050359249115, 0.04207760840654373, 0.11352740973234177, 0.22353075444698334, 0.2551318407058716, 0.4900997579097748, 0.11985023319721222], [0.04223596677184105, 0.14613933861255646, 0.08112313598394394, 0.04192597419023514, 0.11981905251741409, 0.18680673837661743, 0.07695262134075165, 0.14058402180671692, 0.1875196099281311, 0.05864474177360535, 0.0581248439848423, 0.23554684221744537, 0.21983209252357483, 0.1619952768087387, 0.12595340609550476]], [[0.24939602613449097, 0.0921018123626709, 0.20195554196834564, 0.25931593775749207, 0.24976609647274017, 0.08025927096605301, 0.10602997988462448, 0.08455296605825424, 0.038250602781772614, 0.34039628505706787, 0.2528480887413025, 0.17168891429901123, 0.12038858979940414, 0.16591216623783112, 0.05973837152123451], [0.04881530627608299, 0.07757209986448288, 0.080610491335392, 0.047049663960933685, 0.2744564712047577, 0.18291208148002625, 0.11781244724988937, 0.130965456366539, 0.16412131488323212, 0.049904536455869675, 0.10192018002271652, 0.46385079622268677, 0.23078110814094543, 0.23192283511161804, 0.17445482313632965], [0.11153621971607208, 0.27696484327316284, 0.0350787453353405, 0.011731116101145744, 0.08945441246032715, 0.2750371992588043, 0.07341955602169037, 0.12011690437793732, 0.026965567842125893, 0.023494159802794456, 0.015654105693101883, 0.05704642832279205, 0.11022293567657471, 0.0463077574968338, 0.1307818740606308], [0.06216026097536087, 0.123567596077919, 0.044055916368961334, 0.012494971975684166, 0.045035671442747116, 0.18137943744659424, 0.1501520872116089, 0.0996006652712822, 0.05310875549912453, 0.11289763450622559, 0.05045852065086365, 0.055306825786828995, 0.3424266576766968, 0.1600506752729416, 0.04121629521250725], [0.03470996022224426, 0.38486456871032715, 0.007671448867768049, 0.014272118918597698, 0.01295357197523117, 0.001353065250441432, 0.035229261964559555, 0.10929086059331894, 0.03641098737716675, 0.08741087466478348, 0.01870635710656643, 0.10011491179466248, 0.03142678365111351, 0.12343490868806839, 0.15971165895462036], [0.03053746558725834, 0.24113330245018005, 0.009466315619647503, 0.01980357989668846, 0.04114365205168724, 0.05523357167840004, 0.027042368426918983, 0.10979101061820984, 0.004461985547095537, 0.04689180105924606, 0.04529552906751633, 0.1364448219537735, 0.054305437952280045, 0.06579019129276276, 0.13895106315612793], [0.3289671242237091, 0.3443813920021057, 0.38217487931251526, 0.32642021775245667, 0.12515123188495636, 0.04144418612122536, 0.06740343570709229, 0.024584289640188217, 0.007359183859080076, 0.39375364780426025, 0.38123685121536255, 0.3035361170768738, 0.18788036704063416, 0.13260427117347717, 0.09976762533187866], [0.1711268573999405, 0.1900682896375656, 0.20778892934322357, 0.08847668021917343, 0.39589688181877136, 0.3955995440483093, 0.3348483741283417, 0.11133389919996262, 0.10861264914274216, 0.14033687114715576, 0.26926568150520325, 0.4846358299255371, 0.23405344784259796, 0.4343181252479553, 0.08998383581638336], [0.4154844284057617, 0.4073733687400818, 0.5541329383850098, 0.43809109926223755, 0.11503908038139343, 0.02849700301885605, 0.025097709149122238, 0.014711813069880009, 0.006424109451472759, 0.39197838306427, 0.4694826304912567, 0.17039237916469574, 0.16142874956130981, 0.19919125735759735, 0.054951149970293045], [0.24498042464256287, 0.277620404958725, 0.060333866626024246, 0.030503980815410614, 0.04090564325451851, 0.4659561812877655, 0.2110646367073059, 0.11101182550191879, 0.028219982981681824, 0.10508411377668381, 0.025386929512023926, 0.0648839995265007, 0.13676653802394867, 0.07622335106134415, 0.09164498746395111], [0.4220424294471741, 0.21296784281730652, 0.10483475774526596, 0.11319100856781006, 0.14396990835666656, 0.1309618502855301, 0.13656088709831238, 0.2097199261188507, 0.1397993415594101, 0.263439804315567, 0.10735370218753815, 0.27457332611083984, 0.26051631569862366, 0.18891198933124542, 0.10100831091403961], [0.12607140839099884, 0.08847615122795105, 0.09191321581602097, 0.06030821427702904, 0.21649383008480072, 0.10438336431980133, 0.07331530004739761, 0.1330888420343399, 0.04176999628543854, 0.06727378815412521, 0.06257567554712296, 0.21110908687114716, 0.09018781781196594, 0.09389244765043259, 0.13621515035629272], [0.062066610902547836, 0.07845254987478256, 0.24838510155677795, 0.16541223227977753, 0.16867581009864807, 0.019677892327308655, 0.021460779011249542, 0.018530650064349174, 0.023010587319731712, 0.10349667817354202, 0.16099916398525238, 0.3089703619480133, 0.08426959812641144, 0.16459643840789795, 0.06073381006717682], [0.11642084270715714, 0.11190053075551987, 0.12368596345186234, 0.04549993947148323, 0.3567850887775421, 0.06569506227970123, 0.07286660373210907, 0.03259556367993355, 0.09530685096979141, 0.19273261725902557, 0.06463074684143066, 0.7640278339385986, 0.06371455639600754, 0.1593337506055832, 0.2193848341703415], [0.11034999042749405, 0.03210863843560219, 0.010996339842677116, 0.026450032368302345, 0.051475513726472855, 0.02743532694876194, 0.3610350787639618, 0.20538736879825592, 0.017281753942370415, 0.05300014466047287, 0.012052728794515133, 0.08001075685024261, 0.0069017065688967705, 0.010893179103732109, 0.13085691630840302], [0.07615644484758377, 0.1536630541086197, 0.1253354847431183, 0.048576656728982925, 0.05276811867952347, 0.1611642986536026, 0.12317243963479996, 0.32385867834091187, 0.012925365939736366, 0.0864856168627739, 0.08918802440166473, 0.23886144161224365, 0.20351386070251465, 0.20744860172271729, 0.13318131864070892], [0.051417503505945206, 0.1600690335035324, 0.08639511466026306, 0.02997625432908535, 0.08503448963165283, 0.32695260643959045, 0.06822863221168518, 0.16364485025405884, 0.06138167902827263, 0.07786902785301208, 0.04443247988820076, 0.0585777647793293, 0.1263807862997055, 0.10769001394510269, 0.13808733224868774], [0.1321558654308319, 0.24967153370380402, 0.0761917233467102, 0.044561922550201416, 0.12028387933969498, 0.19908402860164642, 0.04708404839038849, 0.10076720267534256, 0.09921064227819443, 0.18345412611961365, 0.09404058009386063, 0.21650025248527527, 0.11625839024782181, 0.1530369222164154, 0.12011245638132095], [0.10757170617580414, 0.1042957603931427, 0.13590699434280396, 0.06331591308116913, 0.24158470332622528, 0.09161848574876785, 0.0633605495095253, 0.13977625966072083, 0.03925082087516785, 0.07121878862380981, 0.1023484393954277, 0.26378345489501953, 0.10990181565284729, 0.12030858546495438, 0.1261080652475357], [0.06512168049812317, 0.13837532699108124, 0.3250073194503784, 0.16753129661083221, 0.21647527813911438, 0.04118574038147926, 0.03336784988641739, 0.029927842319011688, 0.03334499150514603, 0.08782976865768433, 0.17631417512893677, 0.3171449303627014, 0.10520178824663162, 0.15139654278755188, 0.0914224162697792], [0.06382797658443451, 0.2566763758659363, 0.11056842654943466, 0.028001734986901283, 0.2813059389591217, 0.24806144833564758, 0.07807287573814392, 0.05373501405119896, 0.21183612942695618, 0.09658068418502808, 0.05084875971078873, 0.501965343952179, 0.06208595260977745, 0.10913741588592529, 0.26912179589271545], [0.08548272401094437, 0.017544403672218323, 0.011271107010543346, 0.022962557151913643, 0.05241750180721283, 0.02648325450718403, 0.3057800531387329, 0.19772306084632874, 0.025625178590416908, 0.03652432560920715, 0.006945622619241476, 0.05576859414577484, 0.00584550853818655, 0.008180957287549973, 0.12917736172676086], [0.03209112584590912, 0.1926622986793518, 0.09989916533231735, 0.02044818177819252, 0.04127199947834015, 0.22930434346199036, 0.09912838786840439, 0.3779822289943695, 0.007566491607576609, 0.046152934432029724, 0.04734500125050545, 0.35250937938690186, 0.10047939419746399, 0.16575956344604492, 0.13635975122451782], [0.05301084369421005, 0.1661737710237503, 0.08216799795627594, 0.025789698585867882, 0.07900767773389816, 0.3054123520851135, 0.08738221228122711, 0.17720931768417358, 0.06289011240005493, 0.06967967748641968, 0.05491774156689644, 0.02886299602687359, 0.10253670811653137, 0.09415244311094284, 0.129754438996315], [0.1895110011100769, 0.09308972954750061, 0.1887637972831726, 0.14927715063095093, 0.3653167188167572, 0.1686658412218094, 0.1126369759440422, 0.17013703286647797, 0.0685301423072815, 0.15278968214988708, 0.19327588379383087, 0.18825437128543854, 0.143904447555542, 0.143670454621315, 0.1203024610877037]], [[0.20045556128025055, 0.06346653401851654, 0.1246497705578804, 0.132145956158638, 0.18068760633468628, 0.0611145943403244, 0.3011611998081207, 0.09648064523935318, 0.3848741054534912, 0.20776434242725372, 0.09024091809988022, 0.10095226764678955, 0.05726093426346779, 0.17784324288368225, 0.06983170658349991], [0.06639314442873001, 0.03837187588214874, 0.306266725063324, 0.09758531302213669, 0.10875808447599411, 0.20901371538639069, 0.0894559919834137, 0.21620051562786102, 0.13805773854255676, 0.07912127673625946, 0.3521624505519867, 0.036526914685964584, 0.1551785171031952, 0.14622288942337036, 0.19236178696155548], [0.03379146009683609, 0.11666905134916306, 0.02791847102344036, 0.04754703491926193, 0.02039634808897972, 0.23185299336910248, 0.07985613495111465, 0.3240954875946045, 0.04561735317111015, 0.061520081013441086, 0.18156962096691132, 0.10860903561115265, 0.3409081995487213, 0.3218340575695038, 0.13103368878364563], [0.06278766691684723, 0.001863734913058579, 0.30563783645629883, 0.056017640978097916, 0.245498925447464, 0.11060530692338943, 0.09064232558012009, 0.004372697789222002, 0.007118886336684227, 0.06251134723424911, 0.17941752076148987, 0.004394095856696367, 0.11450538039207458, 0.046043287962675095, 0.021101655438542366], [0.11553236097097397, 0.0885467380285263, 0.2750205993652344, 0.21104735136032104, 0.3459762930870056, 0.07976578176021576, 0.218110129237175, 0.05760955810546875, 0.09680842608213425, 0.2662138342857361, 0.21090076863765717, 0.41520535945892334, 0.21548694372177124, 0.2248467653989792, 0.10481394827365875], [0.03112325258553028, 0.08175794035196304, 0.035110849887132645, 0.038375336676836014, 0.2468937784433365, 0.060934457927942276, 0.0843387246131897, 0.03423367813229561, 0.02026834897696972, 0.07970783859491348, 0.08959806710481644, 0.1693299561738968, 0.16057033836841583, 0.21660663187503815, 0.13329552114009857], [0.09539461880922318, 0.058681365102529526, 0.01674766093492508, 0.02866855263710022, 0.012030106969177723, 0.21465063095092773, 0.034089475870132446, 0.04479566961526871, 0.014019637368619442, 0.035355255007743835, 0.1569557934999466, 0.01038492750376463, 0.06631091982126236, 0.1547483503818512, 0.19284123182296753], [0.04954487085342407, 0.07065968960523605, 0.07275094836950302, 0.040997497737407684, 0.07946129143238068, 0.17300859093666077, 0.03222974017262459, 0.02469809167087078, 0.18557047843933105, 0.13542628288269043, 0.26776814460754395, 0.056715987622737885, 0.15973475575447083, 0.19029632210731506, 0.17610958218574524], [0.047577280551195145, 0.02606579288840294, 0.0165295097976923, 0.04137043654918671, 0.013305035419762135, 0.32835593819618225, 0.026565413922071457, 0.06772360950708389, 0.010228256694972515, 0.041277337819337845, 0.1336892545223236, 0.008326719515025616, 0.10322394222021103, 0.1976388841867447, 0.21077491343021393], [0.043893925845623016, 0.021177353337407112, 0.028366681188344955, 0.07016126066446304, 0.07573862373828888, 0.22699910402297974, 0.055615294724702835, 0.07980518788099289, 0.009269739501178265, 0.09460800141096115, 0.16427507996559143, 0.20832805335521698, 0.1427353024482727, 0.2680304944515228, 0.13907650113105774], [0.03411688283085823, 0.056632235646247864, 0.07365043461322784, 0.10934542864561081, 0.09185239672660828, 0.5077250003814697, 0.05141168087720871, 0.047258101403713226, 0.053326722234487534, 0.13365329802036285, 0.28296661376953125, 0.041020717471838, 0.08861301094293594, 0.13371184468269348, 0.11519401520490646], [0.04096442833542824, 0.07374820858240128, 0.07300861179828644, 0.10121195018291473, 0.051522452384233475, 0.3508135676383972, 0.03948133811354637, 0.047985587269067764, 0.06340529769659042, 0.06765846908092499, 0.281475692987442, 0.05536516010761261, 0.1822110116481781, 0.22272904217243195, 0.13150985538959503], [0.07982534170150757, 0.06016559898853302, 0.03820561617612839, 0.02410227432847023, 0.006901262793689966, 0.42442968487739563, 0.02364957146346569, 0.07835549116134644, 0.027230771258473396, 0.12123586237430573, 0.15446297824382782, 0.018115278333425522, 0.21087171137332916, 0.29417684674263, 0.08362340182065964], [0.05696694925427437, 0.014171368442475796, 0.06200120970606804, 0.021368764340877533, 0.012162269093096256, 0.0841592326760292, 0.03827953711152077, 0.07895056158304214, 0.01159723848104477, 0.05937046930193901, 0.023348387330770493, 0.008824712596833706, 0.13521961867809296, 0.23698511719703674, 0.03196632117033005], [0.11678174138069153, 0.8205142617225647, 0.01038320455700159, 0.023903295397758484, 0.21764065325260162, 0.2580764889717102, 0.20165181159973145, 0.2900886535644531, 0.03504627197980881, 0.10256802290678024, 0.03713424876332283, 0.7063723206520081, 0.8779962062835693, 0.8367014527320862, 0.0919082760810852], [0.038494985550642014, 0.05109047889709473, 0.07501792907714844, 0.04001014679670334, 0.021166233345866203, 0.03079657442867756, 0.01494709774851799, 0.010983827523887157, 0.0029027159325778484, 0.0995086133480072, 0.350593626499176, 0.02021479234099388, 0.34575650095939636, 0.21952421963214874, 0.05450797453522682], [0.028108511120080948, 0.08174566179513931, 0.03328564018011093, 0.03230520337820053, 0.012646276503801346, 0.1872790902853012, 0.025206655263900757, 0.06737280637025833, 0.033121660351753235, 0.08641302585601807, 0.2848047614097595, 0.059273794293403625, 0.18425194919109344, 0.15244826674461365, 0.1352420449256897], [0.07509021461009979, 0.05027765780687332, 0.23718997836112976, 0.11438266932964325, 0.11051909625530243, 0.431958943605423, 0.046987809240818024, 0.021854011341929436, 0.15366314351558685, 0.1928708851337433, 0.2900879681110382, 0.052021902054548264, 0.11538787186145782, 0.25173547863960266, 0.10233873873949051], [0.03257948160171509, 0.08023553341627121, 0.06238585337996483, 0.06856023520231247, 0.02927098423242569, 0.2968010902404785, 0.03317389637231827, 0.04758336395025253, 0.07943073660135269, 0.053982626646757126, 0.21416282653808594, 0.05025764927268028, 0.14347779750823975, 0.19969123601913452, 0.13921964168548584], [0.07817428559064865, 0.11046875268220901, 0.040724072605371475, 0.024797527119517326, 0.004808576311916113, 0.5141928791999817, 0.024754824116826057, 0.080713652074337, 0.03179122135043144, 0.12244449555873871, 0.22665926814079285, 0.013305582106113434, 0.23485711216926575, 0.323343425989151, 0.10171245783567429], [0.03765244409441948, 0.0463164821267128, 0.06456112116575241, 0.05319739878177643, 0.010156691074371338, 0.1155625581741333, 0.02458079345524311, 0.07648347318172455, 0.019683409482240677, 0.06488858163356781, 0.09342794120311737, 0.059032924473285675, 0.15581923723220825, 0.2894386053085327, 0.04157077521085739], [0.14924734830856323, 0.8862696886062622, 0.013125438243150711, 0.033269379287958145, 0.22599543631076813, 0.33975404500961304, 0.25561264157295227, 0.36481109261512756, 0.05327271297574043, 0.09902165085077286, 0.03598061203956604, 0.754990816116333, 0.9104278087615967, 0.8631682395935059, 0.10125402361154556], [0.03672042489051819, 0.12888115644454956, 0.1578092873096466, 0.056865133345127106, 0.03288109228014946, 0.1379515379667282, 0.021150214597582817, 0.013284055516123772, 0.003249341854825616, 0.08646353334188461, 0.5471532940864563, 0.0361909456551075, 0.5093809366226196, 0.39931434392929077, 0.07520455867052078], [0.03492635861039162, 0.09938696771860123, 0.028945090249180794, 0.03084651380777359, 0.012707062065601349, 0.15071596205234528, 0.029011720791459084, 0.05455483868718147, 0.03256314992904663, 0.07100401073694229, 0.2587825059890747, 0.05546442046761513, 0.17298617959022522, 0.15517692267894745, 0.13362783193588257], [0.050736088305711746, 0.10139954090118408, 0.08949553966522217, 0.0938185378909111, 0.06053004041314125, 0.18139560520648956, 0.0767659917473793, 0.11340610682964325, 0.19499026238918304, 0.11419404298067093, 0.23666803538799286, 0.05730360746383667, 0.07293370366096497, 0.11558260023593903, 0.12613430619239807]], [[0.1489560306072235, 0.2212677150964737, 0.055408962070941925, 0.03110104240477085, 0.02513720653951168, 0.07830048352479935, 0.05067736655473709, 0.06611648201942444, 0.02238955721259117, 0.03719142824411392, 0.025896798819303513, 0.04350690543651581, 0.11618120968341827, 0.08714473247528076, 0.15466241538524628], [0.002932992298156023, 0.307859867811203, 0.008187332190573215, 0.003677746979519725, 0.0005738585605286062, 0.0008406178676523268, 0.0005446207360364497, 0.00039283244404941797, 0.0009221792570315301, 0.000758469570428133, 0.003933709114789963, 0.0009352274937555194, 0.001059120986610651, 0.0020118390675634146, 0.010183396749198437], [0.37297555804252625, 0.09208715707063675, 0.16802547872066498, 0.11860792338848114, 0.08042033761739731, 0.18612971901893616, 0.45423436164855957, 0.07133221626281738, 0.13892753422260284, 0.3810507357120514, 0.291797935962677, 0.16154640913009644, 0.050885219126939774, 0.10468144714832306, 0.10335776954889297], [0.028274476528167725, 0.018124615773558617, 0.13954800367355347, 0.03560209274291992, 0.08428613841533661, 0.17491763830184937, 0.13035845756530762, 0.0214189775288105, 0.009060325101017952, 0.012400318868458271, 0.031279344111680984, 0.011209131218492985, 0.19533281028270721, 0.012452301569283009, 0.020085560157895088], [0.11180772632360458, 0.012462746351957321, 0.04844700172543526, 0.06198285147547722, 0.06685204058885574, 0.44600817561149597, 0.30352795124053955, 0.1519387811422348, 0.003835479263216257, 0.08384031802415848, 0.027865614742040634, 0.159846231341362, 0.46423590183258057, 0.09249147027730942, 0.09178084880113602], [0.04840230569243431, 0.026793736964464188, 0.1120820939540863, 0.09037120640277863, 0.2328549474477768, 0.1063276007771492, 0.14073747396469116, 0.19612964987754822, 0.1904316544532776, 0.10354755818843842, 0.10268037766218185, 0.13820117712020874, 0.3374333083629608, 0.15443934500217438, 0.12536528706550598], [0.36786824464797974, 0.056283749639987946, 0.03846094757318497, 0.07181648164987564, 0.03666122257709503, 0.04024837538599968, 0.5659748911857605, 0.2338860183954239, 0.11518415063619614, 0.3659259080886841, 0.04107162728905678, 0.012827688828110695, 0.0609581284224987, 0.02837788313627243, 0.060403015464544296], [0.0033490851055830717, 0.001678164815530181, 0.02563566155731678, 0.028815647587180138, 0.007257265504449606, 0.04370535537600517, 0.026118090376257896, 0.435838907957077, 0.005564961116760969, 0.014266176149249077, 0.018343305215239525, 0.0009297388605773449, 0.03809681162238121, 0.020595146343111992, 0.03566184639930725], [0.34718528389930725, 0.028826624155044556, 0.05378839746117592, 0.0680842474102974, 0.0254778191447258, 0.1994519978761673, 0.7739751935005188, 0.28213825821876526, 0.24756361544132233, 0.3363908529281616, 0.08445209264755249, 0.0067241075448691845, 0.09118638187646866, 0.04656682163476944, 0.0331079363822937], [0.06212884560227394, 0.013463910669088364, 0.024143628776073456, 0.025745615363121033, 0.12165382504463196, 0.04105379059910774, 0.21918880939483643, 0.12444313615560532, 0.7241542935371399, 0.2624671459197998, 0.05330171436071396, 0.026902005076408386, 0.04947282373905182, 0.06268218904733658, 0.04105047509074211], [0.23139908909797668, 0.12510670721530914, 0.062008026987314224, 0.06357982009649277, 0.21447335183620453, 0.06672460585832596, 0.5059712529182434, 0.23151132464408875, 0.3211345672607422, 0.29274967312812805, 0.07394816726446152, 0.12323616445064545, 0.33240705728530884, 0.13292434811592102, 0.0974365845322609], [0.3976813554763794, 0.24336650967597961, 0.030069073662161827, 0.04866141080856323, 0.061815883964300156, 0.023062149062752724, 0.2837987542152405, 0.10572359710931778, 0.42220908403396606, 0.47088485956192017, 0.06114182993769646, 0.05295940861105919, 0.04274435341358185, 0.033208493143320084, 0.07069624215364456], [0.6213744282722473, 0.08501708507537842, 0.08457361906766891, 0.0819045826792717, 0.02008524350821972, 0.02321169711649418, 0.5481746196746826, 0.17061969637870789, 0.19314314424991608, 0.48946020007133484, 0.08799289166927338, 0.009451461024582386, 0.1643926501274109, 0.03458939492702484, 0.0487554594874382], [0.11498570442199707, 0.014700047671794891, 0.04425002261996269, 0.027370423078536987, 0.031341005116701126, 0.11119254678487778, 0.2834031581878662, 0.24822625517845154, 0.387948602437973, 0.17188440263271332, 0.026020031422376633, 0.003112945705652237, 0.1680845320224762, 0.013143973425030708, 0.05647796019911766], [0.00710845272988081, 0.009718026034533978, 0.08296849578619003, 0.05356726795434952, 0.20372402667999268, 0.20898059010505676, 0.07373131066560745, 0.07588774710893631, 0.33318811655044556, 0.09730548411607742, 0.031877510249614716, 0.04629351943731308, 0.026428943499922752, 0.05165233090519905, 0.12934288382530212], [0.092291921377182, 0.13057716190814972, 0.11971572786569595, 0.09643372148275375, 0.0971774011850357, 0.03882397338747978, 0.30341219902038574, 0.06688009947538376, 0.5493715405464172, 0.21897412836551666, 0.10454282909631729, 0.09917838126420975, 0.19730664789676666, 0.0889393612742424, 0.0462181456387043], [0.3365032970905304, 0.06134270504117012, 0.11965256929397583, 0.08703643828630447, 0.08615697175264359, 0.01610170491039753, 0.289604127407074, 0.16905160248279572, 0.690265953540802, 0.5125291347503662, 0.11020015180110931, 0.05034353584051132, 0.04973014071583748, 0.04155145213007927, 0.06180096045136452], [0.25151577591896057, 0.0737723708152771, 0.11452356725931168, 0.07270905375480652, 0.27380475401878357, 0.046423640102148056, 0.6668940782546997, 0.60158771276474, 0.286392480134964, 0.2904633581638336, 0.07359147071838379, 0.040276750922203064, 0.2706137001514435, 0.15532110631465912, 0.051646988838911057], [0.4344438314437866, 0.2159019559621811, 0.0411386713385582, 0.059745997190475464, 0.08364511281251907, 0.02960371784865856, 0.3908357322216034, 0.17347759008407593, 0.4736940562725067, 0.5831181406974792, 0.08143209666013718, 0.05496616289019585, 0.0508774034678936, 0.03704635798931122, 0.07529113441705704], [0.6010525822639465, 0.07716702669858932, 0.12942874431610107, 0.11651009321212769, 0.029510293155908585, 0.025635747238993645, 0.564699649810791, 0.20346374809741974, 0.1942133754491806, 0.5329980254173279, 0.09726559370756149, 0.006782675161957741, 0.1884276419878006, 0.02957840822637081, 0.046941183507442474], [0.07098641246557236, 0.02088714949786663, 0.0536419078707695, 0.04874833673238754, 0.1357380896806717, 0.10192368179559708, 0.22615019977092743, 0.3848302960395813, 0.3569928705692291, 0.19976821541786194, 0.030237246304750443, 0.012232640758156776, 0.14491091668605804, 0.01217038556933403, 0.025625383481383324], [0.007031308952718973, 0.007269172929227352, 0.08423776179552078, 0.053896792232990265, 0.21268267929553986, 0.2456619292497635, 0.0817742720246315, 0.07338020205497742, 0.2872445285320282, 0.08955906331539154, 0.02503780461847782, 0.043076977133750916, 0.024157537147402763, 0.05127491056919098, 0.1281031221151352], [0.06564409285783768, 0.10634885728359222, 0.14713656902313232, 0.07514703273773193, 0.3204736113548279, 0.07143916934728622, 0.4829144775867462, 0.2612879276275635, 0.7603816986083984, 0.17889906466007233, 0.07189968973398209, 0.10938191413879395, 0.2776612341403961, 0.08681799471378326, 0.052979547530412674], [0.28806957602500916, 0.05887402966618538, 0.12616868317127228, 0.10481040924787521, 0.19247829914093018, 0.033351678401231766, 0.39873749017715454, 0.22540906071662903, 0.7029480338096619, 0.5013188719749451, 0.10523373633623123, 0.08320688456296921, 0.0816955640912056, 0.04881281033158302, 0.09282685816287994], [0.2559513747692108, 0.07615252584218979, 0.11904845386743546, 0.07934627681970596, 0.09980516135692596, 0.14371442794799805, 0.3059750497341156, 0.09035829454660416, 0.22693291306495667, 0.32864776253700256, 0.08986205607652664, 0.1614997386932373, 0.17624114453792572, 0.16325940191745758, 0.119119793176651]]]], \"bot_text\": [\"Das_\", \"Tier\", \"_\", \"\\u00fcber\", \"quer\", \"te_\", \"die_\", \"Stra\\u00dfe_\", \"nicht_\", \", _\", \"weil_\", \"es_\", \"zu_\", \"m\\u00fc\", \"de_\", \"war_\", \", _\", \"weil_\", \"es_\", \"zu_\", \"m\\u00fc\", \"de_\", \"war_\", \"._\"]}, \"all\": {\"top_text\": [\"The_\", \"animal_\", \"didn_\", \"'_\", \"t_\", \"cross_\", \"the_\", \"street_\", \"because_\", \"it_\", \"was_\", \"too_\", \"tire\", \"d_\", \"Das_\", \"Tier\", \"_\", \"\\u00fcber\", \"quer\", \"te_\", \"die_\", \"Stra\\u00dfe_\", \"nicht_\", \", _\", \"weil_\", \"es_\", \"zu_\", \"m\\u00fc\", \"de_\", \"war_\", \", _\", \"weil_\", \"es_\", \"zu_\", \"m\\u00fc\", \"de_\", \"war_\", \"._\"], \"att\": [[[[0.04540494084358215, 0.009098929353058338, 0.06841860711574554, 0.050027038902044296, 0.1867244392633438, 0.20893266797065735, 0.15536439418792725, 0.2501838803291321, 0.03253718465566635, 0.045193806290626526, 0.01405471283942461, 0.15126678347587585, 0.5554144382476807, 0.07120772451162338, 0.21479088068008423, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010880604386329651, 0.008569094352424145, 0.3644530475139618, 0.032524824142456055, 0.15862980484962463, 0.2895345985889435, 0.007411073427647352, 0.03074379824101925, 0.23678991198539734, 0.04092710092663765, 0.21633881330490112, 0.10217994451522827, 0.5741018652915955, 0.08794906735420227, 0.15811748802661896, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1548197716474533, 0.04407857358455658, 0.04267416149377823, 0.14390510320663452, 0.39150071144104004, 0.10470721870660782, 0.21010224521160126, 0.37398451566696167, 0.24677534401416779, 0.3071460425853729, 0.12511251866817474, 0.37053829431533813, 0.34731435775756836, 0.21468856930732727, 0.22426171600818634, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01666487753391266, 0.070415198802948, 0.13558338582515717, 0.030082950368523598, 0.17114414274692535, 0.20995233952999115, 0.018852930516004562, 0.2688913345336914, 0.024380644783377647, 0.01614876091480255, 0.058318838477134705, 0.003357462352141738, 0.22233186662197113, 0.08606056123971939, 0.08522026240825653, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.26702794432640076, 0.10013092309236526, 0.15535299479961395, 0.01822819747030735, 0.19259323179721832, 0.1620739996433258, 0.06925511360168457, 0.14121465384960175, 0.30160874128341675, 0.138941690325737, 0.14571446180343628, 0.1845642775297165, 0.3172887861728668, 0.1378965824842453, 0.15321676433086395, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05774107202887535, 0.08979255706071854, 0.15777261555194855, 0.0986839085817337, 0.04042482376098633, 0.02364284358918667, 0.006265458185225725, 0.20312650501728058, 0.04589210823178291, 0.2705432176589966, 0.29482388496398926, 0.25277185440063477, 0.21941334009170532, 0.09023746848106384, 0.12374064326286316, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10808208584785461, 0.08377770334482193, 0.3031982481479645, 0.08575166761875153, 0.1659224033355713, 0.02410510927438736, 0.024052061140537262, 0.06346622854471207, 0.012278172187507153, 0.033475130796432495, 0.02865537814795971, 0.2309909611940384, 0.5272806286811829, 0.058207638561725616, 0.12589795887470245, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2848440408706665, 0.04557379335165024, 0.07043055444955826, 0.13887976109981537, 0.25104182958602905, 0.08729252219200134, 0.03900376707315445, 0.06159999966621399, 0.07028467953205109, 0.1360185593366623, 0.12163159996271133, 0.4339398145675659, 0.18035274744033813, 0.13636742532253265, 0.35040098428726196, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03364454582333565, 0.06385143101215363, 0.4650610089302063, 0.13847006857395172, 0.12132523953914642, 0.23606915771961212, 0.02828356996178627, 0.17786316573619843, 0.0068073878064751625, 0.0032905752304941416, 0.04716186597943306, 0.060036350041627884, 0.5867005586624146, 0.23594366014003754, 0.05739189311861992, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04961356148123741, 0.4571499228477478, 0.32633671164512634, 0.044803813099861145, 0.12193554639816284, 0.15620054304599762, 0.031114954501390457, 0.37925899028778076, 0.023853085935115814, 0.007363635115325451, 0.0625552162528038, 0.04359081760048866, 0.12771400809288025, 0.10945692658424377, 0.03218715265393257, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.054336514323949814, 0.12682472169399261, 0.28572455048561096, 0.7098703384399414, 0.04356186464428902, 0.036012813448905945, 0.12616953253746033, 0.12438997626304626, 0.06097114831209183, 0.011340769939124584, 0.00453603221103549, 0.02511424943804741, 0.15918391942977905, 0.004009802360087633, 0.1337292641401291, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.029656492173671722, 0.11861541867256165, 0.25968441367149353, 0.6952800154685974, 0.06073199212551117, 0.3734285235404968, 0.030824951827526093, 0.09641394764184952, 0.0529148206114769, 0.01715172454714775, 0.01323915645480156, 0.055627286434173584, 0.11593649536371231, 0.04441850632429123, 0.04630020260810852, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10554661601781845, 0.6362442970275879, 0.6959939002990723, 0.018170323222875595, 0.40134888887405396, 0.15823723375797272, 0.1629355400800705, 0.11358990520238876, 0.24731940031051636, 0.23558683693408966, 0.07505767047405243, 0.03725680336356163, 0.014009351842105389, 0.03713200241327286, 0.09585387259721756, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.4055319130420685, 0.2534714341163635, 0.44874629378318787, 0.14194901287555695, 0.3008168041706085, 0.20029903948307037, 0.07248799502849579, 0.26174047589302063, 0.1826024055480957, 0.0982341319322586, 0.09884719550609589, 0.22728654742240906, 0.04277953878045082, 0.06280668079853058, 0.09454112499952316, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.025013893842697144, 0.013348683714866638, 0.22353146970272064, 0.0037027201615273952, 0.14888618886470795, 0.22346094250679016, 0.021921563893556595, 0.6342950463294983, 0.03356323391199112, 0.06236502528190613, 0.03522828221321106, 0.17797930538654327, 0.04731723666191101, 0.06786928325891495, 0.042550042271614075, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01107952743768692, 0.002038179198279977, 0.02572617679834366, 0.043437324464321136, 0.026865433901548386, 0.008821134455502033, 0.05896050110459328, 0.006038360297679901, 0.05802087485790253, 0.05262080207467079, 0.021981995552778244, 0.01655607670545578, 0.007265332620590925, 0.017941446974873543, 0.19668635725975037, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4201550781726837, 0.0003083523770328611, 0.003427971852943301, 0.027074502781033516, 0.0025770263746380806, 0.0006525526405312121, 0.0672224909067154, 0.0006329934694804251, 0.002376251621171832, 0.007315297145396471, 0.0018543159822002053, 0.0002170451043639332, 5.486799182108371e-06, 8.465739665552974e-05, 0.018722370266914368, 0.33067038655281067, 0.02820705994963646, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [6.826388562330976e-05, 0.41254693269729614, 8.318798791151494e-05, 0.00021303755056578666, 2.6623651137924753e-05, 1.3030116861045826e-06, 3.3524677292007254e-06, 9.95700816019962e-07, 0.00025696202646940947, 0.00021154701244086027, 4.0387480112258345e-05, 7.382633339148015e-05, 0.0001871670683613047, 0.0001393109851051122, 0.00044668230111710727, 0.43891066312789917, 0.3106566071510315, 0.006947982590645552, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0012913167010992765, 0.46178945899009705, 0.0011929792817682028, 0.0014885100536048412, 0.001382660586386919, 0.00010778238356579095, 4.841455302084796e-05, 4.8626650823280215e-05, 0.0007912410655990243, 0.0019299217965453863, 0.0002972490037791431, 0.0004315593687351793, 0.013707359321415424, 0.0025058358442038298, 0.00208207662217319, 0.8740342259407043, 0.6547167897224426, 0.0062981778755784035, 0.46666401624679565, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0008573953527957201, 5.803010481031379e-06, 0.0034995940513908863, 0.007113253697752953, 4.1040249925572425e-05, 0.48505696654319763, 0.0009781911503523588, 2.57480514846975e-05, 0.0006811833591200411, 0.011991027742624283, 0.013829604722559452, 0.02649468183517456, 0.018967876210808754, 0.008940043859183788, 0.0023627132177352905, 0.009682492353022099, 0.17458303272724152, 0.7120969891548157, 0.10496775060892105, 0.0038010317366570234, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.2793446735013276e-05, 4.91645641886862e-06, 0.0003670089063234627, 0.0005689052632078528, 0.0004337447171565145, 0.6979628205299377, 0.00025133590679615736, 1.3211038094596006e-05, 0.001040837960317731, 0.0008422345272265375, 0.00011131400242447853, 0.0007033413276076317, 0.00044049491407349706, 0.0004404923238325864, 0.00032976132933981717, 0.31054121255874634, 0.41146165132522583, 0.4573209881782532, 0.639615535736084, 0.038498248904943466, 0.06232544779777527, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002877118531614542, 0.0015123215271160007, 0.21683953702449799, 0.042356427758932114, 0.09360139071941376, 0.7325531840324402, 0.007687804754823446, 0.0004983373219147325, 0.0008397439960390329, 0.018263472244143486, 0.01633409783244133, 0.06572946161031723, 0.029279880225658417, 0.13710656762123108, 0.013406738638877869, 0.2996446192264557, 0.18095439672470093, 0.8072441220283508, 0.6008384227752686, 0.045412980020046234, 0.09029265493154526, 0.15878555178642273, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09384340792894363, 0.002295592101290822, 0.05245966836810112, 0.10398446023464203, 0.13232196867465973, 0.2621823251247406, 0.7299563884735107, 0.01621837355196476, 0.008298774249851704, 0.019108427688479424, 0.013038183562457561, 0.008606976829469204, 0.0014156820252537727, 0.008462491445243359, 0.08448491245508194, 0.07671086490154266, 0.13175785541534424, 0.032809216529130936, 0.06887537240982056, 0.32570284605026245, 0.22846734523773193, 0.06983717530965805, 0.07415641844272614, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [7.994164479896426e-05, 9.660106115916278e-06, 1.3390360436460469e-05, 0.0009496311540715396, 7.498388185922522e-06, 0.0023292596451938152, 0.0033705621026456356, 0.45610299706459045, 0.00048403104301542044, 0.0003956609289161861, 6.013430538587272e-05, 1.5610943592037074e-05, 4.899038231087616e-06, 1.0044974260381423e-05, 0.0011326958192512393, 0.4443431496620178, 0.2924090623855591, 0.09237049520015717, 0.07077033072710037, 0.05661908909678459, 0.1886560618877411, 0.5792031288146973, 0.23326165974140167, 0.024399278685450554, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0021254755556583405, 0.025354469195008278, 0.0505821667611599, 0.04718977212905884, 0.3544465899467468, 0.27984359860420227, 0.10468283295631409, 0.03827415779232979, 0.0065247067250311375, 0.003615353489294648, 0.001024437602609396, 0.02404061146080494, 0.00031744904117658734, 0.011979974806308746, 0.06911104917526245, 0.0045473226346075535, 0.015263181179761887, 0.11153102666139603, 0.01091472152620554, 0.07137833535671234, 0.14599360525608063, 0.24649137258529663, 0.2676219940185547, 0.14942915737628937, 0.03359955921769142, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06793052703142166, 0.04423084855079651, 0.009074175730347633, 0.010606715455651283, 0.023761747404932976, 0.06765440851449966, 0.048715878278017044, 0.13498826324939728, 0.15846557915210724, 0.01835249364376068, 0.0033974519465118647, 0.011923078447580338, 0.0035463334061205387, 0.036997705698013306, 0.15195232629776, 0.0021246292162686586, 0.019146723672747612, 0.0190261360257864, 0.004887872841209173, 0.032842181622982025, 0.009469296783208847, 0.015122202225029469, 0.056959331035614014, 0.014146327041089535, 0.2864534854888916, 0.028167642652988434, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00013637961819767952, 0.00010623007256072015, 0.00015417735266964883, 0.00014589299098588526, 0.0007127521676011384, 0.0008950252668000758, 0.00038585966103710234, 0.002901369472965598, 0.34460243582725525, 0.00040915730642154813, 0.00017379666678607464, 9.334777860203758e-05, 0.0002283527428517118, 0.0001650981866987422, 0.0021401161793619394, 0.007321672048419714, 0.06949152052402496, 0.18409577012062073, 0.05168240889906883, 0.5332358479499817, 0.12983477115631104, 0.020923368632793427, 0.015086837112903595, 0.05491120368242264, 0.38865622878074646, 0.036598365753889084, 0.02645716816186905, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03951041400432587, 0.015644539147615433, 0.002765331417322159, 0.020979223772883415, 0.001914863707497716, 0.049360573291778564, 0.010446744039654732, 0.06006397679448128, 0.18512527644634247, 0.5769777894020081, 0.07455664873123169, 0.016840822994709015, 0.21517987549304962, 0.030672460794448853, 0.04319411888718605, 0.004608431365340948, 0.07759333401918411, 0.05611182749271393, 0.031112710013985634, 0.06043193116784096, 0.023203425109386444, 0.01299421489238739, 0.011212858371436596, 0.2615091800689697, 0.5089370608329773, 0.22289350628852844, 0.10276756435632706, 0.03959360718727112, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0012064727488905191, 0.0013226938899606466, 0.002064700936898589, 0.008003294467926025, 0.002116014016792178, 0.0028530799318104982, 0.006337625440210104, 0.0002913604548666626, 0.0004794643900822848, 0.0026383439544588327, 0.0038926906418055296, 0.3737375736236572, 0.002772320294752717, 0.007620541378855705, 0.003997606225311756, 0.012221934273838997, 0.040381401777267456, 0.0694599524140358, 0.0800129845738411, 0.023234205320477486, 0.003881127340719104, 0.03062801994383335, 0.024260450154542923, 0.012832778505980968, 0.01656900905072689, 0.2333584874868393, 0.3572527766227722, 0.0072386497631669044, 0.014752739109098911, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.0432314411445986e-05, 4.745730166177964e-06, 1.672162215982098e-05, 2.360623693675734e-05, 4.496370820561424e-06, 1.767691173881758e-06, 4.21794857174973e-06, 1.7029789205480483e-06, 2.8430429665604606e-05, 7.409282261505723e-05, 0.00010478614422027022, 0.00017224416660610586, 0.480630487203598, 0.017292670905590057, 3.8113743357826024e-05, 0.09144259989261627, 0.1256924569606781, 0.6557105779647827, 0.1641494482755661, 0.04417502135038376, 0.42902442812919617, 0.377028226852417, 0.1956152766942978, 0.27481555938720703, 0.37677863240242004, 0.4323487877845764, 0.6219720244407654, 0.3997260332107544, 0.1145903542637825, 0.041462015360593796, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00031966043752618134, 7.799067680025473e-05, 0.0005293181748129427, 0.0002383182873018086, 6.09634407737758e-05, 1.622732997930143e-05, 0.0001254813396371901, 4.548055585473776e-05, 0.0002202334435423836, 0.0014038329245522618, 0.008373874239623547, 0.0005300238262861967, 0.8584288358688354, 0.0721927285194397, 0.0012385909212753177, 0.5997433662414551, 0.1045081838965416, 0.10960735380649567, 0.047688476741313934, 0.31575047969818115, 0.1532202959060669, 0.4197675585746765, 0.16546213626861572, 0.31973955035209656, 0.23332525789737701, 0.15541672706604004, 0.05988143011927605, 0.5733460187911987, 0.8565582036972046, 0.009604076854884624, 0.030047349631786346, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008336205966770649, 0.000929497298784554, 0.060522519052028656, 0.02858084999024868, 0.004865946713835001, 0.19429318606853485, 0.006222299765795469, 0.00020022530225105584, 0.03241097182035446, 0.2199898362159729, 0.40489089488983154, 0.12284909188747406, 0.04783688485622406, 0.16652296483516693, 0.03165041282773018, 0.02339007519185543, 0.01581897959113121, 0.02374129369854927, 0.02252129279077053, 0.08995510637760162, 0.0626068115234375, 0.27313846349716187, 0.036778680980205536, 0.22608895599842072, 0.06801939755678177, 0.035735905170440674, 0.022851483896374702, 0.06078701093792915, 0.42404335737228394, 0.41984546184539795, 0.08353053033351898, 0.058427464216947556, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06735408306121826, 0.02395833097398281, 0.022876637056469917, 0.059418935328722, 0.020556019619107246, 0.006657767109572887, 0.01686989888548851, 0.03750348463654518, 0.0929105281829834, 0.11066772043704987, 0.07383746653795242, 0.04306775704026222, 0.1764260083436966, 0.2488536387681961, 0.14264866709709167, 0.034203190356492996, 0.23458202183246613, 0.15632590651512146, 0.02520577609539032, 0.26413342356681824, 0.06292548030614853, 0.06378099322319031, 0.08676797896623611, 0.02988903410732746, 0.3430734872817993, 0.007843950763344765, 0.03405369073152542, 0.01887335814535618, 0.39618176221847534, 0.2528276741504669, 0.10531513392925262, 0.12583006918430328, 0.09389571845531464, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00023218609567265958, 9.724824485601857e-05, 0.00017837552877608687, 0.000249945733230561, 0.00043016509152948856, 0.0002728255931288004, 0.0002596308768261224, 0.0021448382176458836, 0.33870813250541687, 0.0012523159384727478, 0.0004828754754271358, 7.525486580561846e-05, 0.001232807757332921, 0.00022845527564641088, 0.0029908884316682816, 0.009769688360393047, 0.056299567222595215, 0.11172951757907867, 0.02802591770887375, 0.3647110164165497, 0.09813904017210007, 0.016619421541690826, 0.006417513824999332, 0.016537560150027275, 0.15495160222053528, 0.023067951202392578, 0.011397394351661205, 0.029141509905457497, 0.0527399443089962, 0.2784731984138489, 0.059669919312000275, 0.5969582796096802, 0.09549567103385925, 0.03235183656215668, NaN, NaN, NaN, NaN, NaN, NaN], [0.044313203543424606, 0.014693659730255604, 0.001713237608782947, 0.01787775754928589, 0.001054717693477869, 0.03111616149544716, 0.005932849366217852, 0.035437386482954025, 0.10908837616443634, 0.6214090585708618, 0.11623460799455643, 0.018710769712924957, 0.26884767413139343, 0.036007944494485855, 0.04555344209074974, 0.00987912341952324, 0.12349259853363037, 0.037169262766838074, 0.01944275200366974, 0.06324917078018188, 0.02598830871284008, 0.020618943497538567, 0.009103300981223583, 0.1360517293214798, 0.09789924323558807, 0.06809242814779282, 0.12332575768232346, 0.034675393253564835, 0.16954950988292694, 0.010956126265227795, 0.11111389100551605, 0.1871008574962616, 0.2434563934803009, 0.10274684429168701, 0.0379486046731472, NaN, NaN, NaN, NaN, NaN], [0.0014647350180894136, 0.0016486160457134247, 0.001705971430055797, 0.008203698322176933, 0.0011827786220237613, 0.001036314177326858, 0.004107706248760223, 0.00018337460642214864, 0.0005908485618419945, 0.004427316598594189, 0.0075510423630476, 0.37528446316719055, 0.0045065670274198055, 0.01084148045629263, 0.0047609396278858185, 0.010987702757120132, 0.03791751340031624, 0.03792046010494232, 0.0400051474571228, 0.008841714821755886, 0.002161285374313593, 0.031619150191545486, 0.01907121017575264, 0.0057282340712845325, 0.002385619329288602, 0.03308374434709549, 0.11032091826200485, 0.0044158026576042175, 0.05701944977045059, 0.0651637390255928, 0.027267253026366234, 0.3151875138282776, 0.17881636321544647, 0.3164456784725189, 0.005250148009508848, 0.011875288560986519, NaN, NaN, NaN, NaN], [1.1546462701517157e-05, 6.3197094277711585e-06, 1.3665205187862739e-05, 2.3049220544635318e-05, 3.1024922009237343e-06, 9.712728115118807e-07, 4.2468768697290216e-06, 1.4032799526830786e-06, 2.1501631636056118e-05, 0.00011254433775320649, 0.00014821428339928389, 0.00021640797785948962, 0.4815296530723572, 0.022970588877797127, 4.596232975018211e-05, 0.08034691959619522, 0.1792650669813156, 0.6813479661941528, 0.11697664856910706, 0.022037051618099213, 0.4362119436264038, 0.3332834541797638, 0.16648675501346588, 0.3133866786956787, 0.21180157363414764, 0.22306133806705475, 0.5634312033653259, 0.2539531886577606, 0.28583550453186035, 0.0421890914440155, 0.24185270071029663, 0.9185315370559692, 0.5444227457046509, 0.7130873799324036, 0.36675870418548584, 0.1082441657781601, 0.02894955314695835, NaN, NaN, NaN], [0.0004618540406227112, 0.00011890243331436068, 0.0008028792799450457, 0.0003817373653873801, 7.645944424439222e-05, 2.0059787857462652e-05, 0.00017321997438557446, 3.885024489136413e-05, 0.00016429855895694345, 0.0017073642229661345, 0.011983372271060944, 0.0008083870052359998, 0.8495219349861145, 0.07573292404413223, 0.0017974229995161295, 0.3316553831100464, 0.07297243922948837, 0.18084223568439484, 0.0543624572455883, 0.141310915350914, 0.15985439717769623, 0.22593949735164642, 0.09976530820131302, 0.2670679986476898, 0.12590403854846954, 0.10189743340015411, 0.06066418066620827, 0.14688965678215027, 0.6279550790786743, 0.004891595803201199, 0.013660040684044361, 0.19539086520671844, 0.13336770236492157, 0.11226529628038406, 0.4554508626461029, 0.7914823293685913, 0.007615156006067991, 0.015521766617894173, NaN, NaN], [0.00848880223929882, 0.0010204557329416275, 0.06384890526533127, 0.030244439840316772, 0.004545390605926514, 0.2111765593290329, 0.007047791499644518, 0.00020413362653926015, 0.03285042569041252, 0.2096482813358307, 0.40160003304481506, 0.12425301223993301, 0.05433715134859085, 0.2013336718082428, 0.03489448130130768, 0.010082974098622799, 0.009416572749614716, 0.026376336812973022, 0.021534079685807228, 0.041008636355400085, 0.028814975172281265, 0.09862472116947174, 0.019531887024641037, 0.1915404349565506, 0.055525705218315125, 0.03489372506737709, 0.035597167909145355, 0.017297467216849327, 0.13875839114189148, 0.18795406818389893, 0.13025526702404022, 0.03705297037959099, 0.016517892479896545, 0.028779756277799606, 0.02632485330104828, 0.36631691455841064, 0.4771501123905182, 0.10461407899856567, 0.07566797733306885, NaN], [0.018106432631611824, 0.01663283444941044, 0.006966447923332453, 0.06288447231054306, 0.008926548063755035, 0.0005806194385513663, 0.004527462646365166, 0.00047311693197116256, 0.010450053960084915, 0.008817908354103565, 0.02498125471174717, 0.02475220151245594, 0.006219316273927689, 0.034688226878643036, 0.15510374307632446, 0.00671275844797492, 0.019956005737185478, 0.15321078896522522, 0.00987993273884058, 0.1430601179599762, 0.02432059310376644, 0.007838046178221703, 0.016839532181620598, 0.017622128129005432, 0.03075602278113365, 0.01907699555158615, 0.30206096172332764, 0.010013632476329803, 0.06018203869462013, 0.19546428322792053, 0.020215312018990517, 0.04091925173997879, 0.022548291832208633, 0.26572445034980774, 0.010653333738446236, 0.1212434321641922, 0.3668496906757355, 0.1586136817932129, 0.14579400420188904, 0.04911552369594574]], [[0.1577349603176117, 0.09554319828748703, 0.02016325853765011, 0.08440300822257996, 0.33925309777259827, 0.35353752970695496, 0.49755600094795227, 0.2782062292098999, 0.2544572949409485, 0.6230229735374451, 0.04059281200170517, 0.12019311636686325, 0.2659685015678406, 0.3508304953575134, 0.10784413665533066, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.053030457347631454, 0.00926118716597557, 0.08361255377531052, 0.1587543487548828, 0.42493122816085815, 0.0713140144944191, 0.05032603442668915, 0.790120005607605, 0.4618776738643646, 0.3647898733615875, 0.20375682413578033, 0.2847990393638611, 0.20242592692375183, 0.33538198471069336, 0.174686461687088, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08703262358903885, 0.32554149627685547, 0.013934381306171417, 0.05831753462553024, 0.13550086319446564, 0.24707834422588348, 0.10738440603017807, 0.2015978991985321, 0.20393061637878418, 0.3176687955856323, 0.11071985214948654, 0.18533341586589813, 0.23293758928775787, 0.34885379672050476, 0.5850104689598083, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10977373272180557, 0.1966770738363266, 0.08552326261997223, 0.3559982180595398, 0.025181425735354424, 0.05637436732649803, 0.04466243088245392, 0.30799123644828796, 0.24855823814868927, 0.13041310012340546, 0.16531962156295776, 0.11238406598567963, 0.33737656474113464, 0.08863592892885208, 0.043888676911592484, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5166918635368347, 0.35558366775512695, 0.01755080744624138, 0.011931763030588627, 0.556053638458252, 0.21828243136405945, 0.17387567460536957, 0.11686032265424728, 0.22141756117343903, 0.6036979556083679, 0.3235246241092682, 0.21816273033618927, 0.20258961617946625, 0.7225815653800964, 0.3817636966705322, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.34899845719337463, 0.35567307472229004, 0.2643766403198242, 0.12664493918418884, 0.18397535383701324, 0.012551958672702312, 0.056629326194524765, 0.06369142234325409, 0.252005010843277, 0.3601645529270172, 0.3771168887615204, 0.4479873776435852, 0.13717319071292877, 0.6667386293411255, 0.1451762467622757, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5782451629638672, 0.6189379096031189, 0.11758852005004883, 0.3125992715358734, 0.3504111170768738, 0.10631152987480164, 0.16217094659805298, 0.04177623987197876, 0.10916820168495178, 0.3274877965450287, 0.10721725970506668, 0.11595069617033005, 0.11270644515752792, 0.32787472009658813, 0.13412055373191833, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2553749084472656, 0.5479037165641785, 0.3395489752292633, 0.13140854239463806, 0.07771788537502289, 0.06743729114532471, 0.04718935862183571, 0.022107038646936417, 0.2706955075263977, 0.06462319940328598, 0.20574931800365448, 0.08401398360729218, 0.11249610781669617, 0.20925462245941162, 0.07354141771793365, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.15992610156536102, 0.4297313988208771, 0.11996463686227798, 0.29957810044288635, 0.19940054416656494, 0.6192947030067444, 0.07005859166383743, 0.4058174192905426, 0.0451255701482296, 0.02480492927134037, 0.052432600408792496, 0.13078351318836212, 0.14195236563682556, 0.12686756253242493, 0.10959619283676147, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.13202522695064545, 0.3311104476451874, 0.12707853317260742, 0.06901858001947403, 0.13186469674110413, 0.37057942152023315, 0.1482420712709427, 0.21941475570201874, 0.1949346363544464, 0.11534072458744049, 0.011536079458892345, 0.018882060423493385, 0.16279305517673492, 0.07962523400783539, 0.11737312376499176, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0604790523648262, 0.5140921473503113, 0.37517040967941284, 0.060462601482868195, 0.14644990861415863, 0.49839717149734497, 0.08009912073612213, 0.3367377519607544, 0.0785842090845108, 0.043956201523542404, 0.0826396569609642, 0.015624956227838993, 0.10417986661195755, 0.07971351593732834, 0.018050679937005043, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10509271919727325, 0.5468136072158813, 0.2136838436126709, 0.13898353278636932, 0.11654751002788544, 0.1982421725988388, 0.03731672093272209, 0.5618436336517334, 0.37511539459228516, 0.015668287873268127, 0.07859797775745392, 0.026544239372015, 0.11879771202802658, 0.051024846732616425, 0.03191406652331352, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2583395540714264, 0.306291788816452, 0.15283380448818207, 0.48663485050201416, 0.24239543080329895, 0.6472541093826294, 0.11895711719989777, 0.7050262093544006, 0.43789902329444885, 0.07257331907749176, 0.1529301553964615, 0.07237879186868668, 0.029207568615674973, 0.031136667355895042, 0.04320577159523964, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.37997886538505554, 0.3090342879295349, 0.09529577195644379, 0.06091787666082382, 0.5611693859100342, 0.5351426005363464, 0.5250707268714905, 0.4058402180671692, 0.08284364640712738, 0.7192233204841614, 0.12988585233688354, 0.24924960732460022, 0.016598563641309738, 0.6531801819801331, 0.22117754817008972, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.31734058260917664, 0.02799793891608715, 0.08435621112585068, 0.4273812472820282, 0.37900310754776, 0.1551857888698578, 0.12445898354053497, 0.02975497953593731, 0.13922178745269775, 0.25836795568466187, 0.3142063617706299, 0.5329877138137817, 0.020000692456960678, 0.19246473908424377, 0.34441179037094116, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.011485431343317032, 0.057214245200157166, 0.11445975303649902, 0.035292237997055054, 0.17235025763511658, 0.21079879999160767, 0.08683252334594727, 0.33144259452819824, 0.2781406342983246, 0.07864350080490112, 0.10017280280590057, 0.0828540250658989, 0.17722147703170776, 0.21101748943328857, 0.15805292129516602, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.041519034653902054, 0.11474552005529404, 0.04909001290798187, 0.1299373209476471, 0.06295691430568695, 0.0239214189350605, 0.22038953006267548, 0.6809458136558533, 0.03295678645372391, 0.34942832589149475, 0.1847512274980545, 0.22206875681877136, 0.13646042346954346, 0.277276873588562, 0.1334262192249298, 0.00017037145153153688, 0.1837475299835205, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0764331966638565, 0.004937899298965931, 0.049346037209033966, 0.05165911093354225, 0.051789041608572006, 0.11632981896400452, 0.3382570743560791, 0.21805666387081146, 0.5269062519073486, 0.05627245828509331, 0.1284114420413971, 0.3053610324859619, 0.058564696460962296, 0.14431920647621155, 0.19175130128860474, 4.619961600837996e-06, 0.00011092388740507886, 0.19595862925052643, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08274618536233902, 0.009897814132273197, 0.07511309534311295, 0.03663979470729828, 0.16369661688804626, 0.04579350724816322, 0.04420214146375656, 0.06866282969713211, 0.17000554502010345, 0.09549596160650253, 0.07313749194145203, 0.06223462149500847, 0.11603321135044098, 0.07143211364746094, 0.2059532254934311, 7.402049959637225e-07, 0.0014410031726583838, 0.15330694615840912, 0.0009438465931452811, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.41769060492515564, 0.07210511714220047, 0.40716952085494995, 0.22363832592964172, 0.48781970143318176, 0.015007800422608852, 0.4504202902317047, 0.4675638973712921, 0.24936619400978088, 0.5447031855583191, 0.4296078681945801, 0.07025930285453796, 0.1902965009212494, 0.3567025065422058, 0.12464861571788788, 6.564930572494632e-07, 1.2471617083065212e-05, 0.0012651559663936496, 1.2094314115529414e-05, 0.2683168947696686, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3858333230018616, 0.06937354803085327, 0.5601253509521484, 0.30969470739364624, 0.36272186040878296, 0.005774383433163166, 0.16290897130966187, 0.16338182985782623, 0.1734752655029297, 0.10127251595258713, 0.6812319159507751, 0.35078492760658264, 0.26554787158966064, 0.3089393675327301, 0.12310608476400375, 3.960849710438197e-07, 2.835777740983758e-05, 0.0015905762556940317, 5.72201497561764e-05, 0.20671997964382172, 0.03618929535150528, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.047016799449920654, 0.04388514533638954, 0.010725832544267178, 0.029561294242739677, 0.04913409426808357, 0.007112162187695503, 0.045616600662469864, 0.09563170373439789, 0.021758677437901497, 0.05606407672166824, 0.023780539631843567, 0.2586848735809326, 0.1317795366048813, 0.13214319944381714, 0.18490085005760193, 3.613545777625404e-05, 4.069158967467956e-05, 0.0019799659494310617, 4.598083614837378e-05, 0.28016433119773865, 0.1021510660648346, 0.0019787675701081753, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024271933361887932, 0.10952932387590408, 0.01092300284653902, 0.005798409227281809, 0.03478696197271347, 0.015390553511679173, 0.005925341974943876, 0.04537563398480415, 0.00714160455390811, 0.005484140943735838, 0.00704369880259037, 0.04858299717307091, 0.06617175042629242, 0.13874217867851257, 0.17208275198936462, 0.03414154052734375, 0.018152736127376556, 0.002861178945749998, 0.0031036457512527704, 0.2743661403656006, 0.08905426412820816, 0.058365415781736374, 0.2834230065345764, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1448126882314682, 0.16020630300045013, 0.02696153335273266, 0.06902630627155304, 0.03837759047746658, 0.07682601362466812, 0.15773272514343262, 0.005734406877309084, 0.16041570901870728, 0.10849703103303909, 0.08964504301548004, 0.4313186705112457, 0.12084108591079712, 0.20548132061958313, 0.1913137137889862, 0.0001288916973862797, 0.0019113116431981325, 0.0011359998025000095, 2.5460678443778306e-05, 0.0018093753606081009, 0.008086470887064934, 0.005666371434926987, 0.0014489549212157726, 0.27176737785339355, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03147122263908386, 0.06498080492019653, 0.03835386037826538, 0.021906379610300064, 0.004580754786729813, 0.08777225762605667, 0.06548282504081726, 0.0501156747341156, 0.09960248321294785, 0.05812418833374977, 0.04425663501024246, 0.12932318449020386, 0.040425609797239304, 0.10523593425750732, 0.20731014013290405, 0.0013363973703235388, 0.015213730745017529, 0.019847076386213303, 0.0016770424554124475, 0.6085457801818848, 0.051846977323293686, 0.06904839724302292, 0.023163089528679848, 0.0024616841692477465, 0.4075135886669159, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03185653313994408, 0.014990762807428837, 0.012671640142798424, 0.014554454945027828, 0.005096337758004665, 0.025306345894932747, 0.015522593632340431, 0.012109486386179924, 0.014945329166948795, 0.0111803337931633, 0.010501275770366192, 0.010505528189241886, 0.013426732271909714, 0.01895906589925289, 0.16498495638370514, 1.5705205441918224e-05, 0.00011942459968850017, 3.308789018774405e-05, 0.00047703171730972826, 1.5581523257424124e-05, 3.566192026482895e-05, 0.000621139828581363, 0.002513762330636382, 0.0013953398447483778, 0.001656065694987774, 0.6708395481109619, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05249502509832382, 0.3800218403339386, 0.048091597855091095, 0.01820666529238224, 0.10161028057336807, 0.18240275979042053, 0.03954629600048065, 0.08666953444480896, 0.00239415536634624, 0.05545663461089134, 0.11899324506521225, 0.03552442044019699, 0.037884730845689774, 0.08727249503135681, 0.23120805621147156, 0.0009777048835530877, 0.006719581317156553, 0.017090875655412674, 0.007835427299141884, 0.0003081739123445004, 0.0027951891534030437, 0.0031432590913027525, 0.011542102321982384, 0.01903962530195713, 0.032312098890542984, 0.23448777198791504, 0.18604722619056702, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06818026304244995, 0.06384387612342834, 0.013627037405967712, 0.017488455399870872, 0.04112459346652031, 0.37204819917678833, 0.2269488275051117, 0.050778258591890335, 0.07564288377761841, 0.002337054116651416, 0.03256889060139656, 0.017944803461432457, 0.02268233709037304, 0.05458826571702957, 0.17415940761566162, 0.0010771078523248434, 0.00013067253166809678, 0.0004810431564692408, 0.0005832655006088316, 0.27172601222991943, 0.023587899282574654, 0.0011203349567949772, 0.0001570776366861537, 3.2636336982250214e-05, 0.008125105872750282, 0.3860749900341034, 0.011222672648727894, 0.4488545358181, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3350563049316406, 0.14807114005088806, 0.16856855154037476, 0.0634150505065918, 0.6115131974220276, 0.8617944717407227, 0.4784194529056549, 0.271447092294693, 0.44727417826652527, 0.03638387843966484, 0.0791390910744667, 0.0010650564217939973, 0.10882135480642319, 0.07249648869037628, 0.16217634081840515, 0.0018897228874266148, 0.00010004806244978681, 0.040837980806827545, 0.0009045379119925201, 0.4036760926246643, 0.033945482224226, 0.0009020724683068693, 2.477952148183249e-05, 0.0006147518288344145, 2.3498352675233036e-05, 0.0003015661786776036, 0.00019162058015353978, 0.0013656887458637357, 0.9207848906517029, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6229478120803833, 0.11473710834980011, 0.9313594102859497, 0.6977004408836365, 0.7760463953018188, 0.5547962784767151, 0.2850213646888733, 0.12024195492267609, 0.6867435574531555, 0.3715392053127289, 0.5383524894714355, 0.04410971701145172, 0.001209885231219232, 0.03505939990282059, 0.07057712972164154, 3.0049262932152487e-05, 0.00032340767211280763, 0.0004620190302375704, 1.456133759347722e-05, 0.4214256703853607, 0.00038119935197755694, 2.2086916942498647e-05, 5.437946310848929e-05, 0.0005922063137404621, 0.0002251591213280335, 4.171442924416624e-05, 0.0011568808695301414, 6.667344860034063e-05, 0.004539569839835167, 0.07099039107561111, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12039526551961899, 0.15183398127555847, 0.23466746509075165, 0.07534174621105194, 0.09489727020263672, 0.12723755836486816, 0.06088049337267876, 0.06659132242202759, 0.24534910917282104, 0.08624531328678131, 0.05703657865524292, 0.031156441196799278, 0.0026320687029510736, 0.016870809718966484, 0.16136524081230164, 0.0001142411565524526, 0.001007341779768467, 0.5582761764526367, 0.0006983705679886043, 0.04208780825138092, 0.07311324775218964, 0.011010478250682354, 0.00018356108921580017, 0.11227726191282272, 1.5535662896581925e-05, 7.865564111853018e-05, 8.497068483848125e-05, 0.007107958197593689, 0.04726947844028473, 0.03816111385822296, 0.7400538921356201, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024926312267780304, 0.055538877844810486, 0.0035579875111579895, 0.006728078704327345, 0.10179352015256882, 0.12386216968297958, 0.08368373662233353, 0.17138876020908356, 0.13290183246135712, 0.025975322350859642, 0.0007942751399241388, 0.08679928630590439, 0.006940893363207579, 0.006668384652584791, 0.2167840152978897, 9.270196460420266e-05, 0.00014002913667354733, 0.006266205105930567, 8.287983655463904e-05, 0.029540851712226868, 0.019505193457007408, 0.0002005908900173381, 0.0002361711667617783, 0.002089217072352767, 0.0007247799658216536, 0.0003387654141988605, 3.3522373996675014e-05, 0.00015295531193260103, 0.005682599265128374, 0.01914886385202408, 0.006167547311633825, 0.6065680980682373, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03079223819077015, 0.008776835165917873, 0.025623725727200508, 0.02996702678501606, 0.076390340924263, 0.11722294241189957, 0.03722265735268593, 0.06894396245479584, 0.023492204025387764, 0.02721765637397766, 0.02432498149573803, 0.009946721605956554, 0.02367306686937809, 0.02709045261144638, 0.15603508055210114, 0.017243418842554092, 0.0717378556728363, 0.015470567159354687, 0.14577892422676086, 0.003815611358731985, 0.01656431145966053, 0.21609994769096375, 0.24452562630176544, 0.07360902428627014, 0.020440302789211273, 0.9522358775138855, 0.0012982342159375548, 0.00034142163349315524, 4.905217429040931e-05, 0.0002677988959476352, 0.0020047405268996954, 0.013444142416119576, 0.5238149166107178, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.050754088908433914, 0.38707080483436584, 0.056088101118803024, 0.022330837324261665, 0.19594413042068481, 0.356031596660614, 0.05540256202220917, 0.17031489312648773, 0.002592364326119423, 0.0904960110783577, 0.17009596526622772, 0.02688765898346901, 0.05266827344894409, 0.09536514431238174, 0.2306852787733078, 0.006589227356016636, 0.025933612138032913, 0.05151839554309845, 0.019538801163434982, 0.000567624403629452, 0.011064885184168816, 0.018599001690745354, 0.0389220230281353, 0.03263486549258232, 0.03920944407582283, 0.309482604265213, 0.18455958366394043, 0.0028949796687811613, 0.0009189100819639862, 0.01304793544113636, 0.01903691701591015, 0.0013186958385631442, 0.1459255963563919, 0.2617945969104767, NaN, NaN, NaN, NaN, NaN, NaN], [0.052731066942214966, 0.07647765427827835, 0.009669344872236252, 0.013631273992359638, 0.037963252514600754, 0.40968915820121765, 0.1877974420785904, 0.06287717074155807, 0.06925270706415176, 0.0021469732746481895, 0.03106895461678505, 0.02147551439702511, 0.022071314975619316, 0.058794401586055756, 0.17150944471359253, 0.000940846570301801, 6.996696902206168e-05, 0.0001185448418254964, 0.00013115631008986384, 0.04620806872844696, 0.009408986195921898, 0.0010798430303111672, 0.00010642426059348509, 1.4586596989829559e-05, 0.0008147742482833564, 0.049950405955314636, 0.0020658469293266535, 0.020368386059999466, 0.0015965981874614954, 0.0005227082292549312, 8.089001494226977e-05, 0.42970454692840576, 0.3893451988697052, 0.006195466499775648, 0.2630486488342285, NaN, NaN, NaN, NaN, NaN], [0.2993965446949005, 0.1887350082397461, 0.17583680152893066, 0.06075390800833702, 0.6836855411529541, 0.8825634121894836, 0.44942814111709595, 0.3110062777996063, 0.6245057582855225, 0.04149743914604187, 0.08928828686475754, 0.0010537458583712578, 0.13885420560836792, 0.09175378829240799, 0.16601231694221497, 0.0015646422980353236, 5.644361226586625e-05, 0.015588155947625637, 0.0004337269929237664, 0.061090677976608276, 0.015012362040579319, 0.0009935805574059486, 3.2441483199363574e-05, 0.0006383971776813269, 7.901599929027725e-06, 0.00011085882579209283, 2.031324947893154e-05, 0.0001886440732050687, 0.1558367908000946, 2.918860081990715e-05, 0.00031420652521774173, 3.769064642256126e-05, 0.000311522075207904, 8.488001913065091e-05, 0.001447036280296743, 0.9016569256782532, NaN, NaN, NaN, NaN], [0.6222140192985535, 0.13893182575702667, 0.9335290789604187, 0.7374492883682251, 0.8253674507141113, 0.5633905529975891, 0.4091120660305023, 0.12903769314289093, 0.8090996742248535, 0.490604043006897, 0.6206711530685425, 0.06171489879488945, 0.0013746770564466715, 0.055387232452631, 0.07617512345314026, 6.329882307909429e-05, 0.0007932570297271013, 0.0008974742377176881, 3.545067738741636e-05, 0.41645264625549316, 0.0012166639789938927, 5.162824527360499e-05, 0.00016062096983660012, 0.0028807471971958876, 0.0007734368555247784, 0.0001738688733894378, 0.0017386887921020389, 8.449772576568648e-05, 0.008313576690852642, 0.04833607003092766, 5.605717160506174e-05, 0.000497612461913377, 0.00019103533122688532, 0.0018799308454617858, 0.000193181011127308, 0.010939341969788074, 0.11687301844358444, NaN, NaN, NaN], [0.1216169223189354, 0.17628714442253113, 0.21903447806835175, 0.08471400290727615, 0.12100206315517426, 0.12684285640716553, 0.060168445110321045, 0.05725802481174469, 0.204857736825943, 0.07119028270244598, 0.04997517541050911, 0.046147700399160385, 0.002665548352524638, 0.01769380457699299, 0.1595369428396225, 2.7039888664148748e-05, 0.0002653435221873224, 0.3520841896533966, 0.0011641159653663635, 0.017258664593100548, 0.13898366689682007, 0.004804374184459448, 0.0001136215214501135, 0.10132589936256409, 1.9021857951884158e-05, 0.00018713112513069063, 5.577637057285756e-05, 0.0021825090516358614, 0.016621561720967293, 0.003813497256487608, 0.05257569998502731, 7.136658678064123e-05, 0.00013083907833788544, 8.304342918563634e-05, 0.009517401456832886, 0.07102376222610474, 0.0242641419172287, 0.791592538356781, NaN, NaN], [0.02323095127940178, 0.05151251330971718, 0.002836216241121292, 0.007343180477619171, 0.11471041291952133, 0.09745588153600693, 0.08793136477470398, 0.19987791776657104, 0.2081962525844574, 0.026029428467154503, 0.0006721516838297248, 0.15218332409858704, 0.008676346391439438, 0.009503011591732502, 0.20713838934898376, 1.8426982933306135e-05, 6.735812348779291e-05, 0.005383457988500595, 0.0002568464260548353, 0.03709089383482933, 0.05173188075423241, 0.00015440442075487226, 0.00026214553508907557, 0.0031172526068985462, 0.0018413036596029997, 0.001364374067634344, 0.0001026472236844711, 0.00015940713637974113, 0.00464483629912138, 0.007250420283526182, 0.006640422623604536, 0.10042263567447662, 0.00037284562131389976, 5.502302519744262e-05, 0.00017516437219455838, 0.013823487795889378, 0.028728578239679337, 0.014491567388176918, 0.5602642297744751, NaN], [0.07751920074224472, 0.05964339151978493, 0.026831025257706642, 0.018057459965348244, 0.1489739865064621, 0.27560925483703613, 0.15271086990833282, 0.29336896538734436, 0.2548864185810089, 0.015449506230652332, 0.02643660455942154, 0.05839552357792854, 0.06659974157810211, 0.1841144859790802, 0.1324990689754486, 1.3810687960358337e-05, 0.0002572945086285472, 0.008041280321776867, 0.00040080497274175286, 0.00010326507617719471, 0.0013340600999072194, 0.00019016038277186453, 0.00019489554688334465, 0.0007417663000524044, 0.0012533330591395497, 0.0032668926287442446, 0.001072657760232687, 5.286548912408762e-05, 4.225512952871213e-07, 1.0035311788669787e-05, 2.1279807697283104e-05, 0.0006032216479070485, 0.00048016011714935303, 0.00037273563793860376, 3.447151175350882e-05, 9.715819260236458e-07, 2.8930742701049894e-05, 0.0003854547976516187, 0.005018792115151882, 0.4505775570869446]], [[0.022252710536122322, 0.017558962106704712, 0.12289869785308838, 0.01514213066548109, 0.04983796179294586, 0.160098597407341, 0.09159664064645767, 0.03634485974907875, 0.27353572845458984, 0.14908282458782196, 0.8423851132392883, 0.33708906173706055, 0.03012021631002426, 0.05972116440534592, 0.2686574459075928, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.13637107610702515, 0.02899317629635334, 0.09026061743497849, 0.22582301497459412, 0.09117049723863602, 0.19661013782024384, 0.30083417892456055, 0.13528303802013397, 0.1352328211069107, 0.18504901230335236, 0.3621358573436737, 0.504258930683136, 0.10044156759977341, 0.37106865644454956, 0.36433035135269165, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10935092717409134, 0.06271693855524063, 0.044740546494722366, 0.1709805577993393, 0.22382155060768127, 0.2615796625614166, 0.3429900109767914, 0.02677186205983162, 0.39723172783851624, 0.1559167355298996, 0.6381150484085083, 0.34350308775901794, 0.14388519525527954, 0.322640985250473, 0.07209958881139755, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.11123806983232498, 0.14550834894180298, 0.12841136753559113, 0.013620064593851566, 0.006130752619355917, 0.025231752544641495, 0.11538708955049515, 0.09429272264242172, 0.3855685293674469, 0.016912028193473816, 0.3869503438472748, 0.1961694061756134, 0.15352581441402435, 0.019190048798918724, 0.4291467070579529, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1283823847770691, 0.33987957239151, 0.06837885081768036, 0.03946131095290184, 0.03139644116163254, 0.11983324587345123, 0.12062173336744308, 0.46404916048049927, 0.24212448298931122, 0.1594262570142746, 0.4298713207244873, 0.5236353278160095, 0.2188095897436142, 0.049411591142416, 0.10146455466747284, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010564678348600864, 0.32722386717796326, 0.19864077866077423, 0.015389330685138702, 0.0028029000386595726, 0.007416849955916405, 0.003262599464505911, 0.23795713484287262, 0.05000551417469978, 0.075996033847332, 0.049679387360811234, 0.21265098452568054, 0.2097157984972, 0.01007634773850441, 0.03895873948931694, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10390599817037582, 0.04329453781247139, 0.42168325185775757, 0.06385642290115356, 0.04340887442231178, 0.029213739559054375, 0.036663200706243515, 0.0028809772338718176, 0.19718152284622192, 0.16335125267505646, 0.6605148315429688, 0.17834524810314178, 0.08135847747325897, 0.05741032958030701, 0.24636343121528625, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010566278360784054, 0.32608217000961304, 0.34194469451904297, 0.08201102167367935, 0.036688148975372314, 0.12155891954898834, 0.015490439720451832, 0.05858473479747772, 0.1731383204460144, 0.12207219004631042, 0.0636284351348877, 0.2239474654197693, 0.2988812327384949, 0.033257871866226196, 0.04593053460121155, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.26241976022720337, 0.0378817655146122, 0.10770448297262192, 0.11944369971752167, 0.367754727602005, 0.041288651525974274, 0.25914207100868225, 0.061461515724658966, 0.061867646872997284, 0.08977923542261124, 0.03797370195388794, 0.2101898193359375, 0.035329420119524, 0.38835543394088745, 0.3324989080429077, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3753410875797272, 0.031615160405635834, 0.1074504628777504, 0.07966858148574829, 0.16393397748470306, 0.01204571221023798, 0.36072632670402527, 0.026240641251206398, 0.09493876993656158, 0.12203314155340195, 0.0640302300453186, 0.13458214700222015, 0.19451306760311127, 0.3176366686820984, 0.19878560304641724, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.19523903727531433, 0.1090913861989975, 0.11059779673814774, 0.03402426466345787, 0.4491459131240845, 0.1729225516319275, 0.3482173979282379, 0.01764478161931038, 0.14307594299316406, 0.22771455347537994, 0.04787566140294075, 0.14714154601097107, 0.028272001072764397, 0.23823784291744232, 0.19700175523757935, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1428564339876175, 0.03585843741893768, 0.023294193670153618, 0.1143055409193039, 0.07461919635534286, 0.13578416407108307, 0.4153969883918762, 0.03374828025698662, 0.10746961832046509, 0.17216910421848297, 0.02314077876508236, 0.02450137585401535, 0.06497504562139511, 0.381274551153183, 0.14229674637317657, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5444629788398743, 0.049506742507219315, 0.09827632457017899, 0.29229700565338135, 0.06650383025407791, 0.11397240310907364, 0.597455620765686, 0.1362738311290741, 0.15222173929214478, 0.2562837302684784, 0.13646292686462402, 0.38294121623039246, 0.030382927507162094, 0.038297515362501144, 0.465526819229126, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.12950241565704346, 0.2834409177303314, 0.40745216608047485, 0.040315985679626465, 0.09126543253660202, 0.16738829016685486, 0.24838824570178986, 0.2707839906215668, 0.5177856087684631, 0.1416875720024109, 0.6573355793952942, 0.4225574731826782, 0.02239617332816124, 0.07502269744873047, 0.07588320225477219, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00751910824328661, 0.5024122595787048, 0.38239815831184387, 0.016937274485826492, 0.039716992527246475, 0.11479316651821136, 0.004478333052247763, 0.02017248421907425, 0.011771232821047306, 0.0035600941628217697, 0.03807784244418144, 0.07125832885503769, 0.1964063048362732, 0.0026467873249202967, 0.00302477041259408, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.006645309738814831, 0.043047573417425156, 0.04108792915940285, 0.028674451634287834, 0.10265154391527176, 0.03326163440942764, 0.05858607590198517, 0.06312219053506851, 0.013714859262108803, 0.017589740455150604, 0.02732386440038681, 0.11026919633150101, 0.028857730329036713, 0.054291173815727234, 0.19011041522026062, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006623337976634502, 0.06184479594230652, 0.014693422242999077, 0.03981047496199608, 0.08752858638763428, 0.01962500624358654, 0.06706372648477554, 0.011501927860081196, 0.0061228955164551735, 0.013949333690106869, 0.018435969948768616, 0.03678559139370918, 0.022487374022603035, 0.0660797506570816, 0.28934401273727417, 4.347301455709385e-06, 0.18382565677165985, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04245300590991974, 0.10349805653095245, 0.03407163918018341, 0.007511724252253771, 0.011565770022571087, 0.010817471891641617, 0.05971734598278999, 0.00459411833435297, 0.00350962788797915, 0.021488210186362267, 0.02298545651137829, 0.06376963108778, 0.036461468786001205, 0.1865386664867401, 0.16962040960788727, 0.0001576173526700586, 0.00605444610118866, 0.19315025210380554, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014149562455713749, 0.03299444913864136, 0.007003516890108585, 0.004260434303432703, 0.018919609487056732, 0.008522795513272285, 0.018369171768426895, 0.015471882186830044, 0.0008095644298009574, 0.012402600608766079, 0.0075600892305374146, 0.03885417431592941, 0.05682341009378433, 0.0525624044239521, 0.22132590413093567, 0.0015271879965439439, 0.2696094512939453, 0.0976908802986145, 0.19172586500644684, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01582285761833191, 0.013434984721243382, 0.0299182441085577, 0.03647983819246292, 0.009840411134064198, 0.06101881340146065, 0.04943924769759178, 0.3809337913990021, 0.027872184291481972, 0.07177315652370453, 0.06987256556749344, 0.014244881458580494, 0.18650749325752258, 0.16280896961688995, 0.16209137439727783, 0.018620789051055908, 0.1513659805059433, 0.1261996626853943, 0.04123798385262489, 0.18324223160743713, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018014581874012947, 0.11459828168153763, 0.013770120218396187, 0.021584663540124893, 0.02155740186572075, 0.03133949637413025, 0.03938381373882294, 0.28105995059013367, 0.02592163160443306, 0.026603924110531807, 0.010026685893535614, 0.009953479282557964, 0.004658891819417477, 0.014652709476649761, 0.16460371017456055, 7.739824650343508e-05, 0.0007302183075807989, 0.0020413347519934177, 0.0010007238015532494, 0.20195050537586212, 0.04546361416578293, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.001359884045086801, 0.029354294762015343, 0.0013457777677103877, 0.0026418184861540794, 0.008543581701815128, 0.003654624568298459, 0.0034977763425558805, 0.039957791566848755, 0.00108401442412287, 0.0005604945472441614, 0.0003877367707900703, 0.0033066808246076107, 0.007358025759458542, 0.007617549039423466, 0.20286646485328674, 0.0007431988487951458, 0.330532044172287, 0.08558935672044754, 0.06556878238916397, 0.10690004378557205, 0.1145712360739708, 0.06475446373224258, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015068605542182922, 0.027786174789071083, 0.015096615999937057, 0.048349082469940186, 0.03296791389584541, 0.0033369800075888634, 0.004459223244339228, 0.01348987128585577, 0.0010384898632764816, 0.013556106016039848, 0.015940798446536064, 0.042712315917015076, 0.02055070362985134, 0.042082786560058594, 0.17761820554733276, 0.015635214745998383, 0.050190601497888565, 0.02352251298725605, 0.24284599721431732, 0.06325101107358932, 0.02171560376882553, 0.015677697956562042, 0.4775830805301666, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09032934159040451, 0.007927155122160912, 0.08835490047931671, 0.21186837553977966, 0.05379607528448105, 0.23637458682060242, 0.16646702587604523, 0.022663533687591553, 0.024165447801351547, 0.08468358218669891, 0.07286331057548523, 0.016201749444007874, 0.031014403328299522, 0.026781529188156128, 0.21159759163856506, 0.03602181747555733, 0.2262161672115326, 0.11374488472938538, 0.22297167778015137, 0.018925879150629044, 0.2400040328502655, 0.13629396259784698, 0.14897051453590393, 0.11721047759056091, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014649872668087482, 0.032003261148929596, 0.1914098560810089, 0.17710277438163757, 0.07542474567890167, 0.05287592485547066, 0.14732114970684052, 0.08320016413927078, 0.025441674515604973, 0.02800501137971878, 0.0780739113688469, 0.04154554009437561, 0.017996925860643387, 0.08907850831747055, 0.17056028544902802, 0.001669732853770256, 0.0008830919396132231, 0.007873992435634136, 0.004793200176209211, 0.032567575573921204, 0.019068563356995583, 0.01167156733572483, 0.006520072463899851, 0.001765590044669807, 0.479371041059494, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.29397615790367126, 0.03400568664073944, 0.3242063522338867, 0.3681035339832306, 0.48163339495658875, 0.025333818048238754, 0.20042747259140015, 0.06051841378211975, 0.2913966476917267, 0.19229580461978912, 0.12739360332489014, 0.07057002186775208, 0.012750222347676754, 0.053084854036569595, 0.09877952188253403, 0.04264334216713905, 0.01628556102514267, 0.012549073435366154, 0.1270730197429657, 0.09553729742765427, 0.12904676795005798, 0.28088441491127014, 0.08353402465581894, 0.19219043850898743, 0.1467161476612091, 0.04815742373466492, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2290111482143402, 0.04351853206753731, 0.4067046046257019, 0.12047477811574936, 0.3140789866447449, 0.03630740940570831, 0.1768438071012497, 0.13207398355007172, 0.0676346942782402, 0.07621245086193085, 0.1797569841146469, 0.24804529547691345, 0.009716867469251156, 0.01671340875327587, 0.15996301174163818, 0.006975929252803326, 0.05510300025343895, 0.007132354192435741, 0.0349782258272171, 0.02191060781478882, 0.018211986869573593, 0.026551326736807823, 0.03648876026272774, 0.06464254856109619, 0.049987878650426865, 0.05908217281103134, 0.5448521375656128, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0448942668735981, 0.015721717849373817, 0.04864601418375969, 0.03494936227798462, 0.016112152487039566, 0.06668571382761002, 0.05302642658352852, 0.07182876765727997, 0.006946365814656019, 0.011091585271060467, 0.1120418831706047, 0.008756275288760662, 0.055249348282814026, 0.03253563493490219, 0.187040314078331, 0.000807860866189003, 0.00374230626039207, 0.004482839722186327, 0.005506760906428099, 0.000447272410383448, 0.003816538956016302, 0.03234753757715225, 0.014306235127151012, 0.01718331128358841, 0.04840204864740372, 0.06595310568809509, 0.18900929391384125, 0.0723472312092781, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3104230761528015, 0.04545353353023529, 0.3986057937145233, 0.6762936115264893, 0.03838818892836571, 0.03300129249691963, 0.27034318447113037, 0.21517230570316315, 0.008858010172843933, 0.2650390863418579, 0.2720700800418854, 0.005442188587039709, 0.06764175742864609, 0.053534120321273804, 0.18754751980304718, 0.00447529973462224, 0.019966747611761093, 0.03737834841012955, 0.3797287940979004, 0.010614297352731228, 0.05463654175400734, 0.32780376076698303, 0.0739898681640625, 0.25606051087379456, 0.8621841073036194, 0.2645638585090637, 0.25103500485420227, 0.016027942299842834, 0.004609693773090839, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011383982375264168, 0.11127021163702011, 0.0030386100988835096, 0.0067845494486391544, 0.013927198015153408, 0.08719860762357712, 0.03287587687373161, 0.5690041184425354, 0.03855481743812561, 0.020931608974933624, 0.01293823029845953, 0.047187648713588715, 0.021772168576717377, 0.1471272110939026, 0.18776896595954895, 0.0010164460400119424, 0.011448963545262814, 0.03378765657544136, 0.02785181999206543, 0.056788451969623566, 0.07099426537752151, 0.008927138522267342, 0.01755385287106037, 0.039185769855976105, 0.09313513338565826, 0.027632856741547585, 0.12282836437225342, 0.017955774441361427, 0.02453978732228279, 0.267269104719162, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005892250686883926, 0.03474593162536621, 0.023128867149353027, 0.002957691205665469, 0.03212961554527283, 0.015600761398673058, 0.0076070488430559635, 0.04006163775920868, 0.012522950768470764, 0.00397108681499958, 0.004476191475987434, 0.01931026391685009, 0.006290406920015812, 0.014653924852609634, 0.17843826115131378, 0.09903331845998764, 0.854941725730896, 0.020280463621020317, 0.8786925673484802, 0.37992238998413086, 0.20425425469875336, 0.32038459181785583, 0.8171603083610535, 0.2503354549407959, 0.7644308805465698, 0.7474347949028015, 0.935006856918335, 0.36836859583854675, 0.03383934497833252, 0.0021248040720820427, 0.21007098257541656, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.030382098630070686, 0.14396639168262482, 0.0023552696220576763, 0.003069670405238867, 0.03293609246611595, 0.010766614228487015, 0.04698408767580986, 0.0892328992486, 0.010764017701148987, 0.01645551063120365, 0.0007101192022673786, 0.14693684875965118, 0.10194381326436996, 0.06734117865562439, 0.21650707721710205, 0.09584157168865204, 0.00421579135581851, 0.0017077650409191847, 0.0670090913772583, 0.10943465679883957, 0.05715145170688629, 0.03694647178053856, 0.04514404758810997, 0.04956913739442825, 0.07195062190294266, 0.4566742479801178, 0.20942343771457672, 0.1548582911491394, 0.3906869888305664, 0.03925589844584465, 0.005858495831489563, 0.23115697503089905, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11579495668411255, 0.04704239219427109, 0.08932461589574814, 0.10469675809144974, 0.3945455551147461, 0.10528933256864548, 0.15413445234298706, 0.13012593984603882, 0.37207290530204773, 0.07726370543241501, 0.08641648292541504, 0.07665102183818817, 0.02378079853951931, 0.06452124565839767, 0.12331708520650864, 0.10393274575471878, 0.03258725255727768, 0.01998279243707657, 0.13928532600402832, 0.08602269738912582, 0.139993816614151, 0.2561682462692261, 0.08122693002223969, 0.28790318965911865, 0.34215468168258667, 0.023110536858439445, 0.8003224730491638, 0.11519370973110199, 0.5406965613365173, 0.2252652645111084, 0.07071924954652786, 0.03988110274076462, 0.09249765425920486, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20921318233013153, 0.07137931883335114, 0.3537597060203552, 0.1065746620297432, 0.30610421299934387, 0.07002534717321396, 0.22329437732696533, 0.23702743649482727, 0.06014438346028328, 0.05975072830915451, 0.17522762715816498, 0.3013332188129425, 0.02163097821176052, 0.016774384304881096, 0.15580035746097565, 0.006400381214916706, 0.03668399527668953, 0.006957556586712599, 0.024804070591926575, 0.013962345197796822, 0.010118995793163776, 0.014814852736890316, 0.02360437996685505, 0.038752347230911255, 0.10996780544519424, 0.24877001345157623, 0.7050904035568237, 0.103914275765419, 0.0656881257891655, 0.03925013542175293, 0.0268316138535738, 0.009403076022863388, 0.042995911091566086, 0.38370969891548157, NaN, NaN, NaN, NaN, NaN, NaN], [0.037447404116392136, 0.022215796634554863, 0.033449236303567886, 0.026462113484740257, 0.01563168875873089, 0.07434160262346268, 0.05695066228508949, 0.11209315806627274, 0.007291351445019245, 0.008904322981834412, 0.08964232355356216, 0.01435061078518629, 0.07215401530265808, 0.030404584482312202, 0.17889626324176788, 0.0005728903925046325, 0.0018518416909500957, 0.003297911025583744, 0.002339646453037858, 0.0003125199000351131, 0.0013706001918762922, 0.011640608310699463, 0.005699110683053732, 0.00646078959107399, 0.029403753578662872, 0.09435103088617325, 0.4532504379749298, 0.1454003006219864, 0.08155784755945206, 0.1478416919708252, 0.06988534331321716, 0.07031917572021484, 0.08092489838600159, 0.16178953647613525, 0.09959835559129715, NaN, NaN, NaN, NaN, NaN], [0.35028940439224243, 0.06261257082223892, 0.400876522064209, 0.6601436138153076, 0.0364767424762249, 0.0348673090338707, 0.3584212362766266, 0.3042086958885193, 0.012779565528035164, 0.3784087598323822, 0.29859334230422974, 0.00785628892481327, 0.11913719773292542, 0.06971576809883118, 0.17937220633029938, 0.007587960455566645, 0.01947515644133091, 0.06775914877653122, 0.37032291293144226, 0.014833947643637657, 0.04509717598557472, 0.2979332506656647, 0.08052700757980347, 0.2017516791820526, 0.8817963004112244, 0.3514429032802582, 0.3636293411254883, 0.14158478379249573, 0.09958238899707794, 0.13573585450649261, 0.27771836519241333, 0.47418463230133057, 0.36210212111473083, 0.2140081375837326, 0.022566867992281914, 0.004614678677171469, NaN, NaN, NaN, NaN], [0.014627714641392231, 0.1739588975906372, 0.0033204040955752134, 0.007496224716305733, 0.011711684986948967, 0.10170583426952362, 0.050673384219408035, 0.6495208740234375, 0.040652137249708176, 0.03492900729179382, 0.01829371228814125, 0.07074988633394241, 0.02588740922510624, 0.18312060832977295, 0.1794223189353943, 0.0009141381597146392, 0.00906511303037405, 0.026196878403425217, 0.011460180394351482, 0.03924085199832916, 0.05833837762475014, 0.004696658346801996, 0.009781464003026485, 0.029306253418326378, 0.06398104876279831, 0.017127037048339844, 0.0922316163778305, 0.03436172753572464, 0.12105685472488403, 0.475220263004303, 0.20121201872825623, 0.0066191148944199085, 0.018271028995513916, 0.05732923001050949, 0.018915977329015732, 0.019877590239048004, 0.23682713508605957, NaN, NaN, NaN], [0.006626310292631388, 0.049714479595422745, 0.02355029061436653, 0.0033578642178326845, 0.02970620058476925, 0.020507775247097015, 0.008351391181349754, 0.03789898753166199, 0.008593969978392124, 0.004206442274153233, 0.004605707712471485, 0.02678176388144493, 0.006028715055435896, 0.012980426661670208, 0.1725957691669464, 0.14320576190948486, 0.892350971698761, 0.030759859830141068, 0.8051734566688538, 0.7149769067764282, 0.4937312602996826, 0.3181091248989105, 0.8743517994880676, 0.3442763686180115, 0.8711729049682617, 0.7545801997184753, 0.9297782182693481, 0.6998263001441956, 0.17287810146808624, 0.008261360228061676, 0.9148194789886475, 0.7390273213386536, 0.743715763092041, 0.8801547288894653, 0.47275617718696594, 0.02699747122824192, 0.002916275057941675, 0.1803632229566574, NaN, NaN], [0.029822910204529762, 0.18419219553470612, 0.002088941168040037, 0.00302593014203012, 0.028257815167307854, 0.012486547231674194, 0.051940228790044785, 0.10161811858415604, 0.01137576438486576, 0.02022942155599594, 0.0007436276064254344, 0.2113851010799408, 0.1359580010175705, 0.08821411430835724, 0.2053057849407196, 0.0431031733751297, 0.0034584910608828068, 0.0008681766339577734, 0.032780423760414124, 0.11873625963926315, 0.03893061354756355, 0.019801655784249306, 0.03132590278983116, 0.05763043835759163, 0.06388700753450394, 0.3317660689353943, 0.16543246805667877, 0.10311393439769745, 0.4146954417228699, 0.09686555713415146, 0.06189668923616409, 0.5733434557914734, 0.2515217959880829, 0.17396190762519836, 0.13145960867404938, 0.40639445185661316, 0.07709264755249023, 0.007335619535297155, 0.2446187138557434, NaN], [0.016353517770767212, 0.03170220926403999, 0.014149405062198639, 0.013441388495266438, 0.037340469658374786, 0.010170645080506802, 0.0053974115289747715, 0.025274941697716713, 0.017184404656291008, 0.0020940443500876427, 0.006704597268253565, 0.009430822916328907, 0.030376460403203964, 0.024553189054131508, 0.15533798933029175, 0.046706411987543106, 0.31744489073753357, 0.6429179310798645, 0.4889025092124939, 0.43930482864379883, 0.3055577576160431, 0.6935683488845825, 0.25992196798324585, 0.7758384346961975, 0.2076689600944519, 0.8320663571357727, 0.39907822012901306, 0.8469056487083435, 0.5997118353843689, 0.31635957956314087, 0.36650604009628296, 0.2247273474931717, 0.7608639597892761, 0.37947097420692444, 0.8680096864700317, 0.5816919803619385, 0.19056683778762817, 0.27210569381713867, 0.06685535609722137, 0.040061503648757935]], [[0.06952784210443497, 0.0770183801651001, 0.23747292160987854, 0.022874178364872932, 0.14143598079681396, 0.08435114473104477, 0.0795491486787796, 0.054600730538368225, 0.015159118920564651, 0.06120437756180763, 0.02771361917257309, 0.06765643507242203, 0.013518131338059902, 0.15485556423664093, 0.21279898285865784, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2531612813472748, 0.03241151198744774, 0.04793045297265053, 0.13835468888282776, 0.05921119078993797, 0.20751594007015228, 0.5453532934188843, 0.021712571382522583, 0.07093679159879684, 0.2689567506313324, 0.13515745103359222, 0.05570060759782791, 0.04099860414862633, 0.03517309948801994, 0.11268090456724167, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.35043928027153015, 0.18572849035263062, 0.0481790192425251, 0.19426384568214417, 0.018465382978320122, 0.2676069438457489, 0.3000488579273224, 0.2726097106933594, 0.08134563267230988, 0.10164237022399902, 0.05787196010351181, 0.03694695979356766, 0.21335498988628387, 0.0815601795911789, 0.051584985107183456, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10967924445867538, 0.047143928706645966, 0.06498727947473526, 0.0161599051207304, 0.08311080187559128, 0.25361040234565735, 0.2589581310749054, 0.0646943673491478, 0.11701063811779022, 0.7398742437362671, 0.11236728727817535, 0.4240334630012512, 0.09019055217504501, 0.1980810910463333, 0.08526580780744553, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0050394656136631966, 0.005000656470656395, 0.01952306181192398, 0.4184519350528717, 0.012662295252084732, 0.015614073723554611, 0.006089636590331793, 0.027387546375393867, 0.007885311730206013, 0.009227052330970764, 0.015002718195319176, 0.002679894445464015, 0.040426015853881836, 0.023895790800452232, 0.031263262033462524, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1104135811328888, 0.16341662406921387, 0.10040471702814102, 0.15014782547950745, 0.22085179388523102, 0.07417210936546326, 0.08140900731086731, 0.21936744451522827, 0.12380684167146683, 0.030364450067281723, 0.008148477412760258, 0.040405042469501495, 0.016740301623940468, 0.05651557818055153, 0.03777482733130455, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.021739037707448006, 0.025255737826228142, 0.041796568781137466, 0.028582973405718803, 0.06361079961061478, 0.10603900998830795, 0.04079660773277283, 0.23573672771453857, 0.031395647674798965, 0.17699679732322693, 0.11518478393554688, 0.12758946418762207, 0.029195530340075493, 0.19761133193969727, 0.24158287048339844, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1121676117181778, 0.056780170649290085, 0.05766424164175987, 0.4753672778606415, 0.17093990743160248, 0.055545274168252945, 0.23774300515651703, 0.047642335295677185, 0.2396271675825119, 0.07084424793720245, 0.05071293190121651, 0.15200014412403107, 0.17973174154758453, 0.16349640488624573, 0.16329222917556763, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08155515789985657, 0.04415197670459747, 0.09395420551300049, 0.06736686080694199, 0.009449290111660957, 0.007789341267198324, 0.08313233405351639, 0.018231436610221863, 0.2736586928367615, 0.12516330182552338, 0.14283257722854614, 0.03993181511759758, 0.11735112965106964, 0.037545330822467804, 0.095799021422863, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07989984005689621, 0.019307896494865417, 0.05061032995581627, 0.29983657598495483, 0.009587445296347141, 0.23453857004642487, 0.06259765475988388, 0.014452173374593258, 0.026213111355900764, 0.03952796012163162, 0.12968890368938446, 0.019515926018357277, 0.23016268014907837, 0.18980233371257782, 0.14884653687477112, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.042069002985954285, 0.007410319056361914, 0.027750220149755478, 0.14348776638507843, 0.190275177359581, 0.0696464255452156, 0.09576459228992462, 0.08924749493598938, 0.16830699145793915, 0.14098002016544342, 0.2945949137210846, 0.08460760116577148, 0.11812892556190491, 0.2108343094587326, 0.28860458731651306, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.509858250617981, 0.07021021842956543, 0.044154465198516846, 0.005825423635542393, 0.5241404175758362, 0.030089300125837326, 0.19222509860992432, 0.02549084462225437, 0.1939508020877838, 0.09437919408082962, 0.10883274674415588, 0.13631868362426758, 0.08004569262266159, 0.04784407094120979, 0.14005501568317413, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.029798628762364388, 0.0011461747344583273, 0.00650657806545496, 0.02902117185294628, 0.007348767947405577, 0.012432223185896873, 0.018553903326392174, 0.006125486921519041, 0.008405826054513454, 0.057926055043935776, 0.04542696848511696, 0.21123111248016357, 0.05352021008729935, 0.2931033968925476, 0.1833699345588684, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01627730205655098, 0.0057758791372179985, 0.013731835409998894, 0.6289489269256592, 0.011782719753682613, 0.006108477246016264, 0.005309773609042168, 0.023312430828809738, 0.012817217037081718, 0.00939176045358181, 0.04320970177650452, 0.012798959389328957, 0.1585281491279602, 0.11795029044151306, 0.13285225629806519, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.39748579263687134, 0.10528232902288437, 0.006042438093572855, 0.07306646555662155, 0.020484283566474915, 0.09288878738880157, 0.6331413388252258, 0.03478514030575752, 0.016230005770921707, 0.039869412779808044, 0.10224607586860657, 0.005181388463824987, 0.007975003682076931, 0.01008305512368679, 0.026732152327895164, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005564282648265362, 0.001319661969318986, 0.028383644297719002, 0.01146539393812418, 0.028919272124767303, 0.012663042172789574, 0.023019153624773026, 0.0018097365973517299, 0.0143426563590765, 0.021044740453362465, 0.015969598665833473, 0.03200899809598923, 0.013908782042562962, 0.03448842838406563, 0.20206299424171448, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3364894986152649, 0.00033270660787820816, 0.017299778759479523, 0.02505551464855671, 0.00914769060909748, 0.0018482855521142483, 0.040363892912864685, 0.0008854345069266856, 0.020481230691075325, 0.022734129801392555, 0.016724254935979843, 0.0011141380527988076, 5.783090819022618e-05, 0.0005799515638500452, 0.07228588312864304, 0.17503570020198822, 0.10145211219787598, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0004661931307055056, 0.4122284948825836, 0.0022180580999702215, 0.00018468582129571587, 0.00030452435021288693, 5.825214248034172e-05, 0.0012309255544096231, 0.0017770789563655853, 1.19774986160337e-05, 0.0001907332189148292, 0.0007099026697687805, 0.0006694658659398556, 1.216385771840578e-05, 0.00011785236711148173, 0.00036971797817386687, 0.002467370592057705, 0.014373218640685081, 0.18901397287845612, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04950903728604317, 0.2967310845851898, 0.021222729235887527, 0.01289455872029066, 0.009955117478966713, 0.008917939849197865, 0.011312013491988182, 0.01272521447390318, 0.0006359940161928535, 0.011413054540753365, 0.006479735020548105, 0.0053005279041826725, 0.001741865067742765, 0.0027997863944619894, 0.08213357627391815, 4.782021278515458e-05, 0.0002036100922850892, 0.15351639688014984, 0.001678619533777237, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.020872987806797028, 3.087984805461019e-05, 0.009670623578131199, 0.0253498163074255, 0.010817835107445717, 0.4320962131023407, 0.017970044165849686, 0.0021109851077198982, 0.0003069202939514071, 0.008261006325483322, 0.006166533567011356, 0.7898750901222229, 0.11304597556591034, 0.12737329304218292, 0.011856237426400185, 0.015930648893117905, 0.006582066882401705, 0.10560829937458038, 0.3465193808078766, 0.012144939973950386, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06067817285656929, 0.005839335732161999, 0.025896329432725906, 0.03351203724741936, 0.025002295151352882, 0.25514867901802063, 0.4275963008403778, 0.0194717925041914, 0.0888834074139595, 0.04690318927168846, 0.03570560738444328, 0.0850825086236, 0.0388353131711483, 0.24394167959690094, 0.10019046813249588, 0.010950141586363316, 0.003185260808095336, 0.03380253165960312, 0.13516294956207275, 0.16374172270298004, 0.0833682045340538, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014415884390473366, 0.001141559099778533, 0.0678224116563797, 0.024646559730172157, 0.08796916157007217, 0.022639306262135506, 0.07784608006477356, 0.02605922892689705, 0.014093886129558086, 0.0286162830889225, 0.09674176573753357, 0.04692256450653076, 0.03519048914313316, 0.20982496440410614, 0.1800668090581894, 4.016391176264733e-05, 0.0003202538937330246, 0.0050767818465828896, 1.7212016246048734e-05, 0.5176156759262085, 0.003749872324988246, 0.00026106167933903635, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02086471952497959, 0.0008324789232574403, 0.01815967448055744, 0.002886975882574916, 0.0020961007103323936, 0.004472428001463413, 0.033020272850990295, 0.0047500282526016235, 0.012928733602166176, 0.014328529126942158, 0.015946470201015472, 0.06593997031450272, 0.00855537410825491, 0.07526978105306625, 0.1768130511045456, 0.13457109034061432, 0.07774609327316284, 0.006220821291208267, 0.0008077693055383861, 0.2509746253490448, 0.17662860453128815, 0.13796226680278778, 0.053514063358306885, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0009654826717451215, 0.000225315525312908, 0.0006124225910753012, 0.0007836261647753417, 0.0007428302778862417, 0.003282200777903199, 0.008662715554237366, 0.45239004492759705, 4.857195381191559e-05, 0.0006357804522849619, 0.0010122592793777585, 0.0006606358801946044, 0.00025698603712953627, 0.0011707579251378775, 0.0028539940249174833, 0.06553670763969421, 0.09473168104887009, 0.013516419567167759, 0.0013789478689432144, 0.03089364431798458, 0.0676402598619461, 0.03963227570056915, 0.17151857912540436, 0.1338733434677124, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0025523374788463116, 0.0009212270379066467, 0.09748471528291702, 0.057154957205057144, 0.4982932209968567, 0.000552327954210341, 0.02918482944369316, 0.0039253802970051765, 0.00450148293748498, 0.0014971394557505846, 0.009822547435760498, 0.0017059196252375841, 0.001570553402416408, 0.005804183427244425, 0.00957300141453743, 0.07379595190286636, 0.1714182198047638, 0.13684017956256866, 0.00734432740136981, 0.0039545828476548195, 0.09408346563577652, 0.0452522449195385, 0.2525797188282013, 0.15314188599586487, 0.008748584426939487, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.016401896253228188, 0.00043752315104939044, 0.0039018490351736546, 0.005885160993784666, 0.0023499932140111923, 0.0031332974322140217, 0.055512603372335434, 0.003903925186023116, 0.10197419673204422, 0.009071548469364643, 0.023729920387268066, 0.002627716166898608, 0.01914973370730877, 0.02837507426738739, 0.1623656302690506, 0.006909683812409639, 0.034793343394994736, 0.13824458420276642, 0.0004423256032168865, 0.38493895530700684, 0.12702688574790955, 0.0007700703572481871, 0.005257567390799522, 0.3978818655014038, 0.028774550184607506, 0.016022928059101105, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0004865071678068489, 2.4051656509982422e-05, 0.00020084556308574975, 0.0003736558719538152, 0.000646126689389348, 9.209318523062393e-05, 0.009753170423209667, 9.854567178990692e-05, 0.34485483169555664, 0.00047165394062176347, 0.0012700805673375726, 0.000479432987049222, 0.0015819557011127472, 0.0008011643076315522, 0.0017131956992670894, 0.15589091181755066, 0.059809040278196335, 0.2019805759191513, 0.006274765357375145, 0.053891621530056, 0.38889890909194946, 0.024021193385124207, 0.016828669235110283, 0.09206627309322357, 0.15270450711250305, 0.10960505902767181, 0.14381197094917297, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03442303463816643, 0.014513631351292133, 0.003174385754391551, 0.00478995218873024, 0.0017101461999118328, 0.003900717245414853, 0.05713852494955063, 0.013628470711410046, 0.0976317971944809, 0.28217896819114685, 0.01894235610961914, 0.009533336386084557, 0.003816690994426608, 0.005922130309045315, 0.12864208221435547, 0.0011966965394094586, 0.0013769377255812287, 0.0006101150647737086, 4.0936538425739855e-05, 0.008213219232857227, 0.03395655378699303, 0.0003392287762835622, 0.00015790743054822087, 0.000944053172133863, 0.0007261222926899791, 0.011664116755127907, 0.22049497067928314, 0.0034024016931653023, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01004086248576641, 0.01997406780719757, 0.005450551863759756, 0.006583535112440586, 0.0027623113710433245, 0.002903316868469119, 0.03531726077198982, 0.008635452017188072, 0.029197845607995987, 0.02162068709731102, 0.013219092041254044, 0.2711889445781708, 0.00537630682811141, 0.006846235599368811, 0.06079954653978348, 0.2470119595527649, 0.22662757337093353, 0.086290642619133, 0.0011605313047766685, 0.20862528681755066, 0.31339770555496216, 0.007298772688955069, 0.00864456407725811, 0.010568802244961262, 0.01924213580787182, 0.034804634749889374, 0.16789764165878296, 0.11296499520540237, 0.017940307036042213, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00031272557680495083, 8.196506314561702e-06, 4.237617031321861e-05, 0.00043677922803908587, 0.00024717405904084444, 0.022641032934188843, 0.002573953475803137, 0.0004433683061506599, 0.0013428670354187489, 0.00034036010038107634, 0.0007929583080112934, 0.0033021108247339725, 0.4761846959590912, 0.05593165382742882, 0.00081905338447541, 0.3800778388977051, 0.4679488241672516, 0.19362112879753113, 0.18464821577072144, 0.046723559498786926, 0.160307839512825, 0.24654103815555573, 0.2610638439655304, 0.07595612108707428, 0.1325986683368683, 0.022732526063919067, 0.1294456422328949, 0.2688123285770416, 0.12097980827093124, 0.12297553569078445, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00267792004160583, 4.751862070406787e-05, 0.014043050818145275, 0.02037942036986351, 0.04410611465573311, 0.04370833560824394, 0.06117184832692146, 0.01571183279156685, 0.11117196083068848, 0.006906491704285145, 0.0029646854382008314, 0.15407170355319977, 0.010935205966234207, 0.03797803074121475, 0.16977860033512115, 0.005153980106115341, 0.0002073257346637547, 0.12819816172122955, 0.00011319551413180307, 0.08506736904382706, 0.013190183788537979, 0.0028314462397247553, 0.00016588614380452782, 0.009067418053746223, 0.0008525841985829175, 0.00018506577180232853, 0.0002737078757490963, 0.0002474631182849407, 0.04919072240591049, 0.1850043386220932, 0.0018668848788365722, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011722833849489689, 0.005004812031984329, 0.007801789790391922, 0.0020204312168061733, 0.004946417640894651, 0.000467105332063511, 0.11018845438957214, 0.016256244853138924, 0.05208335816860199, 0.08122430741786957, 0.4447634816169739, 0.0032620911952108145, 0.0036480925045907497, 0.02699565887451172, 0.038189876824617386, 0.4235798418521881, 0.8363600969314575, 0.13292381167411804, 0.03160996362566948, 0.6294970512390137, 0.3827916085720062, 0.01768689975142479, 0.031598031520843506, 0.05291707068681717, 0.004268768709152937, 0.01666090451180935, 0.0017059938982129097, 0.03961870074272156, 0.006749838124960661, 0.2787548303604126, 0.12898604571819305, 0.00984524842351675, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024071840569376945, 0.0004321316082496196, 0.023504342883825302, 0.020648522302508354, 0.021508874371647835, 0.012214796617627144, 0.024360070005059242, 0.0013747027842327952, 0.0815734788775444, 0.08039785921573639, 0.06951787322759628, 0.017521949484944344, 0.04566040262579918, 0.08389204740524292, 0.15396325290203094, 0.001200420199893415, 0.004923743661493063, 0.03312471881508827, 7.996988279046491e-05, 0.2118730992078781, 0.0288531631231308, 0.00010192030458711088, 0.0002958755649160594, 0.007303019054234028, 0.00011155433458043262, 2.6572593014861923e-06, 0.00035481253871694207, 2.4723947262828005e-06, 2.6933960270980606e-06, 0.017764916643500328, 0.0003658832865767181, 0.25218549370765686, 0.002238432876765728, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0014979105908423662, 4.0405931940767914e-05, 0.0008743218495510519, 0.001329930848442018, 0.0032007889822125435, 0.0002464030694682151, 0.015361684374511242, 0.00014017200737725943, 0.3369258642196655, 0.0015512423124164343, 0.003011554479598999, 0.0010034784208983183, 0.0037561107892543077, 0.0018123533809557557, 0.0037892721593379974, 0.16854390501976013, 0.046801913529634476, 0.18834064900875092, 0.005545254796743393, 0.10321269929409027, 0.3906272351741791, 0.03742265701293945, 0.024458711966872215, 0.05521516501903534, 0.07171308994293213, 0.021107476204633713, 0.025199010968208313, 0.0027974944096058607, 0.0025010560639202595, 0.02306896261870861, 0.15930885076522827, 0.06242140382528305, 0.11754277348518372, 0.21403564512729645, NaN, NaN, NaN, NaN, NaN, NaN], [0.03386643901467323, 0.015328249894082546, 0.002211565151810646, 0.003828595858067274, 0.0012934240512549877, 0.004837968852370977, 0.04463785141706467, 0.014559985138475895, 0.04106945917010307, 0.26340487599372864, 0.017707379534840584, 0.01015215553343296, 0.0033097255509346724, 0.0058202859945595264, 0.13427288830280304, 0.0004002669302280992, 0.00040952101699076593, 0.00012874403910245746, 8.880775567376986e-06, 0.005201425869017839, 0.007163480389863253, 0.0002137795090675354, 0.00012960725871380419, 0.0005550362984649837, 0.0001244707527803257, 0.0006415210082195699, 0.03161805495619774, 4.1008814150700346e-05, 0.000599265971686691, 0.00399716105312109, 5.7038221711991355e-05, 0.0033261284697800875, 0.006950944196432829, 0.22392861545085907, 0.0028074102010577917, NaN, NaN, NaN, NaN, NaN], [0.011043943464756012, 0.029788998886942863, 0.004548549186438322, 0.006417197175323963, 0.0019613932818174362, 0.0028304944280534983, 0.02768276073038578, 0.006805655546486378, 0.02553243562579155, 0.0314837321639061, 0.015709027647972107, 0.2568790316581726, 0.008081428706645966, 0.009137820452451706, 0.06746803224086761, 0.22722585499286652, 0.18426381051540375, 0.07697561383247375, 0.0012757674558088183, 0.23254786431789398, 0.14769063889980316, 0.013780240900814533, 0.02735842764377594, 0.04001649469137192, 0.031179115176200867, 0.015889445319771767, 0.062248069792985916, 0.013498637825250626, 0.0052745710127055645, 0.2219674438238144, 0.0031969451811164618, 0.0037056237924844027, 0.028058722615242004, 0.22486938536167145, 0.09661445021629333, 0.02616964653134346, NaN, NaN, NaN, NaN], [0.0003306480939500034, 1.1417017958592623e-05, 3.816767639364116e-05, 0.000435528316302225, 0.00020690191013272852, 0.02179853804409504, 0.002864222740754485, 0.0005160043947398663, 0.001080053043551743, 0.0004847492673434317, 0.0009861867874860764, 0.003908392507582903, 0.47703394293785095, 0.07113853842020035, 0.000873323529958725, 0.27366653084754944, 0.354305237531662, 0.16368547081947327, 0.1598840057849884, 0.02900015190243721, 0.10581760108470917, 0.21902981400489807, 0.27043354511260986, 0.19813168048858643, 0.2514232099056244, 0.025616073980927467, 0.12471329420804977, 0.09682969748973846, 0.07310353219509125, 0.02883375994861126, 0.09285400807857513, 0.013515813276171684, 0.021914459764957428, 0.14159631729125977, 0.3238908648490906, 0.1783936321735382, 0.11570748686790466, NaN, NaN, NaN], [0.0030808241572231054, 6.38188939774409e-05, 0.011707174591720104, 0.023645061999559402, 0.038246914744377136, 0.047200631350278854, 0.04958858713507652, 0.012573646381497383, 0.04961754009127617, 0.005252092145383358, 0.002489157486706972, 0.17429526150226593, 0.008030706085264683, 0.02717452496290207, 0.1679786741733551, 0.0030968550126999617, 7.297070260392502e-05, 0.1371629387140274, 0.00018204482330475003, 0.04798782989382744, 0.01213640347123146, 0.0023585439193993807, 0.00011540603009052575, 0.016970379278063774, 0.0015150568215176463, 0.0003718302759807557, 0.00044133648043498397, 0.00012143531785113737, 0.021671650931239128, 0.023021340370178223, 0.00010860650218091905, 0.0005334930610843003, 0.000257489358773455, 0.0005856966599822044, 0.00045311596477404237, 0.09709983319044113, 0.18528476357460022, 0.0029071324970573187, NaN, NaN], [0.01455691922456026, 0.008012487553060055, 0.006938801147043705, 0.00259140832349658, 0.004911262542009354, 0.0004763725446537137, 0.10579084604978561, 0.021042171865701675, 0.03971559554338455, 0.07511086016893387, 0.43185338377952576, 0.0035418386105448008, 0.004437423776835203, 0.03184036538004875, 0.04226255044341087, 0.49188995361328125, 0.918917715549469, 0.2054058462381363, 0.08403602242469788, 0.6967929005622864, 0.5653088688850403, 0.03772272169589996, 0.04957969859242439, 0.18319177627563477, 0.012161915190517902, 0.07060753554105759, 0.009896048344671726, 0.1126827672123909, 0.010653471574187279, 0.1938174068927765, 0.1352803260087967, 0.0021707522682845592, 0.030638370662927628, 0.003963022027164698, 0.03303877264261246, 0.004082953091710806, 0.20578816533088684, 0.11854958534240723, 0.02041587606072426, NaN], [0.055085837841033936, 0.014846320264041424, 0.06939522176980972, 0.036867137998342514, 0.13156765699386597, 0.04343622922897339, 0.18117153644561768, 0.04244613274931908, 0.04596249759197235, 0.13158053159713745, 0.047130946069955826, 0.549620509147644, 0.24813801050186157, 0.3232562243938446, 0.11823604255914688, 0.001465475419536233, 0.00045102695003151894, 0.017218099907040596, 0.00030212500132620335, 0.11662620306015015, 0.017841650173068047, 0.00014393724268302321, 0.0003088460653088987, 0.006560556124895811, 0.0005491081974469125, 5.78465114813298e-05, 0.0019656207878142595, 0.00016285650781355798, 0.0002489366161171347, 0.011378495953977108, 0.0017521223053336143, 0.00787137821316719, 8.434856863459572e-05, 0.0012881350703537464, 7.287580228876323e-05, 0.00021561238099820912, 0.020317554473876953, 0.04195580258965492, 0.24219898879528046, 0.0017395684262737632]], [[0.2484879046678543, 0.12593188881874084, 0.11472177505493164, 0.6318025588989258, 0.009745504707098007, 0.030495919287204742, 0.054615989327430725, 0.004801109898835421, 0.23875823616981506, 0.011562658473849297, 0.02087206020951271, 0.059635717421770096, 0.011483770795166492, 0.07716090232133865, 0.041850361973047256, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3294946551322937, 0.17723912000656128, 0.041080135852098465, 0.30134642124176025, 0.0073102316819131374, 0.049291279166936874, 0.0495959147810936, 0.0037847748026251793, 0.014987694099545479, 0.07676513493061066, 0.039059415459632874, 0.006041571032255888, 0.011380840092897415, 0.011979957111179829, 0.02782473713159561, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.008675806224346161, 0.016726570203900337, 0.19906938076019287, 0.3167073726654053, 0.022006884217262268, 0.014510865323245525, 0.00237266905605793, 0.00938868336379528, 0.004848333541303873, 0.00305117666721344, 0.042285457253456116, 0.0026737553998827934, 0.017337674275040627, 0.0016427191440016031, 0.0027906473260372877, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06292864680290222, 0.010060630738735199, 0.07846219092607498, 0.3009726405143738, 0.09911586344242096, 0.3769649565219879, 0.290684312582016, 0.048859626054763794, 0.015964722260832787, 0.02972962148487568, 0.25837212800979614, 0.050403933972120285, 0.052831199020147324, 0.44793814420700073, 0.12096201628446579, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0647541731595993, 0.06744952499866486, 0.010754776187241077, 0.15598785877227783, 0.08916914463043213, 0.4045051634311676, 0.5958212018013, 0.10594789683818817, 0.12025819718837738, 0.04822946712374687, 0.02913811057806015, 0.014846491627395153, 0.17111137509346008, 0.049513354897499084, 0.14188753068447113, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07069405168294907, 0.0006015333347022533, 0.0017680496675893664, 0.0010985832195729017, 0.0012869784841313958, 0.22278346121311188, 0.4465882480144501, 0.06128238886594772, 0.02642727456986904, 0.03756114840507507, 0.002607540925964713, 0.0018699204083532095, 0.0059012919664382935, 0.020283877849578857, 0.03355809301137924, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0861939862370491, 0.03346291184425354, 0.009915103204548359, 0.35010838508605957, 0.03437130153179169, 0.18394741415977478, 0.5006390810012817, 0.0633198693394661, 0.36160194873809814, 0.07578127831220627, 0.038500167429447174, 0.08213403075933456, 0.026455186307430267, 0.12013117223978043, 0.1146865040063858, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2484544962644577, 0.00790119543671608, 0.004407763481140137, 0.02700735628604889, 0.015422074124217033, 0.015295883640646935, 0.40846768021583557, 0.10706920176744461, 0.06367217004299164, 0.22094424068927765, 0.21221157908439636, 0.006999517325311899, 0.054566796869039536, 0.124799944460392, 0.09114839136600494, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1237153485417366, 0.029043834656476974, 0.07521974295377731, 0.04068650305271149, 0.002623512176796794, 0.008706655353307724, 0.03832445293664932, 0.14616532623767853, 0.1701044738292694, 0.20599642395973206, 0.11677426844835281, 0.2341107875108719, 0.06235762685537338, 0.003964806441217661, 0.15731573104858398, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.034962959587574005, 0.023077068850398064, 0.034600574523210526, 0.14041800796985626, 0.0021679585333913565, 0.009290770627558231, 0.07274696230888367, 0.014187950640916824, 0.1371506154537201, 0.39440277218818665, 0.2198760211467743, 0.19940708577632904, 0.11203428357839584, 0.08552268147468567, 0.11737436801195145, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015330069698393345, 0.007386082783341408, 0.017500948160886765, 0.01906486414372921, 0.010120063088834286, 0.05364372953772545, 0.043298348784446716, 0.12658876180648804, 0.06039673835039139, 0.02238147333264351, 0.16429400444030762, 0.06984445452690125, 0.3043651580810547, 0.055543575435876846, 0.11423089355230331, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09644094854593277, 0.0058854687958955765, 0.03721459209918976, 0.0025620406959205866, 0.062300242483615875, 0.003563062520697713, 0.07219880819320679, 0.03924282267689705, 0.025451356545090675, 0.06598387658596039, 0.026776403188705444, 0.07250863313674927, 0.45021528005599976, 0.08199745416641235, 0.4220075309276581, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01460834126919508, 0.0005662022740580142, 0.0013911814894527197, 0.05315173417329788, 0.008028149604797363, 0.016604119911789894, 0.011740745045244694, 0.008678588084876537, 0.0025609249714761972, 0.01638207584619522, 0.018210044130682945, 0.014119945466518402, 0.06550943106412888, 0.34254926443099976, 0.04794229939579964, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05372002348303795, 0.14061135053634644, 0.018787089735269547, 0.0958278551697731, 0.0019092779839411378, 0.03348369151353836, 0.13957257568836212, 0.031220966950058937, 0.19735871255397797, 0.017847368493676186, 0.0589337982237339, 0.01900595612823963, 0.1276925951242447, 0.04769464209675789, 0.4384888708591461, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08416850119829178, 0.1088641807436943, 0.0573052242398262, 0.27551695704460144, 0.030813831835985184, 0.18022866547107697, 0.10468263924121857, 0.09972096234560013, 0.31189021468162537, 0.3315774202346802, 0.2321816384792328, 0.034622836858034134, 0.14143656194210052, 0.04640315845608711, 0.09621720016002655, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.7448275089263916, 0.00023065913410391659, 0.0003700565139297396, 0.0002745355886872858, 0.0005768057890236378, 1.0151054993912112e-05, 1.3715341992792673e-05, 7.643950084457174e-06, 0.0004341531603131443, 5.2913601393811405e-05, 5.353476808522828e-05, 8.812115265754983e-05, 1.1566834245968494e-06, 5.744800546381157e-06, 5.576572584686801e-05, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [8.114575030049309e-05, 0.06691394746303558, 0.04036417603492737, 0.022258125245571136, 0.055233534425497055, 0.050445422530174255, 0.048324622213840485, 0.00889397319406271, 0.1270352452993393, 0.04156908392906189, 0.20929713547229767, 0.21122632920742035, 0.414194792509079, 0.12628954648971558, 0.25567519664764404, 0.39058852195739746, 8.28505744721042e-06, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0012628535041585565, 0.0008597301202826202, 0.036364536732435226, 0.0971999391913414, 0.04217860475182533, 0.10421664267778397, 0.16082510352134705, 0.03283625468611717, 0.09032318741083145, 0.09653837233781815, 0.21890851855278015, 0.06589526683092117, 0.47985169291496277, 0.21388037502765656, 0.21010825037956238, 2.7811127438326366e-05, 0.4158080220222473, 0.0005852450849488378, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0002990703214891255, 0.001862871926277876, 0.010526847094297409, 0.01025421917438507, 0.05592086538672447, 0.02697981521487236, 0.01570008136332035, 0.02568165771663189, 0.010194454342126846, 0.048093631863594055, 0.04421652480959892, 0.02353351190686226, 0.21245922148227692, 0.0448865108191967, 0.23352482914924622, 9.039229868085252e-13, 4.1926887206500396e-05, 0.15358270704746246, 0.00044542484101839364, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00015855174569878727, 0.013162538409233093, 0.006567019037902355, 0.004201928153634071, 0.006268346216529608, 0.00024757537175901234, 0.012954139150679111, 0.003747382666915655, 0.03740423545241356, 0.007960616610944271, 0.013323514722287655, 0.06273993849754333, 0.048431456089019775, 0.13987915217876434, 0.20342004299163818, 1.9216391628896996e-16, 4.9363904963684035e-08, 0.0004218998074065894, 0.40449434518814087, 4.695959432865493e-06, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.013553211465477943, 0.03824196010828018, 0.02278091199696064, 0.09299258887767792, 0.0559159517288208, 0.00022306715254671872, 0.031003709882497787, 0.010444254614412785, 0.16168788075447083, 0.03666102886199951, 0.00852662418037653, 0.4432809352874756, 0.009321487508714199, 0.024379035457968712, 0.17351986467838287, 1.7349648803667746e-14, 5.141012060505545e-09, 3.7822364902240224e-06, 0.0002717413299251348, 0.22465285658836365, 2.698016260183067e-06, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00026768012321554124, 0.015254812315106392, 0.007090381346642971, 0.006173381581902504, 0.006773150525987148, 0.0008773274021223187, 0.00638232659548521, 0.016591282561421394, 0.004996343981474638, 0.009327422827482224, 0.008862738497555256, 0.05876166746020317, 0.009527520276606083, 0.00578573253005743, 0.20356230437755585, 3.6696812255598843e-09, 2.368522711293508e-09, 3.1902116006676806e-06, 9.520445587440918e-08, 9.990107355406508e-05, 0.2170185148715973, 0.019131841138005257, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0008312691352330148, 0.012717761099338531, 0.013986560516059399, 0.007093494758009911, 0.004876464139670134, 0.0027259632479399443, 0.0033886858727782965, 0.01589561626315117, 0.00876854918897152, 0.005017295014113188, 0.023178039118647575, 0.05755693465471268, 0.05451130494475365, 0.06928746402263641, 0.1796484887599945, 2.292660354896725e-07, 1.4062491449085002e-10, 1.0373556180720556e-11, 2.945570870549474e-11, 1.3987125901948616e-09, 1.1205498822164373e-06, 0.3382871150970459, 0.0008390913717448711, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00016753048112150282, 0.011822681874036789, 0.005686081480234861, 0.011659285984933376, 0.004307762254029512, 0.0031254058703780174, 0.009316416457295418, 0.0016170619055628777, 0.012603488750755787, 0.0245236624032259, 0.01756892167031765, 0.011099276132881641, 0.11892349272966385, 0.02075323462486267, 0.2549600899219513, 2.3133984541345853e-06, 0.00017511146143078804, 1.441240442545677e-06, 3.064446918443764e-09, 3.097617096159411e-08, 7.23518027712089e-08, 0.0017295092111453414, 0.39626115560531616, 0.00019915253506042063, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00017647366621531546, 0.053185176104307175, 0.007304554805159569, 0.004834755789488554, 0.000954066461417824, 0.025718921795487404, 0.02985404059290886, 0.09960591793060303, 0.010695043951272964, 0.016483109444379807, 0.018774237483739853, 0.05090473219752312, 0.01008983701467514, 0.028674444183707237, 0.22871088981628418, 8.689644937311981e-15, 2.8357308110571466e-06, 5.0946681540153804e-08, 2.0269605438549831e-10, 1.289949813632063e-10, 3.375676821404383e-11, 8.602300205495794e-09, 4.5097981455910485e-06, 0.29888245463371277, 6.641173968091607e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0008755451999604702, 0.020039640367031097, 0.003969491925090551, 0.007670485880225897, 0.006173306610435247, 0.012295764870941639, 0.0076020946726202965, 0.012137084268033504, 0.010956642217934132, 0.010541083291172981, 0.018125493079423904, 0.03226908668875694, 0.02587633579969406, 0.016216130927205086, 0.1660052388906479, 2.8127108337250475e-18, 1.3557467148928026e-08, 7.431774662336466e-08, 2.301476165200711e-08, 1.1707952315975767e-11, 7.274678689300762e-12, 7.034611066401852e-13, 5.257664963120856e-13, 3.4044413041556254e-05, 0.32336506247520447, 4.600838292390108e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [5.4335410823114216e-05, 0.03367479890584946, 0.004507457371801138, 0.004544241353869438, 0.00623831432312727, 0.002192543353885412, 0.004128816071897745, 0.021106822416186333, 0.0003909784718416631, 0.00830051489174366, 0.018183842301368713, 0.009683135896921158, 0.0325237475335598, 0.00792472343891859, 0.25227075815200806, 6.300134025583048e-13, 5.676838910062543e-08, 1.822371018533886e-06, 2.3448223146260716e-05, 2.5415656068616954e-07, 3.417801153204891e-08, 5.353474885616549e-10, 2.141239963115993e-11, 3.762530198514469e-08, 6.24434178462252e-05, 0.33693620562553406, 3.183486114721745e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0006012204103171825, 0.01188816037029028, 0.023532994091510773, 0.00770517997443676, 0.007410787045955658, 0.007087987381964922, 0.021027186885476112, 0.013456426560878754, 0.03266710042953491, 0.001251929672434926, 0.09021235257387161, 0.024440091103315353, 0.024299103766679764, 0.02338516153395176, 0.1967199146747589, 1.5877897954763576e-12, 1.2288996487086479e-09, 3.458522428445576e-07, 9.462546586291865e-06, 7.457422907464206e-05, 0.0005706463125534356, 1.4425116212635203e-08, 4.5430816769144455e-13, 2.616490357709722e-12, 3.545688542772041e-08, 0.00016559385403525084, 0.22770871222019196, 0.0009294600458815694, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0009616355528123677, 0.059039004147052765, 0.04997482895851135, 0.013552234508097172, 0.03981975466012955, 0.020335622131824493, 0.014380398206412792, 0.07606764137744904, 0.07161007821559906, 0.024130970239639282, 0.06891870498657227, 0.0008635766571387649, 0.023193923756480217, 0.02981526218354702, 0.21020111441612244, 2.579016999959549e-10, 1.5412886245069757e-10, 5.557828156033118e-11, 1.2367832313842086e-09, 3.3751638284229557e-07, 4.776334208145272e-07, 1.75399406998622e-07, 9.608910021829953e-12, 7.499024594652057e-14, 2.8573548556528813e-14, 3.2670008191793e-12, 4.494925178732956e-06, 0.37381958961486816, 3.638648195192218e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0013424595817923546, 0.0746709555387497, 0.011544802226126194, 0.027912717312574387, 0.0729047879576683, 0.10483764857053757, 0.07119728624820709, 0.010606798343360424, 0.044552259147167206, 0.05723145231604576, 0.034647323191165924, 0.38214871287345886, 0.003923356998711824, 0.08778946846723557, 0.19581711292266846, 3.090227983193472e-05, 8.430293382843956e-05, 4.32313208875712e-05, 1.6493000885020592e-06, 8.794136192591395e-06, 0.0005616153357550502, 0.0013158570509403944, 0.0005267951055429876, 3.675571861094795e-05, 2.42239195813454e-07, 8.356466074666002e-10, 2.3424906885338714e-06, 0.0012797197559848428, 0.6210904717445374, 0.0014036636566743255, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0016638260567560792, 0.01581355184316635, 0.08943041414022446, 0.02092832513153553, 0.021133122965693474, 0.012408973649144173, 0.01347691286355257, 0.00275444146245718, 0.027862150222063065, 0.01225491613149643, 0.018322426825761795, 0.008929668925702572, 0.00015579524915665388, 0.0014782899525016546, 0.18181975185871124, 7.67247776423119e-09, 2.954437938740284e-08, 8.54147774731473e-09, 2.011255162415182e-09, 5.265776792384713e-08, 1.4630668898618637e-09, 2.2913241082278546e-06, 3.266295323101076e-08, 1.6124132571349037e-06, 1.13081211061683e-11, 2.6358108895513247e-15, 7.728456763445024e-11, 2.3767283696685126e-09, 2.1271845980663784e-05, 0.19462287425994873, 6.456446044467157e-06, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0008640239248052239, 0.06174946948885918, 0.004653214477002621, 0.002717669354751706, 0.015129820443689823, 0.00935456808656454, 0.016078660264611244, 0.08089328557252884, 0.017857585102319717, 0.0025031790137290955, 0.00012101473839720711, 0.013123439624905586, 0.005499868653714657, 0.001559562049806118, 0.22764776647090912, 4.312543703220706e-13, 2.1705271535665815e-07, 1.1365986551936658e-07, 1.9739390211270802e-07, 7.690645453806155e-09, 4.219609994748907e-09, 9.716764060030414e-10, 3.915795687703394e-08, 3.0873563900968293e-06, 5.5168204227129536e-08, 1.0056843552375128e-10, 6.254387632798064e-12, 4.318517331930449e-12, 1.5618051990573534e-11, 6.033264071447775e-05, 0.4116440713405609, 1.8908482161350548e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0008687095833010972, 0.025285501033067703, 0.01658034697175026, 0.02363765239715576, 0.02393241412937641, 0.0657346174120903, 0.015298763290047646, 0.01792113669216633, 0.021707117557525635, 0.018967296928167343, 0.037634264677762985, 0.013209421187639236, 0.02256513573229313, 0.007774183992296457, 0.15961462259292603, 1.797858697974407e-17, 3.5553746058347713e-10, 1.0377114723070235e-09, 5.157609006545272e-09, 5.5740526777592336e-11, 3.675403037473046e-11, 3.015720268992328e-12, 1.2632186895361434e-14, 3.2584634990229233e-09, 2.7093712162695738e-08, 2.733851353305984e-15, 2.0347772078377346e-10, 7.802066534575867e-16, 1.702402683943053e-16, 1.8298086656987067e-10, 6.30185184036236e-08, 0.2592085301876068, 3.469779585429933e-06, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0001073219973477535, 0.04253393039107323, 0.010077103972434998, 0.007349912542849779, 0.00879223458468914, 0.004757148679345846, 0.008167163468897343, 0.03753674402832985, 0.00042728587868623435, 0.014237778261303902, 0.029898250475525856, 0.006872681900858879, 0.045794516801834106, 0.007500257343053818, 0.2562271058559418, 3.386366187463352e-10, 1.5587464474720036e-07, 5.430682108453766e-07, 1.926859113154933e-05, 2.7584928830037825e-06, 5.553058031182445e-07, 6.554741815989473e-08, 7.146391256540596e-10, 4.225638150501254e-08, 2.0539353045023745e-06, 0.00010312868107575923, 2.5505174860995794e-08, 1.3659710695890226e-08, 4.206753695390475e-11, 5.200286035123014e-11, 3.842067428649898e-07, 1.4282905794971157e-05, 0.31164512038230896, 0.00011869923037011176, NaN, NaN, NaN, NaN, NaN, NaN], [0.0005320480559021235, 0.010701313614845276, 0.020972738042473793, 0.007364482618868351, 0.006165153346955776, 0.00950621161609888, 0.022682208567857742, 0.018515970557928085, 0.03319491446018219, 0.00125269521959126, 0.07773777842521667, 0.022826068103313446, 0.02051766775548458, 0.020874740555882454, 0.1872510462999344, 3.098006018387167e-10, 3.2388165482899467e-09, 1.8609943808201024e-08, 5.099297482047405e-07, 4.603737033903599e-05, 0.00016448901442345232, 1.6998721719119203e-07, 1.7718410072475876e-11, 2.5886336477154437e-11, 9.218055652127077e-09, 1.2046231745443947e-07, 7.304957398446277e-05, 2.3164133111652774e-10, 2.8952129582648922e-09, 2.9085676575557606e-11, 8.895827650901023e-12, 8.14965606110718e-09, 8.762691868469119e-05, 0.2280847281217575, 0.0004104141262359917, NaN, NaN, NaN, NaN, NaN], [0.0008804904646240175, 0.05573932081460953, 0.06578188389539719, 0.01897181011736393, 0.043492771685123444, 0.026308609172701836, 0.016426166519522667, 0.09104844927787781, 0.12495335191488266, 0.04637341946363449, 0.0944451242685318, 0.0008321930072270334, 0.03243781998753548, 0.03530845418572426, 0.2013196051120758, 1.3149543676149733e-09, 1.080373679407387e-09, 5.5150013028582023e-11, 7.800748935693491e-10, 1.7859061074432248e-07, 2.183157299384675e-08, 2.5236221290469985e-07, 2.35878039323012e-10, 9.060349692724401e-12, 1.4339956088890715e-12, 1.7799637631876752e-12, 2.9941787715870305e-08, 6.0217857935640495e-06, 3.1683756313016787e-11, 4.5713120788715145e-11, 3.4124135808721867e-13, 3.591858459424911e-15, 1.3559961530365539e-12, 3.119595021416899e-06, 0.35679423809051514, 3.964137067669071e-05, NaN, NaN, NaN, NaN], [0.001610875129699707, 0.08435038477182388, 0.014167247340083122, 0.03493078798055649, 0.07050123810768127, 0.10772886872291565, 0.09850788861513138, 0.013066386803984642, 0.05027954652905464, 0.10465669631958008, 0.04533415287733078, 0.47037968039512634, 0.004505114629864693, 0.12196572870016098, 0.18816377222537994, 4.326914222474443e-06, 0.00023807807883713394, 0.00026310785324312747, 8.714396244613454e-06, 1.617559973965399e-05, 0.0001319001312367618, 0.0005945482989773154, 0.000823884445708245, 0.0008506007143296301, 1.7805428797146305e-05, 2.734714854568665e-08, 2.8855724849563558e-06, 4.891938442597166e-05, 0.0011682395124807954, 8.529372053089901e-07, 0.00017029111040756106, 1.0359013202787537e-07, 7.06834313302096e-10, 1.0861956525332062e-06, 0.0008713650749996305, 0.596385657787323, 0.0009257638594135642, NaN, NaN, NaN], [0.0018758929800242186, 0.019657986238598824, 0.1020394116640091, 0.033738646656274796, 0.024869924411177635, 0.012215637601912022, 0.015038376674056053, 0.002843664726242423, 0.02175789885222912, 0.01636381261050701, 0.01989913359284401, 0.01190999522805214, 0.00020280842727515846, 0.0016855570720508695, 0.17570628225803375, 1.4773272882795396e-10, 2.3448599506536993e-08, 6.434380566133768e-07, 3.8027360460546333e-07, 2.454226432746509e-06, 5.541529457531169e-09, 3.5226184991188347e-06, 2.5443886997322807e-08, 1.7749154721968807e-05, 1.8393259137994278e-09, 4.026108439691978e-12, 6.382850692432385e-09, 1.7809153263215194e-08, 8.996512974590587e-07, 0.00010512088192626834, 1.1464897607671443e-11, 2.794342757184154e-09, 2.4549680847631107e-15, 9.933188299671158e-11, 7.3009864820505754e-09, 8.105817687464878e-05, 0.2077004611492157, 2.0097606466151774e-05, NaN, NaN], [0.0009206020040437579, 0.08179444819688797, 0.00436751963570714, 0.003652991494163871, 0.019383452832698822, 0.008280212059617043, 0.016885409131646156, 0.10377784073352814, 0.023152435198426247, 0.0037028237711638212, 0.0001251623034477234, 0.018928401172161102, 0.009926089085638523, 0.002465219935402274, 0.21539123356342316, 1.1257004341538607e-14, 1.3137036347643516e-08, 4.6611327775281097e-07, 3.0405328743654536e-06, 1.5423474053477548e-07, 2.520166120234535e-08, 3.4643394819511286e-09, 1.1558090484697914e-08, 1.417677253812144e-06, 9.112129362165433e-08, 4.2694305868451465e-09, 3.7723260626343347e-10, 4.1450526344632976e-10, 2.7357388923676673e-11, 6.112880441833113e-07, 3.9687514799879864e-05, 8.382351063263016e-11, 8.293656039715103e-11, 4.97465783844131e-12, 4.144883221368634e-12, 1.4191136113450575e-11, 2.5566061594872735e-05, 0.4056495428085327, 4.4409513066057116e-05, NaN], [0.0005496710073202848, 0.039492249488830566, 0.016358638182282448, 0.007983607240021229, 0.006420070305466652, 0.0012171968119218946, 0.003928476013243198, 0.005028040148317814, 0.010722441598773003, 0.0025004756171256304, 0.015696601942181587, 0.006085758097469807, 0.0033880609553307295, 0.0056163351982831955, 0.1572248637676239, 9.215334861117716e-19, 2.6557794852166694e-10, 5.799645919069008e-07, 1.003176621633406e-11, 7.217926736302616e-07, 4.876178394397357e-08, 8.254863459455919e-11, 1.424103456687531e-12, 1.1857503423584603e-08, 1.3074058502482444e-09, 8.580362115262474e-12, 5.829819293978744e-09, 1.8017319407259702e-12, 9.234832950427707e-14, 3.576115098491428e-11, 1.9265784523270213e-09, 1.8997316146851517e-06, 1.949248054633479e-11, 8.860704392432694e-10, 2.8198800851872777e-14, 5.674391451236226e-15, 1.0258181110112119e-10, 6.93914080329705e-06, 0.25534507632255554, 2.742740150551981e-07]], [[0.130781888961792, 0.31469303369522095, 0.10550640523433685, 0.05234318599104881, 0.073336161673069, 0.022349786013364792, 0.04807984083890915, 0.1931842416524887, 0.06399697810411453, 0.042083337903022766, 0.026750531047582626, 0.11997608095407486, 0.008983415551483631, 0.03431839123368263, 0.019280044361948967, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1582711637020111, 0.14862558245658875, 0.20016248524188995, 0.08876624703407288, 0.11006557196378708, 0.14632253348827362, 0.04025046527385712, 0.010204354301095009, 0.017868297174572945, 0.059372395277023315, 0.02111685276031494, 0.04181571304798126, 0.025184988975524902, 0.09681157767772675, 0.11611668020486832, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.23875439167022705, 0.3084685802459717, 0.14188633859157562, 0.026331612840294838, 0.0149313323199749, 0.09176106750965118, 0.03131069242954254, 0.10051372647285461, 0.03149634972214699, 0.11085867136716843, 0.014410188421607018, 0.02796255424618721, 0.034816499799489975, 0.025807565078139305, 0.01846306212246418, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3404518961906433, 0.24260303378105164, 0.15383434295654297, 0.17020593583583832, 0.011800014413893223, 0.014385397545993328, 0.09441643208265305, 0.12204645574092865, 0.13843503594398499, 0.045293405652046204, 0.010667533613741398, 0.19693949818611145, 0.10281307995319366, 0.01422606036067009, 0.06984427571296692, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.002873742487281561, 0.008706165477633476, 0.35573768615722656, 0.0015586970839649439, 0.015496796928346157, 0.003392455168068409, 0.01149011217057705, 0.01891980692744255, 0.016394488513469696, 0.003960000351071358, 0.0035995631478726864, 0.008501716889441013, 0.018164046108722687, 0.004727588500827551, 0.013562880456447601, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.044807154685258865, 0.02788197249174118, 0.03947468474507332, 0.1271299421787262, 0.17640650272369385, 0.25110092759132385, 0.08349309861660004, 0.02069718949496746, 0.45751577615737915, 0.039922621101140976, 0.1781769096851349, 0.002931024879217148, 0.16567888855934143, 0.1177627220749855, 0.5156693458557129, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005990047473460436, 0.04782475531101227, 0.01399919856339693, 0.010489771142601967, 0.06132129579782486, 0.030459748581051826, 0.010153756476938725, 0.3387801945209503, 0.06446883827447891, 0.007243711035698652, 0.00693717272952199, 0.020023254677653313, 0.007285784464329481, 0.009139767847955227, 0.0044054011814296246, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.020405659452080727, 0.00729386368766427, 0.06661678105592728, 0.08295443654060364, 0.20373474061489105, 0.3448184132575989, 0.04295210912823677, 0.20947468280792236, 0.03081577830016613, 0.010805373080074787, 0.17521467804908752, 0.06567652523517609, 0.012400656938552856, 0.10652147233486176, 0.07385163754224777, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.21573591232299805, 0.13175059854984283, 0.04085814207792282, 0.04119405150413513, 0.03551999852061272, 0.023009058088064194, 0.2751774191856384, 0.047030266374349594, 0.14272502064704895, 0.20153193175792694, 0.09575672447681427, 0.11327007412910461, 0.008532780222594738, 0.053245026618242264, 0.08952803909778595, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2778390347957611, 0.11423225700855255, 0.3034791946411133, 0.34643107652664185, 0.5395972728729248, 0.06785042583942413, 0.13029156625270844, 0.18737749755382538, 0.029348008334636688, 0.16667678952217102, 0.021040884777903557, 0.008728248998522758, 0.037633832544088364, 0.02033349499106407, 0.03947347402572632, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.4898838996887207, 0.08082167059183121, 0.07362432777881622, 0.02171795442700386, 0.1333591789007187, 0.09000474214553833, 0.13501934707164764, 0.03979193791747093, 0.19113953411579132, 0.13522492349147797, 0.16557832062244415, 0.16255514323711395, 0.07687958329916, 0.15948235988616943, 0.09843874722719193, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.045906297862529755, 0.18602333962917328, 0.4082620143890381, 0.010370302945375443, 0.04507172852754593, 0.19693265855312347, 0.04021843150258064, 0.027866821736097336, 0.1546991914510727, 0.33766424655914307, 0.09260500222444534, 0.05066358670592308, 0.05655887722969055, 0.13157807290554047, 0.06850539147853851, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.020344020798802376, 0.0030158585868775845, 0.004445259924978018, 0.022628312930464745, 0.030150510370731354, 0.027700912207365036, 0.026311388239264488, 0.012862108647823334, 0.07009940594434738, 0.24656175076961517, 0.10596039146184921, 0.1143152266740799, 0.3679012656211853, 0.0068145813420414925, 0.04171491786837578, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004749340936541557, 0.00182742765173316, 0.0021293568424880505, 0.00394084258005023, 0.004750867374241352, 5.3125138947507367e-05, 0.0026011874433606863, 0.000718552153557539, 0.002356230979785323, 0.00125187449157238, 0.0021339249797165394, 0.00044074622564949095, 0.2141493707895279, 0.0029175111558288336, 0.00477015832439065, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.12991508841514587, 0.06724811345338821, 0.06397818773984909, 0.15923364460468292, 0.2566852867603302, 0.07963784784078598, 0.09182894974946976, 0.040824584662914276, 0.21298912167549133, 0.2517295181751251, 0.2285410314798355, 0.11115844547748566, 0.1010512113571167, 0.3968040943145752, 0.1870165765285492, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09555985033512115, 0.6603901982307434, 0.4109249413013458, 0.6857163310050964, 0.16377028822898865, 0.1341286301612854, 0.19969937205314636, 0.28269705176353455, 0.14764364063739777, 0.41980865597724915, 0.4319525361061096, 0.3789142668247223, 0.49345141649246216, 0.26345306634902954, 0.00909768883138895, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1460653841495514, 0.2758752405643463, 0.2826583981513977, 0.551855206489563, 0.05612415447831154, 0.19304026663303375, 0.0849798247218132, 0.038316093385219574, 0.02312053181231022, 0.46154478192329407, 0.36433619260787964, 0.35877159237861633, 0.1596277803182602, 0.0554661750793457, 6.483463948825374e-05, 0.0002614231198094785, 0.183704674243927, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.716628270922229e-05, 1.9402585849093157e-07, 1.0113188182003796e-05, 6.318590021692216e-05, 6.053787728887983e-07, 2.5790013751247898e-06, 0.00022986173280514777, 1.074662236533186e-06, 6.082240361138247e-06, 3.35614299729059e-06, 2.225729804194998e-05, 7.863033715693746e-06, 1.555537892272696e-06, 3.881560041918419e-05, 0.23657216131687164, 1.3331101555991154e-08, 0.003119559260085225, 0.19454506039619446, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6150763630867004, 0.041665952652692795, 0.4174444377422333, 0.4949702024459839, 0.20794649422168732, 0.3307763934135437, 0.8098993897438049, 0.2721010744571686, 0.7274996042251587, 0.4779607057571411, 0.6233283281326294, 0.7560765147209167, 0.3628612458705902, 0.7672091722488403, 5.392584171204362e-06, 1.1244888353800775e-09, 0.0005117341643199325, 0.15345418453216553, 0.0018621939234435558, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [5.640763447445352e-06, 2.5884469323500525e-07, 1.2724142379738623e-06, 8.170181899913587e-06, 1.2345621769327408e-07, 1.310836523771286e-07, 1.02673438959755e-05, 9.661080184741877e-07, 6.520539272969472e-07, 7.602448022225872e-07, 2.058099425994442e-06, 6.885502301656743e-08, 1.0175665465794737e-06, 1.7383708836860023e-05, 0.20754273235797882, 2.882708471929618e-08, 0.0006895777769386768, 0.008299488574266434, 0.004234161227941513, 0.26378652453422546, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [9.27566077280062e-07, 5.395870630309219e-07, 1.8455818917573197e-07, 1.2775643654094893e-06, 2.105696061960316e-08, 3.1680112755338996e-08, 6.263408067752607e-06, 4.3284012463118415e-07, 1.918825773827848e-06, 1.694104128091567e-07, 3.363936968980852e-07, 9.135120215830739e-09, 4.4058825920956224e-08, 7.840970965844463e-07, 0.18219269812107086, 6.507164653157815e-05, 0.0030905166640877724, 0.269605815410614, 0.06594818085432053, 0.07055308669805527, 0.24370616674423218, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.7144812345504761, 0.6739043593406677, 0.2952970862388611, 0.49478814005851746, 0.17151717841625214, 0.06989942491054535, 0.5132517218589783, 0.30886489152908325, 0.5621734261512756, 0.5728412866592407, 0.576314389705658, 0.34687095880508423, 0.25617536902427673, 0.29690253734588623, 7.371841547865188e-06, 5.806248736917041e-05, 0.0008924558642320335, 0.00047033390728756785, 0.003593915607780218, 0.044251326471567154, 0.18547922372817993, 0.19724349677562714, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6291437745094299, 0.5982875823974609, 0.4885888695716858, 0.5792520046234131, 0.2514877915382385, 0.5298613905906677, 0.11972777545452118, 0.6076628565788269, 0.04243328422307968, 0.5940482020378113, 0.6775911450386047, 0.3496588468551636, 0.4937344789505005, 0.40163323283195496, 2.9517783332266845e-05, 0.03321969881653786, 0.1786998063325882, 0.0021111152600497007, 0.00015362887643277645, 0.0013223892310634255, 0.01674751006066799, 0.27181917428970337, 0.0704144611954689, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6414378881454468, 0.20530864596366882, 0.8448930978775024, 0.5841984748840332, 0.48009997606277466, 0.48003992438316345, 0.4468145966529846, 0.036266062408685684, 0.3466547429561615, 0.521195650100708, 0.7532409429550171, 0.14529024064540863, 0.3844791650772095, 0.46825459599494934, 2.1059213395346887e-05, 0.0005316429305821657, 0.0021434861700981855, 0.0005638045258820057, 2.0347550162114203e-05, 8.372889715246856e-05, 0.0012170294066891074, 0.0006328476592898369, 0.0015302025713026524, 0.2731996476650238, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.7977450489997864, 0.5162288546562195, 0.513008177280426, 0.6203657984733582, 0.04621165990829468, 0.2237500697374344, 0.10730908066034317, 0.17203836143016815, 0.028481170535087585, 0.5342445969581604, 0.7256113290786743, 0.5827998518943787, 0.755642294883728, 0.511749804019928, 0.00015279543003998697, 3.384976253073546e-06, 0.0032942681573331356, 0.003179847961291671, 0.0003072107210755348, 3.0923787562642246e-05, 0.0003082206822000444, 0.0026841319631785154, 0.011449099518358707, 0.2928124964237213, 0.0015787724405527115, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5001324415206909, 0.7283154129981995, 0.6225411295890808, 0.5096700191497803, 0.4470505714416504, 0.6475648880004883, 0.4919697046279907, 0.42729777097702026, 0.22966071963310242, 0.4533919394016266, 0.5539101958274841, 0.2698501944541931, 0.3532210886478424, 0.2643750309944153, 2.9741322578047402e-05, 4.910896677756682e-05, 0.01189705915749073, 0.0036808690056204796, 0.006090851966291666, 0.0029882052913308144, 0.006760776974260807, 0.0002592294185888022, 0.0001972121826838702, 0.15788163244724274, 0.14973512291908264, 0.14614373445510864, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.42266348004341125, 0.20205438137054443, 0.42841264605522156, 0.6724829077720642, 0.29094210267066956, 0.4464052617549896, 0.24126748740673065, 0.22405968606472015, 0.21308888494968414, 0.3085091710090637, 0.4672502279281616, 0.14604215323925018, 0.09687051922082901, 0.12085973471403122, 2.7047781259170733e-05, 7.539001671830192e-05, 0.036947283893823624, 0.01112621370702982, 0.04119950905442238, 0.06979847699403763, 0.01383589580655098, 0.008948443457484245, 9.020609286380932e-05, 0.0005221512983553112, 0.34183818101882935, 0.12104173004627228, 0.027292484417557716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5077533721923828, 0.4866065979003906, 0.8742184638977051, 0.805268406867981, 0.8406472206115723, 0.45863693952560425, 0.3596036732196808, 0.36316972970962524, 0.38783764839172363, 0.03767421096563339, 0.43841618299484253, 0.3401361405849457, 0.3197961747646332, 0.20812755823135376, 7.5720936365542e-06, 5.4811065638205037e-05, 0.015359039418399334, 0.005874635651707649, 0.024854328483343124, 0.16572602093219757, 0.13195344805717468, 0.08553953468799591, 0.00124072446487844, 0.0008515206864103675, 0.0025517549365758896, 0.03817262500524521, 0.1957935392856598, 0.020919298753142357, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12348711490631104, 0.49926623702049255, 0.1342328041791916, 0.07936512678861618, 0.11133208125829697, 0.032334309071302414, 0.028592387214303017, 0.036310840398073196, 0.036252155900001526, 0.10585709661245346, 0.19267472624778748, 0.34429997205734253, 0.16909800469875336, 0.2464863359928131, 3.1697504709882196e-06, 3.401398498681374e-05, 0.0008079431718215346, 0.00045223115012049675, 0.00013304724416229874, 0.0006849576020613313, 0.009534466080367565, 0.010466179810464382, 0.00030334663460962474, 0.00033610902028158307, 2.1021634893259034e-05, 6.891421071486548e-05, 0.0028196852654218674, 0.3685440421104431, 0.0008976467652246356, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [4.5035082507638435e-07, 4.8253248507990065e-08, 2.1990938847693542e-08, 4.3766593194050074e-07, 1.1283042766763174e-07, 2.4235429663121977e-08, 4.6985369408503175e-06, 1.5805973418991925e-07, 1.1619090578562918e-08, 1.9516033233912822e-08, 1.8456361772223318e-07, 2.2261544074808626e-07, 2.278205402106437e-09, 7.143006541809882e-07, 0.21044957637786865, 0.0012722803512588143, 0.07485485821962357, 0.004568059463053942, 0.008557068184018135, 0.04491077736020088, 0.010689688846468925, 0.010801602154970169, 0.015439217910170555, 0.001288879313506186, 0.032191790640354156, 9.430324280401692e-05, 0.0010071481810882688, 0.03593403846025467, 0.015365669503808022, 0.28865233063697815, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.71169513463974, 0.2780396640300751, 0.44078493118286133, 0.7963916063308716, 0.6933308839797974, 0.5056049823760986, 0.7329073548316956, 0.810703694820404, 0.551677942276001, 0.6459015607833862, 0.6943050622940063, 0.2817550301551819, 0.10247289389371872, 0.7378624677658081, 8.274764695670456e-06, 0.0003195737663190812, 0.0016381103778257966, 0.001899963477626443, 0.000450764549896121, 0.0029568641912192106, 0.0004077073244843632, 0.006739944685250521, 5.316005626809783e-05, 0.000977654941380024, 0.00033480822457931936, 1.5544836060144007e-05, 5.177688763069455e-06, 0.000280524865956977, 8.569184137741104e-05, 0.19435854256153107, 0.0009946423815563321, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.723514199256897, 0.08602748066186905, 0.6093902587890625, 0.8655006289482117, 0.42677831649780273, 0.03823491558432579, 0.30262306332588196, 0.036271825432777405, 0.12300263345241547, 0.2776595950126648, 0.07632125169038773, 0.06917709112167358, 0.14498986303806305, 0.06881040334701538, 2.5871422622003593e-06, 0.0004552309401333332, 0.00916277151554823, 0.2859989106655121, 0.028668222948908806, 0.004703177139163017, 0.013283651322126389, 0.011935138143599033, 0.00041849465924315155, 0.021506765857338905, 0.0005354905733838677, 2.3408898414345458e-05, 5.557515123655321e-06, 4.006853941973532e-06, 0.000782388960942626, 0.032734211534261703, 0.33600685000419617, 0.05645810067653656, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.7111753225326538, 0.8019941449165344, 0.7984396815299988, 0.6959745287895203, 0.34880974888801575, 0.5955101251602173, 0.6658092141151428, 0.5378626585006714, 0.35595381259918213, 0.5855972766876221, 0.5757258534431458, 0.133575439453125, 0.3884122669696808, 0.11617641150951385, 8.579120731155854e-06, 0.001615832676179707, 0.0592908076941967, 0.004439341835677624, 0.0221478920429945, 0.05761101841926575, 0.08599329739809036, 0.009327156469225883, 0.0014337823959067464, 0.22479815781116486, 0.007599419914186001, 0.00010282513540005311, 0.003995772451162338, 0.0007532926392741501, 0.0001985877170227468, 0.042725738137960434, 0.609107255935669, 0.032340146601200104, 0.2600889503955841, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.43439850211143494, 0.1714652180671692, 0.4214288294315338, 0.6560039520263672, 0.15961043536663055, 0.25604698061943054, 0.26937225461006165, 0.1702796220779419, 0.22940081357955933, 0.327440470457077, 0.3977930247783661, 0.08873222768306732, 0.13160161674022675, 0.07058954238891602, 2.3103428247850388e-05, 0.0007894318550825119, 0.08912800997495651, 0.00870462041348219, 0.062210533767938614, 0.21669252216815948, 0.04955689236521721, 0.12036743760108948, 0.001276280265301466, 0.002290783217176795, 0.4637441337108612, 0.041003014892339706, 0.007595454342663288, 0.0049859327264130116, 0.030789200216531754, 0.01441932376474142, 0.02666427381336689, 0.013092019595205784, 0.22824719548225403, 0.07290598005056381, NaN, NaN, NaN, NaN, NaN, NaN], [0.48717519640922546, 0.4504354000091553, 0.9026078581809998, 0.8262973427772522, 0.8697957992553711, 0.4322546720504761, 0.47440072894096375, 0.40584686398506165, 0.6554202437400818, 0.04447361081838608, 0.5114831924438477, 0.4020007252693176, 0.3586147725582123, 0.19603849947452545, 5.424046776170144e-06, 4.2991967347916216e-05, 0.006631283089518547, 0.0006027332856319845, 0.004053125157952309, 0.03894652798771858, 0.031787656247615814, 0.10168109834194183, 0.004267984535545111, 0.002045443281531334, 0.0010633694473654032, 0.005091637372970581, 0.031351421028375626, 6.663963722530752e-05, 0.09428737312555313, 0.0008465268765576184, 0.00024849644978530705, 0.002269570017233491, 0.01905866153538227, 0.2164839655160904, 0.010082208551466465, NaN, NaN, NaN, NaN, NaN], [0.09346597641706467, 0.41046077013015747, 0.13097965717315674, 0.06711046397686005, 0.09538185596466064, 0.021688319742679596, 0.027864748612046242, 0.029869627207517624, 0.07506763935089111, 0.13717295229434967, 0.21322546899318695, 0.3559926152229309, 0.19059841334819794, 0.24045485258102417, 2.0756003777933074e-06, 1.1191940757271368e-05, 0.0006002296577207744, 0.0002709901600610465, 9.913583926390857e-05, 0.0001758227008394897, 0.0029332106932997704, 0.008675863035023212, 0.0011328428518027067, 0.0023299665190279484, 6.693489558529109e-05, 0.00013525204849429429, 0.0013442488852888346, 0.022858861833810806, 2.321010106243193e-05, 0.0010626229923218489, 2.5993340386776254e-05, 3.972689592046663e-05, 5.326797690941021e-05, 0.0033412689808756113, 0.35271701216697693, 0.0008956229430623353, NaN, NaN, NaN, NaN], [4.6634454520244617e-07, 5.573102512812511e-08, 2.3018172257138758e-08, 3.889360016273713e-07, 9.709493298259986e-08, 2.4796046105279856e-08, 7.192591056082165e-06, 1.7916640615567303e-07, 1.8580767147113875e-08, 3.5935642017648206e-08, 2.774728216081712e-07, 3.801677337378351e-07, 2.8816848907098347e-09, 9.808413778955583e-07, 0.2028982788324356, 0.00036489564809016883, 0.07616367936134338, 0.00673737283796072, 0.011110173538327217, 0.021392904222011566, 0.010494116693735123, 0.006134945899248123, 0.015969248488545418, 0.005187375005334616, 0.12039955705404282, 0.0005341891082935035, 0.0022901638876646757, 0.027128320187330246, 0.005907480139285326, 0.033119603991508484, 0.002176248235628009, 0.0003625153622124344, 6.369769835146144e-05, 0.0007003483478911221, 0.03456505015492439, 0.01570759527385235, 0.28412890434265137, NaN, NaN, NaN], [0.6667957305908203, 0.327456533908844, 0.4202725291252136, 0.7458598613739014, 0.6837785840034485, 0.5435037612915039, 0.7794858813285828, 0.849186360836029, 0.6942030787467957, 0.7531007528305054, 0.7604266405105591, 0.4857816696166992, 0.12311270833015442, 0.7958275079727173, 7.400509275612421e-06, 3.192616713931784e-05, 0.00035208670306019485, 0.002478531561791897, 0.0006564928335137665, 0.0008886585710570216, 0.0005662215990014374, 0.0016915983287617564, 1.3900444173486903e-05, 0.0009738726075738668, 0.00042995362309738994, 8.639829320600256e-05, 1.4000924238644075e-05, 0.00033226466621272266, 2.9785558581352234e-05, 0.00921203475445509, 3.390025085536763e-06, 5.1574592362158e-05, 2.3835823412809987e-06, 1.9022172637050971e-06, 0.00016878120368346572, 9.063100151252002e-05, 0.20696188509464264, 0.001649125711992383, NaN, NaN], [0.704485297203064, 0.08825523406267166, 0.5944071412086487, 0.8510531783103943, 0.4262540936470032, 0.04518446326255798, 0.38849392533302307, 0.055145543068647385, 0.277063250541687, 0.40566664934158325, 0.09198901802301407, 0.13750647008419037, 0.24822941422462463, 0.1165834292769432, 3.5331499930180144e-06, 0.00019471753330435604, 0.003537738462910056, 0.2800489366054535, 0.036592625081539154, 0.002127013634890318, 0.024595409631729126, 0.008275463245809078, 0.00023266732750926167, 0.021680369973182678, 0.0005173377576284111, 7.175304199336097e-05, 2.6857771445065737e-05, 1.6371919627999887e-05, 0.0012281013187021017, 0.011112956330180168, 0.058813560754060745, 0.0009629606502130628, 1.1531898962857667e-05, 4.947432444168953e-06, 2.475359451636905e-06, 0.0005685617215931416, 0.0267820842564106, 0.3296748399734497, 0.06147307902574539, NaN], [0.5231692790985107, 0.6706213355064392, 0.7785398364067078, 0.7122241258621216, 0.34260621666908264, 0.579698920249939, 0.5863306522369385, 0.4822496175765991, 0.5804131031036377, 0.7801564335823059, 0.7983464002609253, 0.22512593865394592, 0.4790371060371399, 0.2274763584136963, 1.8860177078749985e-05, 3.20236104300875e-08, 0.00013383101031649858, 0.00029007354169152677, 0.002788462908938527, 0.0014709108509123325, 0.0009710633894428611, 0.0001290659129153937, 2.0881772798020393e-05, 7.236683813971467e-06, 3.12792144541163e-05, 7.099155482137576e-05, 3.213396485080011e-05, 3.9666349039180204e-05, 0.00022854047711007297, 0.0037343965377658606, 1.487573445047019e-05, 0.00019343644089531153, 8.10168421594426e-05, 1.1448363693489227e-05, 3.5921341350331204e-06, 2.216967368440237e-05, 0.0017730530817061663, 0.0001526248233858496, 0.009769736789166927, 0.4419056475162506]], [[0.06147387623786926, 0.0657946914434433, 0.22564710676670074, 0.1299343705177307, 0.021580645814538002, 0.08992400765419006, 0.025479430332779884, 0.04823821783065796, 0.05891237407922745, 0.016958819702267647, 0.0021926285699009895, 0.017513686791062355, 0.09859969466924667, 0.16368542611598969, 0.038398925215005875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.029852252453565598, 0.26626214385032654, 0.14803646504878998, 0.038784727454185486, 0.07803148031234741, 0.006210723891854286, 0.0026457132771611214, 0.006018034182488918, 0.05453306809067726, 0.002730109030380845, 0.015730326995253563, 0.0017557059181854129, 0.034912969917058945, 0.03208531066775322, 0.03983413055539131, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01053018867969513, 0.02744918502867222, 0.2530466914176941, 0.05846027657389641, 0.1744728684425354, 0.011957419104874134, 0.003304906887933612, 0.00205883732996881, 0.00874510407447815, 0.0014524421421810985, 0.0009729861048981547, 0.0026561047416180372, 0.0023208027705550194, 0.0038251704536378384, 0.005045189522206783, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.016039762645959854, 0.05755838379263878, 0.10756286233663559, 0.03799062967300415, 0.5738711953163147, 0.061907339841127396, 0.128611221909523, 0.01847657933831215, 0.06501789391040802, 0.015564735978841782, 0.0016139671206474304, 0.014343881979584694, 0.020734043791890144, 0.14008449018001556, 0.13515408337116241, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005847899243235588, 0.11914067715406418, 0.01715121790766716, 0.3517457842826843, 0.0661543607711792, 0.07493122667074203, 0.012425812892615795, 0.11745280772447586, 0.08440648764371872, 0.020029406994581223, 0.05165768414735794, 0.04094480350613594, 0.024548601359128952, 0.005826729815453291, 0.13841456174850464, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015926362946629524, 0.007578620687127113, 0.1226087138056755, 0.030128292739391327, 0.03851892054080963, 0.3367418944835663, 0.01694057136774063, 0.09829536825418472, 0.0361555740237236, 0.10537439584732056, 0.007450005039572716, 0.029753634706139565, 0.22920416295528412, 0.01793695241212845, 0.05258304625749588, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01326388493180275, 0.05337870866060257, 0.047661036252975464, 0.08615607023239136, 0.12425915151834488, 0.4180251955986023, 0.04702466353774071, 0.0717325434088707, 0.05138256773352623, 0.06877672672271729, 0.0152205191552639, 0.0719875767827034, 0.1666427105665207, 0.13322126865386963, 0.053655143827199936, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.026802292093634605, 0.003955241292715073, 0.0206829272210598, 0.02742936834692955, 0.06016179919242859, 0.15127348899841309, 0.06774158030748367, 0.2981398105621338, 0.05239749699831009, 0.09365928173065186, 0.035629644989967346, 0.020771589130163193, 0.13655303418636322, 0.012941722758114338, 0.05640798062086105, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06469012051820755, 0.1851334124803543, 0.08788572251796722, 0.19977343082427979, 0.00846380740404129, 0.03702360764145851, 0.0876760184764862, 0.046302031725645065, 0.11564433574676514, 0.05180440843105316, 0.49518024921417236, 0.1649368405342102, 0.030481798574328423, 0.10461966693401337, 0.07739346474409103, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.020106524229049683, 0.01925482228398323, 0.006043681409209967, 0.01652396097779274, 0.001572003006003797, 0.005779887083917856, 0.015335858799517155, 0.03537710756063461, 0.009967570193111897, 0.09144406765699387, 0.43651703000068665, 0.2613205015659332, 0.0483890138566494, 0.06553913652896881, 0.055434126406908035, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07980967313051224, 0.14815203845500946, 0.09271827340126038, 0.004086778499186039, 0.010790406726300716, 0.0747552439570427, 0.10995902121067047, 0.04728228971362114, 0.1809520274400711, 0.025821411982178688, 0.06657237559556961, 0.1431768387556076, 0.19449584186077118, 0.20780201256275177, 0.10148976743221283, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05537823587656021, 0.008725662715733051, 0.0058344281278550625, 0.029011448845267296, 0.048424966633319855, 0.047911662608385086, 0.16901308298110962, 0.17019973695278168, 0.011648884043097496, 0.08953043073415756, 0.5360274910926819, 0.10330803692340851, 0.078437939286232, 0.12202966213226318, 0.11905822902917862, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01546903420239687, 0.0005347061669453979, 0.0015839362749829888, 0.053056132048368454, 0.23614321649074554, 0.013318118639290333, 0.051473915576934814, 0.011966699734330177, 0.007302975282073021, 0.09275621920824051, 0.06646261364221573, 0.010813506320118904, 0.13289499282836914, 0.22826357185840607, 0.04386172071099281, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009458722546696663, 0.0058342707343399525, 0.012789146974682808, 0.005895438138395548, 0.026010286062955856, 0.057482823729515076, 0.005663284566253424, 0.005727604031562805, 0.0033144087065011263, 0.011671853251755238, 0.00424896739423275, 0.056589994579553604, 0.20401620864868164, 0.03777612745761871, 0.03114682249724865, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0012354525970295072, 0.034024473279714584, 0.10020612925291061, 0.02267461270093918, 0.08676987141370773, 0.14216794073581696, 0.0033775768242776394, 0.07320579141378403, 0.07390473037958145, 0.0168889332562685, 0.00386308366432786, 0.02569040097296238, 0.24664165079593658, 0.2674221694469452, 0.014589445665478706, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.12044757604598999, 0.22699733078479767, 0.3625817894935608, 0.18942511081695557, 0.468371719121933, 0.5971034169197083, 0.5581120252609253, 0.29680517315864563, 0.4773823618888855, 0.4035939574241638, 0.3702273666858673, 0.3751682937145233, 0.267861545085907, 0.4069889783859253, 0.040672045201063156, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0243044663220644, 0.4273812174797058, 0.5286219716072083, 0.05566978082060814, 0.4582313597202301, 0.5064847469329834, 0.09591992199420929, 0.1787465512752533, 0.7349562644958496, 0.00692495983093977, 0.04355573281645775, 0.04027868062257767, 0.03415951877832413, 0.02788657508790493, 0.03653726726770401, 0.07662782073020935, 0.14776498079299927, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1999487727880478, 0.02213704027235508, 0.750217854976654, 0.5677059292793274, 0.8556592464447021, 0.6869031190872192, 0.2201639711856842, 0.6947058439254761, 0.2711787521839142, 0.21462410688400269, 0.3783731162548065, 0.39328378438949585, 0.3796219229698181, 0.27560317516326904, 0.052095912396907806, 0.0006832284270785749, 0.003495789598673582, 0.19430121779441833, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17733721435070038, 0.1195838525891304, 0.4294462502002716, 0.41039443016052246, 0.45686641335487366, 0.5433338284492493, 0.08341590315103531, 0.5749803781509399, 0.0773383378982544, 0.2876206338405609, 0.19534848630428314, 0.10015372186899185, 0.2102438062429428, 0.04678432643413544, 0.044711172580718994, 0.00020953372586518526, 0.007476589176803827, 0.1521030217409134, 0.003494996577501297, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4523387849330902, 0.8917949795722961, 0.4903220534324646, 0.5869925022125244, 0.47626572847366333, 0.006232858635485172, 0.41125378012657166, 0.13404546678066254, 0.6460333466529846, 0.32553666830062866, 0.3429105877876282, 0.031081799417734146, 0.42998504638671875, 0.16709895431995392, 0.08821719139814377, 0.00048688906827010214, 0.0011088894680142403, 0.0024602855555713177, 0.0005520267877727747, 0.26744863390922546, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.49767979979515076, 0.7566660642623901, 0.25263193249702454, 0.4967457056045532, 0.47193706035614014, 0.006824302952736616, 0.2858791947364807, 0.18135732412338257, 0.4390898644924164, 0.7668571472167969, 0.15391138195991516, 0.08414287865161896, 0.5640745759010315, 0.35628020763397217, 0.09142898768186569, 0.0004194685607217252, 0.0005068383179605007, 0.026896899566054344, 0.0004147894505877048, 0.006156287621706724, 0.4387049376964569, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18697474896907806, 0.23196713626384735, 0.23554784059524536, 0.34321168065071106, 0.5325552225112915, 0.15430577099323273, 0.2887123227119446, 0.4957616627216339, 0.36584702134132385, 0.2891024053096771, 0.08069057762622833, 0.18119029700756073, 0.4536079466342926, 0.16425864398479462, 0.03777371346950531, 1.0518371709622443e-05, 5.5142045312095433e-05, 0.016997506842017174, 3.693701364682056e-05, 0.0006244040559977293, 0.21657241880893707, 0.01345360092818737, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17079660296440125, 0.16765500605106354, 0.28291502594947815, 0.16039209067821503, 0.2695491909980774, 0.16163654625415802, 0.08897912502288818, 0.28747832775115967, 0.8989478349685669, 0.26775097846984863, 0.17184530198574066, 0.3264879584312439, 0.31386569142341614, 0.1549917310476303, 0.05264737084507942, 0.3619365394115448, 0.25655418634414673, 0.3611752688884735, 0.14710570871829987, 0.018539972603321075, 0.21814967691898346, 0.09323819726705551, 0.01780291646718979, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04084352031350136, 0.5361505150794983, 0.018223807215690613, 0.03828004375100136, 0.3140276074409485, 0.08277524262666702, 0.07094793766736984, 0.012667819857597351, 0.3304368853569031, 0.10053964704275131, 0.03868165612220764, 0.31755131483078003, 0.22644393146038055, 0.07613880187273026, 0.12961620092391968, 0.004012200981378555, 0.004658036399632692, 0.017421945929527283, 0.0026806569658219814, 0.590861439704895, 0.051964171230793, 0.007618917152285576, 0.0007336572161875665, 0.12340892106294632, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07373615354299545, 0.19122207164764404, 0.06966950744390488, 0.01624569669365883, 0.017842771485447884, 0.2144099771976471, 0.24285149574279785, 0.3761756718158722, 0.8141085505485535, 0.27487871050834656, 0.09974052757024765, 0.10127317160367966, 0.16323235630989075, 0.21032299101352692, 0.10343435406684875, 0.44725751876831055, 0.6053639054298401, 0.07041247189044952, 0.07085516303777695, 0.003138674655929208, 0.2879992425441742, 0.049135204404592514, 0.14297868311405182, 0.06008363142609596, 0.06304289400577545, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06651142984628677, 0.1456020176410675, 0.01741747185587883, 0.07566884905099869, 0.018790215253829956, 0.20801369845867157, 0.16892337799072266, 0.33592528104782104, 0.1834612786769867, 0.29906225204467773, 0.2579277753829956, 0.5998365879058838, 0.5642448663711548, 0.572043240070343, 0.0891154333949089, 0.7072809338569641, 0.7582566142082214, 0.16150887310504913, 0.18586905300617218, 0.015776842832565308, 0.08385244756937027, 0.32581770420074463, 0.5540359020233154, 0.13379113376140594, 0.0028463751077651978, 0.051922835409641266, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03234146162867546, 0.1962265521287918, 0.0277019701898098, 0.06972747296094894, 0.10650040954351425, 0.07791601866483688, 0.38205334544181824, 0.4892197549343109, 0.003444283502176404, 0.414199560880661, 0.16890743374824524, 0.4916560649871826, 0.8149713277816772, 0.7298122048377991, 0.14976243674755096, 0.4378974437713623, 0.10523661971092224, 0.014314417727291584, 0.30093127489089966, 0.06324318051338196, 0.08432605862617493, 0.2594241797924042, 0.6188808083534241, 0.3929617404937744, 0.00827555637806654, 0.07725780457258224, 0.06407154351472855, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07799918204545975, 0.2381461262702942, 0.01647050306200981, 0.08363308757543564, 0.05209676921367645, 0.02968973107635975, 0.11220219731330872, 0.32446831464767456, 0.1546868085861206, 0.06510066986083984, 0.1935844123363495, 0.5264057517051697, 0.34881067276000977, 0.6311980485916138, 0.09822507947683334, 0.2013174593448639, 0.5200937390327454, 0.3190821707248688, 0.5249915719032288, 0.18779213726520538, 0.1779765784740448, 0.29882070422172546, 0.5049118399620056, 0.06443758308887482, 0.007539320737123489, 0.16998757421970367, 0.031686559319496155, 0.3610091209411621, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1688770204782486, 0.13700607419013977, 0.20374003052711487, 0.12288741022348404, 0.15864238142967224, 0.039533428847789764, 0.12642242014408112, 0.35126128792762756, 0.365562379360199, 0.48467183113098145, 0.3247453570365906, 0.003142370842397213, 0.5969579219818115, 0.5533550977706909, 0.1647837609052658, 0.5546301603317261, 0.5397829413414001, 0.43089261651039124, 0.08987504988908768, 0.3114354610443115, 0.4812281131744385, 0.11215226352214813, 0.17198431491851807, 0.5790820121765137, 0.03648975491523743, 0.0541677288711071, 0.04165489599108696, 0.07749651372432709, 0.030232839286327362, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3052995800971985, 0.6539703607559204, 0.022321274504065514, 0.1902511715888977, 0.05963977798819542, 0.17083951830863953, 0.5218495726585388, 0.2573777139186859, 0.17107829451560974, 0.46426069736480713, 0.3389802873134613, 0.4338558316230774, 0.014936042949557304, 0.6202957630157471, 0.13899832963943481, 0.005376005079597235, 0.010858614929020405, 0.02991071715950966, 0.029742157086730003, 0.04020260274410248, 0.1695990264415741, 0.0604972317814827, 0.10318762809038162, 0.48727869987487793, 0.07163358479738235, 0.025501595810055733, 0.05125340074300766, 0.22269804775714874, 0.08394679427146912, 0.19870582222938538, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12219581007957458, 0.5012378692626953, 0.06702763587236404, 0.06399006396532059, 0.07401375472545624, 0.24048954248428345, 0.08739905059337616, 0.050457850098609924, 0.030934542417526245, 0.1506662517786026, 0.1536494344472885, 0.49837279319763184, 0.018043117597699165, 0.11216632276773453, 0.12939369678497314, 0.0006954512791708112, 0.0002132337394868955, 0.037006676197052, 0.0018452922813594341, 0.16118928790092468, 0.5505160689353943, 0.028353480622172356, 0.0021746368147432804, 0.027092093601822853, 0.0001434519508620724, 0.0029707583598792553, 4.2726576793938875e-05, 0.0012847317848354578, 0.0010433235438540578, 0.18891005218029022, 0.014656933024525642, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11525271832942963, 0.521948516368866, 0.007329752668738365, 0.008543604053556919, 0.05213259160518646, 0.04235774278640747, 0.2166471928358078, 0.528154194355011, 0.42159566283226013, 0.22446103394031525, 0.0032521234825253487, 0.5035390257835388, 0.365617960691452, 0.44961339235305786, 0.15735329687595367, 0.013874622993171215, 0.0695175901055336, 0.005752294324338436, 0.005697373300790787, 0.0021822804119437933, 0.02415846660733223, 0.00723307253792882, 0.3120453357696533, 0.016472192481160164, 0.004319194238632917, 0.041901107877492905, 0.7052133083343506, 0.0035930864978581667, 0.020578961819410324, 0.0021869041956961155, 0.0003597450559027493, 0.0005889505264349282, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03232282027602196, 0.08449342846870422, 0.004147443920373917, 0.050799064338207245, 0.037334948778152466, 0.08206064254045486, 0.07099173963069916, 0.19771835207939148, 0.021330662071704865, 0.08051090687513351, 0.1005825400352478, 0.700605034828186, 0.3027697801589966, 0.4364767074584961, 0.10480254143476486, 0.29724666476249695, 0.30918487906455994, 0.0693497508764267, 0.04026606306433678, 0.00593132060021162, 0.04497085511684418, 0.07199602574110031, 0.16270284354686737, 0.058071933686733246, 0.0005904879071749747, 0.0013724194141104817, 0.013050474226474762, 0.002609569113701582, 0.013482913374900818, 0.089314766228199, 0.03341012820601463, 0.21929660439491272, 0.006776490714401007, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.034268103539943695, 0.16091260313987732, 0.0168391652405262, 0.06967493146657944, 0.0915973111987114, 0.051104262471199036, 0.2385529726743698, 0.3295409679412842, 0.0004638703539967537, 0.22104156017303467, 0.13362999260425568, 0.5110065937042236, 0.7347238063812256, 0.7763577103614807, 0.15897347033023834, 0.3422777056694031, 0.07256462424993515, 0.012822822667658329, 0.21187257766723633, 0.060081083327531815, 0.09390594810247421, 0.19744858145713806, 0.5327264666557312, 0.3024030029773712, 0.013231869786977768, 0.1601967215538025, 0.04191795364022255, 0.5788960456848145, 0.791706383228302, 0.2698511779308319, 0.26516515016555786, 0.2890409529209137, 0.032140959054231644, 0.02436642162501812, NaN, NaN, NaN, NaN, NaN, NaN], [0.08530293405056, 0.1988343894481659, 0.010091865435242653, 0.07736483961343765, 0.030177433043718338, 0.023718634620308876, 0.06320804357528687, 0.20902810990810394, 0.020835628733038902, 0.026085397228598595, 0.10371798276901245, 0.427949994802475, 0.2465561032295227, 0.6410334706306458, 0.12414435297250748, 0.15722303092479706, 0.44676893949508667, 0.24300073087215424, 0.3980245292186737, 0.29666030406951904, 0.21130049228668213, 0.31708449125289917, 0.45276522636413574, 0.04954151436686516, 0.006070373114198446, 0.23888874053955078, 0.06321726739406586, 0.48237892985343933, 0.09136107563972473, 0.571183979511261, 0.36026179790496826, 0.0799446776509285, 0.1583012342453003, 0.025381257757544518, 0.5154083371162415, NaN, NaN, NaN, NaN, NaN], [0.17881684005260468, 0.09949745982885361, 0.17292529344558716, 0.14197823405265808, 0.0994792953133583, 0.022899990901350975, 0.07621151208877563, 0.20277591049671173, 0.059071850031614304, 0.23252709209918976, 0.2142648547887802, 0.0016634195344522595, 0.4786902368068695, 0.5105896592140198, 0.1802191287279129, 0.6566299200057983, 0.6752134561538696, 0.5489535927772522, 0.1520741730928421, 0.6433172821998596, 0.7151104211807251, 0.290630042552948, 0.3418242335319519, 0.686417818069458, 0.046654678881168365, 0.09611856192350388, 0.0634889155626297, 0.4891318380832672, 0.46607306599617004, 0.5581225156784058, 0.4337400496006012, 0.06152508407831192, 0.08386452496051788, 0.0397774837911129, 0.11068917065858841, 0.04009125009179115, NaN, NaN, NaN, NaN], [0.29184988141059875, 0.5299537181854248, 0.01714717224240303, 0.1581006944179535, 0.034420810639858246, 0.1480618417263031, 0.35555243492126465, 0.16130897402763367, 0.0352683924138546, 0.2384539395570755, 0.22334522008895874, 0.274210661649704, 0.008749962784349918, 0.5107676982879639, 0.16247788071632385, 0.0024060788564383984, 0.006098441779613495, 0.013975032605230808, 0.014695755206048489, 0.022452646866440773, 0.10514718294143677, 0.04751533642411232, 0.0609392412006855, 0.31799331307411194, 0.04427095875144005, 0.01951766200363636, 0.04202713817358017, 0.3371936082839966, 0.2731744647026062, 0.3478449583053589, 0.03363266587257385, 0.011759405955672264, 0.01767517626285553, 0.024101490154862404, 0.19511322677135468, 0.05518092215061188, 0.2097322940826416, NaN, NaN, NaN], [0.1536586880683899, 0.39876002073287964, 0.060627128928899765, 0.08434724807739258, 0.06138864532113075, 0.18170806765556335, 0.0558285117149353, 0.026850836351513863, 0.004648242145776749, 0.05450701341032982, 0.08679821342229843, 0.24500715732574463, 0.009806739166378975, 0.06359081715345383, 0.14997224509716034, 0.000109505133877974, 2.9198725314927287e-05, 0.01053665205836296, 0.0007290886132977903, 0.055462777614593506, 0.18011406064033508, 0.013305839151144028, 0.0007181179826147854, 0.008689867332577705, 4.760328374686651e-05, 0.0016827695071697235, 2.2867327061248943e-05, 0.000821226101834327, 0.0012459746794775128, 0.2353316843509674, 0.004575389437377453, 0.003901307238265872, 0.0009429306373931468, 1.1980442650383338e-05, 0.0003497266152407974, 0.00027309934375807643, 0.1965111494064331, 0.005757085047662258, NaN, NaN], [0.1216418668627739, 0.4058372378349304, 0.00597163662314415, 0.009731672704219818, 0.04685758054256439, 0.030955728143453598, 0.14503908157348633, 0.4122965633869171, 0.13539999723434448, 0.08889995515346527, 0.0017191163497045636, 0.24694381654262543, 0.23039060831069946, 0.2996818721294403, 0.1837962418794632, 0.0017744784709066153, 0.012578981928527355, 0.0015974465059116483, 0.002320722443982959, 0.0008557687979191542, 0.004459704738110304, 0.00322481500916183, 0.13683773577213287, 0.010506929829716682, 0.0027294831816107035, 0.03936534747481346, 0.7146239876747131, 0.0021277000196278095, 0.014929071068763733, 0.003117389976978302, 0.0010002683848142624, 0.0005979579291306436, 0.037009548395872116, 0.6984097361564636, 0.0021584301721304655, 0.012162267230451107, 0.002483450109139085, 0.00014705986541230232, 0.0003713203768711537, NaN], [0.2966727912425995, 0.1567845344543457, 0.07310101389884949, 0.14124755561351776, 0.2961083948612213, 0.07968501001596451, 0.06122228875756264, 0.14724984765052795, 0.06047076731920242, 0.055829375982284546, 0.06430483609437943, 0.11614347994327545, 0.15107537806034088, 0.15706941485404968, 0.12527146935462952, 0.10933294892311096, 0.0594157911837101, 0.01442565955221653, 0.027944112196564674, 0.24928514659404755, 0.3314722180366516, 0.036283038556575775, 0.01824975199997425, 0.03247179090976715, 0.02741291932761669, 0.0011664694175124168, 0.03365480154752731, 0.10097742080688477, 0.021067792549729347, 0.42791858315467834, 0.11242418736219406, 0.11434369534254074, 0.000791618600487709, 0.02291581965982914, 0.07201644033193588, 0.02081850729882717, 0.39859694242477417, 0.2763477563858032, 0.13874487578868866, 0.003258609212934971]], [[0.2643359303474426, 0.2943609654903412, 0.10517127066850662, 0.013473477214574814, 0.17808614671230316, 0.05031028389930725, 0.0477585569024086, 0.13444076478481293, 0.0626431554555893, 0.05089121311903, 0.025438696146011353, 0.12666909396648407, 0.015911895781755447, 0.08822031319141388, 0.09637932479381561, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02893858775496483, 0.3286381959915161, 0.024464154615998268, 0.015645690262317657, 0.07065004110336304, 0.03320073336362839, 0.0035833900328725576, 0.002133443485945463, 0.0077736834064126015, 0.0014096481027081609, 0.006704544182866812, 0.0034484381321817636, 0.010553284548223019, 0.029550330713391304, 0.0064092278480529785, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0403970405459404, 0.029290249571204185, 0.2564694881439209, 0.03103366494178772, 0.01930038072168827, 0.0007984130643308163, 0.0024861868005245924, 0.013074777089059353, 0.025626862421631813, 0.0022637112997472286, 0.010511897504329681, 0.03038576804101467, 0.00803295336663723, 0.000980974524281919, 0.040744345635175705, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.23322375118732452, 0.23003342747688293, 0.24563531577587128, 0.07496963441371918, 0.029645830392837524, 0.0015733843902125955, 0.048427432775497437, 0.07474764436483383, 0.005064227152615786, 0.006064139772206545, 0.00639030896127224, 0.0023683567997068167, 0.0201968252658844, 0.0057837339118123055, 0.030518243089318275, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009382463060319424, 0.004108777269721031, 0.355550616979599, 0.0026344929356127977, 0.036474164575338364, 0.0013674235669896007, 0.010420771315693855, 0.008167937397956848, 0.005904712714254856, 0.0164882093667984, 0.0014915319625288248, 0.00666471105068922, 0.007061991840600967, 0.006146776955574751, 0.03842667490243912, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.340854674577713, 0.027831802144646645, 0.11495380103588104, 0.4507772624492645, 0.33573275804519653, 0.07158998399972916, 0.3054116368293762, 0.09558256715536118, 0.008191889151930809, 0.08007357269525528, 0.08199689537286758, 0.011630101129412651, 0.016172919422388077, 0.020448284223675728, 0.05253906920552254, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0825798362493515, 0.09406770020723343, 0.044158000499010086, 0.06245531886816025, 0.15669509768486023, 0.1018981784582138, 0.17849969863891602, 0.1823071539402008, 0.1725231111049652, 0.14688736200332642, 0.027769910171628, 0.1729786992073059, 0.04907526820898056, 0.09640378504991531, 0.07928813993930817, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04138464853167534, 0.0045098732225596905, 0.098704032599926, 0.034942083060741425, 0.1842936873435974, 0.1567782759666443, 0.14141200482845306, 0.1953822374343872, 0.09936889261007309, 0.281032919883728, 0.13522183895111084, 0.012650868855416775, 0.02501768246293068, 0.2133605033159256, 0.14542686939239502, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05831298604607582, 0.07845572382211685, 0.00935202743858099, 0.09348727762699127, 0.2554629147052765, 0.026818757876753807, 0.15820558369159698, 0.09712891280651093, 0.18406683206558228, 0.297629177570343, 0.011888068169355392, 0.04674078896641731, 0.01729435659945011, 0.04945852607488632, 0.08047669380903244, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.030211733654141426, 0.004252443555742502, 0.044400423765182495, 0.0032993308268487453, 0.029341043904423714, 0.14371474087238312, 0.17894455790519714, 0.12369092553853989, 0.48359414935112, 0.06321088969707489, 0.05475561320781708, 0.3139732778072357, 0.086760014295578, 0.13208359479904175, 0.2905256450176239, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06285266578197479, 0.0062216646037995815, 0.016913438215851784, 0.007285475265234709, 0.01629750058054924, 0.004617355298250914, 0.06147269159555435, 0.21831700205802917, 0.11657348275184631, 0.39258062839508057, 0.17390909790992737, 0.3519352376461029, 0.014494672417640686, 0.04437657818198204, 0.04845427721738815, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.014810703694820404, 0.027867808938026428, 0.00787208043038845, 0.003661711234599352, 0.06816401332616806, 0.014048570767045021, 0.04280591011047363, 0.04519394412636757, 0.07874996215105057, 0.2074531614780426, 0.12078044563531876, 0.53052818775177, 0.035032909363508224, 0.1398327797651291, 0.02986292913556099, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.011430865153670311, 0.002694258699193597, 0.03896895423531532, 0.04504057392477989, 0.00808126013725996, 0.01048098411411047, 0.012571780942380428, 0.0054772221483290195, 0.07419075071811676, 0.02193005569279194, 0.3994891941547394, 0.15694338083267212, 0.3065741956233978, 0.022703034803271294, 0.07852455973625183, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0007813395350240171, 4.470362910069525e-06, 0.0010683261789381504, 0.022204171866178513, 0.0022952572908252478, 4.198186070425436e-05, 0.0009061718010343611, 0.0006557627930305898, 0.0009219115017913282, 0.0006920882733538747, 0.005404994357377291, 0.012070748023688793, 0.21383939683437347, 0.0026518681552261114, 0.0011399114737287164, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03732156753540039, 0.14082211256027222, 0.08218222856521606, 0.02148711122572422, 0.037640467286109924, 0.011636778712272644, 0.01611051708459854, 0.06724098324775696, 0.20042963325977325, 0.035641491413116455, 0.045655738562345505, 0.041121501475572586, 0.23917138576507568, 0.01630677469074726, 0.2854580283164978, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004390498157590628, 0.00876205787062645, 0.016465701162815094, 0.005714573431760073, 0.036494653671979904, 0.0032131776679307222, 0.01477664802223444, 0.018077310174703598, 0.010320773348212242, 0.006645719520747662, 0.03231831267476082, 0.004141036421060562, 0.011432528495788574, 0.011813640594482422, 0.20326180756092072, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024762088432908058, 0.05259820073843002, 0.06384432315826416, 0.1483391523361206, 0.26820069551467896, 0.20398226380348206, 0.37573596835136414, 0.08007726073265076, 0.052950888872146606, 0.09653404355049133, 0.1610451638698578, 0.12953783571720123, 0.2330068051815033, 0.4463363587856293, 0.19394421577453613, 0.026641450822353363, 0.17128966748714447, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.679330587387085, 0.043791741132736206, 0.12768849730491638, 0.27546241879463196, 0.03847555071115494, 0.08167082816362381, 0.21957245469093323, 0.04802798852324486, 0.10780715942382812, 0.6106712222099304, 0.2505488693714142, 0.1709391176700592, 0.04529926925897598, 0.17936259508132935, 0.13903558254241943, 0.5577486157417297, 0.24638143181800842, 0.025497647002339363, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05959116667509079, 0.03547457605600357, 0.03805014118552208, 0.02909783646464348, 0.08531224727630615, 0.035567909479141235, 0.017052877694368362, 0.03032829985022545, 0.012725351378321648, 0.06508343666791916, 0.04963213950395584, 0.013415418565273285, 0.026129938662052155, 0.011819864623248577, 0.21026377379894257, 0.1241803988814354, 0.06599891930818558, 0.13004763424396515, 0.33318501710891724, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0922531858086586, 0.009465531446039677, 0.05285167694091797, 0.11621613800525665, 0.008946871384978294, 0.0003396931570023298, 0.056973982602357864, 0.011571673676371574, 0.03833528608083725, 0.02977353148162365, 0.12428728491067886, 0.005304301157593727, 0.012764646671712399, 0.03717968612909317, 0.1998610943555832, 0.9552784562110901, 0.6656578779220581, 0.04364815354347229, 0.097982257604599, 0.0012550450628623366, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024207258597016335, 0.015275360085070133, 0.12442810088396072, 0.044900182634592056, 0.06243159621953964, 0.002727220067754388, 0.05297050252556801, 0.34427115321159363, 0.10989916324615479, 0.020859790965914726, 0.11048608273267746, 0.02605186030268669, 0.1171213760972023, 0.05136575922369957, 0.16462838649749756, 0.6779462695121765, 0.5809971690177917, 0.2087380737066269, 0.15752893686294556, 0.08772724121809006, 0.09023962169885635, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03260662034153938, 0.00298042013309896, 0.16533112525939941, 0.056620776653289795, 0.049906134605407715, 0.008958332240581512, 0.05700542405247688, 0.016634995117783546, 0.029206881299614906, 0.025224529206752777, 0.19688823819160461, 0.03853357210755348, 0.07708126306533813, 0.04636078327894211, 0.17741571366786957, 0.6994673609733582, 0.48720496892929077, 0.08263873308897018, 0.3298986256122589, 0.0049313209019601345, 0.07016509026288986, 0.5443912744522095, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04517968371510506, 0.08089613169431686, 0.11787059158086777, 0.09224344044923782, 0.27191361784935, 0.020393863320350647, 0.01454318780452013, 0.009129227139055729, 0.020442765206098557, 0.08070629835128784, 0.07541637122631073, 0.10045406222343445, 0.04119513928890228, 0.10953037440776825, 0.15667563676834106, 0.3437848389148712, 0.28689879179000854, 0.5712999105453491, 0.5371078252792358, 0.06584293395280838, 0.2492358684539795, 0.014812931418418884, 0.02226697839796543, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08136362582445145, 0.07834970951080322, 0.015254710800945759, 0.0832342654466629, 0.10864067077636719, 0.11524737626314163, 0.1366880238056183, 0.012557982467114925, 0.1251911222934723, 0.15952906012535095, 0.026927798986434937, 0.07786250859498978, 0.11803606152534485, 0.2014097422361374, 0.2085045427083969, 0.44942334294319153, 0.3777551054954529, 0.7612449526786804, 0.7021526098251343, 0.30080679059028625, 0.4424319267272949, 0.22922295331954956, 0.04627525433897972, 0.055941756814718246, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07754338532686234, 0.11610410362482071, 0.032187070697546005, 0.05519983917474747, 0.0022462301421910524, 0.11507689952850342, 0.2733137607574463, 0.17666463553905487, 0.010644900612533092, 0.08315187692642212, 0.02269633859395981, 0.06840697675943375, 0.010724963620305061, 0.0371541827917099, 0.21114735305309296, 0.47138965129852295, 0.18856076896190643, 0.6503154039382935, 0.9041082859039307, 0.2803841233253479, 0.4006999135017395, 0.5757170915603638, 0.295682817697525, 0.04142303764820099, 0.006079117301851511, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.022315502166748047, 0.012378118932247162, 0.0062178960070014, 0.0078407758846879, 0.015144318342208862, 0.010697844438254833, 0.011326298117637634, 0.013119788840413094, 0.009139686822891235, 0.006104558240622282, 0.005014281254261732, 0.002417754614725709, 0.007784656248986721, 0.009948876686394215, 0.16676713526248932, 0.24097655713558197, 0.15950126945972443, 0.6649572849273682, 0.6751598119735718, 0.46790093183517456, 0.6438081860542297, 0.3765251934528351, 0.2975021302700043, 0.10267924517393112, 0.060453154146671295, 0.03869982063770294, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2628116309642792, 0.1443735957145691, 0.08422664552927017, 0.11404431611299515, 0.17927099764347076, 0.25378888845443726, 0.1460212618112564, 0.04387032985687256, 0.023589681833982468, 0.13644081354141235, 0.045464351773262024, 0.06847606599330902, 0.006222521886229515, 0.036451175808906555, 0.20291540026664734, 0.39086097478866577, 0.6666929125785828, 0.5642580389976501, 0.557075023651123, 0.25761184096336365, 0.3620971143245697, 0.656988263130188, 0.301082581281662, 0.3758563995361328, 0.026163028553128242, 0.024990877136588097, 0.0074356794357299805, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22663825750350952, 0.15363532304763794, 0.01756531558930874, 0.025186356157064438, 0.038983430713415146, 0.01259024627506733, 0.15960636734962463, 0.10260611027479172, 0.059462085366249084, 0.02338782697916031, 0.039677273482084274, 0.055942799896001816, 0.010165784507989883, 0.013570738956332207, 0.1720115691423416, 0.7909376621246338, 0.3817039430141449, 0.6133569478988647, 0.41290101408958435, 0.30558884143829346, 0.6049348711967468, 0.5688384175300598, 0.4680134057998657, 0.6550416946411133, 0.42371857166290283, 0.10508850961923599, 0.021316751837730408, 0.05294431000947952, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04994741827249527, 0.08986728638410568, 0.03736276924610138, 0.029899757355451584, 0.03542618826031685, 0.007244490087032318, 0.040187276899814606, 0.040814109146595, 0.04076588898897171, 0.05965813249349594, 0.045340292155742645, 0.0002602309104986489, 0.026138437911868095, 0.02984587848186493, 0.21049101650714874, 0.17973686754703522, 0.17233335971832275, 0.334688276052475, 0.4481850564479828, 0.04172942414879799, 0.10337609797716141, 0.5107487440109253, 0.7207926511764526, 0.1405051052570343, 0.0654703825712204, 0.41273486614227295, 0.17914383113384247, 0.042542651295661926, 0.010745447129011154, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.058702513575553894, 0.04533839225769043, 0.03167680650949478, 0.07689032703638077, 0.07722999900579453, 0.05968516319990158, 0.08647314459085464, 0.04232413321733475, 0.05769982933998108, 0.08562258630990982, 0.07418374717235565, 0.08922348916530609, 0.0013435373548418283, 0.0365031398832798, 0.1955317258834839, 0.5207539200782776, 0.308788537979126, 0.08189663290977478, 0.5850351452827454, 0.3457651734352112, 0.15844188630580902, 0.2948668897151947, 0.4065589904785156, 0.12084604799747467, 0.29343682527542114, 0.49164822697639465, 0.07233413308858871, 0.0535273477435112, 0.014947501011192799, 0.008541097864508629, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.035160183906555176, 0.01820351555943489, 0.1303882896900177, 0.019772829487919807, 0.040328264236450195, 0.05493366718292236, 0.03643186390399933, 0.013673724606633186, 0.020261095836758614, 0.09265058487653732, 0.06087178364396095, 0.005874141119420528, 0.0010416797595098615, 0.00679743243381381, 0.17795756459236145, 0.2949400544166565, 0.03748409450054169, 0.14473117887973785, 0.0705113336443901, 0.013025683350861073, 0.005298166535794735, 0.21091029047966003, 0.014800299890339375, 0.2805088758468628, 0.000897476973477751, 0.0938984826207161, 0.004705057479441166, 0.04936474934220314, 0.011992034502327442, 0.18721424043178558, 0.00230285432189703, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0850016176700592, 0.12483492493629456, 0.30438917875289917, 0.08283902704715729, 0.36141735315322876, 0.5806636810302734, 0.21757252514362335, 0.0776025652885437, 0.2093839943408966, 0.1517311930656433, 0.0691467672586441, 0.05431315675377846, 0.323522686958313, 0.21248842775821686, 0.11186490952968597, 0.44276589155197144, 0.06478449702262878, 0.543609619140625, 0.8444110155105591, 0.13468694686889648, 0.4405028522014618, 0.6528593897819519, 0.5737791061401367, 0.6313535571098328, 0.8501816987991333, 0.4486657381057739, 0.06076665595173836, 0.7409859299659729, 0.15147589147090912, 0.20801351964473724, 0.027446726337075233, 0.036936238408088684, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.017619943246245384, 0.008017263375222683, 0.019503258168697357, 0.014857600443065166, 0.07692210376262665, 0.015309707261621952, 0.015313221141695976, 0.008549719117581844, 0.03095930442214012, 0.019377540796995163, 0.031960610300302505, 0.0054225618951022625, 0.016712497919797897, 0.015215321443974972, 0.15961019694805145, 0.5445577502250671, 0.2876933515071869, 0.7013069987297058, 0.627236008644104, 0.37061285972595215, 0.6206991076469421, 0.38252583146095276, 0.4230470061302185, 0.31842562556266785, 0.28603002429008484, 0.015331648290157318, 0.14692452549934387, 0.8622261881828308, 0.049388445913791656, 0.37183380126953125, 0.17907747626304626, 0.05781394988298416, 0.020684318616986275, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2695287764072418, 0.16650046408176422, 0.14075446128845215, 0.1364857405424118, 0.23432065546512604, 0.261515349149704, 0.18958930671215057, 0.053015366196632385, 0.031337250024080276, 0.28422990441322327, 0.08986067771911621, 0.06408891826868057, 0.008591849356889725, 0.031372129917144775, 0.19151051342487335, 0.4656296670436859, 0.6725881099700928, 0.6199259161949158, 0.6479836702346802, 0.24076998233795166, 0.34658652544021606, 0.5947279930114746, 0.37259459495544434, 0.5521662831306458, 0.14718003571033478, 0.19626900553703308, 0.024240192025899887, 0.27736979722976685, 0.05565635487437248, 0.3618892729282379, 0.44332295656204224, 0.027751203626394272, 0.0260067880153656, 0.010717106983065605, NaN, NaN, NaN, NaN, NaN, NaN], [0.2586316764354706, 0.21131351590156555, 0.019284198060631752, 0.02717362530529499, 0.037918541580438614, 0.014535612426698208, 0.14439015090465546, 0.14164134860038757, 0.06384728103876114, 0.03232301026582718, 0.05240772292017937, 0.08253412693738937, 0.007928711362183094, 0.011026060208678246, 0.1583670824766159, 0.830940842628479, 0.42077580094337463, 0.7156820893287659, 0.57599937915802, 0.5493759512901306, 0.7128159999847412, 0.5476810932159424, 0.527928352355957, 0.8053308725357056, 0.8646240234375, 0.542984127998352, 0.2950981855392456, 0.3170693516731262, 0.5610483884811401, 0.26465174555778503, 0.45835256576538086, 0.22733505070209503, 0.10187508910894394, 0.03538959100842476, 0.07069608569145203, NaN, NaN, NaN, NaN, NaN], [0.0646420493721962, 0.15151722729206085, 0.04734531044960022, 0.03642117232084274, 0.03833956643939018, 0.007805521599948406, 0.03985777497291565, 0.05410199984908104, 0.07749858498573303, 0.1281091719865799, 0.06692291796207428, 0.0004382343322504312, 0.02769407443702221, 0.03219819441437721, 0.20084568858146667, 0.09599269181489944, 0.08247342705726624, 0.25253206491470337, 0.4357891380786896, 0.039192523807287216, 0.0719948410987854, 0.3563676178455353, 0.5300538539886475, 0.06311739236116409, 0.037909455597400665, 0.5032193064689636, 0.39894816279411316, 0.3283153772354126, 0.21619060635566711, 0.017918655648827553, 0.2577371895313263, 0.14531975984573364, 0.346793532371521, 0.2014700472354889, 0.0539211668074131, 0.0146569162607193, NaN, NaN, NaN, NaN], [0.06935474276542664, 0.07278740406036377, 0.0317843034863472, 0.061563972383737564, 0.057788632810115814, 0.05731336027383804, 0.08327846229076385, 0.046548519283533096, 0.06359860301017761, 0.13075897097587585, 0.09122113883495331, 0.1188196912407875, 0.0009191188146360219, 0.03464866429567337, 0.18994329869747162, 0.6422337889671326, 0.3740711212158203, 0.10689651221036911, 0.6858291029930115, 0.4494076073169708, 0.2826421856880188, 0.3886936604976654, 0.475405216217041, 0.13226336240768433, 0.3073323965072632, 0.7139697670936584, 0.17356495559215546, 0.25040003657341003, 0.23144030570983887, 0.024455448612570763, 0.4280460476875305, 0.048713963478803635, 0.3974619209766388, 0.06130422651767731, 0.05969162657856941, 0.015271119773387909, 0.00685582309961319, NaN, NaN, NaN], [0.04588386043906212, 0.027941085398197174, 0.16196617484092712, 0.023955674842000008, 0.04093120992183685, 0.06800121814012527, 0.031365618109703064, 0.013349683955311775, 0.016157155856490135, 0.09367228299379349, 0.06382262706756592, 0.009268027730286121, 0.0006308736628852785, 0.005314440466463566, 0.17240527272224426, 0.5218734741210938, 0.03395698964595795, 0.2861349880695343, 0.13773199915885925, 0.02211177349090576, 0.014614011161029339, 0.43378758430480957, 0.02492188662290573, 0.26067787408828735, 0.0009113854030147195, 0.1411941796541214, 0.009023642167448997, 0.14982649683952332, 0.15959703922271729, 0.7153633832931519, 0.014257365837693214, 0.06102409213781357, 0.12158294767141342, 0.006897313520312309, 0.06130388379096985, 0.012951835058629513, 0.16874605417251587, 0.002189028775319457, NaN, NaN], [0.09685268998146057, 0.17937548458576202, 0.31954076886177063, 0.09235721081495285, 0.3550800085067749, 0.5939842462539673, 0.19687135517597198, 0.10603781044483185, 0.27224627137184143, 0.17071248590946198, 0.0712975338101387, 0.10525800287723541, 0.3080449402332306, 0.250378280878067, 0.11120767891407013, 0.45293620228767395, 0.05202305316925049, 0.4803192913532257, 0.8224762082099915, 0.10338833183050156, 0.2861584722995758, 0.8321961760520935, 0.7622299790382385, 0.5323314070701599, 0.8633370995521545, 0.5219312310218811, 0.07432084530591965, 0.7646023631095886, 0.4150907099246979, 0.4998815357685089, 0.606073796749115, 0.2854492664337158, 0.6639280319213867, 0.09482558071613312, 0.806840717792511, 0.19665148854255676, 0.18194931745529175, 0.01953776553273201, 0.037144362926483154, NaN], [0.012543261051177979, 0.010277148336172104, 0.014658409170806408, 0.007294217124581337, 0.028056686744093895, 0.009602113626897335, 0.004711315967142582, 0.003909323364496231, 0.019910220056772232, 0.0035717461723834276, 0.016398703679442406, 0.01044577918946743, 0.015165981836616993, 0.04322582483291626, 0.1563079059123993, 0.8357685804367065, 0.6023411154747009, 0.16389556229114532, 0.4697819948196411, 0.05014880374073982, 0.3185025751590729, 0.2618474066257477, 0.7044641375541687, 0.16675803065299988, 0.7323283553123474, 0.14429442584514618, 0.2621355652809143, 0.041847843676805496, 0.3185603618621826, 0.04513467848300934, 0.49906620383262634, 0.611339807510376, 0.21515053510665894, 0.3302164673805237, 0.04920952767133713, 0.2760073244571686, 0.0218669306486845, 0.25043201446533203, 0.13627314567565918, 0.01334126852452755]]], [[[0.00028402332100085914, 1.9304454923485537e-08, 1.5483598847509938e-09, 7.885660006923256e-12, 2.7246130684943637e-08, 2.9440096113830805e-05, 4.3406546978985716e-07, 3.7434634236888087e-07, 3.9264233464564313e-07, 1.911867819615054e-08, 6.894639170695882e-08, 1.9322192201798316e-06, 1.594805780769093e-06, 1.097217136702966e-06, 0.25163131952285767, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.8221166729927063, 0.0031213052570819855, 7.842657214496285e-05, 5.977510153520882e-10, 6.043178735204435e-10, 7.336016096815001e-07, 0.0001510237343609333, 0.000765863514970988, 0.0003504687047097832, 5.704807790607447e-07, 3.8402351520971933e-08, 3.7901799032624695e-07, 1.534954208182171e-05, 4.934078606311232e-05, 0.00023439944197889417, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0023944040294736624, 0.796754002571106, 0.004422985017299652, 9.068900226338883e-07, 5.795331436964091e-10, 1.0343059742012883e-08, 4.4964113499190717e-07, 0.0014743957435712218, 0.00028717826353386045, 7.994436600711197e-05, 3.3569827451174206e-07, 1.215876466176269e-07, 7.940250839055807e-07, 4.835407253267476e-06, 2.585098854979151e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [4.3931080995207594e-11, 0.0005229745293036103, 0.5791732668876648, 0.0002632129180710763, 3.316774765949049e-08, 1.7754019825469425e-12, 1.4596207272357664e-14, 1.5350217763554497e-09, 1.2882580335826788e-07, 7.457471838279162e-06, 1.2410231420290074e-06, 2.736720361440348e-08, 3.621486097116211e-11, 3.919724787804224e-12, 2.306477925317907e-12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [3.994035801418473e-14, 1.3595737036187217e-10, 5.270875135465758e-06, 0.5513067841529846, 0.00020578903786372393, 1.9226330039145978e-07, 1.181193272532799e-12, 2.80986930771554e-13, 9.120337812881449e-14, 1.37843805814164e-10, 7.154308718781976e-07, 1.5133276747292257e-06, 7.425698944629744e-10, 2.2010659354171347e-13, 1.8997327582565005e-12, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [2.3444651168352815e-12, 2.1774425253313912e-13, 1.857566878094019e-09, 0.00030468025943264365, 0.9472002983093262, 0.00010681805724743754, 2.00606624645161e-08, 5.2167251502746245e-14, 1.354494091723496e-15, 5.737065011425513e-13, 8.729777456473187e-10, 3.2425006793346256e-05, 7.676636641917867e-07, 1.870739785303499e-09, 2.3914221713994266e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [3.644098217625569e-11, 3.867062572937563e-11, 4.1057553190615437e-11, 1.5412249254609378e-09, 0.018834512680768967, 0.505605936050415, 0.0010763276368379593, 5.434728933551014e-08, 2.6194791127864825e-11, 6.074670846504876e-15, 3.814499497517554e-12, 1.2291486939375318e-07, 9.572526323609054e-06, 4.437842653715052e-05, 7.18067713023629e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [5.002242687623948e-05, 2.445471238843311e-07, 7.217475506138271e-09, 2.943958878759423e-12, 1.391844648424012e-07, 0.0035048718564212322, 0.755942702293396, 0.0011242764303460717, 1.4866960555082187e-05, 9.753278740198823e-11, 3.792431321238132e-13, 1.6398679289486573e-11, 1.3850768709744443e-07, 0.0002873632765840739, 2.565975592005998e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [7.748224284398475e-09, 3.667011867491965e-07, 1.7906526261768363e-09, 1.001209222569038e-16, 4.707358499311462e-15, 2.921879960204876e-10, 4.77575849799905e-06, 0.9355171918869019, 1.7088919776142575e-05, 1.5246609308405823e-08, 1.546373502880373e-14, 1.9256968477537417e-16, 2.8356877952137637e-15, 6.199032398512827e-10, 3.679770266273863e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [6.04271771509346e-11, 2.349539499846287e-06, 6.254656170767703e-08, 2.0915530592191534e-12, 3.303753013789688e-16, 1.0466700578893717e-14, 7.288482968201282e-13, 0.0006303040427155793, 0.47335511445999146, 8.928982424549758e-05, 1.5872458902776998e-08, 1.3611594998645584e-14, 1.3777586457132233e-16, 1.589055302510104e-15, 8.100658338561217e-11, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [3.812023474658588e-10, 1.421315573679749e-06, 2.2867025109007955e-06, 2.6682736020688935e-08, 3.632111755455525e-12, 1.6831340872913367e-14, 3.240909670081289e-14, 1.4920277635610546e-07, 0.0005182845052331686, 0.39297640323638916, 0.0007259719423018396, 1.2580667174688642e-08, 3.7229049595736974e-13, 2.157145159519631e-15, 1.0612778433838344e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [6.84109713322556e-10, 1.9775532322796607e-08, 5.041609938416514e-07, 0.00017906920402310789, 1.631619738873269e-06, 2.0158734681530177e-09, 9.65507530290054e-15, 4.2181228128435055e-12, 8.564649545128589e-10, 0.00023218656133394688, 0.6439363956451416, 0.000818322179839015, 1.3831699163802114e-07, 2.1358659198916774e-12, 5.4572883101400294e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.4084274191361601e-08, 2.1930364191291574e-09, 7.004614666072939e-09, 2.0828078959311824e-06, 6.64705439703539e-05, 3.6118690331932157e-06, 4.0857584676645686e-11, 1.0090924406833124e-12, 5.430448080009356e-15, 6.815135122906213e-09, 0.0007384128402918577, 0.9033229351043701, 0.0037223652470856905, 5.428325380307797e-07, 5.097080588711833e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [3.370899046006848e-11, 1.5044922772877722e-12, 1.903236411786996e-13, 5.2399131041103164e-12, 5.3600892613303586e-09, 3.287689196440624e-07, 1.293990137263279e-09, 3.2395277866498207e-13, 8.98320316581696e-19, 7.591717251043266e-18, 2.4333673097343134e-12, 7.08575316821225e-05, 0.3025490641593933, 0.00011370918218744919, 1.7842703314840946e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0009491983219049871, 3.734114216058515e-05, 0.00010643315181368962, 4.299266220186837e-05, 0.0019948105327785015, 0.012520392425358295, 0.0005770812276750803, 0.00013455892622005194, 0.0002518744731787592, 0.0005399127840064466, 0.0017743584467098117, 0.004756112117320299, 0.00398082984611392, 0.002925803419202566, 0.1746407300233841, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.017177388072013855, 0.0003127168456558138, 0.004294774029403925, 0.0025685238651931286, 0.0020048224832862616, 0.0018501998856663704, 0.004262528382241726, 0.00010045748058473691, 0.004143967293202877, 0.0026836262550204992, 0.0008790316642262042, 0.0012905423063784838, 8.68891947902739e-05, 0.00021419797849375755, 0.16245633363723755, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12795236706733704, 0.00371668953448534, 0.02831968478858471, 0.025539351627230644, 0.0009935664711520076, 0.0005314573645591736, 0.0308157317340374, 4.653090945794247e-05, 0.004544692113995552, 0.02307700179517269, 0.014357739128172398, 0.0017676070565357804, 1.5830510164960288e-05, 0.0005655316635966301, 0.23366259038448334, 0.13569742441177368, 0.0376364141702652, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0012442924780771136, 0.6349257826805115, 1.560185046400875e-05, 0.0005892697954550385, 2.671209358595661e-06, 1.747990245348774e-05, 0.00010909549746429548, 9.000968930195086e-06, 1.720580803521443e-05, 0.0008049540338106453, 0.00025925427326001227, 4.468534825718962e-06, 5.9764097386505455e-06, 7.895294402260333e-05, 0.00020540088007692248, 0.05053132027387619, 0.5417848825454712, 0.07814626395702362, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014811321161687374, 0.6550174951553345, 5.4754978918936104e-05, 0.0013682727003470063, 7.1730828494764864e-06, 3.513193587423302e-05, 0.00030579010490328074, 4.0161107790481765e-06, 8.621193410363048e-05, 0.0020331761334091425, 0.00018049145000986755, 1.5370842447737232e-05, 2.3058303213474574e-06, 3.803792060352862e-05, 0.0004018820764031261, 0.03762863576412201, 0.4749486744403839, 0.013701170682907104, 0.053301598876714706, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0038746336940675974, 0.000324725842801854, 0.0051879663951694965, 0.009153621271252632, 0.0008864403935149312, 0.6781038641929626, 0.057408660650253296, 0.0010902854846790433, 0.00043091498082503676, 0.000930881651584059, 0.00047575533972121775, 0.0024355631321668625, 0.0005705857765860856, 0.0003382607828825712, 0.0010924984235316515, 0.10598134994506836, 0.16776065528392792, 0.11929589509963989, 0.16846179962158203, 0.40715572237968445, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.359095899213571e-06, 1.5333833403019526e-07, 3.112653939751908e-05, 0.00013510043208952993, 6.284327810135437e-06, 0.7821753025054932, 0.0016732696676626801, 2.949555346276611e-05, 1.1825303545265342e-06, 2.2443591660703532e-06, 4.938602842230466e-07, 8.253279020209447e-07, 2.1931487026449759e-07, 9.422030302630446e-07, 3.409375494811684e-06, 0.05147748813033104, 0.203742116689682, 0.11462464928627014, 0.46246808767318726, 0.01836300455033779, 0.02458924613893032, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00014056767395231873, 5.100669682178705e-07, 0.0031089531257748604, 0.006296438630670309, 0.00044245802564546466, 0.5631491541862488, 0.006006886251270771, 0.00015836386592127383, 1.0129460861207917e-05, 9.741926623973995e-05, 8.02019567345269e-05, 2.8800504878745414e-05, 2.2740101485396735e-05, 9.966635116143152e-05, 5.9340749430703e-05, 0.17594558000564575, 0.17753779888153076, 0.024665912613272667, 0.19817322492599487, 0.008797828108072281, 0.022263213992118835, 0.29173722863197327, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07201159745454788, 9.12444302230142e-05, 0.07167930901050568, 0.07350550591945648, 0.008381813764572144, 0.32997292280197144, 0.32325229048728943, 0.006826527416706085, 0.005964158568531275, 0.01031426526606083, 0.0041834041476249695, 0.0003298712254036218, 2.8659975214395672e-05, 0.00019656911899801344, 0.02016262151300907, 0.016114797443151474, 0.0061007170006632805, 0.028504224494099617, 0.017245782539248466, 0.08753485232591629, 0.11264273524284363, 0.6154332160949707, 0.029144972562789917, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0011574724921956658, 3.413460092360765e-07, 0.00010100962390424684, 0.0058910842053592205, 3.088227913394803e-06, 0.01394782867282629, 0.16852441430091858, 0.6476468443870544, 4.158269439358264e-05, 0.002217742381617427, 3.1430703529622406e-05, 8.318846812471747e-05, 7.552150123046886e-07, 2.136993316526059e-06, 0.00013183141709305346, 0.027042992413043976, 0.032212790101766586, 0.019619816914200783, 0.014702342450618744, 0.06721275299787521, 0.2560867667198181, 0.5545244216918945, 0.40561506152153015, 0.037922732532024384, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.056869976222515106, 0.00018767332949209958, 0.07251239567995071, 0.21200358867645264, 0.5404223799705505, 0.01658189669251442, 0.03565289452672005, 0.0015120785683393478, 0.002293382305651903, 0.005935561377555132, 0.012055100873112679, 0.005193157121539116, 0.003556813346222043, 0.007320231292396784, 0.018532630056142807, 0.1654873937368393, 0.013622531667351723, 0.0656571239233017, 0.09179358184337616, 0.03440919890999794, 0.08533406257629395, 0.16269220411777496, 0.1151970624923706, 0.09265416115522385, 0.028269361704587936, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.37012216448783875, 0.0030506134498864412, 0.585090160369873, 0.3774729073047638, 0.6362679600715637, 0.12865976989269257, 0.340728759765625, 0.01963443122804165, 0.11373940855264664, 0.0405576266348362, 0.04042620584368706, 0.006893007550388575, 0.0011100739939138293, 0.004035779275000095, 0.12706774473190308, 0.2598540484905243, 0.010173649527132511, 0.004170349799096584, 0.003479698905721307, 0.0014636714477092028, 0.0011101020500063896, 0.001677120802924037, 0.034040722995996475, 0.0041177538223564625, 0.024958845227956772, 0.016315795481204987, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01695789396762848, 0.00023016006161924452, 0.013878279365599155, 0.04998883232474327, 0.0032932739704847336, 8.226843783631921e-05, 0.014781651087105274, 0.00017401285003870726, 0.4112556278705597, 0.007095593959093094, 0.01393651869148016, 0.000858593441080302, 0.0009966455399990082, 0.006141065154224634, 0.004614917561411858, 0.17492477595806122, 0.010013026185333729, 0.005800239276140928, 0.0069971769116818905, 0.0036480696871876717, 0.001016399241052568, 0.0060493675991892815, 0.0034581662621349096, 0.00659980857744813, 0.0047594537027180195, 0.3941299021244049, 0.2407994568347931, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.023780474439263344, 4.510316648520529e-05, 0.013797261752188206, 0.087004654109478, 0.0004407854867167771, 0.0013536562910303473, 0.04187630116939545, 0.0028901200275868177, 0.06213926523923874, 0.3483656048774719, 0.03705320879817009, 0.005524389911442995, 0.0004139445663895458, 0.0025706440210342407, 0.012163926847279072, 0.06559828668832779, 0.005602334160357714, 0.0005807551206089556, 0.0005322807701304555, 0.004617360420525074, 0.00354054500348866, 0.005599506665021181, 0.011434626765549183, 0.006905066315084696, 0.009602343663573265, 0.11027393490076065, 0.36931946873664856, 0.06368503719568253, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.017730457708239555, 8.937691018218175e-05, 0.00767871318385005, 0.02321789041161537, 0.00010702417785068974, 0.004407694097608328, 0.0538853257894516, 0.011079255491495132, 0.003184565110132098, 0.026336153969168663, 0.005110009107738733, 0.3480301797389984, 0.002053677337244153, 0.01653059385716915, 0.00945478305220604, 0.015983520075678825, 0.012168757617473602, 0.0015684146201238036, 0.0005484889261424541, 0.00233695306815207, 0.0038106110878288746, 0.005947766825556755, 0.04194773733615875, 0.014443459920585155, 0.06465759128332138, 0.14989611506462097, 0.5095774531364441, 0.1882752925157547, 0.02387852594256401, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00016590843733865768, 4.410037217894569e-05, 0.0031412369571626186, 0.0015988551313057542, 0.002399750053882599, 0.0004506838449742645, 0.001152031123638153, 0.00021803524577990174, 0.00054850586457178, 0.0001300607982557267, 0.001143390079960227, 0.0023531741462647915, 0.6484718322753906, 0.061944324523210526, 1.8855764210456982e-05, 0.11159919947385788, 0.06036144495010376, 0.06681493669748306, 0.0798669382929802, 0.03668922558426857, 0.018710536882281303, 0.029976846650242805, 0.0675768032670021, 0.03372039645910263, 0.057603828608989716, 0.14515243470668793, 0.25060775876045227, 0.23181115090847015, 0.14262832701206207, 0.33286023139953613, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [5.492825607689156e-07, 1.991102926979238e-08, 2.3713612335996004e-06, 1.7095164366764948e-05, 8.657893886265811e-07, 3.6805211323098774e-08, 1.598790731804911e-06, 2.0731313554733788e-07, 4.274500042811269e-07, 5.490248440764844e-06, 0.00014167907647788525, 5.53526615476585e-06, 0.5851997137069702, 0.22563536465168, 1.0684430407081891e-07, 0.018035059794783592, 0.02341379225254059, 0.0019442361081019044, 0.004369894042611122, 0.00136191223282367, 0.00017434914479963481, 0.0011034610215574503, 0.06787250190973282, 0.060198791325092316, 0.12004764378070831, 0.11878902465105057, 0.2063554972410202, 0.28332868218421936, 0.35319504141807556, 0.008158767595887184, 0.26057863235473633, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01633528247475624, 0.0006067559006623924, 0.047781698405742645, 0.1674666851758957, 0.0008243213524110615, 0.0007217283127829432, 0.005900595337152481, 0.0001012250068015419, 0.006910703144967556, 0.1343279927968979, 0.5695670247077942, 0.0034049933310598135, 0.008110514841973782, 0.0796104148030281, 0.00713667506352067, 0.17278411984443665, 0.007028562016785145, 0.010641193017363548, 0.013809186406433582, 0.0005732428980991244, 0.001056239241734147, 0.0005258666351437569, 0.03639528155326843, 0.02256075292825699, 0.01660884916782379, 0.1527748554944992, 0.1477358043193817, 0.2577149271965027, 0.03867224231362343, 0.04304511100053787, 0.11759469658136368, 0.0762997567653656, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02614973485469818, 0.001497315475717187, 0.11498566716909409, 0.08699594438076019, 0.006599655374884605, 0.0011878651566803455, 0.009639720432460308, 0.0002812722814269364, 0.014351817779242992, 0.06119270250201225, 0.19180962443351746, 0.06391202658414841, 0.4759237766265869, 0.44549837708473206, 0.058810409158468246, 0.38573285937309265, 0.0028330886270850897, 0.0014278099406510592, 0.0009824484586715698, 9.371336636831984e-05, 0.00015483389142900705, 6.760591350030154e-05, 0.0035791138652712107, 0.0002520910056773573, 0.0005180046427994967, 0.00024238335026893765, 0.011901103891432285, 0.011019378900527954, 0.006276060827076435, 0.0026990415062755346, 0.016820058226585388, 0.03330027312040329, 0.047877803444862366, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.041024841368198395, 0.0016396299470216036, 0.05072889104485512, 0.1323171705007553, 0.0024413676001131535, 0.00023246044293045998, 0.02059599943459034, 0.00033336327760480344, 0.7358176708221436, 0.04226389154791832, 0.0658484548330307, 0.002587914001196623, 0.013076293282210827, 0.0423613116145134, 0.051219869405031204, 0.21399648487567902, 0.008264300413429737, 0.0051351506263017654, 0.005111425183713436, 0.0020249083172529936, 0.00047485672985203564, 0.0018332998733967543, 0.0008904117858037353, 0.0017731828847900033, 0.000539442349690944, 0.03944296017289162, 0.039767228066921234, 0.00580678740516305, 0.004312179517000914, 0.003937484696507454, 0.00913114845752716, 0.006211036816239357, 0.3553882837295532, 0.3024981617927551, NaN, NaN, NaN, NaN, NaN, NaN], [0.025904469192028046, 0.00014531973283737898, 0.014812517911195755, 0.11958510428667068, 0.0003183217777404934, 0.0012536202557384968, 0.031174438074231148, 0.0025010022800415754, 0.045685503631830215, 0.4334242641925812, 0.057037968188524246, 0.005963113158941269, 0.0007164725102484226, 0.00356480129994452, 0.02565825544297695, 0.05261809378862381, 0.004144520964473486, 0.00047606538282707334, 0.0003396419051568955, 0.002880769083276391, 0.0015178520698100328, 0.0018901955336332321, 0.0029504895210266113, 0.0017174717504531145, 0.0006908842478878796, 0.0046035549603402615, 0.09042679518461227, 0.0032755613792687654, 0.007712012622505426, 0.032594844698905945, 0.02268057130277157, 0.033856723457574844, 0.07955116033554077, 0.4074561595916748, 0.07153668999671936, NaN, NaN, NaN, NaN, NaN], [0.04193783551454544, 0.0005606984486803412, 0.01569434627890587, 0.058890990912914276, 0.00016686622984707355, 0.0032934362534433603, 0.10695304721593857, 0.011062747798860073, 0.008127261884510517, 0.04922156408429146, 0.01035262644290924, 0.3408533036708832, 0.003045044606551528, 0.019185535609722137, 0.046415992081165314, 0.019381573423743248, 0.012705344706773758, 0.0019882190972566605, 0.0005741973291151226, 0.0020475401543080807, 0.0023934554774314165, 0.004172713495790958, 0.021013854071497917, 0.005879250820726156, 0.006729640066623688, 0.00632414361461997, 0.09735815972089767, 0.01909361220896244, 0.00100265524815768, 0.003452989971265197, 0.008203250356018543, 0.05971603840589523, 0.11904174834489822, 0.5188009142875671, 0.2541559338569641, 0.029506316408514977, NaN, NaN, NaN, NaN], [0.00012501348101068288, 4.870840712101199e-05, 0.0024386774748563766, 0.001847597537562251, 0.0017206922639161348, 0.0002501157287042588, 0.0009360458934679627, 0.00021343374100979418, 0.0004799730086233467, 0.00017777700850274414, 0.0013057318283244967, 0.0019216074142605066, 0.7016423344612122, 0.059743087738752365, 1.6802117897896096e-05, 0.10572486370801926, 0.04525948688387871, 0.055838145315647125, 0.050681136548519135, 0.027844024822115898, 0.014026278629899025, 0.025656970217823982, 0.0361209474503994, 0.017075760290026665, 0.01003955863416195, 0.016965145245194435, 0.04991300031542778, 0.01522271428257227, 0.007584442384541035, 0.03757705166935921, 0.03609456866979599, 0.10922907292842865, 0.19329114258289337, 0.2903786897659302, 0.29551932215690613, 0.1564989984035492, 0.3518115282058716, NaN, NaN, NaN], [1.7574552657606546e-06, 9.272354617451128e-08, 1.001089003693778e-05, 5.891482942388393e-05, 3.3656547202554066e-06, 1.2065736143540562e-07, 6.7727110035775695e-06, 6.411150366147922e-07, 1.3192883443480241e-06, 1.1707085832313169e-05, 0.00026830541901290417, 1.0283902156515978e-05, 0.6812964081764221, 0.27208930253982544, 4.838558993469633e-07, 0.017342884093523026, 0.024629754945635796, 0.0017386168474331498, 0.003977979999035597, 0.0011948446044698358, 0.0001711023651296273, 0.0019097719341516495, 0.050265345722436905, 0.048485398292541504, 0.025773482397198677, 0.011941587552428246, 0.02582539990544319, 0.014500979334115982, 0.011088544502854347, 0.0004536270862445235, 0.001346826204098761, 0.09912228584289551, 0.03899921476840973, 0.19399496912956238, 0.33165985345840454, 0.3351045250892639, 0.007158405613154173, 0.26822295784950256, NaN, NaN], [0.01900503970682621, 0.0008953948272392154, 0.09836827963590622, 0.2858547866344452, 0.0013939865166321397, 0.0011423979885876179, 0.011685764417052269, 0.00014273256238084286, 0.010754182003438473, 0.15914513170719147, 0.6438553333282471, 0.002441136632114649, 0.008362390100955963, 0.07132171094417572, 0.011131932027637959, 0.15815527737140656, 0.009173951111733913, 0.012453499250113964, 0.01756284572184086, 0.0007500716019421816, 0.0020462200045585632, 0.00166225153952837, 0.05335438624024391, 0.037105023860931396, 0.009711050428450108, 0.05516523867845535, 0.04893142729997635, 0.03887411952018738, 0.002221355913206935, 0.004346344619989395, 0.004376854281872511, 0.001785764587111771, 0.09844812005758286, 0.14674220979213715, 0.34636548161506653, 0.04763580113649368, 0.057022612541913986, 0.12166893482208252, 0.13556897640228271, NaN], [0.12417581677436829, 0.0153038389980793, 0.12986266613006592, 0.6406017541885376, 0.009386910125613213, 0.057520631700754166, 0.09723392128944397, 0.0041757188737392426, 0.030985616147518158, 0.12765046954154968, 0.052563395351171494, 0.09427980333566666, 0.010530965402722359, 0.01615813747048378, 0.110444575548172, 0.16895240545272827, 0.0006144722574390471, 0.0027162963524460793, 0.0007400937611237168, 0.0007253509247675538, 0.0007097159395925701, 0.000199983871425502, 0.0005034026107750833, 0.0002540702698752284, 0.0002154638059437275, 0.0004817947919946164, 0.0019994170870631933, 0.0003459753352217376, 6.575404404429719e-05, 0.004540599416941404, 0.00010029276745626703, 0.0005050064064562321, 0.003569946391507983, 0.008527955040335655, 0.003213587449863553, 0.0022120880894362926, 0.11142478138208389, 0.01313241571187973, 0.055687084794044495, 0.21235007047653198]], [[0.1577264666557312, 0.03251823037862778, 0.4939506947994232, 0.8334789872169495, 0.6927971243858337, 0.3147047460079193, 0.7604361176490784, 0.11822030693292618, 0.7022377848625183, 0.6516091823577881, 0.14691989123821259, 0.2232232689857483, 0.14339210093021393, 0.3761228322982788, 0.014605461619794369, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.028655482456088066, 0.14083503186702728, 0.08485368639230728, 0.8299343585968018, 0.8304422497749329, 0.5664599537849426, 0.834579586982727, 0.7438958287239075, 0.8452481031417847, 0.8614712953567505, 0.3640905022621155, 0.805733323097229, 0.3481642007827759, 0.795884370803833, 0.05269646272063255, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02106422185897827, 0.010846637189388275, 0.073356993496418, 0.017661061137914658, 0.8741048574447632, 0.5687165856361389, 0.5249210000038147, 0.5693489909172058, 0.5103186368942261, 0.5253384709358215, 0.6472406387329102, 0.4561024308204651, 0.1524587720632553, 0.45141565799713135, 0.034538887441158295, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2203565090894699, 0.02154199220240116, 0.007279311306774616, 0.003464027540758252, 0.18461424112319946, 0.07773485034704208, 0.7297388315200806, 0.2260110229253769, 0.6848539113998413, 0.2328294813632965, 0.22646839916706085, 0.3173597455024719, 0.10388152301311493, 0.06158056855201721, 0.11330780386924744, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1574045568704605, 0.12516136467456818, 0.04707150533795357, 0.0032313871197402477, 0.19444315135478973, 0.046962298452854156, 0.48863229155540466, 0.8290899991989136, 0.892469584941864, 0.6836395859718323, 0.83636474609375, 0.47956424951553345, 0.034452617168426514, 0.38761135935783386, 0.055785421282052994, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.4389230012893677, 0.6133158802986145, 0.4783843159675598, 0.11230780929327011, 0.006951127201318741, 0.0644199401140213, 0.03406795859336853, 0.33251792192459106, 0.9552598595619202, 0.8827710747718811, 0.9276224970817566, 0.8325800895690918, 0.737617552280426, 0.745059609413147, 0.05149900168180466, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3395847976207733, 0.09897124767303467, 0.16763220727443695, 0.1671983003616333, 0.049412358552217484, 0.007114487700164318, 0.3340696394443512, 0.018166696652770042, 0.7235669493675232, 0.9639523029327393, 0.851059079170227, 0.7306914925575256, 0.5801126956939697, 0.8017169237136841, 0.08099871873855591, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.44394704699516296, 0.6082286238670349, 0.37166181206703186, 0.3715074956417084, 0.35315781831741333, 0.10853563994169235, 0.013190319761633873, 0.07092351466417313, 0.03435605764389038, 0.25131845474243164, 0.921750545501709, 0.8745512366294861, 0.7473158240318298, 0.834020733833313, 0.1216883435845375, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.18251584470272064, 0.8759727478027344, 0.1439245641231537, 0.06640342622995377, 0.060579828917980194, 0.2710072100162506, 0.011089610867202282, 0.034396518021821976, 0.1700025051832199, 0.043876904994249344, 0.14450228214263916, 0.9449294805526733, 0.9689385294914246, 0.939329981803894, 0.07954179495573044, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.32071176171302795, 0.7452729344367981, 0.11999625712633133, 0.08053360879421234, 0.3748469650745392, 0.31863275170326233, 0.028054066002368927, 0.2197551280260086, 0.01771731488406658, 0.23943577706813812, 0.01906767673790455, 0.8113164901733398, 0.9739595055580139, 0.9691897630691528, 0.21732129156589508, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6261264085769653, 0.6649302244186401, 0.5194191336631775, 0.6324451565742493, 0.6771988272666931, 0.7814968228340149, 0.4118405878543854, 0.3728334903717041, 0.03296521306037903, 0.008678224869072437, 0.6047253012657166, 0.11251461505889893, 0.21560458838939667, 0.9244948625564575, 0.10127653181552887, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3176693320274353, 0.5172579884529114, 0.1793123036623001, 0.37762320041656494, 0.23678036034107208, 0.5621929168701172, 0.08773050457239151, 0.24525783956050873, 0.010828782804310322, 0.025829488411545753, 0.0057976157404482365, 0.08708162605762482, 0.04166324809193611, 0.5714256167411804, 0.16898052394390106, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6460146307945251, 0.8194199800491333, 0.48921409249305725, 0.6910595297813416, 0.5259124636650085, 0.6389046311378479, 0.3241840600967407, 0.7817367911338806, 0.17853572964668274, 0.1606196016073227, 0.06383053213357925, 0.007355134002864361, 0.02128707617521286, 0.02206379547715187, 0.23354344069957733, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5992116332054138, 0.6358246803283691, 0.47243836522102356, 0.5617506504058838, 0.6971379518508911, 0.6431114673614502, 0.39991113543510437, 0.8182389140129089, 0.2704472243785858, 0.20400457084178925, 0.059529319405555725, 0.06732083112001419, 0.008503233082592487, 0.06121496111154556, 0.2071741670370102, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2342938333749771, 0.5683650374412537, 0.6037701964378357, 0.7331977486610413, 0.7349027395248413, 0.6651985049247742, 0.23853524029254913, 0.2293619066476822, 0.48426058888435364, 0.7077944874763489, 0.5918195843696594, 0.8169012665748596, 0.7005065679550171, 0.4784330725669861, 0.015931207686662674, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05668458715081215, 0.013551714830100536, 0.3300224542617798, 0.22417771816253662, 0.24923239648342133, 0.16107039153575897, 0.07639153301715851, 0.036736860871315, 0.044193096458911896, 0.14611276984214783, 0.15061600506305695, 0.035221245139837265, 0.0397845022380352, 0.06225845590233803, 0.12414046376943588, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.29422780871391296, 0.3258638381958008, 0.027477310970425606, 0.10906420648097992, 0.003920723684132099, 0.020042676478624344, 0.05157224088907242, 0.0009247793932445347, 0.005282218102365732, 0.1744423359632492, 0.0761384516954422, 0.0033416510559618473, 0.0003361533163115382, 0.0012587645323947072, 0.013668928295373917, 0.13440807163715363, 0.048166193068027496, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19355924427509308, 0.1259031891822815, 0.004604514688253403, 0.04003702849149704, 0.0129036083817482, 0.019794460386037827, 0.06589072942733765, 0.0014933310449123383, 0.012753497809171677, 0.06252782791852951, 0.0361945815384388, 0.011655895970761776, 0.01012047752737999, 0.02639157697558403, 0.16549569368362427, 0.14904144406318665, 0.03273539990186691, 0.03615117073059082, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4293937385082245, 0.07181306928396225, 0.003158864099532366, 0.04697505012154579, 0.01354672759771347, 0.09221473336219788, 0.24058710038661957, 0.0037424738984555006, 0.07543525844812393, 0.0656844824552536, 0.01989266835153103, 0.06512395292520523, 0.01137665193527937, 0.029709961265325546, 0.18951866030693054, 0.17614386975765228, 0.0854690745472908, 0.038236960768699646, 0.12011754512786865, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.052543047815561295, 0.03695955500006676, 0.100065678358078, 0.07546547800302505, 0.053252771496772766, 0.11382242292165756, 0.28551623225212097, 0.14051520824432373, 0.12815484404563904, 0.15533913671970367, 0.11139650642871857, 0.09512985497713089, 0.017796501517295837, 0.04266834259033203, 0.1351824700832367, 0.14069411158561707, 0.1466522365808487, 0.07941046357154846, 0.06070372834801674, 0.045592159032821655, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002040643012151122, 0.005490712355822325, 0.024769198149442673, 0.007002650294452906, 0.0020249236840754747, 0.03913044556975365, 0.01487613096833229, 0.09424738585948944, 0.010089649818837643, 0.05513475462794304, 0.0488949678838253, 0.007691625505685806, 0.002344577107578516, 0.012510538101196289, 0.20307941734790802, 0.15778480470180511, 0.11167039722204208, 0.20017755031585693, 0.10082826018333435, 0.013994856737554073, 0.07346371561288834, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04981796815991402, 0.13342007994651794, 0.4189896881580353, 0.06767702847719193, 0.007763800676912069, 0.11641503125429153, 0.029343493282794952, 0.11072052270174026, 0.06700066477060318, 0.1429358571767807, 0.3406253457069397, 0.00571059063076973, 0.0006326772854663432, 0.004126383922994137, 0.17491626739501953, 0.15305520594120026, 0.26692208647727966, 0.1222626119852066, 0.14178596436977386, 0.012799645774066448, 0.019025815650820732, 0.14782781898975372, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008032058365643024, 0.009898788295686245, 0.0165096465498209, 0.015990890562534332, 0.001612947671674192, 0.07025154680013657, 0.1309722512960434, 0.45684561133384705, 0.020022952929139137, 0.014566164463758469, 0.01627122238278389, 0.001012062537483871, 0.003352430183440447, 0.006583840120583773, 0.0849505066871643, 0.050227321684360504, 0.49922510981559753, 0.2564227879047394, 0.37594476342201233, 0.05222875997424126, 0.019398091360926628, 0.07475102692842484, 0.13636687397956848, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027854006737470627, 0.008844887837767601, 0.011581032536923885, 0.014227867126464844, 0.0022522227372974157, 0.6803511381149292, 0.24682462215423584, 0.11913055926561356, 0.0028406307101249695, 0.006190288811922073, 0.00574448611587286, 0.0012344244169071317, 0.010572707280516624, 0.00985674187541008, 0.11121391505002975, 0.1278427243232727, 0.4489462971687317, 0.09382158517837524, 0.09914611279964447, 0.11451858282089233, 0.14035384356975555, 0.0858180820941925, 0.1395546793937683, 0.05027398467063904, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11111988872289658, 0.0035893325693905354, 0.4007861316204071, 0.2033512443304062, 0.1986382007598877, 0.15137647092342377, 0.12109687924385071, 0.007575488183647394, 0.021906785666942596, 0.03087061457335949, 0.08533017337322235, 0.07086688280105591, 0.06729871034622192, 0.045789312571287155, 0.1673528403043747, 0.06907324492931366, 0.44302117824554443, 0.21607427299022675, 0.21861647069454193, 0.14559195935726166, 0.12854896485805511, 0.21420170366764069, 0.5056769251823425, 0.05036870762705803, 0.14160890877246857, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06468851119279861, 0.006587199401110411, 0.23617494106292725, 0.19800357520580292, 0.15495024621486664, 0.06172433868050575, 0.05180465057492256, 0.01833559013903141, 0.016546709463000298, 0.05746111273765564, 0.0824536681175232, 0.007550883572548628, 0.007943101227283478, 0.011712267994880676, 0.33849596977233887, 0.08832916617393494, 0.4917650520801544, 0.16961733996868134, 0.21240676939487457, 0.17275941371917725, 0.13381528854370117, 0.1763075888156891, 0.3443826735019684, 0.022638684138655663, 0.14659351110458374, 0.05034468695521355, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09414701163768768, 0.10295354574918747, 0.0844656303524971, 0.06548816710710526, 0.08529236167669296, 0.06227908656001091, 0.030192906036973, 0.010874724946916103, 0.025562399998307228, 0.005146168638020754, 0.014559037052094936, 0.013559900224208832, 0.06781303137540817, 0.05153109133243561, 0.33232951164245605, 0.10765255987644196, 0.1569133847951889, 0.14696621894836426, 0.12414205074310303, 0.1321374922990799, 0.32589367032051086, 0.09939466416835785, 0.15668180584907532, 0.035531532019376755, 0.18526552617549896, 0.100669264793396, 0.1766001582145691, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.314544141292572, 0.6832185983657837, 0.07794945687055588, 0.042061515152454376, 0.015504884533584118, 0.1916494369506836, 0.006379975005984306, 0.0006176759488880634, 0.0012508369982242584, 0.01929312013089657, 0.022219885140657425, 0.0019787217024713755, 0.01769268326461315, 0.008809820748865604, 0.08711312711238861, 0.0920143872499466, 0.03631591796875, 0.10338561236858368, 0.13865944743156433, 0.14365890622138977, 0.19164490699768066, 0.08302215486764908, 0.17053648829460144, 0.20418454706668854, 0.4243081212043762, 0.23730118572711945, 0.11353020370006561, 0.062482837587594986, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027118999511003494, 0.07309459149837494, 0.04486501216888428, 0.012266037985682487, 0.024303032085299492, 0.030924739316105843, 0.021004648879170418, 0.003694491693750024, 0.01517508551478386, 0.025275954976677895, 0.0075909653678536415, 0.24021397531032562, 0.04135901853442192, 0.07603362947702408, 0.11061857640743256, 0.14247462153434753, 0.10275112092494965, 0.08782284706830978, 0.07633533328771591, 0.09427531808614731, 0.2382509559392929, 0.11237408220767975, 0.1274290829896927, 0.09234490990638733, 0.29983192682266235, 0.19681134819984436, 0.09119200706481934, 0.1394888311624527, 0.02876400761306286, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.025165440514683723, 0.019109023734927177, 0.008520743809640408, 0.015198510140180588, 0.007751345168799162, 0.005125374533236027, 0.008160223253071308, 0.0017721926560625434, 0.08641061931848526, 0.07765892893075943, 0.017936453223228455, 0.020675569772720337, 0.0024341135285794735, 0.023971976712346077, 0.16557703912258148, 0.14126147329807281, 0.06271495670080185, 0.09029032289981842, 0.10313913226127625, 0.08530516922473907, 0.05194256827235222, 0.09853952378034592, 0.05407971888780594, 0.10021005570888519, 0.14394013583660126, 0.19472479820251465, 0.17138735949993134, 0.055624835193157196, 0.022259291261434555, 0.010825252160429955, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22320780158042908, 0.05348529666662216, 0.01734296977519989, 0.1172923669219017, 0.004340981598943472, 0.003372892737388611, 0.033841460943222046, 0.024162178859114647, 0.05216863751411438, 0.3090120553970337, 0.2295515090227127, 0.014075365848839283, 0.020010780543088913, 0.20773397386074066, 0.12411301583051682, 0.15579406917095184, 0.5571659207344055, 0.09220181405544281, 0.09424383193254471, 0.2893342971801758, 0.14449337124824524, 0.08881417661905289, 0.09621196240186691, 0.05768556892871857, 0.34467604756355286, 0.16894927620887756, 0.32070621848106384, 0.32385867834091187, 0.08616255223751068, 0.0030245021916925907, 0.011462957598268986, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1383964717388153, 0.05579448863863945, 0.1563209742307663, 0.09128513187170029, 0.039257608354091644, 0.009886945597827435, 0.006391164381057024, 0.0007081980584189296, 0.006523598916828632, 0.16335614025592804, 0.02935076504945755, 0.023180969059467316, 0.19186609983444214, 0.2336183488368988, 0.16814255714416504, 0.06543286889791489, 0.3303832709789276, 0.1981877088546753, 0.17906354367733002, 0.08578304201364517, 0.12075137346982956, 0.09918820112943649, 0.14948950707912445, 0.0696079283952713, 0.2870473861694336, 0.2037079930305481, 0.20505982637405396, 0.415317177772522, 0.18504147231578827, 0.05944397673010826, 0.03780561313033104, 0.06350213289260864, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1625337302684784, 0.007939358241856098, 0.11928629875183105, 0.1341797411441803, 0.005670298356562853, 0.0033473502844572067, 0.022544465959072113, 0.005534132476896048, 0.007299710530787706, 0.08667418360710144, 0.07403960824012756, 0.004230144899338484, 0.002401313977316022, 0.005503634922206402, 0.20701391994953156, 0.08806300163269043, 0.5073549151420593, 0.15216797590255737, 0.1779468059539795, 0.08599209040403366, 0.038353316485881805, 0.05095306783914566, 0.13815101981163025, 0.05531492829322815, 0.3680262565612793, 0.045964885503053665, 0.5803228616714478, 0.2365681380033493, 0.10053237527608871, 0.016326427459716797, 0.011199035681784153, 0.02849578857421875, 0.09785498678684235, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08204744011163712, 0.04882703348994255, 0.048393696546554565, 0.02867632359266281, 0.012730585411190987, 0.02805456519126892, 0.014470821246504784, 0.008571655489504337, 0.011637779884040356, 0.011116313748061657, 0.015620187856256962, 0.00444953003898263, 0.038398172706365585, 0.021771300584077835, 0.25556278228759766, 0.10047968477010727, 0.17735490202903748, 0.1303417980670929, 0.1233980730175972, 0.11124629527330399, 0.27208706736564636, 0.09057758748531342, 0.20949512720108032, 0.0595981664955616, 0.32820063829421997, 0.19304482638835907, 0.3008245825767517, 0.24370267987251282, 0.0977335274219513, 0.0604717954993248, 0.08826017379760742, 0.05976974964141846, 0.11658596247434616, 0.26095637679100037, NaN, NaN, NaN, NaN, NaN, NaN], [0.3818233609199524, 0.6690115928649902, 0.07648678869009018, 0.0345233753323555, 0.011518634855747223, 0.1436365395784378, 0.005264819134026766, 0.000502048700582236, 0.0017500953981652856, 0.03918173909187317, 0.04129163548350334, 0.0023984990548342466, 0.020183494314551353, 0.008427903987467289, 0.09516369551420212, 0.08956606686115265, 0.03296149522066116, 0.07127847522497177, 0.10275094956159592, 0.12852256000041962, 0.15250688791275024, 0.05763629823923111, 0.13953621685504913, 0.2147330343723297, 0.3297017514705658, 0.25630685687065125, 0.3529660999774933, 0.05266188457608223, 0.19866161048412323, 0.08034973591566086, 0.16050152480602264, 0.12120798975229263, 0.21796129643917084, 0.13665789365768433, 0.05867582932114601, NaN, NaN, NaN, NaN, NaN], [0.02332407608628273, 0.06938373297452927, 0.035716570913791656, 0.008126936852931976, 0.012537641450762749, 0.0137803228572011, 0.01513306051492691, 0.00204691500402987, 0.029820755124092102, 0.05474912002682686, 0.016170548275113106, 0.22342036664485931, 0.05026429146528244, 0.06863567978143692, 0.11948796361684799, 0.16931524872779846, 0.06866136193275452, 0.058377113193273544, 0.054153572767972946, 0.06997817754745483, 0.17294903099536896, 0.06504172086715698, 0.09800923615694046, 0.07601338624954224, 0.22323867678642273, 0.17471107840538025, 0.20914696156978607, 0.32561469078063965, 0.04201642796397209, 0.014874166809022427, 0.043757203966379166, 0.11901038885116577, 0.15924809873104095, 0.08216992020606995, 0.13305248320102692, 0.031323518604040146, NaN, NaN, NaN, NaN], [0.020166568458080292, 0.015762973576784134, 0.006330324336886406, 0.008625769056379795, 0.005781465210020542, 0.00451312493532896, 0.007413441780954599, 0.0018466140609234571, 0.14846709370613098, 0.1376892477273941, 0.02431248314678669, 0.03153817355632782, 0.0025850962847471237, 0.026987632736563683, 0.15984071791172028, 0.14597494900226593, 0.05063166096806526, 0.07245789468288422, 0.08537694066762924, 0.07253167033195496, 0.03945168852806091, 0.07488631457090378, 0.04114159941673279, 0.09447583556175232, 0.11984950304031372, 0.21245841681957245, 0.24130037426948547, 0.053050536662340164, 0.036372195929288864, 0.012788524851202965, 0.05413965508341789, 0.17548364400863647, 0.18113258481025696, 0.17045176029205322, 0.056165628135204315, 0.023532675579190254, 0.007599800359457731, NaN, NaN, NaN], [0.11904438585042953, 0.03637225553393364, 0.013324074447154999, 0.04586002975702286, 0.00359557312913239, 0.002297254279255867, 0.02453085221350193, 0.019205793738365173, 0.07615289092063904, 0.3510056436061859, 0.24748629331588745, 0.0179043747484684, 0.015299135819077492, 0.16336295008659363, 0.13914434611797333, 0.20880575478076935, 0.4742221236228943, 0.0684090405702591, 0.07499475032091141, 0.22897963225841522, 0.11411925405263901, 0.06380540132522583, 0.06602712720632553, 0.04886250197887421, 0.25098055601119995, 0.16695836186408997, 0.41882073879241943, 0.45364588499069214, 0.19780457019805908, 0.004864717833697796, 0.007611281704157591, 0.23698794841766357, 0.08390159159898758, 0.28844529390335083, 0.28151822090148926, 0.0680297240614891, 0.0018790157046169043, 0.008693840354681015, NaN, NaN], [0.0598345547914505, 0.028141267597675323, 0.11996681243181229, 0.04193190485239029, 0.03001757152378559, 0.006633914541453123, 0.005910022184252739, 0.0007469199481420219, 0.010509159415960312, 0.18832749128341675, 0.032145459204912186, 0.022126449272036552, 0.16793787479400635, 0.1917877346277237, 0.16885708272457123, 0.06649312376976013, 0.2272576093673706, 0.15548978745937347, 0.13675269484519958, 0.06747769564390182, 0.09888236224651337, 0.07679145783185959, 0.09811051189899445, 0.059132058173418045, 0.16564641892910004, 0.1534833461046219, 0.21299242973327637, 0.46317315101623535, 0.18783308565616608, 0.06707606464624405, 0.07066023349761963, 0.038238298147916794, 0.13390158116817474, 0.1738123893737793, 0.3894510865211487, 0.199345201253891, 0.05267143249511719, 0.03450411930680275, 0.0674150139093399, NaN], [0.30011340975761414, 0.029496116563677788, 0.21246175467967987, 0.11388618499040604, 0.019265230745077133, 0.011386800557374954, 0.02386542037129402, 0.0049255480989813805, 0.002113579073920846, 0.2235003262758255, 0.1410367637872696, 0.022971738129854202, 0.009332037530839443, 0.01034344732761383, 0.12311729788780212, 0.13068987429141998, 0.5177554488182068, 0.21822108328342438, 0.17411521077156067, 0.11371950805187225, 0.10282127559185028, 0.14754493534564972, 0.10529720038175583, 0.04059072583913803, 0.1422514021396637, 0.16688787937164307, 0.3468432128429413, 0.07328897714614868, 0.033892080187797546, 0.005811289418488741, 0.006848806049674749, 0.033459149301052094, 0.08608346432447433, 0.29348817467689514, 0.07146795839071274, 0.05563248693943024, 0.008248405531048775, 0.00942459236830473, 0.03898181766271591, 0.13983668386936188]], [[0.04383472725749016, 0.02773081697523594, 0.016415273770689964, 0.024880478158593178, 0.005487722344696522, 0.14834517240524292, 0.010061212815344334, 0.013310510665178299, 0.03559315577149391, 0.022788431495428085, 0.016539618372917175, 0.022621937096118927, 0.3853665292263031, 0.02895752713084221, 0.21785423159599304, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02212689444422722, 0.0360226184129715, 0.0007962794625200331, 0.005733562167733908, 0.0017349227564409375, 0.011109595187008381, 0.02015179581940174, 0.048344310373067856, 0.003794114338234067, 0.016348786652088165, 0.0018908409401774406, 0.010183308273553848, 0.04822028428316116, 0.011540568433701992, 0.21287554502487183, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.19621919095516205, 0.02568935602903366, 0.012553256005048752, 0.05958101898431778, 0.0049527534283697605, 0.009129918180406094, 0.035662900656461716, 0.006033026147633791, 0.01979534700512886, 0.016174430027604103, 0.025959551334381104, 0.017891131341457367, 0.21532145142555237, 0.010915487073361874, 0.2776879370212555, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.22681212425231934, 0.26364389061927795, 0.1368870735168457, 0.07472710311412811, 0.004966794513165951, 0.17209400236606598, 0.07595591247081757, 0.10330677032470703, 0.009879215620458126, 0.30214887857437134, 0.027453631162643433, 0.07928238064050674, 0.6068928837776184, 0.0009245484252460301, 0.41711828112602234, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03220081329345703, 0.07110226154327393, 0.19687172770500183, 0.32465922832489014, 0.06123804301023483, 0.009123058058321476, 0.008925903588533401, 0.001694322214461863, 0.009767607785761356, 0.012425252236425877, 0.021234901621937752, 0.006749649532139301, 0.022427640855312347, 0.00419656652957201, 0.11337225884199142, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1499132513999939, 0.1588381826877594, 0.006192722357809544, 0.06905046850442886, 0.021936854347586632, 0.04223879054188728, 0.01654554158449173, 0.012800824828445911, 0.001194898271933198, 0.011350413784384727, 0.0011690479004755616, 0.03650015965104103, 0.0330234132707119, 0.032408226281404495, 0.30060991644859314, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10197536647319794, 0.32784661650657654, 0.22266407310962677, 0.37194594740867615, 0.4840903878211975, 0.2562866806983948, 0.20682689547538757, 0.01685171388089657, 0.02662164717912674, 0.01744299754500389, 0.07043293118476868, 0.06053447723388672, 0.13449640572071075, 0.0437617152929306, 0.15905345976352692, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04155902937054634, 0.02725875750184059, 0.06621034443378448, 0.15740959346294403, 0.22226983308792114, 0.11737026274204254, 0.021176597103476524, 0.037896860390901566, 0.001983239781111479, 0.07737525552511215, 0.040612466633319855, 0.036445699632167816, 0.04206009954214096, 0.005294053349643946, 0.22695806622505188, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3731417655944824, 0.020610323175787926, 0.04687204957008362, 0.19942151010036469, 0.0219199787825346, 0.023319954052567482, 0.607546865940094, 0.0038317576982080936, 0.05746426433324814, 0.0039819530211389065, 0.0020286834333091974, 0.023514816537499428, 0.0007224131841212511, 0.0017132725333794951, 0.31377115845680237, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.007707278709858656, 0.04994801804423332, 0.0602150596678257, 0.1843070536851883, 0.023052150383591652, 0.00867108628153801, 0.0030793596524745226, 0.008175634779036045, 0.3707427382469177, 0.032583341002464294, 0.030614105984568596, 0.003414844162762165, 0.0027733321767300367, 0.00039667857345193624, 0.06665757298469543, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06275568902492523, 0.15385569632053375, 0.07121506333351135, 0.04657430946826935, 0.08974524587392807, 0.017753345891833305, 0.09537442773580551, 0.08409535884857178, 0.4617481529712677, 0.05371565744280815, 0.051210206001996994, 0.014556556940078735, 0.0261379461735487, 0.0015151489060372114, 0.25993233919143677, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.037524934858083725, 0.08964382112026215, 0.11503562331199646, 0.2385229468345642, 0.14595970511436462, 0.01507873460650444, 0.07354842126369476, 0.014194677583873272, 0.01029899064451456, 0.3145633935928345, 0.08443433046340942, 0.02799280546605587, 0.006364578381180763, 0.0011598452692851424, 0.25597554445266724, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03498825803399086, 0.003427299438044429, 0.012860815972089767, 0.00960747804492712, 0.0073430403135716915, 0.002194140339270234, 0.020218953490257263, 0.04016692563891411, 0.0035721054300665855, 0.11439335346221924, 0.03179614990949631, 0.0055262502282857895, 0.08811097592115402, 0.0019241927657276392, 0.31578439474105835, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0003122057532891631, 0.0005657155998051167, 0.0003099576279055327, 0.018182117491960526, 8.608390635345131e-05, 0.00029685357003472745, 0.00030423246789723635, 0.0039575002156198025, 0.00041145391878671944, 0.0009832053910940886, 0.0007515411707572639, 0.006357411853969097, 0.3007054328918457, 0.00010537439811741933, 0.00161165336612612, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.052370160818099976, 0.019386928528547287, 0.0404941625893116, 0.16087706387043, 0.14014431834220886, 0.0561581589281559, 0.1907973736524582, 0.027806226164102554, 0.022970959544181824, 0.05846026912331581, 0.09902504831552505, 0.038958851248025894, 0.016928229480981827, 0.04114920645952225, 0.14461401104927063, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03517069295048714, 0.03549245744943619, 0.004381549544632435, 0.008797217160463333, 0.007323419209569693, 0.042320944368839264, 0.004849699325859547, 0.003679578425362706, 0.011580413207411766, 0.009367180056869984, 0.006541883572936058, 0.022973380982875824, 0.023761657997965813, 0.02892483025789261, 0.1581033319234848, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01528994832187891, 0.20408181846141815, 0.11101088672876358, 0.08111120015382767, 0.07986893504858017, 0.010126215405762196, 0.020366966724395752, 0.1417536586523056, 0.04787333309650421, 0.04340335354208946, 0.2409791648387909, 0.04442436248064041, 0.005909040104597807, 0.014603852294385433, 0.18931475281715393, 0.13037645816802979, 0.08109150826931, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21622280776500702, 0.09626477211713791, 0.10110790282487869, 0.31975099444389343, 0.2572377920150757, 0.630383312702179, 0.1336757242679596, 0.17725828289985657, 0.02378956414759159, 0.22253809869289398, 0.13939163088798523, 0.30914127826690674, 0.35968318581581116, 0.48164138197898865, 0.09301326423883438, 0.14859925210475922, 0.02925589494407177, 0.0505123995244503, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.168080672621727, 0.1516411453485489, 0.07150255143642426, 0.32225823402404785, 0.2490793913602829, 0.30686429142951965, 0.032337237149477005, 0.16698232293128967, 0.04405289515852928, 0.2310783565044403, 0.10561788827180862, 0.2769646644592285, 0.19830158352851868, 0.1653461754322052, 0.09653043746948242, 0.21387919783592224, 0.03206360712647438, 0.012896520085632801, 0.06630519032478333, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04038669914007187, 0.16624715924263, 0.3317047655582428, 0.3851986229419708, 0.42305275797843933, 0.008450526744127274, 0.09501849114894867, 0.24002836644649506, 0.4256587326526642, 0.15410973131656647, 0.19127053022384644, 0.04389801248908043, 0.030224177986383438, 0.05971052870154381, 0.11478950828313828, 0.15968731045722961, 0.046736959367990494, 0.014681101776659489, 0.01418250147253275, 0.011044399812817574, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04527302458882332, 0.15370813012123108, 0.46266382932662964, 0.06791326403617859, 0.6029869914054871, 0.018879592418670654, 0.07514301687479019, 0.07948564738035202, 0.6243545413017273, 0.11254889518022537, 0.24916931986808777, 0.08612842112779617, 0.07598677277565002, 0.13317255675792694, 0.04299912229180336, 0.22570300102233887, 0.051045093685388565, 0.020206425338983536, 0.021926334127783775, 0.008406145498156548, 0.0702541247010231, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03695433586835861, 0.028389452025294304, 0.2721908688545227, 0.07653216272592545, 0.6730886697769165, 0.004614274017512798, 0.004165990743786097, 0.01533985324203968, 0.28992146253585815, 0.028840038925409317, 0.055076081305742264, 0.024787841364741325, 0.0010191021719947457, 0.0022868094965815544, 0.030124979093670845, 0.28555917739868164, 0.03329295665025711, 0.036049578338861465, 0.038853298872709274, 0.007190736476331949, 0.006643606815487146, 0.08228380233049393, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005083801224827766, 0.09139324724674225, 0.28116321563720703, 0.08195066452026367, 0.6340349316596985, 0.012272918596863747, 0.0005934475339017808, 0.010692326352000237, 0.1514793336391449, 0.016046250239014626, 0.04672969505190849, 0.014393122866749763, 0.002580928150564432, 0.007409923244267702, 0.12582267820835114, 0.2511760890483856, 0.07463249564170837, 0.04988643527030945, 0.0701586976647377, 0.028143733739852905, 0.007391677238047123, 0.02261284738779068, 0.0737045407295227, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00605103699490428, 0.11548061668872833, 0.2870264947414398, 0.061026521027088165, 0.8064441084861755, 0.2189176380634308, 0.020241523161530495, 0.07779920846223831, 0.08952271938323975, 0.0073190852999687195, 0.02372264862060547, 0.038144610822200775, 0.07446137070655823, 0.09413070231676102, 0.030171062797307968, 0.15217745304107666, 0.19177564978599548, 0.125013530254364, 0.1473270058631897, 0.20325084030628204, 0.10669662803411484, 0.07946557551622391, 0.027662983164191246, 0.09494684636592865, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08316895365715027, 0.6715664267539978, 0.04549514129757881, 0.17856287956237793, 0.018127189949154854, 0.38010329008102417, 0.16956135630607605, 0.5726994872093201, 0.1473512202501297, 0.13756032288074493, 0.044131502509117126, 0.03872460126876831, 0.13646697998046875, 0.07963203638792038, 0.10255669057369232, 0.13806378841400146, 0.2514709234237671, 0.17176732420921326, 0.21858137845993042, 0.17882317304611206, 0.16198168694972992, 0.20351995527744293, 0.07158615440130234, 0.0266498401761055, 0.23213928937911987, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0817432552576065, 0.2031053900718689, 0.02472570165991783, 0.02598942257463932, 0.05427335575222969, 0.43315476179122925, 0.06398319453001022, 0.14792829751968384, 0.18555517494678497, 0.020227503031492233, 0.03572608157992363, 0.008726409636437893, 0.33127138018608093, 0.0956021174788475, 0.032814960926771164, 0.17152094841003418, 0.15314172208309174, 0.15820659697055817, 0.19208288192749023, 0.19640566408634186, 0.061033159494400024, 0.12321671098470688, 0.07748300582170486, 0.07906179875135422, 0.032524362206459045, 0.08073069155216217, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.36652442812919617, 0.4977355897426605, 0.09286413341760635, 0.21385566890239716, 0.18058304488658905, 0.4562758207321167, 0.4738945960998535, 0.2067655473947525, 0.17124009132385254, 0.035114847123622894, 0.05785587430000305, 0.03289380669593811, 0.3892229497432709, 0.2459530532360077, 0.0885753259062767, 0.11935991793870926, 0.25889015197753906, 0.181893989443779, 0.2521744966506958, 0.2510518431663513, 0.1320696324110031, 0.17421388626098633, 0.10352174937725067, 0.13144756853580475, 0.06071629375219345, 0.07381404936313629, 0.11898738145828247, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3338637053966522, 0.241106316447258, 0.10183558613061905, 0.16975384950637817, 0.22215212881565094, 0.1208982765674591, 0.12069278955459595, 0.027770178392529488, 0.12589573860168457, 0.018161755055189133, 0.05639319866895676, 0.024462532252073288, 0.08646970242261887, 0.18506868183612823, 0.2994369864463806, 0.11384479701519012, 0.12307179719209671, 0.17695116996765137, 0.21105043590068817, 0.2652710974216461, 0.1994313895702362, 0.5530626177787781, 0.33474239706993103, 0.11353342235088348, 0.20157715678215027, 0.12058570981025696, 0.02405776083469391, 0.20302970707416534, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24999171495437622, 0.7484717965126038, 0.1908620148897171, 0.6611655354499817, 0.24442408978939056, 0.0825357735157013, 0.5622089505195618, 0.4391622543334961, 0.045715928077697754, 0.2250336855649948, 0.3067566156387329, 0.014471310190856457, 0.06388252228498459, 0.21674634516239166, 0.13583892583847046, 0.1661912202835083, 0.3088836967945099, 0.3049609959125519, 0.34614017605781555, 0.3287224769592285, 0.19484750926494598, 0.49978625774383545, 0.2471936047077179, 0.14924246072769165, 0.2264283001422882, 0.11719675362110138, 0.028577886521816254, 0.03125511854887009, 0.04683076590299606, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05097173899412155, 0.16686855256557465, 0.15120531618595123, 0.3698476254940033, 0.35846272110939026, 0.6895467042922974, 0.8159933686256409, 0.843620777130127, 0.6904561519622803, 0.307870090007782, 0.450530469417572, 0.6275950074195862, 0.15986312925815582, 0.5293903350830078, 0.07888244837522507, 0.1382068395614624, 0.14312644302845, 0.15027517080307007, 0.2806132137775421, 0.10704077035188675, 0.15715429186820984, 0.3545873463153839, 0.2772214114665985, 0.11900671571493149, 0.16433128714561462, 0.08395379036664963, 0.0337035246193409, 0.08286106586456299, 0.029390821233391762, 0.07092607021331787, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3532100319862366, 0.1141892597079277, 0.06207668036222458, 0.23437273502349854, 0.13035829365253448, 0.16457295417785645, 0.6610441207885742, 0.6354422569274902, 0.6703211069107056, 0.18266227841377258, 0.16635818779468536, 0.1048990935087204, 0.1468038111925125, 0.17976891994476318, 0.0709633082151413, 0.31265145540237427, 0.17018769681453705, 0.42172688245773315, 0.3373875319957733, 0.26503118872642517, 0.3668123483657837, 0.6080453991889954, 0.3421963155269623, 0.29850897192955017, 0.22005639970302582, 0.08626232296228409, 0.05660916119813919, 0.04967416450381279, 0.020023291930556297, 0.01626538299024105, 0.03365384787321091, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18437133729457855, 0.20806346833705902, 0.06752406805753708, 0.15831130743026733, 0.3405534625053406, 0.0627271831035614, 0.3717433214187622, 0.3913803696632385, 0.5862330794334412, 0.29396724700927734, 0.02299528755247593, 0.060014016926288605, 0.08232607692480087, 0.15418194234371185, 0.15275102853775024, 0.11847452819347382, 0.5065410137176514, 0.4161456227302551, 0.44356557726860046, 0.358999639749527, 0.34202155470848083, 0.6410406231880188, 0.5693260431289673, 0.3344528377056122, 0.3382241725921631, 0.16963228583335876, 0.12081613391637802, 0.09492655098438263, 0.06781262904405594, 0.059771545231342316, 0.013083304278552532, 0.15846344828605652, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07671413570642471, 0.17070698738098145, 0.13325846195220947, 0.07402658462524414, 0.6503690481185913, 0.1330946981906891, 0.165133535861969, 0.2397843301296234, 0.6370089054107666, 0.09848601371049881, 0.09929761290550232, 0.10903115570545197, 0.14141131937503815, 0.14783106744289398, 0.08112896233797073, 0.14143924415111542, 0.33810776472091675, 0.4273369610309601, 0.4442084729671478, 0.4867575168609619, 0.40271657705307007, 0.7919159531593323, 0.5796146988868713, 0.41502290964126587, 0.19611117243766785, 0.2659074366092682, 0.0590454526245594, 0.09533000737428665, 0.06579555571079254, 0.049002423882484436, 0.011413656175136566, 0.05989237129688263, 0.0694013461470604, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1416744738817215, 0.274202436208725, 0.13295260071754456, 0.20105819404125214, 0.3945937156677246, 0.333781898021698, 0.3556738793849945, 0.2839928865432739, 0.10343024134635925, 0.07706140726804733, 0.054361648857593536, 0.05752982571721077, 0.2817353904247284, 0.27278265357017517, 0.13429909944534302, 0.06363721936941147, 0.3402014374732971, 0.30108359456062317, 0.3598821461200714, 0.356340229511261, 0.2955020070075989, 0.3913557827472687, 0.34592464566230774, 0.3881937265396118, 0.23078370094299316, 0.49122318625450134, 0.3432621657848358, 0.1563359946012497, 0.12668228149414062, 0.1534397453069687, 0.06296171993017197, 0.07472987473011017, 0.07419107109308243, 0.08810260146856308, NaN, NaN, NaN, NaN, NaN, NaN], [0.22879131138324738, 0.1777554452419281, 0.09183042496442795, 0.14726729691028595, 0.1873711347579956, 0.05672184377908707, 0.08326486498117447, 0.01781904511153698, 0.0835406556725502, 0.02614605240523815, 0.06876543164253235, 0.03439611196517944, 0.0621294341981411, 0.16512615978717804, 0.26481878757476807, 0.06025628373026848, 0.1445734202861786, 0.2208743691444397, 0.22917300462722778, 0.34805941581726074, 0.30598515272140503, 0.6932811141014099, 0.6030279994010925, 0.2491629421710968, 0.46458470821380615, 0.5228609442710876, 0.2136632800102234, 0.610046923160553, 0.25265923142433167, 0.14038830995559692, 0.07342293113470078, 0.22653138637542725, 0.10003089159727097, 0.02225746400654316, 0.14559555053710938, NaN, NaN, NaN, NaN, NaN], [0.1532706916332245, 0.5982866883277893, 0.18050755560398102, 0.5800401568412781, 0.22030943632125854, 0.025230426341295242, 0.3744361996650696, 0.265155166387558, 0.03173244372010231, 0.2068646252155304, 0.27338433265686035, 0.012270096689462662, 0.05047086998820305, 0.14277896285057068, 0.15170519053936005, 0.0902293398976326, 0.5066702961921692, 0.45472872257232666, 0.45485398173332214, 0.5058757662773132, 0.3594079613685608, 0.7028806209564209, 0.5180745720863342, 0.25713953375816345, 0.5372852683067322, 0.6213670372962952, 0.2659974694252014, 0.3181111812591553, 0.5259383916854858, 0.33730512857437134, 0.13441412150859833, 0.36266574263572693, 0.10496268421411514, 0.02362431399524212, 0.020191077142953873, 0.04590708762407303, NaN, NaN, NaN, NaN], [0.04688200727105141, 0.12437571585178375, 0.1870293915271759, 0.4533093273639679, 0.3565751910209656, 0.5648568868637085, 0.7852934002876282, 0.7657470703125, 0.5417794585227966, 0.4419334828853607, 0.632922887802124, 0.7103447914123535, 0.15686877071857452, 0.6169639825820923, 0.08483293652534485, 0.1059701219201088, 0.2303982675075531, 0.21762119233608246, 0.3580361306667328, 0.17096057534217834, 0.24843183159828186, 0.5131583213806152, 0.47260501980781555, 0.21650557219982147, 0.38561707735061646, 0.416827529668808, 0.1716565638780594, 0.3172723054885864, 0.29216328263282776, 0.47280052304267883, 0.38235870003700256, 0.1798420399427414, 0.1762932986021042, 0.04000748321413994, 0.08066289126873016, 0.03975420445203781, 0.08505715429782867, NaN, NaN, NaN], [0.2884610891342163, 0.10604135692119598, 0.07176870107650757, 0.2240629643201828, 0.12294583767652512, 0.10159854590892792, 0.6051279902458191, 0.5541971921920776, 0.5623130798339844, 0.16405576467514038, 0.18055777251720428, 0.13399486243724823, 0.12637703120708466, 0.18360036611557007, 0.09598042815923691, 0.2317487895488739, 0.2560827136039734, 0.5102789998054504, 0.4199059009552002, 0.44283756613731384, 0.5258800983428955, 0.732390284538269, 0.4491574466228485, 0.4244932234287262, 0.5298821926116943, 0.43037980794906616, 0.2800268232822418, 0.3093121647834778, 0.4250229299068451, 0.19317308068275452, 0.2640416920185089, 0.38813653588294983, 0.11181202530860901, 0.054203763604164124, 0.037284549325704575, 0.018739882856607437, 0.014264266937971115, 0.035236652940511703, NaN, NaN], [0.10626664012670517, 0.1478983461856842, 0.07806308567523956, 0.11814259737730026, 0.31690794229507446, 0.03372211009263992, 0.30042603611946106, 0.29277828335762024, 0.44479742646217346, 0.216581329703331, 0.023049354553222656, 0.0511498898267746, 0.08494822680950165, 0.14207273721694946, 0.16419102251529694, 0.08032029122114182, 0.6358892321586609, 0.5042787194252014, 0.5074477195739746, 0.5223307013511658, 0.5343775749206543, 0.703619122505188, 0.6657658815383911, 0.45647403597831726, 0.602655827999115, 0.5387927889823914, 0.39006462693214417, 0.39567169547080994, 0.43596506118774414, 0.41000646352767944, 0.269907683134079, 0.5412885546684265, 0.2038634866476059, 0.10306636989116669, 0.05501747503876686, 0.04515310004353523, 0.04695969074964523, 0.008877278305590153, 0.09985174983739853, NaN], [0.048457998782396317, 0.0638582855463028, 0.20956584811210632, 0.021124709397554398, 0.09014897048473358, 0.11662621796131134, 0.3483109474182129, 0.4503737986087799, 0.17136822640895844, 0.02997676283121109, 0.21708470582962036, 0.05856599286198616, 0.2859736979007721, 0.41663405299186707, 0.12262307107448578, 0.03129265457391739, 0.2636677324771881, 0.3672870099544525, 0.438161164522171, 0.7497870922088623, 0.43876102566719055, 0.6747432947158813, 0.5918557643890381, 0.5535795092582703, 0.7133825421333313, 0.7440239787101746, 0.3780657947063446, 0.4423457384109497, 0.6450315713882446, 0.5939705967903137, 0.7279283404350281, 0.4253756105899811, 0.4950290024280548, 0.13756991922855377, 0.08432447165250778, 0.11775307357311249, 0.12791647017002106, 0.07922011613845825, 0.04417572543025017, 0.3473970592021942]], [[0.1774463951587677, 0.26868411898612976, 0.03527391701936722, 0.01705012284219265, 0.00047759010340087116, 0.006241941824555397, 0.0031507122330367565, 0.2944689095020294, 0.038735195994377136, 0.003944840747863054, 0.004385389853268862, 0.004225992131978273, 0.03986744210124016, 0.00549504067748785, 0.07870971411466599, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00027908835909329355, 0.005506355315446854, 0.001626787707209587, 0.13775548338890076, 0.0008261757320724428, 0.00028156363987363875, 0.0002459189563523978, 0.0025131029542535543, 0.0009445812902413309, 0.001017659087665379, 0.002250042976811528, 0.0015115974238142371, 0.0017954352078959346, 0.0006745054270140827, 0.21780018508434296, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.021244889125227928, 0.1178143173456192, 0.008956437930464745, 0.14321640133857727, 0.023635229095816612, 0.3068733811378479, 0.15845780074596405, 0.3092327415943146, 0.0024783278349786997, 0.06481246650218964, 0.008965774439275265, 0.019083118066191673, 0.04005150496959686, 0.01112168189138174, 0.19139143824577332, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00042023108107969165, 0.0008873279439285398, 0.0019056870369240642, 0.007766622584313154, 0.23140135407447815, 0.5036463141441345, 0.015440672636032104, 0.008361338637769222, 0.001879698014818132, 0.0006688520661555231, 0.01133010908961296, 0.09722423553466797, 0.03314661607146263, 0.006971372757107019, 0.02285030484199524, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.002678314223885536, 0.004764833487570286, 0.0003137744788546115, 0.0006636036559939384, 0.07552827149629593, 0.36051952838897705, 0.21059149503707886, 0.11911091953516006, 0.00013829045929014683, 0.00018005385936703533, 0.00021675217431038618, 0.007453517522662878, 0.004449300933629274, 0.03708551451563835, 0.13281597197055817, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.008487393148243427, 0.014329447411000729, 0.005103611387312412, 0.0017902699764817953, 0.00018748251022771, 0.07080603390932083, 0.1865091174840927, 0.03389747440814972, 0.0026728338561952114, 0.00012369015894364566, 0.0001717496052151546, 0.0016556874616071582, 0.0035823825746774673, 0.018341869115829468, 0.2051384449005127, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0016413311241194606, 0.0038119314704090357, 0.0005628983490169048, 6.117233715485781e-05, 0.00011399950017221272, 0.0007454796577803791, 0.054881561547517776, 0.30246245861053467, 0.15667226910591125, 0.0004453254514373839, 0.0002609542279969901, 0.0001120980887208134, 0.0006856885738670826, 0.00573006272315979, 0.011146760545670986, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.001007524086162448, 0.0022212164476513863, 0.00036003260174766183, 2.8946307793376036e-05, 1.0167077562073246e-05, 0.00012231878645252436, 0.00022786400222685188, 0.03619853034615517, 0.005354967433959246, 0.003357505425810814, 0.0005030903848819435, 5.3131421736907214e-05, 4.2532476072665304e-05, 0.00010396525613032281, 0.2518664300441742, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004948427900671959, 0.0037361346185207367, 0.0040338728576898575, 0.0015943445032462478, 3.9753424061927944e-05, 0.00016846440848894417, 0.00017597683472558856, 0.003258961718529463, 0.06328149139881134, 0.43567389249801636, 0.03252503648400307, 0.006277996581047773, 3.634384847828187e-05, 2.672040500328876e-05, 0.030029548332095146, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00322673749178648, 0.017767680808901787, 0.0033617434091866016, 0.029219835996627808, 0.0009114073473028839, 0.002889687195420265, 0.00012576105655170977, 0.01574547402560711, 0.0018639388727024198, 0.6032934188842773, 0.1301620751619339, 0.04121570661664009, 0.0035096178762614727, 0.00032833084696903825, 0.3004224896430969, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.033899419009685516, 0.07324357330799103, 0.00985381193459034, 0.017461512237787247, 0.019165849313139915, 0.07006029784679413, 0.01799222268164158, 0.013579626567661762, 0.00021177329472266138, 0.026033537462353706, 0.13102787733078003, 0.2077469676733017, 0.7029638886451721, 0.029135672375559807, 0.05414650961756706, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0015424743760377169, 0.007544125430285931, 0.010602829977869987, 0.0016127177514135838, 0.006006686482578516, 0.08514653891324997, 0.003129118587821722, 0.0036380700767040253, 1.298951519856928e-05, 6.919799488969147e-05, 0.0003367147874087095, 0.031529009342193604, 0.36636054515838623, 0.21289798617362976, 0.04463290795683861, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005653384607285261, 0.005221519153565168, 0.010438429191708565, 0.0023121859412640333, 0.0034771040081977844, 0.01156994141638279, 0.006321457680314779, 0.006196276750415564, 2.671167931111995e-05, 0.00012823205906897783, 0.00023895784397609532, 0.0015353390481323004, 0.06888392567634583, 0.3010466396808624, 0.05789510905742645, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0025978884659707546, 0.0011408268474042416, 0.0005907863960601389, 0.0073682027868926525, 5.514698841579957e-06, 0.0001586068101460114, 0.0016139426734298468, 0.002635698765516281, 2.2516995159094222e-05, 7.803570952091832e-06, 4.170422926108586e-06, 4.799172893399373e-05, 8.148160122800618e-05, 0.006126015912741423, 0.363029420375824, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.018444720655679703, 0.036891017109155655, 0.08301377296447754, 0.04485299810767174, 0.0371856652200222, 0.0472157783806324, 0.022677546367049217, 0.017107300460338593, 0.03217196837067604, 0.03369837626814842, 0.021089907735586166, 0.018274538218975067, 0.020997297018766403, 0.034321803599596024, 0.1648317128419876, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01622859761118889, 0.0033176897559314966, 0.006228303536772728, 0.003451053285971284, 0.011415286920964718, 0.016942020505666733, 0.0027556640561670065, 0.001647507306188345, 0.0010015909792855382, 0.0013629572931677103, 0.004746851045638323, 0.009338179603219032, 0.00885467603802681, 0.006604180671274662, 0.16180677711963654, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17455320060253143, 0.026163265109062195, 0.2041780799627304, 0.027548620477318764, 0.4711945950984955, 0.5480062365531921, 0.10718726366758347, 0.032194506376981735, 0.08035919070243835, 0.010791448876261711, 0.11821587383747101, 0.04372825473546982, 0.5788823962211609, 0.10199426859617233, 0.06844703108072281, 0.13398022949695587, 0.051660239696502686, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.023936308920383453, 0.03560526669025421, 0.007881848141551018, 0.022994371131062508, 0.003501775674521923, 0.000663262908346951, 0.0027445319574326277, 0.0008202926255762577, 0.002215484855696559, 0.014335977844893932, 0.06139073148369789, 0.0039900378324091434, 0.004902976099401712, 0.006251698825508356, 0.21882350742816925, 0.14254364371299744, 0.023038247600197792, 0.14531654119491577, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01501577626913786, 0.026870740577578545, 0.007700353395193815, 0.02517320215702057, 0.005199552513659, 0.0040618558414280415, 0.0018289085710421205, 0.0005822794046252966, 0.008953371085226536, 0.004845716059207916, 0.02605423890054226, 0.010851072147488594, 0.011600007303059101, 0.011058725416660309, 0.2679094076156616, 0.17795929312705994, 0.024941343814134598, 0.06730933487415314, 0.21388311684131622, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05198093131184578, 0.026691097766160965, 0.04745011776685715, 0.02099662832915783, 0.007765383925288916, 0.0017653746763244271, 0.002459246199578047, 0.0005052239284850657, 0.0007161727407947183, 0.00449666241183877, 0.00950489193201065, 0.002728741616010666, 0.007593079470098019, 0.0031749741174280643, 0.1993207037448883, 0.09399491548538208, 0.3603954315185547, 0.2704434394836426, 0.1475897580385208, 0.18568314611911774, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0031879025045782328, 0.001219254801981151, 0.007273980416357517, 0.0029734931886196136, 9.794573998078704e-05, 0.0006066279602237046, 0.000905939843505621, 0.0002116545947501436, 0.00022416051069740206, 0.001432110439054668, 0.00046862047747708857, 0.0008043517009355128, 0.00010411434050183743, 0.0003457288257777691, 0.22099417448043823, 0.14775781333446503, 0.19919507205486298, 0.14170727133750916, 0.05924544855952263, 0.05067846551537514, 0.45942243933677673, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.020157048478722572, 0.026601465418934822, 0.04540588706731796, 0.04344630241394043, 0.0022944926749914885, 0.0010618591913953424, 0.00406603142619133, 0.0029086798895150423, 0.0019963555969297886, 0.010005260817706585, 0.0020353682339191437, 0.0019374215044081211, 0.0013613863848149776, 0.001661884132772684, 0.34173521399497986, 0.14211317896842957, 0.055850330740213394, 0.31645503640174866, 0.16900919377803802, 0.038168299943208694, 0.07897188514471054, 0.2625669240951538, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09776000678539276, 0.012011643499135971, 0.12930582463741302, 0.019725820049643517, 0.03450663015246391, 0.44516250491142273, 0.09379248321056366, 0.011904217302799225, 0.012111036106944084, 0.007218031212687492, 0.028761520981788635, 0.011232447810471058, 0.17035166919231415, 0.022308414801955223, 0.055901553481817245, 0.08848852664232254, 0.1616290658712387, 0.37575462460517883, 0.24721546471118927, 0.16591095924377441, 0.06889674067497253, 0.052010323852300644, 0.12634019553661346, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0270126610994339, 0.0034831874072551727, 0.03977394104003906, 0.025583824142813683, 0.0007700100541114807, 0.002870001830160618, 0.0027750579174607992, 0.0016644555144011974, 0.0016086471732705832, 0.001177149242721498, 0.00746855279430747, 0.002065857872366905, 0.0016993783647194505, 0.0015537800500169396, 0.32808277010917664, 0.0747382640838623, 0.14914710819721222, 0.6135430335998535, 0.5929751992225647, 0.35069379210472107, 0.2108047604560852, 0.11502823978662491, 0.02365955151617527, 0.17759312689304352, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16020068526268005, 0.019860466942191124, 0.3786206543445587, 0.04546584561467171, 0.22538548707962036, 0.035959187895059586, 0.022749971598386765, 0.0223965086042881, 0.010994979180395603, 0.013655508868396282, 0.08095952123403549, 0.07914181798696518, 0.5184871554374695, 0.24710357189178467, 0.059729527682065964, 0.02855301834642887, 0.21659326553344727, 0.4310435652732849, 0.40604472160339355, 0.3670090436935425, 0.48140615224838257, 0.27167943120002747, 0.09097199141979218, 0.1627163589000702, 0.1288144737482071, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002354596508666873, 0.013563946820795536, 0.0012282072566449642, 0.0011236226418986917, 0.004269973374903202, 0.05393142253160477, 0.010044331662356853, 0.012847290374338627, 0.23206481337547302, 0.0042032524943351746, 0.002388538094237447, 0.005051162093877792, 0.004106870852410793, 0.003583247307687998, 0.0021634430158883333, 0.03365316241979599, 0.14809295535087585, 0.3644290566444397, 0.4046455919742584, 0.26744210720062256, 0.32108214497566223, 0.1678413599729538, 0.190241739153862, 0.22121649980545044, 0.03444775566458702, 0.46765974164009094, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1318124532699585, 0.006612265948206186, 0.026151085272431374, 0.15551267564296722, 0.006537565030157566, 0.045402105897665024, 0.08115606755018234, 0.020273711532354355, 0.2617640495300293, 0.03846455365419388, 0.42425140738487244, 0.0063036843203008175, 0.045534029603004456, 0.06594183295965195, 0.0061628553085029125, 0.038216885179281235, 0.2552680969238281, 0.4071650505065918, 0.3936895430088043, 0.4416206479072571, 0.38015541434288025, 0.1657901555299759, 0.15260477364063263, 0.22771137952804565, 0.10614379495382309, 0.0724361315369606, 0.1760038137435913, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0171976238489151, 0.0023818486370146275, 0.036466922610998154, 0.011855212040245533, 0.019672302529215813, 0.007386004086583853, 0.02982362173497677, 0.0045198979787528515, 0.02385052479803562, 0.25256073474884033, 0.2446560561656952, 0.0453505739569664, 0.08819476515054703, 0.09139581024646759, 0.0022182920947670937, 0.07068492472171783, 0.07818713039159775, 0.3302493095397949, 0.299561083316803, 0.46339741349220276, 0.48102065920829773, 0.15714748203754425, 0.27301517128944397, 0.38065311312675476, 0.19789563119411469, 0.11113718152046204, 0.05171056091785431, 0.13386131823062897, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.023948049172759056, 0.006307430099695921, 0.014840157702565193, 0.01758965104818344, 0.0009477039566263556, 0.00178795016836375, 0.005927308928221464, 0.0026511158794164658, 0.00012311375758145005, 0.04321818798780441, 0.0496363490819931, 0.3416200280189514, 0.001097637927159667, 0.007029203698039055, 0.007338459137827158, 0.05115865543484688, 0.44867002964019775, 0.49208834767341614, 0.477664977312088, 0.4642978608608246, 0.46059542894363403, 0.25649622082710266, 0.406831830739975, 0.27858051657676697, 0.2405669242143631, 0.11958811432123184, 0.1450459510087967, 0.0628136694431305, 0.09898709505796432, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1633826345205307, 0.005062526557594538, 0.04231903329491615, 0.24309031665325165, 0.0009563505300320685, 0.0008045694557949901, 0.004994159564375877, 0.0011061460245400667, 0.0013372766552492976, 0.023061903193593025, 0.044598180800676346, 0.0017028035363182425, 2.3589664124301635e-05, 0.0003540365141816437, 0.16737498342990875, 0.04031704366207123, 0.6707005500793457, 0.529548704624176, 0.4586588144302368, 0.3106471002101898, 0.6713098287582397, 0.4458201229572296, 0.5507155060768127, 0.6255134344100952, 0.5032600164413452, 0.18919125199317932, 0.2968505918979645, 0.3902440667152405, 0.16804949939250946, 0.088200144469738, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1106855720281601, 0.005593962036073208, 0.014953872188925743, 0.19064223766326904, 0.0008905718568712473, 0.002549833618104458, 0.019427485764026642, 0.019940704107284546, 0.0020017458591610193, 0.029780413955450058, 0.01774613931775093, 0.00061158457538113, 0.0022336822003126144, 0.007989613339304924, 0.2558586895465851, 0.13188821077346802, 0.1971314549446106, 0.3902590274810791, 0.4961083233356476, 0.37017205357551575, 0.46889960765838623, 0.2874276340007782, 0.1815745085477829, 0.39618349075317383, 0.17909032106399536, 0.26052209734916687, 0.13463276624679565, 0.11223814636468887, 0.05094114691019058, 0.030694767832756042, 0.23131275177001953, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07112060487270355, 0.029737049713730812, 0.09336916357278824, 0.07307538390159607, 0.023197662085294724, 0.022866347804665565, 0.060328319668769836, 0.04474486783146858, 0.0006379868718795478, 0.027103934437036514, 0.2942929267883301, 0.011375843547284603, 0.07746338844299316, 0.09051978588104248, 0.11258094012737274, 0.029627619311213493, 0.0727827325463295, 0.2382729947566986, 0.16726669669151306, 0.3644602298736572, 0.47072863578796387, 0.2034798413515091, 0.1723088026046753, 0.43477845191955566, 0.18565386533737183, 0.3540991544723511, 0.2379947453737259, 0.07713616639375687, 0.19858470559120178, 0.17015229165554047, 0.0891638696193695, 0.22899208962917328, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15941812098026276, 0.02997875213623047, 0.08360203355550766, 0.10365118086338043, 0.03050130233168602, 0.39312028884887695, 0.3065427839756012, 0.2912093997001648, 0.135236918926239, 0.18899840116500854, 0.13724294304847717, 0.1948302835226059, 0.07353706657886505, 0.12220755219459534, 0.10422825068235397, 0.01839388906955719, 0.10223808884620667, 0.244280606508255, 0.22035017609596252, 0.2828108072280884, 0.41914066672325134, 0.09010869264602661, 0.14338640868663788, 0.35142722725868225, 0.12073972821235657, 0.6723650693893433, 0.17433631420135498, 0.20010362565517426, 0.17566151916980743, 0.17214345932006836, 0.06743419170379639, 0.08234895765781403, 0.4274884760379791, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24064786732196808, 0.0051915524527430534, 0.09652373939752579, 0.2287912219762802, 0.019215410575270653, 0.13947954773902893, 0.15343742072582245, 0.07055477797985077, 0.05467608571052551, 0.10673969984054565, 0.5659986138343811, 0.014077076688408852, 0.1709020584821701, 0.23944324254989624, 0.026877261698246002, 0.02117752842605114, 0.17625343799591064, 0.2448491007089615, 0.23410049080848694, 0.3357784152030945, 0.2992798388004303, 0.09099920094013214, 0.1110134869813919, 0.20308172702789307, 0.1763213574886322, 0.1646280288696289, 0.23259523510932922, 0.3615821301937103, 0.32664546370506287, 0.296549916267395, 0.2726198732852936, 0.07387500256299973, 0.07587912678718567, 0.14093360304832458, NaN, NaN, NaN, NaN, NaN, NaN], [0.019817974418401718, 0.002034382661804557, 0.04978875443339348, 0.009913384914398193, 0.033772312104701996, 0.0069160182029008865, 0.027356693521142006, 0.004301261156797409, 0.005268980748951435, 0.24062182009220123, 0.2975090742111206, 0.09841412305831909, 0.13523375988006592, 0.1965852826833725, 0.004198803100734949, 0.05486638844013214, 0.06597498804330826, 0.2194771021604538, 0.1927901804447174, 0.37433308362960815, 0.412477970123291, 0.07100911438465118, 0.1499587744474411, 0.3056679368019104, 0.16932857036590576, 0.15193165838718414, 0.19111526012420654, 0.291239857673645, 0.37710845470428467, 0.510109543800354, 0.47089657187461853, 0.17204606533050537, 0.09759342670440674, 0.05198577418923378, 0.1557197868824005, NaN, NaN, NaN, NaN, NaN], [0.017094334587454796, 0.005556214600801468, 0.011722622439265251, 0.009952181950211525, 0.0008346029790118337, 0.0009373819339089096, 0.006794091779738665, 0.0019291864009574056, 4.7701923904241994e-05, 0.0364256277680397, 0.035398196429014206, 0.3890627920627594, 0.0013647697633132339, 0.008012092672288418, 0.013173048384487629, 0.03942986950278282, 0.2940163016319275, 0.3192412853240967, 0.3550935387611389, 0.28974649310112, 0.35144588351249695, 0.111830934882164, 0.2212614268064499, 0.1942923218011856, 0.16557106375694275, 0.12293191254138947, 0.3516637980937958, 0.22679129242897034, 0.3504909574985504, 0.4427362084388733, 0.6422855854034424, 0.29741936922073364, 0.17250965535640717, 0.13341550529003143, 0.05469499155879021, 0.0792233869433403, NaN, NaN, NaN, NaN], [0.12328237295150757, 0.0036286553367972374, 0.03202027454972267, 0.16562366485595703, 0.0006255045300349593, 0.00061140360776335, 0.00499368691816926, 0.0010923785157501698, 0.0008833102765493095, 0.03177933022379875, 0.04344986379146576, 0.00255553494207561, 2.260845576529391e-05, 0.0005036385264247656, 0.16160868108272552, 0.03949292004108429, 0.6095755696296692, 0.4376317858695984, 0.4024345874786377, 0.24819140136241913, 0.555855929851532, 0.2881583273410797, 0.40402302145957947, 0.5775710940361023, 0.42070186138153076, 0.22824901342391968, 0.4547353982925415, 0.567461371421814, 0.5762937664985657, 0.33163049817085266, 0.41951635479927063, 0.37286072969436646, 0.25620296597480774, 0.25266289710998535, 0.3395143151283264, 0.13239842653274536, 0.07333662360906601, NaN, NaN, NaN], [0.050196755677461624, 0.002699600299820304, 0.009293685667216778, 0.06999042630195618, 0.0006182404467836022, 0.0013977399794384837, 0.014421526342630386, 0.010930507443845272, 0.0008620836888439953, 0.015927143394947052, 0.008692404255270958, 0.0006625624373555183, 0.0011245491914451122, 0.0053406055085361, 0.2061784416437149, 0.11607979983091354, 0.18507249653339386, 0.30528268218040466, 0.41669708490371704, 0.22673273086547852, 0.3321194052696228, 0.17922396957874298, 0.1181870847940445, 0.299829363822937, 0.11785572022199631, 0.23005077242851257, 0.1731709986925125, 0.17971253395080566, 0.2448451966047287, 0.15796169638633728, 0.701153576374054, 0.1659945547580719, 0.4861533045768738, 0.20215842127799988, 0.13506482541561127, 0.058445703238248825, 0.03114200383424759, 0.21790345013141632, NaN, NaN], [0.04101766273379326, 0.020672734826803207, 0.08772061765193939, 0.04009746387600899, 0.01892852783203125, 0.017910925671458244, 0.057973578572273254, 0.03737492114305496, 0.00047206622548401356, 0.021084431558847427, 0.21054430305957794, 0.013546224683523178, 0.08985017240047455, 0.10610225051641464, 0.1389981210231781, 0.017429474741220474, 0.04190561920404434, 0.14842365682125092, 0.09654705971479416, 0.16489917039871216, 0.24686570465564728, 0.09686223417520523, 0.09368213266134262, 0.2918589413166046, 0.08991989493370056, 0.18521137535572052, 0.19666530191898346, 0.06316249072551727, 0.222347229719162, 0.3215444087982178, 0.3288835287094116, 0.38603323698043823, 0.4142700135707855, 0.25910744071006775, 0.0714699923992157, 0.2130158245563507, 0.1895158588886261, 0.07420682162046432, 0.2235250473022461, NaN], [0.018278781324625015, 0.03789714351296425, 0.00408195098862052, 0.005283118225634098, 0.009515376761555672, 0.11360906809568405, 0.008760524913668633, 0.006613489706069231, 0.018946174532175064, 0.008831392042338848, 0.015675490722060204, 0.021136337891221046, 0.13481837511062622, 0.08728663623332977, 0.15406787395477295, 0.011625233106315136, 0.13701221346855164, 0.3079974055290222, 0.17742200195789337, 0.10538481175899506, 0.17213597893714905, 0.08605048805475235, 0.13507568836212158, 0.2275547832250595, 0.07923908531665802, 0.07705283164978027, 0.2479921281337738, 0.3453103303909302, 0.2883259654045105, 0.36409828066825867, 0.18068012595176697, 0.4896908700466156, 0.399289608001709, 0.5261627435684204, 0.6339481472969055, 0.6382991671562195, 0.5417840480804443, 0.2542280852794647, 0.330732524394989, 0.21995915472507477]], [[0.2133164256811142, 0.025492815300822258, 0.20653849840164185, 0.07043907791376114, 0.10411863774061203, 0.3043566346168518, 0.06760577112436295, 0.5064103603363037, 0.08081910014152527, 0.27507925033569336, 0.5432406663894653, 0.27881479263305664, 0.16320040822029114, 0.2653813064098358, 0.11116068065166473, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015402763150632381, 0.2444494515657425, 0.0030522451270371675, 0.00048490799963474274, 0.0026600188575685024, 0.06905494630336761, 0.012269481085240841, 0.014592616818845272, 0.004205085337162018, 0.0039128707721829414, 0.0037959537003189325, 0.012499181553721428, 0.02713301219046116, 0.00563135975971818, 0.19437076151371002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04805738478899002, 0.007929358631372452, 0.4969516396522522, 0.08109094947576523, 0.008613435551524162, 0.06128339096903801, 0.020970679819583893, 0.014624540694057941, 0.001800250494852662, 0.04372387006878853, 0.036881472915410995, 0.022519467398524284, 0.032134752720594406, 0.17586740851402283, 0.15428785979747772, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.021660206839442253, 0.06483402103185654, 0.07990853488445282, 0.8655576705932617, 0.10770212858915329, 0.042777951806783676, 0.004243527539074421, 0.04141073673963547, 0.0011197980493307114, 0.0010354480473324656, 0.007620980031788349, 0.009411019273102283, 0.023886993527412415, 0.8532692193984985, 0.009252375923097134, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03802541270852089, 0.5626884698867798, 0.3869370222091675, 0.012873617932200432, 0.11968709528446198, 0.014900745823979378, 0.02957817167043686, 0.018288375809788704, 0.005979553796350956, 0.03379013389348984, 0.016338851302862167, 0.01766209304332733, 0.8086205720901489, 0.08052025735378265, 0.13067808747291565, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0663566142320633, 0.02082742564380169, 0.009716741740703583, 0.003548208624124527, 0.0008020728128030896, 0.4547119140625, 0.03523911535739899, 0.0031006578356027603, 0.006736437324434519, 0.0009184986702166498, 0.0011584048625081778, 0.04212343320250511, 0.019468490034341812, 0.001240313402377069, 0.20631356537342072, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004470710642635822, 0.02006937935948372, 0.020011691376566887, 0.019766854122281075, 0.12330501526594162, 0.15558527410030365, 0.04160740226507187, 0.1780312955379486, 0.014384130015969276, 0.005233153235167265, 0.004123131278902292, 0.05227937176823616, 0.013469746336340904, 0.022578507661819458, 0.07922197878360748, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.17898443341255188, 0.006772744003683329, 0.041487641632556915, 0.009575014933943748, 0.016729410737752914, 0.2668032944202423, 0.12321095168590546, 0.6781973838806152, 0.0025635806377977133, 0.01087682880461216, 0.002732365159317851, 0.020299792289733887, 0.0031363710295408964, 0.0008204782498069108, 0.05180227383971214, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.12461799383163452, 0.013122161850333214, 0.02311752177774906, 0.0762406587600708, 0.09383975714445114, 0.007501720450818539, 0.07133012264966965, 0.008159258402884007, 0.13900579512119293, 0.006521029397845268, 0.021471921354532242, 0.012502939440310001, 0.0014349960256367922, 0.011674328707158566, 0.3848530650138855, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.014992507174611092, 0.010756749659776688, 0.10129547864198685, 0.15213072299957275, 0.1363232582807541, 0.16603931784629822, 0.0040587568655610085, 0.505429208278656, 0.0025213102344423532, 0.05678342655301094, 0.20746274292469025, 0.04314066469669342, 0.0019582516979426146, 0.01985819824039936, 0.18090446293354034, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.11427638679742813, 0.0123747568577528, 0.020808644592761993, 0.1336503028869629, 0.008563186042010784, 0.09643486887216568, 0.15193390846252441, 0.050255559384822845, 0.0023536821827292442, 0.3208443820476532, 0.021319447085261345, 0.003293143818154931, 0.027340535074472427, 0.01197835523635149, 0.09007034450769424, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.15923485159873962, 0.11477550864219666, 0.21969333291053772, 0.09681756794452667, 0.07061057537794113, 0.1670638769865036, 0.1398637294769287, 0.059452954679727554, 0.00850652251392603, 0.062244825065135956, 0.03212086483836174, 0.10482167452573776, 0.05658517777919769, 0.03675027936697006, 0.24718202650547028, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004966236650943756, 0.001515651005320251, 0.002549123717471957, 0.006106496322900057, 0.00036676786839962006, 0.0014838402858003974, 0.008350875228643417, 0.003760475432500243, 9.004020830616355e-05, 0.003012964967638254, 0.000879374798387289, 0.0023141989950090647, 0.5349817276000977, 0.00013737898552790284, 0.18041089177131653, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [3.0577066354453564e-05, 0.00011073229688918218, 0.0002722943318076432, 0.00012968607188668102, 3.925479541067034e-05, 9.284611587645486e-05, 1.1375399481039494e-05, 0.00013649655738845468, 2.160583608201705e-05, 3.872126853821101e-06, 4.776401965500554e-06, 5.892393892281689e-05, 0.3018791675567627, 0.0016873051645234227, 0.00020723984926007688, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0053407615050673485, 0.002270790981128812, 0.015077341347932816, 0.008943013846874237, 0.01947944425046444, 0.013856526464223862, 0.021029049530625343, 0.011522401124238968, 0.019980257377028465, 0.021877266466617584, 0.03018842823803425, 0.06539047509431839, 0.04945596680045128, 0.008784771896898746, 0.1688213050365448, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05651351809501648, 0.11774645000696182, 0.026926513761281967, 0.04848615080118179, 0.10334916412830353, 0.4247743785381317, 0.21147629618644714, 0.6254463195800781, 0.10587190836668015, 0.08194849640130997, 0.04674661532044411, 0.35135090351104736, 0.35409873723983765, 0.43208518624305725, 0.11939813196659088, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05609016492962837, 0.06931670010089874, 0.1576625108718872, 0.27308744192123413, 0.04202406853437424, 0.2399596869945526, 0.3320065140724182, 0.6272499561309814, 0.09423039108514786, 0.144412100315094, 0.2769482433795929, 0.05643320456147194, 0.11388154327869415, 0.32551372051239014, 0.13187405467033386, 0.04915444552898407, 0.7444152235984802, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1798395812511444, 0.02382134646177292, 0.024498937651515007, 0.28730508685112, 0.19651466608047485, 0.13693250715732574, 0.34929007291793823, 0.1055094301700592, 0.08990196883678436, 0.5189381837844849, 0.3313819468021393, 0.34343984723091125, 0.21719343960285187, 0.21188895404338837, 0.15588119626045227, 0.10270431637763977, 0.20103313028812408, 0.23083212971687317, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.26584357023239136, 0.03035559318959713, 0.026536965742707253, 0.20298171043395996, 0.23938016593456268, 0.24181482195854187, 0.31930428743362427, 0.10626629739999771, 0.13103167712688446, 0.4636806845664978, 0.393515944480896, 0.3422740399837494, 0.342117577791214, 0.5495904088020325, 0.14030353724956512, 0.1558120846748352, 0.09243088960647583, 0.02280065417289734, 0.32627996802330017, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.30834218859672546, 0.3875667452812195, 0.32842832803726196, 0.16462059319019318, 0.416511207818985, 0.03730625659227371, 0.23662680387496948, 0.5092235207557678, 0.08549848943948746, 0.3278381824493408, 0.507111668586731, 0.0415511280298233, 0.5590415596961975, 0.6185146570205688, 0.0664283037185669, 0.1265193670988083, 0.1639627069234848, 0.12297425419092178, 0.08557231724262238, 0.1833999902009964, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0765935555100441, 0.29552146792411804, 0.05705742537975311, 0.01913047581911087, 0.15779250860214233, 0.030224651098251343, 0.08988720178604126, 0.3389361500740051, 0.08153010904788971, 0.05811480060219765, 0.09408371150493622, 0.19600677490234375, 0.6126919388771057, 0.623294472694397, 0.13969288766384125, 0.11118379235267639, 0.23907560110092163, 0.16732671856880188, 0.1982172429561615, 0.02825341187417507, 0.15412425994873047, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4304950535297394, 0.5688965320587158, 0.09143517911434174, 0.09618712961673737, 0.13307496905326843, 0.014428870752453804, 0.040250685065984726, 0.15830516815185547, 0.10923942923545837, 0.23653797805309296, 0.3180045783519745, 0.5594316720962524, 0.5058388710021973, 0.3866141140460968, 0.14058275520801544, 0.06564534455537796, 0.4107542335987091, 0.09891282767057419, 0.3507450222969055, 0.0021941487211734056, 0.004341787192970514, 0.11288701742887497, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.31169822812080383, 0.7707167863845825, 0.30778199434280396, 0.10994993895292282, 0.18047340214252472, 0.01769133098423481, 0.014783667400479317, 0.009741406887769699, 0.1340220719575882, 0.11223828792572021, 0.46960482001304626, 0.360332190990448, 0.56731116771698, 0.5470200181007385, 0.18929171562194824, 0.09254656732082367, 0.17870496213436127, 0.11882538348436356, 0.2565489113330841, 0.06709786504507065, 0.020701991394162178, 0.05621851608157158, 0.571487307548523, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2397254854440689, 0.361926406621933, 0.24345533549785614, 0.18179422616958618, 0.10373111069202423, 0.014045567251741886, 0.08654272556304932, 0.018043776974081993, 0.02193235233426094, 0.07134812325239182, 0.19312754273414612, 0.6192790865898132, 0.6039608716964722, 0.673239529132843, 0.15608295798301697, 0.12130707502365112, 0.06869146227836609, 0.052872415632009506, 0.07373122870922089, 0.03967232629656792, 0.019552208483219147, 0.024196362122893333, 0.1570335328578949, 0.3329051434993744, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.32110491394996643, 0.2706402838230133, 0.034645695239305496, 0.029830342158675194, 0.00933478306978941, 0.25964564085006714, 0.17791348695755005, 0.11580535769462585, 0.07073061913251877, 0.10197918862104416, 0.06440304219722748, 0.2378954440355301, 0.09358810633420944, 0.24307624995708466, 0.22625915706157684, 0.12370187789201736, 0.027735348790884018, 0.007442266680300236, 0.018701551482081413, 0.04923407360911369, 0.022976329550147057, 0.06834850460290909, 0.13354788720607758, 0.13089321553707123, 0.41554775834083557, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18688960373401642, 0.6521251797676086, 0.05505351349711418, 0.05518023297190666, 0.07190049439668655, 0.15721110999584198, 0.11867944896221161, 0.2974295914173126, 0.018550140783190727, 0.1645369827747345, 0.09910324215888977, 0.499615877866745, 0.34706613421440125, 0.5406060218811035, 0.24014075100421906, 0.08012630045413971, 0.020899765193462372, 0.032236725091934204, 0.011631320230662823, 0.1322554349899292, 0.13739252090454102, 0.3272823691368103, 0.10228703171014786, 0.16136890649795532, 0.12631160020828247, 0.3315902352333069, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24844318628311157, 0.24823600053787231, 0.41713690757751465, 0.05438315495848656, 0.5823535323143005, 0.1801777333021164, 0.13823869824409485, 0.16278210282325745, 0.035736992955207825, 0.017554355785250664, 0.03778500482439995, 0.09959819167852402, 0.18642207980155945, 0.26950401067733765, 0.24913227558135986, 0.07002493739128113, 0.03239390626549721, 0.05209453031420708, 0.033656563609838486, 0.10301846265792847, 0.08080227673053741, 0.10908480733633041, 0.10694557428359985, 0.2992934286594391, 0.26628223061561584, 0.1579413264989853, 0.18216297030448914, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21744470298290253, 0.04392259195446968, 0.5108200907707214, 0.27167755365371704, 0.5572997331619263, 0.30860280990600586, 0.5083038210868835, 0.6815038919448853, 0.3754148483276367, 0.01992654800415039, 0.0589066781103611, 0.07934294641017914, 0.15649113059043884, 0.3772245943546295, 0.25267744064331055, 0.23901967704296112, 0.02059122547507286, 0.03393668681383133, 0.04736512154340744, 0.05927135422825813, 0.02361929975450039, 0.006761881057173014, 0.05556455999612808, 0.1379650980234146, 0.12424714863300323, 0.191926509141922, 0.01547694206237793, 0.05743350088596344, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11088164150714874, 0.06568774580955505, 0.49295517802238464, 0.06175035238265991, 0.3928946256637573, 0.306259423494339, 0.1265336275100708, 0.29877781867980957, 0.061930101364851, 0.053618840873241425, 0.02546272985637188, 0.011733881197869778, 0.4200928509235382, 0.25557151436805725, 0.12701815366744995, 0.0662187710404396, 0.02669837884604931, 0.008789082989096642, 0.004751283209770918, 0.0528719425201416, 0.011242655105888844, 0.018989307805895805, 0.07620660215616226, 0.012969521805644035, 0.039284493774175644, 0.22954939305782318, 0.04563957825303078, 0.029234008863568306, 0.7488549947738647, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06005493924021721, 0.46575742959976196, 0.4922090172767639, 0.06956527382135391, 0.3788193464279175, 0.21330630779266357, 0.06565267592668533, 0.10461793839931488, 0.1200915202498436, 0.07597928494215012, 0.08451344817876816, 0.06952610611915588, 0.03487509861588478, 0.12158560007810593, 0.14820002019405365, 0.10826153308153152, 0.014460555277764797, 0.0725417360663414, 0.03217141702771187, 0.06698039174079895, 0.08051858842372894, 0.05872708931565285, 0.022866755723953247, 0.06705553829669952, 0.07034263759851456, 0.3507814407348633, 0.05356235057115555, 0.08709309250116348, 0.23604632914066315, 0.324868768453598, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11028759926557541, 0.4027779996395111, 0.8237467408180237, 0.1328621804714203, 0.7811888456344604, 0.5416622757911682, 0.16887041926383972, 0.2001309096813202, 0.08848496526479721, 0.05607001483440399, 0.13165172934532166, 0.10739479213953018, 0.052385441958904266, 0.05461856350302696, 0.16259506344795227, 0.13878783583641052, 0.02536645717918873, 0.06943535804748535, 0.05891912057995796, 0.006977759767323732, 0.003910682164132595, 0.004916978534311056, 0.04463541880249977, 0.07985055446624756, 0.07872368395328522, 0.291103333234787, 0.21302121877670288, 0.16995804011821747, 0.19893744587898254, 0.01890285685658455, 0.3838881254196167, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12960980832576752, 0.21605639159679413, 0.13754284381866455, 0.0687912181019783, 0.2001095861196518, 0.7652902007102966, 0.3308810591697693, 0.3389359712600708, 0.07430214434862137, 0.036511119455099106, 0.010612682439386845, 0.005050503648817539, 0.1584991067647934, 0.036481909453868866, 0.18724960088729858, 0.04579493775963783, 0.04550570994615555, 0.013287660665810108, 0.023886512964963913, 0.024052713066339493, 0.017023656517267227, 0.04836693033576012, 0.030526861548423767, 0.017645621672272682, 0.03170713782310486, 0.09266000241041183, 0.23106807470321655, 0.03557471185922623, 0.12432269752025604, 0.10334902256727219, 0.3233395516872406, 0.3770029842853546, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16838932037353516, 0.47491130232810974, 0.21776747703552246, 0.05912807583808899, 0.16565343737602234, 0.34125030040740967, 0.2414778620004654, 0.28169524669647217, 0.03973108157515526, 0.03921183571219444, 0.02238578163087368, 0.02449338510632515, 0.05498792976140976, 0.03159895911812782, 0.17659053206443787, 0.0394071489572525, 0.011173942126333714, 0.019201254472136497, 0.012027204036712646, 0.1043756976723671, 0.09629304707050323, 0.044260744005441666, 0.010774374939501286, 0.027033720165491104, 0.01529898401349783, 0.004158060997724533, 0.03471178933978081, 0.3574643135070801, 0.04469288885593414, 0.27014297246932983, 0.10925178974866867, 0.34427598118782043, 0.2875407040119171, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14295107126235962, 0.27777984738349915, 0.30436068773269653, 0.03198731318116188, 0.38494178652763367, 0.27411460876464844, 0.18790900707244873, 0.29966217279434204, 0.029011890292167664, 0.012050352990627289, 0.008839968591928482, 0.009298003278672695, 0.09229473769664764, 0.05935056507587433, 0.2074589878320694, 0.08343059569597244, 0.043180350214242935, 0.0767669752240181, 0.06360654532909393, 0.1271795630455017, 0.0800960585474968, 0.06889919936656952, 0.05648425221443176, 0.1521727591753006, 0.09240606427192688, 0.03566697984933853, 0.03560119867324829, 0.1492718607187271, 0.18653850257396698, 0.3474813401699066, 0.3278762698173523, 0.10706853121519089, 0.127774178981781, 0.1299499273300171, NaN, NaN, NaN, NaN, NaN, NaN], [0.185210719704628, 0.0802093893289566, 0.4863169491291046, 0.24164138734340668, 0.5185936689376831, 0.381059467792511, 0.5372542142868042, 0.6922534108161926, 0.40473121404647827, 0.015452258288860321, 0.03550630062818527, 0.023993153125047684, 0.09803077578544617, 0.14391310513019562, 0.25199130177497864, 0.23721955716609955, 0.02343675307929516, 0.03610215708613396, 0.05973569303750992, 0.07488072663545609, 0.026813305914402008, 0.0050082337111234665, 0.03149579092860222, 0.06251367926597595, 0.02305557392537594, 0.025774041190743446, 0.007636546157300472, 0.004965651780366898, 0.09922869503498077, 0.133448526263237, 0.1956746131181717, 0.04676169902086258, 0.27956491708755493, 0.021136147901415825, 0.057313986122608185, NaN, NaN, NaN, NaN, NaN], [0.08245678246021271, 0.1390499472618103, 0.5461503863334656, 0.060220371931791306, 0.43899697065353394, 0.5144884586334229, 0.22183947265148163, 0.5088672041893005, 0.09321429580450058, 0.05354699492454529, 0.02214067056775093, 0.004303250927478075, 0.39110496640205383, 0.12463895231485367, 0.1568218618631363, 0.0697786882519722, 0.028010839596390724, 0.012634677812457085, 0.007894599810242653, 0.0697624459862709, 0.015741104260087013, 0.01737123914062977, 0.05471426621079445, 0.0063003492541611195, 0.009287585504353046, 0.02825707383453846, 0.016440505161881447, 0.0038715004920959473, 0.07019948214292526, 0.02518516778945923, 0.041359793394804, 0.06545242667198181, 0.29174378514289856, 0.05010553449392319, 0.020036837086081505, 0.7549301981925964, NaN, NaN, NaN, NaN], [0.043030936270952225, 0.498334676027298, 0.5084810853004456, 0.06107298657298088, 0.3904430866241455, 0.35258427262306213, 0.08483341336250305, 0.17738159000873566, 0.1815967708826065, 0.09597334265708923, 0.08432064205408096, 0.040181081742048264, 0.02593160979449749, 0.08670566976070404, 0.14764654636383057, 0.12042609602212906, 0.016146911308169365, 0.09666067361831665, 0.04101520776748657, 0.09386932849884033, 0.11830881983041763, 0.08227012306451797, 0.02001151442527771, 0.0443122573196888, 0.028465820476412773, 0.11253371834754944, 0.02299223281443119, 0.013287386856973171, 0.043506089597940445, 0.09705191105604172, 0.08899306505918503, 0.14267200231552124, 0.1414598524570465, 0.04555709660053253, 0.08242949843406677, 0.2358742356300354, 0.30384859442710876, NaN, NaN, NaN], [0.0785449668765068, 0.4015392065048218, 0.8182658553123474, 0.10243776440620422, 0.7659414410591125, 0.5735372304916382, 0.16621330380439758, 0.21339072287082672, 0.12523002922534943, 0.05685745179653168, 0.1081186980009079, 0.07184037566184998, 0.02847907319664955, 0.031456008553504944, 0.15293413400650024, 0.14026813209056854, 0.02709769457578659, 0.07936792075634003, 0.07383942604064941, 0.01026969589293003, 0.007506935391575098, 0.01013263501226902, 0.043357811868190765, 0.054843299090862274, 0.032377004623413086, 0.07885654270648956, 0.05951513722538948, 0.021026868373155594, 0.029062975198030472, 0.004067933652549982, 0.00896876398473978, 0.031901001930236816, 0.2457016408443451, 0.1949184089899063, 0.16180625557899475, 0.23649972677230835, 0.020314330235123634, 0.390868216753006, NaN, NaN], [0.07311940938234329, 0.15430475771427155, 0.1386927217245102, 0.04823235049843788, 0.20945730805397034, 0.8191487193107605, 0.33371293544769287, 0.3618466258049011, 0.1152336597442627, 0.031010858714580536, 0.008395140990614891, 0.002998974174261093, 0.13362915813922882, 0.02411211095750332, 0.1613900512456894, 0.036581799387931824, 0.048626694828271866, 0.015552042052149773, 0.027681825682520866, 0.03610476478934288, 0.033903565257787704, 0.10816461592912674, 0.038128215819597244, 0.015381437726318836, 0.020138615742325783, 0.04596110060811043, 0.12391334027051926, 0.008882056921720505, 0.017164889723062515, 0.019657107070088387, 0.039318498224020004, 0.012226631864905357, 0.12883862853050232, 0.2578184902667999, 0.03228205814957619, 0.13855229318141937, 0.08962707966566086, 0.32015570998191833, 0.32621434330940247, NaN], [0.2622520923614502, 0.7386532425880432, 0.41215938329696655, 0.08539438247680664, 0.7665934562683105, 0.5218235850334167, 0.42940571904182434, 0.4037780165672302, 0.7456067204475403, 0.07961834967136383, 0.02781907096505165, 0.02608557976782322, 0.15701159834861755, 0.05025498941540718, 0.11428551375865936, 0.16620944440364838, 0.03880922496318817, 0.027515552937984467, 0.018877340480685234, 0.019147777929902077, 0.2389368712902069, 0.02623477764427662, 0.012871777638792992, 0.013969821855425835, 0.021991701796650887, 0.0026013199239969254, 0.00741098215803504, 0.01774594374001026, 0.003101027337834239, 0.007316285278648138, 0.009464021772146225, 0.007634901907294989, 0.005969886668026447, 0.011287253350019455, 0.04429420828819275, 0.016200777143239975, 0.03440575301647186, 0.14183124899864197, 0.1436305195093155, 0.03402799740433693]], [[0.09667091816663742, 0.08969368785619736, 0.16646768152713776, 0.01428181305527687, 0.1262292116880417, 0.03015410713851452, 0.00857650488615036, 0.013287652283906937, 0.013465571217238903, 0.009945754893124104, 0.03584994748234749, 0.07976501435041428, 0.013894102536141872, 0.07191513478755951, 0.16682514548301697, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00307486648671329, 0.2169581949710846, 0.015313946641981602, 0.005070009268820286, 0.13766343891620636, 0.036365993320941925, 0.013734312728047371, 0.012890451587736607, 0.00037508379318751395, 0.002069024136289954, 0.0038654597010463476, 0.007793853525072336, 0.006365353707224131, 0.02897111512720585, 0.19472798705101013, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.013033762574195862, 0.0016745100729167461, 0.09789733588695526, 0.11557573825120926, 0.070904940366745, 0.039959780871868134, 0.06112189590930939, 0.005926545709371567, 0.05931684747338295, 0.06562750041484833, 0.015556245110929012, 0.2949027419090271, 0.09280899167060852, 0.18960142135620117, 0.2321171909570694, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0009253448224626482, 0.0011463494738563895, 0.0022407870274037123, 0.022192178294062614, 0.18083734810352325, 0.18906380236148834, 0.06340676546096802, 0.5556718111038208, 0.008876022882759571, 0.00195835973136127, 0.009641225449740887, 0.13488754630088806, 0.03692271187901497, 0.0069083282724022865, 0.19416382908821106, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.020195724442601204, 0.0026999269612133503, 0.0047158133238554, 0.017117822542786598, 0.22690622508525848, 0.009801734238862991, 0.18513473868370056, 0.000916039280127734, 0.006044555455446243, 0.006021710112690926, 0.010346228256821632, 0.04500352963805199, 0.008295656181871891, 0.1122727021574974, 0.4271945357322693, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02983868308365345, 0.03651329129934311, 0.005064305383712053, 0.00043434457620605826, 0.001774297677911818, 0.10316617041826248, 0.10274261981248856, 0.570116400718689, 0.0018607155652716756, 0.004884766880422831, 0.0001192242925753817, 0.01004798710346222, 0.011760696768760681, 0.020220324397087097, 0.036799319088459015, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.020830435678362846, 0.04066089913249016, 0.01340602245181799, 0.0007146665593609214, 0.05329689383506775, 0.010700137354433537, 0.06310626864433289, 0.1416247934103012, 0.059007443487644196, 0.009734428487718105, 0.023192377761006355, 0.030464952811598778, 0.011454294435679913, 0.06458231806755066, 0.29838618636131287, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04047420993447304, 0.05575861781835556, 0.0035385461524128914, 0.00047053993330337107, 0.010776028037071228, 0.0002634078555274755, 0.006466362159699202, 0.09768779575824738, 0.011305907741189003, 0.6455902457237244, 0.005685864482074976, 0.009437574073672295, 0.0014128481270745397, 0.0036261524073779583, 0.1994941532611847, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.001968077849596739, 0.00013096239126753062, 0.014192181639373302, 0.0025808673817664385, 1.1752749742299784e-05, 7.090794679243118e-05, 8.489128958899528e-05, 7.501097570639104e-05, 0.005588378757238388, 0.00024033378576859832, 0.7911840081214905, 0.0006417080294340849, 0.00012212486763019115, 0.0026151463389396667, 0.024830428883433342, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.007711799815297127, 0.006852409336715937, 0.005409319419413805, 0.029324712231755257, 0.0012151957489550114, 0.0014427780406549573, 0.0002848623844329268, 0.0011284908978268504, 0.00042831210885196924, 0.0035933239851146936, 0.2853389084339142, 0.04352247342467308, 0.0011324246879667044, 0.0015205255476757884, 0.05924868583679199, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06333743035793304, 0.004831443540751934, 0.017261236906051636, 0.05893971398472786, 0.005950291641056538, 0.002105317311361432, 0.003185122972354293, 0.0028415010310709476, 0.004572128411382437, 0.007815520279109478, 0.07613655924797058, 0.10669270157814026, 0.027066918089985847, 0.03207901865243912, 0.4743220806121826, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10327208787202835, 0.004544916562736034, 0.05445469170808792, 0.010814311914145947, 0.026858847588300705, 0.011217474937438965, 0.07071709632873535, 0.05960191786289215, 0.0010665962472558022, 0.025403864681720734, 0.006131312809884548, 0.5720618963241577, 0.029676837846636772, 0.17520834505558014, 0.23297326266765594, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.011414228938519955, 0.002735550981014967, 0.015156290493905544, 0.0027777000796049833, 0.009832575917243958, 0.015552453696727753, 0.017305195331573486, 0.004722784738987684, 4.7792200348339975e-05, 0.0034479873720556498, 0.0004017044266220182, 0.0011886333813890815, 0.18307994306087494, 0.2786843478679657, 0.04159880056977272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0032662157900631428, 0.004168938845396042, 0.0016457620076835155, 0.0005059303948655725, 0.0003206630062777549, 0.000853654695674777, 0.010604765266180038, 0.005784912034869194, 0.00014833646127954125, 0.0001704594906186685, 5.580573997576721e-05, 0.0004662217397708446, 0.0009024841128848493, 0.025914611294865608, 0.3543371260166168, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.057395875453948975, 0.01834016665816307, 0.017516011372208595, 0.011936328373849392, 0.010095582343637943, 0.018046732991933823, 0.24530914425849915, 0.01257838774472475, 0.014466731809079647, 0.027552323415875435, 0.054997242987155914, 0.013960911892354488, 0.0074861980974674225, 0.03251070901751518, 0.14566579461097717, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5009713768959045, 0.11806200444698334, 0.543484628200531, 0.29247328639030457, 0.5261343717575073, 0.23446989059448242, 0.5474087595939636, 0.062012095004320145, 0.8189043998718262, 0.538780152797699, 0.6200674176216125, 0.43515679240226746, 0.24830776453018188, 0.341129869222641, 0.04290800169110298, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018064359202980995, 0.030848585069179535, 0.08071158826351166, 0.0676560178399086, 0.13447926938533783, 0.11551786214113235, 0.17043589055538177, 0.10128363966941833, 0.6618390679359436, 0.2855142652988434, 0.0971621423959732, 0.23388729989528656, 0.21859601140022278, 0.46025529503822327, 0.182326078414917, 0.13823550939559937, 0.01690824329853058, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04308566823601723, 0.03711610287427902, 0.06502576172351837, 0.10632220655679703, 0.09326566010713577, 0.08777783066034317, 0.3412204086780548, 0.6204424500465393, 0.8231819868087769, 0.09377399832010269, 0.1541169434785843, 0.21222646534442902, 0.11298450827598572, 0.15309588611125946, 0.11645805835723877, 0.1366243064403534, 0.10029595345258713, 0.03309698402881622, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07351326197385788, 0.05497964471578598, 0.07563240081071854, 0.32393333315849304, 0.057468246668577194, 0.2634526193141937, 0.3780488967895508, 0.7154850363731384, 0.7017503976821899, 0.20895157754421234, 0.29085400700569153, 0.06311048567295074, 0.03268700838088989, 0.14748480916023254, 0.03694311901926994, 0.14204008877277374, 0.17578311264514923, 0.058153361082077026, 0.03275991603732109, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15202973783016205, 0.07260382175445557, 0.07307075709104538, 0.01561899296939373, 0.03831832483410835, 0.04392734169960022, 0.07259247452020645, 0.03668325021862984, 0.315115749835968, 0.14016768336296082, 0.147903710603714, 0.09513753652572632, 0.08079177141189575, 0.04876280575990677, 0.1678115576505661, 0.15378697216510773, 0.06811928749084473, 0.031730279326438904, 0.02174059860408306, 0.06419884413480759, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20334205031394958, 0.03987862542271614, 0.2323523759841919, 0.08299659937620163, 0.11007620394229889, 0.049821991473436356, 0.05303451418876648, 0.020633194595575333, 0.20804192125797272, 0.621069610118866, 0.6013453006744385, 0.6998922824859619, 0.30664384365081787, 0.1810489445924759, 0.12484823167324066, 0.2336570769548416, 0.05475717782974243, 0.004165933933109045, 0.0025384188629686832, 0.005177688784897327, 0.12858138978481293, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.33830341696739197, 0.10967365652322769, 0.03348035365343094, 0.09579410403966904, 0.07735400646924973, 0.09874830394983292, 0.15181724727153778, 0.11190870404243469, 0.4600948095321655, 0.5270871520042419, 0.27297794818878174, 0.3748718500137329, 0.4609748125076294, 0.5019738078117371, 0.0790465772151947, 0.1292651742696762, 0.01662198081612587, 0.01174056064337492, 0.002378111705183983, 0.04036910459399223, 0.6038607358932495, 0.053664252161979675, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18835663795471191, 0.05185278132557869, 0.06106729805469513, 0.04512745887041092, 0.04466439411044121, 0.025852244347333908, 0.031750425696372986, 0.022515133023262024, 0.5077425837516785, 0.6734393835067749, 0.37964752316474915, 0.35936975479125977, 0.19831591844558716, 0.216437429189682, 0.2985125184059143, 0.13257111608982086, 0.0015173845458775759, 0.11979293078184128, 0.025075461715459824, 0.17128729820251465, 0.38108551502227783, 0.04533570259809494, 0.02173132263123989, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5560556054115295, 0.47877317667007446, 0.15116584300994873, 0.40482252836227417, 0.04176756739616394, 0.04773563891649246, 0.13619393110275269, 0.07804162055253983, 0.07037016749382019, 0.5527278780937195, 0.486864298582077, 0.22204715013504028, 0.2625967860221863, 0.19855597615242004, 0.060070205479860306, 0.12533389031887054, 0.01691550202667713, 0.03341663256287575, 0.04296481981873512, 0.13898836076259613, 0.21484552323818207, 0.09921174496412277, 0.178620383143425, 0.08540544658899307, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21585102379322052, 0.028776921331882477, 0.056070148944854736, 0.3207121789455414, 0.0078024002723395824, 0.016524065285921097, 0.3710367977619171, 0.14693383872509003, 0.12693363428115845, 0.6266815662384033, 0.6993157863616943, 0.5497558116912842, 0.14310741424560547, 0.3664083480834961, 0.047443971037864685, 0.19628551602363586, 0.0262758769094944, 0.06177970767021179, 0.020167797803878784, 0.21508394181728363, 0.05243970826268196, 0.05236654728651047, 0.019688904285430908, 0.04470491781830788, 0.03636182099580765, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.28475576639175415, 0.10818006843328476, 0.08735410869121552, 0.329417884349823, 0.02252645045518875, 0.04752267897129059, 0.3733118176460266, 0.39454737305641174, 0.029050499200820923, 0.6059318780899048, 0.7311877012252808, 0.44807982444763184, 0.29598307609558105, 0.33838847279548645, 0.16424106061458588, 0.10685201734304428, 0.1520930975675583, 0.22691352665424347, 0.1206204891204834, 0.20647111535072327, 0.3387817144393921, 0.17652125656604767, 0.14866295456886292, 0.058651361614465714, 0.13512541353702545, 0.029732942581176758, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08968453854322433, 0.11453098803758621, 0.20413988828659058, 0.368092805147171, 0.07694120705127716, 0.048818718641996384, 0.12943927943706512, 0.036333490163087845, 0.04509947448968887, 0.25635746121406555, 0.2806471586227417, 0.5608395338058472, 0.1390012502670288, 0.28897786140441895, 0.04701472818851471, 0.14931687712669373, 0.17397953569889069, 0.045104723423719406, 0.029273295775055885, 0.009919327683746815, 0.05321130529046059, 0.40632039308547974, 0.053491849452257156, 0.10154163092374802, 0.08916116505861282, 0.038379959762096405, 0.050926242023706436, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05315335839986801, 0.017116300761699677, 0.1720367670059204, 0.3916313052177429, 0.05510414391756058, 0.2876152992248535, 0.22692401707172394, 0.14989952743053436, 0.3368622660636902, 0.0913245752453804, 0.3484038710594177, 0.3637443780899048, 0.007217096630483866, 0.103476881980896, 0.036375418305397034, 0.1467411071062088, 0.6613936424255371, 0.30691561102867126, 0.27473992109298706, 0.05103013291954994, 0.09803401678800583, 0.18992389738559723, 0.012332501821219921, 0.08918186277151108, 0.009687116369605064, 0.01925584301352501, 0.0046735359355807304, 0.006799460854381323, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5125223994255066, 0.07351671159267426, 0.21591535210609436, 0.21059465408325195, 0.3288169205188751, 0.5466507077217102, 0.21618640422821045, 0.15017350018024445, 0.8681062459945679, 0.2442341297864914, 0.06865198910236359, 0.019835328683257103, 0.10077274590730667, 0.12228173017501831, 0.1682003289461136, 0.23535212874412537, 0.03722311928868294, 0.0383867472410202, 0.06886720657348633, 0.040591221302747726, 0.07368911802768707, 0.09838991612195969, 0.052333034574985504, 0.3684787154197693, 0.05692664161324501, 0.030762571841478348, 0.0074586388655006886, 0.017855344340205193, 0.004115242511034012, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4846254289150238, 0.17620818316936493, 0.23995715379714966, 0.09631974995136261, 0.22585628926753998, 0.04512355476617813, 0.06700992584228516, 0.01503949984908104, 0.07369402050971985, 0.03452376648783684, 0.04930250719189644, 0.1451164036989212, 0.010093613527715206, 0.020862746983766556, 0.16003692150115967, 0.17482686042785645, 0.020169643685221672, 0.038628242909908295, 0.03409411385655403, 0.011309999041259289, 0.013418656773865223, 0.010934274643659592, 0.0036632094997912645, 0.017374617978930473, 0.023464469239115715, 0.0031370571814477444, 0.004764250945299864, 0.022831382229924202, 0.0012565170181915164, 0.01132481824606657, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12189289927482605, 0.3658526837825775, 0.06606122851371765, 0.1638106107711792, 0.07819290459156036, 0.27624964714050293, 0.09599297493696213, 0.08126427978277206, 0.14055852591991425, 0.02327289618551731, 0.03783821687102318, 0.2963305115699768, 0.13405835628509521, 0.09205315262079239, 0.12166540324687958, 0.2204812914133072, 0.0262058824300766, 0.011961801908910275, 0.00864139012992382, 0.033310361206531525, 0.014301336370408535, 0.009627565741539001, 0.26419174671173096, 0.09070254862308502, 0.04369048774242401, 0.05080936849117279, 0.022543352097272873, 0.012377972714602947, 0.030277462676167488, 0.2341402769088745, 0.01971697248518467, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.278896301984787, 0.1438806802034378, 0.46959513425827026, 0.3356979489326477, 0.3651174008846283, 0.1071292906999588, 0.18117688596248627, 0.20183299481868744, 0.29131460189819336, 0.13872042298316956, 0.021824011579155922, 0.06362087279558182, 0.34404000639915466, 0.13715140521526337, 0.1120462715625763, 0.253863126039505, 0.004828702192753553, 0.05376851186156273, 0.11550138890743256, 0.1064227893948555, 0.03894256055355072, 0.006152869202196598, 0.03161965310573578, 0.06215812265872955, 0.10950783640146255, 0.01032247580587864, 0.005066303536295891, 0.011880352161824703, 0.09494113177061081, 0.06700112670660019, 0.10617008060216904, 0.020382743328809738, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2151702344417572, 0.2682046890258789, 0.2758127450942993, 0.20445802807807922, 0.06759822368621826, 0.058143485337495804, 0.21948587894439697, 0.1328936666250229, 0.04737214744091034, 0.09880322962999344, 0.06969184428453445, 0.0649414211511612, 0.09957331418991089, 0.08072139322757721, 0.15442174673080444, 0.04813924431800842, 0.008662978187203407, 0.10469061881303787, 0.06787187606096268, 0.02962217852473259, 0.04144993796944618, 0.019078848883509636, 0.10597121715545654, 0.0923849567770958, 0.24696239829063416, 0.010940729640424252, 0.060362689197063446, 0.059540145099163055, 0.36283043026924133, 0.1817280501127243, 0.2542697787284851, 0.10456714779138565, 0.017782384529709816, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10625648498535156, 0.3580685555934906, 0.2235240340232849, 0.2717205584049225, 0.14765356481075287, 0.1302592158317566, 0.182493656873703, 0.07402253895998001, 0.044094108045101166, 0.28373098373413086, 0.09141446650028229, 0.13240621984004974, 0.1622740924358368, 0.2716645896434784, 0.09359043836593628, 0.10143542289733887, 0.13917230069637299, 0.040259018540382385, 0.030723553150892258, 0.006155712995678186, 0.031952716410160065, 0.3338092863559723, 0.06915750354528427, 0.1324792504310608, 0.11542332917451859, 0.05764009431004524, 0.04023035988211632, 0.03596781566739082, 0.1495574563741684, 0.02840258926153183, 0.049019940197467804, 0.4096885919570923, 0.03150010108947754, 0.02953496389091015, NaN, NaN, NaN, NaN, NaN, NaN], [0.08181191235780716, 0.05183182656764984, 0.18780435621738434, 0.39972010254859924, 0.11086275428533554, 0.3443254232406616, 0.26716044545173645, 0.2157517671585083, 0.3917877972126007, 0.09846898168325424, 0.25891563296318054, 0.25942671298980713, 0.008535100147128105, 0.11220833659172058, 0.06895694881677628, 0.1521255224943161, 0.6490614414215088, 0.39427587389945984, 0.3861289620399475, 0.05361294746398926, 0.09808307886123657, 0.16810499131679535, 0.014004985801875591, 0.1451900601387024, 0.008040589280426502, 0.022555561736226082, 0.013471563346683979, 0.006859058979898691, 0.05312783271074295, 0.04058152437210083, 0.023753749206662178, 0.3811529278755188, 0.052651502192020416, 0.007359141018241644, 0.007947265170514584, NaN, NaN, NaN, NaN, NaN], [0.4507053792476654, 0.10277862101793289, 0.16431982815265656, 0.2027788907289505, 0.318918377161026, 0.4106469452381134, 0.24116744101047516, 0.1587350070476532, 0.8309358358383179, 0.2625651955604553, 0.047453198581933975, 0.009295494295656681, 0.07160880416631699, 0.07481760531663895, 0.19364440441131592, 0.2650813162326813, 0.032561566680669785, 0.05222610384225845, 0.09714324027299881, 0.038093939423561096, 0.08016244322061539, 0.09171951562166214, 0.056265611201524734, 0.42980653047561646, 0.0462084598839283, 0.03524700179696083, 0.017182864248752594, 0.04137876257300377, 0.007372017949819565, 0.08077534288167953, 0.07507885992527008, 0.050101280212402344, 0.02560576982796192, 0.006666052620857954, 0.016142593696713448, 0.003943128511309624, NaN, NaN, NaN, NaN], [0.5336673855781555, 0.18865860998630524, 0.19927646219730377, 0.10614699125289917, 0.21258802711963654, 0.035614922642707825, 0.07572873681783676, 0.021095039322972298, 0.08985494822263718, 0.061252057552337646, 0.05201297253370285, 0.10173538327217102, 0.008337927050888538, 0.017984798178076744, 0.15578274428844452, 0.186274453997612, 0.02024305984377861, 0.052268851548433304, 0.04830823838710785, 0.011142827570438385, 0.015970220789313316, 0.01383616030216217, 0.004258061293512583, 0.024750858545303345, 0.02320612221956253, 0.004944193176925182, 0.006908308248966932, 0.022138824686408043, 0.002315782941877842, 0.022694725543260574, 0.010753386653959751, 0.0032616793178021908, 0.0013332129456102848, 0.0031688748858869076, 0.015737321227788925, 0.00092066585784778, 0.009911282919347286, NaN, NaN, NaN], [0.11776354163885117, 0.337507039308548, 0.055947914719581604, 0.144154354929924, 0.09536269307136536, 0.2646341919898987, 0.10820504277944565, 0.0982295498251915, 0.1891198456287384, 0.027041049674153328, 0.03162495046854019, 0.2652260959148407, 0.10165920853614807, 0.07911970466375351, 0.1373925358057022, 0.2620354890823364, 0.032388050109148026, 0.01473915670067072, 0.01008685864508152, 0.03682388737797737, 0.017798764631152153, 0.012407293543219566, 0.2692665457725525, 0.10958822816610336, 0.03793380409479141, 0.07735131680965424, 0.03087974339723587, 0.01817244663834572, 0.0740593820810318, 0.5664002895355225, 0.01639901101589203, 0.07361851632595062, 0.02498074807226658, 0.01953950524330139, 0.011185318231582642, 0.024920325726270676, 0.19407986104488373, 0.01722806692123413, NaN, NaN], [0.20648452639579773, 0.10074114054441452, 0.42538517713546753, 0.26027214527130127, 0.3658106029033661, 0.09280957281589508, 0.23363487422466278, 0.27985435724258423, 0.3744349181652069, 0.1453229784965515, 0.02015594393014908, 0.05169985443353653, 0.3284047245979309, 0.12707991898059845, 0.12262601405382156, 0.27593934535980225, 0.005811678245663643, 0.07111961394548416, 0.13982559740543365, 0.1345955729484558, 0.06462955474853516, 0.009384723380208015, 0.03974011912941933, 0.0818282812833786, 0.09768332540988922, 0.015042337588965893, 0.006764655001461506, 0.01590757444500923, 0.11177312582731247, 0.1289886087179184, 0.2743605673313141, 0.018859822303056717, 0.01428449247032404, 0.0072670611552894115, 0.013756940141320229, 0.08787993341684341, 0.08323681354522705, 0.09635237604379654, 0.025643613189458847, NaN], [0.019576620310544968, 0.03319034352898598, 0.0111849969252944, 0.010870445519685745, 0.03222370147705078, 0.13807591795921326, 0.0675833523273468, 0.0615379698574543, 0.013822048902511597, 0.008804764598608017, 0.004974161274731159, 0.01815059222280979, 0.1774466335773468, 0.06282598525285721, 0.15396134555339813, 0.17263205349445343, 0.01194645743817091, 0.02866498939692974, 0.16296441853046417, 0.0019488729303702712, 0.034664519131183624, 0.05397665500640869, 0.1285821497440338, 0.10828299820423126, 0.02950196899473667, 0.008275950327515602, 0.008977574296295643, 0.09588290750980377, 0.01758315972983837, 0.00981396809220314, 0.06520896404981613, 0.03634792938828468, 0.007794357370585203, 0.007516053505241871, 0.0633511170744896, 0.016588596627116203, 0.008872142061591148, 0.04887184873223305, 0.025813041254878044, 0.0022019031457602978]], [[0.3107149600982666, 0.049285680055618286, 0.08128133416175842, 0.03986956924200058, 0.07088969647884369, 0.1961679309606552, 0.15016919374465942, 0.05429982393980026, 0.1291487067937851, 0.03663256764411926, 0.25306442379951477, 0.3913470208644867, 0.2542778253555298, 0.252127081155777, 0.15921251475811005, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10834414511919022, 0.3508348762989044, 0.02124197781085968, 0.019397908821702003, 0.026673240587115288, 0.3167271912097931, 0.11886779963970184, 0.17699773609638214, 0.14507175981998444, 0.115145742893219, 0.6241064667701721, 0.1622784435749054, 0.5683063268661499, 0.15724869072437286, 0.12728430330753326, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6979861855506897, 0.039286430925130844, 0.3014020621776581, 0.003208757843822241, 0.01772892102599144, 0.014036925509572029, 0.19886529445648193, 0.09335973858833313, 0.4060034155845642, 0.28424081206321716, 0.26539483666419983, 0.1895008385181427, 0.4672236740589142, 0.16107353568077087, 0.10992881655693054, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5298255681991577, 0.6474234461784363, 0.19260530173778534, 0.026028962805867195, 0.013013242743909359, 0.01466711051762104, 0.11121421307325363, 0.06523838639259338, 0.29339125752449036, 0.46135157346725464, 0.7174844145774841, 0.3618351221084595, 0.19526919722557068, 0.0703459233045578, 0.24330592155456543, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.7494951486587524, 0.23358309268951416, 0.3640848398208618, 0.09014757722616196, 0.32190942764282227, 0.0021980239544063807, 0.07713330537080765, 0.030900368466973305, 0.08560045808553696, 0.26394325494766235, 0.11549779027700424, 0.44356539845466614, 0.12175428122282028, 0.3783136308193207, 0.14015373587608337, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3064809739589691, 0.15617568790912628, 0.4955383241176605, 0.8125641942024231, 0.02114781178534031, 0.2633197009563446, 0.014569958671927452, 0.04754461348056793, 0.03227522596716881, 0.09995166957378387, 0.0697590634226799, 0.0770602896809578, 0.19454655051231384, 0.18272873759269714, 0.19963966310024261, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5314973592758179, 0.5086395144462585, 0.5757231116294861, 0.44031307101249695, 0.2709468603134155, 0.0639616996049881, 0.2984015941619873, 0.0039451331831514835, 0.0197422094643116, 0.0031917106825858355, 0.05093149095773697, 0.12591752409934998, 0.25977155566215515, 0.0615861676633358, 0.3711840510368347, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2939777970314026, 0.2997593581676483, 0.5167340040206909, 0.46100836992263794, 0.39705657958984375, 0.5034002065658569, 0.07978513836860657, 0.0779491513967514, 0.012053987942636013, 0.01132633350789547, 0.028715649619698524, 0.059212565422058105, 0.20603224635124207, 0.15584728121757507, 0.14816488325595856, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3128078877925873, 0.0864272266626358, 0.7678588032722473, 0.6537591814994812, 0.8236088752746582, 0.6979317665100098, 0.30976778268814087, 0.014760972931981087, 0.5645584464073181, 0.004590533208101988, 0.008271697908639908, 0.012132997624576092, 0.028745530173182487, 0.04464057460427284, 0.1669740080833435, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6456499099731445, 0.1693999022245407, 0.7097220420837402, 0.5244839191436768, 0.46365103125572205, 0.5023244023323059, 0.9643971920013428, 0.24913577735424042, 0.13337120413780212, 0.06419410556554794, 0.012416149489581585, 0.0573885552585125, 0.016666844487190247, 0.008706454187631607, 0.1754455268383026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09960467368364334, 0.0907629206776619, 0.36143985390663147, 0.11092879623174667, 0.19937658309936523, 0.03214935213327408, 0.3196737766265869, 0.4763943552970886, 0.497630774974823, 0.1899363249540329, 0.1145005002617836, 0.004749455489218235, 0.0008605146431364119, 0.0007969819707795978, 0.02025206945836544, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3807562589645386, 0.26623356342315674, 0.4209006428718567, 0.27443018555641174, 0.5137820839881897, 0.1592678278684616, 0.6250110864639282, 0.6178545951843262, 0.9692861437797546, 0.5716569423675537, 0.22724294662475586, 0.17567582428455353, 0.008769324980676174, 0.002557128667831421, 0.05025441572070122, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2969632148742676, 0.16767999529838562, 0.46978121995925903, 0.28813451528549194, 0.45300158858299255, 0.33029136061668396, 0.6236194968223572, 0.1634167730808258, 0.8177276253700256, 0.718397855758667, 0.9021148681640625, 0.07875741273164749, 0.09992827475070953, 0.004932410083711147, 0.1707668900489807, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3945808410644531, 0.3581867516040802, 0.5247420072555542, 0.4120633900165558, 0.3024104833602905, 0.35548633337020874, 0.5872392654418945, 0.15815261006355286, 0.7289484143257141, 0.7948301434516907, 0.9396543502807617, 0.9256777167320251, 0.08537369966506958, 0.03166399896144867, 0.03224433213472366, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004588960204273462, 0.041907694190740585, 0.17755450308322906, 0.039724841713905334, 0.047663237899541855, 0.09274838864803314, 0.010110240429639816, 0.014862497337162495, 0.11161036789417267, 0.0490046888589859, 0.18517035245895386, 0.029471391811966896, 0.05094437301158905, 0.002971563721075654, 0.16300250589847565, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07712388038635254, 0.042244281619787216, 0.004363007377833128, 0.0015959119191393256, 0.019252488389611244, 0.02118455246090889, 0.001846740604378283, 0.0012080060550943017, 0.0007866616360843182, 0.001261864323168993, 0.002815018408000469, 0.017323212698101997, 0.00286104716360569, 0.004067797679454088, 0.15733002126216888, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.176344633102417, 0.3271441161632538, 0.08498391509056091, 0.04002806171774864, 0.06676299124956131, 0.008946515619754791, 0.012590638361871243, 0.0061616976745426655, 0.010515754111111164, 0.042563267052173615, 0.024306243285536766, 0.009260479360818863, 0.0002838150830939412, 0.0009972971165552735, 0.0829070582985878, 0.13826748728752136, 0.016647184267640114, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3345734477043152, 0.016792800277471542, 0.785018265247345, 0.16747814416885376, 0.3955724537372589, 0.09289640188217163, 0.041390396654605865, 0.004024161957204342, 0.04094661772251129, 0.023736434057354927, 0.20348279178142548, 0.041674140840768814, 0.012969214469194412, 0.03994787111878395, 0.04405270516872406, 0.12115656584501266, 0.053111400455236435, 0.35221540927886963, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027460135519504547, 0.0009503767942078412, 0.8045902252197266, 0.05251304432749748, 0.4111766219139099, 0.08071836084127426, 0.01928381621837616, 0.0005491983611136675, 0.029575586318969727, 0.001678029540926218, 0.033282194286584854, 0.007144003175199032, 0.012064780108630657, 0.008930332958698273, 0.0033295771572738886, 0.06620940566062927, 0.0874415934085846, 0.3174281120300293, 0.09698687493801117, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18455208837985992, 0.0566692017018795, 0.08522135764360428, 0.2798183560371399, 0.013304274529218674, 0.0006802850402891636, 0.09522412717342377, 0.0060977875255048275, 0.002369458321481943, 0.017453324049711227, 0.0036190226674079895, 2.9809654733981006e-05, 0.0002128492487827316, 0.0002820969675667584, 0.18610867857933044, 0.05510773882269859, 0.045387670397758484, 0.35701045393943787, 0.5011870265007019, 0.0787656381726265, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6536933779716492, 0.3485175371170044, 0.2007695585489273, 0.8106443881988525, 0.12433423846960068, 0.008092332631349564, 0.6807736158370972, 0.40895989537239075, 0.04516575112938881, 0.1387551873922348, 0.004862201400101185, 0.0003120531910099089, 0.00022667655139230192, 0.00031860917806625366, 0.07640787214040756, 0.05231153964996338, 0.1393265277147293, 0.34751832485198975, 0.15474379062652588, 0.1892920285463333, 0.06652400642633438, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08564082533121109, 0.05155009403824806, 0.10021068900823593, 0.5880905985832214, 0.0823356956243515, 0.0626063123345375, 0.7381499409675598, 0.566346287727356, 0.04188016802072525, 0.02469027414917946, 0.004355741199105978, 0.00042968738125637174, 2.4299803044414148e-05, 2.7212277927901596e-05, 0.001896930974908173, 0.04669328033924103, 0.038986966013908386, 0.38860636949539185, 0.09904015064239502, 0.3339899182319641, 0.027963249012827873, 0.04134462773799896, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03975995257496834, 0.012421448715031147, 0.08890707790851593, 0.605818510055542, 0.05048904940485954, 0.017510779201984406, 0.24702893197536469, 0.39587050676345825, 0.06098005548119545, 0.052625395357608795, 0.013424866832792759, 0.0005194320692680776, 0.000250102486461401, 0.0003063087642658502, 0.0010793216060847044, 0.20758312940597534, 0.07789289951324463, 0.047907259315252304, 0.006299893371760845, 0.2608397901058197, 0.044556185603141785, 0.061705876141786575, 0.034865181893110275, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11902385950088501, 0.011114073917269707, 0.22151720523834229, 0.2006509006023407, 0.03878694027662277, 0.01363028772175312, 0.3268369734287262, 0.04311302676796913, 0.8067907094955444, 0.34777864813804626, 0.25920552015304565, 0.09021251648664474, 0.035271789878606796, 0.0031717135570943356, 0.004271878860890865, 0.18052776157855988, 0.08179321140050888, 0.059846919029951096, 0.02793782763183117, 0.062999427318573, 0.04310278594493866, 0.024987775832414627, 0.015387488529086113, 0.132792130112648, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006270309444516897, 0.0001492560259066522, 0.00045137249981053174, 0.0007612273329868913, 7.476524478988722e-05, 0.013270817697048187, 0.04344405606389046, 0.014117085374891758, 0.6041488647460938, 0.07304701954126358, 0.010559855960309505, 0.0026350386906415224, 0.02638809196650982, 0.002994539914652705, 0.00020572090579662472, 0.03587701544165611, 0.020078828558325768, 0.04571571201086044, 0.02593454346060753, 0.007220670115202665, 0.03280382603406906, 0.012364541180431843, 0.04736338183283806, 0.48638036847114563, 0.015403805300593376, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002078789984807372, 0.000502656155731529, 0.00018232718866784126, 0.0008548289188183844, 0.0009249084978364408, 0.02029070071876049, 0.012032798491418362, 0.024348178878426552, 0.2300865352153778, 0.10343841463327408, 0.007660495117306709, 0.0012821657583117485, 0.0114271380007267, 0.0009412667131982744, 7.524124521296471e-05, 0.010417330078780651, 0.019508572295308113, 0.03964173421263695, 0.041229844093322754, 0.021899865940213203, 0.0029071751050651073, 0.010124437510967255, 0.08508285880088806, 0.40291228890419006, 0.4734281599521637, 0.015163381583988667, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.022463228553533554, 0.0013134862529113889, 0.00013891702110413462, 0.002816978842020035, 0.0011811865260824561, 0.0014538302784785628, 0.0005458829691633582, 0.0004073161107953638, 0.000992793939076364, 0.626685380935669, 0.1310541182756424, 0.1785772740840912, 0.1327074021100998, 0.014590581879019737, 3.459410072537139e-05, 0.08744391798973083, 0.1107466071844101, 0.15557123720645905, 0.13837403059005737, 0.05803389474749565, 0.026755833998322487, 0.03754325956106186, 0.4220706820487976, 0.16102783381938934, 0.2859216034412384, 0.1457504779100418, 0.03281670808792114, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004299411084502935, 0.00014757749158889055, 0.0013493087608367205, 0.003552102018147707, 0.004041418433189392, 0.004232631530612707, 0.00022051982523407787, 5.3625211876351386e-05, 0.008671559393405914, 0.2003454566001892, 0.2010745257139206, 0.20048564672470093, 0.327506959438324, 0.12215141952037811, 7.573522452730685e-05, 0.21633882820606232, 0.07441287487745285, 0.04740259423851967, 0.026924576610326767, 0.012407396920025349, 0.002398786135017872, 0.0038467273116111755, 0.13835540413856506, 0.06710492819547653, 0.026295386254787445, 0.17057135701179504, 0.013244924135506153, 0.46883779764175415, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011497906409204006, 0.0014132088981568813, 0.002270179335027933, 0.006387166678905487, 5.5530636018374935e-05, 0.0020248510409146547, 0.0021348590962588787, 0.001147052156738937, 0.0024277162738144398, 0.3687064051628113, 0.5298402905464172, 0.006611559074372053, 0.3372868299484253, 0.2915361225605011, 0.0002606022753752768, 0.027107199653983116, 0.05742119997739792, 0.06533583253622055, 0.024222400039434433, 0.014050583355128765, 0.013653005473315716, 0.0030738371424376965, 0.04425956308841705, 0.06826918572187424, 0.011929179541766644, 0.14959540963172913, 0.16161218285560608, 0.5212987065315247, 0.041249219328165054, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.043351031839847565, 0.015730101615190506, 0.006545424461364746, 0.11301398277282715, 0.001535893650725484, 0.0002994980022776872, 0.002417969051748514, 0.0027875620871782303, 0.007663458585739136, 0.4366588592529297, 0.29866132140159607, 0.03879629448056221, 0.0005757116014137864, 0.10755223035812378, 0.15693426132202148, 0.12232528626918793, 0.02327316626906395, 0.043996360152959824, 0.010462167672812939, 0.05786772817373276, 0.006097386125475168, 0.001271827262826264, 0.022651376202702522, 0.03627351298928261, 0.030646052211523056, 0.03145253658294678, 0.18536151945590973, 0.10030946880578995, 0.3235938847064972, 0.09760642796754837, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05824243649840355, 0.00918568018823862, 0.004823020659387112, 0.12202360481023788, 0.001364732626825571, 0.009540650062263012, 0.017077280208468437, 0.02250218391418457, 0.031557418406009674, 0.39489659667015076, 0.4118596911430359, 0.4739699363708496, 0.04330656677484512, 0.22410848736763, 0.009354491718113422, 0.01696004532277584, 0.0005225083441473544, 0.012039890512824059, 0.0003213977033738047, 0.024568837136030197, 0.0005492557538673282, 6.035636397427879e-05, 0.0032521369867026806, 0.016784805804491043, 0.013033770024776459, 0.023488081991672516, 0.04594254866242409, 0.04732683673501015, 0.2366781234741211, 0.2578820288181305, 0.02447950839996338, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10114194452762604, 0.055991608649492264, 0.0056193675845861435, 0.044799599796533585, 0.005612906999886036, 0.0018076150445267558, 0.0035521595273166895, 0.003050913568586111, 0.014126029796898365, 0.18568304181098938, 0.044660091400146484, 0.8178999423980713, 0.12312521040439606, 0.22830259799957275, 0.0015339198289439082, 0.016271475702524185, 0.026037830859422684, 0.05988215655088425, 0.04065781086683273, 0.0548781082034111, 0.0059303357265889645, 0.000490839418489486, 0.009792556054890156, 0.05564826726913452, 0.029693011194467545, 0.015783851966261864, 0.050408631563186646, 0.10483089834451675, 0.18894171714782715, 0.4590488076210022, 0.24355939030647278, 0.03408684581518173, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17329555749893188, 0.022842630743980408, 0.03050464764237404, 0.3040459156036377, 0.023058682680130005, 0.05675753578543663, 0.012084487825632095, 0.018060212954878807, 0.012510768137872219, 0.4205268621444702, 0.403047114610672, 0.5196431279182434, 0.14466160535812378, 0.15726853907108307, 0.003281315555796027, 0.011992339976131916, 0.02786487340927124, 0.025577154010534286, 0.02912752889096737, 0.009845648892223835, 0.0007121131638996303, 0.001387864351272583, 0.015649031847715378, 0.05334821715950966, 0.05039743706583977, 0.0003855754912365228, 0.07798124849796295, 0.03745294734835625, 0.16697214543819427, 0.29521557688713074, 0.2776513993740082, 0.29445046186447144, 0.031993161886930466, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21814380586147308, 0.013853680342435837, 0.0011839027283713222, 0.02006133459508419, 0.0059941732324659824, 0.004335244186222553, 0.0006587213138118386, 0.0008069095201790333, 6.766151636838913e-05, 0.4439576268196106, 0.16648612916469574, 0.7347545623779297, 0.19459886848926544, 0.05657987296581268, 0.0006026092451065779, 0.11517049372196198, 0.11416894942522049, 0.19162771105766296, 0.14611610770225525, 0.060761958360672, 0.02055470645427704, 0.021888524293899536, 0.20655019581317902, 0.047658227384090424, 0.055987950414419174, 0.01683689095079899, 0.005808014422655106, 0.045862384140491486, 0.09340663254261017, 0.10908356308937073, 0.18944555521011353, 0.26804569363594055, 0.20485185086727142, 0.037772081792354584, NaN, NaN, NaN, NaN, NaN, NaN], [0.034262340515851974, 0.0017182001611217856, 0.005656392779201269, 0.017169898375868797, 0.0156857930123806, 0.01468763966113329, 0.0007699507405050099, 0.00017933807976078242, 0.002019587904214859, 0.09474337100982666, 0.21286551654338837, 0.39837440848350525, 0.44769343733787537, 0.30061447620391846, 0.0009720441303215921, 0.24184046685695648, 0.07921410351991653, 0.056290365755558014, 0.026794791221618652, 0.016941547393798828, 0.0021516080014407635, 0.0023830668069422245, 0.05685606598854065, 0.02070370689034462, 0.003236053278669715, 0.01165463775396347, 0.004370343871414661, 0.030780060216784477, 0.00907946564257145, 0.06188458576798439, 0.04407832771539688, 0.006142587400972843, 0.14762946963310242, 0.013672620058059692, 0.4999893307685852, NaN, NaN, NaN, NaN, NaN], [0.1974877417087555, 0.05350746586918831, 0.02080627717077732, 0.07140190154314041, 0.0007820951868779957, 0.021851971745491028, 0.023295408114790916, 0.011020028032362461, 0.0015720969531685114, 0.3204348385334015, 0.5890824198722839, 0.011122598312795162, 0.40923523902893066, 0.5521805882453918, 0.009284045547246933, 0.03566991165280342, 0.0538097508251667, 0.09943600744009018, 0.028607800602912903, 0.020965654402971268, 0.013461945578455925, 0.002478980924934149, 0.02911236882209778, 0.02446376532316208, 0.0022762087173759937, 0.010774179361760616, 0.04047773778438568, 0.06471210718154907, 0.0026813328731805086, 0.07523855566978455, 0.030470186844468117, 0.0345987044274807, 0.1238497719168663, 0.17781274020671844, 0.4970780611038208, 0.04515520855784416, NaN, NaN, NaN, NaN], [0.04384012520313263, 0.020103074610233307, 0.00601673498749733, 0.10121199488639832, 0.0015372235793620348, 0.00047879578778520226, 0.0028034253045916557, 0.0035304632037878036, 0.0019347126362845302, 0.15543726086616516, 0.10060140490531921, 0.012154079042375088, 0.00020098914683330804, 0.049742307513952255, 0.15931616723537445, 0.12716706097126007, 0.02434932254254818, 0.05787394568324089, 0.013031681068241596, 0.06681805849075317, 0.007088592275977135, 0.0018475945107638836, 0.021072670817375183, 0.024636711925268173, 0.010089303366839886, 0.0076353950425982475, 0.05158482864499092, 0.009980393573641777, 0.034229546785354614, 0.01627102866768837, 0.008032353594899178, 0.013575052842497826, 0.04940066114068031, 0.19428585469722748, 0.10819438844919205, 0.2976790964603424, 0.08516447991132736, NaN, NaN, NaN], [0.33183732628822327, 0.07794758677482605, 0.02364480309188366, 0.3878714144229889, 0.007764760870486498, 0.055411770939826965, 0.07855504751205444, 0.09397301822900772, 0.02721172571182251, 0.38145557045936584, 0.42047446966171265, 0.5078706741333008, 0.03859835863113403, 0.25985077023506165, 0.0625251829624176, 0.01713084802031517, 0.000499976216815412, 0.019638467580080032, 0.00048709739348851144, 0.03356647491455078, 0.0008144291932694614, 0.00011953162174904719, 0.003664336632937193, 0.013800683431327343, 0.004805452190339565, 0.004433726891875267, 0.011711561121046543, 0.003556638490408659, 0.01588965393602848, 0.025807680562138557, 0.00022126971452962607, 0.004036479629576206, 0.00837762001901865, 0.04655361920595169, 0.04086336866021156, 0.22630761563777924, 0.2765483856201172, 0.02425519935786724, NaN, NaN], [0.4473247230052948, 0.3730325996875763, 0.029895052313804626, 0.15908104181289673, 0.02762797847390175, 0.008889964781701565, 0.016516737639904022, 0.012883803807199001, 0.01523641124367714, 0.22003965079784393, 0.05771813541650772, 0.8456536531448364, 0.1770154982805252, 0.31127816438674927, 0.007925343699753284, 0.010901566594839096, 0.020337969064712524, 0.07802019268274307, 0.0504593625664711, 0.06312800198793411, 0.009868033230304718, 0.000861799344420433, 0.010114955715835094, 0.052247028797864914, 0.012602821923792362, 0.005399123765528202, 0.01934058591723442, 0.013776490464806557, 0.010564911179244518, 0.04300173744559288, 0.008748980239033699, 0.0006391598144546151, 0.006108305882662535, 0.05087457224726677, 0.09035929292440414, 0.18751013278961182, 0.4462290108203888, 0.28552356362342834, 0.05451636388897896, NaN], [0.2188224196434021, 0.06026163697242737, 0.01674255169928074, 0.1205059364438057, 0.017392028123140335, 0.033714599907398224, 0.013199009001255035, 0.035441260784864426, 0.006878681946545839, 0.5097362399101257, 0.5390803217887878, 0.7098195552825928, 0.20610427856445312, 0.34404870867729187, 0.06464894115924835, 0.1367119550704956, 0.02979014255106449, 0.04602046683430672, 0.022530242800712585, 0.009278235025703907, 0.01184787880629301, 0.010125648230314255, 0.02445557340979576, 0.052750833332538605, 0.013119504787027836, 0.0006633299053646624, 0.007243738044053316, 0.02398994006216526, 0.00908573716878891, 0.013761860318481922, 0.007176807615906, 0.00677318312227726, 0.0021949538495391607, 0.01309704128652811, 0.09677710384130478, 0.12711098790168762, 0.1613820642232895, 0.37058699131011963, 0.3504316806793213, 0.02586444839835167]], [[6.113462859502761e-06, 0.5065946578979492, 7.261813152581453e-05, 5.1066386498122354e-14, 1.0490246824277965e-15, 1.4956003015903496e-12, 2.5734427609724886e-13, 2.1143946469237562e-06, 9.544867651811728e-08, 4.2543565892394497e-10, 6.215519418595328e-12, 1.687761909396901e-11, 1.6993320528513323e-08, 1.0583119935958507e-09, 9.857150189418462e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [4.727198188447801e-08, 0.002272214274853468, 0.8730366826057434, 0.0016238681273534894, 9.849362297975617e-11, 6.310171162720105e-14, 1.3311845115798748e-12, 1.350557283785747e-07, 1.07800769910682e-05, 3.4101576602552086e-05, 7.529693561991735e-07, 3.7022258592145363e-09, 3.1551092294357375e-10, 8.851498527195911e-12, 1.024629546009237e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [6.003397223786067e-10, 5.335852165444521e-06, 0.00445933174341917, 0.5796651840209961, 5.976808097329922e-05, 2.377180230439535e-09, 1.7792844021063958e-12, 1.2140626282075573e-09, 6.417224529542409e-09, 2.601910637167748e-06, 1.1842810181406094e-06, 1.8266834445057611e-07, 1.3081095096012518e-09, 1.5776791765370612e-12, 4.7676843678345904e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [2.4071971206038626e-15, 2.3560551770727793e-14, 9.98394700246763e-11, 1.7167060661904543e-07, 0.2774648666381836, 1.6012703781598248e-05, 9.760837530760607e-15, 4.654387315338889e-18, 8.039692137064508e-20, 2.1508527635127157e-16, 1.789740057545064e-11, 2.4233797191186568e-08, 2.7592322870972907e-10, 4.956549239646573e-15, 1.5411848153235042e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.9919477308935618e-13, 5.266535346254387e-16, 1.2917133013982517e-14, 7.221083175856791e-10, 8.195231930585578e-05, 0.5564944744110107, 4.117699063499458e-06, 5.438900198273533e-13, 2.4172004338169554e-20, 9.57835365503234e-22, 9.376302678036402e-17, 3.235451073724249e-10, 6.101883442966027e-09, 9.971044129253315e-11, 1.6162671201414014e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [9.771466125130246e-08, 3.17872256294649e-11, 3.1429036890379125e-13, 5.901367481980172e-16, 4.2342058748090494e-09, 0.0012305855052545667, 0.6103256940841675, 2.2161180822877213e-05, 7.972257402844019e-12, 6.481494664823834e-19, 5.35928561114305e-19, 7.863773244772346e-14, 1.1593314752644801e-07, 8.808668212623161e-07, 1.1730364235518209e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [2.6939844799400703e-10, 3.892770337188267e-07, 2.2438891023046637e-10, 2.095593632707407e-18, 1.8655412772298346e-14, 2.206185598652155e-07, 3.0316745323943906e-05, 0.33891788125038147, 5.437008439912461e-06, 1.3213468337612382e-14, 2.5347562276209975e-18, 1.0659246862729562e-18, 2.6392999114346893e-13, 9.868956762915104e-10, 1.6170986327779246e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.3015508670832787e-09, 4.1474245904282725e-07, 7.619819371029735e-06, 9.079691751061325e-13, 5.725895077835787e-16, 1.0568446176517903e-14, 8.978999488373773e-11, 2.253716047562193e-05, 0.9323674440383911, 0.0001553743495605886, 1.1094852814252931e-10, 4.251380123255501e-17, 3.4548606558270072e-18, 1.563022274271835e-14, 1.7832363141678798e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.2218349942916262e-10, 4.9370779464652514e-08, 1.0212672805209877e-06, 3.802215486903293e-11, 4.1323817879847246e-16, 3.8503187577578586e-16, 6.2032051316354e-15, 3.2203126920649083e-07, 8.202762546716258e-05, 0.5051153898239136, 1.6483796571264975e-05, 2.317061202194298e-13, 9.134085045449695e-19, 4.959048342554486e-21, 1.9839136555788173e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [3.5615963439117673e-14, 6.311461336200308e-12, 7.572167781688677e-09, 7.864790063649707e-08, 5.871175941252194e-13, 4.399392566282849e-15, 3.6105855357745724e-20, 8.408651243829376e-14, 2.915925279012299e-09, 2.7294316168990918e-05, 0.31493836641311646, 1.4271394093157141e-06, 7.57530499374999e-14, 1.0444343699767344e-21, 5.65783730976932e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.619628042792698e-10, 6.862534152052291e-11, 7.238428190170509e-10, 5.1994692995549485e-08, 8.193378420173758e-08, 6.734891755399985e-09, 1.47457238341411e-14, 5.793711288450045e-15, 1.5065480465795492e-14, 1.167909147170576e-08, 0.0003541565383784473, 0.5504465699195862, 2.5677532903500833e-05, 4.9321430864142715e-14, 1.3459792569392448e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [8.003913504195381e-11, 5.626729984720136e-12, 4.9737857062137625e-12, 1.4365373474101162e-11, 1.165467935493325e-07, 3.263785401941277e-05, 9.4434834951862e-11, 2.6144878938953817e-15, 6.540743544149476e-19, 2.5930401594030658e-17, 1.8366722587259687e-09, 1.8794700736179948e-05, 0.49058014154434204, 8.066950840657228e-07, 1.3585024589701788e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.0801989728040362e-12, 2.2359935084037552e-13, 1.1691597126203823e-12, 1.0214807062303036e-16, 2.4270561688882752e-12, 4.4484740890915475e-10, 1.1468358207533669e-10, 1.5131759777478604e-13, 3.7208958865722007e-20, 6.888861115537483e-21, 1.5888746801787275e-18, 3.2241334168431335e-12, 5.685043561243219e-06, 0.3912107050418854, 3.0407140694244106e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [5.397048425948014e-07, 2.3629811494174646e-06, 8.614414923613367e-07, 8.006720286779512e-13, 4.92412575016192e-14, 2.066644277931573e-08, 0.00031528103863820434, 0.011093947105109692, 3.7555511767095595e-07, 1.151808547627739e-13, 5.505821095062543e-16, 1.6971218267519683e-12, 5.383023108151974e-06, 0.8731740117073059, 0.04139598086476326, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6266164779663086, 0.3128010928630829, 0.06246759742498398, 0.00042505442979745567, 0.008534153923392296, 0.09425555169582367, 0.2709643542766571, 0.686626672744751, 0.3142872750759125, 0.10107265412807465, 0.015935143455863, 0.012286541052162647, 0.14970052242279053, 0.3989029824733734, 0.022492708638310432, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.24012988805770874, 0.6692726612091064, 0.08029869198799133, 0.41845017671585083, 0.08128808438777924, 0.09738753736019135, 0.15100885927677155, 0.2691691815853119, 0.013517879880964756, 0.21848294138908386, 0.16758716106414795, 0.12734578549861908, 0.32224464416503906, 0.12471552193164825, 0.07385692000389099, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13747748732566833, 0.012865100987255573, 0.3056560158729553, 0.3759651184082031, 0.20075583457946777, 0.056869279593229294, 0.27502477169036865, 0.09038521349430084, 0.09535539150238037, 0.27579623460769653, 0.15189220011234283, 0.6071571111679077, 0.0820951759815216, 0.09481122344732285, 0.09779953956604004, 0.13988038897514343, 0.003474950324743986, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007538634352385998, 0.02957071363925934, 0.011847163550555706, 0.055522944778203964, 0.04100131243467331, 0.031534671783447266, 0.06567902117967606, 0.09044305235147476, 0.007193693891167641, 0.06334451586008072, 0.07378207892179489, 0.07786792516708374, 0.28214019536972046, 0.08070375770330429, 0.20607011020183563, 0.14879919588565826, 0.018745053559541702, 0.07372914999723434, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005881547927856445, 0.008371960371732712, 0.010823756456375122, 0.024797217920422554, 0.024142105132341385, 0.01083815935999155, 0.008304014801979065, 0.006388344801962376, 0.009114595130085945, 0.022048065438866615, 0.1306026130914688, 0.23451638221740723, 0.3918500244617462, 0.08784151822328568, 0.2650633752346039, 0.030327370390295982, 0.02692173607647419, 0.46947386860847473, 0.09036581218242645, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20629070699214935, 0.2529377341270447, 0.028870999813079834, 0.049127642065286636, 0.04690879210829735, 0.11594393104314804, 0.15515393018722534, 0.06585636734962463, 0.0420556403696537, 0.1996643990278244, 0.028717953711748123, 0.7190893292427063, 0.30376943945884705, 0.22654840350151062, 0.12926629185676575, 0.164228156208992, 0.0009850627975538373, 0.0044541023671627045, 0.0005622706958092749, 0.024160074070096016, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01586613617837429, 0.15566423535346985, 0.015082520432770252, 0.009204044006764889, 0.002680863719433546, 0.07106906920671463, 0.08370621502399445, 0.05749649554491043, 0.03059268370270729, 0.012942377477884293, 0.0011753733269870281, 0.00916373822838068, 0.0020018015056848526, 0.049308281391859055, 0.19197486340999603, 0.020124448463320732, 0.0011880549136549234, 0.0042731426656246185, 3.242780803702772e-05, 0.6858344078063965, 0.023040860891342163, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03849078342318535, 0.08146823942661285, 0.03517843410372734, 0.025976145640015602, 0.02364599145948887, 0.1389763057231903, 0.02619975060224533, 0.034312427043914795, 0.02985706366598606, 0.029806064441800117, 0.00684476038441062, 0.03280223533511162, 0.030126189813017845, 0.10321015119552612, 0.23163792490959167, 0.0017230550292879343, 3.356653905939311e-05, 0.001307086437009275, 1.4968540199333802e-05, 0.5564903616905212, 0.236929789185524, 0.007688341196626425, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2772977352142334, 0.05161405727267265, 0.04358568787574768, 0.047931231558322906, 0.04583681374788284, 0.08128579705953598, 0.15782645344734192, 0.0856042429804802, 0.10767779499292374, 0.11355230212211609, 0.041377030313014984, 0.252811074256897, 0.05780917406082153, 0.19973745942115784, 0.22427907586097717, 0.1612924486398697, 0.00029754414572380483, 0.0029063820838928223, 0.0015110797248780727, 0.16695675253868103, 0.3453270196914673, 0.07193248718976974, 0.006359610706567764, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.023119861260056496, 0.02037731558084488, 0.0453791618347168, 0.1060030460357666, 0.006244942545890808, 0.0085020512342453, 0.012060720473527908, 0.014560479670763016, 0.00689319521188736, 0.011241135187447071, 0.023835573345422745, 0.02693312056362629, 0.011436404660344124, 0.019489392638206482, 0.30997538566589355, 0.1910298615694046, 0.01051796693354845, 0.0018660163041204214, 0.0012154864380136132, 0.022663934156298637, 0.008557457476854324, 0.016767704859375954, 0.05246622860431671, 0.08816055208444595, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.045414164662361145, 0.005229660775512457, 0.011418518610298634, 0.009312640875577927, 0.0002147085906472057, 0.12653864920139313, 0.05854451283812523, 0.11896014213562012, 0.0156405046582222, 0.010270207189023495, 0.0032450463622808456, 0.015787174925208092, 0.011106730438768864, 0.007675709668546915, 0.3779195249080658, 0.24295811355113983, 0.0012021175352856517, 0.0005200211890041828, 0.00015996988804545254, 0.002627951791509986, 0.03450923040509224, 0.014827161096036434, 0.015967652201652527, 0.005632439162582159, 0.001854590023867786, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007367350626736879, 0.012884993106126785, 0.01019106525927782, 0.011957473121583462, 0.054886650294065475, 0.09750530868768692, 0.029414953663945198, 0.08492925018072128, 0.17440666258335114, 0.003643231000751257, 0.00105402956251055, 0.02280060388147831, 0.0010922637302428484, 0.005130939185619354, 0.09500079602003098, 0.2492469847202301, 0.004325273912400007, 0.004784590099006891, 0.013903478160500526, 0.0013026667293161154, 0.003877879586070776, 0.017029188573360443, 0.01781909167766571, 0.05003270506858826, 0.026610376313328743, 0.008462576195597649, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02996714971959591, 0.028387926518917084, 0.16122521460056305, 0.0898616760969162, 0.06381779164075851, 0.20551051199436188, 0.13175098598003387, 0.562389075756073, 0.04834860563278198, 0.013581722043454647, 0.03991095721721649, 0.10736902058124542, 0.03830268979072571, 0.05736052244901657, 0.27213579416275024, 0.25306010246276855, 0.0017952719936147332, 0.005404005758464336, 0.021692873910069466, 0.0005702165653929114, 9.544018394080922e-05, 0.001603480544872582, 0.001225438085384667, 0.036846794188022614, 0.001749897957779467, 0.016878794878721237, 0.021703237667679787, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03571658954024315, 0.012061648070812225, 0.08574458211660385, 0.022463832050561905, 0.12578466534614563, 0.07826194912195206, 0.06577891856431961, 0.13274507224559784, 0.06591502577066422, 0.05002211779356003, 0.03129255399107933, 0.27911075949668884, 0.31601372361183167, 0.10930214822292328, 0.30993908643722534, 0.055758021771907806, 0.000425096252001822, 0.0005783061496913433, 0.0011671994579955935, 0.00034630659501999617, 0.00031045774812810123, 0.0006358043756335974, 0.004018810577690601, 0.0004720573779195547, 0.006387148518115282, 0.038948215544223785, 0.40798652172088623, 0.0038703898899257183, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04630875587463379, 0.03141915798187256, 0.03061339072883129, 0.007028677500784397, 0.008451082743704319, 0.02540888637304306, 0.012118873186409473, 0.09331455826759338, 0.0033372503239661455, 0.01357665192335844, 0.0069510783068835735, 0.017483821138739586, 0.033454760909080505, 0.014270796440541744, 0.44127020239830017, 0.29551389813423157, 0.006183725781738758, 0.0010477532632648945, 0.001470124931074679, 0.0028535614255815744, 0.003910644445568323, 0.004942604340612888, 0.003798475954681635, 0.01567114144563675, 0.060374900698661804, 0.006600319407880306, 0.010896215215325356, 0.009779008105397224, 0.007320093456655741, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1722828894853592, 0.15122008323669434, 0.056102070957422256, 0.09136570990085602, 0.02421834133565426, 0.045343294739723206, 0.034619707614183426, 0.030837759375572205, 0.019798463210463524, 0.04411705583333969, 0.05331422761082649, 0.09423463046550751, 0.1436629444360733, 0.13433872163295746, 0.1229754090309143, 0.1632017195224762, 0.00519327400252223, 0.00790441408753395, 0.0009941658936440945, 0.3241596221923828, 0.0008480648975819349, 0.0001429034018656239, 0.0012253100285306573, 0.0008457236108370125, 0.006411578040570021, 0.0016067628748714924, 0.003762597683817148, 0.029224932193756104, 0.07677540183067322, 0.06338826566934586, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.022473091259598732, 0.0489150770008564, 0.010993139818310738, 0.03897916153073311, 0.003662768052890897, 0.002051829593256116, 0.0037445707712322474, 0.016557298600673676, 0.014907213859260082, 0.004300208762288094, 0.004852794576436281, 0.0027131394017487764, 0.016001524403691292, 0.008091894909739494, 0.25544992089271545, 0.005401996895670891, 6.3005199990584515e-06, 0.0004310416697990149, 8.47076989884954e-06, 0.009243682958185673, 0.0008590375073254108, 4.37394373875577e-06, 6.523932825075462e-05, 8.531090134056285e-05, 0.0006816720124334097, 7.644478318979964e-05, 0.00018924157484434545, 0.0012375408550724387, 0.023784970864653587, 0.4309314787387848, 0.034907225519418716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08012817800045013, 0.2898695766925812, 0.022246699780225754, 0.06057273969054222, 0.025327028706669807, 0.02957070618867874, 0.04002644121646881, 0.019245512783527374, 0.01995179057121277, 0.020330116152763367, 0.006697094067931175, 0.015452835708856583, 0.014569609425961971, 0.04013357311487198, 0.2585589587688446, 0.29775136709213257, 0.006892140489071608, 0.009814155288040638, 0.016249310225248337, 0.004830268211662769, 0.0035455955658107996, 0.0007549467263743281, 0.000541276705916971, 0.0031480982434004545, 0.001557780895382166, 0.0010192448971793056, 0.0018504501786082983, 0.002619183622300625, 0.1016833484172821, 0.03818811476230621, 0.06928347051143646, 0.0412699431180954, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01832924410700798, 0.023918962106108665, 0.024782713502645493, 0.033514510840177536, 0.050549402832984924, 0.013098560273647308, 0.023091215640306473, 0.030541786924004555, 0.1064886748790741, 0.006106832530349493, 0.0024854408111423254, 0.018918434157967567, 0.0075035663321614265, 0.009370497427880764, 0.21452490985393524, 0.26683223247528076, 0.0017643374158069491, 0.02531762421131134, 0.047485485672950745, 0.0005023732082918286, 0.0011795219033956528, 0.002227108459919691, 0.0028741960413753986, 0.005215880926698446, 0.001946018310263753, 3.592624852899462e-05, 0.001338632428087294, 0.0025214410852640867, 0.07723907381296158, 0.012742026709020138, 0.25196006894111633, 0.052669085562229156, 0.020061112940311432, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027254067361354828, 0.020437292754650116, 0.14233240485191345, 0.08538791537284851, 0.03242940828204155, 0.0897425189614296, 0.08476056158542633, 0.2620556950569153, 0.02126460149884224, 0.023079702630639076, 0.03143052011728287, 0.04489685967564583, 0.046720463782548904, 0.03604652360081673, 0.23038896918296814, 0.3006725609302521, 0.0014043879928067327, 0.009936605580151081, 0.037061650305986404, 0.0005129858036525548, 5.274279828881845e-05, 0.0006371501949615777, 0.00048446646542288363, 0.015043019317090511, 0.0003374778898432851, 0.0015171451959758997, 0.001911269617266953, 0.0014702629996463656, 0.015123972669243813, 0.0006335150101222098, 0.0006853189552202821, 0.0006114236894063652, 0.013829384930431843, 0.010252222418785095, NaN, NaN, NaN, NaN, NaN, NaN], [0.042377930134534836, 0.017293933779001236, 0.08730384707450867, 0.030179454013705254, 0.12187745422124863, 0.05139933153986931, 0.047754548490047455, 0.066692054271698, 0.06521614640951157, 0.05196157470345497, 0.028108397498726845, 0.17703385651111603, 0.22747749090194702, 0.06955988705158234, 0.28824013471603394, 0.11150761693716049, 0.0006332705961540341, 0.0012255925685167313, 0.0022868558298796415, 0.0007688697660341859, 0.00046408100752159953, 0.0006869957433082163, 0.0021696356125175953, 0.0003113164857495576, 0.0013619231758639216, 0.004312699660658836, 0.1263500303030014, 0.0001710234791971743, 0.0024227115791291, 0.0006429344066418707, 0.008991677314043045, 0.01230061985552311, 0.025017380714416504, 0.33947470784187317, 0.0032216052059084177, NaN, NaN, NaN, NaN, NaN], [0.03372317552566528, 0.030876630917191505, 0.025082340463995934, 0.008588657714426517, 0.007454049773514271, 0.009771045297384262, 0.010381288826465607, 0.041183773428201675, 0.004549690056592226, 0.01619204692542553, 0.0060179769061505795, 0.009672058746218681, 0.022905999794602394, 0.009750566445291042, 0.30946746468544006, 0.31111404299736023, 0.0035644923336803913, 0.0013678895775228739, 0.0016790243098512292, 0.0035299588926136494, 0.004438228905200958, 0.004504224751144648, 0.0015486004995182157, 0.006104794796556234, 0.009403211995959282, 0.00038756802678108215, 0.001732571516185999, 0.00042684219079092145, 0.00029873420135118067, 0.02043243870139122, 0.02443091571331024, 0.011036018840968609, 0.0030384601559489965, 0.007405058480799198, 0.004648045636713505, 0.010011163540184498, NaN, NaN, NaN, NaN], [0.18900562822818756, 0.14908763766288757, 0.05840699374675751, 0.10216160118579865, 0.03072887472808361, 0.04109037667512894, 0.03799780085682869, 0.02909342385828495, 0.03500371053814888, 0.0757574513554573, 0.061073921620845795, 0.09956928342580795, 0.10441071540117264, 0.14136889576911926, 0.13095542788505554, 0.16896948218345642, 0.0033956619445234537, 0.009647470898926258, 0.0011160745052620769, 0.30864211916923523, 0.0008666384965181351, 0.0001862353819888085, 0.0007671809289604425, 0.0006719603552483022, 0.002030742121860385, 0.00038655498065054417, 0.0009093419066630304, 0.0015865613240748644, 0.007534818258136511, 0.009185722097754478, 0.00011195908882655203, 0.003075815038755536, 0.000886340974830091, 0.0034873690456151962, 0.021776562556624413, 0.11334169656038284, 0.0832705944776535, NaN, NaN, NaN], [0.014150185510516167, 0.03789284825325012, 0.007744992151856422, 0.02556411363184452, 0.0037681234534829855, 0.001123085618019104, 0.002939486177638173, 0.010072565637528896, 0.019109029322862625, 0.003645692951977253, 0.0027771664317697287, 0.002490789396688342, 0.007166225463151932, 0.005180294159799814, 0.2058444321155548, 0.006588279269635677, 7.165617716964334e-06, 0.0005450915195979178, 1.0953889614029322e-05, 0.01959507167339325, 0.001590097788721323, 1.1096496564277913e-05, 7.439414184773341e-05, 9.72584675764665e-05, 0.00039174238918349147, 2.7912905352422968e-05, 4.964227991877124e-05, 7.256279786815867e-05, 0.00222678086720407, 0.04727102443575859, 0.0002576226834207773, 0.00020273383415769786, 7.391278631985188e-05, 0.00018598776659928262, 0.000617648009210825, 0.03195251524448395, 0.45461374521255493, 0.037591490894556046, NaN, NaN], [0.0469474196434021, 0.1743137687444687, 0.021908296272158623, 0.046387769281864166, 0.02985612489283085, 0.019742406904697418, 0.040140021592378616, 0.01437240932136774, 0.02856219932436943, 0.018488112837076187, 0.004136314615607262, 0.01038376335054636, 0.009851893410086632, 0.026245350018143654, 0.22488054633140564, 0.35417911410331726, 0.010997277684509754, 0.014662563800811768, 0.023722819983959198, 0.01071385107934475, 0.009427045471966267, 0.002653747797012329, 0.0011037624208256602, 0.005973298568278551, 0.0016420705942437053, 0.0009447215707041323, 0.001327668083831668, 0.0005524749867618084, 0.012130306102335453, 0.005379356909543276, 0.0037436189595609903, 0.0009285339619964361, 0.0002853046462405473, 0.0013114019529893994, 0.0012977200094610453, 0.08090774714946747, 0.034737478941679, 0.058711227029561996, 0.0672648623585701, NaN], [0.00832295510917902, 0.021339448168873787, 0.00394090311601758, 0.002333499025553465, 0.05547437444329262, 0.007243151310831308, 0.011641105636954308, 0.0331541933119297, 0.010278979316353798, 0.011881710961461067, 0.001766148954629898, 0.04899042472243309, 0.01878243498504162, 0.01244808267802, 0.15685127675533295, 0.18188641965389252, 0.00040442554745823145, 0.0015771333128213882, 0.005189571529626846, 8.387575689994264e-06, 0.0001226859458256513, 0.0011242604814469814, 0.0013583728577941656, 0.0030172227416187525, 0.00029841059586033225, 1.2829146726289764e-05, 0.001467264024540782, 0.001090237987227738, 0.002914785873144865, 0.0006871690275147557, 0.002592542441561818, 0.00021328746515791863, 6.871169898658991e-05, 0.002350796014070511, 0.0026233955286443233, 0.02620280720293522, 0.005966363474726677, 0.08270465582609177, 0.010547555983066559, 0.018362630158662796]]], [[[0.1393769532442093, 0.0735321119427681, 0.701509952545166, 0.10650816559791565, 0.05110495164990425, 0.021589145064353943, 0.0033319133799523115, 0.0014166238252073526, 0.01486207265406847, 0.006584684830158949, 0.002582702785730362, 0.0004108685825485736, 0.010701421648263931, 0.009390643797814846, 0.06290604919195175, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0030957262497395277, 0.0237117987126112, 0.7945073246955872, 0.09792613238096237, 0.2614360749721527, 0.179405078291893, 0.011310527101159096, 0.009954328648746014, 0.009489532560110092, 0.0005609119543805718, 0.000751268700696528, 0.0001462608779547736, 0.004604416899383068, 0.004964352585375309, 0.019775664433836937, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.002461136318743229, 0.024594180285930634, 0.009559455327689648, 0.055053047835826874, 0.30010533332824707, 0.4690517783164978, 0.03334644436836243, 0.0075769852846860886, 0.007821744307875633, 0.004109389614313841, 0.0022267017047852278, 0.000916018383577466, 0.0037954216822981834, 0.0007741246954537928, 0.004415341652929783, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0019876149017363787, 0.0012237336486577988, 0.00015556006110273302, 0.0003553472051862627, 0.4419420659542084, 0.6252713799476624, 0.02062046155333519, 0.0028509902767837048, 0.00548406969755888, 0.0003452444798313081, 0.0001962203241419047, 0.0008938669925555587, 0.0009214308229275048, 1.2216354662086815e-05, 0.0019377138232812285, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00020824302919209003, 0.00021322975226212293, 4.6913473852328025e-06, 0.00017657040734775364, 0.0005752452998422086, 0.5289100408554077, 0.1970362812280655, 0.12947966158390045, 0.0005265067447908223, 0.000227929005632177, 6.233566091395915e-05, 0.0001991882745642215, 0.00032238851417787373, 0.0003627484547905624, 0.0016414258861914277, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0010278578847646713, 0.0029486939311027527, 0.00014835220645181835, 0.00036925319000147283, 0.00742883887141943, 0.03272741660475731, 0.8576475977897644, 0.03500620648264885, 0.2982224225997925, 0.0003585784579627216, 5.663683623424731e-05, 0.0011889662127941847, 0.00576341338455677, 0.003998933359980583, 0.03130826726555824, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.002113666385412216, 0.004151111003011465, 0.002428078791126609, 0.002119476906955242, 0.001100956811569631, 0.003687644377350807, 0.13543397188186646, 0.11922256648540497, 0.7567945718765259, 0.2570010721683502, 0.004903816152364016, 0.0001005519661703147, 0.000830159813631326, 0.001259618904441595, 0.14076685905456543, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0010344160255044699, 0.00660368800163269, 0.0025270660407841206, 0.00023567670723423362, 0.0004021638887934387, 0.0030120171140879393, 0.0016376315616071224, 0.0524386465549469, 0.7797302007675171, 0.1269131302833557, 0.004214781802147627, 0.0002750723797362298, 0.002267329953610897, 0.001067862962372601, 0.16698867082595825, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0009750229655764997, 0.0120720649138093, 0.0038384809158742428, 0.0036232813727110624, 0.004431525245308876, 0.0007613649941049516, 5.662842158926651e-05, 0.01338160876184702, 0.041878536343574524, 0.7091978788375854, 0.2535402476787567, 0.13969287276268005, 0.026510832831263542, 0.0006678565987385809, 0.015569130890071392, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0002093962684739381, 0.00030164673808030784, 0.00010105424007633701, 5.030819465901004e-06, 0.001411793869920075, 0.003664590884000063, 0.00017403968377038836, 0.0011218853760510683, 0.011106000281870365, 0.003924186807125807, 0.07315385341644287, 0.3008219599723816, 0.36353737115859985, 0.025737306103110313, 0.0060785748064517975, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0001716838014544919, 0.0008840822265483439, 4.3183892557863146e-05, 3.6494086543825688e-06, 0.0005770743009634316, 0.010045445524156094, 0.00010205945727648214, 6.57988857710734e-05, 0.0006949909729883075, 0.004452799912542105, 0.009000658988952637, 0.49080607295036316, 0.17717383801937103, 0.11174798011779785, 0.021669577807188034, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.019416164606809616, 0.0014941463014110923, 0.001027028076350689, 0.001502541359513998, 0.0085412273183465, 0.12493651360273361, 0.0035243057645857334, 0.0026196581311523914, 0.0008317703031934798, 0.0015569254755973816, 0.060888972133398056, 0.06929422169923782, 0.3396435081958771, 0.387500524520874, 0.017253199592232704, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04994890093803406, 0.15025374293327332, 0.024391163140535355, 0.00227133696898818, 0.012616162188351154, 0.2894521951675415, 0.4185648262500763, 0.19089959561824799, 0.027421748265624046, 0.001001756638288498, 0.0036985764745622873, 0.06802930682897568, 0.02484762854874134, 0.057649459689855576, 0.1606004238128662, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03736208751797676, 0.11793919652700424, 0.0180205088108778, 0.0001436693564755842, 0.0030756669584661722, 0.08228655159473419, 0.12110688537359238, 0.09650447964668274, 0.015347721055150032, 0.0004259537090547383, 0.00022625335259363055, 0.001013986300677061, 0.0784289613366127, 0.2240448147058487, 0.18707746267318726, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.7529165148735046, 0.7075774073600769, 0.6068683862686157, 0.3852986991405487, 0.6197313666343689, 0.6735447645187378, 0.6598724722862244, 0.7226093411445618, 0.31395286321640015, 0.2518909275531769, 0.07010441273450851, 0.21793116629123688, 0.4325476884841919, 0.7029338479042053, 0.06848814338445663, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04773104563355446, 0.01963546872138977, 0.16452182829380035, 0.04063690826296806, 0.1849776655435562, 0.08088860660791397, 0.11659693717956543, 0.038044340908527374, 0.2744975686073303, 0.003083554795011878, 0.019721103832125664, 0.08137688785791397, 0.0169991385191679, 0.03939461708068848, 0.14168404042720795, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09676018357276917, 0.018249453976750374, 0.657112717628479, 0.5890088677406311, 0.5712416768074036, 0.2744671702384949, 0.48642322421073914, 0.26345524191856384, 0.23708243668079376, 0.03475205600261688, 0.15204745531082153, 0.0676480308175087, 0.050043635070323944, 0.0665324404835701, 0.036993421614170074, 0.13007116317749023, 0.035988736897706985, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04065309092402458, 0.0025235058274120092, 0.11838234961032867, 0.27863210439682007, 0.37560757994651794, 0.7046668529510498, 0.12516380846500397, 0.1912177950143814, 0.14992743730545044, 0.05949303135275841, 0.056387268006801605, 0.04353337734937668, 0.17471297085285187, 0.07017815858125687, 0.12025584280490875, 0.17991511523723602, 0.05124381557106972, 0.013642107136547565, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015422305092215538, 0.000844803755171597, 0.015767300501465797, 0.11098357290029526, 0.273564875125885, 0.3235251009464264, 0.14805495738983154, 0.17132841050624847, 0.25568780303001404, 0.034506767988204956, 0.046862825751304626, 0.03818853572010994, 0.025031423196196556, 0.027911247685551643, 0.009120252914726734, 0.16831281781196594, 0.043814778327941895, 0.0950295478105545, 0.07350433617830276, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01866327039897442, 0.11290711164474487, 0.007440958172082901, 0.031009642407298088, 0.059622399508953094, 0.035299621522426605, 0.012064317241311073, 0.17540854215621948, 0.06399405747652054, 0.010346408933401108, 0.023967623710632324, 0.006549614481627941, 0.015476463362574577, 0.017944032326340675, 0.15624091029167175, 0.13759823143482208, 0.14112484455108643, 0.20577600598335266, 0.13910864293575287, 0.034107428044080734, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.115133136510849, 0.5564319491386414, 0.0024013265501707792, 0.014839398674666882, 0.027623601257801056, 0.003712957026436925, 0.11139625310897827, 0.4320802688598633, 0.18111301958560944, 0.025198934599757195, 0.05914938822388649, 0.029404014348983765, 0.1131783202290535, 0.1630096137523651, 0.14384765923023224, 0.11619941890239716, 0.038306448608636856, 0.06045802682638168, 0.03494013100862503, 0.374624639749527, 0.22046393156051636, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.047323077917099, 0.01987922191619873, 0.021367410197854042, 0.0816798061132431, 0.11104802042245865, 0.01310664601624012, 0.37855657935142517, 0.16697411239147186, 0.31461480259895325, 0.04616151005029678, 0.27547621726989746, 0.04939346760511398, 0.02232075110077858, 0.15515512228012085, 0.01579722762107849, 0.08332619816064835, 0.009484739042818546, 0.012810231186449528, 0.0027760458178818226, 0.3268325924873352, 0.26342087984085083, 0.17634892463684082, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13229456543922424, 0.031869739294052124, 0.26943540573120117, 0.2586674690246582, 0.3796730637550354, 0.127562016248703, 0.20277942717075348, 0.05910756066441536, 0.14354895055294037, 0.08293455094099045, 0.2214740365743637, 0.23150987923145294, 0.18035069108009338, 0.2860051393508911, 0.07895194739103317, 0.057563915848731995, 0.01992173306643963, 0.03713805601000786, 0.014863312244415283, 0.25726908445358276, 0.14832180738449097, 0.402090460062027, 0.06479739397764206, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09224988520145416, 0.07457923144102097, 0.05282874405384064, 0.09438028931617737, 0.06849074363708496, 0.012997711077332497, 0.007214613724499941, 0.004257954657077789, 0.2309093326330185, 0.38276976346969604, 0.5917518734931946, 0.7830951809883118, 0.8438952565193176, 0.7586230039596558, 0.04145537316799164, 0.21478669345378876, 0.15359601378440857, 0.26770198345184326, 0.12653663754463196, 0.09151764959096909, 0.07003500312566757, 0.19363711774349213, 0.014233908616006374, 0.023967349901795387, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014161140657961369, 0.027171263471245766, 0.0029068312142044306, 0.020549731329083443, 0.0005743438960053027, 0.00417140731588006, 0.003657599212601781, 0.00956815481185913, 0.34446486830711365, 0.5171273946762085, 0.39057764410972595, 0.2845093309879303, 0.1669711321592331, 0.5306525230407715, 0.015455210581421852, 0.2834857702255249, 0.07559704780578613, 0.07655511796474457, 0.16202391684055328, 0.08316012471914291, 0.11911017447710037, 0.0204884335398674, 0.011816238984465599, 0.13204774260520935, 0.039266277104616165, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02566671371459961, 0.00907080341130495, 0.0006065603229217231, 0.03001752682030201, 0.00023783017240930349, 0.0005533608491532505, 0.013808660209178925, 0.003767948364838958, 0.06461481004953384, 0.1359771490097046, 0.08153439313173294, 0.572087287902832, 0.36045318841934204, 0.44234389066696167, 0.0030113777611404657, 0.23006244003772736, 0.03933367133140564, 0.07187695801258087, 0.04476522281765938, 0.01073860377073288, 0.0032203071750700474, 0.00176758982706815, 0.018770985305309296, 0.12121162563562393, 0.18536020815372467, 0.01582610420882702, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03087739646434784, 0.012099061161279678, 0.004942088853567839, 0.038267359137535095, 0.0023591304197907448, 0.0037323227152228355, 0.04966888204216957, 0.012427400797605515, 0.16158415377140045, 0.020882699638605118, 0.05600592866539955, 0.367767333984375, 0.24262923002243042, 0.38281354308128357, 0.00973587203770876, 0.18067117035388947, 0.009833509102463722, 0.03744787722826004, 0.016920698806643486, 0.05744745582342148, 0.04540643468499184, 0.008024180307984352, 0.012110988609492779, 0.09370782226324081, 0.08820194005966187, 0.06259123980998993, 0.025030089542269707, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04249054566025734, 0.0069285486824810505, 0.006088858004659414, 0.044397544115781784, 0.05390672758221626, 0.006144464481621981, 0.018320903182029724, 0.01545354351401329, 0.05193139612674713, 0.03221629932522774, 0.02379259280860424, 0.27246853709220886, 0.22103002667427063, 0.23179520666599274, 0.005589436274021864, 0.11523616313934326, 0.03200709819793701, 0.050564926117658615, 0.010618647560477257, 0.09430865943431854, 0.018685024231672287, 0.022438397631049156, 0.017720744013786316, 0.1592920571565628, 0.21717989444732666, 0.2463550567626953, 0.2194516956806183, 0.0009421245777048171, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04184036701917648, 0.03700190782546997, 0.008264865726232529, 0.02439146116375923, 0.00799429602921009, 0.12502151727676392, 0.05032283812761307, 0.18101848661899567, 0.07329469919204712, 0.08409427851438522, 0.10790428519248962, 0.011960207484662533, 0.20496119558811188, 0.19276422262191772, 0.0069670299999415874, 0.09747911244630814, 0.1645127683877945, 0.1875433474779129, 0.09478750824928284, 0.08721300214529037, 0.02294742316007614, 0.02039182186126709, 0.07351931929588318, 0.1815827339887619, 0.5564144849777222, 0.41975197196006775, 0.2698606848716736, 0.05650324374437332, 0.05821085348725319, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06364590674638748, 0.06483624875545502, 0.015260975807905197, 0.1278582364320755, 0.006228389218449593, 0.02756887674331665, 0.020600903779268265, 0.015440343879163265, 0.018087223172187805, 0.017098410055041313, 0.025406692177057266, 0.0007098353235051036, 0.00014885497512295842, 0.0013503700029104948, 0.15608660876750946, 0.14833268523216248, 0.1209164559841156, 0.08990822732448578, 0.0656033307313919, 0.23720099031925201, 0.11782333254814148, 0.04633651673793793, 0.16808320581912994, 0.06126163899898529, 0.43528908491134644, 0.3754012882709503, 0.13757933676242828, 0.05596579611301422, 0.16984672844409943, 0.002737722359597683, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6220619678497314, 0.6306124329566956, 0.6737340092658997, 0.49940165877342224, 0.1517823040485382, 0.8503586649894714, 0.705633282661438, 0.6629571914672852, 0.11157920956611633, 0.39899003505706787, 0.3173867464065552, 0.027327625080943108, 0.014980590902268887, 0.009274562820792198, 0.08523338288068771, 0.19258342683315277, 0.05838138237595558, 0.04652376100420952, 0.017318567261099815, 0.23482391238212585, 0.16333334147930145, 0.02100907638669014, 0.048424359411001205, 0.06841404736042023, 0.3133482038974762, 0.07921069860458374, 0.021035969257354736, 0.03291412815451622, 0.18175286054611206, 0.1566929817199707, 0.053215935826301575, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15005189180374146, 0.04609784111380577, 0.17501141130924225, 0.21113994717597961, 0.26919078826904297, 0.6422000527381897, 0.7493206858634949, 0.2162598967552185, 0.010351919569075108, 0.09728528559207916, 0.09688232094049454, 0.028558582067489624, 0.10305432975292206, 0.05914681404829025, 0.11260810494422913, 0.17641158401966095, 0.15294750034809113, 0.15352487564086914, 0.10843643546104431, 0.08260629326105118, 0.016529222950339317, 0.012650150805711746, 0.07893627882003784, 0.1388573795557022, 0.19094663858413696, 0.03751035034656525, 0.05650494620203972, 0.2426995038986206, 0.16961677372455597, 0.07263431698083878, 0.152814581990242, 0.018521834164857864, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09041088819503784, 0.052050016820430756, 0.08856991678476334, 0.2977358102798462, 0.04025371000170708, 0.3506464660167694, 0.6434463858604431, 0.25059518218040466, 0.01933867670595646, 0.04819375276565552, 0.07508239895105362, 0.04970608279109001, 0.02890131063759327, 0.02355407178401947, 0.12558245658874512, 0.25574439764022827, 0.04364950954914093, 0.05707173049449921, 0.02453112043440342, 0.016254547983407974, 0.0026636396069079638, 0.0035282839089632034, 0.015699811279773712, 0.03404982015490532, 0.04375504329800606, 0.001423283712938428, 0.05359426140785217, 0.1740386039018631, 0.10691730678081512, 0.03620539605617523, 0.04950953647494316, 0.022295303642749786, 0.025807255879044533, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18765486776828766, 0.021713200956583023, 0.21844394505023956, 0.3042432367801666, 0.17823228240013123, 0.1673380434513092, 0.8088975548744202, 0.46762967109680176, 0.05706785246729851, 0.009645337238907814, 0.0322297103703022, 0.09777479618787766, 0.08048812299966812, 0.10106904059648514, 0.17228879034519196, 0.216966450214386, 0.016096990555524826, 0.08351551741361618, 0.02645382098853588, 0.05811392888426781, 0.04091750830411911, 0.014506897889077663, 0.015038754791021347, 0.07221462577581406, 0.08585365861654282, 0.059816163033246994, 0.04502185434103012, 0.00397779606282711, 0.041175276041030884, 0.04448581859469414, 0.10983181744813919, 0.01911303587257862, 0.07987141609191895, 0.062483180314302444, NaN, NaN, NaN, NaN, NaN, NaN], [0.4792143702507019, 0.09839366376399994, 0.1882246881723404, 0.4093988239765167, 0.7147246599197388, 0.24897223711013794, 0.4705742597579956, 0.4205995500087738, 0.01958448253571987, 0.026842152699828148, 0.02239188365638256, 0.15106931328773499, 0.08969185501337051, 0.10003618896007538, 0.1635625958442688, 0.11257521063089371, 0.027663733810186386, 0.023284420371055603, 0.0038690094370394945, 0.053685132414102554, 0.008445030078291893, 0.014706910587847233, 0.009755544364452362, 0.06406830251216888, 0.10475295782089233, 0.08554040640592575, 0.16072620451450348, 0.00029980239924043417, 0.03509804978966713, 0.03031017631292343, 0.04435117170214653, 0.06420817226171494, 0.2780051827430725, 0.2271702140569687, 0.0013584558619186282, NaN, NaN, NaN, NaN, NaN], [0.40625429153442383, 0.3796224594116211, 0.2515096962451935, 0.36165565252304077, 0.24774380028247833, 0.8824228644371033, 0.8048573136329651, 0.857955813407898, 0.058371078222990036, 0.07109472155570984, 0.11402199417352676, 0.0021524245385080576, 0.019929109141230583, 0.030590593814849854, 0.11712031066417694, 0.10895614326000214, 0.15509657561779022, 0.19682957231998444, 0.07681374996900558, 0.06229116767644882, 0.016663551330566406, 0.015513443388044834, 0.04232686012983322, 0.0986364334821701, 0.35070890188217163, 0.19941051304340363, 0.163076713681221, 0.026361489668488503, 0.018140846863389015, 0.016411108896136284, 0.03203867748379707, 0.053678009659051895, 0.19773079454898834, 0.3572796881198883, 0.059515852481126785, 0.04298213869333267, NaN, NaN, NaN, NaN], [0.04390633478760719, 0.032843075692653656, 0.010515165515244007, 0.11869800090789795, 0.005461697466671467, 0.023131608963012695, 0.01705162413418293, 0.008547519333660603, 0.003713170997798443, 0.008410640992224216, 0.009457322768867016, 0.00015943740436341614, 3.361727431183681e-05, 0.0002994383394252509, 0.1532706469297409, 0.15568822622299194, 0.11876019835472107, 0.09203660488128662, 0.059780094772577286, 0.24089980125427246, 0.06525673717260361, 0.029934749007225037, 0.11168782413005829, 0.03211824223399162, 0.30118685960769653, 0.22822384536266327, 0.08190999180078506, 0.018841415643692017, 0.1366286426782608, 0.0017427116399630904, 0.02601366490125656, 0.09386949241161346, 0.19522085785865784, 0.1546826809644699, 0.06491755694150925, 0.19679579138755798, 0.0025137634947896004, NaN, NaN, NaN], [0.6348351836204529, 0.5127235651016235, 0.5931673645973206, 0.5543242692947388, 0.12377271056175232, 0.8264753222465515, 0.6941898465156555, 0.5687963962554932, 0.03150533139705658, 0.12843358516693115, 0.11884576827287674, 0.005231617949903011, 0.0018767286092042923, 0.0011644444894045591, 0.11210005730390549, 0.26271528005599976, 0.07045364379882812, 0.0520184300839901, 0.023400958627462387, 0.11433269083499908, 0.07895253598690033, 0.012276851572096348, 0.023823700845241547, 0.04200353845953941, 0.16687022149562836, 0.05654531344771385, 0.038080912083387375, 0.012698299251496792, 0.10473722219467163, 0.0643644630908966, 0.015445034019649029, 0.014234953559935093, 0.06144930049777031, 0.05821693688631058, 0.0568128302693367, 0.1767931431531906, 0.1402994990348816, 0.07714083790779114, NaN, NaN], [0.10790421068668365, 0.016916295513510704, 0.09771728515625, 0.22749783098697662, 0.26325535774230957, 0.49138790369033813, 0.6275916695594788, 0.08931886404752731, 0.0033968419302254915, 0.024402111768722534, 0.018104346469044685, 0.003288157982751727, 0.010537534020841122, 0.006979967001825571, 0.12102893739938736, 0.1969611942768097, 0.16093717515468597, 0.1609625220298767, 0.11138524115085602, 0.026131147518754005, 0.00619129091501236, 0.005407778546214104, 0.04104578495025635, 0.06517186760902405, 0.06833471357822418, 0.020616043359041214, 0.03467438742518425, 0.095084547996521, 0.06247802451252937, 0.022057469934225082, 0.06569864600896835, 0.0052108620293438435, 0.03032413311302662, 0.0838729590177536, 0.3427644968032837, 0.19215865433216095, 0.08116735517978668, 0.14785417914390564, 0.015012684278190136, NaN], [0.028179557994008064, 0.011468129232525826, 0.016789404675364494, 0.00803140178322792, 0.00952040497213602, 0.02960360422730446, 0.24957160651683807, 0.03544437885284424, 0.005487674381583929, 0.0028927521780133247, 0.005656986031681299, 0.0040698484517633915, 0.04730471968650818, 0.0667993351817131, 0.1372966766357422, 0.1272672563791275, 0.008308093063533306, 0.030398543924093246, 0.02721896767616272, 0.016537277027964592, 0.021588556468486786, 0.002818688517436385, 0.010970782488584518, 0.01434051152318716, 0.012293173000216484, 0.04184769093990326, 0.03683166950941086, 0.023453323170542717, 0.020430248230695724, 0.03333409130573273, 0.068024642765522, 0.02648366242647171, 0.1640448421239853, 0.109919473528862, 0.1576652079820633, 0.14138163626194, 0.16884489357471466, 0.30372628569602966, 0.2283693552017212, 0.17022481560707092]], [[0.0006553527782671154, 0.5631614327430725, 0.0008777088369242847, 0.00020331511041149497, 0.0014234310947358608, 0.013944034464657307, 9.958680493582506e-06, 0.01898920349776745, 0.00014103656576480716, 1.4779416233068332e-06, 1.1701366275929104e-07, 1.195983372781484e-06, 0.00012817273091059178, 3.365538941579871e-05, 0.00028557839686982334, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00638999929651618, 0.7093943953514099, 0.004974186420440674, 0.06159398332238197, 0.003979360219091177, 0.06536109745502472, 0.005324128083884716, 0.02885170467197895, 0.0003847253101412207, 0.0002721542550716549, 4.3882369936909527e-05, 0.00024302180099766701, 0.00612376956269145, 0.006710950285196304, 0.0343138724565506, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.109707772731781, 0.1680740863084793, 0.05170662701129913, 0.04158816486597061, 0.026700180023908615, 0.23248757421970367, 0.5156019330024719, 0.3799504041671753, 0.02909121848642826, 0.009008231572806835, 0.0013055672170594335, 0.0032788640819489956, 0.0791734829545021, 0.010587821714580059, 0.06850002706050873, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04004191607236862, 0.02257939800620079, 0.01325287576764822, 0.14834734797477722, 0.0700073167681694, 0.12831416726112366, 0.47980472445487976, 0.3121630549430847, 0.05984592065215111, 0.015101294964551926, 0.002668763743713498, 0.0007187540177255869, 0.04004915803670883, 0.0007627750164829195, 0.05523831769824028, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0007188548916019499, 0.006864115130156279, 0.00033292395528405905, 0.000431404507253319, 0.0152564262971282, 0.2775210440158844, 0.03714991733431816, 0.7278205156326294, 0.004819776862859726, 0.00047404138604179025, 0.0003997469611931592, 0.0001266899926122278, 0.0201359074562788, 0.0027800032403320074, 0.042311206459999084, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00020999301341362298, 0.0025689874310046434, 3.502765650864603e-07, 6.610702985199168e-05, 0.00024143110204022378, 0.018905406817793846, 0.033397458493709564, 0.4650881290435791, 0.004783111158758402, 0.00013528004637919366, 5.751344360760413e-06, 7.93816871009767e-05, 0.0039043116848915815, 0.0005016719806008041, 0.07914639264345169, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00019393693946767598, 0.07456899434328079, 1.429513213224709e-05, 4.6383509470615536e-05, 6.820548151154071e-05, 0.004400796256959438, 0.0021800962276756763, 0.45963534712791443, 0.00143687822856009, 0.0008175616967491806, 6.983020284678787e-05, 3.49152869603131e-05, 0.0030698180198669434, 0.0006545006763190031, 0.001625033444724977, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004301158711314201, 0.013502174988389015, 4.788395017385483e-05, 0.00021532995742745697, 7.713190279901028e-05, 0.001439842046238482, 0.005622516851872206, 0.121849425137043, 0.006593172438442707, 0.006624745205044746, 0.0006814572843722999, 0.0002721978526096791, 0.0009267745190300047, 0.0016606011195108294, 0.2357456088066101, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0064394231885671616, 0.03409593552350998, 0.0025135872419923544, 0.0008376456098631024, 0.0004409599641803652, 0.0026055865455418825, 0.005634414032101631, 0.014003962278366089, 0.2343187928199768, 0.08099395036697388, 0.23927520215511322, 0.01715606264770031, 0.10332414507865906, 0.021894987672567368, 0.1941189020872116, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0004975660121999681, 0.0015548047376796603, 6.826691333117196e-06, 1.0557592986515374e-06, 2.731301538005937e-05, 0.0005447702133096755, 0.00042012380436062813, 0.0503113828599453, 0.0053693996742367744, 0.0012762928381562233, 0.0017790982965379953, 0.019809026271104813, 0.47653263807296753, 0.008869247511029243, 0.017010610550642014, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00012974163109902292, 0.005610004533082247, 2.3442629753844813e-05, 1.8520654521125834e-06, 3.9678394387010485e-05, 0.0016583451069891453, 0.00029088594601489604, 0.004530484322458506, 0.0021493860986083746, 0.00029196502873674035, 0.0005848451401107013, 0.0028240433894097805, 0.4590959846973419, 0.22978197038173676, 0.0020738127641379833, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00021855060185771435, 0.005491270218044519, 1.9927349057979882e-05, 7.633860150235705e-06, 0.0004071943403687328, 0.008836714550852776, 7.301902951439843e-05, 0.011723233386874199, 1.7278060113312677e-05, 0.0001269245840376243, 0.00022235361393541098, 0.016586007550358772, 0.41012606024742126, 0.37776312232017517, 0.0024871949572116137, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02619638666510582, 0.18392468988895416, 0.0003054745029658079, 0.00016413358389399946, 0.0015171386767178774, 0.004799532704055309, 0.004810427315533161, 0.058836404234170914, 0.0003794554795604199, 0.0017285931389778852, 0.000568193441722542, 0.003299211384728551, 0.6178385019302368, 0.5079926252365112, 0.05467592179775238, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03445081040263176, 0.14193737506866455, 0.0007241201237775385, 0.0002892682678066194, 0.0003202178922947496, 0.003702279180288315, 0.01134149543941021, 0.12129464000463486, 0.0006569268880411983, 0.0008894759230315685, 8.523569704266265e-05, 0.00030898841214366257, 0.7088924646377563, 0.10790188610553741, 0.05374660715460777, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04547691345214844, 0.010678221471607685, 0.0016328264027833939, 0.024403419345617294, 0.012795579619705677, 0.004323439672589302, 0.06414945423603058, 0.014008321799337864, 0.011475995182991028, 0.00871653389185667, 0.012156924232840538, 0.0147528275847435, 0.009472412057220936, 0.0331418551504612, 0.1366012692451477, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.11859580129384995, 0.07486707717180252, 0.21083025634288788, 0.32276296615600586, 0.08426652103662491, 0.03581860288977623, 0.24113436043262482, 0.608397364616394, 0.13584911823272705, 0.45509204268455505, 0.594833254814148, 0.30372148752212524, 0.8448506593704224, 0.7470672726631165, 0.09252076596021652, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04140070080757141, 0.00858838576823473, 0.11639615148305893, 0.1280786097049713, 0.2722368836402893, 0.21025919914245605, 0.4195333421230316, 0.631318211555481, 0.6560773253440857, 0.29341432452201843, 0.6862512230873108, 0.7675639986991882, 0.8915717005729675, 0.8601328730583191, 0.23356862366199493, 0.12451039254665375, 0.1335938721895218, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23441848158836365, 0.1666196584701538, 0.16664288938045502, 0.25857093930244446, 0.13334479928016663, 0.17917701601982117, 0.8257887363433838, 0.7395779490470886, 0.6802234053611755, 0.8125103712081909, 0.671615719795227, 0.8831866383552551, 0.6773648858070374, 0.7102506160736084, 0.08689045161008835, 0.18396444618701935, 0.017508728429675102, 0.02471269853413105, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24967892467975616, 0.48421844840049744, 0.036505091935396194, 0.17128480970859528, 0.01777578890323639, 0.09479225426912308, 0.36135032773017883, 0.0868472084403038, 0.16740600764751434, 0.523710310459137, 0.24439233541488647, 0.42307958006858826, 0.6259368062019348, 0.3662186563014984, 0.20058651268482208, 0.18453162908554077, 0.038695670664310455, 0.04155581444501877, 0.05072518810629845, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.28931790590286255, 0.4439229369163513, 0.24370647966861725, 0.6020305752754211, 0.17363131046295166, 0.338454008102417, 0.5701692700386047, 0.33999428153038025, 0.68463534116745, 0.8701388239860535, 0.7831944823265076, 0.9611375331878662, 0.9679895043373108, 0.9072677493095398, 0.0468842089176178, 0.14826133847236633, 0.04252630099654198, 0.08689215034246445, 0.08308856934309006, 0.015247097238898277, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1225743219256401, 0.062406159937381744, 0.03387807682156563, 0.02868799865245819, 0.01787530817091465, 0.04143121838569641, 0.5920179486274719, 0.08798510581254959, 0.2968905568122864, 0.7129084467887878, 0.4609105885028839, 0.29060137271881104, 0.7909923791885376, 0.5701599717140198, 0.13614380359649658, 0.1348571479320526, 0.07033194601535797, 0.10030655562877655, 0.13752251863479614, 0.030713800340890884, 0.1331333965063095, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0705394446849823, 0.02209068462252617, 0.0211530439555645, 0.008882923051714897, 0.0033682750072330236, 0.08319123089313507, 0.11070933192968369, 0.0025125632528215647, 0.10380591452121735, 0.17744502425193787, 0.10391969978809357, 0.12427430599927902, 0.5562515258789062, 0.49710196256637573, 0.3223192095756531, 0.20671042799949646, 0.05809834972023964, 0.1630101054906845, 0.06033356115221977, 0.07501133531332016, 0.017328333109617233, 0.028450097888708115, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15847322344779968, 0.015464702621102333, 0.13866224884986877, 0.053395166993141174, 0.03494010120630264, 0.13738934695720673, 0.02684560976922512, 0.03214175999164581, 0.5759801864624023, 0.1755424290895462, 0.13409779965877533, 0.035038210451602936, 0.6489107012748718, 0.4460716247558594, 0.4074119031429291, 0.15813153982162476, 0.14090144634246826, 0.26030233502388, 0.10773709416389465, 0.16133210062980652, 0.04816069453954697, 0.01304988656193018, 0.13335363566875458, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00857736449688673, 0.012718217447400093, 0.01174219325184822, 0.012934550642967224, 0.006551709491759539, 0.24597492814064026, 0.030029013752937317, 0.05923602730035782, 0.04650798439979553, 0.02447274886071682, 0.019859377294778824, 0.003505804343149066, 0.04937520623207092, 0.05625420808792114, 0.28037816286087036, 0.3033713400363922, 0.22469042241573334, 0.4264413118362427, 0.3422197103500366, 0.14910078048706055, 0.06983038783073425, 0.023690486326813698, 0.010566752403974533, 0.05880258232355118, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0015372766647487879, 0.015295127406716347, 0.018696704879403114, 0.004789609462022781, 0.19481690227985382, 0.04769033566117287, 0.01355075929313898, 0.02196505106985569, 0.08700259774923325, 0.020393503829836845, 0.02400771528482437, 0.18789233267307281, 0.15418098866939545, 0.08713112771511078, 0.19334079325199127, 0.25368839502334595, 0.33459752798080444, 0.3829180896282196, 0.2782860994338989, 0.2427205741405487, 0.08768615871667862, 0.031752120703458786, 0.02143564634025097, 0.03798065707087517, 0.07379034906625748, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04759770259261131, 0.04375501722097397, 0.02714523859322071, 0.05194481834769249, 0.05246514454483986, 0.14355513453483582, 0.17152011394500732, 0.14246520400047302, 0.1098044142127037, 0.013531663455069065, 0.008927365764975548, 0.03807468339800835, 0.10050502419471741, 0.02236531302332878, 0.3381733298301697, 0.14200474321842194, 0.2391311228275299, 0.18728229403495789, 0.11236919462680817, 0.20923744142055511, 0.13365258276462555, 0.052715059369802475, 0.134474515914917, 0.14480768144130707, 0.06683899462223053, 0.104619100689888, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10647730529308319, 0.04246760904788971, 0.08123224973678589, 0.13003453612327576, 0.07854175567626953, 0.24148082733154297, 0.6790831685066223, 0.7492273449897766, 0.28685522079467773, 0.03681188449263573, 0.15954196453094482, 0.2672117054462433, 0.11099980026483536, 0.04468434303998947, 0.4826459586620331, 0.09595079720020294, 0.2752297520637512, 0.21842314302921295, 0.13660691678524017, 0.35477691888809204, 0.37130749225616455, 0.20556269586086273, 0.35276445746421814, 0.31008264422416687, 0.11074709892272949, 0.19841141998767853, 0.07199764251708984, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2962004542350769, 0.47284576296806335, 0.11245852708816528, 0.23689918220043182, 0.10807513445615768, 0.8532499074935913, 0.5788733959197998, 0.6375027894973755, 0.33168625831604004, 0.06381742656230927, 0.004373080097138882, 0.015940984711050987, 0.3371734917163849, 0.06828418374061584, 0.21185840666294098, 0.15323933959007263, 0.4611065983772278, 0.07869336754083633, 0.03600241616368294, 0.47375282645225525, 0.7350273132324219, 0.297486275434494, 0.6052883863449097, 0.4953201115131378, 0.144621342420578, 0.3493393063545227, 0.04881289228796959, 0.10520726442337036, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3828115463256836, 0.12613584101200104, 0.47516295313835144, 0.4473835527896881, 0.17031393945217133, 0.6938255429267883, 0.7945614457130432, 0.34594833850860596, 0.5323623418807983, 0.34808266162872314, 0.11382761597633362, 0.1349307745695114, 0.013382190838456154, 0.0600610226392746, 0.30783677101135254, 0.12003841996192932, 0.2704387903213501, 0.20063650608062744, 0.23778890073299408, 0.36254584789276123, 0.5319709777832031, 0.4483972191810608, 0.15058189630508423, 0.11134153604507446, 0.09426670521497726, 0.21241672337055206, 0.10488338023424149, 0.049764484167099, 0.15823495388031006, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.7362364530563354, 0.8323087096214294, 0.9336822032928467, 0.7739728689193726, 0.8897883296012878, 0.9609381556510925, 0.9334329962730408, 0.9553548693656921, 0.7747710943222046, 0.4005538523197174, 0.5586770176887512, 0.25099167227745056, 0.4200068712234497, 0.1631680577993393, 0.06528117507696152, 0.15233570337295532, 0.21891875565052032, 0.13215333223342896, 0.2837490439414978, 0.08042775094509125, 0.43866410851478577, 0.2773631513118744, 0.12773916125297546, 0.3155127763748169, 0.07932031899690628, 0.1219707503914833, 0.11212008446455002, 0.1944955438375473, 0.07170752435922623, 0.004313962999731302, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07449624687433243, 0.061402805149555206, 0.09389828145503998, 0.048646457493305206, 0.024208296090364456, 0.10819891840219498, 0.10563155263662338, 0.1243496686220169, 0.048523951321840286, 0.14693649113178253, 0.06614942103624344, 0.0066792843863368034, 0.2858017086982727, 0.04383772611618042, 0.15409637987613678, 0.2607015371322632, 0.3645761013031006, 0.37828943133354187, 0.3385462462902069, 0.2960833013057709, 0.5598280429840088, 0.544554591178894, 0.47054967284202576, 0.3477361798286438, 0.13701467216014862, 0.14822737872600555, 0.030188634991645813, 0.05528556555509567, 0.058441486209630966, 0.03410256654024124, 0.17273126542568207, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02467108517885208, 0.049052223563194275, 0.08135215938091278, 0.013768618926405907, 0.01176412496715784, 0.15210841596126556, 0.004693970084190369, 0.0041237217374145985, 0.018837640061974525, 0.03490369766950607, 0.036496780812740326, 0.0011750683188438416, 0.018557026982307434, 0.02382473833858967, 0.22122804820537567, 0.1872977614402771, 0.29805198311805725, 0.5206820368766785, 0.33024296164512634, 0.6395015716552734, 0.7210167050361633, 0.353913813829422, 0.406305193901062, 0.5096184015274048, 0.26257815957069397, 0.07301049679517746, 0.03464117646217346, 0.0787002444267273, 0.10916904360055923, 0.3557807505130768, 0.08364078402519226, 0.08538500964641571, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.012043171562254429, 0.03080524504184723, 0.02248452790081501, 0.008785543963313103, 0.00550604984164238, 0.05614035204052925, 0.015958979725837708, 0.01727765053510666, 0.03423915058374405, 0.017799094319343567, 0.029912255704402924, 0.01144923735409975, 0.09533664584159851, 0.02436906285583973, 0.20283196866512299, 0.13269101083278656, 0.2835436165332794, 0.47488275170326233, 0.24851854145526886, 0.694171130657196, 0.6760384440422058, 0.2759343385696411, 0.29058361053466797, 0.7136873602867126, 0.20711864531040192, 0.04295802861452103, 0.07691331952810287, 0.11943909525871277, 0.1323360651731491, 0.20847304165363312, 0.05967296287417412, 0.12062160670757294, 0.09502720832824707, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01959865354001522, 0.003073114436119795, 0.06498773396015167, 0.027286570519208908, 0.019540993496775627, 0.052237618714571, 0.08713454008102417, 0.28957968950271606, 0.3906492590904236, 0.044482238590717316, 0.17143161594867706, 0.1301742047071457, 0.10445850342512131, 0.03699616342782974, 0.2442801147699356, 0.058743223547935486, 0.276242733001709, 0.29826071858406067, 0.20218241214752197, 0.4631478488445282, 0.48415693640708923, 0.2865871787071228, 0.3694051504135132, 0.4054408073425293, 0.19627220928668976, 0.2907293438911438, 0.09057808667421341, 0.11348091810941696, 0.21781016886234283, 0.38082650303840637, 0.3570795953273773, 0.22612451016902924, 0.09323522448539734, 0.03618632256984711, NaN, NaN, NaN, NaN, NaN, NaN], [0.11208802461624146, 0.11668127030134201, 0.09828943759202957, 0.10754654556512833, 0.015885351225733757, 0.38998937606811523, 0.183034285902977, 0.3230077624320984, 0.20506803691387177, 0.08733018487691879, 0.007069121580570936, 0.010435528121888638, 0.30221423506736755, 0.047303054481744766, 0.19994190335273743, 0.07694489508867264, 0.41184449195861816, 0.038429711014032364, 0.018668875098228455, 0.5307568907737732, 0.7476497888565063, 0.4137455224990845, 0.6917499303817749, 0.6703397035598755, 0.3623183071613312, 0.579600989818573, 0.12613137066364288, 0.20100651681423187, 0.40998968482017517, 0.46115902066230774, 0.575211763381958, 0.35096046328544617, 0.163946270942688, 0.021770814433693886, 0.09986086189746857, NaN, NaN, NaN, NaN, NaN], [0.1682588905096054, 0.051582805812358856, 0.4415716230869293, 0.2735750675201416, 0.07878735661506653, 0.06776249408721924, 0.15038572251796722, 0.03211068734526634, 0.6709542274475098, 0.37688353657722473, 0.1879340261220932, 0.04096703231334686, 0.011627858504652977, 0.03471425548195839, 0.19384095072746277, 0.0834016501903534, 0.33346420526504517, 0.238715261220932, 0.28079062700271606, 0.5652539134025574, 0.6881173849105835, 0.5534363985061646, 0.22000034153461456, 0.1979052871465683, 0.3127084970474243, 0.4257359504699707, 0.18722867965698242, 0.1397658735513687, 0.3447277843952179, 0.13513657450675964, 0.31811001896858215, 0.32070791721343994, 0.12404847145080566, 0.05496959760785103, 0.04215753450989723, 0.16014836728572845, NaN, NaN, NaN, NaN], [0.8205305933952332, 0.9214023947715759, 0.9559677839279175, 0.7988566160202026, 0.9105063080787659, 0.9672437906265259, 0.9506043195724487, 0.9735420346260071, 0.9064961075782776, 0.6156813502311707, 0.6370130777359009, 0.18943972885608673, 0.3681671619415283, 0.1194160059094429, 0.08283783495426178, 0.13260646164417267, 0.29362690448760986, 0.18431688845157623, 0.38109344244003296, 0.20342527329921722, 0.5946046113967896, 0.4558189809322357, 0.26072001457214355, 0.5455912351608276, 0.2635512351989746, 0.31394094228744507, 0.23975242674350739, 0.36583349108695984, 0.2753828167915344, 0.01127256266772747, 0.41475725173950195, 0.29836422204971313, 0.2503683567047119, 0.10983213782310486, 0.21767295897006989, 0.0692884549498558, 0.003035380970686674, NaN, NaN, NaN], [0.10534824430942535, 0.08027994632720947, 0.1381307989358902, 0.07063161581754684, 0.01806548424065113, 0.10409632325172424, 0.12885765731334686, 0.2072904407978058, 0.09267445653676987, 0.23836983740329742, 0.11645739525556564, 0.006059943698346615, 0.1595546454191208, 0.017974214628338814, 0.14464683830738068, 0.2068602293729782, 0.4467880427837372, 0.4564751386642456, 0.4485791325569153, 0.45999279618263245, 0.6740500330924988, 0.7906107902526855, 0.6832103133201599, 0.5420533418655396, 0.4096798300743103, 0.3950984477996826, 0.13646338880062103, 0.10497336834669113, 0.17230592668056488, 0.07012390345335007, 0.27583980560302734, 0.3079235553741455, 0.1555996537208557, 0.038740403950214386, 0.05588690564036369, 0.03859011456370354, 0.02352789230644703, 0.12950412929058075, NaN, NaN], [0.026579611003398895, 0.02949470281600952, 0.04954056441783905, 0.017031243070960045, 0.008355016820132732, 0.09075918793678284, 0.0036468924954533577, 0.0022332987282425165, 0.050134338438510895, 0.049380820244550705, 0.028885982930660248, 0.0007559077348560095, 0.015549316070973873, 0.013319555670022964, 0.1734825074672699, 0.16561447083950043, 0.3958832919597626, 0.5531814098358154, 0.4040684700012207, 0.7809365391731262, 0.8175305128097534, 0.5712264180183411, 0.6113651394844055, 0.6668697595596313, 0.4850655198097229, 0.18787693977355957, 0.08608534932136536, 0.19115354120731354, 0.2498423308134079, 0.6246696710586548, 0.31422460079193115, 0.373276948928833, 0.049351077526807785, 0.046956032514572144, 0.08076699078083038, 0.09392194449901581, 0.3349837362766266, 0.062239501625299454, 0.10001940280199051, NaN], [0.05047497898340225, 0.027197130024433136, 0.11470095813274384, 0.007973222993314266, 0.12679167091846466, 0.4866730570793152, 0.17132264375686646, 0.15032453835010529, 0.14889459311962128, 0.01696154847741127, 0.0735161080956459, 0.0034290377516299486, 0.05194668471813202, 0.06144191324710846, 0.13309471309185028, 0.06568613648414612, 0.36780038475990295, 0.6246912479400635, 0.7116879820823669, 0.754679262638092, 0.7714072465896606, 0.7616819739341736, 0.5837911367416382, 0.9111838936805725, 0.8262851238250732, 0.6737059354782104, 0.5146453380584717, 0.7674095630645752, 0.7359525561332703, 0.5679676532745361, 0.7213301062583923, 0.6703079342842102, 0.5636342167854309, 0.38883939385414124, 0.5560528635978699, 0.518941342830658, 0.3739706873893738, 0.32013192772865295, 0.3743935525417328, 0.3977084755897522]], [[0.3143080472946167, 0.014564945362508297, 0.07743841409683228, 0.19665417075157166, 0.23130221664905548, 0.03274351730942726, 0.23599109053611755, 0.04763320833444595, 0.20168107748031616, 0.7521476149559021, 0.7922006249427795, 0.840878427028656, 0.6463541388511658, 0.6008138656616211, 0.0070990691892802715, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05880431830883026, 0.004086965229362249, 0.06557433307170868, 0.4476080536842346, 0.32179930806159973, 0.2046266496181488, 0.5952353477478027, 0.20483972132205963, 0.7834360599517822, 0.27592822909355164, 0.5900363922119141, 0.6986290812492371, 0.3548848032951355, 0.36629796028137207, 0.07452832907438278, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.4484235942363739, 0.0712433010339737, 0.09740526974201202, 0.49982836842536926, 0.18807044625282288, 0.007537430617958307, 0.2073078453540802, 0.015238385647535324, 0.18028782308101654, 0.6095888018608093, 0.4225178062915802, 0.6769288778305054, 0.3957397937774658, 0.7102670669555664, 0.05611870437860489, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.4341801106929779, 0.05481646955013275, 0.17834456264972687, 0.2579769194126129, 0.326920747756958, 0.0030261597130447626, 0.03147314488887787, 0.003279186552390456, 0.09941483289003372, 0.5679370760917664, 0.8480010032653809, 0.8133074045181274, 0.4710683822631836, 0.9189481139183044, 0.04321537911891937, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.559230387210846, 0.08983521163463593, 0.16111011803150177, 0.14667965471744537, 0.32596829533576965, 0.008685072883963585, 0.1111784353852272, 0.02690659649670124, 0.06770152598619461, 0.18340016901493073, 0.4614297151565552, 0.502476155757904, 0.42325475811958313, 0.5992166996002197, 0.05437220633029938, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.367906779050827, 0.21432256698608398, 0.3548191487789154, 0.2603428363800049, 0.22096140682697296, 0.0013341127196326852, 0.021726170554757118, 0.005543001927435398, 0.5389296412467957, 0.818263828754425, 0.919593095779419, 0.8187286257743835, 0.4823090434074402, 0.4897681474685669, 0.07018090784549713, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.7116888761520386, 0.17206020653247833, 0.6874114871025085, 0.19288089871406555, 0.20990870893001556, 0.011273512616753578, 0.2026582807302475, 0.004371582996100187, 0.10976968705654144, 0.4432500898838043, 0.7022042274475098, 0.8704607486724854, 0.721519947052002, 0.7422701716423035, 0.025589054450392723, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.7674684524536133, 0.20032620429992676, 0.42808812856674194, 0.11714937537908554, 0.32732346653938293, 0.009955272078514099, 0.05444686487317085, 0.0040375906974077225, 0.12078685313463211, 0.6266691088676453, 0.5163981914520264, 0.8307003378868103, 0.32096055150032043, 0.24524804949760437, 0.04717922583222389, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.7549813389778137, 0.15439504384994507, 0.33331331610679626, 0.24930144846439362, 0.2927357852458954, 0.04936225712299347, 0.44933974742889404, 0.06466211378574371, 0.09519664198160172, 0.08716140687465668, 0.058296240866184235, 0.09990595281124115, 0.5117565989494324, 0.1508449912071228, 0.039490822702646255, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.654628574848175, 0.3205694854259491, 0.5841068029403687, 0.21299651265144348, 0.365792840719223, 0.0401315838098526, 0.18686936795711517, 0.05883712321519852, 0.05069931596517563, 0.33667507767677307, 0.3354107439517975, 0.22027519345283508, 0.05277648940682411, 0.09031395614147186, 0.015531455166637897, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3366456627845764, 0.1530359387397766, 0.41866233944892883, 0.39775165915489197, 0.7769761681556702, 0.06979230791330338, 0.41583842039108276, 0.02130916155874729, 0.14617334306240082, 0.25815388560295105, 0.1423572301864624, 0.18894770741462708, 0.041056301444768906, 0.026175418868660927, 0.03888533264398575, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.24913249909877777, 0.0818726196885109, 0.5426726341247559, 0.1687711775302887, 0.8305720090866089, 0.26261457800865173, 0.39635857939720154, 0.1712585836648941, 0.1158638522028923, 0.17366157472133636, 0.12521226704120636, 0.5298976302146912, 0.041029125452041626, 0.02415779046714306, 0.1170416921377182, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3567614257335663, 0.035316068679094315, 0.3819185495376587, 0.10469090938568115, 0.3454773426055908, 0.09596268832683563, 0.3821227550506592, 0.17425164580345154, 0.40528857707977295, 0.1745157092809677, 0.10956539213657379, 0.5078453421592712, 0.0026470222510397434, 0.016186503693461418, 0.08932095021009445, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.330766886472702, 0.039845019578933716, 0.6981685757637024, 0.09713104367256165, 0.8411048650741577, 0.16356231272220612, 0.3630223274230957, 0.1627381145954132, 0.6954487562179565, 0.17326875030994415, 0.1752558946609497, 0.24479816854000092, 0.026946308091282845, 0.016200177371501923, 0.06702017039060593, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07683827728033066, 0.07034450024366379, 0.21707428991794586, 0.2902449369430542, 0.1834353357553482, 0.01726321130990982, 0.13144701719284058, 0.005189047660678625, 0.150242418050766, 0.1182665303349495, 0.4041094183921814, 0.12062898278236389, 0.05959685891866684, 0.1186181977391243, 0.1283060759305954, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005987181328237057, 0.0011158415582031012, 0.0026756690349429846, 0.0011391430161893368, 0.0021053741220384836, 0.0005449134623631835, 0.0017384873935952783, 0.000736464629881084, 0.00014482461847364902, 0.0008784460369497538, 0.0008941806154325604, 0.0009559267782606184, 0.00015614555741194636, 0.00044419756159186363, 0.16329224407672882, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3448674976825714, 0.07203025370836258, 0.011963781900703907, 0.012941744178533554, 0.011539866216480732, 0.003333584638312459, 0.005511423572897911, 0.0016478801844641566, 0.003020848147571087, 0.006189296022057533, 0.0020935258362442255, 0.00048376841004937887, 8.994764357339591e-05, 0.00040787423495203257, 0.2113737165927887, 0.1305680274963379, 0.02726716920733452, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.44219815731048584, 0.8124432563781738, 0.1900549679994583, 0.3808274269104004, 0.045300956815481186, 0.024617541581392288, 0.0172295980155468, 0.03488133102655411, 0.004235385917127132, 0.05999733507633209, 0.03787413239479065, 0.0011567235924303532, 0.0017442036187276244, 0.008845857344567776, 0.004224383272230625, 0.002169837476685643, 0.0032534021884202957, 0.5694547891616821, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07874103635549545, 0.02866651676595211, 0.3287397623062134, 0.27984437346458435, 0.10563887655735016, 0.003691220423206687, 0.005916049238294363, 0.0007406381191685796, 0.0005066083394922316, 0.0481056272983551, 0.029072491452097893, 0.000652547983918339, 0.0003529583918862045, 0.0009863339364528656, 0.002192106796428561, 0.1568225622177124, 0.12336109578609467, 0.028200775384902954, 0.03890102356672287, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.030638281255960464, 0.02597089111804962, 0.6577842831611633, 0.16596756875514984, 0.48041173815727234, 0.6114144921302795, 0.028207998722791672, 0.053615398705005646, 0.1417267620563507, 0.03454216569662094, 0.023575417697429657, 0.004873087164014578, 0.0009616028983145952, 0.00223313900642097, 0.0011337294708937407, 0.008017625659704208, 0.013223886489868164, 0.04581261798739433, 0.017950134351849556, 0.8790656328201294, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.29477018117904663, 0.14754106104373932, 0.8534399271011353, 0.9182198643684387, 0.6083860993385315, 0.9389832019805908, 0.12579986453056335, 0.03590020909905434, 0.012173496186733246, 0.16479530930519104, 0.15366923809051514, 0.0035958383232355118, 0.002988115418702364, 0.026292480528354645, 0.0003885648038703948, 0.08130903542041779, 0.2643316090106964, 0.5756329894065857, 0.29882851243019104, 0.31516125798225403, 0.09644471108913422, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2897806465625763, 0.01695333980023861, 0.6714832782745361, 0.4471692144870758, 0.24303969740867615, 0.15563154220581055, 0.008645682595670223, 0.0004950988804921508, 0.0001695932005532086, 0.13566477596759796, 0.030448369681835175, 0.00021736785129178315, 9.297585347667336e-05, 0.0014399208594113588, 5.083655923954211e-05, 0.20484277606010437, 0.3443664610385895, 0.0019387316424399614, 0.017399819567799568, 0.0004214652581140399, 0.00013534165918827057, 0.01563790813088417, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1102917492389679, 0.0027466323226690292, 0.13646264374256134, 0.07094646990299225, 0.17040857672691345, 0.6033481955528259, 0.41631338000297546, 0.013031017035245895, 0.00012492973473854363, 0.005976412910968065, 0.0002816450723912567, 4.682707003667019e-05, 0.00021861463028471917, 0.00019605428678914905, 0.001022772048600018, 0.1571786254644394, 0.5643889307975769, 0.13441002368927002, 0.09036820381879807, 0.02947377972304821, 0.015878956764936447, 0.022048691287636757, 0.14189693331718445, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.7042187452316284, 0.49455204606056213, 0.43194010853767395, 0.7080989480018616, 0.382207989692688, 0.06800723820924759, 0.48792970180511475, 0.12651333212852478, 0.0012585417134687304, 0.07895761728286743, 0.01729964278638363, 0.0006471746601164341, 0.00013743228919338435, 0.00039039706462062895, 0.00010207234299741685, 0.005826869048178196, 0.13292454183101654, 0.00521356426179409, 0.005004087463021278, 0.10703893005847931, 0.26877719163894653, 0.1785666048526764, 0.23197543621063232, 0.007970587350428104, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5233215093612671, 0.7835124135017395, 0.3596530258655548, 0.5502080917358398, 0.589034378528595, 0.24138878285884857, 0.4714515507221222, 0.13250088691711426, 0.08884716778993607, 0.06473898142576218, 0.12478159368038177, 0.001717525301501155, 0.01358798798173666, 0.004862584639340639, 0.0004225081647746265, 0.03136341646313667, 0.08873608708381653, 0.009185479953885078, 0.03043411858379841, 0.3010490834712982, 0.36070317029953003, 0.178965762257576, 0.21872122585773468, 0.005464768502861261, 0.06020791083574295, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0975094586610794, 0.14095744490623474, 0.009511731564998627, 0.03128954395651817, 0.01951521448791027, 0.0017430862644687295, 0.033708807080984116, 0.009512575343251228, 0.3042309582233429, 0.0025639990344643593, 0.0006334132049232721, 2.5987004846683703e-05, 0.0001574041525600478, 1.1997842193522956e-05, 1.5690195141360164e-05, 0.07854610681533813, 0.03772095590829849, 0.016643106937408447, 0.02832828275859356, 0.0785825327038765, 0.09336084127426147, 0.24177083373069763, 0.2718014717102051, 0.12932275235652924, 0.08437053114175797, 0.24188947677612305, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.536220133304596, 0.12877297401428223, 0.013534938916563988, 0.13534405827522278, 0.015604051761329174, 0.0035537974908947945, 0.02344023622572422, 0.008398037403821945, 0.2580391466617584, 0.2587551474571228, 0.014949243515729904, 0.0010696486569941044, 0.00046315763029269874, 0.0013398011215031147, 8.422375685768202e-05, 0.17239268124103546, 0.029533302411437035, 0.030515655875205994, 0.026403654366731644, 0.05037287250161171, 0.13986584544181824, 0.11416076123714447, 0.08228978514671326, 0.26975753903388977, 0.020502708852291107, 0.030797043815255165, 0.006723156664520502, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.028944578021764755, 0.013114584609866142, 0.0438210591673851, 0.05079193785786629, 0.03694206848740578, 0.0008442872785963118, 0.0030779552180320024, 0.002579997293651104, 0.01023491844534874, 0.21445545554161072, 0.2806929349899292, 0.00855539832264185, 0.03333647921681404, 0.06091907247900963, 1.9560096916393377e-05, 0.35662412643432617, 0.005917226430028677, 0.00044432797585614026, 0.00022813511895947158, 0.0073361690156161785, 0.0027237480971962214, 0.007987208664417267, 0.021625559777021408, 0.010472757741808891, 0.0008755659800954163, 0.012584702111780643, 0.000526397256180644, 0.01033733133226633, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0058769844472408295, 0.06350620836019516, 0.003568005282431841, 0.0076079596765339375, 0.0037217612843960524, 0.004286385141313076, 0.03584115207195282, 0.14617407321929932, 0.0030082303564995527, 0.12143123894929886, 0.0793885663151741, 0.1555183082818985, 0.14442139863967896, 0.29275521636009216, 7.129996811272576e-05, 0.189227893948555, 0.01606086827814579, 0.0030457540415227413, 0.005861388053745031, 0.04963670298457146, 0.004091562703251839, 0.01225967425853014, 0.037419673055410385, 0.01020084973424673, 0.003108290024101734, 0.01512740459293127, 0.006679146084934473, 0.014098022133111954, 0.03816642239689827, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.034930020570755005, 0.09419079124927521, 0.0127689428627491, 0.008763227611780167, 0.0065171802416443825, 0.008632887154817581, 0.02612082101404667, 0.02043459191918373, 0.0836663544178009, 0.5329904556274414, 0.3228733241558075, 0.7184357047080994, 0.5793755650520325, 0.783859133720398, 0.0001531920424895361, 0.00965302623808384, 0.0035168000031262636, 0.03902876377105713, 0.0158648993819952, 0.32648226618766785, 0.0038036927580833435, 0.002248003613203764, 0.002372291637584567, 0.014672092162072659, 0.007728067692369223, 0.022481968626379967, 0.028911879286170006, 0.044244468212127686, 0.021532919257879257, 0.6417658925056458, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0009532110998407006, 0.0024861039128154516, 7.189704774646088e-05, 0.00014637503772974014, 2.8552024105010787e-06, 3.0342853278853e-05, 0.0007709002820774913, 0.0005337693146429956, 6.919851330167148e-06, 0.02619163505733013, 0.02381032705307007, 0.008668542839586735, 0.39639002084732056, 0.7824769616127014, 1.1539431170604075e-06, 0.037641312927007675, 0.005557402968406677, 0.0006393054500222206, 0.006437606643885374, 0.007460788358002901, 0.0009530181414447725, 0.0016025539953261614, 0.0067516821436584, 0.02322007343173027, 0.018459537997841835, 0.011051125824451447, 0.006488891318440437, 0.04039585590362549, 0.18200218677520752, 0.0006002468289807439, 0.6243939995765686, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02785377763211727, 0.15845024585723877, 0.19323119521141052, 0.06543393433094025, 0.014044036157429218, 0.040286585688591, 0.07583826035261154, 0.6567350029945374, 0.004159754142165184, 0.35265031456947327, 0.6287637948989868, 0.12951745092868805, 0.32439297437667847, 0.653313934803009, 0.0008144593448378146, 0.01615065336227417, 0.01699231006205082, 0.00012957912986166775, 0.016060354188084602, 0.0006264564581215382, 0.0012908404460176826, 0.002684527076780796, 0.027531128376722336, 0.015566377900540829, 0.003692139405757189, 0.5753727555274963, 0.5145941376686096, 0.03750383481383324, 0.009545800276100636, 0.0034461882896721363, 0.005381980445235968, 0.00046628122800029814, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02927210181951523, 0.04805546626448631, 0.295967698097229, 0.060625556856393814, 0.014990724623203278, 0.10397231578826904, 0.12186732143163681, 0.5237559080123901, 0.0203724168241024, 0.43874940276145935, 0.4409005343914032, 0.09095493704080582, 0.5531511306762695, 0.5263633728027344, 0.0002321143983863294, 0.021861553192138672, 0.01695878431200981, 0.0018149337265640497, 0.015764223411679268, 0.007719711866229773, 0.0034752548672258854, 0.007653116714209318, 0.03472340479493141, 0.038436826318502426, 0.014262136071920395, 0.8426622748374939, 0.36256304383277893, 0.21876515448093414, 0.019672129303216934, 0.020847154781222343, 0.00781619269400835, 0.005409067030996084, 0.16073459386825562, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5664732456207275, 0.02422192506492138, 0.3148367702960968, 0.37531769275665283, 0.06290365755558014, 0.02708868682384491, 0.03764869272708893, 0.06476183980703354, 0.09221415221691132, 0.3172641098499298, 0.088014617562294, 0.02202794700860977, 0.004314645659178495, 0.0619816817343235, 0.0017959593096747994, 0.18507197499275208, 0.027911728248000145, 0.014699580147862434, 0.025536103174090385, 0.014524195343255997, 0.045023027807474136, 0.031167738139629364, 0.07539253681898117, 0.22652071714401245, 0.011904416605830193, 0.08752688765525818, 0.03955431655049324, 0.2908211648464203, 0.03612781688570976, 0.00514488760381937, 0.017019467428326607, 0.07116629183292389, 0.03509910777211189, 0.02026083506643772, NaN, NaN, NaN, NaN, NaN, NaN], [0.04828598350286484, 0.01127469539642334, 0.1758044958114624, 0.0725238099694252, 0.01880812831223011, 0.003422890789806843, 0.0039800796657800674, 0.008112750947475433, 0.0007020575576461852, 0.0960424467921257, 0.3098883628845215, 0.03193678706884384, 0.03351299837231636, 0.2577627897262573, 0.0005041947006247938, 0.40259334444999695, 0.005078054964542389, 0.00017122419376391917, 9.21270766411908e-05, 0.002624903805553913, 0.0009363252320326865, 0.00360113475471735, 0.01331485528498888, 0.008243494667112827, 0.0007176694343797863, 0.019634194672107697, 0.002027983544394374, 0.02349759265780449, 0.030203014612197876, 0.000993669149465859, 0.0008422310347668827, 0.013102295808494091, 0.025159381330013275, 0.0006507099606096745, 0.018182074651122093, NaN, NaN, NaN, NaN, NaN], [0.008833246305584908, 0.03231082111597061, 0.009648996405303478, 0.01135926228016615, 0.004257569555193186, 0.002696139505133033, 0.026390861719846725, 0.07894735038280487, 0.0002903220884036273, 0.05877671018242836, 0.0971919596195221, 0.32856324315071106, 0.08294347673654556, 0.6861463785171509, 0.00047716210247017443, 0.2579963207244873, 0.021157346665859222, 0.002921733073890209, 0.006211739499121904, 0.031850416213274, 0.0022005264181643724, 0.0070661455392837524, 0.036871425807476044, 0.012320333160459995, 0.005331193562597036, 0.033889420330524445, 0.020235266536474228, 0.07458563148975372, 0.1398555487394333, 0.008059950545430183, 0.0405682735145092, 0.03368399292230606, 0.012085597030818462, 0.010676471516489983, 0.03411625698208809, 0.08152885735034943, NaN, NaN, NaN, NaN], [0.020260397344827652, 0.03928471356630325, 0.012783887796103954, 0.0091601787135005, 0.005565040744841099, 0.007968534715473652, 0.020862603560090065, 0.012279938906431198, 0.01832268387079239, 0.3204420506954193, 0.28696081042289734, 0.7937509417533875, 0.6314787864685059, 0.8277974724769592, 0.00014348741387948394, 0.005019576288759708, 0.001437423750758171, 0.014701779931783676, 0.005876661743968725, 0.15098156034946442, 0.001037455745972693, 0.0006782425916753709, 0.0010664333822205663, 0.006170186679810286, 0.004750464111566544, 0.015587885864078999, 0.020612932741642, 0.024904461577534676, 0.027292385697364807, 0.6522603631019592, 0.02780178189277649, 0.009980881586670876, 0.010863273404538631, 0.016993993893265724, 0.026612548157572746, 0.013426730409264565, 0.6643192768096924, NaN, NaN, NaN], [0.00497927563264966, 0.011739314533770084, 0.0009416648535989225, 0.0009133343119174242, 2.0598932678694837e-05, 0.00024278588534798473, 0.00463896244764328, 0.0027787971775978804, 1.9694551156135276e-05, 0.026842234656214714, 0.05824153125286102, 0.023767979815602303, 0.7019069194793701, 0.8979114294052124, 1.5536308637820184e-05, 0.023952102288603783, 0.0025056565646082163, 0.0002975048264488578, 0.0031560298521071672, 0.002087814500555396, 0.00019765450269915164, 0.00028781042783521116, 0.0023521913681179285, 0.009429593570530415, 0.010675383731722832, 0.013774069957435131, 0.012372920289635658, 0.030660077929496765, 0.3810364305973053, 0.0006224916432984173, 0.6039706468582153, 0.2701583206653595, 0.012816790491342545, 0.005745226051658392, 0.052403513342142105, 0.18411211669445038, 0.00043697847286239266, 0.6234135627746582, NaN, NaN], [0.06832221150398254, 0.18812543153762817, 0.5426309108734131, 0.237625390291214, 0.041615329682826996, 0.11611851304769516, 0.16301436722278595, 0.827357828617096, 0.011619587428867817, 0.35340800881385803, 0.8248108625411987, 0.22083298861980438, 0.4978465139865875, 0.8379470109939575, 0.008811386302113533, 0.007988094352185726, 0.006256349850445986, 4.065780740347691e-05, 0.006692530121654272, 0.00010113247117260471, 0.0002641561150085181, 0.0006015493418090045, 0.009669815190136433, 0.00486318813636899, 0.0012557843001559377, 0.43231210112571716, 0.35852983593940735, 0.01959061808884144, 0.007567983586341143, 0.0019125458784401417, 0.00857639778405428, 0.0005027590086683631, 0.41286540031433105, 0.4292365312576294, 0.01753525249660015, 0.005813234485685825, 0.00216498039662838, 0.003382693277671933, 0.00027526391204446554, NaN], [0.7676634788513184, 0.8615484237670898, 0.768317461013794, 0.9594964981079102, 0.36958935856819153, 0.4649639129638672, 0.5634418725967407, 0.8043064475059509, 0.6601962447166443, 0.9397303462028503, 0.8348119258880615, 0.9867405295372009, 0.7646960020065308, 0.8154686689376831, 0.03640103340148926, 0.1387476772069931, 0.027318276464939117, 0.00785337295383215, 0.019197843968868256, 0.013794281519949436, 0.020801816135644913, 0.013009469024837017, 0.07068510353565216, 0.020734209567308426, 0.024748992174863815, 0.04673967882990837, 0.025586238130927086, 0.01648368127644062, 0.06557000428438187, 0.022920427843928337, 0.013843921944499016, 0.04100487753748894, 0.0375630147755146, 0.023956134915351868, 0.018727701157331467, 0.05957711860537529, 0.020177751779556274, 0.007389482576400042, 0.027843382209539413, 0.025224220007658005]], [[0.06827192008495331, 0.0036808219738304615, 0.005701950751245022, 0.005157816223800182, 0.003777393838390708, 0.024757172912359238, 0.0020165019668638706, 0.010267351754009724, 0.013163687661290169, 0.001690453034825623, 0.00837681908160448, 0.00522418599575758, 0.061038240790367126, 0.015438525006175041, 0.325132817029953, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.7422951459884644, 0.028774140402674675, 0.06394203752279282, 0.00887901522219181, 0.04345611855387688, 0.027670713141560555, 0.0295904241502285, 0.01398912351578474, 0.025535697117447853, 0.02094031311571598, 0.022182827815413475, 0.009663421660661697, 0.049684178084135056, 0.026225639507174492, 0.13834334909915924, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.20897099375724792, 0.21868035197257996, 0.23815643787384033, 0.005872054491192102, 0.0010661164997145534, 0.0017293300479650497, 0.00042713910806924105, 0.002609806600958109, 0.016046296805143356, 0.009100147522985935, 0.014420107938349247, 0.0022624030243605375, 0.010553905740380287, 0.007111164275556803, 0.25332581996917725, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2508500814437866, 0.20390872657299042, 0.7329782247543335, 0.07117453217506409, 0.016424261033535004, 0.021444672718644142, 0.001510130357928574, 0.004098558332771063, 0.0484151765704155, 0.02061472274363041, 0.001126835006289184, 0.0022107160184532404, 0.007578131277114153, 0.004504901356995106, 0.1403624713420868, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.27370113134384155, 0.8174626231193542, 0.7193068861961365, 0.7076587677001953, 0.07771007716655731, 0.01620337925851345, 0.004001453518867493, 0.004182097036391497, 0.03681829199194908, 0.09453201293945312, 0.026799198240041733, 0.006044679321348667, 0.03725922852754593, 0.016391301527619362, 0.04474738612771034, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3889567255973816, 0.4487122893333435, 0.5870586037635803, 0.6609426140785217, 0.6319714188575745, 0.10676700621843338, 0.009257740341126919, 0.0017087672604247928, 0.027955975383520126, 0.07590407133102417, 0.006841681431978941, 0.08621303737163544, 0.05063363164663315, 0.016846608370542526, 0.05719457566738129, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00991373136639595, 0.0983041524887085, 0.15667210519313812, 0.19277995824813843, 0.5809133052825928, 0.7996482253074646, 0.06316149979829788, 0.004939877428114414, 0.023352928459644318, 0.010926214046776295, 0.008795071393251419, 0.006998055148869753, 0.0765714943408966, 0.006783204153180122, 0.05886436253786087, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07887525111436844, 0.017153050750494003, 0.2216421663761139, 0.13068468868732452, 0.5295770764350891, 0.35302138328552246, 0.8493326902389526, 0.04265422001481056, 0.052519019693136215, 0.027357611805200577, 0.01357424259185791, 0.004279646556824446, 0.026089098304510117, 0.04089489206671715, 0.014124121516942978, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03465811163187027, 0.15351061522960663, 0.2825109362602234, 0.08174889534711838, 0.19755861163139343, 0.5825939774513245, 0.37084007263183594, 0.7892780900001526, 0.1287456750869751, 0.006381133571267128, 0.001940184272825718, 0.00047384126810356975, 0.011903955601155758, 0.003972942009568214, 0.06710142642259598, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.013788340613245964, 0.006632686126977205, 0.02207767777144909, 0.0785517543554306, 0.014113685116171837, 0.048156753182411194, 0.1944313496351242, 0.22155866026878357, 0.49656373262405396, 0.009422117844223976, 0.004702835343778133, 0.0007582302205264568, 0.00014129001647233963, 0.00033574484405107796, 0.23994654417037964, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00469209672883153, 0.015491061843931675, 0.035103749483823776, 0.009631682187318802, 0.008573818951845169, 0.051444172859191895, 0.04315423220396042, 0.05495374649763107, 0.6859460473060608, 0.5370080471038818, 0.06784479320049286, 0.004556083586066961, 0.001035997993312776, 0.0006345660076476634, 0.13974453508853912, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02668480947613716, 0.016245348379015923, 0.01112398225814104, 0.008507933467626572, 0.02067524567246437, 0.17763113975524902, 0.05662769451737404, 0.04544723033905029, 0.7948054671287537, 0.7384940385818481, 0.5224500298500061, 0.1060851439833641, 0.014122114516794682, 0.0019289307529106736, 0.08371670544147491, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02394592948257923, 0.04371663182973862, 0.028385786339640617, 0.007640721742063761, 0.014576996676623821, 0.08887659758329391, 0.017377078533172607, 0.020801657810807228, 0.187345951795578, 0.5047414302825928, 0.6342922449111938, 0.3672487437725067, 0.04719087854027748, 0.10966072231531143, 0.08543073385953903, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009629062376916409, 0.020042795687913895, 0.006009343545883894, 0.001406975439749658, 0.0026742229238152504, 0.006072318647056818, 0.006495587062090635, 0.0032924923580139875, 0.034326668828725815, 0.5998041033744812, 0.7456773519515991, 0.7204623818397522, 0.012111457996070385, 0.018825965002179146, 0.008305574767291546, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08114123344421387, 0.05478224158287048, 0.11802507936954498, 0.1980995535850525, 0.15338915586471558, 0.11414031684398651, 0.06528255343437195, 0.04494854062795639, 0.26375874876976013, 0.30061599612236023, 0.26960447430610657, 0.5329554677009583, 0.4288364350795746, 0.12292250245809555, 0.12395624816417694, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5194346308708191, 0.08715501427650452, 0.09860441088676453, 0.08100719004869461, 0.11848669499158859, 0.14280925691127777, 0.19592297077178955, 0.1196337640285492, 0.2793996334075928, 0.0691760703921318, 0.09539081901311874, 0.05545644089579582, 0.02620256133377552, 0.03735822066664696, 0.09928011149168015, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002687783446162939, 0.2585922181606293, 0.004556892905384302, 0.0005560630816034973, 0.0013625096762552857, 0.000865808455273509, 2.095674426527694e-05, 0.013363445177674294, 1.4331720194604713e-05, 0.00023233501997310668, 0.013212678954005241, 0.00027388104354031384, 2.99917119264137e-05, 5.10126119479537e-05, 0.0653858631849289, 0.1319446712732315, 0.003103907685726881, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.010489544831216335, 0.001751396106556058, 0.2775154411792755, 0.0030420231632888317, 0.08156438916921616, 0.0006471106316894293, 1.7804295566747896e-05, 0.00014657371502835304, 0.00035265504266135395, 0.00129506376106292, 0.018553601577878, 0.0019669390749186277, 0.009056665003299713, 0.05091148242354393, 0.1541917622089386, 0.004627853631973267, 0.8189921975135803, 0.006355744786560535, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0025869093369692564, 0.008571458049118519, 0.38431695103645325, 0.030530055984854698, 0.03365315869450569, 0.005854337941855192, 0.00010941662185359746, 4.1041937947738916e-05, 0.000364075880497694, 0.0011989381164312363, 0.014197473414242268, 0.0010815636487677693, 0.0004893331206403673, 0.0013785242335870862, 0.011478900909423828, 0.0004822930786758661, 0.5574855208396912, 0.0058120423927903175, 0.014268792234361172, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20589935779571533, 0.03613102436065674, 0.009011336602270603, 0.09399610757827759, 0.042497485876083374, 0.000576009857468307, 0.0040712482295930386, 0.00162220629863441, 0.00015305644774343818, 0.0034409475047141314, 0.025435233488678932, 2.175084773625713e-05, 1.0188268788624555e-05, 5.634217450278811e-05, 0.160919189453125, 0.15055440366268158, 0.0014966451562941074, 0.1733904629945755, 0.05038055405020714, 0.0057296124286949635, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00994176883250475, 0.015379102900624275, 0.000435269670560956, 0.004355194512754679, 0.002023787936195731, 4.86412636746536e-06, 0.0007220985717140138, 0.0004895065212622285, 0.0005591813242062926, 0.009127096273005009, 0.023014724254608154, 0.0003639610658865422, 3.1703839340480044e-05, 0.00036040451959706843, 0.1469942033290863, 0.1304439753293991, 0.00022060537594370544, 0.03428095951676369, 0.0157721396535635, 0.20856629312038422, 0.2746620774269104, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.31647789478302, 0.5689504742622375, 0.010991040617227554, 0.29046669602394104, 0.008814695291221142, 0.008600234054028988, 0.094898521900177, 0.02089405618607998, 0.005384301766753197, 0.1224634200334549, 0.2525540888309479, 0.011421876028180122, 9.89354812190868e-05, 0.00020726426737383008, 0.3419104218482971, 0.017820989713072777, 1.0936159014818259e-05, 0.0006241680239327252, 4.3406893382780254e-05, 0.2565733790397644, 0.5255003571510315, 0.040596142411231995, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006757077760994434, 0.1354868859052658, 0.002759847091510892, 0.009205225855112076, 0.0038083188701421022, 0.0014255000278353691, 0.0007299972930923104, 0.2051592320203781, 0.00020230394147802144, 0.001623967313207686, 0.006681961473077536, 0.0021689198911190033, 5.557909025810659e-05, 0.000162289768923074, 0.20840437710285187, 0.2143511176109314, 3.818454570136964e-05, 0.0006476931739598513, 0.00012842394062317908, 0.007853559218347073, 0.008102592080831528, 0.0005345920799300075, 0.00793861411511898, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.010027364827692509, 0.02789497748017311, 0.0041139991953969, 0.012661347165703773, 0.0013435317669063807, 0.0034407242201268673, 0.0064836894161999226, 0.007366063538938761, 0.29601985216140747, 0.053567804396152496, 0.040060218423604965, 0.004607491660863161, 0.00018677859043236822, 3.186250978615135e-05, 0.10952453315258026, 0.00014670012751594186, 7.536429620813578e-06, 0.0001294321846216917, 0.00024457855033688247, 0.00022483686916530132, 0.001284220488741994, 0.0014163334853947163, 0.5552030801773071, 0.006061996798962355, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19971387088298798, 0.012958711944520473, 0.001638519112020731, 0.17775660753250122, 0.0022716999519616365, 0.03685721755027771, 0.06948257982730865, 0.005452410783618689, 0.037147630006074905, 0.19678887724876404, 0.21911752223968506, 0.02466990426182747, 0.0004891769494861364, 6.33890085737221e-05, 0.21250228583812714, 0.09223808348178864, 0.004348577931523323, 0.013163902796804905, 0.018216131255030632, 0.035016678273677826, 0.11075899004936218, 0.1728493720293045, 0.19621391594409943, 0.029301786795258522, 0.46166056394577026, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05692211166024208, 0.036700569093227386, 0.0015533106634393334, 0.01848980039358139, 0.002404581755399704, 0.008354752324521542, 0.023693444207310677, 0.02836945652961731, 0.29948922991752625, 0.005321406293660402, 0.0022319734562188387, 0.0005214664852246642, 0.00019869217067025602, 5.8369230828247964e-05, 0.008838840760290623, 0.11309938877820969, 0.004489036742597818, 0.0485633909702301, 0.021462395787239075, 0.4192940890789032, 0.26214849948883057, 0.22032421827316284, 0.0067114257253706455, 0.010406548157334328, 0.11692964285612106, 0.23004111647605896, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011123275384306908, 0.003955129534006119, 0.0015235289465636015, 0.011223106645047665, 0.002481319010257721, 0.000903434120118618, 0.0006720115779899061, 0.00024289102293550968, 0.010115177370607853, 0.26232361793518066, 0.014199022203683853, 0.0005582758458331227, 0.0001542939426144585, 5.357913687475957e-05, 0.050008371472358704, 0.14281870424747467, 0.000545236689504236, 0.003893920686095953, 0.0005153689999133348, 0.01790653169155121, 0.004868220537900925, 0.0031487985979765654, 0.0011714915744960308, 0.0043698386289179325, 0.020373020321130753, 0.02358497679233551, 0.2682037353515625, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.025191567838191986, 0.009952094405889511, 0.015023785643279552, 0.0893990620970726, 0.006299919448792934, 0.0077370950020849705, 0.0004422276106197387, 0.00010742250742623582, 0.001807618304155767, 0.052116382867097855, 0.33116668462753296, 0.0029348258394747972, 0.004942082799971104, 0.0017646296182647347, 0.009777115657925606, 0.09794370085000992, 0.0018320194212719798, 0.000285644200630486, 3.260145604144782e-05, 0.00041393720312044024, 0.0043053096160292625, 0.002047628629952669, 0.0003047001373488456, 0.002447759034112096, 0.0016152235912159085, 0.024524936452507973, 0.29461416602134705, 0.014563476666808128, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12133541703224182, 0.0033125760965049267, 0.008441481739282608, 0.0257105715572834, 0.005432062782347202, 0.020603680983185768, 0.0008238950395025313, 0.00019463927310425788, 0.0001117472565965727, 0.011082900688052177, 0.4118730425834656, 0.0024717452470213175, 0.21560189127922058, 0.015253315679728985, 0.03452993184328079, 0.13817672431468964, 0.0034516772720962763, 0.002911344636231661, 0.0003800573176704347, 0.001462712767533958, 0.001961951842531562, 0.0040230052545666695, 0.0023086154833436012, 0.002483226591721177, 0.028553131967782974, 0.014239847660064697, 0.18359807133674622, 0.09542248398065567, 0.2067933827638626, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00568122835829854, 0.003583817044273019, 0.0009402501164004207, 0.0034319525584578514, 0.014700439758598804, 0.00014027200813870877, 5.928567406954244e-05, 0.0005310353590175509, 0.001004774123430252, 0.00433507701382041, 0.003991644363850355, 0.0015378128737211227, 6.231402221601456e-05, 0.02625701017677784, 0.15481357276439667, 0.14011409878730774, 0.01466476172208786, 0.09487155824899673, 0.03769487887620926, 0.062972791492939, 0.003495296463370323, 0.0004466120735742152, 0.0044098952785134315, 0.056031279265880585, 0.12585759162902832, 0.04736572876572609, 0.02727479301393032, 0.06542934477329254, 0.563940703868866, 0.024195805191993713, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00503728911280632, 0.004739185329526663, 0.021364033222198486, 0.04603096470236778, 0.004565324168652296, 0.021244995296001434, 0.07592181116342545, 0.027910754084587097, 0.008603491820394993, 0.004941265098750591, 0.03103908710181713, 0.035909827798604965, 0.01818632334470749, 0.04406380280852318, 0.17931725084781647, 0.05395817384123802, 6.747527368133888e-05, 0.0018676340114325285, 0.0002809480356518179, 0.03275269269943237, 0.005758063402026892, 9.199039777740836e-05, 0.00011598093260545284, 0.0015754709020256996, 0.026104740798473358, 0.009686414152383804, 0.001081737456843257, 0.0017741151386871934, 0.49180474877357483, 0.007121484261006117, 0.013531914912164211, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21416018903255463, 0.005411786492913961, 0.02111194096505642, 0.07001130282878876, 0.04736214876174927, 0.09187527745962143, 0.1399366855621338, 0.030981194227933884, 0.02342112548649311, 0.07424263656139374, 0.02716991677880287, 0.5710572600364685, 0.007255392149090767, 0.005560784600675106, 0.054831843823194504, 0.03839295729994774, 0.0002068357716780156, 0.006204192526638508, 0.0054313126020133495, 0.011207946576178074, 0.0013116636546328664, 0.008276019245386124, 0.002269806107506156, 0.004080863669514656, 0.01488969475030899, 0.0006726597202941775, 0.009391524828970432, 0.039596475660800934, 0.19840312004089355, 0.043704546988010406, 0.31202515959739685, 0.23529505729675293, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3339015245437622, 0.03176174685359001, 0.25991618633270264, 0.31748515367507935, 0.17923809587955475, 0.2977932095527649, 0.14185847342014313, 0.09826549887657166, 0.4168005883693695, 0.09961694478988647, 0.1390676498413086, 0.191667839884758, 0.0443519689142704, 0.10075851529836655, 0.08045557886362076, 0.07469534128904343, 0.001304430770687759, 0.0239309910684824, 0.008060658350586891, 0.021029237657785416, 0.015191669575870037, 0.006979105528444052, 0.0016427322989329696, 0.002132130553945899, 0.015241370536386967, 0.0018563566263765097, 0.035101406276226044, 0.06515936553478241, 0.27313047647476196, 0.10352547466754913, 0.2570805549621582, 0.45083746314048767, 0.1295340657234192, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018510108813643456, 0.0015040059806779027, 0.011199833825230598, 0.021222928538918495, 0.02421635016798973, 0.004175371024757624, 0.0007807075162418187, 0.0005349562270566821, 0.0038052168674767017, 0.3727143108844757, 0.022828511893749237, 0.01009275484830141, 0.0012628438416868448, 0.0009096930734813213, 0.10904579609632492, 0.19253067672252655, 0.0008209676598198712, 0.004669400863349438, 0.00047802351764403284, 0.013135433197021484, 0.0034620855003595352, 0.0016354827675968409, 0.0008273401763290167, 0.0018895546672865748, 0.009773027151823044, 0.006215384230017662, 0.2356690764427185, 0.01036232803016901, 0.06144833192229271, 0.008870624005794525, 0.024212215095758438, 0.008509873412549496, 0.01347219105809927, 0.35532569885253906, NaN, NaN, NaN, NaN, NaN, NaN], [0.05896773934364319, 0.023542853072285652, 0.0776505172252655, 0.15385140478610992, 0.011508575640618801, 0.0939982458949089, 0.0018089915392920375, 0.0003290986060164869, 0.0005636389250867069, 0.029514340683817863, 0.35146546363830566, 0.007090898230671883, 0.012099701911211014, 0.006742698606103659, 0.052738532423973083, 0.10910779982805252, 0.002221200615167618, 0.0001436042075511068, 1.1848528629343491e-05, 0.0001887700636871159, 0.0020721519831568003, 0.0009632316650822759, 0.00014056939107831568, 0.0007320817094296217, 0.0006829273188486695, 0.007395991589874029, 0.2889891564846039, 0.007074101362377405, 0.0002627878566272557, 0.004363438580185175, 0.0018575063440948725, 0.00557676050812006, 0.012322820723056793, 0.31134024262428284, 0.027276715263724327, NaN, NaN, NaN, NaN, NaN], [0.18205131590366364, 0.00472951028496027, 0.03192766383290291, 0.059333182871341705, 0.028221452608704567, 0.033883631229400635, 0.00131422549020499, 0.0001085989861167036, 5.632251122733578e-05, 0.004554648417979479, 0.2950275242328644, 0.0014449548907577991, 0.2329740822315216, 0.0520821250975132, 0.1361607313156128, 0.18170765042304993, 0.003209297079592943, 0.0023912524338811636, 0.00020479358499869704, 0.0009326079743914306, 0.0013757160631939769, 0.0021110770758241415, 0.0008730489062145352, 0.000792569131590426, 0.01825624145567417, 0.0059272306971251965, 0.11984144151210785, 0.05654650926589966, 0.08423373848199844, 0.024963613599538803, 0.027966396883130074, 0.1777324080467224, 0.005578523967415094, 0.14623191952705383, 0.11331525444984436, 0.2157108038663864, NaN, NaN, NaN, NaN], [0.0063572716899216175, 0.002779513830319047, 0.0009721479145810008, 0.0035897656343877316, 0.019835324957966805, 0.00021187934908084571, 8.435463678324595e-05, 0.00043589723645709455, 0.0004945950931869447, 0.004414541646838188, 0.0027602717746049166, 0.0008482423145323992, 5.171148222871125e-05, 0.021799515932798386, 0.15211130678653717, 0.1515214741230011, 0.008395697921514511, 0.0657893642783165, 0.019086696207523346, 0.05097401514649391, 0.0016111076110973954, 0.00021851839846931398, 0.002003778237849474, 0.01669292151927948, 0.06321260333061218, 0.015100682154297829, 0.010209205560386181, 0.015906400978565216, 0.30131736397743225, 0.012282183393836021, 0.09666845202445984, 0.00808996893465519, 0.03798958286643028, 0.013879657723009586, 0.047733187675476074, 0.5371345281600952, 0.020763304084539413, NaN, NaN, NaN], [0.005286877974867821, 0.008391096256673336, 0.025823507457971573, 0.030178312212228775, 0.00857502967119217, 0.042816706001758575, 0.07608389109373093, 0.03679429367184639, 0.0067360359244048595, 0.0038807345554232597, 0.03710461035370827, 0.037315309047698975, 0.018847206607460976, 0.0415174663066864, 0.15352587401866913, 0.07945924997329712, 4.7485355025855824e-05, 0.0020416006445884705, 0.00022757358965463936, 0.013386114500463009, 0.001981395063921809, 3.6917605029884726e-05, 2.620528539409861e-05, 0.0003202208608854562, 0.009042860940098763, 0.0030785591807216406, 0.0011855574557557702, 0.0005728560499846935, 0.20002734661102295, 0.00213914574123919, 0.002927121240645647, 0.004968173801898956, 0.0065933396108448505, 0.002585601294413209, 0.002817549044266343, 0.547335147857666, 0.006171087268739939, 0.018697692081332207, NaN, NaN], [0.2992006242275238, 0.008802352473139763, 0.027079692110419273, 0.08564624935388565, 0.11560814827680588, 0.22971339523792267, 0.1826445311307907, 0.033842965960502625, 0.06175734102725983, 0.11205370724201202, 0.04016120731830597, 0.5851526856422424, 0.016921253874897957, 0.011652404442429543, 0.08951538056135178, 0.059381648898124695, 0.00026094831991940737, 0.007586375344544649, 0.006061093881726265, 0.0039266073144972324, 0.0004965912085026503, 0.003665223019197583, 0.0008195870905183256, 0.0014654117403551936, 0.0045553394593298435, 0.00032001128420233727, 0.004615657962858677, 0.017150992527604103, 0.07922492176294327, 0.012805018573999405, 0.1320599913597107, 0.09461667388677597, 0.003555287839844823, 0.019601207226514816, 0.047796737402677536, 0.29085052013397217, 0.04383813217282295, 0.32529252767562866, 0.24933147430419922, NaN], [0.12446854263544083, 0.0009617851465009153, 0.004788657650351524, 0.0008746102685108781, 0.16037316620349884, 0.003065474098548293, 0.0056405095383524895, 0.005250739399343729, 0.05696318671107292, 0.013819074258208275, 0.028642717748880386, 0.0011808956041932106, 0.08446037769317627, 0.03008313849568367, 0.13710428774356842, 0.13618361949920654, 0.0007103006355464458, 0.025071904063224792, 0.004419561009854078, 0.001962232170626521, 0.0023795748129487038, 0.002366183791309595, 0.0003890783409588039, 0.00022811641974840313, 0.0010611300822347403, 0.001608739490620792, 0.028126444667577744, 0.005591525696218014, 0.0024579197634011507, 0.004123267717659473, 0.0409882515668869, 0.010364435613155365, 0.010518459603190422, 0.09771004319190979, 0.037823982536792755, 0.019979961216449738, 0.018303534016013145, 0.22492042183876038, 0.09256016463041306, 0.005498841404914856]], [[0.09139528125524521, 0.1232069656252861, 0.06926427036523819, 0.03596228361129761, 0.08677947521209717, 0.3523865342140198, 0.17220446467399597, 0.3048216700553894, 0.24129998683929443, 0.008230631239712238, 0.012852879241108894, 0.0024019270204007626, 0.003931952640414238, 0.002576343482360244, 0.13348431885242462, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005495021585375071, 0.009821278043091297, 0.006606503389775753, 0.0009270968730561435, 0.022634856402873993, 0.02637101709842682, 0.03666122257709503, 0.003247066168114543, 0.03138025477528572, 0.0023785934317857027, 0.007012520916759968, 0.0027185468934476376, 0.001623710268177092, 0.009003029204905033, 0.24841202795505524, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004891206510365009, 0.01856830157339573, 0.01660238206386566, 0.05400720611214638, 0.2678459584712982, 0.21548990905284882, 0.0901486948132515, 0.14165979623794556, 0.4387242794036865, 0.0060303402133286, 0.03774549812078476, 0.022296983748674393, 0.014843892306089401, 0.003844154067337513, 0.0701230987906456, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009136357344686985, 0.005524215288460255, 0.002000550739467144, 0.004360574297606945, 0.06230698525905609, 0.032116882503032684, 0.14447683095932007, 0.11250873655080795, 0.12456412613391876, 0.017903752624988556, 0.03641437739133835, 0.030236193910241127, 0.03817100450396538, 0.0020203718449920416, 0.24235397577285767, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.011458649300038815, 0.0028747334145009518, 0.0048751854337751865, 0.0034302298445254564, 0.032581884413957596, 0.009492963552474976, 0.29646721482276917, 0.024549754336476326, 0.5199102163314819, 0.07497825473546982, 0.039336495101451874, 0.23366358876228333, 0.2855432629585266, 0.0047793262638151646, 0.131587415933609, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0048281243070960045, 0.014400148764252663, 0.00021499136346392334, 0.00015902110317256302, 0.0008502291166223586, 0.005816742777824402, 0.03721616789698601, 0.31765323877334595, 0.006985681131482124, 9.90723492577672e-05, 0.0015535155544057488, 0.002471775049343705, 0.00966054666787386, 0.002636645222082734, 0.15553238987922668, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01824354939162731, 0.02838711440563202, 0.0006440957658924162, 0.00040316785452887416, 0.00041587575105950236, 0.0021029487252235413, 0.07766012847423553, 0.3384210765361786, 0.005884509067982435, 0.02229108288884163, 0.02292727865278721, 0.00326070049777627, 0.002748187631368637, 0.004811563994735479, 0.08466839045286179, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0009052195237018168, 0.00028935770387761295, 0.00010135041520697996, 4.4237076508579776e-05, 9.765469440026209e-05, 0.0003226006228942424, 0.0006174442823976278, 0.003764552064239979, 0.001191335148178041, 0.0005841490346938372, 0.001988127361983061, 0.0019700597040355206, 0.0006354944198392332, 0.0011416736524552107, 0.25631290674209595, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.007226317655295134, 0.015471585094928741, 0.027516253292560577, 0.0063530029729008675, 0.015222059562802315, 0.004327190574258566, 0.010739101096987724, 0.0023785619996488094, 0.053105201572179794, 0.0674574077129364, 0.31870341300964355, 0.4986713230609894, 0.027042971923947334, 0.0736011192202568, 0.116986483335495, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015794623643159866, 0.009404269978404045, 0.017993446439504623, 0.003823975333943963, 0.004969433881342411, 0.03679484874010086, 0.04242165759205818, 0.017222637310624123, 0.1201641708612442, 0.016131659969687462, 0.3518509864807129, 0.3061373829841614, 0.0458594486117363, 0.15943044424057007, 0.17968055605888367, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.006380036938935518, 0.028477374464273453, 0.006851766724139452, 0.005024573765695095, 0.02579522877931595, 0.052536945790052414, 0.0111169358715415, 0.0038714397232979536, 0.008046599105000496, 0.008921324275434017, 0.011395278386771679, 0.10255969315767288, 0.21638940274715424, 0.44467252492904663, 0.05895284563302994, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010142950341105461, 0.001643709372729063, 0.002422438468784094, 0.0009472724632360041, 0.0033483330626040697, 0.003415578044950962, 0.03889569267630577, 0.005287462379783392, 0.00042015319922938943, 0.0010667687747627497, 0.00740370387211442, 0.00895014964044094, 0.0067735291086137295, 0.017782215029001236, 0.26753443479537964, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.11724554747343063, 0.0023070531897246838, 0.004510094877332449, 0.0014967885799705982, 0.007825762964785099, 0.00018500315491110086, 0.013543304987251759, 0.0012864026939496398, 0.0007778326398693025, 0.00044295378029346466, 0.001640060218051076, 0.0014512997586280107, 0.002360806567594409, 0.2112705558538437, 0.19457924365997314, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09882069379091263, 0.014871560037136078, 0.005077258683741093, 0.0014827846316620708, 0.005620975513011217, 0.0024449406191706657, 0.07368315756320953, 0.06950978189706802, 0.0017206794582307339, 0.00039900749106891453, 0.0006052122334949672, 0.0005968212499283254, 0.004762541502714157, 0.0232950821518898, 0.2500154376029968, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.001020739320665598, 0.001402992638759315, 0.0006185534875839949, 0.0003395593084860593, 0.0013021298218518496, 0.0008022591937333345, 0.003452729433774948, 0.0026675688568502665, 0.0021077031269669533, 0.0008018113439902663, 0.0017594166565686464, 0.0005115982494316995, 0.0007778447470627725, 0.0008368113776668906, 0.13888627290725708, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005261753685772419, 0.005328452680259943, 0.1075906753540039, 0.007504252251237631, 0.18196941912174225, 0.2677178680896759, 0.18533208966255188, 0.041308093816041946, 0.04052837938070297, 0.0018225060775876045, 0.004738607443869114, 0.028365809470415115, 0.07867489755153656, 0.032602421939373016, 0.14697469770908356, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024903474375605583, 0.2637169063091278, 0.01148936152458191, 0.01806865818798542, 0.010384032502770424, 0.05497525632381439, 0.01011874619871378, 6.159161421237513e-05, 0.03404803201556206, 0.01315199863165617, 0.004086918197572231, 0.033981483429670334, 0.0007253359071910381, 0.0010365481721237302, 0.023150891065597534, 0.11621169000864029, 0.2792567312717438, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03176039457321167, 0.002004105830565095, 0.011469452641904354, 0.003235333366319537, 0.011606591753661633, 0.01332010142505169, 0.007885226979851723, 0.0010319099528715014, 0.0026684575714170933, 0.003885145066305995, 0.002207087352871895, 0.010414022952318192, 0.015553043223917484, 0.01973811537027359, 0.1639232188463211, 0.16788142919540405, 0.08717074245214462, 0.024576181545853615, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24842531979084015, 0.031220050528645515, 0.028132880106568336, 0.029530569911003113, 0.01766534335911274, 0.36354437470436096, 0.06892471760511398, 0.02528339996933937, 0.01102821622043848, 0.15825842320919037, 0.13755246996879578, 0.07390110194683075, 0.19022952020168304, 0.1824880689382553, 0.1432848572731018, 0.14762163162231445, 0.09094145894050598, 0.023598572239279747, 0.2273045778274536, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0013664831640198827, 0.001714985934086144, 0.0013615208445116878, 0.0015855998499318957, 0.0011547008762136102, 0.007221538573503494, 0.01537399459630251, 0.020302001386880875, 0.0011185031617060304, 0.001242821803316474, 0.0004577837826218456, 0.0013307477347552776, 6.100967220845632e-05, 3.943840420106426e-05, 0.16435295343399048, 0.10424397885799408, 0.7145561575889587, 0.21233327686786652, 0.5272893309593201, 0.04291817173361778, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0006725311395712197, 0.000846685899887234, 0.001614874112419784, 0.000348375499015674, 0.0019150535808876157, 0.01370947528630495, 0.026421356946229935, 0.08118636161088943, 0.0008913385099731386, 0.0004401778569445014, 0.0003709472657646984, 0.0007744845934212208, 0.002328733913600445, 0.0003664834948722273, 0.14579549431800842, 0.11001076549291611, 0.4734446108341217, 0.06134912371635437, 0.2925608456134796, 0.02150837518274784, 0.19962187111377716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011207095347344875, 0.029191432520747185, 0.015348215587437153, 0.012354064732789993, 0.002485303906723857, 0.7150441408157349, 0.0764552503824234, 0.14450958371162415, 0.0016117440536618233, 0.008765846490859985, 0.011787951923906803, 0.002862851833924651, 0.022502094507217407, 0.007210019044578075, 0.007054056040942669, 0.17212024331092834, 0.1419786959886551, 0.05631781369447708, 0.2185172289609909, 0.002532752463594079, 0.0032626313623040915, 0.18381445109844208, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006926322355866432, 0.0050496323965489864, 0.010020078159868717, 0.021360181272029877, 0.0027102867607027292, 0.028520535677671432, 0.05918040871620178, 0.23060235381126404, 0.019199691712856293, 0.09477535635232925, 0.013206732459366322, 0.0014817069750279188, 0.0153219448402524, 0.01803957298398018, 0.07950127124786377, 0.09107878059148788, 0.12160263955593109, 0.2150201052427292, 0.3705081045627594, 0.07164584845304489, 0.05021890252828598, 0.14392021298408508, 0.39638784527778625, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.009242678992450237, 0.05580667033791542, 0.014326682314276695, 0.04630666971206665, 0.010674487799406052, 0.5850453972816467, 0.4108324944972992, 0.4116209149360657, 0.007144990377128124, 0.20661039650440216, 0.037308260798454285, 0.054067905992269516, 0.037599414587020874, 0.03113422356545925, 0.22261686623096466, 0.2121918499469757, 0.20806513726711273, 0.15205760300159454, 0.38131871819496155, 0.1009124368429184, 0.09936784207820892, 0.07077471911907196, 0.05006752535700798, 0.14871110022068024, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0023711349349468946, 0.019731320440769196, 0.027566438540816307, 0.03758935630321503, 0.022646954283118248, 0.06538618355989456, 0.01152126956731081, 0.014797273091971874, 0.003413880243897438, 0.024214325472712517, 0.019466044381260872, 0.007235943805426359, 0.0008611958473920822, 0.0011126803001388907, 0.268255352973938, 0.21685828268527985, 0.23333710432052612, 0.06609098613262177, 0.12803798913955688, 0.1004808098077774, 0.025170300155878067, 0.04069148004055023, 0.10828333348035812, 0.10351972281932831, 0.29450517892837524, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08772679418325424, 0.02003292553126812, 0.09465871006250381, 0.41126132011413574, 0.07995565980672836, 0.5143890976905823, 0.1155472919344902, 0.01320470031350851, 0.02149844542145729, 0.06702866405248642, 0.6884661316871643, 0.09638151526451111, 0.35587188601493835, 0.2170087993144989, 0.019593046978116035, 0.05205162987112999, 0.22306090593338013, 0.049221184104681015, 0.061203524470329285, 0.09776578843593597, 0.06183243915438652, 0.17444021999835968, 0.321644127368927, 0.054029058665037155, 0.2629997134208679, 0.2757931053638458, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01343127153813839, 0.0019279895350337029, 0.01925632171332836, 0.04226915165781975, 0.005290344823151827, 0.5555825233459473, 0.06846548616886139, 0.006453313864767551, 0.019162334501743317, 0.0017575293313711882, 0.2967261075973511, 0.11721283942461014, 0.4438721835613251, 0.1899448037147522, 0.007863422855734825, 0.05800137668848038, 0.32540804147720337, 0.13333332538604736, 0.05756821855902672, 0.12640602886676788, 0.11846329271793365, 0.2918737828731537, 0.3632459342479706, 0.18816226720809937, 0.6433262228965759, 0.3291742205619812, 0.12170911580324173, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12789316475391388, 0.004323228262364864, 0.03538274019956589, 0.05581461265683174, 0.020947236567735672, 0.09860846400260925, 0.11394336074590683, 0.010361305437982082, 0.011101406998932362, 0.33580121397972107, 0.13689599931240082, 0.038663506507873535, 0.19725953042507172, 0.10533706098794937, 0.008538279682397842, 0.11078674346208572, 0.40781712532043457, 0.06261185556650162, 0.05779192969202995, 0.18194560706615448, 0.1120922714471817, 0.5645142793655396, 0.33037880063056946, 0.18058234453201294, 0.6155731678009033, 0.21430827677249908, 0.044265877455472946, 0.20548948645591736, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007053391542285681, 0.012331487610936165, 0.008611395955085754, 0.031008008867502213, 0.004283395130187273, 0.0029549654573202133, 0.00849387887865305, 0.008564120158553123, 0.02629040740430355, 0.009985123760998249, 0.00761940935626626, 0.003499145619571209, 0.0015691317385062575, 0.005600257311016321, 0.5214234590530396, 0.08288691937923431, 0.2962968051433563, 0.2819015085697174, 0.19574381411075592, 0.1136796846985817, 0.07755676656961441, 0.20596812665462494, 0.3330870270729065, 0.21944326162338257, 0.22804425656795502, 0.1688224822282791, 0.2872299253940582, 0.13759873807430267, 0.09907422959804535, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0007030746201053262, 0.0001308645587414503, 0.0001913319865707308, 0.00016671058256179094, 0.000299752748105675, 0.0001608166057849303, 0.004501530434936285, 0.0010771069210022688, 0.003937124740332365, 0.001599485520273447, 0.0007339937728829682, 0.0030779645312577486, 3.4502605558373034e-05, 9.700484952190891e-05, 0.15641583502292633, 0.11118441820144653, 0.6110438108444214, 0.6292654871940613, 0.5805363655090332, 0.22765980660915375, 0.4274957776069641, 0.6573506593704224, 0.6816673278808594, 0.5361799597740173, 0.320940226316452, 0.3845328688621521, 0.6242536306381226, 0.41633498668670654, 0.12922972440719604, 0.01991792768239975, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027913473546504974, 0.10055015236139297, 0.005828284192830324, 0.007361504249274731, 0.0010143647668883204, 0.000654859293717891, 0.0101061025634408, 0.029607031494379044, 0.04485415667295456, 0.09235014766454697, 0.05163425952196121, 0.03075464628636837, 0.027050884440541267, 0.021472401916980743, 0.18064866960048676, 0.10675505548715591, 0.1912444829940796, 0.23975566029548645, 0.32351911067962646, 0.046362437307834625, 0.08004549145698547, 0.3363644778728485, 0.2706483006477356, 0.26792168617248535, 0.2952979505062103, 0.4496033787727356, 0.1126319095492363, 0.5116660594940186, 0.015820369124412537, 0.030236991122364998, 0.03603934869170189, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0011193754617124796, 0.03864011913537979, 0.0033454783260822296, 0.0006957795703783631, 0.001480268081650138, 0.0012079592561349273, 0.00020605533791240305, 0.0011212154058739543, 0.0015670693246647716, 0.0014121911954134703, 0.0012700740480795503, 0.0019415348069742322, 0.001359732006676495, 0.0011440571397542953, 0.23876120150089264, 0.2233639359474182, 0.0911012589931488, 0.12918633222579956, 0.17958812415599823, 0.037158817052841187, 0.06043876335024834, 0.43303725123405457, 0.3349981904029846, 0.09061599522829056, 0.23225362598896027, 0.1514965295791626, 0.09056703746318817, 0.2480165809392929, 0.056160230189561844, 0.015552842989563942, 0.007365798112004995, 0.17054231464862823, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.012943120673298836, 0.020876264199614525, 0.04825761169195175, 0.03707631304860115, 0.015636419877409935, 0.11923719942569733, 0.021652603521943092, 0.026653259992599487, 0.020431919023394585, 0.03287035599350929, 0.10921605676412582, 0.11103712767362595, 0.08490956574678421, 0.05352960154414177, 0.1791488379240036, 0.09585364907979965, 0.22669152915477753, 0.08040254563093185, 0.0638674795627594, 0.15364862978458405, 0.13237975537776947, 0.3887532651424408, 0.5357696413993835, 0.07155110687017441, 0.4139500856399536, 0.05426981300115585, 0.1238613948225975, 0.07816720753908157, 0.14353296160697937, 0.021915707737207413, 0.02897939831018448, 0.22262324392795563, 0.4835837185382843, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.010143280029296875, 0.0011783033842220902, 0.07699523866176605, 0.04151652753353119, 0.013031265698373318, 0.6595657467842102, 0.04001229628920555, 0.015414847061038017, 0.05828738585114479, 0.00582495890557766, 0.39538952708244324, 0.3540988564491272, 0.5535411834716797, 0.14920510351657867, 0.05510678142309189, 0.05190133675932884, 0.3522363007068634, 0.14802464842796326, 0.07656959444284439, 0.12417534738779068, 0.17628712952136993, 0.33604755997657776, 0.38481405377388, 0.20552395284175873, 0.5797679424285889, 0.3262830972671509, 0.19466114044189453, 0.045280374586582184, 0.2712458372116089, 0.041196610778570175, 0.08666794002056122, 0.3327068090438843, 0.1922111064195633, 0.10969121754169464, NaN, NaN, NaN, NaN, NaN, NaN], [0.10365689545869827, 0.011393263004720211, 0.09083462506532669, 0.05552159622311592, 0.021694108843803406, 0.23093751072883606, 0.12655670940876007, 0.02638416364789009, 0.016898566856980324, 0.4334920644760132, 0.1302367001771927, 0.07987051457166672, 0.26015403866767883, 0.07882147282361984, 0.06412448734045029, 0.10818891227245331, 0.3937702178955078, 0.030490810051560402, 0.030189264565706253, 0.11243001371622086, 0.07142115384340286, 0.3648340702056885, 0.2467786818742752, 0.13009557127952576, 0.5037410855293274, 0.18716548383235931, 0.08825942128896713, 0.23451530933380127, 0.24434491991996765, 0.03496113047003746, 0.04431905224919319, 0.3934983015060425, 0.31427451968193054, 0.05462265387177467, 0.2524711489677429, NaN, NaN, NaN, NaN, NaN], [0.0009046280756592751, 0.006186267826706171, 0.001710598124191165, 0.0040000369772315025, 0.0010556421475484967, 0.00010012275743065402, 0.000467440317152068, 0.00034073027200065553, 0.012450831942260265, 0.001776019111275673, 0.0016348852077499032, 0.0004490323772188276, 0.00023723821504972875, 0.0005369102582335472, 0.2610536217689514, 0.06088699772953987, 0.23725801706314087, 0.2046121060848236, 0.14171433448791504, 0.06688592582941055, 0.06064169481396675, 0.14286598563194275, 0.21723276376724243, 0.13491223752498627, 0.2083195000886917, 0.15285742282867432, 0.34066644310951233, 0.18166381120681763, 0.10532425343990326, 0.06318715214729309, 0.052211396396160126, 0.20970472693443298, 0.20715771615505219, 0.28281068801879883, 0.13935938477516174, 0.11923542618751526, NaN, NaN, NaN, NaN], [0.00040706052095629275, 5.995776882627979e-05, 0.00011266738147241995, 0.00010974665929097682, 0.00022393744438886642, 7.468188414350152e-05, 0.00239625689573586, 0.0004222780407872051, 0.002755024004727602, 0.0011263962369412184, 0.0004159261588938534, 0.0013214137870818377, 1.3015362128498964e-05, 3.146446033497341e-05, 0.15343648195266724, 0.09884612262248993, 0.5530695915222168, 0.6301063299179077, 0.5187459588050842, 0.28427499532699585, 0.33059176802635193, 0.49595603346824646, 0.6107674241065979, 0.387560099363327, 0.3283739984035492, 0.3905918300151825, 0.5949583053588867, 0.2912430167198181, 0.19163259863853455, 0.03091937117278576, 0.3911139667034149, 0.3233675956726074, 0.421701043844223, 0.6310504674911499, 0.4068542718887329, 0.13317596912384033, 0.02126597985625267, NaN, NaN, NaN], [0.02487853355705738, 0.06922142952680588, 0.005931189749389887, 0.005149703938513994, 0.0007503133383579552, 0.00046759017277508974, 0.004864065907895565, 0.010271446779370308, 0.03885169327259064, 0.0494176521897316, 0.032662954181432724, 0.015474021434783936, 0.005468437913805246, 0.0031831569503992796, 0.16160887479782104, 0.07192745804786682, 0.09934075176715851, 0.15662430226802826, 0.18248029053211212, 0.021172231063246727, 0.037516966462135315, 0.12766626477241516, 0.09711621701717377, 0.09662153571844101, 0.1303528994321823, 0.3114719092845917, 0.1600099802017212, 0.265144020318985, 0.011710498481988907, 0.02471126988530159, 0.012725233100354671, 0.12533646821975708, 0.446529746055603, 0.11092787981033325, 0.45893827080726624, 0.011159577406942844, 0.028070949018001556, 0.024378135800361633, NaN, NaN], [0.0006016235565766692, 0.010655699297785759, 0.0012552555417641997, 0.0004406629304867238, 0.0006771506741642952, 0.0004804672207683325, 8.584682655055076e-05, 0.00018533790716901422, 0.0020008538849651814, 0.0008522755815647542, 0.0005471827462315559, 0.0006654397584497929, 0.0003326669684611261, 0.00020969027536921203, 0.18202657997608185, 0.21178482472896576, 0.0713806003332138, 0.12116114795207977, 0.16551871597766876, 0.025692136958241463, 0.03932836279273033, 0.255863755941391, 0.20887790620326996, 0.05500240623950958, 0.14075487852096558, 0.158308207988739, 0.10016348958015442, 0.22940821945667267, 0.06542190909385681, 0.016673747450113297, 0.011679067276418209, 0.21266934275627136, 0.27460965514183044, 0.08977667987346649, 0.1985965520143509, 0.05640871822834015, 0.014301197603344917, 0.004748867359012365, 0.1251523643732071, NaN], [0.0006660889484919608, 0.0011989487102255225, 0.006168409250676632, 0.0007392434636130929, 0.002072105184197426, 0.0013732375809922814, 0.001215140800923109, 8.942947169998661e-05, 0.0032219376880675554, 0.00034276655060239136, 0.0006051870877854526, 0.0004003554640803486, 0.0006330502219498158, 9.228585986420512e-05, 0.13989190757274628, 0.11377177387475967, 0.4656391441822052, 0.26672884821891785, 0.20802536606788635, 0.1860857605934143, 0.16829806566238403, 0.19711202383041382, 0.3023360073566437, 0.035885076969861984, 0.11114621162414551, 0.21048156917095184, 0.27827921509742737, 0.11178875714540482, 0.13154125213623047, 0.3096882104873657, 0.09530708193778992, 0.2201821655035019, 0.1989239901304245, 0.27841058373451233, 0.15223632752895355, 0.2206900417804718, 0.34536775946617126, 0.09229245036840439, 0.24595825374126434, 0.2865155339241028]], [[0.04622220993041992, 0.12740419805049896, 0.05372706800699234, 0.5582705140113831, 0.030120277777314186, 0.3703221380710602, 0.020304178819060326, 0.3357560634613037, 0.11819478869438171, 0.0765489861369133, 0.09261158853769302, 0.03858334198594093, 0.13079233467578888, 0.0447748564183712, 0.11706516146659851, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0919138491153717, 0.05798470228910446, 0.02827676385641098, 0.34965166449546814, 0.05504997447133064, 0.1526506543159485, 0.09941896051168442, 0.4367760419845581, 0.061004042625427246, 0.5390062928199768, 0.28723591566085815, 0.15840129554271698, 0.2018149495124817, 0.11561664938926697, 0.1249081939458847, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.032068803906440735, 0.0549696609377861, 0.018587671220302582, 0.2202640324831009, 0.0011182812741026282, 0.03810814768075943, 0.027008401229977608, 0.3763306438922882, 0.11146998405456543, 0.16719762980937958, 0.13283231854438782, 0.014421377331018448, 0.07254088670015335, 0.007401765324175358, 0.20662666857242584, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10753453522920609, 0.479284405708313, 0.009764611721038818, 0.0431443527340889, 0.0008862981921993196, 0.03188035264611244, 0.00600279588252306, 0.43093177676200867, 0.08460848033428192, 0.18502341210842133, 0.038902610540390015, 0.030237559229135513, 0.1820157915353775, 0.03367093205451965, 0.14427724480628967, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.013928310945630074, 0.032752107828855515, 0.0024797581136226654, 0.10617181658744812, 0.0002726189268287271, 0.011333486996591091, 0.005626056343317032, 0.05421115458011627, 0.020341530442237854, 0.0548044852912426, 0.027503041550517082, 0.005752534605562687, 0.033552803099155426, 0.008454940281808376, 0.388910174369812, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.15046736598014832, 0.296213299036026, 0.044096194207668304, 0.05168119817972183, 0.02727358601987362, 0.04717152938246727, 0.0016543868696317077, 0.035376399755477905, 0.027143586426973343, 0.0870317667722702, 0.05812281742691994, 0.06705813109874725, 0.3147181272506714, 0.39039844274520874, 0.23394177854061127, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.14644725620746613, 0.5605929493904114, 0.11812092363834381, 0.5902084112167358, 0.021858595311641693, 0.10718227922916412, 0.007383488584309816, 0.019886687397956848, 0.06570647656917572, 0.10820640623569489, 0.1357717514038086, 0.025582531467080116, 0.077891044318676, 0.061965201050043106, 0.164744034409523, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.049012791365385056, 0.35138410329818726, 0.26388463377952576, 0.7301797866821289, 0.014552393928170204, 0.24720129370689392, 0.0041521950624883175, 0.07795857638120651, 0.014070906676352024, 0.04667593538761139, 0.1480453461408615, 0.010990227572619915, 0.20039354264736176, 0.17517414689064026, 0.0717916414141655, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09980960935354233, 0.4834202826023102, 0.20237547159194946, 0.5161312222480774, 0.2011035680770874, 0.31254804134368896, 0.023049525916576385, 0.09284620732069016, 0.030714770779013634, 0.009841320104897022, 0.03625232353806496, 0.02249438874423504, 0.030981028452515602, 0.01249231118708849, 0.19809871912002563, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2242409735918045, 0.5898000001907349, 0.2996082305908203, 0.6961580514907837, 0.3950251638889313, 0.824604332447052, 0.0551396869122982, 0.5436567068099976, 0.06683327257633209, 0.03568824753165245, 0.060814060270786285, 0.00592254800722003, 0.012778226286172867, 0.017990900203585625, 0.1082865446805954, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03427329286932945, 0.7018846869468689, 0.18350760638713837, 0.5559015274047852, 0.03810380771756172, 0.7226935029029846, 0.05184842646121979, 0.881024181842804, 0.06315085291862488, 0.03384441137313843, 0.014913397841155529, 0.002015632577240467, 0.008405282162129879, 0.0011906703002750874, 0.2768104076385498, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.022437993437051773, 0.7336767315864563, 0.2893984615802765, 0.7315550446510315, 0.021726222708821297, 0.3247562646865845, 0.05117126554250717, 0.7097986340522766, 0.03149837628006935, 0.017582548782229424, 0.017906883731484413, 0.004864181391894817, 0.0014982494758442044, 0.0005988480988889933, 0.17147301137447357, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.279982328414917, 0.427709698677063, 0.4798988997936249, 0.811837911605835, 0.5607104301452637, 0.3233453035354614, 0.03364620357751846, 0.48738226294517517, 0.20507316291332245, 0.2806957960128784, 0.20560167729854584, 0.021487781777977943, 0.0051806773990392685, 0.018182942643761635, 0.10378202050924301, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.15081651508808136, 0.5779510736465454, 0.21354816854000092, 0.8126901984214783, 0.041816346347332, 0.5376638174057007, 0.02729017473757267, 0.45972490310668945, 0.1708957701921463, 0.17148789763450623, 0.06268936395645142, 0.0045938147231936455, 0.0036332160234451294, 0.0009066996863111854, 0.10311751067638397, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009540104307234287, 0.03889232128858566, 0.016071060672402382, 0.08366316556930542, 0.004574422258883715, 0.029401082545518875, 0.00834547821432352, 0.0893266350030899, 0.14732055366039276, 0.09065960347652435, 0.14173488318920135, 0.042114999145269394, 0.004022075328975916, 0.003513866104185581, 0.1347859650850296, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.17597882449626923, 0.03865775838494301, 0.04927876219153404, 0.19269852340221405, 0.07631995528936386, 0.03202155977487564, 0.04315444082021713, 0.0381813645362854, 0.14437337219715118, 0.14268529415130615, 0.12548406422138214, 0.22065725922584534, 0.007455701474100351, 0.012540786527097225, 0.13194040954113007, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12168548256158829, 0.12690430879592896, 0.03319493681192398, 0.044549524784088135, 0.022643521428108215, 0.12293753027915955, 0.012858373112976551, 0.056580886244773865, 0.0409478023648262, 0.5390252470970154, 0.04499629884958267, 0.010665545240044594, 0.0012580851325765252, 0.0006077282596379519, 0.16003872454166412, 0.13124778866767883, 0.015335792675614357, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004976227879524231, 0.0016218257369473577, 0.10218203067779541, 0.005807417444884777, 0.025330372154712677, 0.00805770605802536, 0.0010953968157991767, 0.007808555383235216, 0.03332183510065079, 0.01014297641813755, 0.0378553569316864, 0.0012688467977568507, 0.0070253219455480576, 0.006525768432766199, 0.1611432433128357, 0.19323189556598663, 0.005229663103818893, 0.005805561784654856, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018298039212822914, 0.043392445892095566, 0.026758581399917603, 0.06685060262680054, 0.007846164517104626, 0.0070086256600916386, 0.0011090404586866498, 0.0016357558779418468, 0.015295942313969135, 0.022091375663876534, 0.08676162362098694, 0.0013220091350376606, 0.0007799563463777304, 0.0005145008908584714, 0.5814905166625977, 0.06695510447025299, 0.08997365087270737, 0.32878753542900085, 0.35321861505508423, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16791731119155884, 0.01838838867843151, 0.03170344606041908, 0.04746389389038086, 0.024931352585554123, 0.002624210435897112, 0.3320338726043701, 0.32248422503471375, 0.021048149093985558, 0.02857070416212082, 0.11922428011894226, 4.079664358869195e-05, 0.0002566495386417955, 0.0005197013379074633, 0.1538068950176239, 0.1452476531267166, 0.07996584475040436, 0.2002653181552887, 0.13149262964725494, 0.005022347904741764, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03376027196645737, 0.001082546659745276, 0.003266592975705862, 0.006257645785808563, 0.023632841184735298, 0.00021245618700049818, 0.033721838146448135, 0.15340450406074524, 0.009442711248993874, 0.006162047851830721, 0.09923229366540909, 0.0001386175281368196, 0.0008165750186890364, 0.0010916005121544003, 0.14602994918823242, 0.1274433135986328, 0.13577045500278473, 0.16066212952136993, 0.1959238052368164, 0.04180024936795235, 0.06788772344589233, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04221357777714729, 0.03857824206352234, 0.004161412362009287, 0.06419923156499863, 0.010648604482412338, 0.008165394887328148, 0.04070910066366196, 0.34736329317092896, 0.0012154168216511607, 0.1630050241947174, 0.07001504302024841, 0.0033116117119789124, 0.00023883172252681106, 0.00045473958016373217, 0.2740376889705658, 0.14809708297252655, 0.29017606377601624, 0.22457490861415863, 0.17088554799556732, 0.041788797825574875, 0.013634788803756237, 0.02984887920320034, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007271567825227976, 0.0015110730892047286, 0.0014769553672522306, 0.0053740208968520164, 0.0038654205854982138, 0.0024983601178973913, 0.049697574228048325, 0.27208074927330017, 0.0006182760698720813, 0.014045008458197117, 0.00131281279027462, 0.00040628391434438527, 0.00037906834040768445, 0.0001199298130813986, 0.006693295668810606, 0.21402230858802795, 0.012405444867908955, 0.0014808804262429476, 0.0009161182679235935, 0.0035427443217486143, 0.0017166208708658814, 0.001927618752233684, 0.015056394040584564, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08829134702682495, 0.11286511272192001, 0.004967967513948679, 0.006996258161962032, 0.0014454894699156284, 0.006397548597306013, 0.01389994379132986, 0.27431485056877136, 0.0018983082845807076, 0.09154568612575531, 0.022492842748761177, 0.0017391144065186381, 0.000634143827483058, 4.5783879613736644e-05, 0.318096399307251, 0.10794443637132645, 0.13477572798728943, 0.046750620007514954, 0.03419584408402443, 0.30604344606399536, 0.11879221349954605, 0.08022946119308472, 0.11745522916316986, 0.21712547540664673, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02142007276415825, 0.007001234218478203, 0.00761477230116725, 0.018849696964025497, 0.010492328554391861, 0.01844215951859951, 0.008208145387470722, 0.01109394058585167, 0.006335548125207424, 0.01884968765079975, 0.01652243174612522, 0.016355833038687706, 0.0014795949682593346, 0.0011322565842419863, 0.27169719338417053, 0.06259628385305405, 0.21873348951339722, 0.248628169298172, 0.2344663441181183, 0.09133727103471756, 0.05752522125840187, 0.03945200890302658, 0.39403918385505676, 0.15040725469589233, 0.009099425747990608, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17013461887836456, 0.14343884587287903, 0.017679741606116295, 0.10850679129362106, 0.01231957133859396, 0.010847942903637886, 0.04900640249252319, 0.023357992991805077, 0.014735743403434753, 0.014097570441663265, 0.012582896277308464, 0.0010529988212510943, 0.00046457236749120057, 0.0006211225991137326, 0.5663455724716187, 0.06400181353092194, 0.3208324611186981, 0.5040323138237, 0.6282902359962463, 0.04389061778783798, 0.08030739426612854, 0.10539824515581131, 0.1485716998577118, 0.08085520565509796, 0.13963551819324493, 0.0947280004620552, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1586649864912033, 0.08337923884391785, 0.0181503314524889, 0.22676831483840942, 0.016727542504668236, 0.015186772681772709, 0.0050455182790756226, 0.00688449339941144, 0.025511443614959717, 0.20239992439746857, 0.024231791496276855, 0.0023393011651933193, 0.0011192933889105916, 0.0005647524958476424, 0.390881210565567, 0.0935494601726532, 0.3055664598941803, 0.46751275658607483, 0.6914730072021484, 0.12860655784606934, 0.15726737678050995, 0.2987912595272064, 0.1529359668493271, 0.062232255935668945, 0.041881486773490906, 0.03399288281798363, 0.026789270341396332, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3443087935447693, 0.28029316663742065, 0.23536846041679382, 0.34415915608406067, 0.11761639267206192, 0.006012732163071632, 0.008058828301727772, 0.005314267706125975, 0.013309409841895103, 0.09906232357025146, 0.10091385245323181, 0.018941059708595276, 0.025248508900403976, 0.014945760369300842, 0.7436007857322693, 0.012478480115532875, 0.051689472049474716, 0.7194163799285889, 0.8485123515129089, 0.006671697832643986, 0.03636787086725235, 0.05433559790253639, 0.01463489979505539, 0.0011851346353068948, 0.0010049004340544343, 0.012586181983351707, 0.0039429632015526295, 0.0029262336902320385, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0022638223599642515, 0.004991845227777958, 0.004655482713133097, 0.0007185174035839736, 0.0013901105849072337, 0.011776956729590893, 0.0005479936371557415, 0.00022604972764384001, 0.00024645475787110627, 0.009541304782032967, 0.011744895949959755, 0.0007132806931622326, 0.27867355942726135, 0.02834550105035305, 0.007979176938533783, 0.16095376014709473, 0.10161679983139038, 0.15561290085315704, 0.27214428782463074, 0.06339859217405319, 0.047669682651758194, 0.16775988042354584, 0.30333516001701355, 0.29585903882980347, 0.026492541655898094, 0.03390856087207794, 0.020966142416000366, 0.027538424357771873, 0.040642742067575455, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024570701643824577, 0.00167787482496351, 0.004072254989296198, 0.00223688711412251, 0.007143567781895399, 0.00014352552534546703, 0.0004634522774722427, 0.0016921478090807796, 0.003620122792199254, 0.007754941936582327, 0.011850811541080475, 0.0027722271624952555, 9.3724018370267e-05, 0.02145184949040413, 0.15506701171398163, 0.1701768934726715, 0.015393235720694065, 0.0020776872988790274, 0.011533004231750965, 0.013215321116149426, 0.004845780786126852, 0.011772604659199715, 0.006262979004532099, 0.00390799343585968, 0.007256041280925274, 0.0014780729543417692, 0.007152961101382971, 0.1450572907924652, 0.009833375923335552, 0.004788131918758154, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01723022572696209, 0.08018677681684494, 0.007713299244642258, 0.004271229729056358, 0.0005464836140163243, 0.00456921337172389, 0.0031762931030243635, 0.009469777345657349, 0.000385247083613649, 0.01870143786072731, 0.033109456300735474, 0.004042719956487417, 0.004976211115717888, 0.005646048113703728, 0.19230251014232635, 0.27953270077705383, 0.3106633424758911, 0.3078516721725464, 0.2835734188556671, 0.23220741748809814, 0.10028243064880371, 0.059542566537857056, 0.10900203883647919, 0.24247398972511292, 0.19294817745685577, 0.04455278813838959, 0.032558612525463104, 0.2623904049396515, 0.04071282595396042, 0.07101175934076309, 0.01397540420293808, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.016216034069657326, 0.04777013510465622, 0.01620146818459034, 0.010810854844748974, 0.16034351289272308, 0.006931359879672527, 0.0032006967812776566, 0.032106515020132065, 0.0003033989341929555, 0.015325331129133701, 0.006036583799868822, 0.12791146337985992, 0.19952742755413055, 0.023708127439022064, 0.18307197093963623, 0.15828359127044678, 0.26215362548828125, 0.1828027367591858, 0.3383132517337799, 0.14976613223552704, 0.17187725007534027, 0.16098640859127045, 0.10713529586791992, 0.2253616452217102, 0.27887699007987976, 0.0991593673825264, 0.1987481713294983, 0.2010713517665863, 0.24892166256904602, 0.09143882989883423, 0.028894133865833282, 0.0226773452013731, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014499284327030182, 0.035677529871463776, 0.009275808930397034, 0.01653297245502472, 0.006223962642252445, 0.0020693510305136442, 0.007680083625018597, 0.013822571374475956, 0.00040966575033962727, 0.0038025544490665197, 0.013774569146335125, 0.006069935858249664, 0.004488381557166576, 0.005977130029350519, 0.217429518699646, 0.08621957898139954, 0.39239373803138733, 0.32060059905052185, 0.6169360876083374, 0.04211895540356636, 0.07954877614974976, 0.28241875767707825, 0.1073535904288292, 0.10431969910860062, 0.28138864040374756, 0.05428503826260567, 0.29005417227745056, 0.2829020619392395, 0.1771886944770813, 0.12728992104530334, 0.029228007420897484, 0.09527892619371414, 0.030012397095561028, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03237156197428703, 0.013441890478134155, 0.0194883793592453, 0.09343220293521881, 0.05379915237426758, 0.004893247038125992, 0.0011929833563044667, 0.009432576596736908, 0.015330814756453037, 0.14898745715618134, 0.018398255109786987, 0.01228779274970293, 0.00492482166737318, 0.0038985873106867075, 0.2601524889469147, 0.10387677699327469, 0.28899070620536804, 0.34778735041618347, 0.5978891849517822, 0.08856049180030823, 0.11093756556510925, 0.2773001492023468, 0.1387036144733429, 0.05535874143242836, 0.040542375296354294, 0.057020239531993866, 0.08593740314245224, 0.3575255870819092, 0.1780063509941101, 0.03115975111722946, 0.05683879926800728, 0.20087137818336487, 0.022991398349404335, 0.024780578911304474, NaN, NaN, NaN, NaN, NaN, NaN], [0.08357361704111099, 0.18220724165439606, 0.10462122410535812, 0.08245989680290222, 0.03124452568590641, 0.002170282183215022, 0.0020384257659316063, 0.004550496581941843, 0.003485089400783181, 0.036062099039554596, 0.0278666652739048, 0.011443988420069218, 0.01760544627904892, 0.013599698431789875, 0.3874043822288513, 0.027872784063220024, 0.11975038051605225, 0.8484699726104736, 0.9221431016921997, 0.010032964870333672, 0.05817321315407753, 0.14408904314041138, 0.03149182349443436, 0.0027255630120635033, 0.003546576714143157, 0.054592132568359375, 0.03846639767289162, 0.0179138146340847, 0.04004756733775139, 0.0025625908747315407, 0.006073353346437216, 0.017890095710754395, 0.006128084380179644, 0.0035659971181303263, 0.005842072889208794, NaN, NaN, NaN, NaN, NaN], [0.001995340920984745, 0.011527596041560173, 0.005334027577191591, 0.0006887424970045686, 0.0023407095577567816, 0.00276917009614408, 0.00029977987287566066, 0.00012230046559125185, 0.00026578022516332567, 0.008239910937845707, 0.009819538332521915, 0.000393931899452582, 0.605858564376831, 0.08989311754703522, 0.011135715991258621, 0.21095024049282074, 0.16082847118377686, 0.2551726996898651, 0.40046265721321106, 0.07841236889362335, 0.05558479577302933, 0.20925307273864746, 0.4381427764892578, 0.47918838262557983, 0.07096414268016815, 0.11106863617897034, 0.09138666838407516, 0.1393880993127823, 0.1506565660238266, 0.07743309438228607, 0.06943798065185547, 0.09801105409860611, 0.017720624804496765, 0.015859564766287804, 0.029157793149352074, 0.0392736941576004, NaN, NaN, NaN, NaN], [0.021298440173268318, 0.001658836961723864, 0.004600299056619406, 0.0025729055050760508, 0.015332063660025597, 0.00017298871534876525, 0.0005721640191040933, 0.00186175387352705, 0.0037871075328439474, 0.009124312549829483, 0.01116581168025732, 0.0031747270841151476, 0.00012207991676405072, 0.029056062921881676, 0.15163807570934296, 0.17935752868652344, 0.014263968914747238, 0.0022281131241470575, 0.011617614887654781, 0.022433524951338768, 0.0047986325807869434, 0.013686214573681355, 0.007696506567299366, 0.004939754959195852, 0.012488129548728466, 0.002878576284274459, 0.013457567431032658, 0.23303280770778656, 0.030022362247109413, 0.013181640766561031, 0.027029545977711678, 0.010247751139104366, 0.0006795030203647912, 0.0032072996255010366, 0.1104368045926094, 0.006663828622549772, 0.003364446572959423, NaN, NaN, NaN], [0.020229021087288857, 0.11621151119470596, 0.015550180338323116, 0.006284819450229406, 0.0013723199954256415, 0.013658476993441582, 0.005685316864401102, 0.02063058130443096, 0.001440295367501676, 0.022225895896553993, 0.07092871516942978, 0.007373427972197533, 0.00771017000079155, 0.006927240639925003, 0.16024509072303772, 0.3113161623477936, 0.29550519585609436, 0.2834082841873169, 0.292662650346756, 0.1380799263715744, 0.055221766233444214, 0.0487985797226429, 0.10219268500804901, 0.25612032413482666, 0.2569950222969055, 0.10279092192649841, 0.16084249317646027, 0.5340818166732788, 0.10305190831422806, 0.16831228137016296, 0.03310799598693848, 0.10521702468395233, 0.008185362443327904, 0.02029210887849331, 0.2447529286146164, 0.0189062412828207, 0.051586367189884186, 0.011271311901509762, NaN, NaN], [0.014029471203684807, 0.02389930933713913, 0.011611595749855042, 0.012217668816447258, 0.2477317750453949, 0.006976675242185593, 0.0035841658245772123, 0.022232146933674812, 0.0018886715406551957, 0.01750483363866806, 0.005654812324792147, 0.10889071226119995, 0.19916927814483643, 0.022882532328367233, 0.16074435412883759, 0.21913117170333862, 0.2667233347892761, 0.15068072080612183, 0.2934513986110687, 0.11010763049125671, 0.11770202964544296, 0.1548316478729248, 0.10880382359027863, 0.19848009943962097, 0.2926469147205353, 0.17939361929893494, 0.38748762011528015, 0.38622626662254333, 0.4369211196899414, 0.14473943412303925, 0.11290202289819717, 0.11878126114606857, 0.013051117770373821, 0.18458649516105652, 0.15622372925281525, 0.14840805530548096, 0.06742489337921143, 0.01624887064099312, 0.028317920863628387, NaN], [0.0032621105201542377, 0.006088452413678169, 0.012619324028491974, 0.008848619647324085, 0.17461968958377838, 8.660123421577737e-05, 0.0006109846872277558, 0.0007747155614197254, 0.003163054818287492, 0.017787659540772438, 0.029563669115304947, 0.0032195982057601213, 0.013336165808141232, 0.013171130791306496, 0.1387031376361847, 0.13670727610588074, 0.11102687567472458, 0.008893890306353569, 0.008979070000350475, 0.01785319298505783, 0.008134939707815647, 0.02043774165213108, 0.030145585536956787, 0.014907605946063995, 0.021436721086502075, 0.020207075402140617, 0.10284662246704102, 0.06823904067277908, 0.04208305850625038, 0.03810393810272217, 0.04656955599784851, 0.025087369605898857, 0.005296032875776291, 0.07358870655298233, 0.057817310094833374, 0.033472564071416855, 0.02220221422612667, 0.01758744567632675, 0.012124869041144848, 0.052647966891527176]], [[0.009570755064487457, 0.005546795669943094, 0.006825579330325127, 0.033384330570697784, 0.3769712448120117, 0.15916845202445984, 0.5290282368659973, 0.24695992469787598, 0.2377869039773941, 0.0913546234369278, 0.07570143043994904, 0.06522544473409653, 0.12397455424070358, 0.2645682692527771, 0.1787039041519165, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0061562443152070045, 0.040286894887685776, 0.0029807272367179394, 0.016133036464452744, 0.1151214987039566, 0.07519882172346115, 0.10128971189260483, 0.046498823910951614, 0.04111110791563988, 0.11845260113477707, 0.08915312588214874, 0.10556784272193909, 0.16933780908584595, 0.3531811535358429, 0.21578538417816162, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.14712950587272644, 0.04435151070356369, 0.015454337000846863, 0.01427951455116272, 0.08342041075229645, 0.005383625626564026, 0.10468690097332001, 0.05861024558544159, 0.08666124939918518, 0.15304753184318542, 0.23543620109558105, 0.2374279797077179, 0.10751555860042572, 0.10399115085601807, 0.23440681397914886, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0859314426779747, 0.15731151401996613, 0.005385389551520348, 0.04620514437556267, 0.010708490386605263, 0.006711416877806187, 0.012445325031876564, 0.056288186460733414, 0.097142793238163, 0.07020799815654755, 0.02479076385498047, 0.0890590250492096, 0.22972674667835236, 0.034618109464645386, 0.28529092669487, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07441635429859161, 0.018118128180503845, 0.016377849504351616, 0.003080169903114438, 0.20936372876167297, 0.0007255859090946615, 0.03578657656908035, 0.00550744216889143, 0.1172742024064064, 0.5684130191802979, 0.3980042636394501, 0.15252694487571716, 0.10817506164312363, 0.23486874997615814, 0.2619861364364624, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05188249424099922, 0.0069924332201480865, 0.0009591103880666196, 0.0061192926950752735, 0.002253405749797821, 0.006572761107236147, 0.004667140077799559, 0.11107926070690155, 0.03415685519576073, 0.010113962925970554, 0.006655086297541857, 0.010832482948899269, 0.03651394695043564, 0.040573474019765854, 0.2686486840248108, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08095332235097885, 0.02014574408531189, 0.011188640259206295, 0.0037319576367735863, 0.024485761299729347, 0.0018746056593954563, 0.04114176332950592, 0.034570205956697464, 0.009728988632559776, 0.07755846530199051, 0.09898480027914047, 0.0613434873521328, 0.09528356045484543, 0.1511603444814682, 0.2821846306324005, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04335615411400795, 0.026033984497189522, 0.03572213277220726, 0.017578190192580223, 0.05956277251243591, 0.01715734601020813, 0.011929154396057129, 0.28936532139778137, 0.0027683174703270197, 0.061091482639312744, 0.23734883964061737, 0.10397756844758987, 0.16337142884731293, 0.37352773547172546, 0.18409839272499084, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06077902019023895, 0.031166722998023033, 0.11759120225906372, 0.1409873068332672, 0.24215947091579437, 0.009796793572604656, 0.10265856236219406, 0.01014934666454792, 0.2757207751274109, 0.023714441806077957, 0.038815632462501526, 0.15303847193717957, 0.14991649985313416, 0.6824791431427002, 0.13190437853336334, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06505369395017624, 0.006089756730943918, 0.036541152745485306, 0.005829536356031895, 0.20233574509620667, 0.029401954263448715, 0.49993017315864563, 0.030510973185300827, 0.01976127363741398, 0.07993583381175995, 0.017815636470913887, 0.04079095646739006, 0.022992853075265884, 0.6425142288208008, 0.26567763090133667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6054520010948181, 0.07051455229520798, 0.2702813744544983, 0.029061302542686462, 0.13962645828723907, 0.07908772677183151, 0.4563634395599365, 0.02414957620203495, 0.02722080610692501, 0.03215296193957329, 0.015534932725131512, 0.009437407366931438, 0.0218642745167017, 0.08506882190704346, 0.4000338017940521, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3943043351173401, 0.11258544027805328, 0.12088752537965775, 0.0732470229268074, 0.030587676912546158, 0.056065596640110016, 0.2533946633338928, 0.04020307958126068, 0.03702285513281822, 0.018525324761867523, 0.009753274731338024, 0.01584538072347641, 0.006842197384685278, 0.013304048217833042, 0.2415902465581894, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09087645262479782, 0.0733630359172821, 0.03259122744202614, 0.05433432757854462, 0.028730718418955803, 0.026890264824032784, 0.0992540791630745, 0.042951032519340515, 0.1659460812807083, 0.017093859612941742, 0.006921885069459677, 0.0007972968742251396, 0.010357401333749294, 0.037234287708997726, 0.1852690428495407, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2766205668449402, 0.06249983608722687, 0.03302843123674393, 0.08374682813882828, 0.07296875864267349, 0.016804786399006844, 0.2612326145172119, 0.06074067950248718, 0.06402052938938141, 0.021471360698342323, 0.00216249143704772, 0.001582604949362576, 0.0037338242400437593, 0.005314995069056749, 0.23526467382907867, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005338736344128847, 0.013486125506460667, 0.016210375353693962, 0.00714905746281147, 0.01115293800830841, 0.008639699779450893, 0.009605110622942448, 0.01017976924777031, 0.008433598093688488, 0.06244685873389244, 0.040223702788352966, 0.009117859415709972, 0.005228321999311447, 0.0028589563444256783, 0.13790398836135864, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09661699831485748, 0.7619754076004028, 0.05676787346601486, 0.020180072635412216, 0.10883769392967224, 0.42711278796195984, 0.09064477682113647, 0.10612691193819046, 0.04782179743051529, 0.06935178488492966, 0.027948519214987755, 0.00755169615149498, 0.007339869160205126, 0.025803416967391968, 0.09292053431272507, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.042798254638910294, 0.23223945498466492, 0.062359996140003204, 0.01933804154396057, 0.04838808253407478, 0.30189236998558044, 0.0354127362370491, 0.019764740020036697, 0.00920741818845272, 0.0097093116492033, 0.0160877276211977, 0.0032758424058556557, 0.005296806804835796, 0.011010169051587582, 0.02110680378973484, 0.1301431953907013, 0.0347244068980217, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02002989500761032, 0.001048662350513041, 0.03834937512874603, 0.030392715707421303, 0.09750902652740479, 0.056120067834854126, 0.008173296228051186, 0.006944228895008564, 0.004440560005605221, 0.005061029922217131, 0.007118762470781803, 0.008411978371441364, 0.023608768358826637, 0.04182775691151619, 0.16016238927841187, 0.19350707530975342, 0.0006586865638382733, 0.008110460825264454, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.041295986622571945, 0.19780276715755463, 0.03777160495519638, 0.1712082475423813, 0.20935285091400146, 0.158755823969841, 0.3937656581401825, 0.684601902961731, 0.2584594190120697, 0.11237194389104843, 0.1112959012389183, 0.09882687777280807, 0.05429066717624664, 0.24210131168365479, 0.016339490190148354, 0.07742509245872498, 0.025898784399032593, 0.46813124418258667, 0.21566073596477509, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.26312491297721863, 0.2720799446105957, 0.005703570321202278, 0.0481516495347023, 0.027902500703930855, 0.0034437666181474924, 0.03425572067499161, 0.03555849939584732, 0.028000997379422188, 0.0429554246366024, 0.002753790933638811, 0.0017769382102414966, 0.002218457870185375, 0.003535473719239235, 0.1597488671541214, 0.15508510172367096, 0.002848779782652855, 0.006727630738168955, 0.01290579792112112, 0.0019038956379517913, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22248251736164093, 0.03185709938406944, 0.000688861298840493, 0.005810217931866646, 0.007679672911763191, 0.0008787074475549161, 0.07858764380216599, 0.14273476600646973, 0.07306984066963196, 0.02433006465435028, 0.011720307171344757, 0.013396549038589, 0.017704129219055176, 0.034836068749427795, 0.1453055441379547, 0.1506490558385849, 0.0018329949816688895, 0.0011812039883807302, 0.010563074611127377, 0.0007367127691395581, 0.0007524989196099341, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1531120240688324, 0.15391655266284943, 0.006810865830630064, 0.07720811665058136, 0.008951452560722828, 0.01149735413491726, 0.2822602391242981, 0.30408379435539246, 0.48283058404922485, 0.33028021454811096, 0.16095426678657532, 0.031167738139629364, 0.03355513513088226, 0.13962571322917938, 0.012790725566446781, 0.0463392436504364, 0.0861721858382225, 0.5342088341712952, 0.5262086987495422, 0.252642959356308, 0.014757110737264156, 0.02778990939259529, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03593587130308151, 0.03233448788523674, 0.22662676870822906, 0.405829519033432, 0.014032814651727676, 0.02822977490723133, 0.09231841564178467, 0.1225365549325943, 0.20093639194965363, 0.2508411109447479, 0.5826555490493774, 0.037383783608675, 0.07952429354190826, 0.10720134526491165, 0.15212680399417877, 0.08082517981529236, 0.10121051222085953, 0.3481808602809906, 0.41374534368515015, 0.38359278440475464, 0.07890304177999496, 0.1096968874335289, 0.1685827672481537, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.037364520132541656, 0.04119153320789337, 0.0012645104434341192, 0.021537767723202705, 0.000536995125003159, 0.0011436643544584513, 0.019049961119890213, 0.06139632686972618, 0.385105162858963, 0.13276730477809906, 0.24771228432655334, 0.04952799528837204, 0.04911990836262703, 0.11973114311695099, 0.021608887240290642, 0.1433362513780594, 0.13670213520526886, 0.10138670355081558, 0.1093992069363594, 0.236768901348114, 0.09415888041257858, 0.011134332977235317, 0.019298367202281952, 0.5348934531211853, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004867227748036385, 0.009626063518226147, 0.0003137234307359904, 0.0026314754504710436, 0.00027048110496252775, 0.000934475683607161, 0.007251756265759468, 0.03575620427727699, 0.40781450271606445, 0.05584407597780228, 0.040446195751428604, 0.005334825720638037, 0.007708138320595026, 0.06401336193084717, 0.010240204632282257, 0.024931270629167557, 0.02871265634894371, 0.20136752724647522, 0.1457405984401703, 0.13753218948841095, 0.13171687722206116, 0.07031083852052689, 0.04771474376320839, 0.5403124690055847, 0.04482616111636162, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19358457624912262, 0.2328234314918518, 0.0017398587660863996, 0.10100623220205307, 0.0019695234950631857, 0.1674531251192093, 0.4513051509857178, 0.6547151803970337, 0.030009860172867775, 0.7025956511497498, 0.1685936599969864, 0.03178222477436066, 0.13270388543605804, 0.23426049947738647, 0.010277668945491314, 0.026511939242482185, 0.12058579176664352, 0.09381356090307236, 0.09726550430059433, 0.13490843772888184, 0.36408668756484985, 0.19949088990688324, 0.09435784071683884, 0.45831772685050964, 0.1274537742137909, 0.014095090329647064, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09463346004486084, 0.5257620811462402, 0.0045187450014054775, 0.07222570478916168, 0.0025188177824020386, 0.1410406231880188, 0.06597349792718887, 0.0719805508852005, 0.09957849979400635, 0.17567123472690582, 0.18618373572826385, 0.02195402979850769, 0.042485080659389496, 0.12470933794975281, 0.00617468124255538, 0.12624163925647736, 0.03293433412909508, 0.07055910676717758, 0.06304988265037537, 0.23899653553962708, 0.15645378828048706, 0.07000429183244705, 0.02516351453959942, 0.06797400116920471, 0.07094329595565796, 0.1311238706111908, 0.21208471059799194, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027796348556876183, 0.06599752604961395, 0.002643989399075508, 0.029425768181681633, 0.008861851878464222, 0.013279970735311508, 0.25377023220062256, 0.2656356692314148, 0.055540941655635834, 0.027583830058574677, 0.004816746339201927, 0.3890189528465271, 0.12020140886306763, 0.33882811665534973, 0.0040408894419670105, 0.1118171289563179, 0.015469676814973354, 0.08768722414970398, 0.046650953590869904, 0.23542486131191254, 0.09032069146633148, 0.05012429133057594, 0.004171812906861305, 0.15006321668624878, 0.017805932089686394, 0.049085501581430435, 0.035517167299985886, 0.6428134441375732, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4147956669330597, 0.5514373779296875, 0.09636387228965759, 0.29775112867355347, 0.03436855599284172, 0.08799602836370468, 0.07023341208696365, 0.10276275128126144, 0.25543972849845886, 0.10302554070949554, 0.05857125297188759, 0.029829595237970352, 0.114840567111969, 0.33078575134277344, 0.07371985912322998, 0.09301143884658813, 0.13257478177547455, 0.1489255279302597, 0.18642880022525787, 0.318376362323761, 0.31357452273368835, 0.1382697969675064, 0.07457731664180756, 0.17392435669898987, 0.00920780934393406, 0.020603884011507034, 0.049020376056432724, 0.322329580783844, 0.3050764203071594, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07031518220901489, 0.001305539975874126, 0.0025430582463741302, 0.010662226937711239, 0.0007357596186921, 0.000663888524286449, 0.0014398572966456413, 0.0005107407923787832, 0.005960140842944384, 0.0030986208003014326, 0.0017578504048287868, 0.00018377922242507339, 1.743367283779662e-05, 4.847845411859453e-05, 0.15638960897922516, 0.17444664239883423, 0.0007958812057040632, 5.6854176364140585e-05, 0.0004179355164524168, 0.00013179269444663078, 0.00024977640714496374, 0.0001107741700252518, 7.639485556865111e-05, 0.0008396806661039591, 0.00030287212575785816, 0.00023763117496855557, 0.003834246192127466, 0.003433886216953397, 0.00015348535089287907, 0.00014843019016552716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24421003460884094, 0.03331591188907623, 0.07573812454938889, 0.33240795135498047, 0.006838400848209858, 0.008697851561009884, 0.06428743898868561, 0.06466686725616455, 0.006176145281642675, 0.06394235789775848, 0.09260299056768417, 0.19959890842437744, 0.02154124155640602, 0.021672323346138, 0.15025706589221954, 0.00841783918440342, 0.03505324944853783, 0.02469123899936676, 0.026689309626817703, 0.1500382125377655, 0.08861804753541946, 0.006530162878334522, 0.060150377452373505, 0.04669034481048584, 0.007807246409356594, 0.02131708152592182, 0.012364925816655159, 0.041818197816610336, 0.02841370552778244, 0.6981374621391296, 0.06836962699890137, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5462155342102051, 0.545982301235199, 0.3341628611087799, 0.5788259506225586, 0.08809857815504074, 0.06356553733348846, 0.022417092695832253, 0.0164126455783844, 0.00386660173535347, 0.10154324769973755, 0.14015790820121765, 0.0864240974187851, 0.34186482429504395, 0.22899740934371948, 0.05407746881246567, 0.0009672276792116463, 0.0037913541309535503, 0.00524782482534647, 0.006044968497008085, 0.07807419449090958, 0.026950905099511147, 0.0024354930501431227, 0.005482541862875223, 0.013836389407515526, 0.002816400956362486, 0.0006559633184224367, 0.002845867071300745, 0.018497759476304054, 0.19704575836658478, 0.41393977403640747, 0.4024144113063812, 0.00308317132294178, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.48888036608695984, 0.6578190326690674, 0.030819885432720184, 0.2205304652452469, 0.004883326590061188, 0.0656682699918747, 0.04461565986275673, 0.05094402655959129, 0.0005314986919984221, 0.15455113351345062, 0.10763049870729446, 0.1186080202460289, 0.14419804513454437, 0.1328149437904358, 0.09490374475717545, 0.0023347423411905766, 0.018236415460705757, 0.011423468589782715, 0.014267664402723312, 0.06272618472576141, 0.09006785601377487, 0.023437032476067543, 0.008957883343100548, 0.03532397374510765, 0.006200278177857399, 0.0002018583327298984, 0.016960909590125084, 0.04933774098753929, 0.1362536996603012, 0.47770828008651733, 0.5670948624610901, 0.06992122530937195, 0.03068283386528492, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15812784433364868, 0.9118645191192627, 0.022590545937418938, 0.05952226370573044, 0.00360964541323483, 0.07875056564807892, 0.013187792152166367, 0.02020449750125408, 0.0020393244922161102, 0.033818699419498444, 0.0449705570936203, 0.02132066898047924, 0.0717315599322319, 0.12101268768310547, 0.06353376060724258, 0.0730348452925682, 0.024321116507053375, 0.06646358221769333, 0.0630527138710022, 0.23201428353786469, 0.1378810703754425, 0.04738042131066322, 0.010255109518766403, 0.0316733755171299, 0.07226394861936569, 0.06345586478710175, 0.13366159796714783, 0.1651405692100525, 0.1875276118516922, 0.475235253572464, 0.34701114892959595, 0.106105737388134, 0.17074023187160492, 0.14835108816623688, NaN, NaN, NaN, NaN, NaN, NaN], [0.07771441340446472, 0.4748976230621338, 0.012594498693943024, 0.043653786182403564, 0.006564431358128786, 0.024485116824507713, 0.20463299751281738, 0.1550481915473938, 0.0016144687542691827, 0.005543926265090704, 0.0017496985383331776, 0.3491710126399994, 0.23835937678813934, 0.3316482901573181, 0.08539295196533203, 0.1317213624715805, 0.02603350207209587, 0.05892709270119667, 0.02498493157327175, 0.2902502715587616, 0.11121267080307007, 0.057563167065382004, 0.004654969088733196, 0.12363925576210022, 0.02343585342168808, 0.03682887554168701, 0.054189957678318024, 0.5043657422065735, 0.23388440907001495, 0.46154457330703735, 0.32561513781547546, 0.055846668779850006, 0.06476935744285583, 0.026345595717430115, 0.5623452067375183, NaN, NaN, NaN, NaN, NaN], [0.22228576242923737, 0.3581831455230713, 0.10504736006259918, 0.2062736451625824, 0.015430409461259842, 0.007369442842900753, 0.009848481975495815, 0.0027359407395124435, 0.003257193835452199, 0.004766176920384169, 0.0058546122163534164, 0.0040231142193078995, 0.032162997871637344, 0.05548902228474617, 0.22239458560943604, 0.037178635597229004, 0.08259578794240952, 0.0920928493142128, 0.09107104688882828, 0.19359135627746582, 0.17535823583602905, 0.06819135695695877, 0.03716395050287247, 0.07458745688199997, 0.0064619481563568115, 0.009060872718691826, 0.02094256319105625, 0.1461041122674942, 0.11104261875152588, 0.6685899496078491, 0.4500047266483307, 0.029085516929626465, 0.03437849134206772, 0.03590574488043785, 0.20188003778457642, 0.23542997241020203, NaN, NaN, NaN, NaN], [0.040305208414793015, 0.0008039010572247207, 0.001399470493197441, 0.006614126265048981, 0.0003286598657723516, 0.0002559607964940369, 0.0005696980515494943, 0.00010972175368806347, 0.0006102611077949405, 0.0009710662416182458, 0.0004746906051877886, 5.0628168537514284e-05, 6.201828455232317e-06, 1.1841932064271532e-05, 0.15342259407043457, 0.18516498804092407, 0.0009336460498161614, 7.266629108926281e-05, 0.00041225351742468774, 0.00023152375069912523, 0.0002865330025088042, 0.00012637366307899356, 8.909442112781107e-05, 0.0006568549433723092, 0.0003727772564161569, 0.00021836791711393744, 0.0030449857003986835, 0.002062517451122403, 0.0001740154402796179, 0.00019746039470192045, 0.0010639599058777094, 3.738106170203537e-05, 0.00018948569777421653, 0.0017019548686221242, 0.0021623496431857347, 7.414143328787759e-05, 0.00010166682477574795, NaN, NaN, NaN], [0.18667390942573547, 0.05485990643501282, 0.06146723031997681, 0.2094709873199463, 0.003188095986843109, 0.005957009736448526, 0.04363764822483063, 0.02604665607213974, 0.0011390803847461939, 0.022857926785945892, 0.035827361047267914, 0.07732249796390533, 0.00673074834048748, 0.004807854071259499, 0.15350142121315002, 0.014717604033648968, 0.07327108085155487, 0.049021750688552856, 0.04824157431721687, 0.2509053647518158, 0.1518847495317459, 0.011399514973163605, 0.08240412920713425, 0.052963949739933014, 0.012185328640043736, 0.03166860342025757, 0.029948236420750618, 0.0332757867872715, 0.026646502315998077, 0.6691258549690247, 0.05157328397035599, 0.010373775847256184, 0.027277877554297447, 0.022091276943683624, 0.06386284530162811, 0.02213944122195244, 0.7486419677734375, 0.1026511937379837, NaN, NaN], [0.46625471115112305, 0.6644052863121033, 0.19963930547237396, 0.36004284024238586, 0.06144074350595474, 0.06362717598676682, 0.016601700335741043, 0.006137203890830278, 0.0020489897578954697, 0.041981395334005356, 0.042364589869976044, 0.04546959325671196, 0.25786423683166504, 0.1048446074128151, 0.10812478512525558, 0.0010381464380770922, 0.0033105257898569107, 0.005275417119264603, 0.005129440221935511, 0.05292869359254837, 0.018404772505164146, 0.0016328096389770508, 0.0039754449389874935, 0.007563540246337652, 0.0015294092008844018, 0.00038045260589569807, 0.0016144785331562161, 0.00974529329687357, 0.09415796399116516, 0.176291361451149, 0.35064396262168884, 0.0026081653777509928, 0.0026635529939085245, 0.004589376971125603, 0.028667066246271133, 0.20089752972126007, 0.45412325859069824, 0.4352543354034424, 0.005037708207964897, NaN], [0.01868601329624653, 0.08739857375621796, 0.016145089641213417, 0.000850466953124851, 0.0035631621722131968, 0.013478883542120457, 0.0006747889565303922, 0.0010685214074328542, 0.013735192827880383, 0.0029910006560385227, 0.017663421109318733, 0.0005569100612774491, 0.0335303470492363, 0.010939561761915684, 0.13854636251926422, 0.1408424973487854, 0.01142195239663124, 0.027654578909277916, 0.018255943432450294, 0.00871819257736206, 0.007302883546799421, 0.002508251927793026, 0.0010894191218540072, 0.002539109904319048, 0.0016572934109717607, 0.002274427330121398, 0.00915378425270319, 0.004932411015033722, 0.000505969044752419, 0.0064278775826096535, 0.013472460210323334, 0.0009905033512040973, 0.004150861874222755, 0.015419019386172295, 0.013300818391144276, 0.00147106999065727, 0.01399929728358984, 0.03311459720134735, 0.0035406623501330614, 0.008275571279227734]], [[0.3301994204521179, 0.08890271931886673, 0.08465498685836792, 0.06385943293571472, 0.21852104365825653, 0.02508896216750145, 0.03711355850100517, 0.034155964851379395, 0.1728704422712326, 0.06344152241945267, 0.01567375846207142, 0.047274719923734665, 0.023079151287674904, 0.06240373104810715, 0.17532315850257874, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08584976941347122, 0.12593986093997955, 0.03313801810145378, 0.017280908301472664, 0.17652282118797302, 0.268716037273407, 0.12116961926221848, 0.2558431923389435, 0.04765854403376579, 0.04246087744832039, 0.0035840249620378017, 0.02463056705892086, 0.2119264155626297, 0.11800020188093185, 0.14393316209316254, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.046346988528966904, 0.39951857924461365, 0.5525277853012085, 0.10910754650831223, 0.13167327642440796, 0.030212268233299255, 0.021472660824656487, 0.018023721873760223, 0.1298973113298416, 0.04191790521144867, 0.1535157859325409, 0.04246748238801956, 0.3158371150493622, 0.15602277219295502, 0.1064835637807846, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0703379437327385, 0.07535148411989212, 0.05811825022101402, 0.428435742855072, 0.07080380618572235, 0.15123498439788818, 0.3036666214466095, 0.07787945121526718, 0.48052453994750977, 0.12286645174026489, 0.04789941385388374, 0.033336445689201355, 0.030469346791505814, 0.005462532863020897, 0.08732402324676514, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0663379579782486, 0.03187985718250275, 0.09551261365413666, 0.0323714055120945, 0.33827176690101624, 0.1471284031867981, 0.3127540946006775, 0.02734280750155449, 0.23260797560214996, 0.02317011170089245, 0.046465177088975906, 0.0992102101445198, 0.09175661206245422, 0.13314616680145264, 0.07444406300783157, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.034720633178949356, 0.01384154986590147, 0.012703170999884605, 0.020319687202572823, 0.10901976376771927, 0.7807050347328186, 0.03443336486816406, 0.028544975444674492, 0.061822760850191116, 0.00809338316321373, 0.007171421777456999, 0.01342758722603321, 0.09649696201086044, 0.05527613312005997, 0.10404697060585022, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.030445659533143044, 0.041789710521698, 0.023520270362496376, 0.01782963052392006, 0.16124852001667023, 0.06983006745576859, 0.4703807234764099, 0.01895260065793991, 0.027326058596372604, 0.07994905114173889, 0.026343191042542458, 0.032219063490629196, 0.022085823118686676, 0.031095484271645546, 0.24155765771865845, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.055046502500772476, 0.3847074508666992, 0.04798666015267372, 0.003912709187716246, 0.06840738654136658, 0.36789029836654663, 0.07226144522428513, 0.4079316258430481, 0.022340288385748863, 0.10408379882574081, 0.07774890959262848, 0.04753485694527626, 0.285355806350708, 0.16128498315811157, 0.02375940792262554, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03513112664222717, 0.11586778610944748, 0.03034079447388649, 0.001017131027765572, 0.04634808376431465, 0.03800477832555771, 0.03768199309706688, 0.013300161808729172, 0.14031966030597687, 0.015252463519573212, 0.053176701068878174, 0.06856708973646164, 0.13856393098831177, 0.054046642035245895, 0.2367301732301712, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.025786809623241425, 0.06564735621213913, 0.039564721286296844, 0.0026341548655182123, 0.016324089840054512, 0.016701271757483482, 0.020613567903637886, 0.0767805427312851, 0.22950275242328644, 0.51694655418396, 0.1544727236032486, 0.1054847463965416, 0.025381706655025482, 0.05480813980102539, 0.1677880734205246, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.012255452573299408, 0.02410232275724411, 0.08552651852369308, 0.002623841166496277, 0.010307574644684792, 0.0127415731549263, 0.021285703405737877, 0.010095748119056225, 0.06661782413721085, 0.12517453730106354, 0.7383688688278198, 0.19885332882404327, 0.07497892528772354, 0.10072800517082214, 0.06182975694537163, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2776626944541931, 0.046990759670734406, 0.032447993755340576, 0.015461347065865993, 0.08414210379123688, 0.04174359515309334, 0.19995476305484772, 0.013662091456353664, 0.019540153443813324, 0.048985805362463, 0.25616249442100525, 0.2484772503376007, 0.1799653023481369, 0.17696446180343628, 0.09890354424715042, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05504303798079491, 0.08340897411108017, 0.04799877479672432, 0.017563870176672935, 0.028545444831252098, 0.1704884171485901, 0.030681313946843147, 0.02359093725681305, 0.007767115719616413, 0.019779905676841736, 0.03771185874938965, 0.029841119423508644, 0.28957709670066833, 0.04182300344109535, 0.12634176015853882, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06153338775038719, 0.02491314895451069, 0.02542346529662609, 0.0031092099379748106, 0.03241894021630287, 0.1874629557132721, 0.1358277052640915, 0.02619485929608345, 0.017582973465323448, 0.03225348889827728, 0.01329810544848442, 0.026643214747309685, 0.1614912450313568, 0.6035103797912598, 0.09545250982046127, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.027727488428354263, 0.10283610969781876, 0.02349940501153469, 0.010801603086292744, 0.0136191351339221, 0.1518852412700653, 0.05784522369503975, 0.11107083410024643, 0.10270816832780838, 0.1666017472743988, 0.06030665338039398, 0.06198698654770851, 0.05951831862330437, 0.015173939988017082, 0.1310720145702362, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03039383515715599, 0.011264979839324951, 0.30973049998283386, 0.33407092094421387, 0.24303670227527618, 0.013086382299661636, 0.12547586858272552, 0.047571711242198944, 0.07738520950078964, 0.2579103410243988, 0.13098950684070587, 0.3019145727157593, 0.018321001902222633, 0.10478901118040085, 0.1313871294260025, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.32489657402038574, 0.01967906951904297, 0.10292623937129974, 0.18745845556259155, 0.06220339238643646, 0.03126899152994156, 0.030121171846985817, 0.013807957991957664, 0.01960192248225212, 0.10352540761232376, 0.08122410625219345, 0.11610747873783112, 0.05098450556397438, 0.06022121384739876, 0.24838198721408844, 0.10530310869216919, 0.47072935104370117, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21547414362430573, 0.011987588368356228, 0.09540344774723053, 0.03949207067489624, 0.22973625361919403, 0.013393656350672245, 0.014646085910499096, 0.018391601741313934, 0.12483032047748566, 0.04761500656604767, 0.16838808357715607, 0.0500614158809185, 0.09093409031629562, 0.09172232449054718, 0.14920873939990997, 0.07470229268074036, 0.01594272069633007, 0.3473423421382904, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3455514907836914, 0.20528344810009003, 0.14200778305530548, 0.1397678107023239, 0.3345029056072235, 0.04282815381884575, 0.020769812166690826, 0.02952164225280285, 0.29125186800956726, 0.09975660592317581, 0.3298649489879608, 0.36294782161712646, 0.10288939625024796, 0.1784013956785202, 0.03550736606121063, 0.19784890115261078, 0.02982909232378006, 0.008884507231414318, 0.026416730135679245, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.023072484880685806, 0.08888474851846695, 0.04328835755586624, 0.009794876910746098, 0.18984860181808472, 0.0009663040982559323, 0.0038235578685998917, 0.05101485177874565, 0.059323158115148544, 0.00876270979642868, 0.021391507238149643, 0.02426949329674244, 0.013026251457631588, 0.06840420514345169, 0.15691325068473816, 0.15099161863327026, 0.004257611930370331, 0.06880252063274384, 0.03778434172272682, 0.016005711629986763, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20066522061824799, 0.18445545434951782, 0.10427504032850266, 0.02148139849305153, 0.3108636438846588, 0.0010669901967048645, 0.031332992017269135, 0.06621930748224258, 0.42585986852645874, 0.05703788995742798, 0.1919325739145279, 0.6617251038551331, 0.07196007668972015, 0.2038833349943161, 0.13549473881721497, 0.14908726513385773, 0.01576131209731102, 0.006129090208560228, 0.013888919726014137, 0.006888655014336109, 0.007033796049654484, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06934618204832077, 0.15043997764587402, 0.24868465960025787, 0.0180400051176548, 0.61164391040802, 0.0047634197399020195, 0.0077652581967413425, 0.01316747348755598, 0.09036756306886673, 0.016214115545153618, 0.09484434872865677, 0.7773507833480835, 0.3649398386478424, 0.19880527257919312, 0.026039909571409225, 0.1207430437207222, 0.0697125568985939, 0.0065151299349963665, 0.0038357542362064123, 0.04419673979282379, 0.16196060180664062, 0.49751368165016174, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5420496463775635, 0.775536835193634, 0.21455605328083038, 0.17522192001342773, 0.3905614912509918, 0.07102629542350769, 0.15213513374328613, 0.06534071266651154, 0.05938922241330147, 0.3742612600326538, 0.040289394557476044, 0.6919643878936768, 0.07523911446332932, 0.14220400154590607, 0.06588775664567947, 0.02684849314391613, 0.03953110799193382, 0.00281998747959733, 0.001733462675474584, 0.08529012650251389, 0.6486974358558655, 0.306731641292572, 0.07198647409677505, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05002814158797264, 0.18039211630821228, 0.4788157641887665, 0.0970841720700264, 0.5287489891052246, 0.07699278742074966, 0.024560611695051193, 0.055294524878263474, 0.031155720353126526, 0.029308732599020004, 0.023515479639172554, 0.10280930250883102, 0.01905171573162079, 0.033789344131946564, 0.006217750255018473, 0.012395885773003101, 0.009238478727638721, 0.0003186498652212322, 0.0010813054395839572, 0.008392964489758015, 0.2777543067932129, 0.44055092334747314, 0.0011997584952041507, 0.00246741552837193, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2326076328754425, 0.12470381706953049, 0.5816100239753723, 0.187625452876091, 0.17989297211170197, 0.58512943983078, 0.4148763120174408, 0.7688660621643066, 0.02497384324669838, 0.10204316675662994, 0.16508084535598755, 0.4722842574119568, 0.654721736907959, 0.31103214621543884, 0.02808636985719204, 0.034838397055864334, 0.015937600284814835, 0.002090656431391835, 0.002794815693050623, 0.008703295141458511, 0.10732896625995636, 0.4454900026321411, 0.001775766140781343, 0.0009654808673076332, 0.016644174233078957, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.32085803151130676, 0.3732209801673889, 0.8471049070358276, 0.2474840134382248, 0.8311324715614319, 0.1531035155057907, 0.14141014218330383, 0.12460694462060928, 0.15561653673648834, 0.05888388305902481, 0.03703024983406067, 0.2600737512111664, 0.049645353108644485, 0.08333000540733337, 0.053744472563266754, 0.293722003698349, 0.0148458918556571, 0.02856721729040146, 0.006315621547400951, 0.005582483485341072, 0.0013911855639889836, 0.004092940129339695, 0.0036679452750831842, 0.0010494120651856065, 0.016411608085036278, 0.023008037358522415, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.048572178930044174, 0.20163586735725403, 0.8568418025970459, 0.3438677489757538, 0.8764770030975342, 0.038519736379384995, 0.10765119642019272, 0.14438603818416595, 0.13915397226810455, 0.04139794409275055, 0.24816225469112396, 0.22188685834407806, 0.1582770049571991, 0.255889892578125, 0.05260627716779709, 0.13037414848804474, 0.020949387922883034, 0.03831411898136139, 0.007462172769010067, 0.02548721246421337, 0.006367610301822424, 0.008434200659394264, 0.010317808948457241, 0.003713584039360285, 0.00402417778968811, 0.19032441079616547, 0.26746228337287903, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10717450082302094, 0.14654512703418732, 0.5492125749588013, 0.149112731218338, 0.6473506689071655, 0.014123019762337208, 0.023513145744800568, 0.06304500997066498, 0.5243880152702332, 0.17494699358940125, 0.11734810471534729, 0.2534768283367157, 0.06080847606062889, 0.1781260073184967, 0.01657547615468502, 0.041874390095472336, 0.024160701781511307, 0.00029624058515764773, 0.00016299582784995437, 0.00014630405348725617, 0.0004776908899657428, 0.0010664566652849317, 0.005874973721802235, 0.000636687153019011, 0.0013240330154076219, 0.0912160873413086, 0.35286882519721985, 0.01772063784301281, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024022793397307396, 0.20128284394741058, 0.39493197202682495, 0.16542883217334747, 0.7724959254264832, 0.05353498458862305, 0.039175428450107574, 0.21511156857013702, 0.10924636572599411, 0.3127569556236267, 0.20907098054885864, 0.6610769033432007, 0.026550091803073883, 0.07443477213382721, 0.04747246578335762, 0.11822566390037537, 0.015047432854771614, 0.019423136487603188, 0.00686526857316494, 0.0036870460025966167, 0.00022719512344338, 0.002930518239736557, 0.025171050801873207, 0.005165010690689087, 0.05391281098127365, 0.11512911319732666, 0.07776232063770294, 0.2967449426651001, 0.09380093216896057, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0639173686504364, 0.0019661476835608482, 0.03054100275039673, 0.07290788739919662, 0.07458660751581192, 0.0017515828367322683, 0.01338117104023695, 0.0049591753631830215, 0.10895326733589172, 0.03256915882229805, 0.07470867037773132, 0.022291045635938644, 0.00026081688702106476, 0.003768018214032054, 0.15579301118850708, 0.09375648200511932, 0.01475021056830883, 0.012638024985790253, 0.0046005831100046635, 0.051909249275922775, 0.0036223391070961952, 0.004371740389615297, 0.009388775564730167, 0.01159447617828846, 0.023305783048272133, 0.046531662344932556, 0.058873143047094345, 0.07503876090049744, 0.0337555818259716, 0.30213212966918945, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00809751357883215, 0.08670660853385925, 0.12165205925703049, 0.06173386052250862, 0.8110419511795044, 0.006245153024792671, 0.03447260707616806, 0.08050490915775299, 0.779870867729187, 0.2479465901851654, 0.38426774740219116, 0.6870184540748596, 0.2310730367898941, 0.07155610620975494, 0.05814361199736595, 0.060409948229789734, 0.03445665165781975, 0.000381257850676775, 0.0036348046269267797, 0.0002713070425670594, 0.0011815812904387712, 0.03030458651483059, 0.03435760363936424, 0.0019682012498378754, 0.00901943538337946, 0.2363511621952057, 0.7836493253707886, 0.05375572293996811, 0.0010517562041059136, 0.002096510259434581, 0.017742546275258064, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01971210353076458, 0.10859540849924088, 0.17558348178863525, 0.04931360110640526, 0.4077165424823761, 0.001824796199798584, 0.004386546555906534, 0.0422598272562027, 0.9374924302101135, 0.3226373493671417, 0.06322266161441803, 0.05341457948088646, 0.0039883931167423725, 0.004304073750972748, 0.13460686802864075, 0.19913224875926971, 0.17475517094135284, 0.0022224360145628452, 0.015882516279816628, 0.001058473251760006, 0.0005846276762895286, 0.02601638250052929, 0.037341512739658356, 0.002062901621684432, 0.01394632738083601, 0.062121838331222534, 0.09270716458559036, 0.13391432166099548, 0.011137665249407291, 0.003502808278426528, 0.007463122718036175, 0.4640289545059204, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018049566075205803, 0.12295468151569366, 0.24470828473567963, 0.04122815281152725, 0.7332677245140076, 0.004472800530493259, 0.0029204280581325293, 0.018685931339859962, 0.4878760874271393, 0.20441682636737823, 0.08441592752933502, 0.4205068051815033, 0.04466289281845093, 0.13263334333896637, 0.0994158536195755, 0.33059969544410706, 0.017222048714756966, 0.029873082414269447, 0.008054245263338089, 0.002331576542928815, 0.0006345488945953548, 0.011296147480607033, 0.005269323009997606, 0.0004991231253370643, 0.01808379590511322, 0.0023433570750057697, 0.0409514382481575, 0.01219080574810505, 0.010968736372888088, 0.004035044461488724, 0.000618473335634917, 0.01301309373229742, 0.04461785778403282, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007120466325432062, 0.02300306409597397, 0.2714575231075287, 0.07745856046676636, 0.6446666717529297, 0.0059507740661501884, 0.011145476251840591, 0.13244189321994781, 0.38060593605041504, 0.06726288050413132, 0.22673718631267548, 0.3522229492664337, 0.17927831411361694, 0.524927020072937, 0.09379637986421585, 0.11787470430135727, 0.013379373587667942, 0.03657921776175499, 0.007838133722543716, 0.006328434217721224, 0.0013346761697903275, 0.005374525673687458, 0.005563441663980484, 0.0013783610193058848, 0.003622437361627817, 0.10895299166440964, 0.17491653561592102, 0.013411260209977627, 0.006658618804067373, 0.013080593198537827, 0.0013389869127422571, 0.03540230169892311, 0.3923792839050293, 0.2429211437702179, NaN, NaN, NaN, NaN, NaN, NaN], [0.03649899363517761, 0.08160936087369919, 0.2519805133342743, 0.07504414021968842, 0.1795702874660492, 0.006024391856044531, 0.0073743402026593685, 0.061968039721250534, 0.7520835995674133, 0.28517279028892517, 0.1493321657180786, 0.3589819371700287, 0.04636238142848015, 0.16408585011959076, 0.046330999583005905, 0.03099578432738781, 0.01363852247595787, 8.312943100463599e-05, 4.0873743273550645e-05, 3.1056373700266704e-05, 8.971957868197933e-05, 0.0004970009904354811, 0.0021136843133717775, 0.00015606316446792334, 0.0008045462891459465, 0.029241982847452164, 0.24120952188968658, 0.011327153071761131, 0.006169632077217102, 0.004105421248823404, 0.0017298789462074637, 0.09891722351312637, 0.13539430499076843, 0.3545337915420532, 0.03266340494155884, NaN, NaN, NaN, NaN, NaN], [0.009416425600647926, 0.1558573991060257, 0.15325002372264862, 0.08311447501182556, 0.6221630573272705, 0.0029961667023599148, 0.006436231546103954, 0.027678541839122772, 0.2543543577194214, 0.47390833497047424, 0.28851544857025146, 0.6220062375068665, 0.014266690239310265, 0.05054754391312599, 0.0578170008957386, 0.05892227217555046, 0.006390280555933714, 0.00726453959941864, 0.002730957930907607, 0.0007821861072443426, 5.8160956541541964e-05, 0.0015625637024641037, 0.007388831116259098, 0.0016573512693867087, 0.027249574661254883, 0.062049947679042816, 0.056622181087732315, 0.2355845421552658, 0.04601869359612465, 0.006218506023287773, 0.00966239720582962, 0.07739637047052383, 0.4012998342514038, 0.09626632183790207, 0.38049787282943726, 0.10569068044424057, NaN, NaN, NaN, NaN], [0.04693470522761345, 0.0011674511479213834, 0.01364858541637659, 0.06039872020483017, 0.0427468940615654, 0.0009404723532497883, 0.007858873344957829, 0.0028007859364151955, 0.06382106244564056, 0.03982963413000107, 0.05175205320119858, 0.011254650540649891, 0.0001272865483770147, 0.001588277518749237, 0.15313954651355743, 0.09179559350013733, 0.00951253343373537, 0.010748236440122128, 0.0033872865606099367, 0.04677930101752281, 0.0018132117111235857, 0.0035809800028800964, 0.005968866869807243, 0.0062707834877073765, 0.02606387436389923, 0.033457815647125244, 0.03605461120605469, 0.04817588999867439, 0.03754975646734238, 0.2781437933444977, 0.015551367774605751, 0.2560427486896515, 0.08298799395561218, 0.06865174323320389, 0.12361031025648117, 0.04344068095088005, 0.28463616967201233, NaN, NaN, NaN], [0.017768997699022293, 0.1465732455253601, 0.15898801386356354, 0.12304693460464478, 0.8442554473876953, 0.006285809446126223, 0.04204265773296356, 0.12739135324954987, 0.8276333808898926, 0.5079721808433533, 0.5299316644668579, 0.8274551630020142, 0.09790517389774323, 0.02651425078511238, 0.11435628682374954, 0.02905191108584404, 0.012088212184607983, 0.00011298860044917092, 0.0012518719304352999, 4.317293132771738e-05, 0.0001948956778505817, 0.008923283778131008, 0.008874665014445782, 0.00048750368296168745, 0.0041984752751886845, 0.08557221293449402, 0.46109655499458313, 0.018593793734908104, 0.0004841866611968726, 0.0006005582981742918, 0.004410868044942617, 0.1617877185344696, 0.2815479040145874, 0.7414005398750305, 0.06452517956495285, 0.0009642028599046171, 0.0012653517769649625, 0.012943175621330738, NaN, NaN], [0.017107579857110977, 0.05770094692707062, 0.07052541524171829, 0.059498131275177, 0.2613165080547333, 0.0009367912425659597, 0.0028308003675192595, 0.01869240775704384, 0.8671534061431885, 0.40041688084602356, 0.03947103023529053, 0.0349445715546608, 0.00177917187102139, 0.002164072822779417, 0.1562660187482834, 0.1381005197763443, 0.0952477678656578, 0.0011117071844637394, 0.007693122606724501, 0.0001761779421940446, 8.233776316046715e-05, 0.0067709037102758884, 0.015442474745213985, 0.0005836034542880952, 0.005857429001480341, 0.020792629569768906, 0.02682901732623577, 0.05164036154747009, 0.0043857707642018795, 0.0008507486782036722, 0.004215322434902191, 0.19233396649360657, 0.21357974410057068, 0.14138071238994598, 0.12764914333820343, 0.011541306972503662, 0.001996394479647279, 0.004979089833796024, 0.4768531322479248, NaN], [0.006599111016839743, 0.004138579126447439, 0.06047067046165466, 0.013185898773372173, 0.15347044169902802, 0.000755132467020303, 0.007522573694586754, 0.002741254400461912, 0.10833818465471268, 0.005474736914038658, 0.009540018625557423, 0.00040286476723849773, 0.004092549905180931, 0.002003892557695508, 0.13896189630031586, 0.14079369604587555, 0.0077750058844685555, 0.008707624860107899, 0.002215370535850525, 0.0003697987995110452, 8.685041393619031e-05, 6.568676326423883e-05, 0.0005928067839704454, 0.00018151948461309075, 0.0013713521184399724, 0.003134837606921792, 0.004530616104602814, 0.0021016064565628767, 0.0014590725768357515, 0.01743447594344616, 0.0004639088874682784, 0.00557903666049242, 0.015868593007326126, 0.012156624346971512, 0.006375743541866541, 0.004486390855163336, 0.037133798003196716, 0.0008373309392482042, 0.015209782868623734, 0.053904592990875244]]], [[[0.042950913310050964, 0.0007196685182861984, 0.027302199974656105, 0.006393556483089924, 0.09642192721366882, 0.01637418009340763, 0.0023990001063793898, 0.0024961719755083323, 0.0020593979861587286, 0.0015603104839101434, 0.03318732604384422, 0.35782966017723083, 0.0989728793501854, 0.061845745891332626, 0.203965961933136, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10955026745796204, 0.02388770505785942, 0.04351670667529106, 0.023162608966231346, 0.012142845429480076, 0.035775765776634216, 0.03457501530647278, 0.11992064118385315, 0.01240380760282278, 0.007506475783884525, 0.05337386205792427, 0.6535924673080444, 0.5536571145057678, 0.19680790603160858, 0.140446737408638, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005947283003479242, 0.0010204642312601209, 0.18009734153747559, 0.006447697523981333, 0.012463629245758057, 7.613956404384226e-05, 7.241032290039584e-05, 0.00011841111700050533, 0.0034185522235929966, 0.0034766956232488155, 0.002135018352419138, 0.005925178527832031, 0.003751354990527034, 0.0019247139571234584, 0.28479355573654175, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.014483454637229443, 0.022866876795887947, 0.32726621627807617, 0.007662326563149691, 0.09431912004947662, 0.0004296264669392258, 0.0011131323408335447, 0.0014158609556034207, 0.018019702285528183, 0.01865016296505928, 0.0020740600302815437, 0.0029411758296191692, 0.0016890126280486584, 0.0063899424858391285, 0.12852828204631805, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.030419446527957916, 0.058438073843717575, 0.3924228250980377, 0.035587672144174576, 0.08137891441583633, 0.010925069451332092, 0.001356365391984582, 0.0012006007600575686, 0.053269751369953156, 0.0027948038186877966, 0.04010261595249176, 0.01993635483086109, 0.004820133093744516, 0.004111820366233587, 0.21765674650669098, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07767480611801147, 0.006269918289035559, 0.09326869994401932, 0.6196063756942749, 0.11043263971805573, 0.052975643426179886, 0.02037718892097473, 0.0008919782703742385, 0.008360025472939014, 0.002104781800881028, 0.0179440937936306, 0.10498880594968796, 0.011864815838634968, 0.002359954407438636, 0.24602332711219788, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00026913435431197286, 8.159392746165395e-05, 0.007915529422461987, 0.05068095400929451, 0.6570689678192139, 0.32081079483032227, 0.05758208408951759, 0.0006442792946472764, 0.0015821922570466995, 6.469202344305813e-05, 0.003034515306353569, 0.0310077928006649, 0.025656316429376602, 0.0025228438898921013, 0.023106882348656654, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0005435149651020765, 0.0005490019102580845, 0.034476928412914276, 0.01287262886762619, 0.25229769945144653, 0.4536571502685547, 0.10281822830438614, 0.012222280725836754, 0.016108570620417595, 0.00031008716905489564, 0.0026372161228209734, 0.0034134499728679657, 0.0248859953135252, 0.017225822433829308, 0.02475895546376705, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.000726195692550391, 0.00036735343746840954, 0.007114858832210302, 0.0026034389156848192, 0.01250846590846777, 0.009484091773629189, 0.0354158952832222, 0.0016834242269396782, 0.19215336441993713, 0.007594457361847162, 0.003938279580324888, 2.8376112823025323e-05, 0.001137340790592134, 0.00011368053674232215, 0.29228782653808594, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0005387092242017388, 0.0003453432582318783, 0.015091696754097939, 0.06184916943311691, 0.003162123030051589, 0.014056581072509289, 0.012467358261346817, 0.009164737537503242, 0.05548334866762161, 0.008076494559645653, 0.005971547681838274, 0.001972777536138892, 0.006774900481104851, 0.001264052465558052, 0.2362799048423767, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0025044670328497887, 0.0023456772323697805, 0.07385681569576263, 0.006188494618982077, 0.021690815687179565, 0.0007893598522059619, 0.002135526854544878, 0.006048245821148157, 0.25190338492393494, 0.09442908316850662, 0.19532348215579987, 0.031008923426270485, 0.009561427868902683, 0.0021240306086838245, 0.21234139800071716, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015501828864216805, 0.0072255814447999, 0.006012998055666685, 0.008203291334211826, 0.0171041339635849, 0.001770812552422285, 0.00655776634812355, 0.002186145167797804, 0.15154685080051422, 0.5713958144187927, 0.05368567630648613, 0.051326390355825424, 0.01612916588783264, 0.0019418209558352828, 0.18746227025985718, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05876695737242699, 0.005032649263739586, 0.05515526235103607, 0.012789947912096977, 0.017388533800840378, 0.00580496434122324, 0.015462081879377365, 0.009339934214949608, 0.0222479198127985, 0.03960718587040901, 0.14906688034534454, 0.2817051410675049, 0.14850065112113953, 0.09505022317171097, 0.10619710385799408, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.012425977736711502, 0.0006452641100622714, 0.00298808584921062, 0.001349467202089727, 0.014642779715359211, 0.0010115096811205149, 0.0033098396379500628, 0.00038259345456026495, 0.0035037249326705933, 0.008293021470308304, 0.03801131248474121, 0.8317341208457947, 0.018821584060788155, 0.057542454451322556, 0.011905365623533726, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04682805389165878, 0.01908799074590206, 0.10485747456550598, 0.060083843767642975, 0.15075230598449707, 0.029059063643217087, 0.04093548655509949, 0.03368941321969032, 0.017014725133776665, 0.011203174479305744, 0.0391479916870594, 0.24882012605667114, 0.37940239906311035, 0.12485622614622116, 0.12782400846481323, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010830877348780632, 0.011870973743498325, 0.10922139137983322, 0.013140714727342129, 0.060979437083005905, 0.24213501811027527, 0.056873127818107605, 0.0565403513610363, 0.1606917381286621, 0.004471848253160715, 0.04391508549451828, 0.16444265842437744, 0.14521700143814087, 0.12183647602796555, 0.18165212869644165, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1442122757434845, 0.026047294959425926, 0.4262431859970093, 0.3211715519428253, 0.7946609258651733, 0.48857852816581726, 0.31943926215171814, 0.3322535455226898, 0.8442224860191345, 0.37700119614601135, 0.4491288661956787, 0.725179135799408, 0.5425247550010681, 0.7077597379684448, 0.47353750467300415, 0.12363631278276443, 0.14845161139965057, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004308484960347414, 0.0038143862038850784, 0.01376394834369421, 0.007213444449007511, 0.0352218858897686, 0.009065943770110607, 0.00796457938849926, 0.009648038074374199, 0.012818497605621815, 0.005304576829075813, 0.00578665267676115, 0.025514552369713783, 0.003588201943784952, 0.005116589833050966, 0.1385156214237213, 0.14363405108451843, 0.021847352385520935, 0.10135873407125473, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.37350767850875854, 0.33144617080688477, 0.1264321357011795, 0.21400198340415955, 0.32627996802330017, 0.09132378548383713, 0.05067773535847664, 0.05911920592188835, 0.47554144263267517, 0.5285797715187073, 0.055136121809482574, 0.07909779250621796, 0.0048016151413321495, 0.023815851658582687, 0.05086187273263931, 0.13959342241287231, 0.059129536151885986, 0.04632453992962837, 0.0506979376077652, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.026979738846421242, 0.17144815623760223, 0.016802728176116943, 0.011190843768417835, 0.05719228833913803, 0.006600439548492432, 0.02541169337928295, 0.056367360055446625, 0.2566111385822296, 0.13847731053829193, 0.02390860766172409, 0.10821771621704102, 0.004193281754851341, 0.024024199694395065, 0.1485961675643921, 0.1401052325963974, 0.20328059792518616, 0.08711162209510803, 0.021569250151515007, 0.06437158584594727, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.010539665818214417, 0.02736317366361618, 0.020729688927531242, 0.012272891588509083, 0.037458207458257675, 0.020133765414357185, 0.006475721951574087, 0.0135318823158741, 0.14018985629081726, 0.043190933763980865, 0.014518915675580502, 0.06027117371559143, 0.013409063220024109, 0.008036705665290356, 0.12864065170288086, 0.14849096536636353, 0.24162742495536804, 0.13733072578907013, 0.023916935548186302, 0.4261094033718109, 0.034874048084020615, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06693296134471893, 0.05517994612455368, 0.31718623638153076, 0.09396946430206299, 0.13595829904079437, 0.09244473278522491, 0.0043823812156915665, 0.004134675953537226, 0.9252469539642334, 0.10048755258321762, 0.12945091724395752, 0.21572811901569366, 0.034586720168590546, 0.0726432204246521, 0.04207848384976387, 0.1122843325138092, 0.27548718452453613, 0.3164171576499939, 0.11597670614719391, 0.521038293838501, 0.1305568367242813, 0.04802507162094116, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07686225324869156, 0.019675375893712044, 0.2417416274547577, 0.08641211688518524, 0.27890217304229736, 0.038729339838027954, 0.01047417800873518, 0.015033761039376259, 0.4832261800765991, 0.05870191380381584, 0.2969569265842438, 0.6193534731864929, 0.12871475517749786, 0.22289764881134033, 0.5152896642684937, 0.13016629219055176, 0.2326299250125885, 0.3132029175758362, 0.32591310143470764, 0.1516764611005783, 0.09795279055833817, 0.02053435519337654, 0.1865263283252716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.27357029914855957, 0.46676310896873474, 0.3964380621910095, 0.19407758116722107, 0.11257106065750122, 0.014855606481432915, 0.047355495393276215, 0.03237777575850487, 0.3466991186141968, 0.3347361087799072, 0.40522828698158264, 0.5460160970687866, 0.16927282512187958, 0.30020883679389954, 0.04839835315942764, 0.121080182492733, 0.4840172827243805, 0.47487083077430725, 0.3000609576702118, 0.5299880504608154, 0.09183567762374878, 0.057097259908914566, 0.12967270612716675, 0.04215369373559952, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03550037741661072, 0.12907657027244568, 0.07532694190740585, 0.016156595200300217, 0.003630127990618348, 0.01967703178524971, 0.04095811769366264, 0.0179570484906435, 0.39472800493240356, 0.07661326229572296, 0.4370958209037781, 0.4819755256175995, 0.022724222391843796, 0.033822834491729736, 0.04362141340970993, 0.08035996556282043, 0.5049515962600708, 0.21779249608516693, 0.22551923990249634, 0.48642098903656006, 0.17451445758342743, 0.14853931963443756, 0.2973877787590027, 0.02990546263754368, 0.12922555208206177, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.021909046918153763, 0.030848275870084763, 0.046106528490781784, 0.06202828511595726, 0.0325893796980381, 0.03412875533103943, 0.03159455209970474, 0.053456224501132965, 0.16627800464630127, 0.058593228459358215, 0.13071225583553314, 0.20816291868686676, 0.06561117619276047, 0.04416830837726593, 0.03868245705962181, 0.15412510931491852, 0.24815845489501953, 0.21706829965114594, 0.15909965336322784, 0.3919820487499237, 0.2097313106060028, 0.05961627885699272, 0.10788830369710922, 0.04644578695297241, 0.008778278715908527, 0.1666601300239563, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.012810717336833477, 0.0013835412682965398, 0.03224228695034981, 0.08643268793821335, 0.03331959247589111, 0.030278367921710014, 0.07819522172212601, 0.03789946064352989, 0.1521843820810318, 0.04584735259413719, 0.022775838151574135, 0.3594759702682495, 0.37505412101745605, 0.4203481376171112, 0.0833948627114296, 0.1319347769021988, 0.07332690805196762, 0.3709748387336731, 0.10343886911869049, 0.2416648119688034, 0.273651659488678, 0.142499178647995, 0.032821010798215866, 0.08169299364089966, 0.04221141338348389, 0.04960552975535393, 0.14849121868610382, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12084313482046127, 0.009313090704381466, 0.17649081349372864, 0.125856414437294, 0.03634244203567505, 0.028733352199196815, 0.006864639464765787, 0.002353896852582693, 0.16829386353492737, 0.1124483197927475, 0.061692144721746445, 0.19240431487560272, 0.09329058974981308, 0.18641597032546997, 0.018957242369651794, 0.15117543935775757, 0.09085448831319809, 0.23665060102939606, 0.09974268078804016, 0.5293540358543396, 0.2969721853733063, 0.0923411101102829, 0.04701923578977585, 0.47750627994537354, 0.31436240673065186, 0.11817371100187302, 0.08098391443490982, 0.05702001228928566, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.026597192510962486, 0.005893908906728029, 0.12369649112224579, 0.06400194019079208, 0.07115989178419113, 0.0058293454349040985, 0.008344992063939571, 0.00957680307328701, 0.04244829714298248, 0.036994293332099915, 0.07189996540546417, 0.04466360807418823, 0.12661096453666687, 0.2742233872413635, 0.042464204132556915, 0.2022491842508316, 0.0666579008102417, 0.032761361449956894, 0.03407268971204758, 0.3113752603530884, 0.5905517935752869, 0.21839523315429688, 0.043745849281549454, 0.02789805829524994, 0.042396336793899536, 0.08724991232156754, 0.07408890873193741, 0.010044119320809841, 0.12108539044857025, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0012156351003795862, 0.0009695529006421566, 0.021633058786392212, 0.003243132960051298, 0.017804604023694992, 0.006560572423040867, 0.00960883591324091, 0.043045539408922195, 0.008467147126793861, 0.0006170565611682832, 0.0028031598776578903, 0.004630656447261572, 1.7895566998049617e-05, 0.00023196694382932037, 0.14134538173675537, 0.14857184886932373, 0.38842764496803284, 0.16100677847862244, 0.1839173436164856, 0.03719957172870636, 0.5251989364624023, 0.25831982493400574, 0.06345110386610031, 0.01966739259660244, 0.013820506632328033, 0.10135386884212494, 0.06285497546195984, 0.037499457597732544, 0.09235794097185135, 0.06518241763114929, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3736850321292877, 0.29077818989753723, 0.43184730410575867, 0.4823248088359833, 0.7379603385925293, 0.5093098282814026, 0.5006043910980225, 0.3135696351528168, 0.5183887481689453, 0.13794882595539093, 0.04961319640278816, 0.12779268622398376, 0.1589212864637375, 0.22346213459968567, 0.1422436237335205, 0.15810954570770264, 0.08897967636585236, 0.2754043936729431, 0.11542505025863647, 0.7166418433189392, 0.6856120824813843, 0.15602687001228333, 0.03588242083787918, 0.10233978182077408, 0.06907100230455399, 0.13906386494636536, 0.06064911186695099, 0.02474391460418701, 0.09316151589155197, 0.5409220457077026, 0.18577302992343903, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15325459837913513, 0.1614270806312561, 0.4186149537563324, 0.16462315618991852, 0.44647181034088135, 0.7114150524139404, 0.12785741686820984, 0.04132780805230141, 0.047578196972608566, 0.12349404394626617, 0.3133608400821686, 0.35326144099235535, 0.30924320220947266, 0.31196898221969604, 0.028064150363206863, 0.07972963899374008, 0.06995329260826111, 0.2565014958381653, 0.11985079944133759, 0.5429201126098633, 0.3072132468223572, 0.04467121511697769, 0.06233014911413193, 0.06391221284866333, 0.06306523084640503, 0.04008801653981209, 0.16940940916538239, 0.21208623051643372, 0.3237960636615753, 0.4987465739250183, 0.14530567824840546, 0.42085787653923035, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06399086862802505, 0.06306004524230957, 0.1948489397764206, 0.12845031917095184, 0.26295408606529236, 0.38098499178886414, 0.0839061513543129, 0.02110268920660019, 0.07144157588481903, 0.01679118163883686, 0.14834797382354736, 0.479995995759964, 0.24741992354393005, 0.2288939356803894, 0.04729384183883667, 0.057688161730766296, 0.05957844480872154, 0.09227755665779114, 0.06308872997760773, 0.6051628589630127, 0.41719216108322144, 0.06513097882270813, 0.11441777646541595, 0.2576654255390167, 0.039566945284605026, 0.04989808052778244, 0.41204503178596497, 0.6269510388374329, 0.0653882622718811, 0.2309982180595398, 0.05030554160475731, 0.12162061780691147, 0.2016562819480896, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.041305530816316605, 0.00217662681825459, 0.29091107845306396, 0.12698692083358765, 0.3031243085861206, 0.1103614866733551, 0.14891935884952545, 0.018863126635551453, 0.033797744661569595, 0.008303376846015453, 0.009713392704725266, 0.31765925884246826, 0.4755025804042816, 0.4005468487739563, 0.10761724412441254, 0.08513950556516647, 0.05776134505867958, 0.44855204224586487, 0.15441171824932098, 0.37962910532951355, 0.43142464756965637, 0.21386101841926575, 0.07478547096252441, 0.22071515023708344, 0.1727379858493805, 0.06471506506204605, 0.1414414495229721, 0.20356127619743347, 0.23849359154701233, 0.28116941452026367, 0.22387196123600006, 0.24124523997306824, 0.10411572456359863, 0.14086224138736725, NaN, NaN, NaN, NaN, NaN, NaN], [0.4954506754875183, 0.04642331227660179, 0.603453516960144, 0.26468321681022644, 0.3210473358631134, 0.15078485012054443, 0.027168329805135727, 0.004181328695267439, 0.10826757550239563, 0.10845811665058136, 0.053085505962371826, 0.20335085690021515, 0.12072784453630447, 0.17107200622558594, 0.059424202889204025, 0.09857918322086334, 0.08268877118825912, 0.17155912518501282, 0.08326277136802673, 0.3910389840602875, 0.23102693259716034, 0.0706368237733841, 0.04062340036034584, 0.34264665842056274, 0.40400993824005127, 0.14310938119888306, 0.07597656548023224, 0.059025220572948456, 0.46083009243011475, 0.6441643834114075, 0.8002472519874573, 0.34466618299484253, 0.10859531164169312, 0.04317509010434151, 0.042760394513607025, NaN, NaN, NaN, NaN, NaN], [0.21408557891845703, 0.03960772231221199, 0.43507251143455505, 0.10961537808179855, 0.42240580916404724, 0.06637464463710785, 0.08428787440061569, 0.03856734186410904, 0.0027873425278812647, 0.012926235795021057, 0.019708000123500824, 0.017574653029441833, 0.10679914057254791, 0.20499441027641296, 0.14648839831352234, 0.07982634007930756, 0.027687683701515198, 0.01305405143648386, 0.01568622700870037, 0.15395750105381012, 0.36470726132392883, 0.09429053217172623, 0.02618592418730259, 0.00988653302192688, 0.03718657046556473, 0.057223062962293625, 0.036843542009592056, 0.008861655369400978, 0.039983998984098434, 0.5628355145454407, 0.5858935713768005, 0.11540589481592178, 0.07112369686365128, 0.022479010745882988, 0.0049066911451518536, 0.07443748414516449, NaN, NaN, NaN, NaN], [0.002137779025360942, 0.0005492505733855069, 0.03787382319569588, 0.004300523083657026, 0.03090864233672619, 0.003432363970205188, 0.010591491125524044, 0.028211969882249832, 0.003533262060955167, 0.0003883022291120142, 0.0014010752784088254, 0.0010855919681489468, 8.133743904181756e-06, 7.628504681633785e-05, 0.13786831498146057, 0.13230623304843903, 0.39635705947875977, 0.12619565427303314, 0.23844560980796814, 0.04749276116490364, 0.5552228093147278, 0.304650217294693, 0.16151569783687592, 0.05923860892653465, 0.03940735384821892, 0.37161606550216675, 0.13852664828300476, 0.1098584458231926, 0.421970933675766, 0.059641290456056595, 0.35413044691085815, 0.2336989790201187, 0.21869167685508728, 0.04408164322376251, 0.03093402087688446, 0.08392708003520966, 0.038801465183496475, NaN, NaN, NaN], [0.39364972710609436, 0.15414100885391235, 0.5289453864097595, 0.2158767729997635, 0.8369554877281189, 0.5879349708557129, 0.29191306233406067, 0.1240038275718689, 0.0375535674393177, 0.006134674418717623, 0.003127586329355836, 0.02892274223268032, 0.023530103266239166, 0.026029296219348907, 0.16074688732624054, 0.06938444077968597, 0.08034616708755493, 0.1555827558040619, 0.07347460091114044, 0.4763748347759247, 0.40589335560798645, 0.07265187799930573, 0.022002995014190674, 0.0527057945728302, 0.07314148545265198, 0.11090734601020813, 0.03504399210214615, 0.0172868762165308, 0.14030121266841888, 0.3467526137828827, 0.21038202941417694, 0.6312639117240906, 0.1208876520395279, 0.020520374178886414, 0.014591614715754986, 0.03736459091305733, 0.22129306197166443, 0.05682671070098877, NaN, NaN], [0.2684386968612671, 0.29252222180366516, 0.6921796798706055, 0.1771971732378006, 0.6445736885070801, 0.7333542704582214, 0.14767038822174072, 0.04686985909938812, 0.030383678153157234, 0.06000908464193344, 0.1879548877477646, 0.5258318781852722, 0.3533342778682709, 0.3370157778263092, 0.05586722865700722, 0.08218587934970856, 0.08353152126073837, 0.244074746966362, 0.15340235829353333, 0.5709766745567322, 0.4268343448638916, 0.06391507387161255, 0.13458560407161713, 0.14046461880207062, 0.13024689257144928, 0.043825987726449966, 0.1802380084991455, 0.2593124508857727, 0.4235299825668335, 0.23401854932308197, 0.23376718163490295, 0.4458163380622864, 0.1644086241722107, 0.22351105511188507, 0.25077733397483826, 0.28149890899658203, 0.3320602774620056, 0.05098887160420418, 0.4388013482093811, NaN], [0.0015460141003131866, 0.010688474401831627, 0.09971211850643158, 0.017146917060017586, 0.1899741291999817, 0.03437719866633415, 0.022833971306681633, 0.015900788828730583, 0.05731913447380066, 0.0008445536368526518, 0.0073861475102603436, 0.06343144923448563, 0.11084617674350739, 0.11975067108869553, 0.13715405762195587, 0.13887250423431396, 0.1972966492176056, 0.3352757692337036, 0.30585116147994995, 0.6380553841590881, 0.5158089995384216, 0.3850407004356384, 0.3912012279033661, 0.2877788245677948, 0.30187875032424927, 0.20025724172592163, 0.34020906686782837, 0.47167572379112244, 0.3815076947212219, 0.5385518074035645, 0.20663535594940186, 0.37741178274154663, 0.29376763105392456, 0.3577961027622223, 0.21765607595443726, 0.14290691912174225, 0.3544510304927826, 0.07646653801202774, 0.1391337811946869, 0.019570577889680862]], [[0.010500228963792324, 0.7224081754684448, 0.030353030189871788, 0.00683749420568347, 0.007232841569930315, 0.018554184585809708, 0.0004432629211805761, 0.02719983458518982, 0.0006519495509564877, 0.0012597806053236127, 0.006804677192121744, 0.0011734187137335539, 0.003679303452372551, 0.010371293872594833, 0.019012004137039185, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0004097823693882674, 0.007568135391920805, 0.05432860180735588, 0.08570658415555954, 0.005480978172272444, 0.0009473124518990517, 0.000799189496319741, 0.0012391285272315145, 0.00044785221689380705, 0.0009745006100274622, 0.013956908136606216, 0.00011593959061428905, 0.004404959734529257, 0.0031790253706276417, 0.20507724583148956, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.022728245705366135, 0.0194535069167614, 0.024020839482545853, 0.023168254643678665, 0.45748311281204224, 0.5855799913406372, 0.21754446625709534, 0.1001717820763588, 0.0221620611846447, 0.0033511894289404154, 0.03508710116147995, 0.20201759040355682, 0.2973189353942871, 0.04947788640856743, 0.0494859553873539, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010499863885343075, 0.004784405697137117, 0.0035181313287466764, 0.007238015066832304, 0.4155227243900299, 0.8333501219749451, 0.07475034892559052, 0.20445603132247925, 0.005854693241417408, 0.001852003508247435, 0.02841898612678051, 0.243921160697937, 0.10275343060493469, 0.13816815614700317, 0.07406751066446304, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00768234534189105, 0.012151399627327919, 0.0006104251369833946, 0.0018971813842654228, 0.08389636874198914, 0.7291921973228455, 0.2573831081390381, 0.13359335064888, 0.0011000150116160512, 0.0005446228897199035, 0.036390628665685654, 0.06110000237822533, 0.1527252048254013, 0.14593005180358887, 0.05624886974692345, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0037335127126425505, 0.004452059045433998, 0.00018280810036230832, 0.016856878995895386, 0.0016014263965189457, 0.05306785926222801, 0.5318921208381653, 0.2889253497123718, 0.0004385874199215323, 0.007465890143066645, 0.0005691659171134233, 0.008836256340146065, 0.00793292187154293, 0.0033322598319500685, 0.1706118881702423, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00023320072796195745, 0.0486629419028759, 0.0005405444535426795, 0.005952970590442419, 0.0009982762858271599, 0.004001363180577755, 0.009125707671046257, 0.6945337057113647, 0.006549985148012638, 0.007807720452547073, 0.003924727905541658, 0.004149672109633684, 0.003537258366122842, 0.001676861196756363, 0.11541670560836792, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0021667596884071827, 0.0005287157837301493, 0.009149480611085892, 0.024324318394064903, 0.0018866003956645727, 0.0003624066011980176, 0.0004668526817113161, 0.0064473398961126804, 0.0217228215187788, 0.0031395854894071817, 0.0052951243706047535, 0.004629157949239016, 0.003511544084176421, 0.0017145106103271246, 0.2705381214618683, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0036477160174399614, 0.018601393327116966, 0.00400471780449152, 0.016223786398768425, 0.015442389994859695, 0.030637366697192192, 0.04816145822405815, 0.009263478219509125, 0.08580432087182999, 0.07024423778057098, 0.17587034404277802, 0.2670482397079468, 0.10741393268108368, 0.11723090708255768, 0.197556272149086, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0067135002464056015, 0.005400336813181639, 0.002429268090054393, 0.0005210567032918334, 0.0009090648964047432, 0.056922394782304764, 0.006305574905127287, 0.02051912061870098, 0.009087055921554565, 0.0029723523184657097, 0.5903128385543823, 0.4623943269252777, 0.5148944854736328, 0.10147220641374588, 0.10177940130233765, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.016283290460705757, 0.004236595239490271, 0.00024049253261182457, 0.00013081195356789976, 0.004825976211577654, 0.03370611369609833, 0.030076656490564346, 0.006495397537946701, 0.015585500746965408, 0.0006116450531408191, 0.009124655276536942, 0.7220618724822998, 0.5160555839538574, 0.16948190331459045, 0.04205150157213211, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04056651145219803, 0.05449386313557625, 0.007923644036054611, 0.00034379694261588156, 0.0072999089024960995, 0.005707062315195799, 0.018278487026691437, 0.00924981851130724, 0.0004191468469798565, 0.0015566512010991573, 0.0019580996595323086, 0.06517467647790909, 0.4938390851020813, 0.1360015720129013, 0.14540629088878632, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02595147117972374, 0.0358305424451828, 0.021912503987550735, 0.01559682097285986, 0.0029425774700939655, 0.008820675313472748, 0.259022980928421, 0.24083182215690613, 0.0008326273527927697, 0.009937180206179619, 0.008380424231290817, 0.0008840225636959076, 0.11912944912910461, 0.5976794362068176, 0.17433230578899384, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.024576334282755852, 0.01131413970142603, 0.0036256120074540377, 0.007047882303595543, 0.015460383147001266, 0.007877636700868607, 0.035456594079732895, 0.017273712903261185, 0.0020541276317089796, 0.005268692504614592, 0.003138576401397586, 0.0058868261985480785, 0.09279357641935349, 0.45485755801200867, 0.2460370808839798, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02016485668718815, 0.03839857131242752, 0.0345035195350647, 0.005700604524463415, 0.03111962042748928, 0.03698137030005455, 0.056010663509368896, 0.043163470923900604, 0.004449993837624788, 0.000997284660115838, 0.006035848520696163, 0.0027079761493951082, 0.009604639373719692, 0.02099894918501377, 0.13394789397716522, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.021257108077406883, 0.04756314679980278, 0.05559564009308815, 0.030912479385733604, 0.2625647187232971, 0.138688862323761, 0.027820995077490807, 0.05787678435444832, 0.3002224862575531, 0.018701573833823204, 0.027547171339392662, 0.19844435155391693, 0.1917300671339035, 0.07151354849338531, 0.16648255288600922, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4235764741897583, 0.10086580365896225, 0.07221788167953491, 0.13654322922229767, 0.04923773929476738, 0.06516944617033005, 0.07642015814781189, 0.147566020488739, 0.013325832784175873, 0.07923475652933121, 0.03588176146149635, 0.02368854358792305, 0.12847480177879333, 0.04384613409638405, 0.18713882565498352, 0.10658828914165497, 0.44162610173225403, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.8895729184150696, 0.7431688904762268, 0.3041851818561554, 0.5492796897888184, 0.7013789415359497, 0.2035668045282364, 0.4541507959365845, 0.17740322649478912, 0.37418368458747864, 0.7257221937179565, 0.3302299678325653, 0.32646968960762024, 0.4535413682460785, 0.2710181474685669, 0.06444819271564484, 0.14346696436405182, 0.1105659008026123, 0.04705679044127464, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18918083608150482, 0.07354198396205902, 0.03709281235933304, 0.039312511682510376, 0.2119109183549881, 0.32255253195762634, 0.06547961384057999, 0.022612132132053375, 0.0069438498467206955, 0.04682554677128792, 0.04775600507855415, 0.10260774195194244, 0.060122229158878326, 0.07651683688163757, 0.11037445813417435, 0.14569434523582458, 0.006359750870615244, 0.06321832537651062, 0.009962446056306362, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05778415873646736, 0.1888784021139145, 0.12087801843881607, 0.08340981602668762, 0.2725185453891754, 0.956253707408905, 0.6455949544906616, 0.6532288789749146, 0.3585406243801117, 0.18532338738441467, 0.18782632052898407, 0.09142936766147614, 0.8097347617149353, 0.3558001220226288, 0.037162330001592636, 0.14614860713481903, 0.0770370289683342, 0.14572308957576752, 0.11918944120407104, 0.003047030884772539, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04896414652466774, 0.25620371103286743, 0.11985385417938232, 0.0157163105905056, 0.14219185709953308, 0.22957918047904968, 0.36173656582832336, 0.07001917064189911, 0.3676673173904419, 0.12105175852775574, 0.22853095829486847, 0.07480601221323013, 0.5630075335502625, 0.8219463229179382, 0.12425509095191956, 0.16211360692977905, 0.1199408695101738, 0.008137544617056847, 0.026895001530647278, 0.022997038438916206, 0.0004772362008225173, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04714362695813179, 0.01630709134042263, 0.04501143842935562, 0.03696214035153389, 0.036871057003736496, 0.14248797297477722, 0.08399422466754913, 0.03027486614882946, 0.0030259382911026478, 0.019033554941415787, 0.2224818617105484, 0.033125121146440506, 0.02079186774790287, 0.04913722351193428, 0.46250322461128235, 0.1276824176311493, 0.05415544658899307, 0.008876973763108253, 0.006533092353492975, 0.16286829113960266, 0.4191088378429413, 0.11241274327039719, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.033912286162376404, 0.0072718155570328236, 0.013269636780023575, 0.010754123330116272, 0.003932052757591009, 0.022333307191729546, 0.05135813727974892, 0.17082874476909637, 0.004249163903295994, 0.009168761782348156, 0.00692910747602582, 0.00042953240335918963, 0.008801857940852642, 0.008872170932590961, 0.02866899035871029, 0.1310766041278839, 0.09720440953969955, 0.005617472343146801, 0.018550021573901176, 0.07474999874830246, 0.03211009502410889, 0.01561786886304617, 0.5897646546363831, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.026226887479424477, 0.006219716742634773, 0.016528652980923653, 0.019500089809298515, 0.009756595827639103, 0.01771577261388302, 0.10877248644828796, 0.07924166321754456, 0.026382839307188988, 0.007807224057614803, 0.018975039944052696, 0.009491248056292534, 0.042680755257606506, 0.025040525943040848, 0.31068748235702515, 0.07142644375562668, 0.019657818600535393, 0.044225241988897324, 0.006672952324151993, 0.015112369321286678, 0.03715437650680542, 0.012035970576107502, 0.08684496581554413, 0.5578015446662903, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0181743074208498, 0.0022439020685851574, 0.027739310637116432, 0.07926302403211594, 0.007397042121738195, 0.01831221394240856, 0.057637136429548264, 0.025927647948265076, 0.03431807458400726, 0.03189869597554207, 0.20874466001987457, 0.006929311901330948, 0.08810199052095413, 0.09789149463176727, 0.25120988488197327, 0.06384367495775223, 0.009399783797562122, 0.06692944467067719, 0.013825987465679646, 0.01438650768250227, 0.11814092099666595, 0.025182364508509636, 0.04756484180688858, 0.4922580420970917, 0.010614832863211632, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0006848929915577173, 0.00015734595945104957, 0.0022563491947948933, 0.00281638465821743, 0.00390908308327198, 0.012311742641031742, 0.006667551584541798, 0.010898235253989697, 0.18826207518577576, 0.0010989188449457288, 0.003811799455434084, 0.0007082286756485701, 0.0025871950201690197, 0.0005297476891428232, 0.004719105549156666, 0.21570175886154175, 0.004600263200700283, 0.0039491499774158, 0.0010213260538876057, 0.00511409854516387, 0.00780195789411664, 0.0035460677463561296, 0.06005942076444626, 0.002209970960393548, 0.0011990047059953213, 0.010184505954384804, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008918036706745625, 0.01932302489876747, 0.1743663251399994, 0.04276113957166672, 0.17357498407363892, 0.05217360332608223, 0.01903947815299034, 0.006896412931382656, 0.02532179281115532, 0.019349897280335426, 0.14434273540973663, 0.2454780638217926, 0.06247624009847641, 0.03444024175405502, 0.2827233076095581, 0.15804870426654816, 0.10358668118715286, 0.018792977556586266, 0.0036350360605865717, 0.02226737141609192, 0.007843486964702606, 0.002713214373216033, 0.3624168336391449, 0.00397031893953681, 0.013842551037669182, 0.05391863361001015, 0.040338534861803055, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014348846860229969, 0.006216275505721569, 0.06011093780398369, 0.05047134682536125, 0.013856974430382252, 0.08402124047279358, 0.0029483914840966463, 0.0018935499247163534, 0.004232283215969801, 0.022591279819607735, 0.34387707710266113, 0.06330335885286331, 0.20501238107681274, 0.1859048306941986, 0.0244001317769289, 0.0703621581196785, 0.01676221750676632, 0.03283774480223656, 0.005265639629215002, 0.016811830922961235, 0.008307189680635929, 0.0008217993890866637, 0.06662888079881668, 0.006444453727453947, 0.0015952866524457932, 0.03341786190867424, 0.28674793243408203, 0.09830270707607269, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.016000788658857346, 0.003648907644674182, 0.07618206739425659, 0.26581478118896484, 0.00828572828322649, 0.01491115428507328, 0.006984202191233635, 0.00572665361687541, 0.007784067187458277, 0.03336494415998459, 0.19996345043182373, 0.0026567107997834682, 0.14645317196846008, 0.1677580624818802, 0.0739188864827156, 0.00274313404224813, 0.01220498327165842, 0.001565106911584735, 0.014617281965911388, 0.0015394951915368438, 0.00014163085143081844, 0.0032730719540268183, 0.04253724217414856, 0.01929563470184803, 0.0011092370841652155, 0.008900013752281666, 0.14250728487968445, 0.44352540373802185, 0.012739983387291431, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.033913157880306244, 0.5720782279968262, 0.09820353239774704, 0.06329890340566635, 0.10058190673589706, 0.8026418685913086, 0.08380495011806488, 0.37448471784591675, 0.04885341227054596, 0.01422097533941269, 0.32552391290664673, 0.701602578163147, 0.9988673329353333, 0.9602208137512207, 0.015194611623883247, 0.12441921979188919, 0.09727630764245987, 0.031539320945739746, 0.0390433706343174, 0.004017204977571964, 0.003718326799571514, 0.06902258098125458, 0.21229486167430878, 0.1692674309015274, 0.507585346698761, 0.24224399030208588, 0.4713107943534851, 0.22175242006778717, 0.1071210727095604, 0.001354279462248087, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01701497472822666, 0.004510161932557821, 0.04222021996974945, 0.131240576505661, 0.007172171492129564, 0.0009335885988548398, 0.0025300730485469103, 0.0012859954731538892, 0.013300590217113495, 0.05520036071538925, 0.2908037602901459, 0.0021335158962756395, 0.11976832151412964, 0.046004947274923325, 0.029495948925614357, 0.11131177842617035, 0.045754965394735336, 0.13187335431575775, 0.021390099078416824, 0.2008819729089737, 0.1753949522972107, 0.029810786247253418, 0.1191062182188034, 0.0330519825220108, 0.021209293976426125, 0.007793682627379894, 0.004569755867123604, 0.21031485497951508, 0.08390634506940842, 0.11696453392505646, 0.2920413017272949, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0007848403765819967, 0.002563882153481245, 0.003471110016107559, 0.009534057229757309, 0.012083875946700573, 0.006908607203513384, 0.0028729254845529795, 0.0018324146512895823, 0.009593485854566097, 0.008395246230065823, 0.009609236381947994, 0.05064208433032036, 0.00595981115475297, 0.002902570180594921, 0.2071433663368225, 0.28942060470581055, 0.004874760750681162, 0.02575746178627014, 0.03629674017429352, 0.0339069589972496, 0.06067432835698128, 0.06949229538440704, 0.17600718140602112, 0.04042575880885124, 0.0021073101088404655, 0.002125136088579893, 0.0013297069817781448, 0.013164625503122807, 0.019647862762212753, 0.0625171884894371, 0.003036472015082836, 0.15673543512821198, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008253121748566628, 0.01393465232104063, 0.03316362947225571, 0.045629892498254776, 0.015712177380919456, 0.15894818305969238, 0.02510240487754345, 0.013996893540024757, 0.6886083483695984, 0.014645315706729889, 0.04062162712216377, 0.02812274731695652, 0.10265076905488968, 0.10770027339458466, 0.07716524600982666, 0.29843398928642273, 0.006499151699244976, 0.002175502711907029, 0.00474061444401741, 0.012194045819342136, 0.024305779486894608, 0.05332900583744049, 0.20892387628555298, 0.06725459545850754, 0.0056669809855520725, 0.023831704631447792, 0.0038352743722498417, 0.008001168258488178, 0.00692057004198432, 0.006051996257156134, 0.0008782879449427128, 0.0244371946901083, 0.05294432491064072, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0017006727866828442, 0.008613905869424343, 0.08540165424346924, 0.014788517728447914, 0.11802737414836884, 0.058780014514923096, 0.008085138164460659, 0.003584004705771804, 0.06396479159593582, 0.006658769678324461, 0.02042919024825096, 0.3806440234184265, 0.01375669613480568, 0.01512871216982603, 0.1676391214132309, 0.19362471997737885, 0.05030333995819092, 0.012831996195018291, 0.0028119448106735945, 0.011659904383122921, 0.0070129260420799255, 0.002673238283023238, 0.1857692450284958, 0.0015845311572775245, 0.003893241984769702, 0.009055504575371742, 0.013083641417324543, 0.009338575415313244, 0.007860029116272926, 0.009482803754508495, 0.019751103594899178, 0.03845033049583435, 0.03947525471448898, 0.03009573556482792, NaN, NaN, NaN, NaN, NaN, NaN], [0.017164628952741623, 0.028738657012581825, 0.06823595613241196, 0.08604145050048828, 0.04855107143521309, 0.24198594689369202, 0.008688676171004772, 0.003311790293082595, 0.059665460139513016, 0.08214288204908371, 0.34741461277008057, 0.15404720604419708, 0.18822570145130157, 0.19501997530460358, 0.062469229102134705, 0.08181142061948776, 0.013090993277728558, 0.025600923225283623, 0.0045991819351911545, 0.007844633422791958, 0.0066622160375118256, 0.0006054755649529397, 0.01805841363966465, 0.0025927021633833647, 0.0006796378293074667, 0.012531430460512638, 0.18806973099708557, 0.04688132554292679, 0.005460845306515694, 0.053047653287649155, 0.013497358188033104, 0.040136244148015976, 0.022071214392781258, 0.31691932678222656, 0.07654344290494919, NaN, NaN, NaN, NaN, NaN], [0.04490135982632637, 0.02318926900625229, 0.15967297554016113, 0.36984479427337646, 0.027114713564515114, 0.1867561787366867, 0.04668368771672249, 0.02171866036951542, 0.05653616786003113, 0.08818016946315765, 0.14142879843711853, 0.002535451203584671, 0.06232175603508949, 0.12099058926105499, 0.16113655269145966, 0.003571689361706376, 0.007330529857426882, 0.0009176949388347566, 0.011351491324603558, 0.0005700239562429488, 0.0001114286933443509, 0.0023790227714926004, 0.011217805556952953, 0.004490875173360109, 0.00038650527130812407, 0.0025467458181083202, 0.048559535294771194, 0.22723886370658875, 0.0019670024048537016, 0.0002542402071412653, 0.027445662766695023, 0.015111691318452358, 0.029036840423941612, 0.2144545316696167, 0.4208240211009979, 0.013829981908202171, NaN, NaN, NaN, NaN], [0.07898441702127457, 0.817236065864563, 0.29267793893814087, 0.16063392162322998, 0.31295838952064514, 0.9265751838684082, 0.1967003047466278, 0.5436303615570068, 0.2332589328289032, 0.04864489659667015, 0.5440958142280579, 0.8931991457939148, 0.9993566870689392, 0.9798612594604492, 0.03687797114253044, 0.11162849515676498, 0.06633912026882172, 0.017337389290332794, 0.030477523803710938, 0.0024834000505506992, 0.001867939718067646, 0.03932232782244682, 0.1628599613904953, 0.14192035794258118, 0.2944621741771698, 0.21811458468437195, 0.42557209730148315, 0.2638176381587982, 0.14630424976348877, 0.0005040403339080513, 0.32521945238113403, 0.2411627173423767, 0.28287336230278015, 0.40539565682411194, 0.1682160645723343, 0.08244442939758301, 0.001218001707457006, NaN, NaN, NaN], [0.051174335181713104, 0.009388554841279984, 0.15813162922859192, 0.3707107603549957, 0.02142486348748207, 0.01361497025936842, 0.01679075136780739, 0.00489152641966939, 0.08238242566585541, 0.07653495669364929, 0.14888693392276764, 0.003932347521185875, 0.1416105329990387, 0.05760091543197632, 0.13266737759113312, 0.20973265171051025, 0.07712213695049286, 0.20427735149860382, 0.025535617023706436, 0.4053865373134613, 0.41131824254989624, 0.030548784881830215, 0.060146916657686234, 0.012079673819243908, 0.01592317223548889, 0.0048461491242051125, 0.0021770852617919445, 0.09957096725702286, 0.1170588806271553, 0.13386258482933044, 0.16141492128372192, 0.004613581579178572, 0.015190798789262772, 0.003683852730318904, 0.1389266699552536, 0.07006954401731491, 0.1815212517976761, 0.17825333774089813, NaN, NaN], [0.00042274355655536056, 0.0019217034569010139, 0.0013128711143508554, 0.004135955590754747, 0.004101510625332594, 0.004091422073543072, 0.0013299065176397562, 0.0007323773461394012, 0.006002569571137428, 0.003528070170432329, 0.004258603788912296, 0.04385730251669884, 0.006557406857609749, 0.0025679266545921564, 0.1728060394525528, 0.3360293209552765, 0.0046190484426915646, 0.024437543004751205, 0.03736568242311478, 0.023848971351981163, 0.05927197262644768, 0.0542423352599144, 0.09209144860506058, 0.023972967639565468, 0.000766670098528266, 0.0006589474505744874, 0.0007115502958185971, 0.00637162895873189, 0.012912634760141373, 0.014624576084315777, 0.0019432539120316505, 0.05897590517997742, 0.0038116518408060074, 0.0016802565660327673, 0.011611220426857471, 0.025170182809233665, 0.04455949738621712, 0.0020357028115540743, 0.14134161174297333, NaN], [0.0034927180968225002, 0.014745223335921764, 0.025302981957793236, 0.04650698974728584, 0.0658985823392868, 0.10278132557868958, 0.009682145901024342, 0.010841106064617634, 0.1757735013961792, 0.03157021477818489, 0.006062814965844154, 0.2611170709133148, 0.3153221011161804, 0.08490109443664551, 0.13624651730060577, 0.187117338180542, 0.005916869733482599, 0.020901108160614967, 0.0559980571269989, 0.0324174202978611, 0.008547084406018257, 0.044511571526527405, 0.04880741238594055, 0.05289075896143913, 0.038245368748903275, 0.003611604683101177, 0.002279189880937338, 0.01790045015513897, 0.008863909170031548, 0.01127588003873825, 0.005861865822225809, 0.17173975706100464, 0.009364882484078407, 0.005221609957516193, 0.012455414980649948, 0.007264893501996994, 0.016177698969841003, 0.008824422955513, 0.18642237782478333, 0.0006185321253724396]], [[0.11855445802211761, 0.018203705549240112, 0.014699782244861126, 0.005997231230139732, 0.012317956425249577, 0.005482070613652468, 0.020501872524619102, 0.04173066467046738, 0.028033137321472168, 0.007907108403742313, 0.13633504509925842, 0.11779958009719849, 0.02402079664170742, 0.08686818182468414, 0.19919154047966003, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015789268538355827, 0.07802969217300415, 0.024552250280976295, 0.007203033193945885, 0.015197299420833588, 0.0086579704657197, 0.005928180180490017, 0.015956610441207886, 0.019966211169958115, 0.002508557867258787, 0.048071712255477905, 0.0452260747551918, 0.027286410331726074, 0.034357864409685135, 0.19209280610084534, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.7560696601867676, 0.09646204113960266, 0.24264514446258545, 0.03150765225291252, 0.15196740627288818, 0.027980739250779152, 0.025865402072668076, 0.037002913653850555, 0.02429634891450405, 0.014392002485692501, 0.11331582069396973, 0.2883520722389221, 0.24113057553768158, 0.5529852509498596, 0.13967400789260864, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6593953371047974, 0.14735713601112366, 0.007992099039256573, 0.03938791900873184, 0.047611087560653687, 0.002478603972122073, 0.00756214139983058, 0.01120123453438282, 0.017771385610103607, 0.011085578240454197, 0.01766165718436241, 0.07185176759958267, 0.01590064913034439, 0.05699647217988968, 0.22524236142635345, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.8214750289916992, 0.5506035089492798, 0.04117008298635483, 0.00517136137932539, 0.5628769993782043, 0.013714980334043503, 0.018153639510273933, 0.019494647160172462, 0.02796507254242897, 0.003693098435178399, 0.052905939519405365, 0.024033749476075172, 0.017759546637535095, 0.154443621635437, 0.2181331366300583, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.47579920291900635, 0.4996025860309601, 0.02201933227479458, 0.032786499708890915, 0.003352785250172019, 0.402157723903656, 0.028392860665917397, 0.03425603359937668, 0.017302367836236954, 0.007774383760988712, 0.03628184646368027, 0.015436487272381783, 0.09682580828666687, 0.09163853526115417, 0.1807471215724945, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6324970722198486, 0.5132108926773071, 0.14723047614097595, 0.10531618446111679, 0.14770705997943878, 0.01965152472257614, 0.16446776688098907, 0.023718399927020073, 0.014144167304039001, 0.003392518265172839, 0.03989372402429581, 0.048702552914619446, 0.05385157838463783, 0.06003360450267792, 0.2021118402481079, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2804942727088928, 0.4447323679924011, 0.40719398856163025, 0.15280602872371674, 0.5485119223594666, 0.006256175693124533, 0.005905789323151112, 0.0894087627530098, 0.014159541577100754, 0.0037697115913033485, 0.08780182898044586, 0.04568948596715927, 0.08344046771526337, 0.08309336006641388, 0.1791403889656067, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.38668709993362427, 0.3767029941082001, 0.5765653848648071, 0.14457443356513977, 0.830109715461731, 0.558448314666748, 0.2105703204870224, 0.015437009744346142, 0.0802588015794754, 0.0035789015237241983, 0.009509528055787086, 0.011719968169927597, 0.04601259157061577, 0.015442220494151115, 0.02989899180829525, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.42374563217163086, 0.4557475447654724, 0.5995064973831177, 0.22240440547466278, 0.8298278450965881, 0.26192477345466614, 0.5618261694908142, 0.2755923569202423, 0.03321446478366852, 0.014314521104097366, 0.030895033851265907, 0.0061126528307795525, 0.0033166268840432167, 0.0021476708352565765, 0.12580153346061707, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.4742293357849121, 0.32335561513900757, 0.5931060910224915, 0.0772920548915863, 0.3757626712322235, 0.211185023188591, 0.42018893361091614, 0.37329575419425964, 0.26276469230651855, 0.012583179399371147, 0.3317490220069885, 0.002885210793465376, 0.011435287073254585, 0.00757939275354147, 0.1435183733701706, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.21439705789089203, 0.17853425443172455, 0.32548797130584717, 0.06489395350217819, 0.64824378490448, 0.1159982681274414, 0.19616922736167908, 0.27417391538619995, 0.6047332286834717, 0.1810707151889801, 0.034782104194164276, 0.10310898721218109, 0.0316632017493248, 0.025309519842267036, 0.09833981841802597, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.19860051572322845, 0.10174965113401413, 0.08606765419244766, 0.053267233073711395, 0.11251617968082428, 0.2378872036933899, 0.16651752591133118, 0.1490997076034546, 0.4605393707752228, 0.18029887974262238, 0.1883857697248459, 0.007075145840644836, 0.25310245156288147, 0.08171047270298004, 0.15088772773742676, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2976968586444855, 0.21286718547344208, 0.04716610535979271, 0.025928588584065437, 0.1317281424999237, 0.12927810847759247, 0.2939497232437134, 0.23276808857917786, 0.5986261367797852, 0.05386120826005936, 0.05668044835329056, 0.025143466889858246, 0.007965278811752796, 0.03647890314459801, 0.16275253891944885, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.34472423791885376, 0.33325105905532837, 0.5841152667999268, 0.8456752300262451, 0.4377557933330536, 0.4159393310546875, 0.33224907517433167, 0.1488359123468399, 0.2203720510005951, 0.7425854206085205, 0.7086009383201599, 0.5293036699295044, 0.2777566909790039, 0.22530661523342133, 0.09936152398586273, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01888529770076275, 0.5547894835472107, 0.0062187607400119305, 0.02304725907742977, 0.007431741803884506, 0.05333258956670761, 0.13557927310466766, 0.09608769416809082, 0.011193820275366306, 0.006900292821228504, 0.007560353726148605, 0.018807610496878624, 0.018169475719332695, 0.07717052102088928, 0.1439915895462036, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.045791856944561005, 0.14471176266670227, 0.057932548224925995, 0.15441685914993286, 0.011981116607785225, 0.030152589082717896, 0.13976308703422546, 0.003811573376879096, 0.010053272359073162, 0.1557283103466034, 0.05080341920256615, 0.00967743806540966, 0.003085661679506302, 0.003445286303758621, 0.08783376961946487, 0.12484697252511978, 0.1276315450668335, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.010936958715319633, 0.0031021125614643097, 0.009866965003311634, 0.09017129242420197, 0.02775183692574501, 0.0016267865430563688, 0.01958146132528782, 0.003049993421882391, 0.009465858340263367, 0.022049162536859512, 0.013875926844775677, 0.002902107546105981, 0.0008567434852011502, 0.0034160439390689135, 0.13799139857292175, 0.15841424465179443, 0.03031034581363201, 0.02654799446463585, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10994840413331985, 0.15032780170440674, 0.0035718681756407022, 0.1491042822599411, 0.020450405776500702, 0.013510379940271378, 0.47067153453826904, 0.6447877883911133, 0.18023402988910675, 0.1876010298728943, 0.011866661719977856, 0.006677938625216484, 0.0005242988117970526, 0.004238110035657883, 0.29615819454193115, 0.13769303262233734, 0.09575259685516357, 0.025977646932005882, 0.052591271698474884, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06992093473672867, 0.2791251242160797, 0.006900451611727476, 0.053067900240421295, 0.010168666951358318, 0.0023874202743172646, 0.05137968435883522, 0.06462283432483673, 0.11192043125629425, 0.10690896213054657, 0.009735661558806896, 0.04335656389594078, 0.0031411510426551104, 0.011707558296620846, 0.14929862320423126, 0.15085087716579437, 0.15096567571163177, 0.09222358465194702, 0.028469638898968697, 0.0012114758137613535, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24040630459785461, 0.43853774666786194, 0.0175826046615839, 0.06282828748226166, 0.03055599145591259, 0.20223812758922577, 0.5439046025276184, 0.8139520287513733, 0.30283859372138977, 0.4911571145057678, 0.09772597998380661, 0.1337594985961914, 0.08667796850204468, 0.03606351464986801, 0.12256386131048203, 0.16431185603141785, 0.07204771786928177, 0.05053501948714256, 0.012478960677981377, 0.05114812031388283, 0.00039714027661830187, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03999294713139534, 0.1864590346813202, 0.003897173795849085, 0.04184543341398239, 0.0012414547381922603, 0.025941016152501106, 0.05348599702119827, 0.5434274673461914, 0.012460692785680294, 0.31306707859039307, 0.06930337846279144, 0.0021947044879198074, 0.023592861369252205, 0.04260588437318802, 0.01969532109797001, 0.1666734665632248, 0.06891340762376785, 0.013632094487547874, 0.018171580508351326, 0.002599227475002408, 0.0009873181115835905, 0.0006481229793280363, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.053744781762361526, 0.006899113766849041, 0.0563664473593235, 0.12695427238941193, 0.012777185067534447, 0.08455551415681839, 0.11441048979759216, 0.13062608242034912, 0.19371363520622253, 0.6254263520240784, 0.24294114112854004, 0.020724456757307053, 0.019838949665427208, 0.022365091368556023, 0.1131007969379425, 0.14423918724060059, 0.12251336872577667, 0.10176724940538406, 0.33380815386772156, 0.1583750993013382, 0.023372141644358635, 0.026839546859264374, 0.06730155646800995, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11661048978567123, 0.35882315039634705, 0.03118491731584072, 0.06881216168403625, 0.014698721468448639, 0.0038598491810262203, 0.1485612690448761, 0.39066970348358154, 0.07792866975069046, 0.22571811079978943, 0.040231697261333466, 0.265895277261734, 0.2000368982553482, 0.1125464141368866, 0.24931347370147705, 0.2790219187736511, 0.15446610748767853, 0.015893638134002686, 0.03619629144668579, 0.003051391802728176, 0.00038247412885539234, 0.0007123185787349939, 0.010222047567367554, 0.0010863485513255, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03291217237710953, 0.23853188753128052, 0.04644821211695671, 0.031600918620824814, 0.045192934572696686, 0.0019951597787439823, 0.11113008856773376, 0.36339887976646423, 0.010439107194542885, 0.20188210904598236, 0.027288423851132393, 0.21054767072200775, 0.04143378138542175, 0.0853629931807518, 0.2336580902338028, 0.26870372891426086, 0.10405707359313965, 0.00916238222271204, 0.058617573231458664, 0.0049601029604673386, 0.0005682760966010392, 0.004407011903822422, 0.03309918940067291, 0.0036104319151490927, 0.12174393236637115, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07334253191947937, 0.14656193554401398, 0.004660916980355978, 0.03353964164853096, 0.00998624786734581, 0.00235390174202621, 0.04832129552960396, 0.031250230967998505, 0.0017524310387670994, 0.10710166394710541, 0.04863408952951431, 0.11276239901781082, 0.00949337612837553, 0.024303043261170387, 0.5020502805709839, 0.05985519662499428, 0.14893494546413422, 0.09544339030981064, 0.18974637985229492, 0.1120084673166275, 0.28269606828689575, 0.4275827407836914, 0.12184610962867737, 0.40095797181129456, 0.08120625466108322, 0.27448615431785583, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15921767055988312, 0.18694822490215302, 0.011401425115764141, 0.15920288860797882, 0.0017978762043640018, 0.00600996520370245, 0.1401643455028534, 0.08585444837808609, 0.05989503860473633, 0.2726706564426422, 0.041456613689661026, 0.0019109381828457117, 0.0026012342423200607, 0.00675933575257659, 0.05683350935578346, 0.06809581816196442, 0.09586934000253677, 0.10229554027318954, 0.057183876633644104, 0.25635847449302673, 0.19582371413707733, 0.4237477481365204, 0.37648820877075195, 0.48733898997306824, 0.20777222514152527, 0.24944597482681274, 0.45371755957603455, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6248686909675598, 0.8166397213935852, 0.05456394702196121, 0.3034517765045166, 0.0032548136077821255, 0.03656908869743347, 0.3933179974555969, 0.635881781578064, 0.4090532660484314, 0.6309216618537903, 0.09238837659358978, 0.01225167978554964, 0.0038302247412502766, 0.05015851929783821, 0.4316881597042084, 0.05513762682676315, 0.16880887746810913, 0.02300925739109516, 0.03029457852244377, 0.032050080597400665, 0.0745139941573143, 0.08332593739032745, 0.5048279166221619, 0.051856089383363724, 0.16889351606369019, 0.22218117117881775, 0.29087209701538086, 0.03443009778857231, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6506885886192322, 0.26984432339668274, 0.19192098081111908, 0.45030322670936584, 0.018604522570967674, 0.06438936293125153, 0.16284945607185364, 0.46218666434288025, 0.2198290228843689, 0.6063108444213867, 0.13934792578220367, 0.19822801649570465, 0.009406321682035923, 0.07906869053840637, 0.39550670981407166, 0.07503295689821243, 0.22708888351917267, 0.011672623455524445, 0.03240634873509407, 0.051372844725847244, 0.0555996336042881, 0.1055832952260971, 0.27455389499664307, 0.019383858889341354, 0.29115474224090576, 0.25329896807670593, 0.3762655258178711, 0.06596359610557556, 0.027243560180068016, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6516265273094177, 0.3494286835193634, 0.13445304334163666, 0.40472084283828735, 0.05377691984176636, 0.043724507093429565, 0.6220480799674988, 0.09338771551847458, 0.1620686650276184, 0.8232020139694214, 0.17699383199214935, 0.03535428270697594, 4.775904380949214e-05, 0.000580178399104625, 0.13870029151439667, 0.15851522982120514, 0.22386471927165985, 0.13473065197467804, 0.10273782163858414, 0.539568305015564, 0.23089595139026642, 0.2947250008583069, 0.2566256523132324, 0.08758009225130081, 0.04963833838701248, 0.026406293734908104, 0.02359875850379467, 0.06999926269054413, 0.014701825566589832, 0.008440684527158737, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.40970566868782043, 0.3527304232120514, 0.004458754323422909, 0.09938450157642365, 0.006175781134516001, 0.014084810391068459, 0.22543573379516602, 0.4835565686225891, 0.025563040748238564, 0.39703506231307983, 0.00602720445021987, 0.0051488312892615795, 0.0008810341823846102, 0.0033910071942955256, 0.2277533859014511, 0.1888987272977829, 0.22277534008026123, 0.06621028482913971, 0.04940320923924446, 0.013609242625534534, 0.012980671599507332, 0.0275713000446558, 0.5000426769256592, 0.025658253580331802, 0.28077542781829834, 0.21061377227306366, 0.1005047932267189, 0.0123829934746027, 0.005874408408999443, 0.04495157673954964, 0.007559731602668762, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19487805664539337, 0.1991150975227356, 0.010765495710074902, 0.08231080323457718, 0.014791524969041348, 0.005413876846432686, 0.2905171811580658, 0.06453394889831543, 0.003980779554694891, 0.08378233760595322, 0.012941073626279831, 0.009292078204452991, 0.0008543379371985793, 0.002103410428389907, 0.1794004589319229, 0.10630622506141663, 0.1130438968539238, 0.04711592569947243, 0.14829613268375397, 0.0012987125664949417, 0.0009870391804724932, 0.002409427659586072, 0.10731083154678345, 0.010861101560294628, 0.02266101725399494, 0.22295407950878143, 0.37738272547721863, 0.21324896812438965, 0.09625840187072754, 0.01478838175535202, 0.004724964965134859, 0.13376930356025696, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12092277407646179, 0.17967110872268677, 0.0018819703254848719, 0.04615653306245804, 0.002711376640945673, 0.0007180452230386436, 0.10793514549732208, 0.09669310599565506, 0.0005949889309704304, 0.15432700514793396, 0.015202132984995842, 0.003636009059846401, 0.00047353014815598726, 0.0022874167189002037, 0.22825637459754944, 0.0042772903107106686, 0.006450775545090437, 0.00791113544255495, 0.01871791109442711, 0.02349945716559887, 0.036059893667697906, 0.09560179710388184, 0.01157363597303629, 0.020316841080784798, 0.002858342370018363, 0.0015840751584619284, 0.03869258984923363, 0.04008479043841362, 0.0456826388835907, 0.061234306544065475, 0.32812535762786865, 0.4548730254173279, 0.048923686146736145, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14498451352119446, 0.2535317540168762, 0.027076847851276398, 0.14632807672023773, 0.0057570356875658035, 0.011071202345192432, 0.31473973393440247, 0.2956455647945404, 0.07720959931612015, 0.1944134682416916, 0.008117430843412876, 0.0006636073812842369, 0.0008167477208189666, 0.0018315445631742477, 0.15913215279579163, 0.034464891999959946, 0.04304976761341095, 0.0730237364768982, 0.07959159463644028, 0.156441330909729, 0.14927342534065247, 0.37836754322052, 0.2500280439853668, 0.265838086605072, 0.038285933434963226, 0.0458042174577713, 0.2175784856081009, 0.055615901947021484, 0.32925114035606384, 0.23017114400863647, 0.5254709720611572, 0.3807608187198639, 0.4477500319480896, 0.3941081464290619, NaN, NaN, NaN, NaN, NaN, NaN], [0.22215187549591064, 0.47823596000671387, 0.018273456022143364, 0.13293205201625824, 0.0049734353087842464, 0.0265207476913929, 0.27213141322135925, 0.33180302381515503, 0.1344960778951645, 0.335622638463974, 0.010143149644136429, 0.0012862810399383307, 0.00035499766818247736, 0.0037611438892781734, 0.27220219373703003, 0.024431752040982246, 0.057854264974594116, 0.009785568341612816, 0.015689833089709282, 0.010099711827933788, 0.022971261292696, 0.026158222928643227, 0.08270542323589325, 0.00771379703655839, 0.023359954357147217, 0.06216609850525856, 0.1452798992395401, 0.010090651921927929, 0.13497084379196167, 0.023736534640192986, 0.06422590464353561, 0.2799428105354309, 0.34307411313056946, 0.27198341488838196, 0.018816450610756874, NaN, NaN, NaN, NaN, NaN], [0.3673586845397949, 0.057844266295433044, 0.06040150299668312, 0.09888742864131927, 0.023171812295913696, 0.05270017683506012, 0.11794743686914444, 0.1507657766342163, 0.008498218841850758, 0.09498187899589539, 0.003615680383518338, 0.010834122076630592, 0.00024780313833616674, 0.0017297717276960611, 0.20351538062095642, 0.032250434160232544, 0.07008427381515503, 0.003495490411296487, 0.011726448312401772, 0.013232100754976273, 0.021211393177509308, 0.02240551821887493, 0.050749149173498154, 0.0020511853508651257, 0.034987252205610275, 0.05167752131819725, 0.10231753438711166, 0.017492327839136124, 0.0036121474113315344, 0.0030979528091847897, 0.14347726106643677, 0.4107814431190491, 0.18759746849536896, 0.28042495250701904, 0.02327493391931057, 0.023935986682772636, NaN, NaN, NaN, NaN], [0.6060628294944763, 0.1373525857925415, 0.13755829632282257, 0.4113396406173706, 0.07285188883543015, 0.014519162476062775, 0.5372579097747803, 0.0630655512213707, 0.14564833045005798, 0.695697009563446, 0.06662726402282715, 0.006644518580287695, 1.2849791346525308e-05, 0.00011718441965058446, 0.13694217801094055, 0.17385193705558777, 0.24280618131160736, 0.0901411697268486, 0.1509939581155777, 0.5964542627334595, 0.18189039826393127, 0.25377142429351807, 0.39126867055892944, 0.11990400403738022, 0.04869762808084488, 0.06967514008283615, 0.0491257943212986, 0.1536286324262619, 0.04553663358092308, 0.006321897264569998, 0.008409527130424976, 0.01950901933014393, 0.028066763654351234, 0.039955586194992065, 0.08575458079576492, 0.02489100769162178, 0.0107131227850914, NaN, NaN, NaN], [0.16518473625183105, 0.10184229910373688, 0.002064367523416877, 0.05309450253844261, 0.004080682527273893, 0.012669779360294342, 0.18988992273807526, 0.5354599356651306, 0.004024976398795843, 0.07357845455408096, 0.00022774768876843154, 0.00034433722612448037, 4.428778629517183e-05, 0.00011935137445107102, 0.17481543123722076, 0.18693126738071442, 0.25040745735168457, 0.07803116738796234, 0.06071358174085617, 0.018153348937630653, 0.012512190267443657, 0.012858238071203232, 0.18478038907051086, 0.008756724186241627, 0.14063727855682373, 0.16963867843151093, 0.06472224742174149, 0.008233368396759033, 0.010625114664435387, 0.04533438757061958, 0.004584541078656912, 0.04685693234205246, 0.3269248306751251, 0.13935554027557373, 0.022706659510731697, 0.015514994971454144, 0.09856907278299332, 0.009564985521137714, NaN, NaN], [0.060375016182661057, 0.09738604724407196, 0.004719918128103018, 0.05357348173856735, 0.007510221563279629, 0.002087255474179983, 0.1777726411819458, 0.04658319056034088, 0.0022654803469777107, 0.02657914347946644, 0.002838509390130639, 0.0023206211626529694, 0.00029234393150545657, 0.0006460589938797057, 0.15720529854297638, 0.10220125317573547, 0.06584151834249496, 0.046970706433057785, 0.16499453783035278, 0.0008504274883307517, 0.000721337681170553, 0.0015187861863523722, 0.050142802298069, 0.005332621280103922, 0.005509581416845322, 0.0572623535990715, 0.172898530960083, 0.12213093042373657, 0.0640687644481659, 0.004657925106585026, 0.002522988012060523, 0.028443191200494766, 0.29674383997917175, 0.3544806241989136, 0.20916549861431122, 0.09151047468185425, 0.014975211583077908, 0.0019209993770346045, 0.07398010790348053, NaN], [0.006292517296969891, 0.056422796100378036, 0.003871192689985037, 0.016857203096151352, 0.0060961381532251835, 0.01021772250533104, 0.02558758109807968, 0.004345982801169157, 0.003136568469926715, 0.011386821046471596, 0.0007550015579909086, 0.014218548312783241, 0.002899263286963105, 0.00665974011644721, 0.1386014223098755, 0.014319260604679585, 0.019726725295186043, 0.010809341445565224, 0.06728478521108627, 0.024899542331695557, 0.06927011907100677, 0.2726534307003021, 0.06849226355552673, 0.06274150311946869, 0.0032663261517882347, 0.007571991998702288, 0.011041088029742241, 0.0653790682554245, 0.06552072614431381, 0.10165777057409286, 0.05923810228705406, 0.20752549171447754, 0.1128133162856102, 0.041725482791662216, 0.12833572924137115, 0.10405165702104568, 0.2233171910047531, 0.10715138167142868, 0.3742898404598236, 0.43902406096458435]], [[0.3582096993923187, 0.12323450297117233, 0.41414904594421387, 0.12697191536426544, 0.2567327618598938, 0.12921607494354248, 0.303745299577713, 0.26060354709625244, 0.2067556530237198, 0.0739586353302002, 0.038356974720954895, 0.018690073862671852, 0.019858568906784058, 0.03828525170683861, 0.09448481351137161, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.034560851752758026, 0.06147807836532593, 0.09719342738389969, 0.03090484067797661, 0.05040246620774269, 0.10769589245319366, 0.28225648403167725, 0.03959896042943001, 0.04561477154493332, 0.015998149290680885, 0.010396423749625683, 0.0027313604950904846, 0.02088637463748455, 0.02540828473865986, 0.1729334592819214, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.031599532812833786, 0.03154325857758522, 0.01938430592417717, 0.10300880670547485, 0.07719798386096954, 0.3211115002632141, 0.5488157868385315, 0.6110779047012329, 0.03511836752295494, 0.03874386474490166, 0.02549627609550953, 0.08684590458869934, 0.1071673184633255, 0.10855282843112946, 0.09071482717990875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05947110056877136, 0.046990834176540375, 0.001917339744977653, 0.019972380250692368, 0.14856000244617462, 0.10937333106994629, 0.7613639235496521, 0.43800127506256104, 0.038890283554792404, 0.0702563002705574, 0.052807219326496124, 0.20175476372241974, 0.09827514737844467, 0.19838720560073853, 0.1799801141023636, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010548654943704605, 0.056933727115392685, 0.0004277318366803229, 0.0005220972234383225, 0.03427216783165932, 0.15697234869003296, 0.44382861256599426, 0.28639304637908936, 0.1278306096792221, 0.0589531809091568, 0.07240739464759827, 0.21584689617156982, 0.623681902885437, 0.39177897572517395, 0.053747572004795074, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.012333033606410027, 0.11936485022306442, 0.0015480549773201346, 0.05167163908481598, 0.003915506415069103, 0.05033823475241661, 0.18770258128643036, 0.5247471332550049, 0.13492631912231445, 0.0999734029173851, 0.02801361307501793, 0.04943297058343887, 0.067798912525177, 0.02220618724822998, 0.04863249137997627, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.023225123062729836, 0.03936318680644035, 0.0654693990945816, 0.0780135840177536, 0.03190883249044418, 0.007237496320158243, 0.3230750560760498, 0.11266676336526871, 0.3152024447917938, 0.12503208220005035, 0.08215073496103287, 0.20814812183380127, 0.054794978350400925, 0.014369799755513668, 0.31165388226509094, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.021642545238137245, 0.05032852664589882, 0.10916808992624283, 0.14173567295074463, 0.025796422734856606, 0.002176823327317834, 0.004212724044919014, 0.11230720579624176, 0.2761599123477936, 0.18545517325401306, 0.30032697319984436, 0.18456220626831055, 0.1202857494354248, 0.02383211813867092, 0.22383396327495575, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.014165909960865974, 0.030938388779759407, 0.019327908754348755, 0.025021186098456383, 0.018685894086956978, 0.058899857103824615, 0.05705944076180458, 0.013411193154752254, 0.27564239501953125, 0.14192135632038116, 0.4484158754348755, 0.49174171686172485, 0.42328834533691406, 0.5148258805274963, 0.024227913469076157, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.030343737453222275, 0.035576362162828445, 0.011198173277080059, 0.0029289661906659603, 0.004656192846596241, 0.19044476747512817, 0.14425727725028992, 0.14593322575092316, 0.02429576776921749, 0.03922351822257042, 0.03158531337976456, 0.3954472541809082, 0.18761666119098663, 0.829915463924408, 0.05755764618515968, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07378673553466797, 0.08269044756889343, 0.008506381884217262, 0.004565858747810125, 0.0033621611073613167, 0.47163471579551697, 0.3437289595603943, 0.16293375194072723, 0.0103234788402915, 0.006828381214290857, 0.025515833869576454, 0.13491219282150269, 0.23380780220031738, 0.7675665616989136, 0.06853343546390533, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.19539110362529755, 0.20751968026161194, 0.012997383251786232, 0.004634191282093525, 0.004486567340791225, 0.10301963984966278, 0.2361651211977005, 0.10510270297527313, 0.007245894055813551, 0.02498149685561657, 0.005201807711273432, 0.12586773931980133, 0.2985144853591919, 0.741521954536438, 0.061252206563949585, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3654796779155731, 0.656768798828125, 0.02389511466026306, 0.057929087430238724, 0.025417884811758995, 0.2985052168369293, 0.29244741797447205, 0.15614598989486694, 0.02199239283800125, 0.027919312939047813, 0.024499662220478058, 0.0015409317566081882, 0.18344998359680176, 0.05587974563241005, 0.11099682748317719, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.24996283650398254, 0.30432745814323425, 0.08651068061590195, 0.27794384956359863, 0.10948572307825089, 0.32318809628486633, 0.40224379301071167, 0.24700750410556793, 0.016620514914393425, 0.03902489319443703, 0.01563531532883644, 0.008603462018072605, 0.029363060370087624, 0.20380347967147827, 0.1635625809431076, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08184575289487839, 0.05559774115681648, 0.012900986708700657, 0.004766350146383047, 0.02465618960559368, 0.0658264234662056, 0.16982027888298035, 0.09995799511671066, 0.1946410834789276, 0.03345171734690666, 0.026332948356866837, 0.010880211368203163, 0.01684177853167057, 0.011932285502552986, 0.13059602677822113, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.19101674854755402, 0.0880991518497467, 0.25550922751426697, 0.3376496732234955, 0.25425824522972107, 0.2177356481552124, 0.35922226309776306, 0.13405567407608032, 0.2859460711479187, 0.47983312606811523, 0.235154390335083, 0.26708394289016724, 0.2646999657154083, 0.4890832304954529, 0.0349225178360939, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12788966298103333, 0.14897412061691284, 0.18708589673042297, 0.1539590060710907, 0.06750026345252991, 0.06459501385688782, 0.24742794036865234, 0.0008040289394557476, 0.08417094498872757, 0.08338519930839539, 0.09756942838430405, 0.05163748189806938, 0.06044981628656387, 0.1204136312007904, 0.005185095127671957, 0.12878015637397766, 0.05999259278178215, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00823432207107544, 0.006774595472961664, 0.011488616466522217, 0.031759701669216156, 0.014620696194469929, 0.015192853286862373, 0.015498323366045952, 0.001623230637051165, 0.04214249551296234, 0.022796856239438057, 0.0813785269856453, 0.058821164071559906, 0.018185952678322792, 0.030505431815981865, 0.13797427713871002, 0.16734670102596283, 0.0018487111665308475, 0.002184537472203374, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07304069399833679, 0.17316529154777527, 0.0638275146484375, 0.06216027960181236, 0.10879980027675629, 0.2286580353975296, 0.12489848583936691, 0.06798849999904633, 0.12340370565652847, 0.11364749073982239, 0.33209869265556335, 0.7156579494476318, 0.917570948600769, 0.8780012726783752, 0.004697424825280905, 0.06620991975069046, 0.4480140209197998, 0.42379117012023926, 0.3748236298561096, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04041377454996109, 0.06032548099756241, 0.013153426349163055, 0.12010756880044937, 0.032379359006881714, 0.02533758245408535, 0.03651244193315506, 0.05168384686112404, 0.05184069648385048, 0.20407944917678833, 0.10554968565702438, 0.5571502447128296, 0.039276935160160065, 0.10380254685878754, 0.1458612084388733, 0.1498516947031021, 0.091057188808918, 0.11073686927556992, 0.05954570695757866, 0.00012444167805369943, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.025283029302954674, 0.14580176770687103, 0.0262577123939991, 0.01834816485643387, 0.02426275424659252, 0.5010125637054443, 0.025797395035624504, 0.08120379596948624, 0.10846563428640366, 0.05807282403111458, 0.047331083565950394, 0.01890925131738186, 0.041984543204307556, 0.021773895248770714, 0.12734822928905487, 0.15789009630680084, 0.05178086459636688, 0.2272004932165146, 0.05532779544591904, 0.002530630910769105, 0.00011625503975665197, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11099886894226074, 0.272359162569046, 0.07267793267965317, 0.02685651369392872, 0.04662291333079338, 0.6599292755126953, 0.15850403904914856, 0.1944371908903122, 0.02196124941110611, 0.18415939807891846, 0.2094753533601761, 0.11699666827917099, 0.8625363111495972, 0.6611498594284058, 0.034588079899549484, 0.05158510431647301, 0.42307329177856445, 0.4962795376777649, 0.6637455821037292, 0.11636865884065628, 0.027691489085555077, 0.059323750436306, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10045554488897324, 0.003808635985478759, 0.012772331945598125, 0.008206314407289028, 0.016907531768083572, 0.2308196723461151, 0.04502535238862038, 0.16794730722904205, 0.14683513343334198, 0.07804886251688004, 0.12962646782398224, 0.03242946416139603, 0.45433515310287476, 0.3931583762168884, 0.023861808702349663, 0.1440366506576538, 0.37752795219421387, 0.42684903740882874, 0.13104133307933807, 0.0449170246720314, 0.0360451340675354, 0.007316120434552431, 0.03281773626804352, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.020261207595467567, 0.011864200234413147, 0.013516101986169815, 0.00783876795321703, 0.006360001862049103, 0.5825139880180359, 0.27136117219924927, 0.28645893931388855, 0.002775657456368208, 0.05587191879749298, 0.01021821890026331, 0.03437367081642151, 0.37942126393318176, 0.11788230389356613, 0.047214996069669724, 0.018571142107248306, 0.11001976579427719, 0.16728174686431885, 0.33147770166397095, 0.29621925950050354, 0.11174014210700989, 0.46736985445022583, 0.18467408418655396, 0.05186863988637924, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3444993495941162, 0.4299255907535553, 0.3897337317466736, 0.11608962714672089, 0.07001375406980515, 0.1826992928981781, 0.3195875883102417, 0.1513850837945938, 0.014436168596148491, 0.25265297293663025, 0.18822813034057617, 0.20145024359226227, 0.648497998714447, 0.6856710314750671, 0.13566814363002777, 0.0193540807813406, 0.11997552216053009, 0.4339123070240021, 0.4291674792766571, 0.22741732001304626, 0.21840345859527588, 0.4310562014579773, 0.16546283662319183, 0.05634206160902977, 0.03477246314287186, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.37375974655151367, 0.2605052888393402, 0.636468231678009, 0.14340142905712128, 0.5107957124710083, 0.683059811592102, 0.3617965579032898, 0.3775153160095215, 0.0734284520149231, 0.5245854258537292, 0.5329803228378296, 0.541839063167572, 0.8546188473701477, 0.8892531991004944, 0.08003345131874084, 0.07166115939617157, 0.34385329484939575, 0.5272834300994873, 0.4769807457923889, 0.34829023480415344, 0.19288644194602966, 0.1752767115831375, 0.3240547180175781, 0.026788396760821342, 0.09653788805007935, 0.14339366555213928, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1478864699602127, 0.26107946038246155, 0.2706110179424286, 0.022070137783885002, 0.08394861966371536, 0.7104908227920532, 0.22173403203487396, 0.18465854227542877, 0.3481738865375519, 0.02706378884613514, 0.14399166405200958, 0.24452990293502808, 0.3432118594646454, 0.3138853907585144, 0.0603480227291584, 0.09568949043750763, 0.2010803371667862, 0.1452081948518753, 0.13633964955806732, 0.13264110684394836, 0.11369673907756805, 0.18754418194293976, 0.10573749244213104, 0.12209529429674149, 0.3772747814655304, 0.4260762333869934, 0.1448964774608612, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03315366804599762, 0.109662726521492, 0.165960431098938, 0.03089676797389984, 0.00589095801115036, 0.7119044065475464, 0.04612211138010025, 0.03627030551433563, 0.019800378009676933, 0.02169116772711277, 0.07954178750514984, 0.014483828097581863, 0.3210127055644989, 0.25073835253715515, 0.021559905260801315, 0.1600937843322754, 0.32966408133506775, 0.46643200516700745, 0.2761552929878235, 0.1128716766834259, 0.16030451655387878, 0.13808301091194153, 0.12019707262516022, 0.08980843424797058, 0.23569302260875702, 0.18699060380458832, 0.06252679228782654, 0.02190866880118847, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1801593005657196, 0.7095129489898682, 0.41699883341789246, 0.14223065972328186, 0.03218872845172882, 0.8857168555259705, 0.325775682926178, 0.46090880036354065, 0.31827157735824585, 0.19596631824970245, 0.36584827303886414, 0.568932831287384, 0.05918605625629425, 0.12899020314216614, 0.03239220380783081, 0.09671676903963089, 0.3181785047054291, 0.5044789910316467, 0.5311775803565979, 0.43058764934539795, 0.24623769521713257, 0.546705424785614, 0.20948244631290436, 0.5971428155899048, 0.15125280618667603, 0.21692372858524323, 0.08393274247646332, 0.0805632621049881, 0.11463441699743271, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15587098896503448, 0.007851594127714634, 0.38951343297958374, 0.26023998856544495, 0.2678505480289459, 0.04164084047079086, 0.060063086450099945, 0.06729273498058319, 0.019880756735801697, 0.0442759171128273, 0.10040930658578873, 0.1083277016878128, 0.0003995952138211578, 0.001039322349242866, 0.14095477759838104, 0.17538371682167053, 0.005170984659343958, 0.01562126912176609, 0.012803001329302788, 0.0004321248270571232, 0.003303500125184655, 0.010391591116786003, 0.0083633316680789, 0.001453742035664618, 0.0005911564221605659, 0.001968160504475236, 0.018067756667733192, 0.0012553221313282847, 0.0006174716982059181, 0.0014710418181493878, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08899319916963577, 0.2356371134519577, 0.40766164660453796, 0.08200893551111221, 0.14033742249011993, 0.12043434381484985, 0.050508081912994385, 0.04391980916261673, 0.2084629088640213, 0.07807423919439316, 0.06514080613851547, 0.6571899652481079, 0.6522034406661987, 0.4899447560310364, 0.0237458273768425, 0.00964878499507904, 0.07296860218048096, 0.1732037365436554, 0.2482636272907257, 0.018695944920182228, 0.04061494395136833, 0.019565006718039513, 0.048743683844804764, 0.15582872927188873, 0.0506676621735096, 0.08059392869472504, 0.2691291868686676, 0.4701274335384369, 0.05269847437739372, 0.15863555669784546, 0.011098350398242474, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3269592225551605, 0.23715397715568542, 0.21103474497795105, 0.29856637120246887, 0.031984660774469376, 0.019636303186416626, 0.2648169696331024, 0.0041971527971327305, 0.6909844875335693, 0.5414000153541565, 0.4092715382575989, 0.02185220457613468, 0.006548420060425997, 0.013211028650403023, 0.06752441078424454, 0.023792432621121407, 0.42975902557373047, 0.3812340199947357, 0.23295366764068604, 0.2699258625507355, 0.32472288608551025, 0.04527096822857857, 0.2556793987751007, 0.5905154347419739, 0.8116171360015869, 0.684613823890686, 0.13916483521461487, 0.05671815946698189, 0.0401710644364357, 0.30002903938293457, 0.014873968437314034, 0.1109585389494896, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.40959432721138, 0.2696213126182556, 0.4055677354335785, 0.265968382358551, 0.12281941622495651, 0.10883577167987823, 0.16766701638698578, 0.053767129778862, 0.028326192870736122, 0.5353591442108154, 0.3247348368167877, 0.03339260071516037, 0.1199125200510025, 0.14055927097797394, 0.07849014550447464, 0.07327478379011154, 0.42313894629478455, 0.7821765542030334, 0.6752634048461914, 0.18926696479320526, 0.27897483110427856, 0.1972714066505432, 0.26650866866111755, 0.21928414702415466, 0.6610813736915588, 0.8023169040679932, 0.32853400707244873, 0.043605707585811615, 0.04177317023277283, 0.5147100687026978, 0.014965414069592953, 0.041893746703863144, 0.10476090759038925, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0703776553273201, 0.17115768790245056, 0.14820680022239685, 0.014450321905314922, 0.036940984427928925, 0.4336852431297302, 0.18269671499729156, 0.1382565200328827, 0.5314536690711975, 0.05019254609942436, 0.11642822623252869, 0.17526941001415253, 0.3684784173965454, 0.3591882586479187, 0.09016428142786026, 0.09543995559215546, 0.1369307041168213, 0.1906978189945221, 0.1367466300725937, 0.17180036008358002, 0.12260185182094574, 0.13847540318965912, 0.1559406965970993, 0.13510896265506744, 0.4644373655319214, 0.6843520402908325, 0.2938932180404663, 0.08134166151285172, 0.16692468523979187, 0.35020914673805237, 0.0983358696103096, 0.26928237080574036, 0.11322443932294846, 0.14002281427383423, NaN, NaN, NaN, NaN, NaN, NaN], [0.020959746092557907, 0.2473447471857071, 0.04995026811957359, 0.032434724271297455, 0.004538285546004772, 0.38885483145713806, 0.04268676042556763, 0.035024866461753845, 0.14864443242549896, 0.14174208045005798, 0.13687251508235931, 0.021197974681854248, 0.4566997289657593, 0.37854352593421936, 0.051512595266103745, 0.17294523119926453, 0.44891712069511414, 0.5596615076065063, 0.3151743412017822, 0.15508009493350983, 0.20398668944835663, 0.18162229657173157, 0.14380685985088348, 0.09279182553291321, 0.25614914298057556, 0.37145668268203735, 0.2047339379787445, 0.05775143578648567, 0.06389063596725464, 0.19947569072246552, 0.07508620619773865, 0.162083700299263, 0.036575064063072205, 0.05963924527168274, 0.02704720012843609, NaN, NaN, NaN, NaN, NaN], [0.11558277904987335, 0.8023946285247803, 0.11340320110321045, 0.07801315933465958, 0.012690390460193157, 0.363363116979599, 0.22989940643310547, 0.28700947761535645, 0.3164795935153961, 0.28987860679626465, 0.20186272263526917, 0.5113669037818909, 0.04614659398794174, 0.13675883412361145, 0.05756649002432823, 0.09450869262218475, 0.5263407230377197, 0.5685468316078186, 0.6246378421783447, 0.5457862615585327, 0.4288109838962555, 0.7265884876251221, 0.4213257133960724, 0.7441360354423523, 0.37028953433036804, 0.4906199276447296, 0.24940308928489685, 0.2854059636592865, 0.25606390833854675, 0.06486664712429047, 0.03651905804872513, 0.215606689453125, 0.16494624316692352, 0.07126681506633759, 0.0978088453412056, 0.18553400039672852, NaN, NaN, NaN, NaN], [0.13439694046974182, 0.004173143766820431, 0.22800596058368683, 0.19857077300548553, 0.1396344006061554, 0.007145485375076532, 0.03306930512189865, 0.026599518954753876, 0.02599666267633438, 0.04890456795692444, 0.0713912844657898, 0.040079280734062195, 0.00020046728604938835, 0.0004629320465028286, 0.13767622411251068, 0.19233128428459167, 0.0069253402762115, 0.019198253750801086, 0.024288823828101158, 0.0006626379326917231, 0.0032825330272316933, 0.012745865620672703, 0.02121213637292385, 0.004573441576212645, 0.001344278221949935, 0.010449343360960484, 0.07998955249786377, 0.008849495090544224, 0.005957764107733965, 0.00281895836815238, 0.0006993816932663321, 0.0011300387559458613, 0.0034355262760072947, 0.006048144306987524, 0.0007683978183194995, 0.00029024321702308953, 0.0009215899626724422, NaN, NaN, NaN], [0.21178027987480164, 0.5613860487937927, 0.18598653376102448, 0.13814353942871094, 0.06437420845031738, 0.1469835489988327, 0.09205848723649979, 0.07043211162090302, 0.3314816355705261, 0.1618121713399887, 0.0553976409137249, 0.7871544361114502, 0.7398563027381897, 0.533365786075592, 0.06109875440597534, 0.00490582175552845, 0.09978753328323364, 0.17523892223834991, 0.18201382458209991, 0.025161702185869217, 0.0351867638528347, 0.008898423984646797, 0.033712878823280334, 0.06612548977136612, 0.044598400592803955, 0.0818907842040062, 0.31783777475357056, 0.6522275805473328, 0.26521986722946167, 0.31609129905700684, 0.0543142631649971, 0.07028744369745255, 0.06436092406511307, 0.12702754139900208, 0.4257008731365204, 0.05356784537434578, 0.20406562089920044, 0.022904740646481514, NaN, NaN], [0.308572918176651, 0.1810312271118164, 0.10904403775930405, 0.38784971833229065, 0.013434378430247307, 0.011286276392638683, 0.26633715629577637, 0.0027595413848757744, 0.7609409689903259, 0.7608016729354858, 0.6143397688865662, 0.036307673901319504, 0.013564765453338623, 0.02826162986457348, 0.07738469541072845, 0.02933959849178791, 0.5456263422966003, 0.4945109188556671, 0.26123103499412537, 0.3237256109714508, 0.3705388903617859, 0.04209306091070175, 0.3351372182369232, 0.658141016960144, 0.8126230239868164, 0.8673186898231506, 0.28273773193359375, 0.11254162341356277, 0.17348313331604004, 0.7003386616706848, 0.1474425047636032, 0.36997753381729126, 0.41849759221076965, 0.091117262840271, 0.03724836930632591, 0.036747273057699203, 0.47380825877189636, 0.017722588032484055, 0.0920308530330658, NaN], [0.1500416249036789, 0.027276279404759407, 0.32022449374198914, 0.45847558975219727, 0.23693141341209412, 0.1596660166978836, 0.2821829915046692, 0.005833256058394909, 0.32143598794937134, 0.14477354288101196, 0.029714325442910194, 0.15291856229305267, 0.007731991354376078, 0.029727784916758537, 0.12283544987440109, 0.1429738998413086, 0.11406568437814713, 0.30407312512397766, 0.04420004412531853, 0.050888776779174805, 0.009020227938890457, 0.026264725252985954, 0.20154790580272675, 0.284900963306427, 0.16813665628433228, 0.6384625434875488, 0.35198092460632324, 0.0041788192465901375, 0.017796171829104424, 0.06702794879674911, 0.017356209456920624, 0.11703062057495117, 0.363391250371933, 0.08829980343580246, 0.0006652214215137064, 0.002063008025288582, 0.01232101023197174, 0.0010344748152419925, 0.005295889917761087, 0.10532692819833755]], [[0.06378140300512314, 0.013955923728644848, 0.058693334460258484, 0.014864355325698853, 0.02882157638669014, 0.02533077634871006, 0.013877282850444317, 0.02919653430581093, 0.029733512550592422, 0.010929838754236698, 0.2184230536222458, 0.404588907957077, 0.5044611692428589, 0.4171900451183319, 0.18600669503211975, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09787620604038239, 0.3741878271102905, 0.1718531847000122, 0.22170154750347137, 0.11211875081062317, 0.06884550303220749, 0.023903023451566696, 0.00765330670401454, 0.043831951916217804, 0.04742401838302612, 0.08705892413854599, 0.19904442131519318, 0.1439688503742218, 0.08975595235824585, 0.124632827937603, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.024405136704444885, 0.006321595516055822, 0.03571266308426857, 0.0050111510790884495, 0.01807553507387638, 6.11300565651618e-05, 0.0022184934932738543, 0.002461126074194908, 0.00987271312624216, 0.03944821655750275, 0.02587837167084217, 0.009154303930699825, 0.018459370359778404, 0.07083768397569656, 0.2838045060634613, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02829434722661972, 0.05303699150681496, 0.03342747688293457, 0.026768406853079796, 0.06776657700538635, 0.0015663451049476862, 0.0066550131887197495, 0.028257621452212334, 0.02201445959508419, 0.024995435029268265, 0.014314326457679272, 0.019762825220823288, 0.019060753285884857, 0.09995586425065994, 0.2721303105354309, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.011709636077284813, 0.13082386553287506, 0.3091292977333069, 0.012390679679811, 0.06598176062107086, 0.0025066242087632418, 0.008877930231392384, 0.03396160528063774, 0.01681593246757984, 0.01466491911560297, 0.12272557616233826, 0.010357965715229511, 0.009066522121429443, 0.12291242927312851, 0.3062548041343689, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05738264322280884, 0.12342102825641632, 0.7862259149551392, 0.20355252921581268, 0.007363088894635439, 0.0717976987361908, 0.032159313559532166, 0.018495721742510796, 0.0034321516286581755, 0.0013732254737988114, 0.006710591726005077, 0.0023603499867022038, 0.007563347462564707, 0.05948156490921974, 0.12037239223718643, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015277753584086895, 0.006394209805876017, 0.6686000227928162, 0.29117655754089355, 0.06745831668376923, 0.2462725043296814, 0.06154515966773033, 0.015117062255740166, 0.004134421236813068, 0.0023558081593364477, 0.08952713012695312, 0.04650713875889778, 0.023702487349510193, 0.01321239210665226, 0.09701406955718994, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.028385812416672707, 0.012191490270197392, 0.27066752314567566, 0.18411272764205933, 0.040896836668252945, 0.48173367977142334, 0.02650352008640766, 0.07071101665496826, 0.007758310064673424, 0.001958101289346814, 0.01839292421936989, 0.023066602647304535, 0.03435399383306503, 0.03657263144850731, 0.029525745660066605, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04876675456762314, 0.422792911529541, 0.22041767835617065, 0.2559551000595093, 0.08884847164154053, 0.01230597123503685, 0.025672338902950287, 0.003895203350111842, 0.022659877315163612, 0.0043840305879712105, 0.007982935756444931, 0.010924039408564568, 0.06971067935228348, 0.0061518345028162, 0.21563398838043213, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015657104551792145, 0.02366352081298828, 0.07373688369989395, 0.10379613190889359, 0.013535204343497753, 0.07323776930570602, 0.048540983349084854, 0.008235346525907516, 0.01638718694448471, 0.012322558090090752, 0.073370561003685, 0.03809332847595215, 0.021602218970656395, 0.003090204205363989, 0.23272792994976044, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.018198516219854355, 0.011175387538969517, 0.02189311571419239, 0.012938260100781918, 0.09454065561294556, 0.010837653651833534, 0.04214898869395256, 0.03231353685259819, 0.2788335978984833, 0.02807164192199707, 0.0381515808403492, 0.013884211890399456, 0.014051362872123718, 0.00934662390500307, 0.24102351069450378, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01114112138748169, 0.11382883787155151, 0.017900465056300163, 0.008639826439321041, 0.024639632552862167, 0.020821422338485718, 0.022935912013053894, 0.04321465268731117, 0.055257730185985565, 0.0561254657804966, 0.006350866984575987, 0.034159135073423386, 0.001170721254311502, 0.00040716465446166694, 0.2438717484474182, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01806582696735859, 0.014762195758521557, 0.02654433250427246, 0.025726040825247765, 0.03240499645471573, 0.020733002573251724, 0.04244884103536606, 0.02047092467546463, 0.13412125408649445, 0.512605607509613, 0.5156171321868896, 0.023306455463171005, 0.0489252470433712, 0.06594526767730713, 0.173824280500412, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.018763704225420952, 0.010509289801120758, 0.06387435644865036, 0.02487548068165779, 0.10975509881973267, 0.01984621025621891, 0.06460897624492645, 0.03137337416410446, 0.1802622228860855, 0.7354047894477844, 0.7864400148391724, 0.1003832221031189, 0.007522855885326862, 0.14785504341125488, 0.08187610656023026, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02117479033768177, 0.061044495552778244, 0.02157888375222683, 0.021421663463115692, 0.04618487507104874, 0.05167240649461746, 0.01054168026894331, 0.009977741166949272, 0.0295058935880661, 0.008349624462425709, 0.02268156036734581, 0.026699911803007126, 0.020697196945548058, 0.013632250018417835, 0.13365623354911804, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2602275013923645, 0.0514441579580307, 0.4731021821498871, 0.5077798962593079, 0.22717851400375366, 0.04740440100431442, 0.27564913034439087, 0.24302659928798676, 0.05887439846992493, 0.3509802222251892, 0.6124410033226013, 0.11394976824522018, 0.0489780493080616, 0.04593530669808388, 0.01042554248124361, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.032066281884908676, 0.1349876970052719, 0.04647025838494301, 0.02243492752313614, 0.02574889175593853, 0.03298051655292511, 0.026965852826833725, 0.3248708248138428, 0.005728535819798708, 0.08351098001003265, 0.1499667763710022, 0.16844461858272552, 0.05473209172487259, 0.05656114220619202, 0.10718395560979843, 0.1283751130104065, 0.06695841252803802, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005181984044611454, 0.0008690498070791364, 0.00864254217594862, 0.00306740403175354, 0.10709173232316971, 0.0007182863773778081, 0.004329775460064411, 0.010956686921417713, 0.06760676205158234, 0.010445973835885525, 0.012115269899368286, 0.06696799397468567, 0.0054829977452754974, 0.025371035560965538, 0.13854098320007324, 5.319380943547003e-05, 9.114345448324457e-05, 0.7905611991882324, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03556624799966812, 0.11754146218299866, 0.010577056556940079, 0.008073115721344948, 0.06965696066617966, 0.0032990325707942247, 0.011276635341346264, 0.09485359489917755, 0.10517128556966782, 0.0125450249761343, 0.007751243654638529, 0.0650070384144783, 0.0006160335033200681, 0.002038064645603299, 0.4774436056613922, 0.10777772217988968, 0.19019582867622375, 0.12566408514976501, 0.295462429523468, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13858208060264587, 0.06875398755073547, 0.01532802265137434, 0.10744626820087433, 0.18273182213306427, 0.002165634883567691, 0.069672591984272, 0.11672408878803253, 0.005795653443783522, 0.0880894884467125, 0.05771886929869652, 0.025581423193216324, 0.03904194384813309, 0.07354751974344254, 0.14365413784980774, 2.4899240088416263e-05, 2.9243250537547283e-05, 0.0014855118934065104, 3.888772698701359e-05, 0.9169090986251831, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16291819512844086, 0.050931405276060104, 0.14806726574897766, 0.2683573365211487, 0.2810481786727905, 0.002092417562380433, 0.012745368294417858, 0.01212888304144144, 0.014305775985121727, 0.17753903567790985, 0.1299620419740677, 0.10299177467823029, 0.21836693584918976, 0.06576120108366013, 0.12406044453382492, 3.5349924587535497e-07, 4.689470642915694e-06, 0.02691131830215454, 1.3325815416465048e-05, 0.19568589329719543, 0.956480085849762, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12156791239976883, 0.39120492339134216, 0.1209033653140068, 0.08395244181156158, 0.29989197850227356, 0.044024936854839325, 0.023133939132094383, 0.05934688448905945, 0.02561376802623272, 0.024757277220487595, 0.04535222053527832, 0.11912120133638382, 0.02126661129295826, 0.03811139240860939, 0.248785600066185, 0.08490768820047379, 0.04920955002307892, 0.012384464032948017, 0.04339546710252762, 0.010612337850034237, 0.05702771991491318, 0.7263003587722778, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.106705442070961, 0.8169862627983093, 0.1967339813709259, 0.01375850010663271, 0.13418887555599213, 0.16134029626846313, 0.005958847235888243, 0.09247319400310516, 0.04806499928236008, 0.025876127183437347, 0.08311128616333008, 0.22926460206508636, 0.05653654783964157, 0.04726153612136841, 0.20836575329303741, 0.16491760313510895, 0.04815620183944702, 0.0007595600909553468, 0.006606678944081068, 0.0006115635624155402, 0.0007167417788878083, 0.0015418223338201642, 0.0024032427463680506, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04722486063838005, 0.04722658172249794, 0.05176655203104019, 0.00462702801451087, 0.20528024435043335, 0.0011717488523572683, 0.004415996838361025, 0.014451048336923122, 0.028127426281571388, 0.007240481209009886, 0.004411954898387194, 0.10081291943788528, 0.07703132927417755, 0.033158108592033386, 0.21852079033851624, 0.012053201906383038, 0.18336322903633118, 0.0033893296495079994, 0.22584111988544464, 0.004534169565886259, 0.003455487545579672, 0.30805450677871704, 0.5499533414840698, 0.13390673696994781, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.032722555100917816, 0.027063244953751564, 0.014943713322281837, 0.0013555125333368778, 0.016471203416585922, 0.005467826500535011, 0.02999643050134182, 0.014794600196182728, 0.03837134689092636, 0.004397213459014893, 0.01024235412478447, 0.04855721816420555, 0.05723624676465988, 0.051476139575242996, 0.2643129825592041, 0.02224119007587433, 0.09969844669103622, 0.01827961951494217, 0.1828235685825348, 0.009660250507295132, 0.005268027540296316, 0.13511976599693298, 0.39505934715270996, 0.1772008240222931, 0.6222725510597229, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.052069392055273056, 0.003948261961340904, 0.01313212513923645, 0.010319330729544163, 0.04011767730116844, 0.00066552241332829, 0.01502715889364481, 0.007099903654307127, 0.16779832541942596, 0.03226454555988312, 0.052614975720644, 0.014822165481746197, 0.002071568975225091, 0.001763610984198749, 0.05304422974586487, 0.19008594751358032, 0.025696618482470512, 0.004118501208722591, 0.03605509176850319, 0.002144730417057872, 0.0023362801875919104, 0.16961191594600677, 0.015426162630319595, 0.016875047236680984, 0.017404966056346893, 0.032629188150167465, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.022045070305466652, 0.036587294191122055, 0.06798984855413437, 0.040110163390636444, 0.5405737161636353, 0.015278805047273636, 0.02948732301592827, 0.034845639020204544, 0.27487096190452576, 0.008005083538591862, 0.012681123800575733, 0.10707750916481018, 0.02124345488846302, 0.00868641585111618, 0.4183328449726105, 0.1594686657190323, 0.03835373371839523, 0.021387629210948944, 0.028402678668498993, 0.12163796275854111, 0.1348690688610077, 0.027878204360604286, 0.016979072242975235, 0.009301519952714443, 0.047045812010765076, 0.103324294090271, 0.0978349894285202, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07479816675186157, 0.018890362232923508, 0.2873721718788147, 0.028116360306739807, 0.7967413067817688, 0.008446138352155685, 0.020726248621940613, 0.018564706668257713, 0.33813604712486267, 0.003492887830361724, 0.010393181815743446, 0.18903475999832153, 0.00443642633035779, 0.0231452826410532, 0.42231008410453796, 0.08206925541162491, 0.0482555516064167, 0.03066202998161316, 0.14434732496738434, 0.10149279236793518, 0.1536794900894165, 0.16425268352031708, 0.00592045346274972, 0.002011190867051482, 0.030538976192474365, 0.015422381460666656, 0.0400862954556942, 0.6933969259262085, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07108656316995621, 0.0021144712809473276, 0.0671088695526123, 0.03148089721798897, 0.7113023400306702, 0.006737539079040289, 0.2500847280025482, 0.023258471861481667, 0.23158760368824005, 0.011219021864235401, 0.04227704927325249, 0.03650788217782974, 0.15078191459178925, 0.09633734077215195, 0.15066072344779968, 0.11962933838367462, 0.08867897093296051, 0.023231033235788345, 0.019267449155449867, 0.06578893214464188, 0.01314490009099245, 0.028238458558917046, 0.2009190320968628, 0.005505711771547794, 0.024347275495529175, 0.005847027525305748, 0.13606473803520203, 0.11386173218488693, 0.6883828639984131, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04487757384777069, 0.009540342725813389, 0.2420971691608429, 0.01275626104325056, 0.3918483257293701, 0.0218670591711998, 0.022137846797704697, 0.08132637292146683, 0.11900310963392258, 0.000993919325992465, 0.03630243241786957, 0.087126724421978, 0.0003738462692126632, 0.02454514056444168, 0.14072805643081665, 0.004133098293095827, 0.007605875376611948, 0.380069762468338, 0.01569206453859806, 0.3162667751312256, 0.06185031309723854, 0.003268925240263343, 0.007663627155125141, 0.00711404625326395, 0.0016827658982947469, 0.002885768422856927, 0.009058460593223572, 0.0104479705914855, 0.0013903286308050156, 0.9176042079925537, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0048965876922011375, 0.019337626174092293, 0.002879639156162739, 0.0027576948050409555, 0.04260760545730591, 0.003218113211914897, 0.003307115286588669, 0.026640478521585464, 0.011750566773116589, 0.0005104524316266179, 9.575913281878456e-05, 0.057879798114299774, 0.004244217649102211, 0.00609983503818512, 0.28528884053230286, 0.19946889579296112, 0.004915847908705473, 0.0015343156410381198, 0.012221671640872955, 0.003153382334858179, 0.0001576353097334504, 0.0020530277397483587, 0.003957398701459169, 0.010446527041494846, 0.012547693215310574, 0.03473197668790817, 0.06650777161121368, 0.014228541404008865, 0.02601468935608864, 0.0018418998224660754, 0.08826413750648499, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0335795059800148, 0.030716734007000923, 0.023829646408557892, 0.03415534272789955, 0.08875380456447601, 0.0019310596399009228, 0.017619425430893898, 0.012105603702366352, 0.002468202030286193, 0.010380377061665058, 0.01267782598733902, 0.10606792569160461, 0.0014069904573261738, 0.0004161447286605835, 0.19442977011203766, 0.14040440320968628, 0.29221969842910767, 0.09665771573781967, 0.2947876751422882, 0.00611721258610487, 0.012681002728641033, 0.7610099911689758, 0.27993685007095337, 0.19895455241203308, 0.07963719218969345, 0.025141140446066856, 0.30299919843673706, 0.4374280273914337, 0.12315846234560013, 0.011889583431184292, 0.00027308438438922167, 0.03226177766919136, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17404082417488098, 0.05758971348404884, 0.12847737967967987, 0.07598815858364105, 0.49957963824272156, 0.003085564589127898, 0.05114232748746872, 0.011464038863778114, 0.06926580518484116, 0.06844814121723175, 0.06813240051269531, 0.08604259043931961, 0.004740274045616388, 0.009239559061825275, 0.19994765520095825, 0.22362156212329865, 0.19648011028766632, 0.02122899703681469, 0.12822405993938446, 0.013841216452419758, 0.009505078196525574, 0.4746513366699219, 0.1753886640071869, 0.09167484194040298, 0.038334570825099945, 0.04122844338417053, 0.14653263986110687, 0.17874038219451904, 0.023550381883978844, 0.014212163165211678, 0.001423373818397522, 0.0059451088309288025, 0.09707646816968918, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011875619180500507, 0.026503771543502808, 0.054018229246139526, 0.01668175496160984, 0.3499281406402588, 0.01803278550505638, 0.01878167688846588, 0.01221490278840065, 0.15005004405975342, 0.0046301730908453465, 0.005843435879796743, 0.032064031809568405, 0.010490885935723782, 0.00555034726858139, 0.27147379517555237, 0.167328879237175, 0.06208498775959015, 0.010482249781489372, 0.03574186563491821, 0.0675959512591362, 0.06477286666631699, 0.04995346441864967, 0.05412250757217407, 0.009984727017581463, 0.03347667679190636, 0.11074735969305038, 0.16135196387767792, 0.07774785906076431, 0.01735900156199932, 0.007863441482186317, 0.019525114446878433, 0.005842071026563644, 0.1275986284017563, 0.0955328494310379, NaN, NaN, NaN, NaN, NaN, NaN], [0.0646943747997284, 0.047236885875463486, 0.11903148144483566, 0.02203843556344509, 0.4764179587364197, 0.008550588972866535, 0.013687309809029102, 0.008890991099178791, 0.32491248846054077, 0.011557912454009056, 0.009869826957583427, 0.0921611338853836, 0.0031256151851266623, 0.016340140253305435, 0.3438139855861664, 0.05032582953572273, 0.03989394009113312, 0.02223959006369114, 0.07248460501432419, 0.04305185005068779, 0.04872481897473335, 0.09144517779350281, 0.0032577940728515387, 0.000561918190214783, 0.015125684440135956, 0.018474824726581573, 0.0519116036593914, 0.7149417400360107, 0.023930398747324944, 0.005549557972699404, 0.0027118371799588203, 0.08418004959821701, 0.22684048116207123, 0.052481237798929214, 0.7548789381980896, NaN, NaN, NaN, NaN, NaN], [0.17560914158821106, 0.007353567518293858, 0.056802812963724136, 0.032415200024843216, 0.4015137553215027, 0.02137722261250019, 0.35710790753364563, 0.018633568659424782, 0.05862341821193695, 0.02506905421614647, 0.018169963732361794, 0.009134531952440739, 0.07779684662818909, 0.07867905497550964, 0.1750962883234024, 0.14971917867660522, 0.12296220660209656, 0.03256092593073845, 0.015910452231764793, 0.08324312418699265, 0.010959222912788391, 0.03249981626868248, 0.2630986273288727, 0.0023772413842380047, 0.021863164380192757, 0.014683729968965054, 0.3797665238380432, 0.26638853549957275, 0.6724205613136292, 0.015757206827402115, 0.01569446735084057, 0.01732691004872322, 0.06738004088401794, 0.17602917551994324, 0.12501026690006256, 0.6636221408843994, NaN, NaN, NaN, NaN], [0.05210466682910919, 0.006375414319336414, 0.22638031840324402, 0.012961659580469131, 0.3225522041320801, 0.012402641586959362, 0.024030247703194618, 0.056293144822120667, 0.11919546872377396, 0.0012290689628571272, 0.027758106589317322, 0.025181178003549576, 0.00022994892788119614, 0.012616506777703762, 0.1375768631696701, 0.0045495470985770226, 0.007598123978823423, 0.48235079646110535, 0.017675379291176796, 0.30638325214385986, 0.03773635998368263, 0.0025513810105621815, 0.013349749147891998, 0.011474208906292915, 0.002688285429030657, 0.009704438969492912, 0.024301802739501, 0.030528949573636055, 0.006023744586855173, 0.9289764761924744, 0.008095184341073036, 0.015121471136808395, 0.003912394400686026, 0.005678378511220217, 0.005922055337578058, 0.0012866485631093383, 0.9431078433990479, NaN, NaN, NaN], [0.005459210369735956, 0.03143180534243584, 0.0014205367770045996, 0.0012642937945201993, 0.01687682792544365, 0.007108580321073532, 0.004234722815454006, 0.017920657992362976, 0.003724986221641302, 0.0002761750074569136, 2.4563792976550758e-05, 0.011889445595443249, 0.0013067404506728053, 0.002636768389493227, 0.19040453433990479, 0.25144028663635254, 0.013477480970323086, 0.004043558146804571, 0.02197866141796112, 0.005731666926294565, 0.00035365403164178133, 0.0028230457101017237, 0.003569219959899783, 0.00616231607273221, 0.023324957117438316, 0.07691453397274017, 0.11847300082445145, 0.025281671434640884, 0.05239935964345932, 0.002384425140917301, 0.16120819747447968, 0.011955172754824162, 0.09212952852249146, 0.03993848338723183, 0.017148757353425026, 0.01459744293242693, 0.0018050760263577104, 0.08139479160308838, NaN, NaN], [0.031027475371956825, 0.05656901001930237, 0.0113890515640378, 0.024300340563058853, 0.03550150617957115, 0.0024159413296729326, 0.02035972848534584, 0.01581081561744213, 0.002032301388680935, 0.009238713420927525, 0.01651322841644287, 0.11367840319871902, 0.003108791308477521, 0.00086622079834342, 0.16520220041275024, 0.08713241666555405, 0.22884246706962585, 0.12139283120632172, 0.21789073944091797, 0.00419022049754858, 0.011025986634194851, 0.8093750476837158, 0.24520863592624664, 0.11868450790643692, 0.037659380584955215, 0.014297883957624435, 0.35379931330680847, 0.4382935166358948, 0.17632676661014557, 0.006937071681022644, 0.0007303177262656391, 0.027538392692804337, 0.0690605565905571, 0.3237524628639221, 0.41753751039505005, 0.09520361572504044, 0.013310365378856659, 0.0003602981742005795, 0.032565031200647354, NaN], [0.7154905796051025, 0.15825338661670685, 0.49722805619239807, 0.38231807947158813, 0.39668020606040955, 0.051081933081150055, 0.4188354015350342, 0.3623049259185791, 0.3077245056629181, 0.4494604766368866, 0.7933229804039001, 0.20231026411056519, 0.27286192774772644, 0.2623305022716522, 0.06808917224407196, 0.01268855668604374, 0.009620537050068378, 0.0011078648967668414, 0.01395372860133648, 0.00034480926115065813, 0.0002369812864344567, 0.14032205939292908, 0.12187758088111877, 0.004498081747442484, 6.632315489696339e-05, 0.01873306930065155, 0.07693066447973251, 0.06357964873313904, 0.012718681246042252, 0.02489433065056801, 0.4312428832054138, 0.013737366534769535, 0.0326746366918087, 0.34456172585487366, 0.0668448805809021, 0.006646350026130676, 0.04233057424426079, 0.4123155176639557, 0.007851892150938511, 0.43338367342948914]], [[4.754594192490913e-05, 2.1380438752771624e-08, 2.918067565360616e-08, 2.8621201408896013e-08, 2.499384379461844e-07, 0.0002631827082950622, 5.21495513439163e-10, 2.490414274802788e-08, 1.4592379216082918e-07, 4.660217989282955e-09, 1.3478041793746343e-08, 1.530838318331007e-07, 4.6195887989597395e-05, 8.429636181972455e-06, 0.2157532423734665, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6645432114601135, 0.00044607618474401534, 8.70102576300269e-06, 1.056492124007491e-06, 4.43653931370136e-07, 3.5252294310339494e-06, 0.013106754049658775, 0.0008970960625447333, 5.719662112824153e-07, 3.2791810156140855e-08, 1.0544068729245737e-08, 3.57371057191358e-08, 0.00012361648259684443, 0.0008665899513289332, 0.00011794524471042678, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [5.6636022236489225e-06, 0.771808385848999, 0.2603715658187866, 7.618767995154485e-05, 2.6443340175319463e-05, 1.448297037853763e-08, 1.7459943213449236e-10, 0.0005545829189941287, 1.3129211993145873e-06, 0.0003596498572733253, 1.3187416243454209e-06, 1.2532552773336647e-08, 5.7067543821176514e-05, 1.4676837054139469e-05, 8.822963764032465e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [7.866851170490463e-09, 0.0015575109282508492, 0.5911858677864075, 0.005255529191344976, 0.00012560673349071294, 1.2381517144888221e-08, 1.3975322635251253e-12, 4.631081083061872e-06, 1.8297629367225454e-06, 0.043241821229457855, 0.00025465109501965344, 1.6550380621538352e-07, 1.5873881693551084e-06, 1.3629888329091955e-08, 2.2046858560997862e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.6020940130090366e-10, 3.2446525892737554e-06, 0.1964423805475235, 0.9067507982254028, 4.244087540428154e-05, 3.027215825568419e-05, 6.154020626425449e-10, 3.570748958736658e-07, 2.493328743469192e-08, 1.327106815551815e-07, 5.116170723340474e-05, 7.67620722541551e-09, 6.538175512105227e-07, 1.6885725528936746e-07, 1.9495971503857845e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [4.057985947270026e-09, 1.6926858803500977e-09, 0.00014235911658033729, 0.0026504932902753353, 0.8634750843048096, 1.9555229300749488e-05, 1.294085109293519e-06, 2.6649362894204387e-07, 3.0507638082433175e-10, 5.069419550807197e-09, 1.108148239836737e-07, 1.7377595213474706e-05, 9.726352800498717e-06, 1.823265733946755e-06, 5.869507617717318e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.9094309466893833e-12, 2.4682887027685507e-13, 6.382604444965523e-10, 6.302604549368596e-10, 1.4692274817207363e-05, 0.3734012544155121, 3.483030241113738e-06, 1.1820202594492457e-08, 1.9522692351614523e-09, 1.394072303342181e-13, 1.7670450172535546e-11, 1.716609077107023e-09, 3.7749509829154704e-06, 2.593782255644328e-06, 3.855710133393586e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [8.508453674949124e-08, 1.863478038544031e-09, 1.257351167627263e-10, 5.331373190142763e-11, 3.337832410466035e-08, 1.777973557182122e-05, 0.8244234323501587, 8.755041926633567e-05, 1.7572835409040977e-09, 1.3142270258170718e-11, 7.735358035533546e-13, 4.927841815161038e-11, 5.296478775562719e-07, 0.000259329448454082, 1.8429471282388477e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.2582735964272729e-09, 2.3675827378610848e-06, 5.770066309196409e-07, 5.0431950282536775e-11, 2.6034334410507398e-11, 1.7287857190240175e-07, 9.084228622668888e-06, 0.8877476453781128, 0.0008898449596017599, 7.2106473680833e-08, 1.9634756043274137e-08, 4.930736808433922e-13, 3.217972377456135e-08, 1.2906410120194778e-05, 9.568290160189008e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [2.8039692789860737e-09, 1.3000158105569426e-06, 4.493769978353157e-08, 2.493898698663344e-10, 7.932443764346875e-12, 1.7288407150317653e-08, 2.642636942606913e-10, 3.576151357265189e-05, 0.8324669599533081, 5.240505197434686e-05, 8.11301958947297e-07, 9.422521651814009e-10, 4.6924657937097436e-08, 2.8963553333483105e-08, 6.33739318800508e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [2.873091320410026e-09, 7.32139524188824e-05, 1.393846559949452e-05, 2.2707215663331226e-08, 3.602095333121724e-08, 7.893682235637911e-12, 1.2799745258921386e-13, 1.2971109697446082e-07, 4.534097752184607e-05, 0.7187873721122742, 0.0028858170844614506, 4.860597982769832e-06, 3.316463335067965e-06, 6.64895694058032e-08, 4.189383506769673e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [3.5802516507033033e-10, 3.3775189312024168e-09, 1.689890041234321e-06, 2.72409181434341e-07, 2.3650377656281307e-08, 3.1582386705863996e-10, 4.773196676235644e-14, 6.179980832632381e-11, 1.0790042637154329e-07, 0.00019566719129215926, 0.8666706681251526, 0.00033315850305370986, 7.101260734998505e-07, 3.226231015673875e-08, 6.780910499770698e-09, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [7.800644574729176e-09, 1.700809604265885e-09, 9.215954577257435e-08, 4.046364665555302e-07, 0.00011374137102393433, 5.132134901941754e-06, 5.991689921991394e-10, 9.107053305923429e-11, 5.105777606262407e-11, 3.3974476565390432e-09, 3.904122058884241e-05, 0.65162193775177, 0.00035754009149968624, 6.446759653044865e-05, 8.575011065659055e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [5.410449865905775e-10, 1.9016622998524468e-10, 1.651180719930423e-10, 9.184660809680167e-10, 4.749936000081334e-09, 6.8993631430203095e-06, 9.186856830822876e-10, 1.2120262259107673e-11, 1.0679299241797557e-12, 7.136916383397585e-13, 1.9098522763272285e-10, 9.612936082703527e-06, 0.7662882208824158, 0.00778515450656414, 3.0943773765557125e-08, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0058370670303702354, 0.00017831011791713536, 6.727457275701454e-06, 4.542615897662472e-06, 0.0008248149533756077, 0.04996809363365173, 0.010534689761698246, 8.931134652812034e-05, 2.4081384708551923e-07, 6.080232139993313e-08, 3.077615701840841e-06, 0.00041306819184683263, 0.062034472823143005, 0.37576472759246826, 0.1323644071817398, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.437301367521286, 0.15179137885570526, 0.09085877984762192, 0.06997784972190857, 0.17732757329940796, 0.23180970549583435, 0.11514479666948318, 0.32073739171028137, 0.15501314401626587, 0.1294255405664444, 0.06762269139289856, 0.21488851308822632, 0.2614101469516754, 0.12734454870224, 0.049641113728284836, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.028495818376541138, 0.1544514149427414, 0.06366834789514542, 0.016971074044704437, 0.02302762120962143, 0.054101087152957916, 0.012630121782422066, 0.018889501690864563, 0.004939573351293802, 0.01251249760389328, 0.1164683923125267, 0.009905983693897724, 0.01818472519516945, 0.01017050538212061, 0.04256897792220116, 0.13150663673877716, 0.013105388730764389, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007633751258254051, 0.002589557319879532, 0.02251260355114937, 0.05040144920349121, 0.032673582434654236, 0.0022981506772339344, 0.00627527991309762, 0.0006094649434089661, 0.01362280547618866, 0.006205975078046322, 0.006417383905500174, 0.0010467394022271037, 0.0010408272501081228, 0.007578521966934204, 0.13823428750038147, 0.16704899072647095, 0.0014066778821870685, 0.003860085504129529, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0074798669666051865, 0.011802621185779572, 0.3115181624889374, 0.22458955645561218, 0.10706131160259247, 0.016402821987867355, 0.046956516802310944, 0.004200803115963936, 0.01468481682240963, 0.014471452683210373, 0.27619558572769165, 0.0038709931541234255, 0.00034889893140643835, 0.0020716534927487373, 0.01783183217048645, 0.14769184589385986, 0.005059333052486181, 0.0053715878166258335, 0.026609797030687332, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015254770405590534, 0.01172303594648838, 0.002065492793917656, 0.005149758420884609, 0.013159574940800667, 0.001197350095026195, 0.018971139565110207, 0.004385960288345814, 0.06813318282365799, 0.021520443260669708, 0.005575989838689566, 0.001505104242824018, 0.0019181625684723258, 0.005167691968381405, 0.15193934738636017, 0.15381431579589844, 0.05056624114513397, 0.015615872107446194, 0.004382571205496788, 0.00015187788812909275, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.026872141286730766, 0.003412047168239951, 0.03895608335733414, 0.03612855076789856, 0.02536499686539173, 0.03102046251296997, 0.004315483849495649, 0.0027427596505731344, 0.03512648865580559, 0.022632958367466927, 0.05171700567007065, 0.0026941397227346897, 0.0031264815479516983, 0.024213580414652824, 0.12838274240493774, 0.16606314480304718, 0.03878505155444145, 0.01631396822631359, 0.011268166825175285, 0.00036908386391587555, 0.00010962320084217936, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0600903183221817, 0.002928798785433173, 0.0064612883143126965, 0.05414368212223053, 0.029363246634602547, 0.006244697142392397, 0.397325724363327, 0.040878646075725555, 0.005305922590196133, 0.27715954184532166, 0.04618077725172043, 0.008418801240622997, 0.01155431941151619, 0.05281350389122963, 0.025860372930765152, 0.16556474566459656, 0.059035927057266235, 0.018687130883336067, 0.020593103021383286, 0.0006985706277191639, 0.0006753651541657746, 0.01174053642898798, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0013151391176506877, 0.002262294292449951, 0.0012738551013171673, 0.0034272209741175175, 0.0030726443510502577, 0.04279911145567894, 0.008567760698497295, 0.17885291576385498, 0.00929640606045723, 0.001624501310288906, 0.02533317357301712, 0.005113683640956879, 0.027247918769717216, 0.07258909195661545, 0.014188846573233604, 0.16100119054317474, 0.03705580160021782, 0.08672276139259338, 0.05696912482380867, 0.00507472176104784, 0.006951047107577324, 0.0023692583199590445, 0.004235508386045694, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3408622145652771, 0.07445694506168365, 0.03113507851958275, 0.0754152163863182, 0.014415460638701916, 0.002693483140319586, 0.09953030943870544, 0.11086118221282959, 0.5124953985214233, 0.329039990901947, 0.5092117786407471, 0.027396254241466522, 0.055544231086969376, 0.4057520925998688, 0.09588415175676346, 0.288095086812973, 0.011840847320854664, 0.005622565280646086, 0.00535928551107645, 0.0008760345517657697, 0.0004899614141322672, 0.001179057639092207, 0.0010409504175186157, 0.0012723063118755817, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09238530695438385, 0.007053247652947903, 0.0017291916301473975, 0.005093103274703026, 0.0007437380263581872, 0.0014228186337277293, 0.02520381473004818, 0.019087698310613632, 0.47848576307296753, 0.29748132824897766, 0.057576071470975876, 0.01139640249311924, 0.004621520172804594, 0.02937469258904457, 0.015335291624069214, 0.2984195351600647, 0.024577315896749496, 0.008883590810000896, 0.0237559974193573, 0.001871026586741209, 0.002048116410151124, 0.00452006608247757, 0.0067189703695476055, 0.002311990363523364, 0.0035932722967118025, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0720675140619278, 0.012255199253559113, 0.04221949726343155, 0.09128241240978241, 0.009349699132144451, 0.008273615501821041, 0.014371694065630436, 0.01100369542837143, 0.1737149953842163, 0.16746114194393158, 0.1696900725364685, 0.014558696188032627, 0.01365632750093937, 0.0269284937530756, 0.016150163486599922, 0.19755195081233978, 0.08605571836233139, 0.04371126368641853, 0.045333728194236755, 0.005393510684370995, 0.006479238625615835, 0.018500106409192085, 0.012994848191738129, 0.011254888959228992, 0.03004884347319603, 0.011813223361968994, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.052127860486507416, 0.0038822691421955824, 0.01307338010519743, 0.12611117959022522, 0.013002983294427395, 0.054914653301239014, 0.022843925282359123, 0.0017219025176018476, 0.025739489123225212, 0.3090609014034271, 0.10414470732212067, 0.006550551857799292, 0.006861968897283077, 0.010005415417253971, 0.011784915812313557, 0.05165635421872139, 0.44527125358581543, 0.31059694290161133, 0.6649516224861145, 0.027770839631557465, 0.02873762883245945, 0.17512862384319305, 0.06940869987010956, 0.1633579134941101, 0.028000785037875175, 0.003091411432251334, 0.016245586797595024, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.074305959045887, 0.010457544587552547, 0.07050318270921707, 0.4022633135318756, 0.04945780336856842, 0.04771194979548454, 0.4660364091396332, 0.07594453543424606, 0.018491366878151894, 0.1513216346502304, 0.09796185791492462, 0.23858080804347992, 0.011272062547504902, 0.09385059028863907, 0.06640274822711945, 0.19151811301708221, 0.1383962333202362, 0.13229386508464813, 0.35712042450904846, 0.18756243586540222, 0.2871147096157074, 0.5138459801673889, 0.22405852377414703, 0.28785935044288635, 0.04021993279457092, 0.0012617700267583132, 0.004019713494926691, 0.003964945673942566, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.025815313681960106, 0.0033349080476909876, 0.00924734864383936, 0.012487816624343395, 0.03726305067539215, 0.016575457528233528, 0.23753590881824493, 0.025156090036034584, 0.11919926106929779, 0.04390435293316841, 0.0095932362601161, 0.04137176275253296, 0.08216788619756699, 0.1757660061120987, 0.30195334553718567, 0.24189773201942444, 0.08955204486846924, 0.32067012786865234, 0.20245005190372467, 0.11740265786647797, 0.08460556715726852, 0.044664137065410614, 0.025831788778305054, 0.07413194328546524, 0.0068964180536568165, 0.002961511956527829, 0.005619046278297901, 0.0014741680352017283, 0.00546230049803853, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05659867450594902, 0.020075146108865738, 0.01205957867205143, 0.004331792704761028, 0.052221644669771194, 0.0230423454195261, 0.0683140978217125, 0.09752152115106583, 0.2100839763879776, 0.0003861601871903986, 0.0032946986611932516, 0.0004593236662913114, 5.027504084864631e-05, 0.0022022551856935024, 0.14128009974956512, 0.1724659651517868, 0.13219435513019562, 0.15014058351516724, 0.12075512856245041, 0.0006761215627193451, 0.10174072533845901, 0.19516822695732117, 0.009559075348079205, 0.057678524404764175, 0.08239483833312988, 0.0039215064607560635, 0.0027616096194833517, 0.013109313324093819, 0.002305442001670599, 0.00021083203318994492, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08638240396976471, 0.0710444375872612, 0.06771891564130783, 0.17398057878017426, 0.05179189518094063, 0.34193578362464905, 0.2095513492822647, 0.09331211447715759, 0.052257001399993896, 0.006232596468180418, 0.002646914916113019, 0.06318453699350357, 0.019070196896791458, 0.02972061187028885, 0.2659039795398712, 0.19843007624149323, 0.15979865193367004, 0.14398488402366638, 0.41609427332878113, 0.010126790963113308, 0.04840107262134552, 0.7232485413551331, 0.22829605638980865, 0.34322667121887207, 0.08224418759346008, 0.03167981281876564, 0.020198417827486992, 0.013381149619817734, 0.0009459191933274269, 0.006438484415411949, 0.008794432505965233, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.26895081996917725, 0.1478959172964096, 0.3258365988731384, 0.404258131980896, 0.3733697533607483, 0.19055484235286713, 0.19857566058635712, 0.01781378500163555, 0.07512970268726349, 0.11693259328603745, 0.1175057590007782, 0.24425068497657776, 0.20241285860538483, 0.2411348670721054, 0.06638508290052414, 0.30347728729248047, 0.04726674035191536, 0.010849116370081902, 0.12094812840223312, 0.0013257962418720126, 0.0025908409152179956, 0.0014983253786340356, 0.03437754884362221, 0.009621781297028065, 0.006184253375977278, 0.00671237800270319, 0.0018636187305673957, 0.01123903226107359, 0.0035993149504065514, 0.0012990115210413933, 0.00021464838937390596, 0.001025065197609365, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17850612103939056, 0.12822727859020233, 0.17801056802272797, 0.28459492325782776, 0.058830633759498596, 0.03884930908679962, 0.3513718843460083, 0.061017971485853195, 0.06718380004167557, 0.071348175406456, 0.23821549117565155, 0.03658399358391762, 0.03897847980260849, 0.20709341764450073, 0.13892877101898193, 0.2792417109012604, 0.26782968640327454, 0.03489779308438301, 0.07551994919776917, 0.018111348152160645, 0.04002813994884491, 0.03850500285625458, 0.11152958869934082, 0.21995633840560913, 0.07949108630418777, 0.0037619988434016705, 0.03436713665723801, 0.020695386454463005, 0.017524488270282745, 0.010141805745661259, 0.003556826151907444, 0.0020958345849066973, 0.0058519174344837666, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4637373983860016, 0.04377487301826477, 0.15646661818027496, 0.36986854672431946, 0.09056738018989563, 0.23626187443733215, 0.11398540437221527, 0.0026716177817434072, 0.006399102043360472, 0.2626173198223114, 0.20860937237739563, 0.01349638868123293, 0.014208723790943623, 0.042171213775873184, 0.08208009600639343, 0.05386974662542343, 0.6086578965187073, 0.22683310508728027, 0.5828835964202881, 0.02668178826570511, 0.03663201630115509, 0.14977867901325226, 0.2173178791999817, 0.2744499444961548, 0.08338183909654617, 0.008825525641441345, 0.06588608771562576, 0.5592238306999207, 0.17532478272914886, 0.006846817210316658, 0.028904464095830917, 0.01721598580479622, 0.006393561605364084, 0.010461881756782532, NaN, NaN, NaN, NaN, NaN, NaN], [0.13806220889091492, 0.04062362387776375, 0.09515099227428436, 0.37904345989227295, 0.10653041303157806, 0.052835192531347275, 0.5728973150253296, 0.03487204387784004, 0.0029783223289996386, 0.07966885715723038, 0.03475099802017212, 0.13843636214733124, 0.006917618680745363, 0.06183210015296936, 0.1688811033964157, 0.24167264997959137, 0.2504684031009674, 0.15247754752635956, 0.4417489171028137, 0.37691444158554077, 0.47509273886680603, 0.6227271556854248, 0.6949021220207214, 0.5199849605560303, 0.14203055202960968, 0.006932773161679506, 0.02713918127119541, 0.026524275541305542, 0.28478434681892395, 0.05304509028792381, 0.03063105419278145, 0.007391192018985748, 0.001299944007769227, 0.0022179351653903723, 0.0017378581687808037, NaN, NaN, NaN, NaN, NaN], [0.02612869068980217, 0.003477374091744423, 0.007765303365886211, 0.0023155075032263994, 0.018893033266067505, 0.022398637607693672, 0.09549611806869507, 0.004012360703200102, 0.0013466936070472002, 0.0021441734861582518, 0.0004924506065435708, 0.006835760548710823, 0.011635211296379566, 0.023846328258514404, 0.22376547753810883, 0.3587647080421448, 0.13152657449245453, 0.3170546591281891, 0.1872878074645996, 0.17338471114635468, 0.16099165380001068, 0.050314128398895264, 0.07316549867391586, 0.1506616473197937, 0.027928102761507034, 0.013985591009259224, 0.03077181987464428, 0.00928373821079731, 0.01458327379077673, 0.34401679039001465, 0.1675042062997818, 0.008024912327528, 0.00340651860460639, 0.001158604514785111, 0.0004595925274770707, 0.0022153020836412907, NaN, NaN, NaN, NaN], [0.08347997069358826, 0.014491320587694645, 0.015744350850582123, 0.0043899440206587315, 0.05038629099726677, 0.008546282537281513, 0.06458569318056107, 0.03869106248021126, 0.0615551732480526, 0.0002168803766835481, 0.0014501431724056602, 0.00013847390073351562, 1.5032101146061905e-05, 0.0007368824444711208, 0.13783538341522217, 0.18021628260612488, 0.21554027497768402, 0.22428971529006958, 0.28362634778022766, 0.0019759181886911392, 0.19364571571350098, 0.3129161596298218, 0.05571373924612999, 0.43670228123664856, 0.5364305973052979, 0.045233964920043945, 0.02291695959866047, 0.15668357908725739, 0.03788933902978897, 0.0009749932214617729, 0.15011590719223022, 0.009233620017766953, 0.023490505293011665, 0.0018092861864715815, 0.01433361042290926, 0.002351803006604314, 0.00025271173217333853, NaN, NaN, NaN], [0.072405144572258, 0.036094967275857925, 0.060353852808475494, 0.1382489949464798, 0.03810955956578255, 0.1803218573331833, 0.3716851472854614, 0.04992733895778656, 0.002898369450122118, 0.0008571037324145436, 0.00035707451752386987, 0.02692999318242073, 0.003073085332289338, 0.009645520709455013, 0.17640869319438934, 0.18984580039978027, 0.30305740237236023, 0.22004783153533936, 0.5488721132278442, 0.023633448407053947, 0.10360189527273178, 0.8517335653305054, 0.6748489141464233, 0.77315753698349, 0.4876308739185333, 0.2048063576221466, 0.14540305733680725, 0.08473058044910431, 0.012403973378241062, 0.06795734912157059, 0.17164894938468933, 0.18992502987384796, 0.12247806042432785, 0.011528578586876392, 0.009636401198804379, 0.0008312705904245377, 0.013430905528366566, 0.011612125672399998, NaN, NaN], [0.30767515301704407, 0.17313888669013977, 0.17682777345180511, 0.3453424274921417, 0.2732711434364319, 0.18888972699642181, 0.2821650207042694, 0.011036374606192112, 0.013345124199986458, 0.030917862430214882, 0.037141598761081696, 0.14430613815784454, 0.09504004567861557, 0.16429893672466278, 0.0962204858660698, 0.3384567201137543, 0.062264904379844666, 0.014819102361798286, 0.14853152632713318, 0.0019540644716471434, 0.003596463706344366, 0.001872691442258656, 0.11878995597362518, 0.02639206312596798, 0.009769541211426258, 0.011811794713139534, 0.006684192456305027, 0.045877717435359955, 0.019279729574918747, 0.005480214022099972, 0.003932234365493059, 0.006437724456191063, 0.0240105502307415, 0.0011211916571483016, 0.004233745392411947, 0.001469226786866784, 0.0013713098596781492, 0.00014342667418532073, 0.0008160521974787116, NaN], [0.038221023976802826, 0.4632723033428192, 0.022520000115036964, 0.005303966347128153, 0.07163825631141663, 0.030774233862757683, 0.006099082063883543, 0.008936556056141853, 0.02098681591451168, 0.004558844491839409, 0.0029896388296037912, 0.018592750653624535, 0.20478543639183044, 0.08578886091709137, 0.1358346790075302, 0.1837155818939209, 0.5941455364227295, 0.2251758873462677, 0.3662757873535156, 0.039659783244132996, 0.3226933479309082, 0.014135366305708885, 0.028798755258321762, 0.10863638669252396, 0.34925851225852966, 0.03930900990962982, 0.08864527195692062, 0.10118203610181808, 0.05801505595445633, 0.11320658773183823, 0.05595846846699715, 0.0026757779996842146, 0.007132661063224077, 0.010286321863532066, 0.015962811186909676, 0.004528969060629606, 0.01888921484351158, 0.004036444239318371, 0.00027040645363740623, 0.0002387895801803097]], [[0.278582364320755, 0.012074317783117294, 0.4035726487636566, 0.05818924307823181, 0.5308449864387512, 0.7759386301040649, 0.6032847166061401, 0.04120228812098503, 0.6623223423957825, 0.4034832715988159, 0.2541539669036865, 0.023309720680117607, 0.054716046899557114, 0.3570294678211212, 0.004749305546283722, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03977029398083687, 0.025161603465676308, 0.4579423666000366, 0.3708552420139313, 0.767479419708252, 0.5835962295532227, 0.5609359741210938, 0.14304085075855255, 0.8166816234588623, 0.848468542098999, 0.5771627426147461, 0.07112090289592743, 0.12416274100542068, 0.618628740310669, 0.06885465234518051, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004083612468093634, 0.0006101519684307277, 0.12011494487524033, 0.04229450225830078, 0.17203551530838013, 0.013333754613995552, 0.01874622330069542, 0.021773431450128555, 0.8914079666137695, 0.25239333510398865, 0.2674473226070404, 0.0986163467168808, 0.10968483239412308, 0.05420238524675369, 0.020816486328840256, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00974054355174303, 0.009372939355671406, 0.016473596915602684, 0.12944141030311584, 0.06805374473333359, 0.019993484020233154, 0.038472987711429596, 0.21791628003120422, 0.8550615310668945, 0.2646826505661011, 0.7350810766220093, 0.17277619242668152, 0.36265626549720764, 0.3741258382797241, 0.06228891760110855, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0007183643756434321, 0.0016902177594602108, 0.0015671673463657498, 0.000663107552099973, 0.015286565758287907, 0.000776923552621156, 0.007700319401919842, 0.11482121050357819, 0.7658083438873291, 0.5443719625473022, 0.22170989215373993, 0.027013972401618958, 0.025342080742120743, 0.049981117248535156, 0.0074298488907516, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.011776593513786793, 0.00668947771191597, 0.05204532667994499, 0.026732588186860085, 0.007738037500530481, 0.19347773492336273, 0.08661007881164551, 0.02065080776810646, 0.8265263438224792, 0.77967369556427, 0.8155033588409424, 0.7568296194076538, 0.6889008283615112, 0.7797287106513977, 0.04647013917565346, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03701920434832573, 0.011276619508862495, 0.026248518377542496, 0.01771446317434311, 0.046063318848609924, 0.020064320415258408, 0.23005641996860504, 0.032302577048540115, 0.6365551948547363, 0.6746889352798462, 0.6497765183448792, 0.5260909199714661, 0.6955898404121399, 0.8770567178726196, 0.04424796253442764, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3583561182022095, 0.034818924963474274, 0.1010005921125412, 0.08171684294939041, 0.0902533084154129, 0.0273053590208292, 0.029195906594395638, 0.10516665875911713, 0.5163984894752502, 0.7107389569282532, 0.5390304327011108, 0.6552954316139221, 0.648922324180603, 0.8148984909057617, 0.13771982491016388, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04790134355425835, 0.016352321952581406, 0.004838719964027405, 0.039540428668260574, 0.004614146891981363, 0.10033231228590012, 0.05411757901310921, 0.012187371961772442, 0.25466611981391907, 0.4822390675544739, 0.22996564209461212, 0.2013523131608963, 0.3018202781677246, 0.325538694858551, 0.10763657093048096, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.18817435204982758, 0.007200991734862328, 0.0915139690041542, 0.00800582580268383, 0.007660675328224897, 0.27090781927108765, 0.08786749839782715, 0.014442713931202888, 0.017244037240743637, 0.8212726712226868, 0.22018176317214966, 0.05063365772366524, 0.16457810997962952, 0.059498634189367294, 0.11578860878944397, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1423795521259308, 0.008703344501554966, 0.2208349108695984, 0.02527845837175846, 0.027401143684983253, 0.09980836510658264, 0.024800043553113937, 0.009310302324593067, 0.11915526539087296, 0.048824433237314224, 0.23738479614257812, 0.04641610383987427, 0.11649724096059799, 0.03864651918411255, 0.200869619846344, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.19247660040855408, 0.028833042830228806, 0.1872357279062271, 0.03232081979513168, 0.031028537079691887, 0.3644941747188568, 0.11239293217658997, 0.0803447812795639, 0.13423573970794678, 0.07468846440315247, 0.009079186245799065, 0.19545331597328186, 0.09625646471977234, 0.07526607811450958, 0.1802312582731247, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1263553649187088, 0.009648445062339306, 0.47829046845436096, 0.22347994148731232, 0.2749265432357788, 0.23197446763515472, 0.05249631777405739, 0.01617230661213398, 0.3326357305049896, 0.1497221142053604, 0.04782721772789955, 0.011572148650884628, 0.1354474574327469, 0.0791783407330513, 0.15636207163333893, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.166306734085083, 0.04561271890997887, 0.48400574922561646, 0.31743937730789185, 0.4171416163444519, 0.1806352734565735, 0.04328177124261856, 0.022486848756670952, 0.1779668778181076, 0.03957689553499222, 0.009708160534501076, 0.01422630064189434, 0.013467496261000633, 0.06257133930921555, 0.22838094830513, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.39438390731811523, 0.20185884833335876, 0.19486168026924133, 0.053202297538518906, 0.29429352283477783, 0.31667405366897583, 0.3313867747783661, 0.37864530086517334, 0.4971301257610321, 0.178373321890831, 0.16689708828926086, 0.16029801964759827, 0.22925321757793427, 0.22496484220027924, 0.11296840012073517, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04784957319498062, 0.004609245341271162, 0.006819143425673246, 0.0166594497859478, 0.006965316366404295, 0.000989345251582563, 0.006434451788663864, 0.005414100829511881, 0.027048002928495407, 0.008730669505894184, 0.003844247665256262, 0.0032386775128543377, 0.00916406698524952, 0.02474893629550934, 0.20862001180648804, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07474544644355774, 0.14463284611701965, 0.06348620355129242, 0.11649901419878006, 0.010943777859210968, 0.05790672451257706, 0.023460205644369125, 0.09132371097803116, 0.013804412446916103, 0.11923354864120483, 0.04609918221831322, 0.0031168698333203793, 0.02482042834162712, 0.018085025250911713, 0.06715727597475052, 0.12851747870445251, 0.06451001763343811, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07159372419118881, 0.23599489033222198, 0.6269188523292542, 0.2670744061470032, 0.07840307801961899, 0.7659233808517456, 0.4897821247577667, 0.7919513583183289, 0.47275444865226746, 0.20698092877864838, 0.5493778586387634, 0.516223669052124, 0.5164197683334351, 0.6560667753219604, 0.10535097867250443, 0.16148854792118073, 0.04709945246577263, 0.0016553826862946153, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.030506769195199013, 0.030577607452869415, 0.37364113330841064, 0.17907775938510895, 0.011576596647500992, 0.0018289608415216208, 0.0013806972419843078, 0.0006740305689163506, 0.006688407156616449, 0.02554805763065815, 0.1984224021434784, 0.0020999175030738115, 0.0001219362675328739, 0.0009508132934570312, 0.00851912796497345, 0.12575848400592804, 0.13552792370319366, 0.1085570901632309, 0.11512085795402527, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6425503492355347, 0.21330313384532928, 0.8213226199150085, 0.6104346513748169, 0.4307103455066681, 0.005470798350870609, 0.1284545361995697, 0.017213305458426476, 0.14068865776062012, 0.2507726550102234, 0.6069697737693787, 0.17266355454921722, 0.10257546603679657, 0.4255537688732147, 0.07138645648956299, 0.14333586394786835, 0.24668441712856293, 0.19262480735778809, 0.13920731842517853, 0.0020065978169441223, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4833258390426636, 0.07765677571296692, 0.6261626482009888, 0.5845412611961365, 0.457427054643631, 0.012895571999251842, 0.037013884633779526, 0.0045295762829482555, 0.030468540266156197, 0.08583686500787735, 0.4300892949104309, 0.6064226627349854, 0.07339996099472046, 0.02218388393521309, 0.11548874527215958, 0.1578390896320343, 0.19358907639980316, 0.02251395769417286, 0.04702039062976837, 0.018520673736929893, 0.0005939522525295615, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.47047996520996094, 0.06838852912187576, 0.42273014783859253, 0.6319702863693237, 0.4177776277065277, 0.0021309976000338793, 0.00800495408475399, 0.0009326375438831747, 0.00536699453368783, 0.07440605759620667, 0.2710660994052887, 0.5013447999954224, 0.021646764129400253, 0.07749785482883453, 0.039263706654310226, 0.14088943600654602, 0.05360155552625656, 0.043673839420080185, 0.0087194312363863, 0.14876413345336914, 0.3311525881290436, 0.029076436534523964, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5323148965835571, 0.13256511092185974, 0.352451890707016, 0.6556484699249268, 0.4897412359714508, 0.22345507144927979, 0.17913641035556793, 0.12689323723316193, 0.025374194607138634, 0.169284388422966, 0.17072416841983795, 0.08815333992242813, 0.10821512341499329, 0.18704712390899658, 0.05398408696055412, 0.11886978894472122, 0.08032860606908798, 0.053777631372213364, 0.06359982490539551, 0.49348562955856323, 0.7690801620483398, 0.032007213681936264, 0.00921344943344593, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14081209897994995, 0.02785991132259369, 0.37397870421409607, 0.3742114305496216, 0.4757237732410431, 0.0011322007048875093, 0.0019287536852061749, 0.00011125820310553536, 0.00032575102522969246, 0.0042410544119775295, 0.007025705184787512, 0.007957610301673412, 0.0022035131696611643, 0.0008391661685891449, 0.0013405061326920986, 0.013988303020596504, 0.031309448182582855, 0.021422432735562325, 0.015959911048412323, 0.13852538168430328, 0.7482463121414185, 0.1306946873664856, 0.0026366086676716805, 0.006285007111728191, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17781563103199005, 0.10205524414777756, 0.04494810104370117, 0.011432765983045101, 0.0031803075689822435, 0.6873405575752258, 0.1935015618801117, 0.2538544535636902, 0.0006125010550022125, 0.0012519293231889606, 0.0009674279135651886, 0.0007319907890632749, 0.006560447160154581, 0.0005926102166995406, 0.045413821935653687, 0.02759428508579731, 0.1341203898191452, 0.1143924742937088, 0.04895513132214546, 0.2507959306240082, 0.47495928406715393, 0.24884849786758423, 0.04048554226756096, 0.06435439735651016, 0.02207104302942753, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24551935493946075, 0.010881111957132816, 0.16116493940353394, 0.28567203879356384, 0.017490731552243233, 0.03198051080107689, 0.25225502252578735, 0.04009091481566429, 0.1379493623971939, 0.030329206958413124, 0.00725751556456089, 0.0005535308737307787, 0.0001769027003319934, 0.0002177381538785994, 0.11288075149059296, 0.08376637101173401, 0.08644555509090424, 0.08414626121520996, 0.08246676623821259, 0.09393073618412018, 0.2536129355430603, 0.09570588916540146, 0.057335685938596725, 0.27625876665115356, 0.23640654981136322, 0.22554923593997955, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2663186192512512, 0.0841110497713089, 0.39283427596092224, 0.3631373345851898, 0.12446267902851105, 0.0023146900348365307, 0.05166012421250343, 0.025394057855010033, 0.09723125398159027, 0.2633029520511627, 0.09458169341087341, 0.0066002910025417805, 0.0024958536960184574, 0.0033851033076643944, 0.0521465502679348, 0.16592197120189667, 0.037314873188734055, 0.020350072532892227, 0.005164262373000383, 0.009123047813773155, 0.005826999898999929, 0.003451529424637556, 0.017567342147231102, 0.055315494537353516, 0.2317170798778534, 0.05933540314435959, 0.06010079011321068, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.032533496618270874, 0.005542360246181488, 0.14801643788814545, 0.028237437829375267, 0.09192534536123276, 0.002004631096497178, 0.0014868990983814, 0.0018816014053300023, 0.026168106123805046, 0.03666744753718376, 0.2621643543243408, 0.27366670966148376, 0.011460919864475727, 0.012693443335592747, 0.006134080700576305, 0.07053745537996292, 0.19491763412952423, 0.06705262511968613, 0.08265279233455658, 0.006405644118785858, 0.0031596925109624863, 0.005410268437117338, 0.030676638707518578, 0.08307406306266785, 0.20774710178375244, 0.4213918149471283, 0.23337899148464203, 0.08583765476942062, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.028670914471149445, 0.004855436272919178, 0.1069486141204834, 0.02764085866510868, 0.11977140605449677, 0.002686614403501153, 0.007388734724372625, 0.00704799173399806, 0.05677136406302452, 0.0688808336853981, 0.16234178841114044, 0.10548661649227142, 0.1935848444700241, 0.06036479026079178, 0.0025575226172804832, 0.13580749928951263, 0.17484943568706512, 0.09017936140298843, 0.11502011120319366, 0.015199831686913967, 0.008567527867853642, 0.04639086127281189, 0.16773870587348938, 0.16907723248004913, 0.43436557054519653, 0.2870768904685974, 0.10786425322294235, 0.08931463956832886, 0.011009148322045803, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04708265885710716, 0.030478408560156822, 0.0932990089058876, 0.24881142377853394, 0.1139858141541481, 0.03301549330353737, 0.12353643029928207, 0.18121947348117828, 0.3742617964744568, 0.11242274194955826, 0.2673158049583435, 0.05749531090259552, 0.00021243211813271046, 0.005648713558912277, 0.14063234627246857, 0.1727631837129593, 0.039101891219615936, 0.0065339612774550915, 0.0278339721262455, 0.004674504045397043, 0.014613990671932697, 0.03457005321979523, 0.04850766807794571, 0.02412491664290428, 0.009369020350277424, 0.022906647995114326, 0.04899173229932785, 0.01023520715534687, 0.0022774694953113794, 7.664388976991177e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0034641579259186983, 0.015587975271046162, 0.04098831117153168, 0.025328122079372406, 0.012870541773736477, 0.002695741830393672, 0.0012444279855117202, 0.005834754556417465, 0.005115050356835127, 0.10742342472076416, 0.29450723528862, 0.004624508786946535, 0.028462348505854607, 0.09151851385831833, 0.02349407598376274, 0.08213489502668381, 0.3905046880245209, 0.07204636186361313, 0.08312273025512695, 0.02625700645148754, 0.02937941811978817, 0.04131421819329262, 0.05289716273546219, 0.16493423283100128, 0.290347158908844, 0.47713640332221985, 0.44352003931999207, 0.11574649810791016, 0.0847686156630516, 0.047198787331581116, 0.1300322264432907, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00187075010035187, 0.017386021092534065, 0.0033179710153490305, 0.00216178921982646, 0.0006196821923367679, 0.0036519868299365044, 0.020315727218985558, 0.0735914558172226, 0.011879049241542816, 0.05418893322348595, 0.04255518689751625, 0.006776698864996433, 0.007105604745447636, 0.005562894977629185, 0.20312508940696716, 0.056048911064863205, 0.04177262261509895, 0.18134142458438873, 0.04556399583816528, 0.1435631662607193, 0.2900937497615814, 0.07549438625574112, 0.08105770498514175, 0.08377190679311752, 0.011481991037726402, 0.017289845272898674, 0.006863615941256285, 0.013694294728338718, 0.13657283782958984, 0.0735873132944107, 0.3659329116344452, 0.0919225886464119, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018124327063560486, 0.011053304187953472, 0.041496749967336655, 0.08067373931407928, 0.008039752952754498, 0.27361106872558594, 0.12004023045301437, 0.14489491283893585, 0.05115145817399025, 0.09850911796092987, 0.102595254778862, 0.03553636744618416, 0.03690872713923454, 0.062350839376449585, 0.18180564045906067, 0.06230737641453743, 0.038521286100149155, 0.05914388969540596, 0.03398321941494942, 0.13657090067863464, 0.19265799224376678, 0.07424072921276093, 0.08660972863435745, 0.10718739032745361, 0.16533604264259338, 0.0767570361495018, 0.03204379230737686, 0.028188396245241165, 0.21943823993206024, 0.11997849494218826, 0.2698959410190582, 0.12308003753423691, 0.45223531126976013, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12148405611515045, 0.0812632218003273, 0.2165963500738144, 0.1931358426809311, 0.08697410672903061, 0.006551810074597597, 0.06685828417539597, 0.03445844352245331, 0.0957593098282814, 0.40685340762138367, 0.14669549465179443, 0.05295614153146744, 0.013317806646227837, 0.016840115189552307, 0.07654187083244324, 0.18667352199554443, 0.0350969135761261, 0.030425790697336197, 0.0065561928786337376, 0.028277983888983727, 0.010725672356784344, 0.005219776649028063, 0.03378060460090637, 0.04241056367754936, 0.18939200043678284, 0.06338198482990265, 0.08136797696352005, 0.004227515775710344, 0.024540461599826813, 0.057830944657325745, 0.038525767624378204, 0.0177453625947237, 0.06933332234621048, 0.08866386860609055, NaN, NaN, NaN, NaN, NaN, NaN], [0.00987213384360075, 0.006524993572384119, 0.026135168969631195, 0.011839349754154682, 0.033334147185087204, 0.0041054473258554935, 0.0015945311170071363, 0.0032734640408307314, 0.04142798110842705, 0.08157128095626831, 0.26105597615242004, 0.34578391909599304, 0.018666768446564674, 0.02866668626666069, 0.00917118415236473, 0.04736897721886635, 0.0950922816991806, 0.05233628675341606, 0.0639958381652832, 0.009022187441587448, 0.002768130972981453, 0.005348078906536102, 0.016458049416542053, 0.03350484371185303, 0.1584910899400711, 0.3849281072616577, 0.30566492676734924, 0.08282434195280075, 0.02534077689051628, 0.01897522434592247, 0.013481524772942066, 0.08136109262704849, 0.25969398021698, 0.2513872981071472, 0.07361149042844772, NaN, NaN, NaN, NaN, NaN], [0.024172252044081688, 0.01827125810086727, 0.0764245018362999, 0.024589890614151955, 0.045055974274873734, 0.08366040140390396, 0.049236495047807693, 0.16330885887145996, 0.05235174670815468, 0.18916647136211395, 0.2596777379512787, 0.12284716963768005, 0.3776375353336334, 0.3416304290294647, 0.00993264652788639, 0.15279658138751984, 0.09928575158119202, 0.0573631152510643, 0.10790141671895981, 0.026906443759799004, 0.012519991025328636, 0.06774256378412247, 0.1448669582605362, 0.07826853543519974, 0.4991803467273712, 0.34429702162742615, 0.12145370990037918, 0.10719165205955505, 0.008088642731308937, 0.007662023417651653, 0.013441860675811768, 0.13362208008766174, 0.34251537919044495, 0.10342243313789368, 0.07045409828424454, 0.010391364805400372, NaN, NaN, NaN, NaN], [0.03498423844575882, 0.015507807955145836, 0.05400218814611435, 0.2035217136144638, 0.06879755109548569, 0.01839861460030079, 0.1265679895877838, 0.19229170680046082, 0.28682830929756165, 0.19846217334270477, 0.19391797482967377, 0.03128731623291969, 0.00016305393364746124, 0.003939830232411623, 0.1374405473470688, 0.1865139603614807, 0.02971193566918373, 0.005512321833521128, 0.039164237678050995, 0.007472363766282797, 0.012969624251127243, 0.03476016968488693, 0.0836154893040657, 0.050758667290210724, 0.017821883782744408, 0.08676476776599884, 0.13045690953731537, 0.03245873004198074, 0.009119128808379173, 7.800521416356787e-05, 0.0006276130443438888, 0.0024839011020958424, 0.06682475656270981, 0.06347990781068802, 0.009879485704004765, 0.0017003080574795604, 6.444661266868934e-05, NaN, NaN, NaN], [0.013754391111433506, 0.07632532715797424, 0.05588589236140251, 0.060033075511455536, 0.015113652683794498, 0.024528013542294502, 0.0056539555080235004, 0.025407979264855385, 0.0030256062746047974, 0.3076882064342499, 0.2846599221229553, 0.01613902486860752, 0.07589408755302429, 0.25697121024131775, 0.08533195406198502, 0.029208103194832802, 0.15452517569065094, 0.02615012601017952, 0.034968301653862, 0.030517179518938065, 0.023491270840168, 0.02012590691447258, 0.01683984510600567, 0.047155413776636124, 0.1569623053073883, 0.34555378556251526, 0.29876279830932617, 0.06633269041776657, 0.090775266289711, 0.05117363482713699, 0.14964616298675537, 0.024973956868052483, 0.22028914093971252, 0.5953715443611145, 0.10930891335010529, 0.05826140195131302, 0.08348876982927322, 0.2024080604314804, NaN, NaN], [0.0015476603293791413, 0.017548631876707077, 0.0017550711054354906, 0.0017123925499618053, 0.0004861274501308799, 0.0013240363914519548, 0.007671059109270573, 0.03281305357813835, 0.0013763409806415439, 0.060824256390333176, 0.04298469424247742, 0.011416267603635788, 0.012759965844452381, 0.012971585616469383, 0.16966485977172852, 0.023966457694768906, 0.008770916610956192, 0.0534873865544796, 0.015555462799966335, 0.07408829033374786, 0.12750747799873352, 0.026930494233965874, 0.023400133475661278, 0.02665247581899166, 0.00316479685716331, 0.004739005118608475, 0.002742160577327013, 0.006070322822779417, 0.09564805775880814, 0.029174519702792168, 0.5144217014312744, 0.05911846086382866, 0.020064763724803925, 0.0023497287184000015, 0.004584830719977617, 0.10225256532430649, 0.05520752817392349, 0.4466201066970825, 0.09660884737968445, NaN], [0.005211545154452324, 0.0055291797034442425, 0.0040288688614964485, 0.011110500432550907, 0.002710954286158085, 0.0645279660820961, 0.01716793328523636, 0.025083528831601143, 0.010282285511493683, 0.009002536535263062, 0.0011292833369225264, 0.0045064822770655155, 0.007478337734937668, 0.004868943244218826, 0.13875910639762878, 0.18986307084560394, 0.036011889576911926, 0.08335232734680176, 0.12826237082481384, 0.08758756518363953, 0.027860891073942184, 0.10198243707418442, 0.0981309786438942, 0.17985263466835022, 0.11864234507083893, 0.08274368196725845, 0.1066904067993164, 0.051979877054691315, 0.06548189371824265, 0.03337343409657478, 0.0824524462223053, 0.012718076817691326, 0.0349668525159359, 0.03024965338408947, 0.01082769688218832, 0.0127665214240551, 0.014164488762617111, 0.01925024762749672, 0.0028478982858359814, 0.0007362329051829875]], [[0.12737327814102173, 0.10940374433994293, 0.05123003572225571, 0.7807462215423584, 0.0676276683807373, 0.02884089946746826, 0.05574861168861389, 0.5975708961486816, 0.07044392824172974, 0.5009010434150696, 0.31273892521858215, 0.07660850137472153, 0.29424503445625305, 0.028401609510183334, 0.07683643698692322, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03750006482005119, 0.429240882396698, 0.15060469508171082, 0.2604650557041168, 0.037177786231040955, 0.1944778561592102, 0.07849539071321487, 0.6716934442520142, 0.06105323135852814, 0.07711976766586304, 0.20997941493988037, 0.028168758377432823, 0.12550987303256989, 0.030995607376098633, 0.0958443135023117, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.15516091883182526, 0.07278051972389221, 0.11765316128730774, 0.7884857058525085, 0.11075033247470856, 0.051856692880392075, 0.18673725426197052, 0.2268398553133011, 0.013722711242735386, 0.6478350162506104, 0.5306386947631836, 0.3090885877609253, 0.22243055701255798, 0.16200464963912964, 0.13070979714393616, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.21811531484127045, 0.7140333652496338, 0.018219277262687683, 0.764274001121521, 0.15804116427898407, 0.03280843421816826, 0.11008237302303314, 0.09874711185693741, 0.0423860140144825, 0.5652360320091248, 0.14938808977603912, 0.2869919240474701, 0.39966318011283875, 0.1259765923023224, 0.0577625073492527, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.11744663864374161, 0.1893559694290161, 0.05823011323809624, 0.03701714053750038, 0.15626470744609833, 0.08588159829378128, 0.26269999146461487, 0.41053518652915955, 0.007210245821624994, 0.3749772906303406, 0.4537068009376526, 0.6417111158370972, 0.1666039228439331, 0.13084180653095245, 0.14052902162075043, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3613002598285675, 0.240200012922287, 0.044567547738552094, 0.04614294692873955, 0.0021214759908616543, 0.17616558074951172, 0.11286458373069763, 0.11203286051750183, 0.009014172479510307, 0.10163455456495285, 0.0949772298336029, 0.06209810823202133, 0.11910365521907806, 0.04125094786286354, 0.1871420443058014, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2914785146713257, 0.381010502576828, 0.08399549126625061, 0.4511452913284302, 0.048780620098114014, 0.008560722693800926, 0.1541443020105362, 0.12101723253726959, 0.02183164842426777, 0.18665823340415955, 0.13169258832931519, 0.13539372384548187, 0.14286382496356964, 0.031125182285904884, 0.2064482420682907, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3084108829498291, 0.4568510055541992, 0.068343386054039, 0.40243175625801086, 0.04035715013742447, 0.028490515425801277, 0.006473515648394823, 0.6036491990089417, 0.14769236743450165, 0.09462843090295792, 0.04651549458503723, 0.08334364742040634, 0.08459941297769547, 0.022403797134757042, 0.13448290526866913, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.4981050491333008, 0.13424238562583923, 0.16773013770580292, 0.5160816311836243, 0.029790958389639854, 0.22989192605018616, 0.568993866443634, 0.056374672800302505, 0.08792523294687271, 0.2900378406047821, 0.12431738525629044, 0.017185388132929802, 0.05061684548854828, 0.020683959126472473, 0.13275840878486633, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.33482691645622253, 0.4720645546913147, 0.20652346312999725, 0.6004944443702698, 0.1402488797903061, 0.13250590860843658, 0.13873517513275146, 0.5260767936706543, 0.01182119082659483, 0.1017654612660408, 0.047682080417871475, 0.04534589499235153, 0.10121697187423706, 0.0026118881069123745, 0.13006491959095, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.27261805534362793, 0.5674196481704712, 0.08154824376106262, 0.8736060261726379, 0.4724165201187134, 0.1720387041568756, 0.13692085444927216, 0.40960294008255005, 0.06138879805803299, 0.0898643285036087, 0.15986473858356476, 0.04882661625742912, 0.09858791530132294, 0.005254920106381178, 0.09166211634874344, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.33052578568458557, 0.40956470370292664, 0.44244009256362915, 0.8809638619422913, 0.26719745993614197, 0.38818857073783875, 0.40750059485435486, 0.4857279658317566, 0.04656125605106354, 0.08998580276966095, 0.02227160707116127, 0.42457664012908936, 0.06242617964744568, 0.019552020356059074, 0.08343644440174103, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.20678018033504486, 0.17620769143104553, 0.3081345558166504, 0.6112105250358582, 0.534289538860321, 0.19626931846141815, 0.17160479724407196, 0.4079393148422241, 0.027630727738142014, 0.07990976423025131, 0.0661839172244072, 0.022294294089078903, 0.11108729988336563, 0.024492109194397926, 0.12739884853363037, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2302674651145935, 0.4147239625453949, 0.3118293881416321, 0.3454154133796692, 0.20178626477718353, 0.3381562829017639, 0.1571493148803711, 0.4487079083919525, 0.02096635475754738, 0.11857040971517563, 0.09038619697093964, 0.01401298213750124, 0.06377796083688736, 0.029106009751558304, 0.10548537224531174, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0850413590669632, 0.2905830442905426, 0.047175440937280655, 0.009145522490143776, 0.014412813819944859, 0.03387918695807457, 0.04852135106921196, 0.2856408655643463, 0.03688584640622139, 0.02503933012485504, 0.030300520360469818, 0.020876996219158173, 0.004409631714224815, 0.0025441893376410007, 0.1292814165353775, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01263146661221981, 0.08983241021633148, 0.002674827352166176, 0.0008326905663125217, 0.0032944290433079004, 0.06790440529584885, 0.02327594719827175, 0.08626140654087067, 0.0010102109517902136, 0.0009567838278599083, 0.001915089669637382, 0.019144434481859207, 0.060631223022937775, 0.04236740246415138, 0.2042645514011383, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12322216480970383, 0.14532910287380219, 0.08289580047130585, 0.07800436019897461, 0.016899574548006058, 0.20651613175868988, 0.15389330685138702, 0.08048079907894135, 0.023754820227622986, 0.08939354121685028, 0.05408218502998352, 0.0083498889580369, 0.16772767901420593, 0.03971855714917183, 0.029394451528787613, 0.12774905562400818, 0.07772441953420639, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002537816995754838, 0.0036866364534944296, 0.0026212686207145452, 0.0010326605988666415, 0.0028582154773175716, 0.0016078348271548748, 0.0024177017621695995, 0.004757970105856657, 0.007405414246022701, 0.0004943490494042635, 0.0008183143800124526, 0.0020540759433060884, 0.0008841927628964186, 0.0009274804615415633, 0.13894422352313995, 0.058547187596559525, 0.7868303656578064, 0.02677525207400322, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18076959252357483, 0.11159703880548477, 0.07333940267562866, 0.12368053197860718, 0.1442640721797943, 0.3224244713783264, 0.2286587655544281, 0.10576390475034714, 0.0873323604464531, 0.0707816481590271, 0.07077325880527496, 0.024980774149298668, 0.015894055366516113, 0.01236753724515438, 0.034113459289073944, 0.12958122789859772, 0.05996095389127731, 0.20109553635120392, 0.07473170012235641, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008514223620295525, 0.006442691199481487, 0.003549255197867751, 0.00919315591454506, 0.0011393448803573847, 0.0005870977183803916, 0.02400296926498413, 0.03577389195561409, 0.006469632964581251, 0.004828252829611301, 0.0027150637470185757, 9.597353346180171e-05, 0.00011822552187368274, 0.000396552961319685, 0.1521017998456955, 0.11586850136518478, 0.18037959933280945, 0.354478657245636, 0.6275972127914429, 0.01217791810631752, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0016907083336263895, 9.336868970422074e-05, 0.0023900996893644333, 0.0018071996746584773, 0.001690928009338677, 0.0010278637055307627, 0.008010926656425, 0.0018918663263320923, 0.0009378245449624956, 0.0005185406771488488, 0.00012474792310968041, 0.00014544214354828, 2.7525844416231848e-05, 2.095987474604044e-05, 0.12926018238067627, 0.04329086095094681, 0.2822243273258209, 0.5110569596290588, 0.8230794668197632, 0.28263914585113525, 0.006951561663299799, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08279342949390411, 0.00717265997081995, 0.01113244891166687, 0.030300047248601913, 0.03227340802550316, 0.02679654024541378, 0.2711687386035919, 0.12656770646572113, 0.0010184150887653232, 0.0069296094588935375, 0.006689318455755711, 0.00307065830565989, 0.004024384077638388, 0.006041096989065409, 0.12722525000572205, 0.15041278302669525, 0.01652364432811737, 0.09004879742860794, 0.1228649914264679, 0.03705046698451042, 0.03279988467693329, 0.012472960166633129, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09468965977430344, 0.010531323030591011, 0.1253902167081833, 0.09483902901411057, 0.060478318482637405, 0.1959676593542099, 0.5850688219070435, 0.11734473705291748, 0.08924026787281036, 0.031869061291217804, 0.04437774419784546, 0.004531644284725189, 0.19630968570709229, 0.04580901935696602, 0.04253998026251793, 0.005692727863788605, 0.004583822097629309, 0.011303454637527466, 0.06351188570261002, 0.07110948860645294, 0.03377191722393036, 0.8937738537788391, 0.1077374666929245, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03443194553256035, 0.006786322686821222, 0.08545193076133728, 0.2555176913738251, 0.16119416058063507, 0.3760574460029602, 0.3180745542049408, 0.0858285129070282, 0.0052651395089924335, 0.035345133394002914, 0.0046972003765404224, 0.00805696938186884, 0.0738091915845871, 0.004572577308863401, 0.028640231117606163, 0.1957636922597885, 0.00532554043456912, 0.2672942280769348, 0.07843183726072311, 0.01169322058558464, 0.006695515010505915, 0.022856300696730614, 0.03495524823665619, 0.2056257426738739, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.26599034667015076, 0.06405031681060791, 0.39913085103034973, 0.7390084862709045, 0.8533709049224854, 0.0830850899219513, 0.22198519110679626, 0.15359464287757874, 0.0286090150475502, 0.1338224709033966, 0.06985709816217422, 0.03841168060898781, 0.1308237761259079, 0.01580808497965336, 0.010780439712107182, 0.21948350965976715, 0.003219911362975836, 0.13064762949943542, 0.017335020005702972, 0.004487968049943447, 0.006097455509006977, 0.0023269150406122208, 0.014221499674022198, 0.1740167737007141, 0.05570632219314575, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16064751148223877, 0.5348425507545471, 0.09399141371250153, 0.3709404170513153, 0.3757614493370056, 0.2272261530160904, 0.2699662148952484, 0.46868544816970825, 0.09081633388996124, 0.07856583595275879, 0.054298948496580124, 0.10659310221672058, 0.05178465321660042, 0.012835889123380184, 0.19243957102298737, 0.027252521365880966, 0.05625513195991516, 0.024279700592160225, 0.009296371601521969, 0.04113621264696121, 0.04445572942495346, 0.05016031116247177, 0.300394743680954, 0.219209223985672, 0.5284181833267212, 0.13528388738632202, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.33067551255226135, 0.40668511390686035, 0.03748138248920441, 0.16017457842826843, 0.02931954525411129, 0.1285390406847, 0.43687552213668823, 0.6227295398712158, 0.016583241522312164, 0.054699335247278214, 0.43602558970451355, 0.028376825153827667, 0.1860552728176117, 0.202489972114563, 0.03443598374724388, 0.16918426752090454, 0.005196947604417801, 0.010393726639449596, 0.0008839815272949636, 0.18853645026683807, 0.23955073952674866, 0.03703731670975685, 0.018581384792923927, 0.07692746073007584, 0.05213537812232971, 0.05520249530673027, 0.03837481513619423, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.025147954002022743, 0.023277895525097847, 0.036982107907533646, 0.030706623569130898, 0.00253032217733562, 0.08060919493436813, 0.062497250735759735, 0.22720953822135925, 0.015824737027287483, 0.020865583792328835, 0.051981136202812195, 0.016274577006697655, 0.3496847152709961, 0.19709302484989166, 0.00854758732020855, 0.21910618245601654, 0.012340836226940155, 0.011061819270253181, 0.004421355202794075, 0.01345156505703926, 0.015948239713907242, 0.001919197733514011, 0.0006712953327223659, 0.0014401280786842108, 0.0009498890140093863, 0.0011606297921389341, 0.0013843519845977426, 0.005138876382261515, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0009813109645619988, 0.0007951235747896135, 0.007896890863776207, 0.006039812229573727, 0.001424357295036316, 0.003153599100187421, 0.0010362794855609536, 0.006138501223176718, 0.00410880520939827, 0.003359388094395399, 0.008728301152586937, 0.0021525975316762924, 0.2318088710308075, 0.017491629347205162, 0.0005464124260470271, 0.12592341005802155, 0.022789308801293373, 0.01544136367738247, 0.05098855495452881, 0.006733328104019165, 0.0011512627825140953, 0.0067494111135602, 0.03519098460674286, 0.08756479620933533, 0.04847756400704384, 0.13774195313453674, 0.07365753501653671, 0.19525301456451416, 0.019442297518253326, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008814784698188305, 0.009578033350408077, 0.008741176687180996, 0.002597709419205785, 0.0019302073633298278, 0.02750723622739315, 0.010486552491784096, 0.061721935868263245, 0.05738110467791557, 0.0038812088314443827, 0.08735688030719757, 0.00500333309173584, 3.085857315454632e-05, 0.005531619768589735, 0.14116442203521729, 0.04374772310256958, 0.10635814815759659, 0.1203576922416687, 0.4972172677516937, 0.09716533124446869, 0.05867829546332359, 0.13453392684459686, 0.39353471994400024, 0.6331138610839844, 0.33491814136505127, 0.5983138680458069, 0.3633559048175812, 0.6357010006904602, 0.7792285084724426, 0.005659972317516804, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015857994556427002, 0.010374038480222225, 0.002225207630544901, 0.002974742790684104, 0.0010843537747859955, 0.007387869525700808, 0.006818806286901236, 0.0318806953728199, 0.1651621013879776, 0.21757511794567108, 0.2911650240421295, 0.08204617351293564, 0.016449127346277237, 0.10985822230577469, 0.0020742996130138636, 0.05199728533625603, 0.014302223920822144, 0.13574257493019104, 0.05407930538058281, 0.010633953846991062, 0.007459194865077734, 0.0004102779785171151, 0.01107444055378437, 0.16451390087604523, 0.19313758611679077, 0.018386593088507652, 0.03492085263133049, 0.1390746384859085, 0.6526300311088562, 0.08304706960916519, 0.27643677592277527, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01972219906747341, 0.20374125242233276, 0.0031293979845941067, 0.004390338435769081, 0.031924858689308167, 0.06048818305134773, 0.0774247944355011, 0.7845978140830994, 0.15838612616062164, 0.06142642721533775, 0.0820784792304039, 0.20785683393478394, 0.46646884083747864, 0.42270010709762573, 0.053927596658468246, 0.0008206118363887072, 0.0011099595576524734, 0.0005428412696346641, 0.0013029578840360045, 0.0009422241128049791, 0.001036918954923749, 0.00015340711979661137, 0.003300317795947194, 0.0019372785463929176, 0.003245894331485033, 0.0010756017873063684, 0.0009867959888651967, 0.04242069274187088, 0.25679609179496765, 0.03714281693100929, 0.46563825011253357, 0.052469443529844284, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.026567673310637474, 0.2768426239490509, 0.016553064808249474, 0.07253812253475189, 0.029352964833378792, 0.034967049956321716, 0.09283487498760223, 0.5970632433891296, 0.02342795394361019, 0.04057195410132408, 0.06215028092265129, 0.2966896891593933, 0.4489157795906067, 0.24187524616718292, 0.048112284392118454, 0.0011551693314686418, 0.0015016108518466353, 0.00018865184392780066, 0.0004620797117240727, 0.001353209256194532, 0.001276124152354896, 0.001269699539989233, 0.02504812367260456, 0.016660472378134727, 0.007664685603231192, 0.000621759332716465, 0.0039494638331234455, 0.05373308062553406, 0.5797222256660461, 0.04267296567559242, 0.3308492600917816, 0.22605444490909576, 0.03655111417174339, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14453455805778503, 0.4129781723022461, 0.021322425454854965, 0.11776001751422882, 0.008680691011250019, 0.12525556981563568, 0.1459336131811142, 0.4943058490753174, 0.041365865617990494, 0.06633096933364868, 0.48416346311569214, 0.027247071266174316, 0.10342812538146973, 0.15874288976192474, 0.04535134881734848, 0.18345873057842255, 0.006115049123764038, 0.007153322920203209, 0.00125643250066787, 0.15791349112987518, 0.17755654454231262, 0.06167090684175491, 0.028255566954612732, 0.04990806803107262, 0.014394938945770264, 0.013118196278810501, 0.02539716847240925, 0.00894339382648468, 0.04024626687169075, 0.05642623454332352, 0.04561464861035347, 0.029457826167345047, 0.09210912138223648, 0.1002524197101593, NaN, NaN, NaN, NaN, NaN, NaN], [0.03164434805512428, 0.10487183183431625, 0.019769076257944107, 0.0709872916340828, 0.0046073514968156815, 0.12636253237724304, 0.06114564463496208, 0.5786424875259399, 0.17960773408412933, 0.15923625230789185, 0.14680741727352142, 0.04373620077967644, 0.20528176426887512, 0.14476445317268372, 0.03252548724412918, 0.2828649580478668, 0.011994204483926296, 0.006339475512504578, 0.0030444697476923466, 0.006948052905499935, 0.008767204359173775, 0.0014567734906449914, 0.00018795454525388777, 0.00020330831466708332, 0.0001539710647193715, 0.0004007722018286586, 0.0012242270167917013, 0.001961026806384325, 0.0007920600473880768, 0.002005743095651269, 0.00011892847396666184, 0.00023868663993198425, 0.0018499011639505625, 0.002196513582020998, 0.004604275804013014, NaN, NaN, NaN, NaN, NaN], [0.03216148540377617, 0.04786192253232002, 0.0904572606086731, 0.284318745136261, 0.04915444552898407, 0.20336958765983582, 0.019341057166457176, 0.31598398089408875, 0.503376841545105, 0.2976534068584442, 0.3550446927547455, 0.318871408700943, 0.31741514801979065, 0.09137054532766342, 0.022498751059174538, 0.128562331199646, 0.014782274141907692, 0.007007280830293894, 0.02549830637872219, 0.0029198189731687307, 0.0006880113505758345, 0.0037798655685037374, 0.009390356950461864, 0.008127862587571144, 0.00817851535975933, 0.024966517463326454, 0.0308842696249485, 0.07813727855682373, 0.003280356992036104, 0.001509596244432032, 0.010023933835327625, 0.08412036299705505, 0.1339937299489975, 0.13076454401016235, 0.2572615444660187, 0.02603374607861042, NaN, NaN, NaN, NaN], [0.00784912146627903, 0.004314524121582508, 0.007757026236504316, 0.004281783476471901, 0.001910648075863719, 0.00898022297769785, 0.007197065278887749, 0.05121663585305214, 0.12398385256528854, 0.006457128562033176, 0.09335841238498688, 0.0023844544775784016, 1.3785818737233058e-05, 0.0021891386713832617, 0.13778245449066162, 0.018602287396788597, 0.034721970558166504, 0.034974802285432816, 0.21532808244228363, 0.037075310945510864, 0.013384592719376087, 0.039282385259866714, 0.11046459525823593, 0.17542847990989685, 0.05914776027202606, 0.1884417086839676, 0.12911023199558258, 0.24417443573474884, 0.327198326587677, 0.0006843891460448503, 0.1527024656534195, 0.4776603579521179, 0.37270504236221313, 0.4335513412952423, 0.6841917634010315, 0.8031085133552551, 0.004920803010463715, NaN, NaN, NaN], [0.0865921899676323, 0.029389984905719757, 0.007211814168840647, 0.022628001868724823, 0.003064699238166213, 0.026838112622499466, 0.02777392417192459, 0.17195671796798706, 0.5349084734916687, 0.37311822175979614, 0.5073185563087463, 0.12468769401311874, 0.014684900641441345, 0.11363118886947632, 0.01852630451321602, 0.05855157971382141, 0.021276630461215973, 0.13662834465503693, 0.05244326964020729, 0.015041220933198929, 0.007642571348696947, 0.00036013865610584617, 0.004098850768059492, 0.033856965601444244, 0.05778159946203232, 0.005442364141345024, 0.017580043524503708, 0.04633626714348793, 0.3112163841724396, 0.03644357994198799, 0.0868009626865387, 0.020123973488807678, 0.03773906081914902, 0.06257405877113342, 0.2619801461696625, 0.7497928738594055, 0.19582624733448029, 0.4370352327823639, NaN, NaN], [0.021940317004919052, 0.17988227307796478, 0.0027716639451682568, 0.0058884406462311745, 0.02112143486738205, 0.056551095098257065, 0.09669405966997147, 0.8433947563171387, 0.1836535632610321, 0.048101164400577545, 0.0939687192440033, 0.12228170782327652, 0.5153423547744751, 0.4533718526363373, 0.10564926266670227, 0.0006882869056425989, 0.0005033394554629922, 0.00030677669565193355, 0.001028614118695259, 0.00036578672006726265, 0.0005035633221268654, 5.2447539928834885e-05, 0.0006442382582463324, 0.0003597578906919807, 0.0002600657753646374, 8.536354289390147e-05, 0.00018848010222427547, 0.00940172839909792, 0.03475101292133331, 0.004768407437950373, 0.09523987770080566, 0.0036924693267792463, 0.0034024319611489773, 0.001987446565181017, 0.06484154611825943, 0.36614781618118286, 0.06470755487680435, 0.48020803928375244, 0.12385622411966324, NaN], [0.07970402389764786, 0.263812392950058, 0.027112353593111038, 0.06228066235780716, 0.03007029928267002, 0.5465735197067261, 0.2176109254360199, 0.5667538046836853, 0.10334119945764542, 0.3484029769897461, 0.1586397886276245, 0.28290486335754395, 0.07807470858097076, 0.405972421169281, 0.12247955799102783, 0.13044977188110352, 0.023216107860207558, 0.019304566085338593, 0.018173998221755028, 0.12614674866199493, 0.04656239226460457, 0.015089727938175201, 0.04114385321736336, 0.018700774759054184, 0.020505733788013458, 0.009310846216976643, 0.02222343534231186, 0.22412429749965668, 0.3900958001613617, 0.1100122332572937, 0.14125461876392365, 0.09716113656759262, 0.14588865637779236, 0.12185929715633392, 0.5472521185874939, 0.7197717428207397, 0.31834876537323, 0.37092098593711853, 0.2838878929615021, 0.0011011400492861867]]], [[[0.00039591442327946424, 4.3682277464540675e-05, 1.7448855942348018e-05, 4.859234650211874e-06, 1.1413659422032651e-06, 1.0625568393152207e-05, 1.9137923246148603e-08, 5.615326585939329e-07, 5.487099315359956e-06, 2.1910665282121045e-07, 2.532970881929941e-07, 7.501878940274764e-07, 1.657212578720646e-06, 1.0862070212169783e-06, 0.18717002868652344, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6005652546882629, 0.09179380536079407, 0.017407523468136787, 0.009556752629578114, 0.001977206440642476, 0.02417689561843872, 0.001285116421058774, 0.0015866898465901613, 0.0007265046588145196, 0.0008927723974920809, 0.008914382196962833, 0.0016361800953745842, 0.1313493698835373, 0.006872364319860935, 0.052507203072309494, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00456381356343627, 0.8302816152572632, 0.11558636277914047, 0.010320104658603668, 0.00024428890901617706, 9.749805758474395e-05, 7.678471774852369e-06, 0.0030259541235864162, 3.9539358112961054e-05, 7.781033491482958e-05, 0.0003711417084559798, 9.1652873379644e-06, 0.0006458949064835906, 0.00023330377007368952, 0.00865631178021431, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0011992683866992593, 0.008629350923001766, 0.6251504421234131, 0.015135818161070347, 0.001978840446099639, 0.000745285302400589, 5.708653407054953e-05, 0.00043479635496623814, 0.0005481417756527662, 0.0016355890547856688, 0.0002436988870613277, 5.164237336430233e-06, 4.976044510840438e-05, 3.400173591217026e-05, 0.00024351823958568275, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.006698334589600563, 0.006304558366537094, 0.34660738706588745, 0.7217360138893127, 0.06864907592535019, 0.0027605369687080383, 0.0006927561480551958, 0.00010832686530193314, 0.0002978279662784189, 0.007849807851016521, 0.0023863124661147594, 8.873132173903286e-06, 2.0952818886144087e-05, 4.62439584225649e-06, 0.000559441396035254, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0006861803703941405, 0.036174044013023376, 0.4128260612487793, 0.09897080808877945, 0.6376775503158569, 0.19431157410144806, 0.0007082957308739424, 0.05852581560611725, 0.0003548018867149949, 0.00026609119959175587, 0.0006576658925041556, 0.0007862210040912032, 0.027955245226621628, 0.006076914723962545, 0.0010327105410397053, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.7293352305713938e-09, 1.4693102912133327e-06, 3.0192679332685657e-05, 1.0152590220968705e-05, 0.005660888738930225, 0.5108420252799988, 0.0005426039570011199, 0.0008102089632302523, 3.168102921335958e-06, 6.12798771726375e-08, 2.5310575324510864e-07, 5.088519174023531e-06, 0.00021843344438821077, 2.5946601454052143e-06, 2.594279294498847e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [7.755387923680246e-05, 3.5259185096947476e-05, 0.0012139425380155444, 0.00035162578569725156, 0.00505053298547864, 0.4696201980113983, 0.5859625339508057, 0.009771172888576984, 0.0005853781476616859, 3.0261137453635456e-06, 1.2206013707327656e-05, 2.2465645088232122e-05, 0.013555033132433891, 0.0011026648571714759, 7.656160596525297e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [3.390625025190275e-08, 5.7732322602532804e-05, 3.19563605444273e-06, 2.0829493507790175e-07, 5.039521965954918e-06, 0.00017657184798736125, 0.000729007413610816, 0.8331114649772644, 0.0037640428636223078, 1.5948112377373036e-06, 5.8014775277115405e-06, 4.528372699041938e-07, 0.00020723954366985708, 0.00025866259238682687, 1.95706252270611e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [2.7739795882553153e-07, 2.501485141692683e-05, 4.778147285833256e-06, 3.7190903867667657e-07, 9.610201523457818e-09, 1.1292572708043735e-06, 1.2355405942798825e-07, 3.984562499681488e-05, 0.6202287077903748, 0.0002610959345474839, 0.00017016819037962705, 9.242457963409834e-07, 2.799387630147976e-06, 3.2760857493485673e-07, 1.038134087139042e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.2775580216839444e-05, 0.0010497755138203502, 6.564326031366363e-05, 4.172011358605232e-06, 4.676745959386608e-07, 3.6489967669695034e-07, 8.09820832614605e-08, 5.78842673348845e-06, 0.0015375507064163685, 0.7445451617240906, 0.026254041120409966, 8.213486580643803e-05, 1.1159563655382954e-05, 3.0355058697750792e-05, 2.6809220798895694e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [1.3068409316474572e-05, 0.00010775982809718698, 0.00024633039720356464, 3.3576598070794716e-05, 4.556980275083333e-05, 1.0597023702985098e-07, 9.86238859468358e-08, 2.1072135041322326e-06, 0.0013669389300048351, 0.5916010141372681, 0.4436832368373871, 0.0013138806680217385, 4.73510908705066e-06, 6.116700660641072e-06, 2.961193558803643e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [4.950460061081685e-05, 0.0011237917933613062, 0.017257435247302055, 0.0011414129985496402, 0.025087760761380196, 0.00036485170130617917, 3.213326635886915e-05, 5.293267349770758e-06, 4.4593522034119815e-05, 0.001686945091933012, 0.00823597889393568, 0.8047888278961182, 0.014818375930190086, 0.006413417402654886, 2.281446177221369e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.000998240546323359, 0.1768636256456375, 0.0663335844874382, 0.02716292440891266, 0.03197554498910904, 0.001621886040084064, 0.00012482069723773748, 7.020989141892642e-05, 0.08078382909297943, 0.1701173484325409, 0.08303841948509216, 0.5506232380867004, 0.06293172389268875, 0.03332124650478363, 0.0033543158788233995, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.021357281133532524, 0.0013016555458307266, 0.00422634556889534, 0.00104909623041749, 0.012563652358949184, 0.07401228696107864, 0.007866809144616127, 0.0024991247337311506, 0.0011657974682748318, 5.4276370065053925e-06, 0.0024851916823536158, 0.0298884529620409, 0.4522511959075928, 0.2182934284210205, 0.14462554454803467, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02659090794622898, 0.049626123160123825, 0.04500019550323486, 0.012677792459726334, 0.33557751774787903, 0.02776678465306759, 0.02675992250442505, 0.09967876970767975, 0.04216820374131203, 0.009756066836416721, 0.0133897690102458, 0.12886802852153778, 0.03152704983949661, 0.046163998544216156, 0.21004843711853027, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05978302285075188, 0.18161648511886597, 0.038620203733444214, 0.022025080397725105, 0.09790226072072983, 0.04398013651371002, 0.00788698997348547, 0.04135579988360405, 0.0068543110974133015, 0.03809167072176933, 0.03150040656328201, 0.0462106354534626, 0.024762138724327087, 0.011792140081524849, 0.015839271247386932, 0.16810710728168488, 0.017288343980908394, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005166883580386639, 0.0005590450600720942, 0.007114546839147806, 0.0015656572068110108, 0.02179996483027935, 0.0010864944197237492, 0.0051814797334373, 0.0011148365447297692, 0.00816393457353115, 0.0019027285743504763, 0.005033016670495272, 0.010743028484284878, 0.0006906923954375088, 0.0011143455049023032, 0.16189540922641754, 0.12647151947021484, 0.25301796197891235, 0.03169602155685425, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17136499285697937, 0.002046054694801569, 0.4725193679332733, 0.24347566068172455, 0.1026763990521431, 0.00369152519851923, 0.013768541626632214, 0.003912978805601597, 0.022358577698469162, 0.06323882192373276, 0.28539538383483887, 0.009778834879398346, 0.0043070269748568535, 0.020384330302476883, 0.006856778170913458, 0.15976493060588837, 0.03159531578421593, 0.05609510838985443, 0.007400199305266142, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18433871865272522, 0.013500750064849854, 0.42166435718536377, 0.1935500204563141, 0.3502363860607147, 0.0009389789775013924, 0.0472395233809948, 0.015336934477090836, 0.07204270362854004, 0.07276465743780136, 0.4023721218109131, 0.016390468925237656, 0.00493515282869339, 0.01088448241353035, 0.18081046640872955, 0.16021955013275146, 0.26433131098747253, 0.07329617440700531, 0.11257290840148926, 0.001577433431521058, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01929071731865406, 3.154709338559769e-05, 0.04895680397748947, 0.04499320685863495, 0.03726757690310478, 0.0012487026397138834, 0.06078735366463661, 0.0025376947596669197, 0.023622047156095505, 0.008605116978287697, 0.05601886287331581, 0.011475598439574242, 0.0013240767875686288, 0.009706309996545315, 0.13962702453136444, 0.22870834171772003, 0.043985288590192795, 0.04075293987989426, 0.0035545979626476765, 0.0075324228964746, 0.00014864112017676234, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.032548993825912476, 0.0047013829462230206, 0.08043498545885086, 0.08197268843650818, 0.43236956000328064, 0.013080407865345478, 0.006017346400767565, 0.05529334023594856, 0.01970849372446537, 0.004050384275615215, 0.0073967562057077885, 0.005829385481774807, 0.0008975209202617407, 0.0025361862499266863, 0.011671289801597595, 0.047688793390989304, 0.14664201438426971, 0.03658692538738251, 0.6408759355545044, 0.43873438239097595, 0.20478755235671997, 0.00511742290109396, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.046304989606142044, 0.026358718052506447, 0.20277923345565796, 0.3021180331707001, 0.6281617879867554, 0.19840610027313232, 0.12000668793916702, 0.21165543794631958, 0.0507807619869709, 0.10083203762769699, 0.17539183795452118, 0.08392243832349777, 0.036049142479896545, 0.06088141351938248, 0.024198466911911964, 0.07761336117982864, 0.07061085104942322, 0.041570939123630524, 0.1916733682155609, 0.159084752202034, 0.3477410674095154, 0.5968326330184937, 0.004175147507339716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.016816509887576103, 0.003118144813925028, 0.035858120769262314, 0.02315649762749672, 0.2957051992416382, 0.0033856350928545, 0.008419573307037354, 0.013085800223052502, 0.0065522813238203526, 0.004261805210262537, 0.0022621729876846075, 0.0015856586396694183, 0.00012999074533581734, 0.00036330719012767076, 0.004947974346578121, 0.07191380113363266, 0.05497179180383682, 0.3517811894416809, 0.9035707116127014, 0.14233137667179108, 0.1767667979001999, 0.04289708659052849, 0.00892895832657814, 0.001834895578213036, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13966688513755798, 0.051315873861312866, 0.16794879734516144, 0.17204447090625763, 0.02530861273407936, 0.1971883773803711, 0.6035643219947815, 0.35590535402297974, 0.01904589682817459, 0.14328262209892273, 0.05827813595533371, 0.12283631414175034, 0.08582676202058792, 0.021607764065265656, 0.09174748510122299, 0.21536989510059357, 0.19956108927726746, 0.3517906069755554, 0.458966463804245, 0.09842110425233841, 0.08277469873428345, 0.03296331316232681, 0.04812879115343094, 0.009344152174890041, 0.006280441302806139, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07622234523296356, 0.021088531240820885, 0.13214311003684998, 0.1876712292432785, 0.09946685284376144, 0.0739995539188385, 0.16667790710926056, 0.06527374684810638, 0.2691768705844879, 0.1298666000366211, 0.20347969233989716, 0.28972044587135315, 0.16063560545444489, 0.23408198356628418, 0.02879655919969082, 0.24051256477832794, 0.10134825110435486, 0.04672827199101448, 0.021085558459162712, 0.02245912328362465, 0.026835136115550995, 0.005604758393019438, 0.028772464022040367, 0.01708872988820076, 0.008745603263378143, 0.02540087327361107, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04186922311782837, 0.028065834194421768, 0.2365874946117401, 0.22718128561973572, 0.717268168926239, 0.0283160749822855, 0.047574929893016815, 0.22635598480701447, 0.046485841274261475, 0.11764083057641983, 0.11684223264455795, 0.600357711315155, 0.07936308532953262, 0.1614740490913391, 0.02326863817870617, 0.18141932785511017, 0.024432087317109108, 0.0408032201230526, 0.004596539307385683, 0.0778040885925293, 0.025828123092651367, 0.04467899724841118, 0.0885351300239563, 0.026468785479664803, 0.030213410034775734, 0.16925157606601715, 0.003915028180927038, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002160860225558281, 0.00041385856457054615, 0.0032894921023398638, 0.004175879992544651, 0.09230346977710724, 0.00037096597952768207, 0.00036027038004249334, 0.000777967507019639, 0.0010948613053187728, 0.006351495627313852, 0.00803811103105545, 0.2546491026878357, 0.005140772555023432, 0.0052158161997795105, 0.0018242541700601578, 0.0821177139878273, 0.0264634620398283, 0.01841210387647152, 0.010007970035076141, 0.006691556889563799, 0.0167625043541193, 0.0005595253896899521, 0.020632673054933548, 0.0021230748388916254, 0.10790054500102997, 0.5654488801956177, 0.3003200888633728, 0.01571945659816265, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01453752163797617, 0.0016249779146164656, 0.07837095856666565, 0.046283330768346786, 0.5220571756362915, 0.00571427633985877, 0.011274048127233982, 0.0005770810530520976, 0.06172677502036095, 0.028573052957654, 0.1375623345375061, 0.2926015257835388, 0.17741695046424866, 0.13592077791690826, 0.025488857179880142, 0.0726943239569664, 0.09770844131708145, 0.050709616392850876, 0.04594658315181732, 0.009083828888833523, 0.024983327835798264, 0.021837929263710976, 0.11926575750112534, 0.11382617056369781, 0.22249171137809753, 0.3826439678668976, 0.22458447515964508, 0.24531354010105133, 0.05176876112818718, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0018199050100520253, 1.759366932674311e-05, 0.005607981700450182, 0.029583722352981567, 0.009902501478791237, 0.00240499060600996, 0.016255119815468788, 0.008434450253844261, 0.0070381201803684235, 0.006882159970700741, 0.008103356696665287, 0.009371891617774963, 3.180988642270677e-05, 0.0005422193789854646, 0.14323127269744873, 0.28158777952194214, 0.045097555965185165, 0.02117414027452469, 0.05809389799833298, 0.0014524150174111128, 0.006964406464248896, 0.010582090355455875, 0.011965163983404636, 0.02265000529587269, 0.020484870299696922, 0.019729144871234894, 0.028731632977724075, 0.004907289054244757, 0.0051048253662884235, 0.00039794077747501433, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04913536086678505, 0.005111359525471926, 0.3943053185939789, 0.16504207253456116, 0.1333204060792923, 0.007373967207968235, 0.00649205781519413, 0.005781218875199556, 0.0696163922548294, 0.17078818380832672, 0.43588367104530334, 0.2441176176071167, 0.044073574244976044, 0.13962700963020325, 0.0038013174198567867, 0.18024474382400513, 0.03336771950125694, 0.025161737576127052, 0.03788529708981514, 0.010167604312300682, 0.0039537386037409306, 3.701886089402251e-05, 0.046124417334795, 0.08654022216796875, 0.06664562225341797, 0.11276466399431229, 0.09791301190853119, 0.08758807182312012, 0.277656227350235, 0.5478507876396179, 0.06896418333053589, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02972331829369068, 0.032405998557806015, 0.13676248490810394, 0.2985995411872864, 0.6838041543960571, 0.17950911819934845, 0.02566559985280037, 0.299430251121521, 0.06906868517398834, 0.09219349920749664, 0.14271143078804016, 0.15384355187416077, 0.31184810400009155, 0.37699857354164124, 0.11869719624519348, 0.10793236643075943, 0.04864804446697235, 0.0019557650666683912, 0.14817607402801514, 0.0378977507352829, 0.049347102642059326, 0.0036467635072767735, 0.0038541490212082863, 0.0034904496278613806, 0.0012115711579099298, 0.047197386622428894, 0.05697714909911156, 0.11328870058059692, 0.8784908056259155, 0.019691603258252144, 0.23420120775699615, 0.004765921737998724, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.035901740193367004, 0.049252428114414215, 0.13651704788208008, 0.3431343734264374, 0.4621880352497101, 0.07741573452949524, 0.035817742347717285, 0.1879495084285736, 0.09167803823947906, 0.15167558193206787, 0.20264029502868652, 0.22310277819633484, 0.27972275018692017, 0.27912822365760803, 0.1079779863357544, 0.1524984985589981, 0.08107080310583115, 0.005865868646651506, 0.00971321389079094, 0.007243088912218809, 0.011549782939255238, 0.00268083019182086, 0.03457775339484215, 0.0031127233523875475, 0.000510410696733743, 0.009807620197534561, 0.008875550702214241, 0.023541534319519997, 0.527433454990387, 0.015368063934147358, 0.16288210451602936, 0.20708848536014557, 0.014573587104678154, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03869367763400078, 0.07609386742115021, 0.09811960905790329, 0.19582945108413696, 0.7770717144012451, 0.05828123167157173, 0.03398818522691727, 0.4334997236728668, 0.06648975610733032, 0.07675088942050934, 0.06197739765048027, 0.7435874938964844, 0.14106591045856476, 0.2445826381444931, 0.04634908586740494, 0.16305263340473175, 0.020936982706189156, 0.020989498123526573, 0.007437185384333134, 0.034894589334726334, 0.016221558675169945, 0.04928300529718399, 0.02460765466094017, 0.006940784398466349, 0.010303718037903309, 0.11923910677433014, 0.002430608496069908, 0.020191287621855736, 0.019723495468497276, 0.015607062727212906, 0.14493703842163086, 0.29023703932762146, 0.2954525649547577, 0.024419967085123062, NaN, NaN, NaN, NaN, NaN, NaN], [0.0033209763932973146, 0.0013802923494949937, 0.007923663593828678, 0.01537866611033678, 0.27329060435295105, 0.0012711664894595742, 0.000925537955481559, 0.0031033798586577177, 0.00518713379278779, 0.008014743216335773, 0.01865261048078537, 0.32840412855148315, 0.015081376768648624, 0.0187647957354784, 0.007287481799721718, 0.04235544800758362, 0.014461617916822433, 0.006770138628780842, 0.009241613559424877, 0.002999901305884123, 0.0037356300745159388, 0.00043396188993938267, 0.005936506669968367, 0.00027135247364640236, 0.00836905650794506, 0.38652852177619934, 0.1805782914161682, 0.00859912484884262, 0.13720881938934326, 0.026457296684384346, 0.044793374836444855, 0.41905051469802856, 0.48846107721328735, 0.271888792514801, 0.02787640690803528, NaN, NaN, NaN, NaN, NaN], [0.012120293453335762, 0.00801909901201725, 0.05887366458773613, 0.08173726499080658, 0.42918333411216736, 0.0074272770434618, 0.018144551664590836, 0.002390465000644326, 0.19959968328475952, 0.01595914363861084, 0.19477497041225433, 0.24081164598464966, 0.32190656661987305, 0.2620943486690521, 0.06223426014184952, 0.03824670985341072, 0.05110237002372742, 0.016365332528948784, 0.027689939364790916, 0.004054062534123659, 0.0016762956511229277, 0.0059990487061440945, 0.061629924923181534, 0.02193543128669262, 0.004144957754760981, 0.11336920410394669, 0.0855039581656456, 0.16943661868572235, 0.007511935196816921, 0.0029296777211129665, 0.005633122753351927, 0.04470856487751007, 0.19621509313583374, 0.1449754536151886, 0.4407651424407959, 0.012849990278482437, NaN, NaN, NaN, NaN], [0.001324097509495914, 1.9873512428603135e-05, 0.0026336663868278265, 0.025088831782341003, 0.006480309646576643, 0.0015246026450768113, 0.009156930260360241, 0.006450172513723373, 0.006447002291679382, 0.003797400277107954, 0.0037222199607640505, 0.006030225194990635, 1.9453302229521796e-05, 0.0003723614208865911, 0.13770580291748047, 0.29710885882377625, 0.04157622903585434, 0.022785142064094543, 0.06820578873157501, 0.0019051277777180076, 0.004196317866444588, 0.012664434500038624, 0.010533612221479416, 0.00958634540438652, 0.006948783528059721, 0.024731770157814026, 0.04424457997083664, 0.0092665059491992, 0.008317369967699051, 0.00025302590802311897, 0.03921425715088844, 0.024433301761746407, 0.005475904326885939, 0.02041386440396309, 0.005526822991669178, 0.006030899006873369, 0.000147900907904841, NaN, NaN, NaN], [0.23361828923225403, 0.06709202378988266, 0.7719610333442688, 0.734594464302063, 0.7922726273536682, 0.049216482788324356, 0.04663456231355667, 0.060855433344841, 0.40224209427833557, 0.20935069024562836, 0.5060975551605225, 0.5454070568084717, 0.2919921875, 0.420108824968338, 0.08753460645675659, 0.15116539597511292, 0.029300624504685402, 0.014213098213076591, 0.04858435317873955, 0.008192096836864948, 0.0029929669108241796, 0.00010039177868748084, 0.02851700410246849, 0.014845605008304119, 0.01335279829800129, 0.07330357283353806, 0.08230004459619522, 0.06801280379295349, 0.12962418794631958, 0.38807213306427, 0.021973537281155586, 0.0005578201962634921, 0.13413770496845245, 0.18835364282131195, 0.15109674632549286, 0.5815849900245667, 0.6008182764053345, 0.10515720397233963, NaN, NaN], [0.01675574854016304, 0.0394110269844532, 0.07827049493789673, 0.20941881835460663, 0.5690934658050537, 0.13831959664821625, 0.015872817486524582, 0.2790753245353699, 0.07380014657974243, 0.05484941974282265, 0.11329877376556396, 0.046586740761995316, 0.27540746331214905, 0.3769146502017975, 0.12728242576122284, 0.05911188945174217, 0.013889956288039684, 0.00048160224105231464, 0.10393460839986801, 0.009916743263602257, 0.013972792774438858, 0.0005543273873627186, 0.0008135904208756983, 0.0005866698920726776, 0.00012856724788434803, 0.016669562086462975, 0.022332170978188515, 0.03126570209860802, 0.39481881260871887, 0.0021035531535744667, 0.09696949273347855, 0.0003469766234047711, 0.012058700434863567, 0.1351245492696762, 0.1276140809059143, 0.8529128432273865, 0.013427066616714, 0.3029053509235382, 0.0016288348706439137, NaN], [0.13399043679237366, 0.38312259316444397, 0.21414920687675476, 0.1335369348526001, 0.883351743221283, 0.17629003524780273, 0.21391625702381134, 0.35840436816215515, 0.7405950427055359, 0.11166028678417206, 0.2222289741039276, 0.2562817633152008, 0.20710349082946777, 0.2988908290863037, 0.10401280969381332, 0.22241219878196716, 0.00997188687324524, 0.004307668190449476, 0.0318865031003952, 0.026490027084946632, 0.04937301576137543, 0.016565896570682526, 0.0013930558925494552, 0.01958940364420414, 0.015218929387629032, 0.1830211728811264, 0.11458480358123779, 0.1729872077703476, 0.047152113169431686, 0.017883911728858948, 0.118315190076828, 0.07728181034326553, 0.31889867782592773, 0.1497264951467514, 0.2596881091594696, 0.15263305604457855, 0.024473916739225388, 0.19167250394821167, 0.12363447993993759, 0.010316992178559303]], [[0.03249572962522507, 0.01680905371904373, 0.01368993055075407, 0.005182549823075533, 0.0014828554121777415, 0.0045396420173347, 0.0006250899168662727, 0.01684878207743168, 0.005824672989547253, 0.007428525947034359, 0.009805276058614254, 0.003550198394805193, 0.007900950498878956, 0.009690256789326668, 0.18011362850666046, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.11159665137529373, 0.10346578061580658, 0.414338618516922, 0.08694489300251007, 0.2136271595954895, 0.10264819115400314, 0.023593097925186157, 0.0335584320127964, 0.0575689822435379, 0.06024341657757759, 0.1307218372821808, 0.13801440596580505, 0.1756829470396042, 0.14866231381893158, 0.1320090889930725, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1948547214269638, 0.038279034197330475, 0.07790879160165787, 0.04177340865135193, 0.004589961376041174, 0.0009778933599591255, 0.002051346004009247, 0.006739486940205097, 0.009280361235141754, 0.0007642557029612362, 0.0012637393083423376, 0.00433916924521327, 0.00236115837469697, 0.008354227058589458, 0.2381056696176529, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07799407094717026, 0.10201291739940643, 0.037178199738264084, 0.03369736298918724, 0.035083431750535965, 0.003606606973335147, 0.0009816481033340096, 0.010917055420577526, 0.019562464207410812, 0.004011118784546852, 0.0029224867466837168, 0.0011325542582198977, 0.00486336974427104, 0.007979645393788815, 0.2784355580806732, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.11467810720205307, 0.4025481641292572, 0.4041208028793335, 0.13489782810211182, 0.520052433013916, 0.013409112580120564, 0.0056337821297347546, 0.04408307746052742, 0.06485209614038467, 0.0023049998562783003, 0.0050890627317130566, 0.004091872368007898, 0.006159461103379726, 0.0242836382240057, 0.07189745455980301, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1516697108745575, 0.2241159826517105, 0.5074643492698669, 0.3874017000198364, 0.2519407868385315, 0.032381314784288406, 0.015091626904904842, 0.006451433524489403, 0.09749187529087067, 0.007731522433459759, 0.00912014115601778, 0.029297562316060066, 0.05765664204955101, 0.059585090726614, 0.023513801395893097, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01171550527215004, 0.10137046873569489, 0.870269238948822, 0.5154522657394409, 0.6626715660095215, 0.08923148363828659, 0.047533176839351654, 0.015608957968652248, 0.11948943883180618, 0.008091520518064499, 0.008133050054311752, 0.012773845344781876, 0.051611315459012985, 0.01502595841884613, 0.00961183663457632, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01722140610218048, 0.036506716161966324, 0.7147647738456726, 0.20675897598266602, 0.8291797637939453, 0.31030455231666565, 0.11803850531578064, 0.03327609598636627, 0.4245462417602539, 0.013293992727994919, 0.008976193144917488, 0.054750751703977585, 0.1754072904586792, 0.04528210312128067, 0.012820743955671787, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01982569508254528, 0.15988187491893768, 0.12975367903709412, 0.1326102912425995, 0.6299260258674622, 0.28946900367736816, 0.34108322858810425, 0.11804011464118958, 0.16752222180366516, 0.01777276024222374, 0.0021109972149133682, 0.0006076672580093145, 0.0030632279813289642, 0.00126487051602453, 0.1333881914615631, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005461913999170065, 0.03046412020921707, 0.008993657305836678, 0.005659051705151796, 0.004244270734488964, 0.02773391455411911, 0.042834386229515076, 0.13534432649612427, 0.27069228887557983, 0.04962563514709473, 0.015227400697767735, 0.0016283531440421939, 0.0014969720505177975, 0.0027089377399533987, 0.17130999267101288, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01672529987990856, 0.10339350253343582, 0.009749630466103554, 0.02030925825238228, 0.017326004803180695, 0.03957638517022133, 0.030999623239040375, 0.10308665037155151, 0.5008098483085632, 0.09767498821020126, 0.09780175238847733, 0.025981366634368896, 0.003117683343589306, 0.00962040200829506, 0.1932818591594696, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.026731140911579132, 0.05838552862405777, 0.07611822336912155, 0.05796685442328453, 0.5904980301856995, 0.010755263268947601, 0.0517524816095829, 0.055663660168647766, 0.29654714465141296, 0.1307908594608307, 0.1585402488708496, 0.03976760059595108, 0.07525579631328583, 0.16488958895206451, 0.1035238653421402, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.024593327194452286, 0.12932555377483368, 0.13568159937858582, 0.16021546721458435, 0.3227141201496124, 0.029398979619145393, 0.01611196994781494, 0.016819216310977936, 0.2378186136484146, 0.5602607131004333, 0.7615779638290405, 0.08417549729347229, 0.10783103108406067, 0.2013072967529297, 0.06744378060102463, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.018169090151786804, 0.26050350069999695, 0.078061044216156, 0.023439347743988037, 0.05254700779914856, 0.0014709478709846735, 0.002907117595896125, 0.009980114176869392, 0.1381266713142395, 0.5626046061515808, 0.5405392646789551, 0.11909772455692291, 0.008021530695259571, 0.06359856575727463, 0.009888176806271076, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08646434545516968, 0.009946366772055626, 0.041608210653066635, 0.009163393639028072, 0.12723588943481445, 0.17822976410388947, 0.01437843032181263, 0.0057503837160766125, 0.008486853912472725, 0.002935740165412426, 0.019836073741316795, 0.07525425404310226, 0.02854214422404766, 0.0230310820043087, 0.1518138200044632, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.169734388589859, 0.018695855513215065, 0.1739528477191925, 0.1591939628124237, 0.2628772258758545, 0.10412096232175827, 0.10786166787147522, 0.024563027545809746, 0.26776236295700073, 0.15710414946079254, 0.04751116409897804, 0.10171505063772202, 0.02745870314538479, 0.022933470085263252, 0.11237789690494537, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04881957918405533, 0.17062845826148987, 0.0187830850481987, 0.030382977798581123, 0.08311481773853302, 0.03788991644978523, 0.005156277678906918, 0.026916639879345894, 0.06639944016933441, 0.03180782124400139, 0.02173716016113758, 0.05343012511730194, 0.01850084401667118, 0.0033381145913153887, 0.04681381955742836, 0.12855423986911774, 0.11611904203891754, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11046597361564636, 0.13029024004936218, 0.30802851915359497, 0.31618139147758484, 0.21513698995113373, 0.08858107775449753, 0.07770872116088867, 0.030179373919963837, 0.2956576347351074, 0.19506438076496124, 0.06668522953987122, 0.15814362466335297, 0.07954283803701401, 0.09008871018886566, 0.11347464472055435, 0.1812644749879837, 0.04049589857459068, 0.04480821266770363, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14630576968193054, 0.10272074490785599, 0.06626180559396744, 0.39613619446754456, 0.5213132500648499, 0.09462913125753403, 0.19745559990406036, 0.14176879823207855, 0.45916420221328735, 0.2814978361129761, 0.19076579809188843, 0.7478294968605042, 0.15201923251152039, 0.4428024888038635, 0.11204658448696136, 0.14001408219337463, 0.11702272295951843, 0.5616602897644043, 0.021032487973570824, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17077980935573578, 0.372023344039917, 0.03066021017730236, 0.20403380692005157, 0.25160810351371765, 0.047236956655979156, 0.19034826755523682, 0.09997845441102982, 0.22249065339565277, 0.14956896007061005, 0.12211201339960098, 0.43811750411987305, 0.32559871673583984, 0.4463178217411041, 0.1688702404499054, 0.17309650778770447, 0.011261633597314358, 0.0023054813500493765, 0.0014516497030854225, 0.17103753983974457, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.001587467617355287, 0.0028523027431219816, 0.001275891438126564, 0.007771230302751064, 0.06833823025226593, 0.016362184658646584, 0.01554875634610653, 0.0395360104739666, 0.020186755806207657, 0.02848842740058899, 0.006796931382268667, 0.08043718338012695, 0.1258731484413147, 0.048048797994852066, 0.14538481831550598, 0.21775518357753754, 0.1599237471818924, 0.031671781092882156, 0.0027859890833497047, 0.1030324175953865, 0.009803196415305138, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19441094994544983, 0.026329312473535538, 0.03907056525349617, 0.5187185406684875, 0.06508557498455048, 0.04464683309197426, 0.23734036087989807, 0.10510969161987305, 0.23671847581863403, 0.2550508677959442, 0.2969563603401184, 0.31371036171913147, 0.023362383246421814, 0.04756302013993263, 0.09379850327968597, 0.1265520304441452, 0.2245447188615799, 0.3357183039188385, 0.19591355323791504, 0.030100535601377487, 0.11038237810134888, 0.012957160361111164, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.009693926200270653, 0.06855454295873642, 0.04046608507633209, 0.021632034331560135, 0.07003092765808105, 0.1099655032157898, 0.02166297659277916, 0.14673617482185364, 0.08559776097536087, 0.021444879472255707, 0.06376301497220993, 0.07838241755962372, 0.2981177270412445, 0.05645254626870155, 0.11510419100522995, 0.12113019824028015, 0.07331034541130066, 0.073086217045784, 0.038516201078891754, 0.16168329119682312, 0.12152494490146637, 0.1929183006286621, 0.11648087203502655, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1475960612297058, 0.11415769904851913, 0.09677327424287796, 0.22716772556304932, 0.05128113925457001, 0.0685737207531929, 0.17258046567440033, 0.05221087113022804, 0.2985250651836395, 0.36185649037361145, 0.6199293732643127, 0.5016448497772217, 0.08136574923992157, 0.06544326990842819, 0.09482244402170181, 0.15162895619869232, 0.16000056266784668, 0.47010278701782227, 0.008242717012763023, 0.016423694789409637, 0.19619418680667877, 0.014187236316502094, 0.2187093049287796, 0.3917299807071686, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16866622865200043, 0.03890697658061981, 0.038960762321949005, 0.045146964490413666, 0.003443084890022874, 0.025941072031855583, 0.02535194903612137, 0.01214737631380558, 0.39030662178993225, 0.11890958994626999, 0.2736153304576874, 0.3244759440422058, 0.00968784186989069, 0.014615286141633987, 0.03826850652694702, 0.1371021270751953, 0.24055053293704987, 0.39826682209968567, 0.0653936043381691, 0.06886317580938339, 0.1729464828968048, 0.02453671395778656, 0.2748231589794159, 0.23215962946414948, 0.03306089714169502, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08395736664533615, 0.10560688376426697, 0.29490047693252563, 0.15838190913200378, 0.20854075253009796, 0.047574300318956375, 0.025914132595062256, 0.0076736449263989925, 0.23083198070526123, 0.11239635199308395, 0.08150741457939148, 0.3915822207927704, 0.126749187707901, 0.08327525854110718, 0.07453686743974686, 0.05615014582872391, 0.17226241528987885, 0.4426397681236267, 0.534454345703125, 0.0034056571312248707, 0.0038566330913454294, 0.24011781811714172, 0.31882721185684204, 0.4456172287464142, 0.1489524245262146, 0.03087311051785946, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08537011593580246, 0.01334940642118454, 0.026223814114928246, 0.09485415369272232, 0.04081009700894356, 0.021519087255001068, 0.04835912212729454, 0.008561250753700733, 0.1425430029630661, 0.15310505032539368, 0.12245412170886993, 0.15674236416816711, 0.03265313804149628, 0.020860055461525917, 0.1338454782962799, 0.037336766719818115, 0.065662682056427, 0.18869149684906006, 0.795316219329834, 0.14649540185928345, 0.021824514493346214, 0.13452036678791046, 0.026823654770851135, 0.35548609495162964, 0.18523786962032318, 0.020790524780750275, 0.09485815465450287, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.009048069827258587, 0.008220783434808254, 0.0010462020291015506, 0.0073586152866482735, 0.01628630980849266, 0.0030796914361417294, 0.0014804736711084843, 0.0016866090008988976, 0.021953675895929337, 0.024090107530355453, 0.02321471832692623, 0.2417944222688675, 0.00791110284626484, 0.012413977645337582, 0.02231968566775322, 0.17983746528625488, 0.09746579825878143, 0.46259593963623047, 0.706605851650238, 0.09193093329668045, 0.2823830544948578, 0.007526541594415903, 0.10234087705612183, 0.24847157299518585, 0.2038285881280899, 0.012590465135872364, 0.002493936335667968, 0.04428662359714508, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02412300556898117, 0.02128133550286293, 0.018482450395822525, 0.016898121684789658, 0.07439899444580078, 0.03563898429274559, 0.04473365843296051, 0.0026737016160041094, 0.06965204328298569, 0.10727399587631226, 0.046027760952711105, 0.33166152238845825, 0.12371443957090378, 0.07036767154932022, 0.15801618993282318, 0.1421777307987213, 0.23310348391532898, 0.2705342471599579, 0.5351002812385559, 0.02795390971004963, 0.06031421944499016, 0.012775074690580368, 0.20022329688072205, 0.6570897698402405, 0.2668534517288208, 0.033325545489788055, 0.023841219022870064, 0.1455993354320526, 0.03172359615564346, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007644897326827049, 0.000292555516352877, 0.08444877713918686, 0.17402730882167816, 0.16615508496761322, 0.013423392549157143, 0.054235123097896576, 0.007257240824401379, 0.08712441474199295, 0.012547464109957218, 0.0328214131295681, 0.2736492455005646, 0.0037261026445776224, 0.09982366114854813, 0.13941559195518494, 0.11665362864732742, 0.1886645257472992, 0.03897944837808609, 0.07137740403413773, 0.15634050965309143, 0.15400150418281555, 0.13745756447315216, 0.05537642911076546, 0.2729690372943878, 0.04749782383441925, 0.05948880687355995, 0.014797642827033997, 0.11365658044815063, 0.002582019427791238, 0.20324750244617462, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07466596364974976, 0.11066461354494095, 0.02582395263016224, 0.1052846685051918, 0.0988694354891777, 0.13372771441936493, 0.10285167396068573, 0.04043884575366974, 0.12614820897579193, 0.00874736811965704, 0.006169801577925682, 0.3642371892929077, 0.13258321583271027, 0.14621633291244507, 0.16873647272586823, 0.29635345935821533, 0.04781435802578926, 0.41243496537208557, 0.03004680573940277, 0.13952067494392395, 0.045467544347047806, 4.634694050764665e-05, 0.20948387682437897, 0.002634957665577531, 0.005124728661030531, 0.0019075855379924178, 0.0009838729165494442, 0.0013485344825312495, 0.004148871172219515, 0.03574635088443756, 0.23113909363746643, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23522600531578064, 0.0398484542965889, 0.3737937808036804, 0.288825660943985, 0.10485613346099854, 0.11366727948188782, 0.29695606231689453, 0.06251946091651917, 0.35146233439445496, 0.04921486973762512, 0.25325968861579895, 0.33112239837646484, 0.06967249512672424, 0.050063006579875946, 0.0896972194314003, 0.22071197628974915, 0.019423967227339745, 0.06694509834051132, 0.2386176735162735, 0.015943216159939766, 0.14270655810832977, 0.039743710309267044, 0.014324809424579144, 0.581375777721405, 0.040944233536720276, 0.011615565046668053, 0.02482481673359871, 0.06486763060092926, 0.002298883395269513, 0.009274494834244251, 0.012798607349395752, 0.009606687352061272, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1151093989610672, 0.085483118891716, 0.1238018348813057, 0.10984596610069275, 0.07372570037841797, 0.07080911099910736, 0.04283013194799423, 0.011434272862970829, 0.6184931993484497, 0.031299810856580734, 0.1232943907380104, 0.4399086534976959, 0.16973690688610077, 0.18915507197380066, 0.06319096684455872, 0.04979729279875755, 0.005993144121021032, 0.05621323734521866, 0.3196869492530823, 0.0036542851012200117, 0.006608159281313419, 0.07202935218811035, 0.023804083466529846, 0.08581908792257309, 0.002907529706135392, 0.0022882334887981415, 0.155064657330513, 0.6752456426620483, 0.19066885113716125, 0.033486951142549515, 0.1545412391424179, 0.3257397711277008, 0.07836033403873444, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23179487884044647, 0.03441762179136276, 0.058240070939064026, 0.17834095656871796, 0.049968671053647995, 0.038375332951545715, 0.05405527353286743, 0.00672679441049695, 0.09475977718830109, 0.0764862671494484, 0.1440851390361786, 0.11337311565876007, 0.06998162716627121, 0.031302694231271744, 0.13650138676166534, 0.02027127519249916, 0.036089565604925156, 0.0908525288105011, 0.6094546914100647, 0.035198476165533066, 0.01578100211918354, 0.08828305453062057, 0.00740778585895896, 0.08938029408454895, 0.055872198194265366, 0.01406459603458643, 0.05842210724949837, 0.7085317969322205, 0.04043729975819588, 0.00861792266368866, 0.05839632451534271, 0.306302547454834, 0.11257344484329224, 0.09490343183279037, NaN, NaN, NaN, NaN, NaN, NaN], [0.037197839468717575, 0.022889001294970512, 0.00443503400310874, 0.02830665186047554, 0.056754183024168015, 0.011282439343631268, 0.008815057575702667, 0.005641489755362272, 0.03366301208734512, 0.01200089417397976, 0.022881681099534035, 0.24835483729839325, 0.020306341350078583, 0.028865927830338478, 0.09140723943710327, 0.2219613641500473, 0.0726998969912529, 0.3657586872577667, 0.6172192692756653, 0.07194076478481293, 0.17607101798057556, 0.009873087517917156, 0.09032700955867767, 0.1240842267870903, 0.06592906266450882, 0.021971723064780235, 0.004476875066757202, 0.04292584955692291, 0.013240871019661427, 0.03868407383561134, 0.0364602766931057, 0.007298360578715801, 0.02817610278725624, 0.0009550384129397571, 0.033005379140377045, NaN, NaN, NaN, NaN, NaN], [0.019821494817733765, 0.0461096465587616, 0.009799499064683914, 0.008886821568012238, 0.03164605051279068, 0.03408728539943695, 0.06531291455030441, 0.004583337344229221, 0.015776870772242546, 0.0067581660114228725, 0.005247185938060284, 0.0803409293293953, 0.12878651916980743, 0.033680036664009094, 0.15540239214897156, 0.2832254469394684, 0.40537261962890625, 0.25111812353134155, 0.4335843026638031, 0.05173255130648613, 0.02949104830622673, 0.00834138598293066, 0.5043417811393738, 0.45271721482276917, 0.10732957720756531, 0.08741836994886398, 0.06616821885108948, 0.1252485066652298, 0.04288535565137863, 0.0027607728261500597, 0.11496254801750183, 0.007436650805175304, 0.04789961501955986, 0.014611729420721531, 0.05419020354747772, 0.013982507400214672, NaN, NaN, NaN, NaN], [0.006374652031809092, 0.0003620072384364903, 0.05079201981425285, 0.10443739593029022, 0.13200052082538605, 0.007841442711651325, 0.04038690775632858, 0.005943085998296738, 0.04502689838409424, 0.005707652773708105, 0.010736361145973206, 0.17095635831356049, 0.0034604808315634727, 0.08947119116783142, 0.1356668770313263, 0.1133793368935585, 0.2190774381160736, 0.04727642610669136, 0.08785698562860489, 0.22799502313137054, 0.1395695060491562, 0.17899513244628906, 0.05776361748576164, 0.19579172134399414, 0.03426501154899597, 0.08577524870634079, 0.027239171788096428, 0.22711482644081116, 0.005856664851307869, 0.3394412696361542, 0.03666312247514725, 0.053877539932727814, 0.02460121363401413, 0.02095765992999077, 0.08733106404542923, 0.0007995758787728846, 0.19509249925613403, NaN, NaN, NaN], [0.05784226581454277, 0.06101800128817558, 0.011293647810816765, 0.030310506001114845, 0.02692366950213909, 0.10355494171380997, 0.1643158346414566, 0.02146345190703869, 0.10686127096414566, 0.0006235101609490812, 0.001034505432471633, 0.12770172953605652, 0.08152752369642258, 0.06569667905569077, 0.13584844768047333, 0.32134389877319336, 0.08582156896591187, 0.36053547263145447, 0.06279635429382324, 0.1449708491563797, 0.041098933666944504, 0.0002254477294627577, 0.3326246738433838, 0.0031729326583445072, 0.011426791548728943, 0.00305219367146492, 0.0021134610287845135, 0.0029090954922139645, 0.0035086346324533224, 0.0884322077035904, 0.7275413274765015, 4.6366836613742635e-05, 0.004567307885736227, 0.00048746803076937795, 0.0006845259922556579, 0.00036436106893233955, 0.0336419902741909, 0.19370199739933014, NaN, NaN], [0.24130187928676605, 0.04057329148054123, 0.37395209074020386, 0.32695549726486206, 0.18701796233654022, 0.1542418897151947, 0.4307348132133484, 0.07850468903779984, 0.24226921796798706, 0.027551302686333656, 0.17328326404094696, 0.256756991147995, 0.1007629856467247, 0.0746576264500618, 0.1026487648487091, 0.2431764006614685, 0.00993723887950182, 0.023469794541597366, 0.12711890041828156, 0.013049022294580936, 0.09880916029214859, 0.014819139614701271, 0.015189954079687595, 0.19677633047103882, 0.012298321351408958, 0.006653454154729843, 0.017306946218013763, 0.044382814317941666, 0.005554118659347296, 0.008197239600121975, 0.025704391300678253, 0.01238576602190733, 0.005520223639905453, 0.018611198291182518, 0.07344726473093033, 0.00026948421145789325, 0.012129159644246101, 0.01222553662955761, 0.005697384011000395, NaN], [0.18065117299556732, 0.0850963443517685, 0.37481072545051575, 0.36960142850875854, 0.042269542813301086, 0.04689870774745941, 0.10553675144910812, 0.031215613707900047, 0.03850337490439415, 0.055640675127506256, 0.11964564025402069, 0.20274300873279572, 0.22541530430316925, 0.07314471900463104, 0.12492100149393082, 0.018590128049254417, 0.012204503640532494, 0.0029425490647554398, 0.01610950194299221, 0.024503106251358986, 0.04006015509366989, 0.018976394087076187, 0.006591797806322575, 0.002320006489753723, 0.001339062349870801, 0.028667215257883072, 0.03959575667977333, 0.00960585381835699, 0.009797154925763607, 0.022796805948019028, 0.1637655347585678, 0.20084494352340698, 0.05620957538485527, 0.12549559772014618, 0.022888751700520515, 0.037492163479328156, 0.04711981862783432, 0.44462573528289795, 0.3949664235115051, 0.3300856053829193]], [[0.7472922801971436, 0.06644202023744583, 0.12477048486471176, 0.07691145688295364, 0.17426471412181854, 0.17453429102897644, 0.8713244795799255, 0.22852616012096405, 0.7413471937179565, 0.5253387689590454, 0.16250024735927582, 0.19445888698101044, 0.10716042667627335, 0.2310180366039276, 0.05536508187651634, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.13811203837394714, 0.40626850724220276, 0.2430061399936676, 0.22277961671352386, 0.18414726853370667, 0.21574343740940094, 0.8225958943367004, 0.5822084546089172, 0.41659367084503174, 0.35776287317276, 0.4909748136997223, 0.39181941747665405, 0.34554892778396606, 0.6003718972206116, 0.043436333537101746, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03130434453487396, 0.0024298657663166523, 0.43690061569213867, 0.5043830275535583, 0.07530603557825089, 0.015139158815145493, 0.03498073294758797, 0.012510559521615505, 0.6034607291221619, 0.7801509499549866, 0.8402397036552429, 0.5008089542388916, 0.17657218873500824, 0.11879491806030273, 0.05205746740102768, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09661327302455902, 0.049034956842660904, 0.05331439897418022, 0.7222777009010315, 0.25703296065330505, 0.020087046548724174, 0.06235986202955246, 0.0651831179857254, 0.32113927602767944, 0.5460676550865173, 0.7442458271980286, 0.5571728348731995, 0.08091285824775696, 0.059992171823978424, 0.029936296865344048, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00972762517631054, 0.007879518903791904, 0.02767527848482132, 0.019306808710098267, 0.22303025424480438, 0.007516835816204548, 0.007440114859491587, 0.022099999710917473, 0.29848337173461914, 0.9075287580490112, 0.5192471742630005, 0.8959035873413086, 0.055479276925325394, 0.04288056865334511, 0.021558567881584167, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03836950287222862, 0.05839527025818825, 0.005887853913009167, 0.08494037389755249, 0.012977076694369316, 0.5726994872093201, 0.09935679286718369, 0.13719113171100616, 0.448569655418396, 0.5218547582626343, 0.13800226151943207, 0.1732572466135025, 0.4354798197746277, 0.4542965292930603, 0.12337890267372131, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.17566490173339844, 0.03925755247473717, 0.01956782303750515, 0.04187121242284775, 0.02149910107254982, 0.049183186143636703, 0.5663522481918335, 0.045388396829366684, 0.45039302110671997, 0.19015204906463623, 0.22913624346256256, 0.10953018814325333, 0.21400360763072968, 0.572381854057312, 0.1667298972606659, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2136794924736023, 0.20810233056545258, 0.08830246329307556, 0.27903637290000916, 0.02317022904753685, 0.10591837763786316, 0.15087167918682098, 0.5299598574638367, 0.3452024757862091, 0.15965056419372559, 0.2765912711620331, 0.516273021697998, 0.2846863567829132, 0.3888777792453766, 0.0719258189201355, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07398565858602524, 0.04620325192809105, 0.3374384939670563, 0.19415578246116638, 0.025615269318223, 0.010194968432188034, 0.018451105803251266, 0.0005573831731453538, 0.5073301196098328, 0.25312942266464233, 0.15244188904762268, 0.143111914396286, 0.051979612559080124, 0.04884689673781395, 0.12363318353891373, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5805832147598267, 0.09438126534223557, 0.24455930292606354, 0.06023820489645004, 0.03943831846117973, 0.021930387243628502, 0.026398053392767906, 0.012488989159464836, 0.011794325895607471, 0.767930269241333, 0.4412824809551239, 0.07896611094474792, 0.01228941697627306, 0.018458310514688492, 0.10866446793079376, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1145540103316307, 0.05171298235654831, 0.7072227597236633, 0.4839639961719513, 0.11294537037611008, 0.06211492419242859, 0.021921994164586067, 0.0025394419208168983, 0.0033554628025740385, 0.07357389479875565, 0.7795555591583252, 0.05686911940574646, 0.022035235539078712, 0.034172482788562775, 0.07262071967124939, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08121224492788315, 0.025126218795776367, 0.4891066551208496, 0.29065003991127014, 0.20622830092906952, 0.36699986457824707, 0.07864820212125778, 0.014422299340367317, 0.016684990376234055, 0.0649130716919899, 0.07936163991689682, 0.6605017185211182, 0.18783104419708252, 0.08294262737035751, 0.03477967903017998, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0700722336769104, 0.1311686784029007, 0.5332850813865662, 0.1558467000722885, 0.36321985721588135, 0.7912644743919373, 0.32202765345573425, 0.1934671401977539, 0.031114375218749046, 0.09986341744661331, 0.08630139380693436, 0.055017780512571335, 0.44781896471977234, 0.42446693778038025, 0.1060790941119194, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08875010907649994, 0.06247853487730026, 0.4616371989250183, 0.12711729109287262, 0.3074216842651367, 0.19363558292388916, 0.2020244151353836, 0.0779867023229599, 0.019831692799925804, 0.03570472076535225, 0.07392378151416779, 0.04282142594456673, 0.0921483263373375, 0.3143211603164673, 0.22281906008720398, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5682113766670227, 0.1249876543879509, 0.7342633008956909, 0.902918815612793, 0.7035764455795288, 0.3718622326850891, 0.6157594919204712, 0.15625660121440887, 0.8438207507133484, 0.9341241121292114, 0.8159937858581543, 0.6624717712402344, 0.3264457583427429, 0.5970154404640198, 0.003644895739853382, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2626786530017853, 0.0849713385105133, 0.11954734474420547, 0.09299539029598236, 0.12019845843315125, 0.1675114780664444, 0.12060416489839554, 0.1292921006679535, 0.33819568157196045, 0.3146125078201294, 0.20831438899040222, 0.39596518874168396, 0.2145393043756485, 0.2666572332382202, 0.05294949933886528, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1368129849433899, 0.16135744750499725, 0.15528292953968048, 0.24771884083747864, 0.1416730433702469, 0.05803852900862694, 0.07394444942474365, 0.10563277453184128, 0.033661823719739914, 0.18054474890232086, 0.1985052525997162, 0.05316935107111931, 0.05009648948907852, 0.043446026742458344, 0.03412564843893051, 0.16815106570720673, 0.017178548499941826, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0030849967151880264, 0.0006440586876124144, 0.016017315909266472, 0.0037563794758170843, 0.009170617908239365, 0.0008218333241529763, 0.0032779525499790907, 0.0006974118296056986, 0.12044321000576019, 0.005983977112919092, 0.011704917997121811, 0.023849062621593475, 0.0031650178134441376, 0.01169323269277811, 0.16145823895931244, 0.2022658735513687, 0.005017802584916353, 0.01763225719332695, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02798222377896309, 0.012448069639503956, 0.018199993297457695, 0.0069459048099815845, 0.042531996965408325, 0.009718443267047405, 0.013791781850159168, 0.04370715469121933, 0.21814176440238953, 0.024645699188113213, 0.0633857473731041, 0.0802498310804367, 0.006771658081561327, 0.040147896856069565, 0.4109969139099121, 0.16166983544826508, 0.033678483217954636, 0.014520054683089256, 0.003462842432782054, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02001010812819004, 0.02580004744231701, 0.006869276985526085, 0.007543967105448246, 0.017537932842969894, 0.00023914838675409555, 0.006739956792443991, 0.008227680809795856, 0.05446772649884224, 0.03320171311497688, 0.022232946008443832, 0.01063306163996458, 0.0007752752280794084, 0.0028256638906896114, 0.2078467756509781, 0.10712886601686478, 0.3422684967517853, 0.05748933553695679, 0.2768969237804413, 0.004922540858387947, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0034786108881235123, 0.00011826713307527825, 0.002407492371276021, 0.005452741403132677, 0.002847136929631233, 0.003419033018872142, 0.013516861945390701, 0.002940082224085927, 0.002004653448238969, 0.006652397103607655, 0.004079414997249842, 0.0028307989705353975, 0.0006369714974425733, 0.002542868722230196, 0.1463778167963028, 0.047501806169748306, 0.48201972246170044, 0.4827657639980316, 0.48466482758522034, 0.022285524755716324, 0.00022009640815667808, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0762338638305664, 0.11778479814529419, 0.03105221875011921, 0.006415408570319414, 0.0190818402916193, 0.027191398665308952, 0.005222225561738014, 0.0170834269374609, 0.05309534817934036, 0.00936796236783266, 0.03816217556595802, 0.17940494418144226, 0.020440110936760902, 0.13513173162937164, 0.3000544309616089, 0.1517350822687149, 0.04445230960845947, 0.09343461692333221, 0.05873756855726242, 0.07171032577753067, 0.22849556803703308, 0.05614512786269188, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16228125989437103, 0.35454851388931274, 0.04026315361261368, 0.03822629526257515, 0.023396998643875122, 0.30800631642341614, 0.24136781692504883, 0.15176478028297424, 0.0788438618183136, 0.07347536832094193, 0.030298085883259773, 0.007365733850747347, 0.1061745211482048, 0.2841038405895233, 0.07787416130304337, 0.25680339336395264, 0.00010820403986144811, 0.0123103903606534, 0.007049524690955877, 0.001952940714545548, 0.027401963248848915, 0.0028134624008089304, 0.00041907382546924055, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05645078793168068, 0.023840615525841713, 0.013567867688834667, 0.00750470208004117, 0.07643276453018188, 0.08809614926576614, 0.06102507561445236, 0.021034346893429756, 0.039108242839574814, 0.02081543207168579, 0.011458326131105423, 0.20520520210266113, 0.027348484843969345, 0.06299317628145218, 0.2514360249042511, 0.005559808574616909, 0.007462772540748119, 0.013313480652868748, 0.017376750707626343, 0.0038542840629816055, 0.006728595122694969, 0.5333897471427917, 0.03155524656176567, 0.15571120381355286, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.016126127913594246, 0.01087501272559166, 0.01213990617543459, 0.004450921434909105, 0.014690833166241646, 0.30525338649749756, 0.02716207131743431, 0.09981174021959305, 0.027048761025071144, 0.01336466334760189, 0.006663064938038588, 0.0520603246986866, 0.042623523622751236, 0.018071996048092842, 0.1948687732219696, 0.004124458413571119, 0.004751718603074551, 0.016015900298953056, 0.01742120459675789, 0.032125748693943024, 0.010460411198437214, 0.45809611678123474, 0.07138781994581223, 0.5171095728874207, 0.17626723647117615, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04185086488723755, 0.034399643540382385, 0.041276611387729645, 0.0584070086479187, 0.019824109971523285, 0.00856409315019846, 0.08867836743593216, 0.10337970405817032, 0.09468665719032288, 0.02033121883869171, 0.018058426678180695, 0.059728462249040604, 0.09321711957454681, 0.20168805122375488, 0.1941128522157669, 0.24881334602832794, 0.005821824539452791, 0.031170587986707687, 0.009853766299784184, 0.027254868298768997, 0.01885347068309784, 0.02900754101574421, 0.013663586229085922, 0.012090054340660572, 0.0009272377355955541, 0.0030740045476704836, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01436887588351965, 0.027922889217734337, 0.046481672674417496, 0.010071231983602047, 0.026127830147743225, 0.06003356724977493, 0.022118212655186653, 0.08160483092069626, 0.07784195244312286, 0.010694753378629684, 0.017130734398961067, 0.05340806022286415, 0.041410259902477264, 0.035884104669094086, 0.2491855025291443, 0.19627800583839417, 0.054823894053697586, 0.1886557787656784, 0.00739922234788537, 0.09451853483915329, 0.01572227105498314, 0.0010023268405348063, 0.0061036646366119385, 0.0014733865391463041, 0.0003654434985946864, 0.006776102818548679, 0.0027319795917719603, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.053393200039863586, 0.04828185588121414, 0.03453819081187248, 0.013636122457683086, 0.25098806619644165, 0.12313847243785858, 0.02266266942024231, 0.017618268728256226, 0.019785437732934952, 0.005274764262139797, 0.021053072065114975, 0.20679616928100586, 0.021523641422390938, 0.03855947405099869, 0.1109846979379654, 0.07900664210319519, 0.04510375112295151, 0.002657376928254962, 0.0032053724862635136, 0.0027717212215065956, 0.008140889927744865, 0.0011833005119115114, 0.04105996713042259, 0.0017470002640038729, 0.008194361813366413, 0.019470002502202988, 0.3834601640701294, 0.013146632350981236, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12851715087890625, 0.12400124222040176, 0.2637093663215637, 0.02439347468316555, 0.07038086652755737, 0.12665364146232605, 0.04898465424776077, 0.03412041813135147, 0.0263816025108099, 0.023226425051689148, 0.11513664573431015, 0.09503531455993652, 0.1215861439704895, 0.11158601939678192, 0.14799171686172485, 0.06578069925308228, 0.08975866436958313, 0.022234706208109856, 0.015388325788080692, 0.006578383035957813, 0.011582762002944946, 0.014906905591487885, 0.04645423963665962, 0.008417387492954731, 0.0318351611495018, 0.024524353444576263, 0.5050408244132996, 0.1078883558511734, 0.09876319766044617, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0010214513167738914, 0.004835289902985096, 0.0042709591798484325, 0.0026378841139376163, 0.005866974592208862, 0.008331544697284698, 0.006240549497306347, 0.01365274004638195, 0.1720106601715088, 0.0005307683604769409, 0.0007543729152530432, 0.004353509750217199, 0.0002490385086275637, 0.0017186965560540557, 0.14317919313907623, 0.010224410332739353, 0.16048979759216309, 0.09242240339517593, 0.259725958108902, 0.06779038906097412, 0.007232773117721081, 0.09601377695798874, 0.28109633922576904, 0.2723717987537384, 0.1275584101676941, 0.06318827718496323, 0.25179460644721985, 0.2496732771396637, 0.6837621927261353, 0.0018262360244989395, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07205050438642502, 0.12816517055034637, 0.23753608763217926, 0.08243206143379211, 0.5041552186012268, 0.11970840394496918, 0.04837331175804138, 0.034129947423934937, 0.16484025120735168, 0.011070297099649906, 0.05054215341806412, 0.039082955569028854, 0.09205758571624756, 0.1322212517261505, 0.16203875839710236, 0.04991341754794121, 0.05319196358323097, 0.14821480214595795, 0.020963814109563828, 0.03095317631959915, 0.024693654850125313, 0.008621936663985252, 0.14259999990463257, 0.042305052280426025, 0.09002435952425003, 0.005839803721755743, 0.061309609562158585, 0.23589004576206207, 0.30903181433677673, 0.18008928000926971, 0.49815359711647034, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014979850500822067, 0.03769220784306526, 0.04367470741271973, 0.009415187872946262, 0.019922776147723198, 0.11522040516138077, 0.014906312339007854, 0.04722318425774574, 0.06570684164762497, 0.008925273083150387, 0.019600573927164078, 0.0472339391708374, 0.005348374601453543, 0.0017698986921459436, 0.1612817794084549, 0.015294999815523624, 0.03185835853219032, 0.0202027577906847, 0.03976168856024742, 0.0711589902639389, 0.13473857939243317, 0.0059967683628201485, 0.0031582280062139034, 0.003374348394572735, 0.002362155122682452, 0.015532899647951126, 0.038825590163469315, 0.08611883223056793, 0.03844507411122322, 0.009673628956079483, 0.7068554162979126, 0.013729983940720558, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.023198002949357033, 0.06148262694478035, 0.046858664602041245, 0.013079512864351273, 0.08762317895889282, 0.00949429627507925, 0.0484880767762661, 0.025388503447175026, 0.04432932287454605, 0.006038118619471788, 0.010164186358451843, 0.08949221670627594, 0.06122652441263199, 0.11895263940095901, 0.16355113685131073, 0.2531464695930481, 0.013071080669760704, 0.035546887665987015, 0.020458703860640526, 0.01740572415292263, 0.009577612392604351, 0.014396607875823975, 0.05952044576406479, 0.013841827400028706, 0.0003843819722533226, 0.0024746267590671778, 0.007157978601753712, 0.013787134550511837, 0.033782534301280975, 0.003469215938821435, 0.007898973301053047, 0.05525756999850273, 0.003914556000381708, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.009917332790791988, 0.01408212911337614, 0.047434139996767044, 0.005388779100030661, 0.023170381784439087, 0.034844160079956055, 0.009820640087127686, 0.03569778800010681, 0.05789060518145561, 0.0037882563192397356, 0.013808010146021843, 0.04879388585686684, 0.03114072047173977, 0.0507131889462471, 0.18661679327487946, 0.20273520052433014, 0.05025332421064377, 0.2335304319858551, 0.009442931972444057, 0.13508503139019012, 0.0181263517588377, 0.0010557285277172923, 0.003822105238214135, 0.0018545370548963547, 0.0003744752029888332, 0.0046313730999827385, 0.0008518796530552208, 0.006319030188024044, 0.014203540980815887, 0.0018540708115324378, 0.003058186499401927, 0.002516325796023011, 0.001575352856889367, 0.0014869269216433167, NaN, NaN, NaN, NaN, NaN, NaN], [0.0652787834405899, 0.04612350836396217, 0.04522763565182686, 0.014745297841727734, 0.27657532691955566, 0.16156227886676788, 0.025164838880300522, 0.017732013016939163, 0.023105354979634285, 0.005499221384525299, 0.020183373242616653, 0.19132839143276215, 0.020515967160463333, 0.056384406983852386, 0.14304831624031067, 0.059709664434194565, 0.021975213661789894, 0.002582199638709426, 0.002308695577085018, 0.00240446999669075, 0.004605048336088657, 0.0013587460853159428, 0.04497997462749481, 0.0009150391560979187, 0.0030208472162485123, 0.016492530703544617, 0.2572183907032013, 0.006429646629840136, 0.013558420352637768, 0.06110598146915436, 0.03728436306118965, 0.019318275153636932, 0.03907725587487221, 0.4492114782333374, 0.01579420454800129, NaN, NaN, NaN, NaN, NaN], [0.14539514482021332, 0.21388974785804749, 0.34906452894210815, 0.031415559351444244, 0.062017399817705154, 0.08485611528158188, 0.03913363441824913, 0.03569692373275757, 0.023448940366506577, 0.020669998601078987, 0.1622902750968933, 0.1315622329711914, 0.09182734042406082, 0.1796703040599823, 0.13702963292598724, 0.025836847722530365, 0.04185229912400246, 0.017175624147057533, 0.005038154777139425, 0.006518983747810125, 0.0043221269734203815, 0.004393702372908592, 0.03134007006883621, 0.002082354621961713, 0.00246719503775239, 0.00855192355811596, 0.28023120760917664, 0.0558621920645237, 0.020582975819706917, 0.00264686718583107, 0.052114877849817276, 0.01051351334899664, 0.0282430537045002, 0.640393853187561, 0.11605942994356155, 0.042242906987667084, NaN, NaN, NaN, NaN], [0.0009059146977961063, 0.004442692268639803, 0.002850044285878539, 0.0024173678830266, 0.006019651889801025, 0.004450949374586344, 0.003768310882151127, 0.009272964671254158, 0.19643637537956238, 0.0004391498805489391, 0.0004852984275203198, 0.005083973053842783, 0.000164541692356579, 0.001456208759918809, 0.13767127692699432, 0.00790853425860405, 0.07249781489372253, 0.09275110065937042, 0.13612288236618042, 0.0654025748372078, 0.0028184219263494015, 0.039562828838825226, 0.11378230899572372, 0.08281006664037704, 0.029445864260196686, 0.03387679159641266, 0.16786670684814453, 0.2288694977760315, 0.6801032423973083, 0.0008468713494949043, 0.32477572560310364, 0.20243169367313385, 0.04291461780667305, 0.2565927505493164, 0.2435160130262375, 0.8255255222320557, 0.0008029205491766334, NaN, NaN, NaN], [0.03601038455963135, 0.08602340519428253, 0.042799800634384155, 0.007577326148748398, 0.12637566030025482, 0.07399067282676697, 0.02205651067197323, 0.01475659292191267, 0.14170114696025848, 0.004405674524605274, 0.013175459578633308, 0.03142356127500534, 0.06839168816804886, 0.09161193668842316, 0.1376270353794098, 0.06791312247514725, 0.034157127141952515, 0.26634278893470764, 0.01933334954082966, 0.08246968686580658, 0.03419587388634682, 0.019395295530557632, 0.1259232461452484, 0.02923283353447914, 0.07644251734018326, 0.00482177222147584, 0.03381035849452019, 0.2429695725440979, 0.4201262295246124, 0.21319957077503204, 0.1469077318906784, 0.005101305432617664, 0.05322602018713951, 0.08754345029592514, 0.4596864581108093, 0.32625797390937805, 0.2286616712808609, 0.6285872459411621, NaN, NaN], [0.014056011103093624, 0.020953036844730377, 0.03237491473555565, 0.0042424313724040985, 0.017438247799873352, 0.08849667757749557, 0.005714876111596823, 0.025588830932974815, 0.08735965192317963, 0.009712125174701214, 0.02371004782617092, 0.06271149963140488, 0.00425978796556592, 0.0027238703332841396, 0.14272134006023407, 0.0236026793718338, 0.032931454479694366, 0.018642868846654892, 0.052601076662540436, 0.09147398918867111, 0.11555580049753189, 0.00512799434363842, 0.006684163119643927, 0.005264784675091505, 0.0023014512844383717, 0.005628940649330616, 0.03778252378106117, 0.09737572073936462, 0.12753169238567352, 0.00698094442486763, 0.6853439807891846, 0.02319822832942009, 0.018658116459846497, 0.08199534565210342, 0.18709556758403778, 0.07321563363075256, 0.027500100433826447, 0.6534799337387085, 0.01572287082672119, NaN], [0.15719948709011078, 0.03286461904644966, 0.12916648387908936, 0.10299614071846008, 0.014032969251275063, 0.011700707487761974, 0.06680437922477722, 0.016068298369646072, 0.04505150765180588, 0.056866806000471115, 0.07287567108869553, 0.09101171046495438, 0.06734755635261536, 0.17371943593025208, 0.1297563910484314, 0.24674107134342194, 0.007728901691734791, 0.010779940523207188, 0.01413859985768795, 0.08573849499225616, 0.014258946292102337, 0.014431791380047798, 0.00199147523380816, 0.006254997570067644, 0.003036148613318801, 0.015209752134978771, 0.015118316747248173, 0.05811062082648277, 0.01987045258283615, 0.012226228602230549, 0.021392136812210083, 0.08141177892684937, 0.016042163595557213, 0.01565614528954029, 0.05352389067411423, 0.01607833430171013, 0.014641694724559784, 0.020306598395109177, 0.06722531467676163, 0.005379782523959875]], [[0.0183254461735487, 0.00659788167104125, 0.046570390462875366, 0.04327844828367233, 0.10241857916116714, 0.5407979488372803, 0.0026681027375161648, 0.15349310636520386, 0.0016508381813764572, 0.010916458442807198, 0.036675866693258286, 0.15769276022911072, 0.4073828458786011, 0.04228133708238602, 0.15622197091579437, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07985992729663849, 0.06383417546749115, 0.024972105398774147, 0.18746882677078247, 0.11770728975534439, 0.13333363831043243, 0.006719768047332764, 0.04288880154490471, 0.001412510173395276, 0.058754052966833115, 0.14280158281326294, 0.13529875874519348, 0.08268098533153534, 0.02367851696908474, 0.1494951695203781, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01403640117496252, 0.014278309419751167, 0.1034439280629158, 0.022417087107896805, 0.10706920921802521, 0.018271848559379578, 0.046350300312042236, 0.04233889281749725, 0.037542134523391724, 0.0005760823260061443, 0.004724643658846617, 0.233056902885437, 0.2574465572834015, 0.1892177164554596, 0.21611936390399933, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.032590243965387344, 0.14464972913265228, 0.1993260532617569, 0.12327495217323303, 0.27639931440353394, 0.011173157021403313, 0.012838426046073437, 0.0802190750837326, 0.0400678850710392, 0.013469994999468327, 0.025247203186154366, 0.30583158135414124, 0.6397863626480103, 0.258308470249176, 0.08317234367132187, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.007401467300951481, 0.04209339618682861, 0.1104009672999382, 0.04737341031432152, 0.06253770738840103, 0.0023836863692849874, 0.05026397854089737, 0.01439946424216032, 0.006556188687682152, 0.001721409265883267, 0.01908556930720806, 0.022761031985282898, 0.01600046642124653, 0.22344018518924713, 0.2855986952781677, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00031611474696546793, 0.010241325944662094, 0.005327185150235891, 0.007503898814320564, 0.009216651320457458, 0.08986854553222656, 0.0022410263773053885, 0.04830501973628998, 0.013246790505945683, 0.0036830154713243246, 0.001605262397788465, 0.004246865399181843, 0.005818811245262623, 0.00778583250939846, 0.2319662719964981, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00028042105259373784, 0.004604758229106665, 0.008834331296384335, 0.010530425235629082, 0.04934454336762428, 0.3239482641220093, 0.02964387647807598, 0.041019540280103683, 0.028070107102394104, 0.002580034313723445, 0.0034616885241121054, 0.006594499107450247, 0.07731658220291138, 0.01784621551632881, 0.10414844751358032, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.002352550160139799, 0.00811008270829916, 0.007519579492509365, 0.09616736322641373, 0.00784054771065712, 0.06404154002666473, 0.025837063789367676, 0.06720300018787384, 0.008001329377293587, 0.016075177118182182, 0.0036620565224438906, 0.031110821291804314, 0.1529460847377777, 0.03003939613699913, 0.19531111419200897, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.014062762260437012, 0.03979215770959854, 0.0070105125196278095, 0.010145032778382301, 0.023933248594403267, 0.08613994717597961, 0.027301009744405746, 0.007488427218049765, 0.04610109701752663, 0.00706111453473568, 0.005716769024729729, 0.008516461588442326, 0.04168170318007469, 0.004054774064570665, 0.3198099434375763, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0027477010153234005, 0.009237049147486687, 0.005884162615984678, 0.004349177703261375, 0.039300523698329926, 0.06504905968904495, 0.005921225529164076, 0.05048412084579468, 0.004538795445114374, 0.019958311691880226, 0.08035917580127716, 0.1339075267314911, 0.45191076397895813, 0.1108468547463417, 0.15996994078159332, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0004566281568259001, 0.0044615683145821095, 0.008062957786023617, 0.0003266451822128147, 0.032452184706926346, 0.004190187435597181, 0.0009983428753912449, 0.0015420016134157777, 0.025539150461554527, 0.0009114624699577689, 0.001308016013354063, 0.11249691247940063, 0.5262115597724915, 0.16036535799503326, 0.02284345217049122, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.006384413689374924, 0.006966868881136179, 0.013256898149847984, 0.008146845735609531, 0.005910678766667843, 0.005924733821302652, 0.0029809526167809963, 0.004338744096457958, 0.0021091948729008436, 0.02691148780286312, 0.09123647958040237, 0.0904775932431221, 0.10420377552509308, 0.019918829202651978, 0.21981710195541382, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004395737312734127, 0.0342060811817646, 0.08344801515340805, 0.012639162130653858, 0.07537969946861267, 0.00383414002135396, 0.007808698806911707, 0.007516762241721153, 0.0023650380317121744, 0.055798787623643875, 0.025632014498114586, 0.040716953575611115, 0.16482838988304138, 0.13848447799682617, 0.17180821299552917, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0016022673808038235, 0.013307235203683376, 0.012306403368711472, 0.0029055906925350428, 0.06092625483870506, 0.01653674617409706, 0.008309547789394855, 0.00395687622949481, 0.002493055537343025, 0.0038927635177969933, 0.009680269286036491, 0.23031921684741974, 0.35693949460983276, 0.1708209365606308, 0.050492819398641586, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009627100080251694, 0.006502249743789434, 0.0023533182684332132, 0.0021814347710460424, 0.007286426145583391, 0.024909881874918938, 0.01453662570565939, 0.010449647903442383, 0.0028000103775411844, 0.001988302916288376, 0.001580765936523676, 0.013102496974170208, 0.001836722600273788, 0.0008430163725279272, 0.15720587968826294, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010018138214945793, 0.02516627125442028, 0.027397310361266136, 0.005101055838167667, 0.025938771665096283, 0.13529063761234283, 0.02690303698182106, 0.11719205975532532, 0.027814749628305435, 0.019565219059586525, 0.07996311038732529, 0.0991574078798294, 0.16288702189922333, 0.1113416850566864, 0.22370746731758118, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05219842493534088, 0.1440066546201706, 0.27922260761260986, 0.2058621197938919, 0.11230742931365967, 0.6016822457313538, 0.20846855640411377, 0.04777589067816734, 0.20611444115638733, 0.15481434762477875, 0.11950203776359558, 0.02679699845612049, 0.0639302060008049, 0.047183193266391754, 0.04897741973400116, 0.147435262799263, 0.06894105672836304, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01555164996534586, 0.0014379153726622462, 0.01706753298640251, 0.003720618085935712, 0.10093016922473907, 0.027928827330470085, 0.015380543656647205, 0.0025812943931668997, 0.020822137594223022, 0.014309070073068142, 0.017923271283507347, 0.0120958611369133, 0.014481468126177788, 0.009491728618741035, 0.15904544293880463, 0.18660759925842285, 0.013697005808353424, 0.050341442227363586, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11612647771835327, 0.0010205605067312717, 0.020188286900520325, 0.027076182886958122, 0.09822120517492294, 0.3221674859523773, 0.1250218003988266, 0.002691123867407441, 0.005359187722206116, 0.04976291581988335, 0.023232540115714073, 0.04237976670265198, 0.028708819299936295, 0.049411751329898834, 0.005618311930447817, 0.14907698333263397, 0.12682567536830902, 0.14014844596385956, 0.024977339431643486, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0470837838947773, 0.007497857324779034, 0.004583081230521202, 0.022991856560111046, 0.0278051495552063, 0.00051211251411587, 0.0627230703830719, 0.011764267459511757, 0.010903585702180862, 0.07272983342409134, 0.011678352952003479, 0.09392477571964264, 0.01558940764516592, 0.03351595252752304, 0.2068868726491928, 0.20074230432510376, 0.11179281026124954, 0.012457489967346191, 0.01455892063677311, 0.011106430552899837, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0024584962520748377, 8.163625898305327e-05, 0.00016154914919752628, 0.0002508168399799615, 0.0019916424062103033, 0.0004536219348665327, 0.0036078437697142363, 0.0008641426684334874, 0.00021941671730019152, 0.0014423344982787967, 0.0004360634775366634, 0.004383172374218702, 0.0009428760386072099, 0.0009436326217837632, 0.14683274924755096, 0.20768699049949646, 0.16985096037387848, 0.19526726007461548, 0.016829432919621468, 0.05647609382867813, 0.022808711975812912, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02989446185529232, 0.007703323382884264, 0.12996061146259308, 0.025068828836083412, 0.2812304198741913, 0.0071953474543988705, 0.0021352169569581747, 0.0025125211104750633, 0.0014658492291346192, 0.007028855849057436, 0.0448734275996685, 0.09462164342403412, 0.0503704659640789, 0.11768583953380585, 0.12974096834659576, 0.14349573850631714, 0.41078659892082214, 0.5100967288017273, 0.04046756774187088, 0.2924310266971588, 0.07987978309392929, 0.007180717773735523, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16756094992160797, 0.028098214417696, 0.20756086707115173, 0.2207580953836441, 0.10928753018379211, 0.13773545622825623, 0.2233184576034546, 0.1774815022945404, 0.13830231130123138, 0.20932619273662567, 0.18267595767974854, 0.05961548537015915, 0.07697918266057968, 0.18739080429077148, 0.06796090304851532, 0.11146429926156998, 0.3579395115375519, 0.7730652093887329, 0.5723751783370972, 0.2817910611629486, 0.25461745262145996, 0.060240793973207474, 0.08399515599012375, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.017068415880203247, 0.00098085415083915, 0.010854640044271946, 0.006490680854767561, 0.29060667753219604, 0.006710599176585674, 0.0118483304977417, 0.0008181483135558665, 0.00011296885350020602, 0.0034601599909365177, 0.005098147317767143, 0.010750477202236652, 0.010399019345641136, 0.009376241825520992, 0.017405353486537933, 0.13904383778572083, 0.44345301389694214, 0.1345542073249817, 0.05706587806344032, 0.7818705439567566, 0.04436418041586876, 0.015915511175990105, 0.31926584243774414, 0.26167550683021545, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1331326961517334, 0.019769106060266495, 0.01612294837832451, 0.028521019965410233, 0.007509702816605568, 0.2665199935436249, 0.19958320260047913, 0.1385747790336609, 0.0059373765252530575, 0.08046255260705948, 0.052418529987335205, 0.004961848258972168, 0.10941796749830246, 0.06705309450626373, 0.17611992359161377, 0.12236351519823074, 0.40148651599884033, 0.12099923938512802, 0.38539087772369385, 0.6352627873420715, 0.0574735552072525, 0.027495326474308968, 0.25199854373931885, 0.07788273692131042, 0.1824284791946411, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.019668979570269585, 0.0081618782132864, 0.12552350759506226, 0.0802406370639801, 0.07089362293481827, 0.18871739506721497, 0.12778939306735992, 0.04829992726445198, 0.04307088255882263, 0.02314154990017414, 0.14194107055664062, 0.05861861631274223, 0.19650596380233765, 0.11930099874734879, 0.18420156836509705, 0.0776049941778183, 0.26076433062553406, 0.12800094485282898, 0.15216867625713348, 0.36678510904312134, 0.31404268741607666, 0.13151897490024567, 0.1709745228290558, 0.2591820955276489, 0.18929390609264374, 0.08235450834035873, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00538466265425086, 0.0270208939909935, 0.18066750466823578, 0.06076826527714729, 0.035171061754226685, 0.411039799451828, 0.09634009003639221, 0.26394954323768616, 0.1915867179632187, 0.03318370133638382, 0.3213040828704834, 0.10995125770568848, 0.5320225954055786, 0.4394112527370453, 0.15243512392044067, 0.08287283033132553, 0.26698997616767883, 0.29562729597091675, 0.13922370970249176, 0.3693794012069702, 0.22139106690883636, 0.612119734287262, 0.1618482619524002, 0.40734153985977173, 0.10604425519704819, 0.2217203825712204, 0.14197519421577454, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0030147582292556763, 0.00625306461006403, 0.017102748155593872, 0.008551767095923424, 0.0727200135588646, 0.015153692103922367, 0.0023096217773854733, 0.011201570741832256, 0.002435098635032773, 0.006847116630524397, 0.016829995438456535, 0.12519565224647522, 0.3878204822540283, 0.13249750435352325, 0.028183329850435257, 0.0676846131682396, 0.5803259611129761, 0.47128230333328247, 0.2430339902639389, 0.43893957138061523, 0.5822793245315552, 0.9563859105110168, 0.5092246532440186, 0.7397804260253906, 0.6675750613212585, 0.2242172360420227, 0.046741336584091187, 0.09371624141931534, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.066617950797081, 0.006649812217801809, 0.04142908379435539, 0.13957993686199188, 0.025706114247441292, 0.08231058716773987, 0.08377126604318619, 0.02330365777015686, 0.04652002453804016, 0.11060080677270889, 0.09014575183391571, 0.07117310166358948, 0.15938407182693481, 0.1624550223350525, 0.05356656014919281, 0.16273218393325806, 0.4245251417160034, 0.44257473945617676, 0.1064363345503807, 0.22264361381530762, 0.638583779335022, 0.7456080913543701, 0.17856015264987946, 0.09681503474712372, 0.3901955187320709, 0.4154786765575409, 0.10903800278902054, 0.0281606987118721, 0.027353502810001373, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004379222169518471, 0.0002637936850078404, 0.0022587613202631474, 0.006711117923259735, 0.0006837267428636551, 0.007989797741174698, 0.02997850626707077, 0.045127563178539276, 0.008224103599786758, 0.0034686585422605276, 0.0038658890407532454, 0.00034815416438505054, 7.646608719369397e-05, 0.00017854337056633085, 0.14325816929340363, 0.2541956901550293, 0.2554672658443451, 0.13483673334121704, 0.33163735270500183, 0.11067650467157364, 0.3400806486606598, 0.4272999167442322, 0.2955835163593292, 0.293487548828125, 0.2820315957069397, 0.17141510546207428, 0.08369391411542892, 0.012903732247650623, 0.010530934669077396, 0.015047149732708931, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.25216665863990784, 0.1422366499900818, 0.10172943770885468, 0.3735504150390625, 0.0612066313624382, 0.06238102167844772, 0.11154207587242126, 0.031159698963165283, 0.011768986470997334, 0.4107469618320465, 0.1557808816432953, 0.07179611176252365, 0.186580628156662, 0.18789765238761902, 0.099563829600811, 0.07456009835004807, 0.09125705808401108, 0.20381297171115875, 0.09053967893123627, 0.6734579801559448, 0.8927901983261108, 0.9854956865310669, 0.19160649180412292, 0.848483681678772, 0.3795100748538971, 0.0351644828915596, 0.06069617718458176, 0.0190274715423584, 0.13319239020347595, 0.1618155688047409, 0.029784632846713066, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0073658498004078865, 0.1486257165670395, 0.03456511348485947, 0.0081891855224967, 0.009660922922194004, 0.09341325610876083, 0.010183881968259811, 0.09390538185834885, 0.005950886756181717, 0.019719628617167473, 0.060451164841651917, 0.021925343200564384, 0.19991156458854675, 0.17004182934761047, 0.15761280059814453, 0.13663174211978912, 0.5250937938690186, 0.20416004955768585, 0.37758082151412964, 0.7281314134597778, 0.24714940786361694, 0.006291824858635664, 0.029336191713809967, 0.258807897567749, 0.17944614589214325, 0.2768983840942383, 0.49996671080589294, 0.6760725975036621, 0.0684136375784874, 0.9500845074653625, 0.04427658021450043, 0.027829600498080254, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0057948376052081585, 0.023180164396762848, 0.018019115552306175, 0.008233858272433281, 0.005580522585660219, 0.09526203572750092, 0.025384269654750824, 0.05396068096160889, 0.022398412227630615, 0.010895788669586182, 0.02884012460708618, 0.008390026167035103, 0.1754663735628128, 0.0998048186302185, 0.1692073941230774, 0.05520259216427803, 0.4062710404396057, 0.11698392778635025, 0.09814880043268204, 0.8328142166137695, 0.46247926354408264, 0.07190129905939102, 0.3418641984462738, 0.14486591517925262, 0.025201991200447083, 0.042143724858760834, 0.4074908196926117, 0.1494714319705963, 0.17342594265937805, 0.908286988735199, 0.5950636863708496, 0.14296366274356842, 0.20851416885852814, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0038264640606939793, 0.023839879781007767, 0.12264026701450348, 0.02543032169342041, 0.01467527449131012, 0.22457416355609894, 0.02885078825056553, 0.18430863320827484, 0.08557040989398956, 0.016987022012472153, 0.3513573110103607, 0.04023189842700958, 0.40384334325790405, 0.4235673248767853, 0.16652488708496094, 0.08497714251279831, 0.5087416172027588, 0.4508724510669708, 0.33144411444664, 0.600685715675354, 0.523800790309906, 0.4743403494358063, 0.10964386910200119, 0.6009643077850342, 0.29714730381965637, 0.1661888062953949, 0.10026849061250687, 0.19036318361759186, 0.07889659702777863, 0.29447081685066223, 0.5917950868606567, 0.05482999235391617, 0.0994495078921318, 0.08629819005727768, NaN, NaN, NaN, NaN, NaN, NaN], [0.006266402080655098, 0.015031179413199425, 0.02853887900710106, 0.010518345981836319, 0.09044987708330154, 0.021657679229974747, 0.0031435268465429544, 0.020945381373167038, 0.004824943374842405, 0.0127853499725461, 0.04820985347032547, 0.12459135800600052, 0.5573670268058777, 0.2566193640232086, 0.05160163715481758, 0.04716389998793602, 0.6635201573371887, 0.5744545459747314, 0.33429521322250366, 0.755266010761261, 0.7800281643867493, 0.9541771411895752, 0.5776658058166504, 0.8714791536331177, 0.9158549308776855, 0.2818737030029297, 0.06938906759023666, 0.10379814356565475, 0.3064776659011841, 0.7474142909049988, 0.7715258002281189, 0.37782159447669983, 0.057383324950933456, 0.013433223590254784, 0.03400390222668648, NaN, NaN, NaN, NaN, NaN], [0.3002758324146271, 0.08866846561431885, 0.06544900685548782, 0.25531354546546936, 0.028160221874713898, 0.12210531532764435, 0.16810676455497742, 0.0764283761382103, 0.17981933057308197, 0.3050864636898041, 0.2806880474090576, 0.13050490617752075, 0.19047558307647705, 0.3216065764427185, 0.07704814523458481, 0.1486319750547409, 0.22267495095729828, 0.42902871966362, 0.07982667535543442, 0.5459871888160706, 0.9060689210891724, 0.8350642919540405, 0.10920917987823486, 0.4773065447807312, 0.7826967239379883, 0.5733710527420044, 0.26356616616249084, 0.040332335978746414, 0.031653065234422684, 0.8572309613227844, 0.5636150240898132, 0.07464684545993805, 0.03465104475617409, 0.03009859099984169, 0.008700854144990444, 0.005375253036618233, NaN, NaN, NaN, NaN], [0.005926316604018211, 0.0003559965989552438, 0.0015365411527454853, 0.005924532189965248, 0.0005743101937696338, 0.007415232714265585, 0.024156678467988968, 0.045611582696437836, 0.009969166480004787, 0.003380746114999056, 0.003106702584773302, 0.0003880919248331338, 4.0538176108384505e-05, 0.00014580521383322775, 0.13770556449890137, 0.25873932242393494, 0.5196211338043213, 0.3300914764404297, 0.5837901830673218, 0.4101006090641022, 0.7175306677818298, 0.6572118401527405, 0.6919461488723755, 0.6594171524047852, 0.7066829204559326, 0.46555259823799133, 0.3380126953125, 0.05317035689949989, 0.053740378469228745, 0.031323984265327454, 0.30507126450538635, 0.1422475129365921, 0.03319966048002243, 0.08714800328016281, 0.01252773217856884, 0.006611488293856382, 0.007115270011126995, NaN, NaN, NaN], [0.1617586314678192, 0.29556339979171753, 0.028325924649834633, 0.059843577444553375, 0.009868957102298737, 0.03965649753808975, 0.07811643928289413, 0.06809397041797638, 0.009963614866137505, 0.11740529537200928, 0.08369920402765274, 0.039758261293172836, 0.13982373476028442, 0.1197674348950386, 0.13220268487930298, 0.011579165235161781, 0.05381239950656891, 0.044945720583200455, 0.035533830523490906, 0.6624263525009155, 0.8997865319252014, 0.9679857492446899, 0.17051655054092407, 0.940772533416748, 0.6132625341415405, 0.01721411757171154, 0.04632151871919632, 0.010550450533628464, 0.08354383707046509, 0.12839946150779724, 0.02755529060959816, 0.44050073623657227, 0.04286862909793854, 0.01342833787202835, 0.003870438551530242, 0.026607532054185867, 0.02663758397102356, 0.005111980251967907, NaN, NaN], [0.012153265066444874, 0.16048333048820496, 0.041802890598773956, 0.00796045083552599, 0.018259191885590553, 0.10963782668113708, 0.009757153689861298, 0.07023902982473373, 0.01128031499683857, 0.030125515535473824, 0.0943576917052269, 0.02206866256892681, 0.1321137398481369, 0.19507774710655212, 0.1400403380393982, 0.13300661742687225, 0.5851269960403442, 0.20284885168075562, 0.5700805187225342, 0.7479174137115479, 0.39722636342048645, 0.004733124747872353, 0.0698152482509613, 0.6515945196151733, 0.5409151315689087, 0.25820717215538025, 0.4583084285259247, 0.6744768619537354, 0.3421478569507599, 0.9633424878120422, 0.1852269172668457, 0.04996338114142418, 0.5482219457626343, 0.296283096075058, 0.48366567492485046, 0.06441208720207214, 0.9149421453475952, 0.02780383825302124, 0.0073219588957726955, NaN], [0.005033975467085838, 0.01824766956269741, 0.015512547455728054, 0.006673634983599186, 0.005676268134266138, 0.04240407794713974, 0.023996027186512947, 0.1038113459944725, 0.02023463323712349, 0.0080516142770648, 0.052543867379426956, 0.1188565045595169, 0.05977800861001015, 0.05786403268575668, 0.13343320786952972, 0.14593175053596497, 0.2687321603298187, 0.04604685679078102, 0.30660173296928406, 0.3806478679180145, 0.38105660676956177, 0.15303322672843933, 0.014211257919669151, 0.05383581668138504, 0.20604565739631653, 0.2462100237607956, 0.5718756914138794, 0.5113963484764099, 0.21981710195541382, 0.4276719391345978, 0.5577609539031982, 0.4118191599845886, 0.31598320603370667, 0.5468451976776123, 0.4359907805919647, 0.2059280127286911, 0.3916337192058563, 0.2548142671585083, 0.2198532670736313, 0.026425611227750778]], [[0.060514166951179504, 0.09119007736444473, 0.5136731863021851, 0.024349171668291092, 0.41056114435195923, 0.043175265192985535, 0.016160618513822556, 0.12711943686008453, 0.029147693887352943, 0.01592664048075676, 0.04504424333572388, 0.03736018016934395, 0.026280265301465988, 0.042564861476421356, 0.13562467694282532, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009338664822280407, 0.09596994519233704, 0.12376897037029266, 0.01794583536684513, 0.059337858110666275, 0.04990454390645027, 0.003890786785632372, 0.07171432673931122, 0.0057785604149103165, 0.005389686673879623, 0.009663187898695469, 0.014342015609145164, 0.020640142261981964, 0.04060304909944534, 0.16408833861351013, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07689530402421951, 0.027863014489412308, 0.15549975633621216, 0.2693096697330475, 0.73520827293396, 0.03749871999025345, 0.3640631139278412, 0.14002074301242828, 0.16656053066253662, 0.02643253095448017, 0.0061660525389015675, 0.054253485053777695, 0.14240022003650665, 0.14975441992282867, 0.13701564073562622, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.21953634917736053, 0.22122228145599365, 0.04846278205513954, 0.07968296110630035, 0.3619323670864105, 0.03181222453713417, 0.6669740080833435, 0.3975786566734314, 0.11174946278333664, 0.15518029034137726, 0.004886193200945854, 0.010736972093582153, 0.07725195586681366, 0.09191425889730453, 0.1523013859987259, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0740056112408638, 0.054083533585071564, 0.027193741872906685, 0.014972379431128502, 0.04523617774248123, 0.012482533231377602, 0.4212614595890045, 0.25695085525512695, 0.3699147403240204, 0.013461914844810963, 0.08041262626647949, 0.015268572606146336, 0.627507209777832, 0.13811761140823364, 0.19850368797779083, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.029503263533115387, 0.09333665668964386, 0.016309864819049835, 0.1364656686782837, 0.03873518481850624, 0.019083604216575623, 0.758955180644989, 0.6250144243240356, 0.10551930963993073, 0.0059091635048389435, 0.001959211425855756, 0.004587537609040737, 0.0029548059683293104, 0.011073557659983635, 0.10497581213712692, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0038599083200097084, 0.03815716505050659, 0.004112291149795055, 0.0037336996756494045, 0.02896580658853054, 0.003606554586440325, 0.2724342346191406, 0.5795999765396118, 0.041377726942300797, 0.01812332309782505, 0.006642999593168497, 0.006629596464335918, 0.018780261278152466, 0.00801254715770483, 0.11063171178102493, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.023342538625001907, 0.1589166522026062, 0.01254882663488388, 0.01894153468310833, 0.04743911698460579, 0.015340029262006283, 0.06989605724811554, 0.22605817019939423, 0.016811540350317955, 0.014681086875498295, 0.0061398339457809925, 0.02630683407187462, 0.032653048634529114, 0.05358496680855751, 0.18197578191757202, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01728241890668869, 0.12100599706172943, 0.003952578641474247, 0.038103699684143066, 0.00803869217634201, 0.017839567735791206, 0.040644098073244095, 0.014622771181166172, 0.07288665324449539, 0.4550913870334625, 0.18886235356330872, 0.2150641530752182, 0.487347275018692, 0.42817094922065735, 0.12942945957183838, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.011775199323892593, 0.1349712610244751, 0.005470172502100468, 0.003098055487498641, 0.028361253440380096, 0.03303566575050354, 0.007174484897404909, 0.015601159073412418, 0.006606224924325943, 0.08859884738922119, 0.18040567636489868, 0.31761303544044495, 0.2462366670370102, 0.4818485677242279, 0.12394269555807114, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05270439758896828, 0.1637289971113205, 0.009510326199233532, 0.008013473823666573, 0.14090411365032196, 0.011389089748263359, 0.013123652897775173, 0.023534703999757767, 0.009078129194676876, 0.02855684608221054, 0.026650836691260338, 0.39132389426231384, 0.16291603446006775, 0.25967708230018616, 0.10212607681751251, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.19571052491664886, 0.10246216505765915, 0.02142595686018467, 0.012254489585757256, 0.00365867605432868, 0.007110960781574249, 0.020346596837043762, 0.03192196041345596, 0.00833944883197546, 0.07423693686723709, 0.09786227345466614, 0.08075869083404541, 0.1330210417509079, 0.26891645789146423, 0.17930860817432404, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.11616674810647964, 0.175978422164917, 0.00425378605723381, 0.017427049577236176, 0.011484457179903984, 0.030517226085066795, 0.08637198060750961, 0.1500588357448578, 0.0009573447750881314, 0.044167183339595795, 0.005869577638804913, 0.0011607500491663814, 0.014711305499076843, 0.027834221720695496, 0.18594378232955933, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.11675343662500381, 0.17556257545948029, 0.016423039138317108, 0.02097608894109726, 0.06606884300708771, 0.06371303647756577, 0.09760221093893051, 0.2481643557548523, 0.0015754855703562498, 0.03009907715022564, 0.03618617355823517, 0.012020162306725979, 0.17486301064491272, 0.22630257904529572, 0.2108311653137207, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004961065016686916, 0.011551961302757263, 0.006318831816315651, 0.002851473866030574, 0.003461753251031041, 0.011111320927739143, 0.004611799493432045, 0.004697122145444155, 0.0026004482060670853, 0.0010426584631204605, 0.0060967751778662205, 0.01239971723407507, 0.004622939508408308, 0.002610035240650177, 0.15716104209423065, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1022859737277031, 0.17571765184402466, 0.1416551172733307, 0.11749783158302307, 0.09062699973583221, 0.07838433235883713, 0.09344526380300522, 0.3238999545574188, 0.11371968686580658, 0.10100032389163971, 0.09302259236574173, 0.0389624647796154, 0.16697892546653748, 0.1419355273246765, 0.1285012662410736, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24028724431991577, 0.14351274073123932, 0.051798444241285324, 0.16382630169391632, 0.04226303845643997, 0.020662518218159676, 0.11527843773365021, 0.29321926832199097, 0.02218940667808056, 0.0878078043460846, 0.10535410046577454, 0.011972848325967789, 0.07032275199890137, 0.04715458303689957, 0.0739566907286644, 0.1684475541114807, 0.01643766649067402, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2799055874347687, 0.11053244769573212, 0.1936434954404831, 0.029654914513230324, 0.3583168685436249, 0.552708625793457, 0.34459343552589417, 0.33612802624702454, 0.17023301124572754, 0.19969996809959412, 0.18768110871315002, 0.6793866157531738, 0.791401207447052, 0.7463385462760925, 0.09094473719596863, 0.20323613286018372, 0.02236698381602764, 0.0030780781526118517, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1572730988264084, 0.12077052146196365, 0.0489557608962059, 0.1575693041086197, 0.05669395253062248, 0.21311312913894653, 0.07387427985668182, 0.12006285786628723, 0.06427917629480362, 0.05486075580120087, 0.09722346067428589, 0.0672946497797966, 0.519307017326355, 0.15919242799282074, 0.07895061373710632, 0.15523119270801544, 0.029148569330573082, 0.04869325831532478, 0.027081435546278954, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.056666091084480286, 0.13304737210273743, 0.023897293955087662, 0.04679059237241745, 0.045941345393657684, 0.32384783029556274, 0.44531556963920593, 0.533463716506958, 0.08588721603155136, 0.10118058323860168, 0.027683693915605545, 0.15270595252513885, 0.45412689447402954, 0.19033603370189667, 0.009601723402738571, 0.20906439423561096, 0.016835892572999, 0.005647255107760429, 0.004844226874411106, 0.00019458922906778753, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.026866083964705467, 0.01856745034456253, 0.00889106560498476, 0.023431263864040375, 0.014423922635614872, 0.06721587479114532, 0.30465173721313477, 0.5084072351455688, 0.06748852878808975, 0.09416066110134125, 0.028160765767097473, 0.08301042765378952, 0.13479003310203552, 0.08470122516155243, 0.14269311726093292, 0.19736447930335999, 0.01826038584113121, 0.012854915112257004, 0.09684289991855621, 0.0006958578014746308, 4.3345058656996116e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07283831387758255, 0.02513016201555729, 0.513066828250885, 0.1692790985107422, 0.12089971452951431, 0.05420007184147835, 0.019427694380283356, 0.038392528891563416, 0.31973040103912354, 0.29048243165016174, 0.4046151340007782, 0.10607112944126129, 0.0885496586561203, 0.07017665356397629, 0.1372956782579422, 0.16369424760341644, 0.023256592452526093, 0.01855486072599888, 0.06154748797416687, 0.06098903343081474, 0.10795246064662933, 0.023746412247419357, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.27857187390327454, 0.3617483973503113, 0.2938012182712555, 0.22770966589450836, 0.06824903935194016, 0.055705904960632324, 0.2735913395881653, 0.10727421194314957, 0.15245027840137482, 0.12983311712741852, 0.2781352400779724, 0.010307536460459232, 0.09433942288160324, 0.07780664414167404, 0.13000918924808502, 0.19143380224704742, 0.11398851871490479, 0.03716170787811279, 0.07628969103097916, 0.38886839151382446, 0.24263328313827515, 0.13712459802627563, 0.02201412245631218, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09918209165334702, 0.053455647081136703, 0.645177960395813, 0.40746453404426575, 0.08205579966306686, 0.11053493618965149, 0.09200509637594223, 0.0519426129758358, 0.15867555141448975, 0.14363400638103485, 0.08945868164300919, 0.009240956045687199, 0.05626320466399193, 0.024817338213324547, 0.10628006607294083, 0.2130274772644043, 0.007986752316355705, 0.02235114760696888, 0.0019427334191277623, 0.005593507084995508, 0.012699572369456291, 0.006745419930666685, 0.06126464158296585, 0.14077326655387878, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21029417216777802, 0.16975507140159607, 0.4791514277458191, 0.5080997347831726, 0.14877668023109436, 0.04306463524699211, 0.02225780300796032, 0.027854960411787033, 0.09907854348421097, 0.17716829478740692, 0.027767561376094818, 0.04010230675339699, 0.1045137569308281, 0.07445494085550308, 0.1349247545003891, 0.22579564154148102, 0.013292824849486351, 0.10215212404727936, 0.005943832919001579, 0.013894540257751942, 0.01404587086290121, 0.02319374494254589, 0.10344905406236649, 0.1325504034757614, 0.008661924861371517, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05318222567439079, 0.11344952136278152, 0.09562063962221146, 0.10165436565876007, 0.11442670226097107, 0.07387696951627731, 0.04448265954852104, 0.12469986081123352, 0.10296554863452911, 0.029610879719257355, 0.006854650564491749, 0.06481806933879852, 0.038151390850543976, 0.029200172051787376, 0.19021393358707428, 0.1733061671257019, 0.07715445756912231, 0.2302267998456955, 0.05804288014769554, 0.07560069113969803, 0.23177897930145264, 0.2901765704154968, 0.042333029210567474, 0.08450006693601608, 0.04456959664821625, 0.015471314080059528, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024841444566845894, 0.16249340772628784, 0.20643305778503418, 0.09402812272310257, 0.0850510448217392, 0.023708872497081757, 0.027868179604411125, 0.16653721034526825, 0.2575382590293884, 0.07176022976636887, 0.04638299718499184, 0.019721999764442444, 0.08340867608785629, 0.04306621477007866, 0.19255293905735016, 0.16428759694099426, 0.01361166127026081, 0.2167942076921463, 0.03707392141222954, 0.09917350113391876, 0.2872558534145355, 0.08793877810239792, 0.03127053380012512, 0.051127880811691284, 0.02603980340063572, 0.12251178920269012, 0.06466985493898392, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24242781102657318, 0.4547469913959503, 0.7904132008552551, 0.7443370819091797, 0.4808639585971832, 0.2640213668346405, 0.06001711264252663, 0.24681034684181213, 0.5675581097602844, 0.2725449204444885, 0.247804656624794, 0.029579274356365204, 0.19247104227542877, 0.09198179841041565, 0.18542104959487915, 0.2214493751525879, 0.0034381633158773184, 0.025536755099892616, 0.005642351228743792, 0.0024517737329006195, 0.00733930105343461, 0.0003064426709897816, 0.024970028549432755, 0.0009503457695245743, 0.0013023557839915156, 0.012362079694867134, 0.002213133964687586, 0.0037243058905005455, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10456986725330353, 0.23679938912391663, 0.29603201150894165, 0.2020668387413025, 0.14429134130477905, 0.4285147190093994, 0.3221139907836914, 0.592944860458374, 0.47945162653923035, 0.273953914642334, 0.2270997315645218, 0.05125115066766739, 0.15167200565338135, 0.14498752355575562, 0.03565559163689613, 0.21803884208202362, 0.044672977179288864, 0.15033316612243652, 0.24480289220809937, 0.0010314357932657003, 0.006885815411806107, 0.017953861504793167, 0.09280995279550552, 0.09214792400598526, 0.01309943851083517, 0.026278402656316757, 0.029330603778362274, 0.10137840360403061, 0.0009828503243625164, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005393329542130232, 0.004602347034960985, 0.02125353366136551, 0.017772456631064415, 0.029431374743580818, 0.06670433282852173, 0.07382840663194656, 0.05640842020511627, 0.2022721767425537, 0.02110537886619568, 0.006757265422493219, 0.0065305884927511215, 0.00012849831546191126, 0.0015581984771415591, 0.14312443137168884, 0.28474918007850647, 0.005827821791172028, 0.0010850036051124334, 0.005180059466511011, 0.00018831032502930611, 0.002925402717664838, 0.0029562395066022873, 0.005281978752464056, 0.002952893264591694, 0.013548285700380802, 0.01663871854543686, 0.02234998345375061, 0.001472283387556672, 0.00024227210087701678, 9.911999950418249e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03693488612771034, 0.3099628686904907, 0.02452116832137108, 0.038606833666563034, 0.04603191837668419, 0.056979674845933914, 0.014461892656981945, 0.021202413365244865, 0.4372372031211853, 0.02073492854833603, 0.005594322457909584, 0.11605570465326309, 0.05724794790148735, 0.01605997234582901, 0.1753198802471161, 0.11472342163324356, 0.017006950452923775, 0.03429265320301056, 0.05351921543478966, 0.010289198718965054, 0.02545105293393135, 0.002036151010543108, 0.08590202778577805, 0.007977829314768314, 0.008050770498812199, 0.02079172432422638, 0.07815419882535934, 0.25072064995765686, 0.11726108938455582, 0.04080193489789963, 0.020839283242821693, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17487157881259918, 0.2829012870788574, 0.22657853364944458, 0.2227388322353363, 0.09278897941112518, 0.05522100254893303, 0.023270972073078156, 0.031554628163576126, 0.32194823026657104, 0.13948096334934235, 0.09803083539009094, 0.2809208631515503, 0.14969345927238464, 0.03018103539943695, 0.10283161699771881, 0.25351014733314514, 0.018978603184223175, 0.013279697857797146, 0.14657457172870636, 0.0005683518829755485, 0.003044809214770794, 0.0003673452010843903, 0.0009085922501981258, 0.00026260188315063715, 6.703466351609677e-05, 0.00393629027530551, 0.0411190427839756, 0.014572926796972752, 0.0009043514728546143, 0.001453216653317213, 0.001335341832600534, 0.0036634530406445265, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06711219251155853, 0.13971862196922302, 0.10573939234018326, 0.08062157034873962, 0.22173365950584412, 0.04757346957921982, 0.02002648264169693, 0.06195787340402603, 0.09553409367799759, 0.04351034387946129, 0.015184497460722923, 0.17841440439224243, 0.07658158242702484, 0.04646967723965645, 0.1461518555879593, 0.2249869406223297, 0.0773954764008522, 0.10561174154281616, 0.3267342746257782, 0.011780736967921257, 0.03227663040161133, 0.09185110032558441, 0.03840579837560654, 0.01289159432053566, 0.002641883445903659, 0.03386297821998596, 0.16820214688777924, 0.06345225125551224, 0.027306171134114265, 0.007737002335488796, 0.018253128975629807, 0.0508209764957428, 0.015562118031084538, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015694430097937584, 0.09081663191318512, 0.2731003761291504, 0.09780610352754593, 0.06437630951404572, 0.024092676118016243, 0.017730340361595154, 0.09997125715017319, 0.24317535758018494, 0.06615940481424332, 0.05322461575269699, 0.013002216815948486, 0.10308460891246796, 0.03947872668504715, 0.16966252028942108, 0.17073971033096313, 0.01119090337306261, 0.07090220600366592, 0.026190776377916336, 0.04357914999127388, 0.10384812206029892, 0.05681576952338219, 0.008270802907645702, 0.011212479323148727, 0.016114890575408936, 0.1306251734495163, 0.04437248408794403, 0.022720789536833763, 0.0017881430685520172, 0.005742507986724377, 0.03271590173244476, 0.12170897424221039, 0.18442584574222565, 0.07238933444023132, NaN, NaN, NaN, NaN, NaN, NaN], [0.19514591991901398, 0.2590837776660919, 0.7111572027206421, 0.6245842576026917, 0.2279123067855835, 0.21324849128723145, 0.0465325303375721, 0.16129039227962494, 0.5552195906639099, 0.24888396263122559, 0.16995932161808014, 0.017819084227085114, 0.13601525127887726, 0.04923256114125252, 0.1924036145210266, 0.2460513859987259, 0.004599481821060181, 0.030415518209338188, 0.006707339081913233, 0.001940727117471397, 0.0018293699249625206, 0.0002438600640743971, 0.021702459082007408, 0.00019114103633910418, 0.0004616644873749465, 0.02795419655740261, 0.007376548834145069, 0.009364028461277485, 0.0008695388678461313, 0.027626920491456985, 0.002984545426443219, 0.0021758046932518482, 0.005276597570627928, 0.0015223525697365403, 0.0046029179356992245, NaN, NaN, NaN, NaN, NaN], [0.11466818302869797, 0.23749157786369324, 0.22078867256641388, 0.21260471642017365, 0.1054922342300415, 0.38443663716316223, 0.35735341906547546, 0.3432110548019409, 0.45766645669937134, 0.30316272377967834, 0.15794025361537933, 0.23222389817237854, 0.18522031605243683, 0.12369272857904434, 0.062224190682172775, 0.1682240217924118, 0.15532228350639343, 0.17499232292175293, 0.31528380513191223, 0.0016938054468482733, 0.0013859918108209968, 0.0071086762472987175, 0.08609996736049652, 0.02145048975944519, 0.00334079097956419, 0.08546027541160583, 0.16909679770469666, 0.5000762343406677, 0.012536582536995411, 0.0033327846322208643, 0.01681024581193924, 0.01291667390614748, 0.11205089092254639, 0.06917328387498856, 0.24062496423721313, 0.003104837378486991, NaN, NaN, NaN, NaN], [0.004928229842334986, 0.004764902405440807, 0.014567935839295387, 0.014073353260755539, 0.020878629758954048, 0.04901519790291786, 0.05124438554048538, 0.042454566806554794, 0.19801755249500275, 0.018003307282924652, 0.004736864008009434, 0.006620202213525772, 0.00011398878996260464, 0.001381832524202764, 0.13761556148529053, 0.30163663625717163, 0.008585775271058083, 0.0018221536884084344, 0.004949942696839571, 0.0002661931503098458, 0.0017199779395014048, 0.00286088977009058, 0.004591777920722961, 0.0013412131229415536, 0.009152509272098541, 0.029603971168398857, 0.059182800352573395, 0.004352512303739786, 0.0009281163802370429, 0.00013420419418253005, 0.0015637356555089355, 0.004895435180515051, 0.0020298720337450504, 0.016267914324998856, 0.0014363413210958242, 0.00015049855574034154, 4.989441003999673e-05, NaN, NaN, NaN], [0.013776288367807865, 0.25124475359916687, 0.00789756141602993, 0.00910337083041668, 0.005072988104075193, 0.015830766409635544, 0.005818341393023729, 0.011153762228786945, 0.14152461290359497, 0.008211367763578892, 0.002360414480790496, 0.06666377186775208, 0.057822320610284805, 0.009000283665955067, 0.13980405032634735, 0.1420876681804657, 0.030559053644537926, 0.035777460783720016, 0.0549585185945034, 0.010907668620347977, 0.018195953220129013, 0.005288956221193075, 0.07946551591157913, 0.003352995030581951, 0.00945360492914915, 0.03057919070124626, 0.20277532935142517, 0.5438944697380066, 0.2487112432718277, 0.11027072370052338, 0.03672702983021736, 0.009589559398591518, 0.03681262582540512, 0.12653782963752747, 0.3100517988204956, 0.04488144814968109, 0.07299992442131042, 0.024292031303048134, NaN, NaN], [0.25532495975494385, 0.3110601603984833, 0.28066542744636536, 0.29941898584365845, 0.09561395645141602, 0.06004221364855766, 0.0257351566106081, 0.04446575790643692, 0.3475395441055298, 0.2538500130176544, 0.25107017159461975, 0.4736424386501312, 0.29699820280075073, 0.06975124776363373, 0.11745814979076385, 0.2571920156478882, 0.012253361754119396, 0.00982633139938116, 0.09085621684789658, 0.00026428516139276326, 0.001174133620224893, 0.00010905979434028268, 0.0006958161829970777, 9.435929678147659e-05, 1.889842314994894e-05, 0.0019355103140696883, 0.03233037516474724, 0.014144179411232471, 0.0034062752965837717, 0.0014896523207426071, 0.0032966958824545145, 0.0043079969473183155, 0.002425077836960554, 0.0237245112657547, 0.017915409058332443, 0.0004631538176909089, 0.0033925946336239576, 0.0019653798080980778, 0.0010656031081452966, NaN], [0.06876020133495331, 0.07319146394729614, 0.08357107639312744, 0.06905727088451385, 0.010884120129048824, 0.012632370926439762, 0.04344229772686958, 0.06033884361386299, 0.05559740215539932, 0.048808641731739044, 0.06204793229699135, 0.017201891168951988, 0.028970519080758095, 0.021960163488984108, 0.13179059326648712, 0.25252944231033325, 0.012149164453148842, 0.019892947748303413, 0.013666713610291481, 0.05940697342157364, 0.04882493242621422, 0.025430571287870407, 0.00045668394886888564, 0.0054928152821958065, 0.005623141769319773, 0.004253733437508345, 0.014798035845160484, 0.012909402139484882, 0.011927488259971142, 0.007018915377557278, 0.021986471489071846, 0.016502689570188522, 0.002887164242565632, 0.006932961288839579, 0.007926056161522865, 0.015145027078688145, 0.005945136770606041, 0.016453862190246582, 0.011257275938987732, 0.0009747393196448684]], [[0.027552247047424316, 0.013821233063936234, 0.004237555433064699, 0.0007387229125015438, 0.0009859473211690784, 0.001997306477278471, 0.002160864183679223, 0.009250090457499027, 0.0009738927474245429, 0.0009403586154803634, 0.003406830132007599, 0.0010056114988401532, 0.008306043222546577, 0.06191018968820572, 0.18169914186000824, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0056476471945643425, 0.0617278628051281, 0.026225095614790916, 0.009516767226159573, 0.019543437287211418, 0.011766157113015652, 0.0015307252760976553, 0.004000868182629347, 0.006223553325980902, 0.02180931344628334, 0.02397397719323635, 0.025289250537753105, 0.01872297003865242, 0.05591608211398125, 0.17309869825839996, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5742589831352234, 0.02769068442285061, 0.03131784498691559, 0.008496972732245922, 0.005279624368995428, 0.0009009581408463418, 0.013010378926992416, 0.009255914948880672, 0.08095329999923706, 0.0017015798948705196, 0.0027918636333197355, 0.01474103331565857, 0.07241056859493256, 0.2960302531719208, 0.1991364061832428, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3870091140270233, 0.24428580701351166, 0.004871743265539408, 0.01251932606101036, 0.004600874613970518, 0.007045479491353035, 0.011942178010940552, 0.06100638955831528, 0.06223933771252632, 0.00421120086684823, 0.0017708303639665246, 0.010406754910945892, 0.016386834904551506, 0.038040366023778915, 0.25559180974960327, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6136646866798401, 0.2692064642906189, 0.043582458049058914, 0.00652115186676383, 0.05291604623198509, 0.006654517259448767, 0.03398957848548889, 0.03886384516954422, 0.13169772922992706, 0.002106831641867757, 0.005907678045332432, 0.01888049766421318, 0.04876947030425072, 0.2226717472076416, 0.22327177226543427, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.685612678527832, 0.0861489400267601, 0.03236214071512222, 0.16196951270103455, 0.03394145518541336, 0.05551951378583908, 0.027528556063771248, 0.06770895421504974, 0.19389298558235168, 0.03780713677406311, 0.0038191182538866997, 0.05989958345890045, 0.13479465246200562, 0.24111053347587585, 0.15613426268100739, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6876600384712219, 0.0606975182890892, 0.05783677101135254, 0.05387236177921295, 0.11914167553186417, 0.004756046459078789, 0.031782086938619614, 0.011465699411928654, 0.1448838710784912, 0.09538520872592926, 0.007872258313000202, 0.033316925168037415, 0.09786565601825714, 0.08940181881189346, 0.23629719018936157, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5363585352897644, 0.11579979956150055, 0.10718797892332077, 0.21453110873699188, 0.030864767730236053, 0.026318436488509178, 0.03807519003748894, 0.12262200564146042, 0.08015674352645874, 0.06537020206451416, 0.004594390746206045, 0.015254726633429527, 0.06485987454652786, 0.039039257913827896, 0.16586215794086456, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6220377087593079, 0.17304541170597076, 0.23731492459774017, 0.32412996888160706, 0.2203587144613266, 0.09306959062814713, 0.2822628319263458, 0.008407875895500183, 0.14113475382328033, 0.022416740655899048, 0.005183607805520296, 0.0005837879725731909, 0.00799399521201849, 0.006284625735133886, 0.12005029618740082, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.18509520590305328, 0.21334251761436462, 0.12845394015312195, 0.3693835139274597, 0.41559898853302, 0.19613976776599884, 0.7053389549255371, 0.3886314332485199, 0.06599769741296768, 0.04325481504201889, 0.029052795842289925, 0.001557054347358644, 0.0018087843200191855, 0.0036887156311422586, 0.18107539415359497, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.612794041633606, 0.24153079092502594, 0.076973557472229, 0.17341682314872742, 0.06242084503173828, 0.2242424041032791, 0.8304246068000793, 0.5655775666236877, 0.4262824058532715, 0.00936043355613947, 0.03881426528096199, 0.0046007027849555016, 0.005786797031760216, 0.020520325750112534, 0.226027712225914, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.21637925505638123, 0.22487440705299377, 0.19202512502670288, 0.3957260847091675, 0.15970049798488617, 0.16693006455898285, 0.3690066933631897, 0.5193001627922058, 0.6459834575653076, 0.047006867825984955, 0.06868032366037369, 0.043628890067338943, 0.02405296452343464, 0.05333276465535164, 0.08607933670282364, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5923737287521362, 0.3536633849143982, 0.08390633016824722, 0.2980528473854065, 0.042989592999219894, 0.026934657245874405, 0.1647067815065384, 0.1620720773935318, 0.6647022366523743, 0.13678880035877228, 0.10115252435207367, 0.012052871286869049, 0.2444845736026764, 0.1799331158399582, 0.10357851535081863, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3260110914707184, 0.10825559496879578, 0.040669191628694534, 0.08903322368860245, 0.055108752101659775, 0.014200238510966301, 0.06877616047859192, 0.07561883330345154, 0.7116665244102478, 0.08518233895301819, 0.13964912295341492, 0.01787719503045082, 0.027594367042183876, 0.0709126889705658, 0.09409899264574051, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.26070404052734375, 0.8011303544044495, 0.17980173230171204, 0.0725909024477005, 0.12434736639261246, 0.28980228304862976, 0.3281027674674988, 0.7843722701072693, 0.12677432596683502, 0.054726697504520416, 0.13370326161384583, 0.19018130004405975, 0.1707623451948166, 0.14939220249652863, 0.07447532564401627, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1855485588312149, 0.4779467284679413, 0.0886944904923439, 0.027812138199806213, 0.051930978894233704, 0.20570456981658936, 0.13285183906555176, 0.12479114532470703, 0.03275279700756073, 0.13280591368675232, 0.10831113904714584, 0.13358037173748016, 0.31709861755371094, 0.18639257550239563, 0.0658930093050003, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04738391190767288, 0.17884546518325806, 0.030679181218147278, 0.09374479204416275, 0.015219364315271378, 0.004209337756037712, 0.011544613167643547, 0.014519347809255123, 0.0008998611010611057, 0.03714418038725853, 0.02808041125535965, 0.0015275280456990004, 0.014074422419071198, 0.01773718185722828, 0.02865048497915268, 0.14568212628364563, 0.073321633040905, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4282352328300476, 0.07421883940696716, 0.37614062428474426, 0.6016114950180054, 0.16448479890823364, 0.10949403792619705, 0.43647968769073486, 0.17394804954528809, 0.2346193641424179, 0.5131813287734985, 0.6543169021606445, 0.06318124383687973, 0.059741634875535965, 0.08049911260604858, 0.08155221492052078, 0.07740449905395508, 0.019538799300789833, 0.31676185131073, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04248558357357979, 0.005498564336448908, 0.015051363967359066, 0.021896474063396454, 0.031015703454613686, 0.23631463944911957, 0.5231030583381653, 0.1651564985513687, 0.010708797723054886, 0.0702022984623909, 0.015817642211914062, 0.01968570239841938, 0.2309122085571289, 0.11954572051763535, 0.04909561946988106, 0.11254165321588516, 0.04977253079414368, 0.12113941460847855, 0.18998825550079346, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.019823409616947174, 0.02119731903076172, 0.0447932668030262, 0.04950243979692459, 0.11350910365581512, 0.3172611892223358, 0.1175147220492363, 0.16474604606628418, 0.025614900514483452, 0.11684545129537582, 0.027774598449468613, 0.03366768732666969, 0.1657668650150299, 0.20241110026836395, 0.02058284729719162, 0.09693466126918793, 0.12094055861234665, 0.48810020089149475, 0.07605772465467453, 0.10663138329982758, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024027986451983452, 0.07085671275854111, 0.014559593982994556, 0.003951122052967548, 0.5812088251113892, 0.07389754801988602, 0.10464153438806534, 0.06822511553764343, 0.1849648803472519, 0.02429678477346897, 0.014226456172764301, 0.2123226672410965, 0.1049809455871582, 0.17609325051307678, 0.13661964237689972, 0.002718105213716626, 0.037000641226768494, 0.1506986916065216, 0.012303436174988747, 0.09212689101696014, 0.5217995047569275, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20496347546577454, 0.09403666108846664, 0.02112487144768238, 0.025338320061564445, 0.008130905218422413, 0.1783977895975113, 0.3754851818084717, 0.0950397253036499, 0.0030220954213291407, 0.08205359429121017, 0.011042395606637001, 0.018588367849588394, 0.1888807862997055, 0.10302136838436127, 0.14473272860050201, 0.17887507379055023, 0.10589989274740219, 0.004075651057064533, 0.0014342612121254206, 0.00521382549777627, 0.031908128410577774, 0.003124895039945841, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.037373751401901245, 0.07382072508335114, 0.08205787092447281, 0.10832883417606354, 0.02859049290418625, 0.1663966327905655, 0.058918725699186325, 0.17053310573101044, 0.011018002405762672, 0.15213745832443237, 0.027154715731739998, 0.0019660431426018476, 0.22162862122058868, 0.11411792784929276, 0.08493959158658981, 0.23519471287727356, 0.3653021454811096, 0.05512593686580658, 0.10675911605358124, 0.0014886436983942986, 0.001230676076374948, 0.003634560154750943, 0.00975269265472889, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015705576166510582, 0.016172299161553383, 0.006149389781057835, 0.0038101596292108297, 0.007736767642199993, 0.20371977984905243, 0.12438680231571198, 0.06649734079837799, 0.004926482681185007, 0.004153827205300331, 0.0012289183214306831, 0.003863752353936434, 0.0550994910299778, 0.04052891582250595, 0.36571574211120605, 0.19171930849552155, 0.3204987347126007, 0.0060858046635985374, 0.010409774258732796, 0.003722283523529768, 0.0010954621247947216, 0.0028676562942564487, 0.35306307673454285, 0.01622932404279709, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008730506524443626, 0.002757954876869917, 0.0122150257229805, 0.006305738352239132, 0.004681416787207127, 0.06460410356521606, 0.008150112815201283, 0.010960009880363941, 0.004299533553421497, 0.004670997615903616, 0.0034528695978224277, 0.0024545302148908377, 0.005013267509639263, 0.008545692078769207, 0.23703089356422424, 0.25555557012557983, 0.13076956570148468, 0.003832729533314705, 0.0447237528860569, 0.014599477872252464, 0.0024878191761672497, 0.0016443775966763496, 0.20187559723854065, 0.0005508072790689766, 0.0029457835480570793, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09499987959861755, 0.010673395358026028, 0.007046178914606571, 0.020993953570723534, 0.010670008137822151, 0.07466354966163635, 0.06417079269886017, 0.023990478366613388, 0.17728924751281738, 0.15624059736728668, 0.004560643341392279, 0.010690598748624325, 0.03727814555168152, 0.017693333327770233, 0.14084658026695251, 0.13948844373226166, 0.2463626265525818, 0.09502393007278442, 0.197096586227417, 0.47678983211517334, 0.3142886161804199, 0.09103813022375107, 0.10499368607997894, 0.07698603719472885, 0.026083102449774742, 0.3110981583595276, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.688500165939331, 0.16286028921604156, 0.04583478718996048, 0.22473743557929993, 0.025797681882977486, 0.04771623760461807, 0.5437547564506531, 0.0642164871096611, 0.01443459838628769, 0.2519066631793976, 0.017869845032691956, 0.003991205245256424, 0.04630482196807861, 0.029587149620056152, 0.049375567585229874, 0.1511228382587433, 0.027682308107614517, 0.014322453178465366, 0.0030328254215419292, 0.04723867028951645, 0.30981165170669556, 0.025852922350168228, 0.018514074385166168, 0.01515920553356409, 0.009253463707864285, 0.10175863653421402, 0.16996310651302338, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14772717654705048, 0.11627800017595291, 0.034884992986917496, 0.02596234902739525, 0.031621210277080536, 0.39286479353904724, 0.6627658009529114, 0.20747745037078857, 0.019052494317293167, 0.06071586161851883, 0.014515946619212627, 0.03545556217432022, 0.1622975915670395, 0.05619712546467781, 0.4560142755508423, 0.1847103387117386, 0.05052594095468521, 0.005765186157077551, 0.018545929342508316, 0.00881477165967226, 0.0375242680311203, 0.027162199839949608, 0.09025334566831589, 0.0028228689916431904, 0.0033718899358063936, 0.1103500947356224, 0.0837099552154541, 0.0044236015528440475, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3253695070743561, 0.18678773939609528, 0.23196454346179962, 0.43925735354423523, 0.09974130243062973, 0.1577768325805664, 0.26045241951942444, 0.07323815673589706, 0.005399893503636122, 0.23951157927513123, 0.04431937262415886, 0.013187061063945293, 0.0749824121594429, 0.025474021211266518, 0.2768867611885071, 0.27341794967651367, 0.03427007421851158, 0.008004172705113888, 0.009254892356693745, 0.005621441174298525, 0.00972525030374527, 0.005248658824712038, 0.02184745855629444, 0.0006181569187901914, 0.0005494534852914512, 0.06994801014661789, 0.02213645726442337, 0.004287416115403175, 0.0008399627404287457, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.049311667680740356, 0.10222040861845016, 0.30249276757240295, 0.11109475791454315, 0.4333159327507019, 0.4476950168609619, 0.14919614791870117, 0.45436185598373413, 0.10977044701576233, 0.101465605199337, 0.28612539172172546, 0.15904487669467926, 0.4858849048614502, 0.19411928951740265, 0.08273273706436157, 0.008804291486740112, 0.07617928832769394, 0.47516930103302, 0.07513945549726486, 0.5241973400115967, 0.4384346902370453, 0.06213618069887161, 0.06345370411872864, 0.0682281106710434, 0.15877418220043182, 0.023486817255616188, 0.026526909321546555, 0.0028373831883072853, 0.001617963775061071, 0.37629759311676025, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08865676820278168, 0.0832996591925621, 0.0360012948513031, 0.026901112869381905, 0.0488949753344059, 0.5697077512741089, 0.2118675261735916, 0.21166029572486877, 0.009457184933125973, 0.042189937084913254, 0.010147118009626865, 0.027016732841730118, 0.1966082751750946, 0.18848717212677002, 0.17412608861923218, 0.26533833146095276, 0.10994716733694077, 0.010266831144690514, 0.037150826305150986, 0.009969023987650871, 0.00030588259687647223, 8.988264016807079e-05, 0.07940464466810226, 0.00027601365582086146, 0.0013282618019729853, 0.009904097765684128, 0.03278518095612526, 0.0630892813205719, 0.10911130160093307, 0.016624033451080322, 0.011541539803147316, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09455566853284836, 0.047932155430316925, 0.06032469496130943, 0.027359262108802795, 0.004525639116764069, 0.19231697916984558, 0.29536089301109314, 0.10446369647979736, 0.004957688972353935, 0.22148354351520538, 0.017980555072426796, 0.016062501817941666, 0.01227590162307024, 0.007468203082680702, 0.14047065377235413, 0.2451263964176178, 0.014867580495774746, 0.0005470102187246084, 0.0054298522882163525, 0.0004450916312634945, 0.0006575370789505541, 3.8741818570997566e-05, 0.0010275153908878565, 0.0013172366889193654, 0.0019110681023448706, 0.13600468635559082, 0.29138538241386414, 0.011091821826994419, 0.0002334356977371499, 0.0002162840828532353, 0.0001727231137920171, 0.004782650154083967, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18475790321826935, 0.03305341675877571, 0.022945405915379524, 0.02499788999557495, 0.016275716945528984, 0.44049808382987976, 0.3255404233932495, 0.03656867519021034, 0.008760510943830013, 0.28132569789886475, 0.00872495025396347, 0.02103549800813198, 0.09103824943304062, 0.045535117387771606, 0.1431308537721634, 0.18341027200222015, 0.31211209297180176, 0.08544175326824188, 0.17215219140052795, 0.07786234468221664, 0.033002957701683044, 0.028957894071936607, 0.08467604964971542, 0.018818018957972527, 0.0016417433507740498, 0.15075404942035675, 0.1522863805294037, 0.03350237384438515, 0.006119633559137583, 0.022573737427592278, 0.03810621052980423, 0.13675758242607117, 0.1992093175649643, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5226730704307556, 0.08511564135551453, 0.13128292560577393, 0.22977954149246216, 0.025636736303567886, 0.14430683851242065, 0.697600245475769, 0.08303582668304443, 0.03326253592967987, 0.30183717608451843, 0.04944504052400589, 0.004384536296129227, 0.07144975662231445, 0.05258011445403099, 0.06879302859306335, 0.1540856957435608, 0.05453011393547058, 0.023697303608059883, 0.003979950677603483, 0.014029269106686115, 0.1104540005326271, 0.019629694521427155, 0.011429534293711185, 0.010672842152416706, 0.00807265006005764, 0.1843080371618271, 0.19234825670719147, 0.0017768212128430605, 0.006891301833093166, 0.08265318721532822, 0.014878016896545887, 0.09550431370735168, 0.1691773235797882, 0.20674942433834076, NaN, NaN, NaN, NaN, NaN, NaN], [0.06703877449035645, 0.049393996596336365, 0.041539933532476425, 0.021373772993683815, 0.02868128940463066, 0.32991066575050354, 0.488584041595459, 0.0702073872089386, 0.0075523643754422665, 0.038572411984205246, 0.012813442386686802, 0.04136957228183746, 0.06929102540016174, 0.03757195174694061, 0.23515936732292175, 0.21139073371887207, 0.06409671157598495, 0.007977590896189213, 0.017582383006811142, 0.004139575641602278, 0.008497070521116257, 0.024324562400579453, 0.12332659959793091, 0.0006915424601174891, 0.0006991134723648429, 0.09821731597185135, 0.18821127712726593, 0.009975801222026348, 0.024784373119473457, 0.009686794131994247, 0.0016004297649487853, 0.006526788230985403, 0.04246864095330238, 0.05479469522833824, 0.004482009913772345, NaN, NaN, NaN, NaN, NaN], [0.15618596971035004, 0.12941822409629822, 0.2654253840446472, 0.28590527176856995, 0.31243884563446045, 0.1085575670003891, 0.15852880477905273, 0.026613548398017883, 0.004155577160418034, 0.15324708819389343, 0.037679530680179596, 0.09416285902261734, 0.02134908176958561, 0.010629331693053246, 0.17846201360225677, 0.33224669098854065, 0.07294216006994247, 0.01592269167304039, 0.006994656287133694, 0.003661615075543523, 0.0007586313877254725, 0.0006907262722961605, 0.022764746099710464, 0.000276167003903538, 9.849678463069722e-05, 0.08613532781600952, 0.07070992141962051, 0.03258151933550835, 0.002256957348436117, 0.00035050295991823077, 0.002809839555993676, 0.005992868449538946, 0.14088936150074005, 0.024111032485961914, 0.015468394383788109, 0.000736193498596549, NaN, NaN, NaN, NaN], [0.058257974684238434, 0.12017454952001572, 0.32657214999198914, 0.12284700572490692, 0.5568311810493469, 0.41536086797714233, 0.16300946474075317, 0.49100223183631897, 0.15462136268615723, 0.11520260572433472, 0.260068416595459, 0.28476831316947937, 0.501883327960968, 0.21151991188526154, 0.09330709278583527, 0.00368693470954895, 0.0603332445025444, 0.389295369386673, 0.03955860063433647, 0.26089394092559814, 0.125760018825531, 0.029167605563998222, 0.03710402920842171, 0.03377004712820053, 0.08135493099689484, 0.01946301944553852, 0.033920928835868835, 0.00409010099247098, 0.0020981510169804096, 0.4028157889842987, 0.01821253076195717, 0.03254074230790138, 0.005954912398010492, 0.016414301469922066, 0.0033934058155864477, 0.0012025205651298165, 0.37666910886764526, NaN, NaN, NaN], [0.04007576033473015, 0.04011448100209236, 0.02015572600066662, 0.006723308004438877, 0.01584162376821041, 0.6745935082435608, 0.14270515739917755, 0.05812964215874672, 0.0018657244509086013, 0.018765496090054512, 0.004551106132566929, 0.05217724293470383, 0.21886952221393585, 0.13090433180332184, 0.13149680197238922, 0.30478137731552124, 0.23805196583271027, 0.009743728674948215, 0.02953244559466839, 0.005627358797937632, 0.00013927526015322655, 0.00016958850028458983, 0.09182754158973694, 0.00019882968626916409, 0.0018803260754793882, 0.01743759773671627, 0.09691343456506729, 0.09625609964132309, 0.0949849784374237, 0.057061683386564255, 0.028116967529058456, 0.00013736996334046125, 0.022905906662344933, 0.02515738271176815, 0.029101604595780373, 0.01233749371021986, 0.027021989226341248, 0.012159456498920918, NaN, NaN], [0.051524627953767776, 0.037071868777275085, 0.09267362952232361, 0.03285788744688034, 0.006808253470808268, 0.2584725618362427, 0.21142001450061798, 0.06556515395641327, 0.003410812932997942, 0.18829914927482605, 0.028329605236649513, 0.02864006720483303, 0.014232979156076908, 0.014326054602861404, 0.12804241478443146, 0.2508227825164795, 0.013127491809427738, 0.0004774215049110353, 0.005875048227608204, 0.00014762053615413606, 0.0003128673997707665, 1.7799626220948994e-05, 0.0017815351020544767, 0.0009225650574080646, 0.0009481729357503355, 0.09391504526138306, 0.24316561222076416, 0.008820290677249432, 0.0015348505694419146, 0.0002856143401004374, 0.00038499117363244295, 0.010248353704810143, 0.0923430323600769, 0.1539699137210846, 0.0089821582660079, 0.00013843990745954216, 0.0004539538058452308, 6.709429726470262e-05, 0.0014084051363170147, NaN], [0.13503411412239075, 0.06798373907804489, 0.08072269707918167, 0.04104887321591377, 0.027653640136122704, 0.5933560132980347, 0.15723249316215515, 0.044575583189725876, 0.017590617761015892, 0.04771400988101959, 0.07117579132318497, 0.10345834493637085, 0.10624422132968903, 0.027206260710954666, 0.1271171271800995, 0.06230561435222626, 0.051613274961709976, 0.02077883668243885, 0.04204944148659706, 0.07247611880302429, 0.11675790697336197, 0.004215644672513008, 0.00555834174156189, 0.008976897224783897, 0.017200933769345284, 0.007355507928878069, 0.06492317467927933, 0.04215962812304497, 0.02968345396220684, 0.23223130404949188, 0.03253115341067314, 0.08794146776199341, 0.025323374196887016, 0.08459514379501343, 0.05644838511943817, 0.04970480501651764, 0.3588789105415344, 0.028869707137346268, 0.11940079927444458, 0.27181047201156616]], [[0.10194799304008484, 0.042179130017757416, 0.27587375044822693, 0.8387316465377808, 0.3051532208919525, 0.225641667842865, 0.10655678808689117, 0.4426303505897522, 0.21958006918430328, 0.4376780688762665, 0.7421585917472839, 0.6036965250968933, 0.4420715570449829, 0.6119644045829773, 0.08460802584886551, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.052479684352874756, 0.018692737445235252, 0.13130725920200348, 0.4463008642196655, 0.4007475674152374, 0.4465942680835724, 0.13863760232925415, 0.26287177205085754, 0.5015351176261902, 0.48749616742134094, 0.19089040160179138, 0.2783986032009125, 0.20843097567558289, 0.11412637680768967, 0.11901978403329849, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09998084604740143, 0.05760321766138077, 0.06884635984897614, 0.1367950737476349, 0.03696327656507492, 0.02052011340856552, 0.23966658115386963, 0.6639524102210999, 0.08913422375917435, 0.1896458864212036, 0.14239966869354248, 0.18587030470371246, 0.2512775659561157, 0.1800404042005539, 0.13985422253608704, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.17776982486248016, 0.2164098620414734, 0.03016561083495617, 0.006355184596031904, 0.04318562150001526, 0.004709928296506405, 0.02340516820549965, 0.07859960943460464, 0.3921053409576416, 0.27134451270103455, 0.2182498425245285, 0.1118401437997818, 0.13378913700580597, 0.4978374242782593, 0.18931511044502258, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.16739480197429657, 0.20097726583480835, 0.038037389516830444, 0.05488090589642525, 0.020769814029335976, 0.044557277113199234, 0.32692524790763855, 0.5529306530952454, 0.06495681405067444, 0.061963245272636414, 0.3602059483528137, 0.040287844836711884, 0.11072657257318497, 0.3166219890117645, 0.19249440729618073, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07948607206344604, 0.4389178156852722, 0.019072405993938446, 0.11389600485563278, 0.015004596672952175, 0.0008035529754124582, 0.00560334138572216, 0.007579134311527014, 0.12602436542510986, 0.4041804373264313, 0.8435949087142944, 0.7255359292030334, 0.3334953784942627, 0.21919409930706024, 0.13174442946910858, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.11827840656042099, 0.43549492955207825, 0.035650141537189484, 0.3500109016895294, 0.10479609668254852, 0.0029047641437500715, 0.016262628138065338, 0.008920608088374138, 0.1923075020313263, 0.6588289737701416, 0.7271849513053894, 0.8207041025161743, 0.5342087149620056, 0.29674431681632996, 0.16698533296585083, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.19771254062652588, 0.43774574995040894, 0.057631127536296844, 0.15638697147369385, 0.05497771501541138, 0.0015852008946239948, 0.004800108727067709, 0.0038221883587539196, 0.11230877041816711, 0.6780416369438171, 0.6535694003105164, 0.33372464776039124, 0.2617355287075043, 0.4378974735736847, 0.15096917748451233, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2510830760002136, 0.455088347196579, 0.2769528925418854, 0.28598156571388245, 0.08308438956737518, 0.495423823595047, 0.2878262400627136, 0.017540372908115387, 0.036487918347120285, 0.07030303031206131, 0.04537871107459068, 0.017587929964065552, 0.15749330818653107, 0.15622387826442719, 0.134229376912117, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2108728438615799, 0.12734071910381317, 0.6047671437263489, 0.5566261410713196, 0.4727993309497833, 0.6295000314712524, 0.20963285863399506, 0.3828260004520416, 0.01981351152062416, 0.02910005673766136, 0.17932364344596863, 0.029557999223470688, 0.02868420071899891, 0.05513756722211838, 0.1339428722858429, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2013130933046341, 0.35711804032325745, 0.18803814053535461, 0.31239861249923706, 0.6328845024108887, 0.6068195104598999, 0.09879770874977112, 0.295420378446579, 0.033300116658210754, 0.04495004564523697, 0.027333615347743034, 0.034196678549051285, 0.011724627576768398, 0.023517103865742683, 0.3543241322040558, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.27807915210723877, 0.07025524973869324, 0.15421687066555023, 0.23079168796539307, 0.0323871448636055, 0.4182601273059845, 0.43312954902648926, 0.3330070972442627, 0.027521615847945213, 0.03977188467979431, 0.03152378648519516, 0.00340716983191669, 0.005408053286373615, 0.0057552107609808445, 0.23170912265777588, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.15765754878520966, 0.07761365175247192, 0.1382310688495636, 0.33822664618492126, 0.15857987105846405, 0.11602839827537537, 0.3749851584434509, 0.3412497341632843, 0.06253337115049362, 0.09931040555238724, 0.010201470926404, 0.0010190334869548678, 0.0007929145358502865, 0.0016151106683537364, 0.1723894327878952, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.39988550543785095, 0.09145350754261017, 0.3013111352920532, 0.5813722610473633, 0.4042908251285553, 0.2935561537742615, 0.4903331696987152, 0.4357178807258606, 0.04456466808915138, 0.10430204123258591, 0.10590728372335434, 0.007762597873806953, 0.0026525144930928946, 0.0052152471616864204, 0.24974997341632843, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03366217389702797, 0.03653215244412422, 0.027766529470682144, 0.007369572762399912, 0.014929202385246754, 0.04527684673666954, 0.00940654892474413, 0.023517949506640434, 0.010960820131003857, 0.0019369145156815648, 0.01981637440621853, 0.00444602407515049, 0.014915830455720425, 0.007271313574165106, 0.15384840965270996, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04247138649225235, 0.01728098653256893, 0.06617120653390884, 0.009399485774338245, 0.0730140432715416, 0.14221039414405823, 0.11889991164207458, 0.10651882737874985, 0.10687308758497238, 0.0351867638528347, 0.09164245426654816, 0.06160420924425125, 0.04699656739830971, 0.14884592592716217, 0.20088525116443634, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.35919252038002014, 0.017007382586598396, 0.3711448311805725, 0.05260182172060013, 0.23237934708595276, 0.17189942300319672, 0.06846722215414047, 0.25480321049690247, 0.4269619286060333, 0.141769677400589, 0.19745108485221863, 0.3101239502429962, 0.12419883906841278, 0.061588384211063385, 0.3489930033683777, 0.04884753376245499, 0.31528204679489136, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1570073962211609, 0.6818748116493225, 0.08056136965751648, 0.04282544180750847, 0.09609510749578476, 0.21831035614013672, 0.11452964693307877, 0.4344905614852905, 0.09872471541166306, 0.06769980490207672, 0.054214250296354294, 0.015440859831869602, 0.04572026804089546, 0.05267196521162987, 0.06955287605524063, 7.444373295584228e-06, 4.17321571148932e-05, 0.5221405029296875, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1362180858850479, 0.01786869764328003, 0.3548091650009155, 0.13650378584861755, 0.07479218393564224, 0.08773932605981827, 0.007214170414954424, 0.020996512845158577, 0.09793394804000854, 0.26323461532592773, 0.31718939542770386, 0.004400049336254597, 0.01118874829262495, 0.016452480107545853, 0.0059462906792759895, 0.09023705869913101, 0.59262615442276, 0.038057319819927216, 0.1896824985742569, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13787487149238586, 0.02221597172319889, 0.46063661575317383, 0.42787930369377136, 0.16819633543491364, 0.30927538871765137, 0.10940644890069962, 0.14741046726703644, 0.3708270192146301, 0.08424455672502518, 0.34931957721710205, 0.015041538514196873, 0.02219252847135067, 0.0637117251753807, 0.001682900357991457, 0.0001943353418027982, 0.004992108792066574, 0.35714879631996155, 0.028785984963178635, 0.7041940689086914, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09526984393596649, 0.013222168199717999, 0.9035038352012634, 0.8715099692344666, 0.20107677578926086, 0.7829492688179016, 0.28305909037590027, 0.141366645693779, 0.15355023741722107, 0.11376345157623291, 0.804192841053009, 0.012117957696318626, 0.3312073349952698, 0.4514775276184082, 0.016239164397120476, 1.0879062756430358e-05, 5.022298137191683e-05, 0.0836932584643364, 0.0041815838776528835, 0.7177854776382446, 0.4451410174369812, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.34537556767463684, 0.010514522902667522, 0.04824088513851166, 0.12771852314472198, 0.005308120045810938, 0.17857761681079865, 0.2263273000717163, 0.26537755131721497, 0.3297313451766968, 0.3104889690876007, 0.11654951423406601, 0.08535956591367722, 0.02363554947078228, 0.031254567205905914, 0.10634612292051315, 0.003986984025686979, 0.03902542591094971, 0.00027279910864308476, 0.00016326647892128676, 0.09999275952577591, 0.23601794242858887, 0.8888784646987915, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2808375656604767, 0.07436379790306091, 0.11235158890485764, 0.07017786800861359, 0.034851111471652985, 0.01653558947145939, 0.025893066078424454, 0.02911091037094593, 0.23654304444789886, 0.2646749019622803, 0.20617236196994781, 0.25081631541252136, 0.013157923705875874, 0.04621773213148117, 0.2354249358177185, 0.0004483810334932059, 0.01581367664039135, 0.00053547159768641, 0.005416989792138338, 0.0004931549192406237, 1.743426764733158e-06, 0.0002464183489792049, 0.38669928908348083, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5487799644470215, 0.03728892654180527, 0.05227963626384735, 0.18957917392253876, 0.014632479287683964, 0.19499987363815308, 0.29326584935188293, 0.6778355836868286, 0.45779454708099365, 0.33408117294311523, 0.11356081813573837, 0.01941866986453533, 0.010207045823335648, 0.013884961605072021, 0.09069465100765228, 0.0014915558276697993, 0.0036082565784454346, 0.0005674233543686569, 0.0010717788245528936, 0.04321836307644844, 0.5446166396141052, 0.38359156250953674, 0.006869717035442591, 0.0028910271357744932, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09531711786985397, 0.03595840558409691, 0.017401238903403282, 0.061305541545152664, 0.1627957820892334, 0.050434935837984085, 0.05516263470053673, 0.23917846381664276, 0.3637218177318573, 0.09729932248592377, 0.03891580551862717, 0.19205324351787567, 0.041229162365198135, 0.046046942472457886, 0.03756402060389519, 8.035104838199914e-05, 0.005924052093178034, 0.005847892723977566, 0.020417997613549232, 0.11436353623867035, 0.6555760502815247, 0.4247216582298279, 0.04553407058119774, 0.00039129320066422224, 0.013846640475094318, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08811857551336288, 0.010963470675051212, 0.2593647241592407, 0.26678594946861267, 0.42746680974960327, 0.41530901193618774, 0.07491520792245865, 0.18910719454288483, 0.04928334057331085, 0.04599721357226372, 0.4843277335166931, 0.07717985659837723, 0.09353034198284149, 0.07800954580307007, 0.08156391978263855, 0.0012459981953725219, 0.12171746790409088, 0.022806251421570778, 0.021380947902798653, 0.018195364624261856, 0.08835338801145554, 0.20732422173023224, 0.30439698696136475, 0.09951408952474594, 0.2512991428375244, 0.4290468692779541, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04596662148833275, 0.005170373246073723, 0.12165658175945282, 0.15079215168952942, 0.04554709792137146, 0.08856093138456345, 0.04626012593507767, 0.020681705325841904, 0.17637456953525543, 0.26189061999320984, 0.13335715234279633, 0.046832337975502014, 0.018430203199386597, 0.01621258072555065, 0.10917440801858902, 0.007976139895617962, 0.03435874730348587, 0.026849543675780296, 0.002102706115692854, 0.13315419852733612, 0.1177494078874588, 0.08904305100440979, 0.576798677444458, 0.140389084815979, 0.6266443729400635, 0.32779327034950256, 0.5110495090484619, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5138411521911621, 0.0654044821858406, 0.1128465011715889, 0.18054738640785217, 0.038166921585798264, 0.13531430065631866, 0.12295213341712952, 0.28065726161003113, 0.2875981628894806, 0.5909985899925232, 0.601227879524231, 0.03077608533203602, 0.04096299037337303, 0.09236451238393784, 0.1495288461446762, 0.0015641784993931651, 0.09294694662094116, 0.006881145294755697, 0.0020365919917821884, 0.4301930069923401, 0.06383264064788818, 0.0045266724191606045, 0.17422647774219513, 0.00404678238555789, 0.006469257641583681, 0.052995309233665466, 0.1725381463766098, 0.668171763420105, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07072688639163971, 0.012152088806033134, 0.021357353776693344, 0.04663744568824768, 0.020319821313023567, 0.05489102751016617, 0.07223928719758987, 0.23148301243782043, 0.18188072741031647, 0.10590049624443054, 0.10450157523155212, 0.03876996785402298, 0.13536545634269714, 0.10362161695957184, 0.12556865811347961, 0.004304439760744572, 0.05993141233921051, 0.054169829934835434, 0.025809768587350845, 0.7262899279594421, 0.2466905415058136, 0.15344326198101044, 0.33606013655662537, 0.02952432446181774, 0.07010773569345474, 0.008777104318141937, 0.03394261747598648, 0.032566726207733154, 0.6152393221855164, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07390952110290527, 0.023819932714104652, 0.4992673993110657, 0.293674498796463, 0.18016116321086884, 0.3294305205345154, 0.5326097011566162, 0.20817913115024567, 0.231731578707695, 0.17336609959602356, 0.4696378707885742, 0.3560185134410858, 0.5055418610572815, 0.687153697013855, 0.06569264829158783, 1.0540320545260329e-05, 0.0013190202880650759, 0.20101842284202576, 0.004686327185481787, 0.13271625339984894, 0.04526880756020546, 0.0007031870190985501, 0.0011485026916489005, 0.002882149303331971, 0.0005991549696773291, 0.0030197217129170895, 0.004800362046808004, 0.004403174854815006, 0.002436757553368807, 0.4002683460712433, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19887569546699524, 0.009285598993301392, 0.17495201528072357, 0.1799449920654297, 0.0410592183470726, 0.0050115324556827545, 0.025978662073612213, 0.011312133632600307, 0.04069671407341957, 0.23767657577991486, 0.3294059634208679, 0.09899688512086868, 0.03285939246416092, 0.08387716114521027, 0.04885585233569145, 0.0003210107679478824, 0.5876501798629761, 0.16318874061107635, 0.7096263766288757, 0.11595475673675537, 0.007003267295658588, 0.001205803593620658, 0.1902448534965515, 0.011727835983037949, 0.44888344407081604, 0.8117052912712097, 0.45698752999305725, 0.023960944265127182, 0.010929742828011513, 0.005293603055179119, 0.00987145397812128, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.054675761610269547, 0.04458622261881828, 0.0536046139895916, 0.016943499445915222, 0.02146792784333229, 0.1686052531003952, 0.036354243755340576, 0.08614800870418549, 0.1611979901790619, 0.170720174908638, 0.163726344704628, 0.09202460944652557, 0.016866492107510567, 0.019021833315491676, 0.13082824647426605, 0.020372437313199043, 0.3410835862159729, 0.6929088234901428, 0.04383905977010727, 0.1458517462015152, 0.4223538339138031, 0.9439106583595276, 0.9473816156387329, 0.15120889246463776, 0.7730743288993835, 0.5082507133483887, 0.0460858978331089, 0.032336097210645676, 0.011211436241865158, 0.009573124349117279, 0.0003536108124535531, 0.06564418971538544, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.254617303609848, 0.09600356966257095, 0.5283652544021606, 0.35948434472084045, 0.11690203100442886, 0.22449535131454468, 0.07030754536390305, 0.14074397087097168, 0.11056768894195557, 0.2017645388841629, 0.5897989273071289, 0.032950446009635925, 0.0850306898355484, 0.16881772875785828, 0.07667817175388336, 0.020423829555511475, 0.09150233864784241, 0.593336284160614, 0.050333935767412186, 0.04262891411781311, 0.44151586294174194, 0.7098277807235718, 0.36869171261787415, 0.7183430194854736, 0.3146522641181946, 0.5934929251670837, 0.08962199836969376, 0.01141325756907463, 0.0268073882907629, 0.008290876634418964, 0.022364463657140732, 0.0520397312939167, 0.3134966492652893, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06611059606075287, 0.009380446746945381, 0.1600489318370819, 0.18714633584022522, 0.028496628627181053, 0.28509950637817383, 0.06793918460607529, 0.036412376910448074, 0.3864555358886719, 0.38031718134880066, 0.19321800768375397, 0.03279240429401398, 0.024823389947414398, 0.02684853971004486, 0.10572600364685059, 0.008604546077549458, 0.07562410086393356, 0.10463645309209824, 0.003217896446585655, 0.1296835094690323, 0.21162182092666626, 0.30799001455307007, 0.7962209582328796, 0.27782267332077026, 0.5974112749099731, 0.3643631041049957, 0.5975222587585449, 0.032379183918237686, 0.8344925045967102, 0.5903766751289368, 0.1521190106868744, 0.10492946952581406, 0.10503242909908295, 0.5022279620170593, NaN, NaN, NaN, NaN, NaN, NaN], [0.5806823372840881, 0.09046274423599243, 0.1468239277601242, 0.2587219774723053, 0.018666794523596764, 0.17986845970153809, 0.1758078932762146, 0.26734092831611633, 0.30597683787345886, 0.6407824158668518, 0.6427304148674011, 0.011203133501112461, 0.017842967063188553, 0.05609212443232536, 0.1528221219778061, 0.0010157334618270397, 0.08574047684669495, 0.010654903016984463, 0.003869200125336647, 0.15051355957984924, 0.02434478886425495, 0.005829520523548126, 0.10341739654541016, 0.0023463659454137087, 0.00469975033774972, 0.1621563881635666, 0.27765417098999023, 0.6246147155761719, 0.44377410411834717, 0.0757245346903801, 0.08620554953813553, 0.08146335929632187, 0.32109129428863525, 0.1958039551973343, 0.5327519178390503, NaN, NaN, NaN, NaN, NaN], [0.09578646719455719, 0.04883359372615814, 0.014442636631429195, 0.07719788700342178, 0.013871591538190842, 0.24272511899471283, 0.11848346889019012, 0.48695430159568787, 0.10090471804141998, 0.15632015466690063, 0.12246286869049072, 0.056596189737319946, 0.051980338990688324, 0.03806659206748009, 0.1369783878326416, 0.0009064326295629144, 0.04867112636566162, 0.09537991136312485, 0.12993541359901428, 0.38632717728614807, 0.056282784789800644, 0.13602504134178162, 0.18383464217185974, 0.024170320481061935, 0.09972675889730453, 0.022063996642827988, 0.042059145867824554, 0.01842264086008072, 0.8592916131019592, 0.1306053251028061, 0.06485681235790253, 0.048735883086919785, 0.037178389728069305, 0.017466288059949875, 0.006924192421138287, 0.8764364123344421, NaN, NaN, NaN, NaN], [0.12923087179660797, 0.04506811499595642, 0.5631698966026306, 0.4945719838142395, 0.16776354610919952, 0.4656532406806946, 0.6344242095947266, 0.28209388256073, 0.297488808631897, 0.3520771265029907, 0.6463941931724548, 0.3803158104419708, 0.4924411177635193, 0.6891878843307495, 0.08469904214143753, 1.2418378219081205e-06, 0.0003037750138901174, 0.10264009237289429, 0.0010840333998203278, 0.03004724159836769, 0.00720690144225955, 0.00017297905287705362, 0.00021026108879595995, 0.0005732537247240543, 0.00013229742762632668, 0.0014890850288793445, 0.0027206502854824066, 0.0022100789938122034, 0.0018764312844723463, 0.22427155077457428, 0.0012303950497880578, 0.0001426686649210751, 0.0015814924845471978, 0.00487141590565443, 0.0029599322006106377, 0.003610847517848015, 0.41901907324790955, NaN, NaN, NaN], [0.3177553117275238, 0.027823492884635925, 0.11541304737329483, 0.1464630663394928, 0.010460668243467808, 0.028609508648514748, 0.14352867007255554, 0.043905869126319885, 0.18215790390968323, 0.6030426025390625, 0.38763877749443054, 0.1293274313211441, 0.07180552184581757, 0.1464845985174179, 0.10971048474311829, 0.00015546051145065576, 0.5271192193031311, 0.2684091329574585, 0.7487277388572693, 0.0846778005361557, 0.003557654097676277, 0.0064069912768900394, 0.16770148277282715, 0.008421340025961399, 0.27412623167037964, 0.8534677624702454, 0.5243650078773499, 0.02665238454937935, 0.01776440255343914, 0.013793676160275936, 0.00868560466915369, 0.08064579218626022, 0.69512540102005, 0.49261555075645447, 0.010526523925364017, 0.0028473760467022657, 0.008281596936285496, 0.007198471110314131, NaN, NaN], [0.03459807112812996, 0.05000016465783119, 0.02839210256934166, 0.008521324954926968, 0.009519261308014393, 0.12168280780315399, 0.03372196480631828, 0.07665831595659256, 0.21765880286693573, 0.11945746093988419, 0.0821232944726944, 0.058310747146606445, 0.011853469535708427, 0.02031784877181053, 0.13586042821407318, 0.03285643830895424, 0.3327244818210602, 0.7442528605461121, 0.049526505172252655, 0.13722854852676392, 0.37294694781303406, 0.9746374487876892, 0.9050161242485046, 0.144730344414711, 0.44314900040626526, 0.6168692708015442, 0.18840178847312927, 0.12898683547973633, 0.1250022053718567, 0.01759251020848751, 0.0030696040485054255, 0.6704888939857483, 0.3205258250236511, 0.28675025701522827, 0.09770815074443817, 0.0085873082280159, 0.028106005862355232, 0.0015327840810641646, 0.12156207114458084, NaN], [0.02964477799832821, 0.1353258490562439, 0.017653465270996094, 0.011115004308521748, 0.008141545578837395, 0.05911250412464142, 0.01831989735364914, 0.05519499629735947, 0.03573962301015854, 0.02204814739525318, 0.05097896233201027, 0.08341387659311295, 0.08060181885957718, 0.10490117967128754, 0.13247323036193848, 0.027913866564631462, 0.6360336542129517, 0.8947576880455017, 0.5603421926498413, 0.3501611351966858, 0.3494046926498413, 0.7655782103538513, 0.9696423411369324, 0.8922762274742126, 0.42980051040649414, 0.4555767774581909, 0.17016178369522095, 0.1410100758075714, 0.652664303779602, 0.2781027853488922, 0.07839874923229218, 0.11400053650140762, 0.10023999214172363, 0.04957454651594162, 0.07193805277347565, 0.5185664892196655, 0.15356925129890442, 0.02747632935643196, 0.046240244060754776, 0.017650051042437553]], [[0.011476250365376472, 0.7629169225692749, 0.02116730809211731, 0.010803135111927986, 0.005132503807544708, 0.009303245693445206, 0.0005040443502366543, 0.022131631150841713, 0.001470191520638764, 0.0017710012616589665, 0.0004086543631274253, 0.0022351557854562998, 0.000896299781743437, 0.0005698543391190469, 0.019197434186935425, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0024000771809369326, 0.158247172832489, 0.01897430047392845, 0.019486481323838234, 0.0029122373089194298, 0.015832845121622086, 0.0017470666207373142, 0.00117065932136029, 0.01016113068908453, 0.007651789113879204, 0.0020597530528903008, 0.015201352536678314, 0.016943661496043205, 0.009769451804459095, 0.16634535789489746, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.00410552928224206, 0.0015743908006697893, 0.01049637421965599, 0.006504607852548361, 0.035339318215847015, 0.9065937995910645, 0.2998698651790619, 0.12215600907802582, 0.013029203750193119, 0.000650988076813519, 0.002043183660134673, 0.006920983083546162, 0.09688588231801987, 0.057574767619371414, 0.009054930880665779, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.007287806831300259, 0.01375514268875122, 0.001530585577711463, 0.007056740578263998, 0.01978658139705658, 0.9208202958106995, 0.2214416116476059, 0.30606138706207275, 0.052588097751140594, 0.004079628270119429, 0.0024339878000319004, 0.0028739250265061855, 0.04695972800254822, 0.045893676578998566, 0.0110039496794343, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.006429406348615885, 0.016907041892409325, 0.0023819799534976482, 0.0003115522558800876, 0.006808500271290541, 0.9102355241775513, 0.15379303693771362, 0.07056371122598648, 0.06324119120836258, 0.0030630400869995356, 0.007665702607482672, 0.002797773340716958, 0.13533660769462585, 0.03197972849011421, 0.006115978583693504, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.014356410130858421, 0.0526699461042881, 0.0007501932559534907, 0.008851941674947739, 0.0005067299935035408, 0.035332534462213516, 0.09051518887281418, 0.049224019050598145, 0.014900125563144684, 0.01856788620352745, 0.0012414768571034074, 0.002389064058661461, 0.0018446464091539383, 0.000877396494615823, 0.22725383937358856, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0025407460052520037, 0.32041609287261963, 0.0036992463283240795, 0.02451898716390133, 0.007920290343463421, 0.015527674928307533, 0.03544912114739418, 0.29718661308288574, 0.02347515895962715, 0.026838794350624084, 0.01756858080625534, 0.010445725172758102, 0.005995406303554773, 0.0005847325082868338, 0.2055930197238922, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009255345910787582, 0.034783441573381424, 0.010831266641616821, 0.02782595343887806, 0.001477425335906446, 0.006871670484542847, 0.006518858019262552, 0.0072874827310442924, 0.012387615628540516, 0.05288432911038399, 0.04645476117730141, 0.02255677618086338, 0.014156763441860676, 0.00417641457170248, 0.22105874121189117, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0017225841293111444, 0.0049251834861934185, 0.007573804818093777, 0.014873476698994637, 0.00903867557644844, 0.0076865823939442635, 0.0017025101697072387, 0.00023153165238909423, 0.024773191660642624, 0.1742238849401474, 0.6002998948097229, 0.6145275831222534, 0.25023365020751953, 0.35489538311958313, 0.039457567036151886, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0034636815544217825, 0.39023807644844055, 0.0018667654367163777, 0.0006454490358009934, 0.00025732445647008717, 0.026610050350427628, 0.0026998629327863455, 0.014584111049771309, 0.00032847325201146305, 0.0012709795264527202, 0.07417861372232437, 0.43676891922950745, 0.25757044553756714, 0.32731080055236816, 0.12109360098838806, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0014396773185580969, 0.07700426131486893, 0.0003769460890907794, 0.0015669490676373243, 0.0010665652807801962, 0.05166712775826454, 0.003733921330422163, 0.00829349085688591, 9.729996236274019e-05, 0.0004270579374860972, 0.0022819112055003643, 0.3744491934776306, 0.2681969404220581, 0.4920969009399414, 0.028773367404937744, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.19549021124839783, 0.5118218064308167, 0.053603943437337875, 0.004430307075381279, 0.0015711480518803, 0.024018822237849236, 0.0441354438662529, 0.04134393110871315, 0.0014472270850092173, 0.024767767637968063, 0.029112013056874275, 0.08014442026615143, 0.4702226519584656, 0.40423843264579773, 0.14477935433387756, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.034691162407398224, 0.09692039340734482, 0.003936667460948229, 0.0164506658911705, 0.0005446859868243337, 0.0016573348548263311, 0.02795562334358692, 0.12881094217300415, 0.0004645287699531764, 0.0021237744949758053, 0.0010291342623531818, 0.001068241661414504, 0.00471450574696064, 0.019945403560996056, 0.19273433089256287, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04783029109239578, 0.11157537996768951, 0.02325829118490219, 0.12799327075481415, 0.0216610599309206, 0.41526544094085693, 0.129922553896904, 0.14850500226020813, 0.0009580283658578992, 0.008097043260931969, 0.01107556838542223, 0.019478609785437584, 0.2748490571975708, 0.11550750583410263, 0.15876543521881104, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015012643299996853, 0.00804762914776802, 0.00366173661313951, 0.0018753333715721965, 0.0065993256866931915, 0.00479541253298521, 0.005337378475815058, 0.012457020580768585, 0.0033909485209733248, 0.0032401280477643013, 0.00048777347547002137, 0.012255984358489513, 0.0006230318685993552, 0.001543535152450204, 0.1572250872850418, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.20067201554775238, 0.150595024228096, 0.3375815153121948, 0.5753223896026611, 0.03983612731099129, 0.13901081681251526, 0.37267425656318665, 0.07406412810087204, 0.07071352750062943, 0.22996902465820312, 0.35784539580345154, 0.0401473231613636, 0.03251379355788231, 0.07572956383228302, 0.005637211725115776, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.055522263050079346, 0.0030253075528889894, 0.054468654096126556, 0.18383808434009552, 0.2751407325267792, 0.06163792684674263, 0.5092534422874451, 0.21577699482440948, 0.23691882193088531, 0.32801976799964905, 0.29786956310272217, 0.4967685043811798, 0.6341143250465393, 0.7677603363990784, 0.40264371037483215, 0.02477514185011387, 0.37543168663978577, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0005822544917464256, 0.0004425827646628022, 0.0014265297213569283, 0.0006841197027824819, 0.03406556695699692, 0.0010687633184716105, 0.0028485425282269716, 0.020860498771071434, 0.05133597180247307, 0.002158694202080369, 0.002441320102661848, 0.037159714847803116, 0.005256796721369028, 0.008102376013994217, 0.16207638382911682, 0.02274254709482193, 0.6458237767219543, 0.013541627675294876, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20224374532699585, 0.7376267313957214, 0.004014236852526665, 0.0103965038433671, 0.07275543361902237, 0.03262623772025108, 0.04577071964740753, 0.5017040371894836, 0.12205435335636139, 0.19255708158016205, 0.006990006659179926, 0.028381695970892906, 0.046785227954387665, 0.15206293761730194, 0.330488920211792, 0.03146426007151604, 0.019330549985170364, 0.019686071202158928, 0.5363749265670776, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3634231686592102, 0.404717355966568, 0.00689590023830533, 0.04770800471305847, 0.0251657422631979, 0.0006883289897814393, 0.02071242779493332, 0.019072405993938446, 0.15776626765727997, 0.3694642186164856, 0.036826737225055695, 0.23951902985572815, 0.011015082709491253, 0.04999716952443123, 0.2037181556224823, 0.05261930450797081, 0.12757715582847595, 0.003555318573489785, 0.48483166098594666, 0.00033596818684600294, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.8270207643508911, 0.8942698836326599, 0.020243747159838676, 0.04263966530561447, 0.09284591674804688, 0.054453812539577484, 0.21418678760528564, 0.23612302541732788, 0.5479635000228882, 0.7225908041000366, 0.08608872443437576, 0.5934221148490906, 0.30024465918540955, 0.22648638486862183, 0.12622572481632233, 0.09825422614812851, 0.08890903741121292, 0.0022953739389777184, 0.3788372278213501, 6.525879871333018e-05, 3.547202504705638e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.043734412640333176, 0.7137998342514038, 0.1370490938425064, 0.045488547533750534, 0.06789389997720718, 0.49671053886413574, 0.1280447244644165, 0.4211912155151367, 0.03652801364660263, 0.041476957499980927, 0.08040425181388855, 0.19641457498073578, 0.603863537311554, 0.49263066053390503, 0.07636027038097382, 0.1839720457792282, 0.005392392631620169, 0.0012601928319782019, 0.000860364583786577, 0.0008281354093924165, 0.0005760629428550601, 0.002849774667993188, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.017375759780406952, 0.012506993487477303, 0.020720014348626137, 0.011049210093915462, 0.03743210807442665, 0.0072485157288610935, 0.03524084761738777, 0.005443913396447897, 0.24646395444869995, 0.048276107758283615, 0.03640883043408394, 0.507624089717865, 0.15355341136455536, 0.1730290949344635, 0.2644885182380676, 0.005911883432418108, 0.0029267233330756426, 0.007144090253859758, 0.001919957809150219, 0.004637785721570253, 0.004848909098654985, 0.006189228966832161, 0.3764636814594269, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09840062260627747, 0.7509858012199402, 0.13933908939361572, 0.13482652604579926, 0.18154919147491455, 0.32397931814193726, 0.23646889626979828, 0.11657525599002838, 0.03430478647351265, 0.1277371644973755, 0.15700362622737885, 0.24829043447971344, 0.7591869831085205, 0.7825927138328552, 0.06869770586490631, 0.2256152480840683, 0.0020181250292807817, 0.0012439934071153402, 0.00031968209077604115, 0.0029859780333936214, 0.017534615471959114, 0.0004058087943121791, 0.00034323628642596304, 0.029154805466532707, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22806629538536072, 0.6706615686416626, 0.2560598850250244, 0.17412559688091278, 0.6327939033508301, 0.04699348285794258, 0.058767881244421005, 0.11556732654571533, 0.09056147933006287, 0.3648419678211212, 0.5388886332511902, 0.261055588722229, 0.6016876697540283, 0.7496042847633362, 0.0894755870103836, 0.03960844501852989, 0.0036635666619986296, 0.00109457119833678, 0.0017422186210751534, 0.022469639778137207, 0.004235065542161465, 0.007348764222115278, 0.00280297570861876, 0.030011437833309174, 0.576508641242981, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5419997572898865, 0.6956567168235779, 0.044124722480773926, 0.12586495280265808, 0.048711128532886505, 0.11729516834020615, 0.4073715806007385, 0.43757542967796326, 0.032695479691028595, 0.4824156165122986, 0.05927032604813576, 0.04766178876161575, 0.25393223762512207, 0.23675066232681274, 0.10572775453329086, 0.0628783106803894, 0.014568633399903774, 0.003403500886633992, 0.005917230620980263, 0.009509358555078506, 0.0019911406561732292, 0.005211993586272001, 0.01603839360177517, 0.00502167409285903, 0.3301290273666382, 0.10268117487430573, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09369882941246033, 0.5731168985366821, 0.13611510396003723, 0.13756731152534485, 0.024227088317275047, 0.31910547614097595, 0.16772453486919403, 0.1680929958820343, 0.09319504350423813, 0.0998181626200676, 0.22465890645980835, 0.00899507012218237, 0.16640731692314148, 0.25350457429885864, 0.09016240388154984, 0.178706556558609, 0.5124386548995972, 0.028256116434931755, 0.011254883371293545, 0.03223628178238869, 0.0004171380714979023, 0.004843876231461763, 0.09010603278875351, 0.0025540743954479694, 0.016201328486204147, 0.029397757723927498, 0.010837158188223839, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02838694490492344, 0.30040091276168823, 0.005878766532987356, 0.015430719591677189, 0.017050068825483322, 0.06605669111013412, 0.12745192646980286, 0.23377051949501038, 0.08052214235067368, 0.033177152276039124, 0.06731567531824112, 0.07575374841690063, 0.18187224864959717, 0.570769727230072, 0.04572387412190437, 0.18362975120544434, 0.10373001545667648, 0.006869313772767782, 0.010921900160610676, 0.01820673979818821, 0.0017379705095663667, 0.002349345711991191, 0.03729201853275299, 5.792165029561147e-05, 0.0013579311780631542, 0.0025659396778792143, 0.008523254655301571, 0.1568114459514618, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2655380666255951, 0.4107033908367157, 0.04865417629480362, 0.08488347381353378, 0.04310445114970207, 0.10849997401237488, 0.15643075108528137, 0.04165918007493019, 0.12898734211921692, 0.11095981299877167, 0.23520684242248535, 0.10632039606571198, 0.055878568440675735, 0.24558725953102112, 0.17682571709156036, 0.060853905975818634, 0.016029829159379005, 0.001439533894881606, 0.017260756343603134, 0.0007974627078510821, 0.0012342276750132442, 0.028226196765899658, 0.0047790613025426865, 0.0015612602001056075, 0.004867547657340765, 0.039023980498313904, 0.05208572745323181, 0.33480554819107056, 0.17332881689071655, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.8565200567245483, 0.8639481067657471, 0.0803997814655304, 0.36449819803237915, 0.17448320984840393, 0.12402030825614929, 0.13765643537044525, 0.2065785825252533, 0.18182852864265442, 0.6806339025497437, 0.1919344812631607, 0.19068314135074615, 0.004361266735941172, 0.01490570418536663, 0.13936595618724823, 0.043774526566267014, 0.2669547498226166, 0.035314492881298065, 0.1941595822572708, 0.006638282909989357, 0.005091785918921232, 0.2628510892391205, 0.2860943675041199, 0.06445851922035217, 0.34950578212738037, 0.6430334448814392, 0.5673049688339233, 0.6101463437080383, 0.29372307658195496, 0.0028161092195659876, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22751423716545105, 0.21127405762672424, 0.005130667705088854, 0.028237944468855858, 0.06646221876144409, 0.045109983533620834, 0.478432834148407, 0.6443154215812683, 0.140235036611557, 0.0980456992983818, 0.006476161070168018, 0.038696710020303726, 0.25798937678337097, 0.10561345517635345, 0.16755780577659607, 0.018545497208833694, 0.059764593839645386, 0.0026272537652403116, 0.020267995074391365, 0.009687644429504871, 0.00033462722785770893, 0.0024671528954058886, 0.054633729159832, 5.4464391723740846e-05, 0.00043273900519125164, 0.0019224031129851937, 0.21117039024829865, 0.3183750510215759, 0.03866858780384064, 0.011778384447097778, 0.1297062188386917, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3886019289493561, 0.36600789427757263, 0.07069597393274307, 0.12792876362800598, 0.0629734918475151, 0.0820467472076416, 0.2973020672798157, 0.27475541830062866, 0.019707435742020607, 0.2982620298862457, 0.24423947930335999, 0.05686682090163231, 0.23438367247581482, 0.3444555997848511, 0.09858046472072601, 0.0004199208051431924, 4.603992783813737e-05, 8.09443406524224e-07, 2.029701317951549e-05, 3.386533080629306e-06, 2.203315261795069e-06, 4.220597020321293e-06, 8.901660294213798e-06, 0.00016298270202241838, 0.000983458710834384, 0.0005640776362270117, 0.0008154786773957312, 0.001651398022659123, 2.400618996034609e-06, 3.3168395020766184e-05, 6.549440058734035e-06, 0.8699775338172913, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.31350865960121155, 0.5118260383605957, 0.01775331422686577, 0.060602445155382156, 0.015971101820468903, 0.03445184975862503, 0.4316053092479706, 0.4819965064525604, 0.008238772861659527, 0.27349013090133667, 0.02135261707007885, 0.006705985404551029, 0.06119696795940399, 0.05213680863380432, 0.13011163473129272, 0.06053417548537254, 0.012584012933075428, 0.0010002547642216086, 0.0027718576602637768, 0.006610550452023745, 0.0029896856285631657, 0.008355176076292992, 0.048459943383932114, 0.002307809190824628, 0.65205979347229, 0.1651758849620819, 0.011300449259579182, 0.029586348682641983, 0.014456091448664665, 0.0007872084970586002, 0.0008902085828594863, 0.029332326725125313, 0.16636918485164642, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11128952354192734, 0.6662537455558777, 0.10913366079330444, 0.08027850091457367, 0.016604425385594368, 0.1904260814189911, 0.09001538157463074, 0.12034764140844345, 0.032395973801612854, 0.07767382264137268, 0.13288450241088867, 0.0038343279156833887, 0.15461067855358124, 0.13092683255672455, 0.1198263093829155, 0.19553376734256744, 0.2426333725452423, 0.004519153386354446, 0.00883245188742876, 0.006844275165349245, 0.00014635240950156003, 0.00260242260992527, 0.03859727829694748, 0.0011520206462591887, 0.014703472144901752, 0.016579829156398773, 0.003783928230404854, 0.01771795004606247, 0.0035672299563884735, 0.000677697011269629, 0.002100451150909066, 0.023971345275640488, 0.03231354430317879, 0.011524699628353119, NaN, NaN, NaN, NaN, NaN, NaN], [0.045069050043821335, 0.5156355500221252, 0.014353718608617783, 0.026371080428361893, 0.027669712901115417, 0.08119883388280869, 0.2510265111923218, 0.45373910665512085, 0.0644708126783371, 0.03346102684736252, 0.06456929445266724, 0.036929432302713394, 0.1635800451040268, 0.4964689314365387, 0.12627021968364716, 0.17035169899463654, 0.07290639728307724, 0.0013864204520359635, 0.008776376023888588, 0.010795027948915958, 0.0008890280150808394, 0.00375909055583179, 0.03264426812529564, 2.1074760297778994e-05, 0.0009656226029619575, 0.004805654752999544, 0.015095297247171402, 0.19429266452789307, 0.060086220502853394, 0.013300183229148388, 0.019145654514431953, 0.08634541183710098, 0.018065713346004486, 0.012390428222715855, 0.3474832773208618, NaN, NaN, NaN, NaN, NaN], [0.15574656426906586, 0.22756966948509216, 0.016156630590558052, 0.0469389408826828, 0.01719032973051071, 0.01580459624528885, 0.07493647187948227, 0.02412206307053566, 0.018628407269716263, 0.03879624605178833, 0.03891688585281372, 0.03379734605550766, 0.008454171009361744, 0.03055991418659687, 0.1906210333108902, 0.002681915881112218, 0.0020622191950678825, 1.740588413667865e-05, 0.001647116499952972, 2.462047996232286e-05, 1.4256034774007276e-05, 0.0023770714178681374, 0.0007797144935466349, 6.146806117612869e-05, 0.00019536878971848637, 0.023629816249012947, 0.022664623335003853, 0.058040015399456024, 0.02328144572675228, 0.00014305225340649486, 0.1791975051164627, 0.7950490117073059, 0.40287262201309204, 0.05916967615485191, 0.11726692318916321, 0.045271970331668854, NaN, NaN, NaN, NaN], [0.7930518984794617, 0.8248118162155151, 0.03787774592638016, 0.2306395173072815, 0.10945193469524384, 0.048738475888967514, 0.07385316491127014, 0.1171715259552002, 0.09199279546737671, 0.5013920664787292, 0.07074998319149017, 0.14583703875541687, 0.0018764830892905593, 0.00646476075053215, 0.13562877476215363, 0.017539121210575104, 0.07800457626581192, 0.013338283635675907, 0.07843150943517685, 0.003389358287677169, 0.0011982140131294727, 0.07936429977416992, 0.08406823873519897, 0.016710255295038223, 0.13201765716075897, 0.339507520198822, 0.3268124461174011, 0.4709261357784271, 0.24707961082458496, 0.0009133804705925286, 0.27326905727386475, 0.539431095123291, 0.8842423558235168, 0.5773340463638306, 0.643308699131012, 0.15606866776943207, 0.0011033734772354364, NaN, NaN, NaN], [0.139163076877594, 0.17112046480178833, 0.0021531793754547834, 0.0053843106143176556, 0.013183848932385445, 0.014547600410878658, 0.39682450890541077, 0.7216413021087646, 0.013683686964213848, 0.038195278495550156, 0.0014429710572585464, 0.0075409854762256145, 0.06976743042469025, 0.016425929963588715, 0.1257757991552353, 0.0009739195229485631, 0.0011780881322920322, 3.265493069193326e-05, 0.0005334040033631027, 0.0007281061843968928, 3.2774634746601805e-05, 0.0004276044783182442, 0.00342408730648458, 2.9227990125946235e-06, 5.522280844161287e-05, 0.00012372780474834144, 0.011400841176509857, 0.008755120448768139, 0.0017365129897370934, 0.0007705622701905668, 0.0024924452882260084, 0.4634210169315338, 0.010356471873819828, 0.06587640196084976, 0.03498200699687004, 0.005118835251778364, 0.0019369632937014103, 0.023791478946805, NaN, NaN], [0.37428542971611023, 0.3404470980167389, 0.07186836749315262, 0.11062464118003845, 0.09624961018562317, 0.06910651177167892, 0.26704323291778564, 0.35990291833877563, 0.016681469976902008, 0.31615501642227173, 0.23382727801799774, 0.051282789558172226, 0.1643712818622589, 0.24623094499111176, 0.1059461385011673, 0.00023119446996133775, 9.065014637599234e-06, 3.0932378081161005e-07, 7.128239758458221e-06, 2.417179757685517e-06, 1.9917408735636855e-06, 1.0686825362427044e-06, 3.5747166293731425e-06, 3.038432441826444e-05, 0.00024045849568210542, 0.00012102597975172102, 0.0003720777458511293, 0.0005474414792843163, 4.2138731259910855e-06, 8.004362825886346e-06, 4.010584234492853e-06, 0.22906039655208588, 0.00024706448311917484, 0.003541025100275874, 0.0035716970451176167, 1.1338630656609894e-06, 4.888530747848563e-05, 2.00755093828775e-05, 0.8455927968025208, NaN], [0.2896858751773834, 0.2041676938533783, 0.0844137892127037, 0.26597079634666443, 0.007990201003849506, 0.057605594396591187, 0.37075188755989075, 0.33039090037345886, 0.04668770357966423, 0.6492098569869995, 0.34850311279296875, 0.12703292071819305, 0.22453922033309937, 0.2423134297132492, 0.11649563163518906, 0.023575956001877785, 0.001566409133374691, 0.0004935376346111298, 0.015205318108201027, 0.0005761805805377662, 0.00026375881861895323, 0.0017682479228824377, 0.00015503005124628544, 0.011253873817622662, 0.321735680103302, 0.05970581993460655, 0.008942467160522938, 0.051820773631334305, 0.009087985381484032, 0.002068085130304098, 0.00584985688328743, 0.01019755844026804, 0.16441591084003448, 0.021173937246203423, 0.09159599989652634, 0.004452125634998083, 0.0037374526727944613, 0.01578103005886078, 0.01742226630449295, 0.3373567461967468]]], [[[0.016101790592074394, 0.0050575402565300465, 0.008322462439537048, 0.006855499465018511, 0.003766664071008563, 0.0032708626240491867, 0.008669405244290829, 0.016983401030302048, 0.023632090538740158, 0.0007983215618878603, 0.006762287113815546, 0.019076332449913025, 0.0018054646207019687, 0.011848386377096176, 0.23875673115253448, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03118298575282097, 0.022700916975736618, 0.01820814236998558, 0.011041272431612015, 0.013735579326748848, 0.003388292621821165, 0.014374880120158195, 0.0029534229543060064, 0.06276529282331467, 0.0010488847037777305, 0.005698299501091242, 0.018068330362439156, 0.009247002191841602, 0.010645000264048576, 0.2274351567029953, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.10749327391386032, 0.01361121516674757, 0.01930609717965126, 0.025707745924592018, 0.010174103081226349, 0.0019352196250110865, 0.006933925207704306, 0.026056114584207535, 0.003662128932774067, 0.006897854618728161, 0.0015213300939649343, 0.006132383830845356, 0.0028239174280315638, 0.013304864056408405, 0.22739072144031525, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.25010421872138977, 0.005582309328019619, 0.006115755997598171, 0.08664196729660034, 0.005224197171628475, 0.005311913322657347, 0.03281412273645401, 0.024678068235516548, 0.018595430999994278, 0.0819764956831932, 0.005479714833199978, 0.008821909315884113, 0.02042486146092415, 0.03525637462735176, 0.19444485008716583, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1781134456396103, 0.021083489060401917, 0.038613177835941315, 0.16417931020259857, 0.0029645320028066635, 0.00899361353367567, 0.009076704271137714, 0.01357053779065609, 0.01101364754140377, 0.04086701199412346, 0.014270029030740261, 0.011464214883744717, 0.011689195409417152, 0.0706799253821373, 0.3730076551437378, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3090042769908905, 0.031162124127149582, 0.033009856939315796, 0.14512063562870026, 0.00411824369803071, 0.07382509857416153, 0.02702517993748188, 0.07667822390794754, 0.021658627316355705, 0.01615101285278797, 0.0066233747638762, 0.008623828180134296, 0.0008525048615410924, 0.011195158585906029, 0.2578849792480469, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3291372060775757, 0.0561586357653141, 0.4192807674407959, 0.4571635127067566, 0.057550910860300064, 0.04359428584575653, 0.005270917434245348, 0.03804505616426468, 0.03733760863542557, 0.20409555733203888, 0.04554562643170357, 0.024629684165120125, 0.018161950632929802, 0.04353561997413635, 0.145583838224411, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3828665316104889, 0.019200418144464493, 0.34599530696868896, 0.4376910328865051, 0.07537391781806946, 0.036528222262859344, 0.04610925167798996, 0.04538694769144058, 0.1663823127746582, 0.04690397158265114, 0.05553056299686432, 0.021811597049236298, 0.012554574757814407, 0.03599526360630989, 0.1534716635942459, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08861738443374634, 0.06363938748836517, 0.7135313749313354, 0.146565243601799, 0.3346884250640869, 0.3544132113456726, 0.12204702943563461, 0.028818881139159203, 0.04564356431365013, 0.03288809210062027, 0.06753166019916534, 0.12387087196111679, 0.029650555923581123, 0.014753012917935848, 0.04379607364535332, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03655187785625458, 0.006058508530259132, 0.04018249735236168, 0.08900216966867447, 0.027111714705824852, 0.006408872082829475, 0.03783104568719864, 0.010064247064292431, 0.2550305724143982, 0.008420061320066452, 0.012097015976905823, 0.017737949267029762, 0.0012783813290297985, 0.0026436946354806423, 0.172612726688385, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1163061186671257, 0.04424217715859413, 0.014033653773367405, 0.03590161353349686, 0.06527962535619736, 0.00195779325440526, 0.027195196598768234, 0.1581626534461975, 0.30849722027778625, 0.1652299016714096, 0.04234298691153526, 0.05585171654820442, 0.016547594219446182, 0.04909297078847885, 0.08752257376909256, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1013311892747879, 0.06866802275180817, 0.06425411254167557, 0.4572087228298187, 0.04987834766507149, 0.005650981329381466, 0.053177352994680405, 0.04739876464009285, 0.2551265060901642, 0.06654207408428192, 0.20209699869155884, 0.04737241193652153, 0.042119286954402924, 0.22778292000293732, 0.10508881509304047, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.24632138013839722, 0.045121580362319946, 0.12561434507369995, 0.43826135993003845, 0.07532560080289841, 0.002372375223785639, 0.0398109070956707, 0.026653334498405457, 0.5938559174537659, 0.12655052542686462, 0.04707850515842438, 0.018195422366261482, 0.010826833546161652, 0.023274976760149002, 0.14916135370731354, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.12666325271129608, 0.047387395054101944, 0.04497509077191353, 0.23918962478637695, 0.016611548140645027, 0.009305250830948353, 0.02713325433433056, 0.030590379610657692, 0.4573454260826111, 0.17728003859519958, 0.08635216951370239, 0.05938294902443886, 0.008936652913689613, 0.028742672875523567, 0.15077541768550873, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03701020032167435, 0.037774376571178436, 0.1161394715309143, 0.09335700422525406, 0.015312368050217628, 0.026739761233329773, 0.013009096495807171, 0.005902147851884365, 0.07189750671386719, 0.00625182269141078, 0.056744903326034546, 0.06423129141330719, 0.06661844998598099, 0.02100159414112568, 0.2252311259508133, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.12698857486248016, 0.15100647509098053, 0.08910781890153885, 0.09401589632034302, 0.14288602769374847, 0.07712502032518387, 0.1496707946062088, 0.23784373700618744, 0.024656152352690697, 0.07261883467435837, 0.11269068717956543, 0.10889188945293427, 0.23155105113983154, 0.10633593797683716, 0.14060717821121216, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.33520859479904175, 0.17541100084781647, 0.043081097304821014, 0.07071122527122498, 0.031066332012414932, 0.05302952229976654, 0.13712948560714722, 0.0819549486041069, 0.010218805633485317, 0.05350261554121971, 0.03376028686761856, 0.016291575506329536, 0.04384060204029083, 0.016914406791329384, 0.06937505304813385, 0.1729947179555893, 0.014742943458259106, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2972787618637085, 0.14542943239212036, 0.2801832854747772, 0.6946116089820862, 0.3750338852405548, 0.09368664771318436, 0.11078806221485138, 0.124379463493824, 0.028408339247107506, 0.3442523181438446, 0.15075638890266418, 0.08511755615472794, 0.32891392707824707, 0.12337944656610489, 0.05913665145635605, 0.11518532782793045, 0.28854820132255554, 0.0005498379468917847, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06821048259735107, 0.007578656077384949, 0.033511072397232056, 0.039627932012081146, 0.016393400728702545, 0.20925503969192505, 0.15704192221164703, 0.024064799770712852, 0.005696912761777639, 0.01698312722146511, 0.15042142570018768, 0.0017041407991200686, 0.016995420679450035, 0.005758653394877911, 0.015053601935505867, 0.12768876552581787, 0.007979520596563816, 0.05741023272275925, 0.14377589523792267, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05268644914031029, 0.018480738624930382, 0.006206580437719822, 0.01908770017325878, 0.009213676676154137, 0.012446015141904354, 0.2606332302093506, 0.15275397896766663, 0.004711512941867113, 0.01064901053905487, 0.00940486416220665, 0.00429189158603549, 0.014810611493885517, 0.012880465015769005, 0.15466143190860748, 0.25598737597465515, 0.03471918776631355, 0.08263758569955826, 0.03616967797279358, 0.0012629067059606314, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.017502065747976303, 0.09008979797363281, 0.045234303921461105, 0.04321402683854103, 0.014162504114210606, 0.2841097414493561, 0.10382679849863052, 0.4497845470905304, 0.042821191251277924, 0.03918898105621338, 0.06416238099336624, 0.04602029174566269, 0.2197093665599823, 0.07547488063573837, 0.13285692036151886, 0.29742351174354553, 0.10481993854045868, 0.07552393525838852, 0.008401650935411453, 0.3407011330127716, 0.028353586792945862, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02909473329782486, 0.05293780937790871, 0.025932423770427704, 0.061369478702545166, 0.12287095934152603, 0.12207728624343872, 0.20267462730407715, 0.3647293746471405, 0.036313559859991074, 0.028358493000268936, 0.054471470415592194, 0.007501897402107716, 0.10796680301427841, 0.05851392075419426, 0.12157665193080902, 0.17861823737621307, 0.07256677001714706, 0.1795390099287033, 0.04586997628211975, 0.27750420570373535, 0.0032322825863957405, 0.09472999721765518, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02889016829431057, 0.05256107077002525, 0.05110660940408707, 0.09513585269451141, 0.049980901181697845, 0.07343146204948425, 0.21190620958805084, 0.10279127210378647, 0.1787082403898239, 0.022944355383515358, 0.03947293758392334, 0.008258121088147163, 0.09723227471113205, 0.030062679201364517, 0.14898137748241425, 0.1281835287809372, 0.008169662207365036, 0.10209551453590393, 0.22781534492969513, 0.13339588046073914, 0.022249281406402588, 0.2580547630786896, 0.0071509419940412045, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027054987847805023, 0.06796294450759888, 0.02347770519554615, 0.04540639370679855, 0.13579830527305603, 0.1935206949710846, 0.09281998127698898, 0.22921815514564514, 0.012567882426083088, 0.02752627059817314, 0.05939676612615585, 0.00633750855922699, 0.24427738785743713, 0.10302533209323883, 0.18246731162071228, 0.19490991532802582, 0.0105251120403409, 0.07082764059305191, 0.07746586948633194, 0.10047772526741028, 0.007984980009496212, 0.045915842056274414, 0.030714787542819977, 0.09154831618070602, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13923436403274536, 0.07431720942258835, 0.06541924923658371, 0.14132679998874664, 0.10506866127252579, 0.06156519800424576, 0.21440355479717255, 0.06509862840175629, 0.02759510651230812, 0.10144857317209244, 0.13265900313854218, 0.048845868557691574, 0.16166719794273376, 0.1116088330745697, 0.15105699002742767, 0.2116595059633255, 0.006228659767657518, 0.09237925708293915, 0.33000993728637695, 0.06037600710988045, 0.06468494236469269, 0.028822004795074463, 0.015993207693099976, 0.023504862561821938, 0.014777855016291142, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14352908730506897, 0.10288456827402115, 0.05261845886707306, 0.1541282832622528, 0.05661991983652115, 0.12065587192773819, 0.10697692632675171, 0.15951323509216309, 0.1055477038025856, 0.14385449886322021, 0.23090383410453796, 0.08539394289255142, 0.09938428550958633, 0.08322764188051224, 0.11896289885044098, 0.11546289920806885, 0.0627092570066452, 0.1015198826789856, 0.17440570890903473, 0.11644574254751205, 0.15138378739356995, 0.17151175439357758, 0.07174428552389145, 0.1994275599718094, 0.20994937419891357, 0.08254047483205795, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24387870728969574, 0.11191204935312271, 0.06428070366382599, 0.3038298189640045, 0.14750736951828003, 0.1200045570731163, 0.46686112880706787, 0.3116493225097656, 0.10273779183626175, 0.10795925557613373, 0.1416371762752533, 0.09460661560297012, 0.27618303894996643, 0.09149192273616791, 0.10828596353530884, 0.13584046065807343, 0.09117304533720016, 0.15590398013591766, 0.10968183726072311, 0.5585501790046692, 0.07535546272993088, 0.2762793302536011, 0.32588398456573486, 0.3246583938598633, 0.41251155734062195, 0.043567951768636703, 0.0185235645622015, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1039203479886055, 0.05052376165986061, 0.051659513264894485, 0.18036356568336487, 0.11265069991350174, 0.047071922570466995, 0.3453211784362793, 0.29340654611587524, 0.007079527713358402, 0.06730296462774277, 0.08055143058300018, 0.02563900128006935, 0.19650228321552277, 0.060815099626779556, 0.13184599578380585, 0.1674133688211441, 0.12648360431194305, 0.27492284774780273, 0.24355122447013855, 0.8769406676292419, 0.6096609234809875, 0.4704851806163788, 0.055198147892951965, 0.6140321493148804, 0.2705269455909729, 0.07450747489929199, 0.04471021145582199, 0.05369797348976135, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1947154402732849, 0.003113611601293087, 0.028957238420844078, 0.026910793036222458, 0.017121652141213417, 0.08169777691364288, 0.32467299699783325, 0.05661681666970253, 0.007502032909542322, 0.02869880571961403, 0.020577264949679375, 0.0070375413633883, 0.16551434993743896, 0.06083058565855026, 0.06852211803197861, 0.035074394196271896, 0.012203776277601719, 0.2713678479194641, 0.27628132700920105, 0.5399907231330872, 0.3242804706096649, 0.5765586495399475, 0.02925838902592659, 0.3159044086933136, 0.11935708671808243, 0.16010764241218567, 0.31936678290367126, 0.22831447422504425, 0.09149928390979767, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018467016518115997, 0.004791167099028826, 0.015553582459688187, 0.021664531901478767, 0.025298617780208588, 0.1971224695444107, 0.13395515084266663, 0.1881190687417984, 0.05309745669364929, 0.018728721886873245, 0.018886514008045197, 0.023248562589287758, 0.008927382528781891, 0.03253133222460747, 0.130488321185112, 0.1354324370622635, 0.08839684724807739, 0.010535157285630703, 0.3809414505958557, 0.006101538427174091, 0.04204240441322327, 0.6714356541633606, 0.02054513990879059, 0.44751474261283875, 0.5217893123626709, 0.16833685338497162, 0.4138224124908447, 0.5945862531661987, 0.14406909048557281, 0.000551112403627485, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4018593430519104, 0.09619066119194031, 0.047895513474941254, 0.0887020081281662, 0.04670756310224533, 0.17605426907539368, 0.21604543924331665, 0.1403813511133194, 0.0010993692558258772, 0.07762767374515533, 0.0958188846707344, 0.1024225577712059, 0.06565871089696884, 0.04857100546360016, 0.1717240959405899, 0.26645413041114807, 0.038747917860746384, 0.15441381931304932, 0.6166976094245911, 0.04416924715042114, 0.07849516719579697, 0.41569313406944275, 0.018940549343824387, 0.18770581483840942, 0.11268321424722672, 0.0962471142411232, 0.028718965128064156, 0.019747000187635422, 0.011864973232150078, 0.07090434432029724, 0.02976600080728531, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.31909966468811035, 0.26355716586112976, 0.16833621263504028, 0.334572434425354, 0.18670302629470825, 0.11206400394439697, 0.46585598587989807, 0.15377958118915558, 0.014857469126582146, 0.07049962878227234, 0.1590365469455719, 0.09933225810527802, 0.23580892384052277, 0.09940709918737411, 0.11795931309461594, 0.26584282517433167, 0.03641113266348839, 0.24681606888771057, 0.03326011076569557, 0.5612249970436096, 0.11044078320264816, 0.038705065846443176, 0.07638699561357498, 0.20042885839939117, 0.41367095708847046, 0.16446417570114136, 0.05500950291752815, 0.0458536334335804, 0.038293108344078064, 0.05886702984571457, 0.005421455018222332, 0.03447017818689346, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3361136317253113, 0.18450267612934113, 0.10482683777809143, 0.3672127425670624, 0.09347432106733322, 0.06302808225154877, 0.17493662238121033, 0.11965186893939972, 0.06742112338542938, 0.13331438601016998, 0.26999813318252563, 0.03264465183019638, 0.07908355444669724, 0.09376725554466248, 0.11511774361133575, 0.052208781242370605, 0.10399425774812698, 0.2661847770214081, 0.06582632660865784, 0.5218088626861572, 0.41107869148254395, 0.18652401864528656, 0.10915308445692062, 0.2499890774488449, 0.21385571360588074, 0.11996328830718994, 0.2169666439294815, 0.17541900277137756, 0.34852319955825806, 0.29904353618621826, 0.3583068549633026, 0.0660485103726387, 0.0772518739104271, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.271436870098114, 0.16103556752204895, 0.09723401814699173, 0.3494490087032318, 0.1582973301410675, 0.11393263936042786, 0.41371721029281616, 0.2938876152038574, 0.08068472146987915, 0.08301044255495071, 0.11968915909528732, 0.07779402285814285, 0.24559125304222107, 0.07589462399482727, 0.1087639182806015, 0.1452419012784958, 0.08285138756036758, 0.20162978768348694, 0.10332676023244858, 0.7324197292327881, 0.1815183311700821, 0.27558720111846924, 0.41944485902786255, 0.4614993929862976, 0.7035390734672546, 0.14779764413833618, 0.07484183460474014, 0.09274464100599289, 0.1956741362810135, 0.4027537703514099, 0.17018413543701172, 0.15845544636249542, 0.03217604011297226, 0.027846908196806908, NaN, NaN, NaN, NaN, NaN, NaN], [0.1091129332780838, 0.08970999717712402, 0.08557470142841339, 0.23009367287158966, 0.13180004060268402, 0.0638015940785408, 0.31095248460769653, 0.2814267873764038, 0.0075759077444672585, 0.039292845875024796, 0.06780961900949478, 0.013560868799686432, 0.15987654030323029, 0.04180291295051575, 0.12740370631217957, 0.06803880631923676, 0.0777740478515625, 0.3149954080581665, 0.17862020432949066, 0.9274848103523254, 0.6797788739204407, 0.28538215160369873, 0.04841757193207741, 0.524702250957489, 0.33268001675605774, 0.06556227803230286, 0.08207366615533829, 0.08443650603294373, 0.19301387667655945, 0.68314129114151, 0.7843886613845825, 0.24039600789546967, 0.0983721911907196, 0.035574402660131454, 0.04086223617196083, NaN, NaN, NaN, NaN, NaN], [0.4568881392478943, 0.01152532733976841, 0.12744615972042084, 0.16633041203022003, 0.05682089552283287, 0.22013583779335022, 0.46718865633010864, 0.06831676512956619, 0.011846139095723629, 0.051503561437129974, 0.07631707936525345, 0.017341753467917442, 0.16032609343528748, 0.06682911515235901, 0.06364742666482925, 0.004222579766064882, 0.012189013883471489, 0.38177239894866943, 0.23501808941364288, 0.3822557032108307, 0.273560494184494, 0.28252631425857544, 0.039307549595832825, 0.41269388794898987, 0.3037600517272949, 0.1617780327796936, 0.33094146847724915, 0.37525615096092224, 0.1388353556394577, 0.8142803907394409, 0.5916069149971008, 0.18943282961845398, 0.08566068857908249, 0.11778654158115387, 0.1818830519914627, 0.04465563967823982, NaN, NaN, NaN, NaN], [0.0270079392939806, 0.003701634705066681, 0.024473953992128372, 0.035727839916944504, 0.031186459586024284, 0.22590965032577515, 0.1764952838420868, 0.1725662350654602, 0.06108492240309715, 0.017804577946662903, 0.01644762232899666, 0.018474329262971878, 0.0059660994447767735, 0.026993868872523308, 0.12890712916851044, 0.0780838280916214, 0.07355974614620209, 0.01093215774744749, 0.22770193219184875, 0.008550305850803852, 0.06503485888242722, 0.5060688257217407, 0.02145100012421608, 0.43843212723731995, 0.6872871518135071, 0.1969044953584671, 0.45010682940483093, 0.7415768504142761, 0.3103433847427368, 0.001054091495461762, 0.20113487541675568, 0.21400661766529083, 0.41673052310943604, 0.3260871469974518, 0.620118260383606, 0.12724098563194275, 0.0004952864837832749, NaN, NaN, NaN], [0.32686647772789, 0.10561588406562805, 0.10599718242883682, 0.08397059142589569, 0.05158340185880661, 0.22573474049568176, 0.19403943419456482, 0.08219113945960999, 0.0007591660832986236, 0.028280239552259445, 0.06139420345425606, 0.03943438082933426, 0.025857241824269295, 0.027251310646533966, 0.1435350626707077, 0.3314567506313324, 0.06341477483510971, 0.5618032217025757, 0.642646074295044, 0.27415919303894043, 0.23788774013519287, 0.38833677768707275, 0.08984735608100891, 0.42147237062454224, 0.6564009785652161, 0.2928015887737274, 0.1047874391078949, 0.1023104265332222, 0.06365151703357697, 0.39097070693969727, 0.14560170471668243, 0.23420175909996033, 0.08592629432678223, 0.02493405155837536, 0.011453422717750072, 0.006046658381819725, 0.1451905518770218, 0.005812718998640776, NaN, NaN], [0.21139562129974365, 0.21867576241493225, 0.17973701655864716, 0.29884445667266846, 0.19560806453227997, 0.11132223159074783, 0.28179141879081726, 0.10507592558860779, 0.014165982604026794, 0.04481332749128342, 0.1297360062599182, 0.07738039642572403, 0.2323194295167923, 0.09134778380393982, 0.12234959006309509, 0.21756824851036072, 0.03937938064336777, 0.3266570568084717, 0.05877631530165672, 0.5281912088394165, 0.11102446913719177, 0.03890432044863701, 0.10487684607505798, 0.2815292179584503, 0.4750865697860718, 0.3058159351348877, 0.11602579057216644, 0.12021853774785995, 0.06692790240049362, 0.1190272718667984, 0.019106050953269005, 0.21307361125946045, 0.15337608754634857, 0.06824280321598053, 0.040861621499061584, 0.032932352274656296, 0.052440475672483444, 0.005818615201860666, 0.0524408333003521, NaN], [0.2484172284603119, 0.2714419662952423, 0.13623963296413422, 0.33317360281944275, 0.14056812226772308, 0.16453251242637634, 0.23482279479503632, 0.2797185182571411, 0.08398787677288055, 0.13855448365211487, 0.19988903403282166, 0.12159004807472229, 0.21263501048088074, 0.1342880129814148, 0.11613592505455017, 0.21100056171417236, 0.13406150043010712, 0.10563220083713531, 0.15389345586299896, 0.10192565619945526, 0.07836726307868958, 0.22881029546260834, 0.05055452138185501, 0.24765580892562866, 0.48160815238952637, 0.2201593518257141, 0.1761431246995926, 0.21236160397529602, 0.20979638397693634, 0.10962515324354172, 0.09009265154600143, 0.0623038187623024, 0.17415094375610352, 0.13285446166992188, 0.11576873064041138, 0.10801524668931961, 0.0743527039885521, 0.03413216769695282, 0.027520645409822464, 0.06626196205615997]], [[0.0034671342000365257, 0.05013812705874443, 0.16192083060741425, 0.3595426082611084, 0.20735634863376617, 0.08139260113239288, 0.009979248046875, 0.05037669837474823, 0.0023427342530339956, 6.08037480560597e-05, 0.003484810469672084, 0.023961462080478668, 0.38460296392440796, 0.24992075562477112, 0.13989195227622986, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6699675917625427, 0.09382463991641998, 0.2939082980155945, 0.17940783500671387, 0.06414232403039932, 0.05161670595407486, 0.09315118193626404, 0.0025183490943163633, 0.0024716362822800875, 0.00784118939191103, 0.06077995523810387, 0.010742363519966602, 0.027031319215893745, 0.033606547862291336, 0.020909229293465614, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2646949589252472, 0.029353437945246696, 0.21451972424983978, 0.10881441831588745, 0.06597915291786194, 0.0030848400201648474, 0.011694483458995819, 0.021679535508155823, 0.002872215351089835, 0.013158812187612057, 0.002100167330354452, 6.679360376438126e-05, 0.004520595073699951, 0.019191764295101166, 0.15631338953971863, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.040224652737379074, 0.02035309188067913, 0.3179875612258911, 0.11730892956256866, 0.5032125115394592, 0.4173433780670166, 0.2045394331216812, 0.3468436896800995, 0.0142394183203578, 0.034110911190509796, 0.0166803989559412, 0.0005183254834264517, 0.014372344128787518, 0.013749183155596256, 0.07609989494085312, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0153636634349823, 0.002009550342336297, 0.5970484614372253, 0.5668097734451294, 0.03708057850599289, 0.030387206003069878, 0.003990367520600557, 0.00021067907800897956, 0.0006718098884448409, 0.004241611808538437, 0.01157804112881422, 0.0002699779870454222, 0.0015558624872937799, 0.0029094237834215164, 0.04601351544260979, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03574535250663757, 0.009626551531255245, 0.4402237832546234, 0.2294078767299652, 0.26443710923194885, 0.01504121907055378, 0.016090886667370796, 0.007329131942242384, 0.002309221774339676, 0.0030864060390740633, 0.0026519321836531162, 0.0004272839578334242, 0.0011082548880949616, 0.01614256016910076, 0.03275791555643082, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [6.553631828865036e-05, 0.000357702374458313, 0.08750326931476593, 0.01436514500528574, 0.006815748754888773, 0.6623476147651672, 0.0034670215100049973, 0.0015547194052487612, 0.00029766204534098506, 1.8653441657079384e-05, 0.0003687080170493573, 0.00015007570618763566, 0.0009929342195391655, 0.00030579339363612235, 0.0016504023224115372, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0004548979632090777, 7.145033305278048e-05, 0.025678247213363647, 0.00989772193133831, 0.007979623042047024, 0.6904858946800232, 0.04177143797278404, 0.0005172804230824113, 0.00045151059748604894, 9.678980859462172e-05, 0.0003766386944334954, 0.00020437331113498658, 0.0009936039568856359, 0.0004823105991818011, 0.001104293274693191, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.02770741656422615, 0.15481999516487122, 0.0164713803678751, 0.029219333082437515, 0.01727348566055298, 0.0033895254600793123, 0.08395758271217346, 0.08886045962572098, 0.06561290472745895, 0.23454923927783966, 0.01131775975227356, 0.00014876923523843288, 0.021633606404066086, 0.032435301691293716, 0.2441566288471222, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0002423129917588085, 0.0011915951035916805, 0.0022339578717947006, 0.006169029977172613, 0.0026169228367507458, 0.006970150861889124, 0.0023872333113104105, 0.020186979323625565, 0.5034035444259644, 0.061859097331762314, 0.01802009530365467, 0.08541904389858246, 0.11395227909088135, 0.12879255414009094, 0.06123032420873642, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0016445622313767672, 0.0006882954621687531, 0.0003155411686748266, 0.0014561355346813798, 0.0007120753289200366, 0.00010650769399944693, 0.0005508221802301705, 0.004306118004024029, 0.4519909620285034, 0.2298276424407959, 0.04858560487627983, 0.008956322446465492, 0.005770590156316757, 0.011063157580792904, 0.0306133683770895, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0032223593443632126, 0.0006265831179916859, 0.002176017500460148, 0.010606854222714901, 0.0010762742022052407, 6.259929068619385e-05, 0.0013370343949645758, 0.0014808439882472157, 0.030783534049987793, 0.7491747736930847, 0.34058046340942383, 0.00350938574410975, 0.02303031086921692, 0.0742756798863411, 0.006112673785537481, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010601752437651157, 0.009935700334608555, 0.0694134384393692, 0.14514312148094177, 0.01701076701283455, 0.0001025431411108002, 0.003628269536420703, 0.007610301487147808, 0.1447119563817978, 0.2691461443901062, 0.7685887217521667, 0.06739932298660278, 0.05600086599588394, 0.567065417766571, 0.01997430995106697, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0020818221382796764, 0.006225256249308586, 0.007747206371277571, 0.02054281160235405, 0.00644321832805872, 0.00019787036580964923, 0.0007576930802315474, 0.0013290452770888805, 0.1748982071876526, 0.20870953798294067, 0.6057864427566528, 0.2165842056274414, 0.10265108197927475, 0.12960675358772278, 0.026959752663969994, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0929064005613327, 0.3412420153617859, 0.13197122514247894, 0.20421825349330902, 0.6308890581130981, 0.08085004985332489, 0.35388287901878357, 0.3416491150856018, 0.024628864601254463, 0.013967287726700306, 0.0762757882475853, 0.26007020473480225, 0.3328040838241577, 0.09019435197114944, 0.014360385946929455, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1659475415945053, 0.1821746528148651, 0.2680368423461914, 0.3257308900356293, 0.2135642170906067, 0.10952500998973846, 0.23729652166366577, 0.15246635675430298, 0.09328519552946091, 0.22413431107997894, 0.22322525084018707, 0.11237151175737381, 0.18681256473064423, 0.1572018712759018, 0.06837792694568634, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14290380477905273, 0.026570750400424004, 0.14845344424247742, 0.26635152101516724, 0.12476544827222824, 0.1522083431482315, 0.287058562040329, 0.16522644460201263, 0.21008911728858948, 0.3761942982673645, 0.12840349972248077, 0.0757022351026535, 0.39944273233413696, 0.379029244184494, 0.1911974847316742, 0.0702696219086647, 0.2507307231426239, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00885845348238945, 0.005625984165817499, 0.0020030708983540535, 0.005766861606389284, 0.001782223698683083, 0.004346099682152271, 0.014438317157328129, 0.010037342086434364, 0.0175970196723938, 0.0067982920445501804, 0.003056151093915105, 0.005088370759040117, 0.0035549686290323734, 0.002117584692314267, 0.17935973405838013, 0.028418319299817085, 0.003963488154113293, 0.4144974946975708, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04871530085802078, 0.2322341799736023, 0.043161727488040924, 0.046935759484767914, 0.04166096821427345, 0.048159919679164886, 0.2838554382324219, 0.5679410696029663, 0.17445935308933258, 0.05776107683777809, 0.14550535380840302, 0.04300517588853836, 0.2332015484571457, 0.28196635842323303, 0.4675023853778839, 0.13786309957504272, 0.03506092354655266, 0.02415982447564602, 0.10726116597652435, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03277377411723137, 0.28776609897613525, 0.0018310850718989968, 0.006392122711986303, 0.0034063432831317186, 0.0006021481240168214, 0.02006486989557743, 0.09552518278360367, 0.02804744802415371, 0.060428690165281296, 0.004742977675050497, 0.018782831728458405, 0.016696294769644737, 0.023774143308401108, 0.16262513399124146, 0.011229841969907284, 0.008138949982821941, 0.04613415151834488, 0.2518063187599182, 0.013397655449807644, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006045958958566189, 0.0958699956536293, 0.007954242639243603, 0.011606856249272823, 0.004544504452496767, 0.010406642220914364, 0.011899203062057495, 0.07300186902284622, 0.002370428293943405, 0.012239865958690643, 0.020374998450279236, 0.012496876530349255, 0.024265890941023827, 0.0274967048317194, 0.1423870474100113, 0.0016812672838568687, 0.012760624289512634, 0.002261990448459983, 0.2769384980201721, 0.03090759925544262, 0.0014064738061279058, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008809137158095837, 0.13565093278884888, 0.03191651031374931, 0.0483417883515358, 0.028707973659038544, 0.039296794682741165, 0.018359076231718063, 0.07145766168832779, 0.13921810686588287, 0.01646633818745613, 0.06145479157567024, 0.028490308672189713, 0.056069642305374146, 0.13838331401348114, 0.19134177267551422, 0.11822758615016937, 0.07095540314912796, 0.030966516584157944, 0.03516996279358864, 0.2070395052433014, 0.02684318646788597, 0.2317354679107666, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.39272594451904297, 0.39728477597236633, 0.32111606001853943, 0.41796234250068665, 0.15293559432029724, 0.04586965963244438, 0.16940170526504517, 0.022719532251358032, 0.14239482581615448, 0.5121501088142395, 0.19016578793525696, 0.06530822068452835, 0.29211705923080444, 0.14742477238178253, 0.11553633958101273, 0.23311708867549896, 0.026411496102809906, 0.011159970425069332, 0.03808103874325752, 0.017219573259353638, 0.006694006733596325, 0.001702688867226243, 0.009211051277816296, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.009060109965503216, 0.08736205101013184, 0.03623565658926964, 0.046393588185310364, 0.04293924570083618, 0.049119193106889725, 0.018734706565737724, 0.10957584530115128, 0.04821338504552841, 0.02008068934082985, 0.029284991323947906, 0.015971768647432327, 0.05779576674103737, 0.21830672025680542, 0.21264111995697021, 0.1427604705095291, 0.06787170469760895, 0.04101337492465973, 0.04024908319115639, 0.2669386863708496, 0.04579312726855278, 0.07587221264839172, 0.10059545934200287, 0.18715938925743103, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02833615615963936, 0.24966742098331451, 0.06237170845270157, 0.03993965685367584, 0.10454770177602768, 0.019859671592712402, 0.03772445023059845, 0.19178973138332367, 0.012827831320464611, 0.03533304110169411, 0.024230163544416428, 0.054630037397146225, 0.032379381358623505, 0.08906079828739166, 0.17152637243270874, 0.059837497770786285, 0.10673120617866516, 0.06554628908634186, 0.047321293503046036, 0.26084935665130615, 0.05379262939095497, 0.09055614471435547, 0.09319713711738586, 0.334230899810791, 0.23545128107070923, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015255320817232132, 0.21888743340969086, 0.1253896951675415, 0.08362822234630585, 0.12500159442424774, 0.02890017069876194, 0.03405824303627014, 0.07477163523435593, 0.0229325033724308, 0.01863025315105915, 0.044950928539037704, 0.0560457706451416, 0.04699615016579628, 0.08650227636098862, 0.1548503190279007, 0.06699422001838684, 0.48348554968833923, 0.10470042377710342, 0.2643885016441345, 0.49639153480529785, 0.11732041090726852, 0.061902400106191635, 0.1530170738697052, 0.11711295694112778, 0.23237623274326324, 0.09402092546224594, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011826024390757084, 0.10608652234077454, 0.04723645746707916, 0.057715099304914474, 0.03395959734916687, 0.028910892084240913, 0.011586843058466911, 0.050380002707242966, 0.030421555042266846, 0.00583301018923521, 0.015118762850761414, 0.014350258745253086, 0.01606619358062744, 0.025515934452414513, 0.18496018648147583, 0.050390250980854034, 0.2627623975276947, 0.057036180049180984, 0.10587681084871292, 0.22481703758239746, 0.07078704982995987, 0.028480585664510727, 0.47086307406425476, 0.03990349546074867, 0.16108965873718262, 0.02393723465502262, 0.06960758566856384, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015032858587801456, 0.5077551603317261, 0.07541441917419434, 0.08020945638418198, 0.10545077919960022, 0.2137133628129959, 0.01040775515139103, 0.09528981149196625, 0.09038985520601273, 0.012094871141016483, 0.025733938440680504, 0.06706724315881729, 0.03145073354244232, 0.09538157284259796, 0.34148263931274414, 0.29633763432502747, 0.1570599228143692, 0.07358378916978836, 0.08321648091077805, 0.01657349243760109, 0.02100137248635292, 0.019902318716049194, 0.5162196755409241, 0.03987365961074829, 0.018146652728319168, 0.026169516146183014, 0.00614600395783782, 0.07103840261697769, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.32250380516052246, 0.7984310388565063, 0.3962976634502411, 0.40014326572418213, 0.3554738759994507, 0.47898975014686584, 0.10853014886379242, 0.20243746042251587, 0.127571240067482, 0.2699570655822754, 0.16473528742790222, 0.08001074939966202, 0.03713205084204674, 0.14643853902816772, 0.4229389429092407, 0.1833065152168274, 0.0826280415058136, 0.06509751826524734, 0.017351830378174782, 0.08598462492227554, 0.028223805129528046, 0.03195580840110779, 0.045467328280210495, 0.041934747248888016, 0.016390223056077957, 0.05298775061964989, 0.05077003315091133, 0.2718433141708374, 0.04039132222533226, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.023898553103208542, 0.03448064997792244, 0.007101188413798809, 0.020377272740006447, 0.09085186570882797, 0.008504875935614109, 0.01689869724214077, 0.021393392235040665, 0.03013733960688114, 0.004040753003209829, 0.000672544410917908, 0.0007860396872274578, 0.0003324192948639393, 0.0003073772240895778, 0.13160185515880585, 0.09722712635993958, 0.09857381135225296, 0.2290657013654709, 0.162257120013237, 0.3208743929862976, 0.7083525657653809, 0.08285251259803772, 0.05820265784859657, 0.14296579360961914, 0.06442547589540482, 0.3963678479194641, 0.1963234394788742, 0.13509824872016907, 0.0551372766494751, 0.1773844212293625, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.025859396904706955, 0.29733914136886597, 0.09033425897359848, 0.06196272000670433, 0.10889838635921478, 0.14661002159118652, 0.034964289516210556, 0.07059973478317261, 0.007527152542024851, 0.007617437280714512, 0.006072000600397587, 0.0492180734872818, 0.0069811418652534485, 0.011496509425342083, 0.22706106305122375, 0.1786596029996872, 0.03035295568406582, 0.011360704898834229, 0.0041356864385306835, 0.02253635786473751, 0.032254207879304886, 0.05765725299715996, 0.06512543559074402, 0.26075252890586853, 0.14487245678901672, 0.06064848601818085, 0.02561355009675026, 0.06785233318805695, 0.08367668837308884, 0.11658230423927307, 0.21664968132972717, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014849718660116196, 0.1462036818265915, 0.11065799742937088, 0.06219353526830673, 0.08005399256944656, 0.016894571483135223, 0.010269397869706154, 0.02562439627945423, 0.009192260913550854, 0.009821194224059582, 0.015785057097673416, 0.019254932180047035, 0.01222837995737791, 0.011684795841574669, 0.16154925525188446, 0.02336198277771473, 0.027563903480768204, 0.02503703534603119, 0.002219978952780366, 0.024155667051672935, 0.005802824627608061, 0.011775066144764423, 0.03527237847447395, 0.0438326895236969, 0.16127318143844604, 0.07829897105693817, 0.04636809974908829, 0.16168944537639618, 0.17395752668380737, 0.5116502642631531, 0.11367138475179672, 0.24585914611816406, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01973692700266838, 0.11480830609798431, 0.07148479670286179, 0.05237298831343651, 0.0777522474527359, 0.019268590956926346, 0.01592963933944702, 0.01235677395015955, 0.06519288569688797, 0.019938096404075623, 0.03185376524925232, 0.0271891038864851, 0.01742159202694893, 0.040164995938539505, 0.1837940812110901, 0.14312313497066498, 0.6151867508888245, 0.2511911392211914, 0.34089455008506775, 0.21357816457748413, 0.06974375993013382, 0.04017443582415581, 0.4436698257923126, 0.0627409890294075, 0.029346130788326263, 0.06214871257543564, 0.07426106929779053, 0.37162381410598755, 0.1908751130104065, 0.2730017304420471, 0.09601876139640808, 0.07787502557039261, 0.1985486000776291, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006014276295900345, 0.07228019088506699, 0.029915854334831238, 0.031709808856248856, 0.01963544264435768, 0.01660715602338314, 0.00532315531745553, 0.03606380149722099, 0.029185649007558823, 0.0046777487732470036, 0.01710142381489277, 0.013257446698844433, 0.01389795821160078, 0.02201540581882, 0.16183340549468994, 0.05929486081004143, 0.1356429159641266, 0.08288607001304626, 0.1716676652431488, 0.17707081139087677, 0.11502664536237717, 0.023076828569173813, 0.41179341077804565, 0.03153251111507416, 0.08080360293388367, 0.03793509677052498, 0.0956316813826561, 0.40457794070243835, 0.3355584144592285, 0.2116643786430359, 0.2117510586977005, 0.0911363810300827, 0.13469243049621582, 0.08244834095239639, NaN, NaN, NaN, NaN, NaN, NaN], [0.008549164049327374, 0.34144893288612366, 0.03957316279411316, 0.03764811158180237, 0.04039980471134186, 0.07271253317594528, 0.00613941578194499, 0.04612124711275101, 0.0911136344075203, 0.008750539273023605, 0.01715807057917118, 0.03749352693557739, 0.024577608332037926, 0.06848984956741333, 0.2503378689289093, 0.34530380368232727, 0.14280815422534943, 0.08469259738922119, 0.20386184751987457, 0.018106382340192795, 0.025206930935382843, 0.03376462310552597, 0.665645956993103, 0.06945709139108658, 0.030968131497502327, 0.031062953174114227, 0.015101979486644268, 0.10170532017946243, 0.03453005850315094, 0.05652596056461334, 0.028510402888059616, 0.036133769899606705, 0.04489430412650108, 0.010548176243901253, 0.07425779104232788, NaN, NaN, NaN, NaN, NaN], [0.1472499966621399, 0.4703251123428345, 0.2558133602142334, 0.283985435962677, 0.21470209956169128, 0.17662864923477173, 0.07007063925266266, 0.06038873642683029, 0.20766907930374146, 0.26984694600105286, 0.16889145970344543, 0.27114859223365784, 0.03473396599292755, 0.13903996348381042, 0.2962591350078583, 0.21361097693443298, 0.09641434252262115, 0.0472431480884552, 0.030436551198363304, 0.12823571264743805, 0.024378983303904533, 0.03781319037079811, 0.04478050768375397, 0.04302188381552696, 0.031242409721016884, 0.06916327774524689, 0.08240062743425369, 0.2609483301639557, 0.04106062278151512, 0.01303931511938572, 0.014160559512674809, 0.011109860613942146, 0.034855347126722336, 0.10407929867506027, 0.21024775505065918, 0.08525354415178299, NaN, NaN, NaN, NaN], [0.020655758678913116, 0.020222418010234833, 0.006879583932459354, 0.019070995971560478, 0.07609020173549652, 0.006032301113009453, 0.015974652022123337, 0.01717195473611355, 0.05267442390322685, 0.004277344327419996, 0.0005684247589670122, 0.0007490122807212174, 0.0002994663082063198, 0.0002370573638472706, 0.12958088517189026, 0.056013792753219604, 0.04104574769735336, 0.13420559465885162, 0.14404895901679993, 0.30753612518310547, 0.5552563667297363, 0.06356479972600937, 0.02527950517833233, 0.09324341267347336, 0.03306487947702408, 0.2522013187408447, 0.14255186915397644, 0.09901494532823563, 0.06439376622438431, 0.10042564570903778, 0.43083739280700684, 0.20968028903007507, 0.35324180126190186, 0.2700602114200592, 0.23262809216976166, 0.11776822060346603, 0.14138048887252808, NaN, NaN, NaN], [0.009374987334012985, 0.23445867002010345, 0.05258592590689659, 0.020285839214920998, 0.024131227284669876, 0.0535256564617157, 0.01552440132945776, 0.032435644418001175, 0.006646827794611454, 0.005740212742239237, 0.005195626523345709, 0.07125341892242432, 0.0043562185019254684, 0.01014760322868824, 0.17807012796401978, 0.1699744164943695, 0.02438814751803875, 0.00377153092995286, 0.0020952692721039057, 0.017941365018486977, 0.009907160885632038, 0.04197421669960022, 0.08005423098802567, 0.16825814545154572, 0.08759146183729172, 0.037892259657382965, 0.02378804422914982, 0.12696562707424164, 0.21072204411029816, 0.039158232510089874, 0.12900760769844055, 0.018357207998633385, 0.09957201033830643, 0.024237502366304398, 0.12091250717639923, 0.2524404227733612, 0.044468626379966736, 0.19958341121673584, NaN, NaN], [0.018758203834295273, 0.11843696236610413, 0.09101122617721558, 0.0610043928027153, 0.06165887042880058, 0.012400476261973381, 0.011786350980401039, 0.021215293556451797, 0.014211799949407578, 0.011016220785677433, 0.02130991406738758, 0.02418670989573002, 0.015627985820174217, 0.013993974775075912, 0.14536960422992706, 0.016944430768489838, 0.011726072989404202, 0.017351148650050163, 0.0028529188130050898, 0.013441222719848156, 0.005811003036797047, 0.010734970681369305, 0.020825698971748352, 0.04144507274031639, 0.0777476355433464, 0.07330787181854248, 0.0589311420917511, 0.1305314600467682, 0.09686601907014847, 0.49986732006073, 0.09861493855714798, 0.24486178159713745, 0.2709232568740845, 0.08328418433666229, 0.1665872186422348, 0.2741791903972626, 0.5570544600486755, 0.09308093041181564, 0.18428745865821838, NaN], [0.03985379636287689, 0.12957410514354706, 0.13386031985282898, 0.10592924803495407, 0.09455320239067078, 0.03913174197077751, 0.052976641803979874, 0.03812992200255394, 0.11070051789283752, 0.042073190212249756, 0.05433963984251022, 0.058929286897182465, 0.03380222246050835, 0.05054538697004318, 0.1317562311887741, 0.043635401874780655, 0.027883753180503845, 0.11735352873802185, 0.09225393831729889, 0.11462916433811188, 0.1478782296180725, 0.04645288363099098, 0.049018505960702896, 0.08540874719619751, 0.16189652681350708, 0.081883005797863, 0.13365384936332703, 0.17616337537765503, 0.16547891497612, 0.3400772511959076, 0.14388780295848846, 0.2768324613571167, 0.1609276533126831, 0.18515954911708832, 0.2950800061225891, 0.32982173562049866, 0.4366631507873535, 0.3681013882160187, 0.34051525592803955, 0.05319627374410629]], [[0.014275058172643185, 0.006687531713396311, 0.3026585280895233, 0.06917963922023773, 0.2396276444196701, 0.6229325532913208, 0.15904799103736877, 0.13992713391780853, 0.10272591561079025, 0.6685669422149658, 0.22624024748802185, 0.09492585808038712, 0.40837499499320984, 0.2735627591609955, 0.011893448419868946, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.021194536238908768, 0.020265106111764908, 0.1736137419939041, 0.08712188154459, 0.3174395263195038, 0.3545694649219513, 0.3640749752521515, 0.11553992331027985, 0.3069344758987427, 0.7487083673477173, 0.45964598655700684, 0.41950592398643494, 0.6157799363136292, 0.47228363156318665, 0.04039919748902321, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.008898869156837463, 0.002019912237301469, 0.021509699523448944, 0.0182319525629282, 0.07474909722805023, 0.02385670319199562, 0.013716273009777069, 0.008799813687801361, 0.3437807857990265, 0.008914400823414326, 0.012629772536456585, 0.10342472046613693, 0.0370708666741848, 0.023541903123259544, 0.18654775619506836, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01223641075193882, 0.003142833709716797, 0.006001354195177555, 0.003996475599706173, 0.0579916350543499, 0.01896491087973118, 0.01948327198624611, 0.013184066861867905, 0.30560916662216187, 0.015957718715071678, 0.016950437799096107, 0.06207568570971489, 0.044481322169303894, 0.01894378289580345, 0.19150091707706451, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.003971019294112921, 0.0012432326329872012, 0.005908531602472067, 0.0021760377567261457, 0.002044213702902198, 0.01004379615187645, 0.01574278064072132, 0.026324355974793434, 0.4105670154094696, 0.05117517337203026, 0.02775881439447403, 0.023424910381436348, 0.009920927695930004, 0.011210974305868149, 0.16597995162010193, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.007421860471367836, 0.006305157672613859, 0.011464249342679977, 0.020268600434064865, 0.025753991678357124, 0.031131377443671227, 0.03418951481580734, 0.0052986773662269115, 0.5788748264312744, 0.46168622374534607, 0.07252157479524612, 0.06022901460528374, 0.017210712656378746, 0.04054110497236252, 0.15131165087223053, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.001541785546578467, 0.0008907613810151815, 0.004846525378525257, 0.001811343478038907, 0.0069520194083452225, 0.008084121160209179, 0.021458715200424194, 0.02802192233502865, 0.3832707405090332, 0.25552085041999817, 0.014592574909329414, 0.01065820176154375, 0.012523604556918144, 0.010731800459325314, 0.22416816651821136, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004116748925298452, 0.0016883857315406203, 0.014749680645763874, 0.00869818776845932, 0.01003838051110506, 0.007631313521414995, 0.02068890631198883, 0.027104953303933144, 0.13497500121593475, 0.6378710865974426, 0.10288828611373901, 0.0942029282450676, 0.028772620484232903, 0.05935161933302879, 0.21764545142650604, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06222981959581375, 0.01881357654929161, 0.00486758491024375, 0.015509632416069508, 0.0009378677350468934, 0.004574655555188656, 0.005093523766845465, 0.0076056248508393764, 0.02507362887263298, 0.02107030339539051, 0.007815904915332794, 0.010442771948873997, 0.011698074638843536, 0.006942160427570343, 0.31572407484054565, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.01727244071662426, 0.009210732765495777, 0.005953751504421234, 0.0013454181607812643, 0.005081892944872379, 0.04435739293694496, 0.006434922106564045, 0.0007962443050928414, 0.0007702711154706776, 0.16453301906585693, 0.5625144839286804, 0.34227296710014343, 0.6355522871017456, 0.6161591410636902, 0.02771596610546112, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.12786830961704254, 0.008172453381121159, 0.0017843057867139578, 0.004017683211714029, 0.007877650670707226, 0.0018398476531729102, 0.01566770300269127, 0.0026914728805422783, 0.0035052604507654905, 0.0037441153544932604, 0.011492998339235783, 0.10472051054239273, 0.01954079605638981, 0.025050928816199303, 0.24727097153663635, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1465907245874405, 0.037033673375844955, 0.013877127319574356, 0.00413108617067337, 0.00966043584048748, 0.02326187677681446, 0.04576379433274269, 0.010370912030339241, 0.05009477958083153, 0.002161832293495536, 0.012562266550958157, 0.08835282921791077, 0.018735390156507492, 0.07781965285539627, 0.21298982203006744, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.018177246674895287, 0.009594686329364777, 0.010616189800202847, 0.003939185757189989, 0.020018288865685463, 0.006944165099412203, 0.014553648419678211, 0.014575640670955181, 0.031773608177900314, 0.0201406329870224, 0.008282337337732315, 0.02822018228471279, 0.008926213718950748, 0.030271533876657486, 0.18345791101455688, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.029857823625206947, 0.018949948251247406, 0.0061294399201869965, 0.002908851485699415, 0.00919707678258419, 0.00952958408743143, 0.01205661240965128, 0.00758303003385663, 0.05086279660463333, 0.007759919855743647, 0.006360263098031282, 0.02717713639140129, 0.006157578434795141, 0.027468249201774597, 0.21562480926513672, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.035946138203144073, 0.021175134927034378, 0.025809520855545998, 0.0228139478713274, 0.02454732172191143, 0.008901212364435196, 0.01817207969725132, 0.024075007066130638, 0.042662542313337326, 0.10151555389165878, 0.03429628908634186, 0.025050567463040352, 0.015684176236391068, 0.028640326112508774, 0.23519039154052734, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.038382355123758316, 0.16509199142456055, 0.03795319423079491, 0.018471574410796165, 0.017937200143933296, 0.20822547376155853, 0.036850690841674805, 0.07025959342718124, 0.026183662936091423, 0.008891633711755276, 0.011525453999638557, 0.06559614092111588, 0.10240377485752106, 0.05705304443836212, 0.19186913967132568, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18736660480499268, 0.12802250683307648, 0.06000450998544693, 0.07085607945919037, 0.02492770366370678, 0.13308653235435486, 0.01379183866083622, 0.01460492704063654, 0.018005041405558586, 0.18972568213939667, 0.18918126821517944, 0.05261359363794327, 0.08419474214315414, 0.039842329919338226, 0.12843605875968933, 0.1755252629518509, 0.00892956368625164, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.003212069161236286, 0.04924406483769417, 0.010131219401955605, 0.0015629208646714687, 0.009065762162208557, 0.04507109895348549, 0.003221129300072789, 0.07382506877183914, 0.0011923180427402258, 0.004047631751745939, 0.006328214425593615, 0.012952281162142754, 0.0641837865114212, 0.02541324496269226, 0.1715373396873474, 0.18403629958629608, 0.12486936897039413, 0.01289399154484272, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002438034862279892, 0.0007996301865205169, 0.10929557681083679, 0.030698396265506744, 0.007961505092680454, 0.21520712971687317, 0.0018748894799500704, 0.0015670642023906112, 0.00039643081254325807, 0.0017966092564165592, 0.010619523003697395, 0.0026792865246534348, 0.0035868084523826838, 0.001077426946721971, 0.003137440187856555, 0.07995349168777466, 0.1140136644244194, 0.16089488565921783, 0.271826833486557, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04913554713129997, 0.023452362045645714, 0.16805477440357208, 0.2746557891368866, 0.369334876537323, 0.025402046740055084, 0.03595297038555145, 0.27975642681121826, 0.005478397477418184, 0.044800374656915665, 0.028408128768205643, 0.025396348908543587, 0.1202942430973053, 0.22760754823684692, 0.12602998316287994, 0.19368642568588257, 0.20833823084831238, 0.38513559103012085, 0.0724099725484848, 0.026710418984293938, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0008230121457017958, 0.006709535606205463, 0.005090394522994757, 0.005009432788938284, 0.0009200142812915146, 0.002589132636785507, 0.003276216797530651, 0.011904137209057808, 0.0009605096420273185, 0.0016532291192561388, 0.001647727913223207, 0.0010296034161001444, 0.00474548852071166, 0.004530362784862518, 0.14385877549648285, 0.2920932173728943, 0.20408804714679718, 0.47836723923683167, 0.009784400463104248, 0.41401228308677673, 0.0022880665492266417, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011407818645238876, 0.11073090881109238, 0.11066732555627823, 0.07063236832618713, 0.2326628416776657, 0.057718440890312195, 0.005228970665484667, 0.12933272123336792, 0.010014788247644901, 0.0034599530044943094, 0.015450170263648033, 0.004393222741782665, 0.010258005000650883, 0.00790967233479023, 0.16524673998355865, 0.2459677904844284, 0.013399376533925533, 0.165635347366333, 0.0016970435390248895, 0.00861914549022913, 0.0019094902090728283, 0.006659353617578745, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024886149913072586, 0.019822845235466957, 0.050577834248542786, 0.042761147022247314, 0.013624369166791439, 0.03171548992395401, 0.03447520360350609, 0.057101696729660034, 0.018126925453543663, 0.012612801045179367, 0.056599393486976624, 0.005686976481229067, 0.022324958816170692, 0.021004129201173782, 0.18438492715358734, 0.1659669429063797, 0.3024148941040039, 0.4638516902923584, 0.19814886152744293, 0.06386706978082657, 0.37022748589515686, 0.096834197640419, 0.004976118449121714, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.012148641981184483, 0.047028496861457825, 0.07792042940855026, 0.1455426812171936, 0.3985011875629425, 0.08270914107561111, 0.0031603944953531027, 0.07123681157827377, 0.020226983353495598, 0.005742877256125212, 0.009367674589157104, 0.007002389058470726, 0.013849785551428795, 0.006732230074703693, 0.14449873566627502, 0.23605915904045105, 0.015010624192655087, 0.29689958691596985, 0.002272083656862378, 0.02557971514761448, 0.04829570651054382, 0.03933914750814438, 0.012097989208996296, 0.005491157062351704, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.029934342950582504, 0.04287242144346237, 0.10493571311235428, 0.10647397488355637, 0.01039193756878376, 0.1410648375749588, 0.06155749782919884, 0.08983614295721054, 0.05490254610776901, 0.038721270859241486, 0.021267540752887726, 0.05536682903766632, 0.019229264929890633, 0.008436290547251701, 0.15105655789375305, 0.2229652851819992, 0.011020033620297909, 0.07613904774188995, 0.00492003234103322, 0.11613531410694122, 0.12462546676397324, 0.03799906745553017, 0.029671484604477882, 0.022334527224302292, 0.003809461137279868, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.009979508817195892, 0.08308109641075134, 0.026161497458815575, 0.023276647552847862, 0.0017319537000730634, 0.056630972772836685, 0.012614267878234386, 0.041058339178562164, 0.026752248406410217, 0.01169703807681799, 0.011314285919070244, 0.007283498533070087, 0.05053415521979332, 0.019243547692894936, 0.16277745366096497, 0.30055463314056396, 0.03860635682940483, 0.08235271275043488, 0.12519411742687225, 0.07496307790279388, 0.24307869374752045, 0.02970520593225956, 0.043270040303468704, 0.01804984174668789, 0.008444367907941341, 0.04573319852352142, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04712976887822151, 0.24274323880672455, 0.053717970848083496, 0.06948067992925644, 0.009206406772136688, 0.0471884086728096, 0.010105792433023453, 0.05801715701818466, 0.01891178824007511, 0.07684698700904846, 0.07729421555995941, 0.042662668973207474, 0.10241091996431351, 0.038032110780477524, 0.15563422441482544, 0.361846923828125, 0.0072926427237689495, 0.07028269022703171, 0.038334887474775314, 0.02117738127708435, 0.035939738154411316, 0.03011121228337288, 0.01985063962638378, 0.03699057549238205, 0.0448327511548996, 0.07655268162488937, 0.03217002749443054, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.009955390356481075, 0.06358544528484344, 0.028598172590136528, 0.04170457646250725, 0.01363537646830082, 0.011423949152231216, 0.003101062262430787, 0.04170127958059311, 0.01145926769822836, 0.01274544931948185, 0.020664334297180176, 0.15329574048519135, 0.20515742897987366, 0.07666952162981033, 0.13521607220172882, 0.18510019779205322, 0.0857149139046669, 0.2959531545639038, 0.10870446264743805, 0.034602705389261246, 0.04019882157444954, 0.02403290942311287, 0.05409723520278931, 0.04566982761025429, 0.19149497151374817, 0.23549742996692657, 0.074503093957901, 0.01255789864808321, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006747167091816664, 0.006801524665206671, 0.007903891615569592, 0.00237295706756413, 0.0009535709978081286, 0.0006887177005410194, 0.0011137888068333268, 0.0005580680444836617, 0.004365934059023857, 0.0043631866574287415, 0.004836279433220625, 0.0014166004257276654, 0.1882382482290268, 0.04424351081252098, 0.006875277496874332, 0.03710656613111496, 0.054964251816272736, 0.037898506969213486, 0.3724515438079834, 0.058691613376140594, 0.03363177552819252, 0.06933214515447617, 0.05247700959444046, 0.15643684566020966, 0.589249849319458, 0.349843829870224, 0.29659491777420044, 0.2287619560956955, 0.05358140170574188, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0040101236663758755, 0.00047035442548803985, 0.0008357138140127063, 0.009736553765833378, 0.00025759977870620787, 2.9679033104912378e-05, 0.008525178767740726, 0.0036214631982147694, 0.0009930779924616218, 0.0008531230851076543, 0.0029921825043857098, 7.93160234024981e-06, 6.746472354279831e-05, 0.0017078705132007599, 0.13162609934806824, 0.2688547670841217, 0.1434442549943924, 0.18350595235824585, 0.07485228031873703, 0.0647219642996788, 0.04773847386240959, 0.14254990220069885, 0.03905782103538513, 0.2126167118549347, 0.24802155792713165, 0.30339401960372925, 0.17472584545612335, 0.03891041502356529, 0.02338952198624611, 0.026767900213599205, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.021027032285928726, 0.04388788715004921, 0.07337366044521332, 0.13240061700344086, 0.005691900383681059, 0.08179081231355667, 0.010154702700674534, 0.019539857283234596, 0.013572044670581818, 0.03972425311803818, 0.14196330308914185, 0.0491810142993927, 0.029326222836971283, 0.024830663576722145, 0.1775946319103241, 0.1340402513742447, 0.12347351759672165, 0.42842522263526917, 0.0631304681301117, 0.06392616778612137, 0.1770109236240387, 0.11116458475589752, 0.04706185683608055, 0.09571156650781631, 0.3872493505477905, 0.5415271520614624, 0.14801958203315735, 0.013348261825740337, 0.016769861802458763, 0.019784821197390556, 0.012107723392546177, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.020570920780301094, 0.07008225470781326, 0.05771828070282936, 0.10093566030263901, 0.0037175160832703114, 0.10588520765304565, 0.008791210129857063, 0.07720224559307098, 0.037850137799978256, 0.016810759902000427, 0.0763774886727333, 0.06772230565547943, 0.10185997188091278, 0.02133399061858654, 0.1501101702451706, 0.3128407299518585, 0.02314484678208828, 0.20690661668777466, 0.0038596922531723976, 0.10119188576936722, 0.375572144985199, 0.077932208776474, 0.16011959314346313, 0.07805528491735458, 0.020400837063789368, 0.2237216979265213, 0.1006372720003128, 0.022764090448617935, 0.005061473231762648, 0.0205483790487051, 0.0018506759079173207, 0.001139476546086371, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027059482410550117, 0.22707954049110413, 0.13379518687725067, 0.08346803486347198, 0.011664706282317638, 0.1994924694299698, 0.013729198835790157, 0.07924864441156387, 0.10303384810686111, 0.02253318764269352, 0.06352351605892181, 0.13561668992042542, 0.3492315113544464, 0.13069112598896027, 0.12187084555625916, 0.5802629590034485, 0.17577120661735535, 0.22907592356204987, 0.3224048614501953, 0.21584153175354004, 0.3719359040260315, 0.08852899819612503, 0.18978306651115417, 0.06894023716449738, 0.008546161465346813, 0.34136468172073364, 0.44251179695129395, 0.07915834337472916, 0.27557075023651123, 0.0915302038192749, 0.0036887326277792454, 0.0038842300418764353, 0.015524323098361492, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.038929592818021774, 0.2334582358598709, 0.12089657783508301, 0.17347271740436554, 0.023068996146321297, 0.04853734001517296, 0.008499456569552422, 0.0867975577712059, 0.02351396717131138, 0.04524386301636696, 0.12492679059505463, 0.06575564295053482, 0.10587428510189056, 0.055128976702690125, 0.1414995789527893, 0.5194967985153198, 0.010316978208720684, 0.10247951745986938, 0.03023943491280079, 0.02351299114525318, 0.05376119539141655, 0.03751303628087044, 0.02858700230717659, 0.03933052346110344, 0.026450933888554573, 0.16396890580654144, 0.08825679868459702, 0.01957540772855282, 0.02957809716463089, 0.0652899444103241, 0.003373907646164298, 0.007670924998819828, 0.004321575630456209, 0.024295708164572716, NaN, NaN, NaN, NaN, NaN, NaN], [0.011872883886098862, 0.08469298481941223, 0.054403409361839294, 0.08831894397735596, 0.02684788778424263, 0.021699469536542892, 0.0027920349966734648, 0.05190650746226311, 0.006984782870858908, 0.008844600059092045, 0.02751598134636879, 0.22613400220870972, 0.15431185066699982, 0.06476734578609467, 0.1412026435136795, 0.2508450150489807, 0.1962328553199768, 0.3596697747707367, 0.1504865288734436, 0.029224414378404617, 0.0663013905286789, 0.043777331709861755, 0.06269483268260956, 0.06556038558483124, 0.2250475436449051, 0.35171735286712646, 0.22191122174263, 0.018188640475273132, 0.026326660066843033, 0.017122289165854454, 0.0037187051493674517, 0.024730468168854713, 0.035062648355960846, 0.09351257234811783, 0.011442800983786583, NaN, NaN, NaN, NaN, NaN], [0.015115483663976192, 0.08628259599208832, 0.023322032764554024, 0.012461238540709019, 0.0028755213133990765, 0.010226217098534107, 0.0010302395094186068, 0.002081838669255376, 0.003762529231607914, 0.013111302629113197, 0.0290949996560812, 0.013309521600604057, 0.22778895497322083, 0.05992528051137924, 0.00796937569975853, 0.007168593350797892, 0.033368390053510666, 0.00873665139079094, 0.16062632203102112, 0.028196215629577637, 0.02527499757707119, 0.06866460293531418, 0.0198657363653183, 0.1544157713651657, 0.2752910256385803, 0.14698350429534912, 0.1242247000336647, 0.13061578571796417, 0.010920656844973564, 0.0055906628258526325, 0.006986986380070448, 0.030699225142598152, 0.36674854159355164, 0.2189747393131256, 0.2510429620742798, 0.04264682158827782, NaN, NaN, NaN, NaN], [0.0057023135013878345, 0.0003758604871109128, 0.0009645622340030968, 0.01432577334344387, 0.00027227052487432957, 3.7724938010796905e-05, 0.007459490094333887, 0.0037525389343500137, 0.001061747083440423, 0.0008801367366686463, 0.0023195864632725716, 8.150678695528768e-06, 4.0667833673069254e-05, 0.001007204526104033, 0.12961283326148987, 0.317547470331192, 0.16016888618469238, 0.1976199448108673, 0.10644932836294174, 0.09830258786678314, 0.07801979035139084, 0.301817923784256, 0.05034731701016426, 0.32512444257736206, 0.2241876721382141, 0.4657731354236603, 0.2891538441181183, 0.08093820512294769, 0.06031876429915428, 0.06730521470308304, 0.14267991483211517, 0.289673775434494, 0.1076083853840828, 0.2949788272380829, 0.0365237332880497, 0.015645001083612442, 0.03993191570043564, NaN, NaN, NaN], [0.017900969833135605, 0.026770949363708496, 0.15903817117214203, 0.31877970695495605, 0.014844128862023354, 0.10845804959535599, 0.00868347566574812, 0.015460771508514881, 0.008762474171817303, 0.01190071552991867, 0.07999671250581741, 0.053750935941934586, 0.013735906220972538, 0.020958656445145607, 0.15606556832790375, 0.17233391106128693, 0.22507980465888977, 0.300968736410141, 0.03457535058259964, 0.06539295613765717, 0.2556630074977875, 0.12555503845214844, 0.08745130896568298, 0.10011813044548035, 0.13041436672210693, 0.501103937625885, 0.14929187297821045, 0.03132137656211853, 0.02265048772096634, 0.03383776918053627, 0.006481703836470842, 0.011523596942424774, 0.35894638299942017, 0.1662973165512085, 0.034177642315626144, 0.02702290564775467, 0.036704160273075104, 0.014952532015740871, NaN, NaN], [0.022256335243582726, 0.07135839015245438, 0.07359576225280762, 0.12423767894506454, 0.006224590353667736, 0.13500085473060608, 0.008429165929555893, 0.08156562596559525, 0.02983916364610195, 0.013062523677945137, 0.10225346684455872, 0.04065772891044617, 0.06899033486843109, 0.012502058409154415, 0.13831046223640442, 0.4115316569805145, 0.042032964527606964, 0.21366682648658752, 0.010602481663227081, 0.11737099289894104, 0.5779745578765869, 0.13523340225219727, 0.2636784315109253, 0.170937180519104, 0.020469455048441887, 0.3112620711326599, 0.17165400087833405, 0.044973500072956085, 0.006653682328760624, 0.053596071898937225, 0.008654352277517319, 0.002382548525929451, 0.02675137296319008, 0.09427332878112793, 0.01890433207154274, 0.002222384326159954, 0.018390605226159096, 0.0013299400452524424, 0.0009657714981585741, NaN], [0.016071150079369545, 0.06728275120258331, 0.025518205016851425, 0.023689931258559227, 0.0069392030127346516, 0.04150809720158577, 0.00898416806012392, 0.016712933778762817, 0.005143268499523401, 0.020111138001084328, 0.03020956739783287, 0.01359627302736044, 0.018198341131210327, 0.01637156493961811, 0.1379418522119522, 0.38502925634384155, 0.1563987135887146, 0.13578397035598755, 0.1404726654291153, 0.14828255772590637, 0.28480827808380127, 0.15350891649723053, 0.09994281083345413, 0.06321649998426437, 0.030282480642199516, 0.13266463577747345, 0.1722954362630844, 0.07113035768270493, 0.024887708947062492, 0.016665330156683922, 0.03949398547410965, 0.020136239007115364, 0.01368448045104742, 0.09379612654447556, 0.030771953985095024, 0.011002926155924797, 0.007083212956786156, 0.009242233820259571, 0.007993990555405617, 0.018528543412685394]], [[0.29903000593185425, 0.5539957880973816, 0.06723504513502121, 0.06922264397144318, 0.12363186478614807, 0.04431891441345215, 0.10694187879562378, 0.08094406872987747, 0.15170463919639587, 0.05897890776395798, 0.026665056124329567, 0.04277891665697098, 0.011532573029398918, 0.016366619616746902, 0.08233406394720078, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.030788322910666466, 0.06814564764499664, 0.1441766321659088, 0.42568475008010864, 0.23481200635433197, 0.09723259508609772, 0.20801249146461487, 0.2833361029624939, 0.12989479303359985, 0.09075285494327545, 0.02217184565961361, 0.10632100701332092, 0.07123817503452301, 0.18399499356746674, 0.11842577904462814, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.21215111017227173, 0.2570435404777527, 0.03298918902873993, 0.11753708124160767, 0.2531988024711609, 0.2834656238555908, 0.13087181746959686, 0.14389817416667938, 0.06408312171697617, 0.023736948147416115, 0.043677639216184616, 0.007582403719425201, 0.08098249137401581, 0.042930904775857925, 0.09848955273628235, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.24232596158981323, 0.4370230436325073, 0.27921250462532043, 0.32216426730155945, 0.14763100445270538, 0.1446210741996765, 0.041608523577451706, 0.05782362446188927, 0.03667302429676056, 0.015881532803177834, 0.09886573255062103, 0.0007486737449653447, 0.022804880514740944, 0.01436265092343092, 0.04328664019703865, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0417991504073143, 0.06808368116617203, 0.22980956733226776, 0.06044253334403038, 0.09120408445596695, 0.3664403557777405, 0.01738058589398861, 0.026107804849743843, 0.16878005862236023, 0.007388730999082327, 0.6907519698143005, 0.00283504044637084, 0.004864559043198824, 0.017621232196688652, 0.04920867085456848, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07025078684091568, 0.08007846027612686, 0.18737106025218964, 0.08649075031280518, 0.14398247003555298, 0.03926409035921097, 0.10999412834644318, 0.10028164088726044, 0.2733333110809326, 0.07497494667768478, 0.6277027726173401, 0.03760387748479843, 0.07242996245622635, 0.04469411447644234, 0.0635850802063942, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.18292218446731567, 0.29889917373657227, 0.16216641664505005, 0.041324593126773834, 0.08738134056329727, 0.03374062106013298, 0.10780933499336243, 0.1685270518064499, 0.3661736249923706, 0.13795819878578186, 0.7607439160346985, 0.022037923336029053, 0.11896573007106781, 0.017960727214813232, 0.09792909026145935, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.29104405641555786, 0.7119240164756775, 0.16990531980991364, 0.02345188707113266, 0.15646961331367493, 0.008449066430330276, 0.06418811529874802, 0.018176060169935226, 0.3091927766799927, 0.08911041170358658, 0.3005200922489166, 0.04236089810729027, 0.2996547222137451, 0.08733220398426056, 0.07523740082979202, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.046947941184043884, 0.14375551044940948, 0.004344047512859106, 0.0067795743234455585, 0.02948000282049179, 0.08397668600082397, 0.06400846689939499, 0.18865461647510529, 0.023663662374019623, 0.08527978509664536, 0.02815503440797329, 0.04117048531770706, 0.5833349823951721, 0.0677085593342781, 0.23153413832187653, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08349642902612686, 0.4532567262649536, 0.004409583285450935, 0.009004302322864532, 0.007938031107187271, 0.13749390840530396, 0.1858609914779663, 0.31525370478630066, 0.018453413620591164, 0.12712040543556213, 0.04680929332971573, 0.12408707290887833, 0.13737666606903076, 0.12311573326587677, 0.142713725566864, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05042501911520958, 0.07026762515306473, 0.0020696106366813183, 0.010109566152095795, 0.07710029184818268, 0.05610239878296852, 0.05948542803525925, 0.19247274100780487, 0.001940111513249576, 0.05155838653445244, 0.04620450362563133, 0.20989066362380981, 0.485702246427536, 0.4166657328605652, 0.18102103471755981, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09080760926008224, 0.09187275916337967, 0.012195594608783722, 0.021634280681610107, 0.019499676302075386, 0.09054076671600342, 0.11008334904909134, 0.23214302957057953, 0.0423310361802578, 0.034868963062763214, 0.06751228123903275, 0.049237679690122604, 0.03915484994649887, 0.08995199203491211, 0.1941523253917694, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0706457570195198, 0.10473088920116425, 0.039385173469781876, 0.02697153575718403, 0.04372800514101982, 0.06655491143465042, 0.23491710424423218, 0.19935868680477142, 0.036273516714572906, 0.06345809996128082, 0.020782677456736565, 0.12393849343061447, 0.05726756155490875, 0.041495081037282944, 0.15982753038406372, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.039186086505651474, 0.11076691001653671, 0.03891725465655327, 0.009549588896334171, 0.01825849525630474, 0.051163915544748306, 0.1146436408162117, 0.1649821698665619, 0.03586947172880173, 0.06679365783929825, 0.09092967957258224, 0.14827685058116913, 0.10948126018047333, 0.10746686905622482, 0.1515202671289444, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.14541134238243103, 0.05313154682517052, 0.01991144008934498, 0.08764121681451797, 0.014597749337553978, 0.03937898576259613, 0.04872390255331993, 0.04689335823059082, 0.04558950290083885, 0.051970891654491425, 0.02520112879574299, 0.022838978096842766, 0.00921469647437334, 0.00801294855773449, 0.21471147239208221, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.029921628534793854, 0.09876842796802521, 0.1324968934059143, 0.09236511588096619, 0.02831152267754078, 0.08077768236398697, 0.03118293546140194, 0.1750149130821228, 0.015778981149196625, 0.07032441347837448, 0.22269371151924133, 0.07579661160707474, 0.029184984043240547, 0.053061336278915405, 0.18562854826450348, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07805982232093811, 0.05365234240889549, 0.2842547595500946, 0.2606758773326874, 0.21293140947818756, 0.02651267871260643, 0.08033362030982971, 0.07913534343242645, 0.17101624608039856, 0.12522375583648682, 0.14315897226333618, 0.16815446317195892, 0.0695369690656662, 0.13316825032234192, 0.19111928343772888, 0.17860974371433258, 0.0018437139224261045, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11272483319044113, 0.11636882275342941, 0.45685258507728577, 0.0910579040646553, 0.3091263473033905, 0.12632955610752106, 0.1822080761194229, 0.18498732149600983, 0.6353387832641602, 0.08394157886505127, 0.3285849094390869, 0.4818887710571289, 0.08592816442251205, 0.3495768904685974, 0.07449600845575333, 0.20284786820411682, 0.0034877806901931763, 0.08334594964981079, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2834128737449646, 0.1102365031838417, 0.1840669959783554, 0.5708534121513367, 0.3157653212547302, 0.041008107364177704, 0.038309745490550995, 0.03211268410086632, 0.6102551817893982, 0.20786605775356293, 0.21116787195205688, 0.10018377006053925, 0.04653669148683548, 0.17929011583328247, 0.11314841359853745, 0.1494244486093521, 0.3379342555999756, 0.0649241954088211, 0.006597604602575302, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5993789434432983, 0.0908532664179802, 0.49218761920928955, 0.41100576519966125, 0.18825526535511017, 0.4342217445373535, 0.12116678059101105, 0.10673660039901733, 0.822167158126831, 0.4385586380958557, 0.6995345950126648, 0.18085956573486328, 0.1357179582118988, 0.2864921987056732, 0.034255724400281906, 0.2969810962677002, 0.005403619725257158, 0.054099179804325104, 0.0006044544279575348, 0.009600944817066193, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.858432412147522, 0.34460219740867615, 0.7778953909873962, 0.7743141651153564, 0.4405529797077179, 0.4761039614677429, 0.6155950427055359, 0.06873662024736404, 0.7323919534683228, 0.7086790204048157, 0.6720118522644043, 0.45794978737831116, 0.1628962755203247, 0.4249861538410187, 0.040913816541433334, 0.32280662655830383, 0.01735025830566883, 0.15535852313041687, 0.00028658873634412885, 0.016427762806415558, 0.001579301548190415, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04546767473220825, 0.0383436344563961, 0.10268200188875198, 0.20100316405296326, 0.185649111866951, 0.08432896435260773, 0.060354892164468765, 0.07717668265104294, 0.3201402723789215, 0.04503992572426796, 0.088813915848732, 0.3990366756916046, 0.1564548909664154, 0.08066049963235855, 0.11440145969390869, 0.016787199303507805, 0.10643576830625534, 0.24800433218479156, 0.4802894592285156, 0.03762362524867058, 0.06816797703504562, 0.10676699876785278, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21178147196769714, 0.043018583208322525, 0.1065564677119255, 0.10858221352100372, 0.05675008147954941, 0.06700197607278824, 0.12675313651561737, 0.058651700615882874, 0.18508696556091309, 0.05493801832199097, 0.037313126027584076, 0.19010567665100098, 0.07823225855827332, 0.034572359174489975, 0.16783590614795685, 0.22070105373859406, 0.03063296526670456, 0.12860903143882751, 0.04803713783621788, 0.06528759002685547, 0.3172104060649872, 0.012414618395268917, 0.008628717623651028, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.053469568490982056, 0.03894811123609543, 0.06651152670383453, 0.10646583139896393, 0.08985435962677002, 0.07578439265489578, 0.03395741805434227, 0.09802807122468948, 0.190333291888237, 0.07748086005449295, 0.07400990277528763, 0.6643930077552795, 0.07830479741096497, 0.07947986572980881, 0.11464671790599823, 0.0170818492770195, 0.2921580374240875, 0.24774892628192902, 0.2979756295681, 0.16657015681266785, 0.03825104981660843, 0.39123743772506714, 0.0541624091565609, 0.01715947687625885, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1680978536605835, 0.06724530458450317, 0.16071708500385284, 0.2987021803855896, 0.11997595429420471, 0.007637033239006996, 0.05953739956021309, 0.06456195563077927, 0.07405640929937363, 0.11493658274412155, 0.07269633561372757, 0.12183233350515366, 0.019239120185375214, 0.0931614562869072, 0.15387272834777832, 0.06952934712171555, 0.09443160146474838, 0.3155873417854309, 0.2511345446109772, 0.20146684348583221, 0.17959536612033844, 0.500001072883606, 0.3407229483127594, 0.15127938985824585, 0.026401039212942123, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09433168172836304, 0.05311369523406029, 0.44581180810928345, 0.2857709527015686, 0.11141614615917206, 0.04973546415567398, 0.10592624545097351, 0.0732862576842308, 0.26435965299606323, 0.07302475720643997, 0.17637307941913605, 0.06760746240615845, 0.052111051976680756, 0.29667070508003235, 0.11431443691253662, 0.12491581588983536, 0.08139167726039886, 0.045777399092912674, 0.07585746794939041, 0.05243801325559616, 0.09790124744176865, 0.17415514588356018, 0.44996151328086853, 0.13761505484580994, 0.06580806523561478, 0.1016187071800232, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07687122374773026, 0.10929025709629059, 0.4687592387199402, 0.20397132635116577, 0.26744040846824646, 0.03514130413532257, 0.033296968787908554, 0.08783485740423203, 0.22074763476848602, 0.08713625371456146, 0.12920482456684113, 0.05166565254330635, 0.07679110020399094, 0.17419996857643127, 0.1387287825345993, 0.03772348165512085, 0.0006561332265846431, 0.04040418565273285, 0.23337695002555847, 0.0037602160591632128, 0.1251135915517807, 0.07994246482849121, 0.0032252452801913023, 0.044697076082229614, 0.05314825102686882, 0.16676445305347443, 0.42838534712791443, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.061203911900520325, 0.12594261765480042, 0.353413462638855, 0.22131817042827606, 0.41015592217445374, 0.11432977020740509, 0.010031531564891338, 0.048355478793382645, 0.27572426199913025, 0.07773520797491074, 0.2322542816400528, 0.1527126431465149, 0.05797232687473297, 0.09810248017311096, 0.16366761922836304, 0.008380687795579433, 0.11938491463661194, 0.03761400282382965, 0.10612092912197113, 0.004111893475055695, 0.07536520808935165, 0.06150262430310249, 0.010061400011181831, 0.01712355576455593, 0.026476707309484482, 0.05440329760313034, 0.37643373012542725, 0.12204637378454208, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10230414569377899, 0.03857935592532158, 0.05230129137635231, 0.14396332204341888, 0.09251677989959717, 0.03541665896773338, 0.005624003708362579, 0.014271721243858337, 0.042375415563583374, 0.13543996214866638, 0.061749108135700226, 0.00788076315075159, 0.1602918803691864, 0.07564403861761093, 0.09375559538602829, 0.0973815768957138, 0.1330094188451767, 0.2356250286102295, 0.23801013827323914, 0.16962124407291412, 0.3808935284614563, 0.19062454998493195, 0.12487400323152542, 0.4241224527359009, 0.1858355700969696, 0.1843334436416626, 0.17186462879180908, 0.1674181967973709, 0.03679514676332474, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.705120861530304, 0.026186510920524597, 0.8528315424919128, 0.8252069354057312, 0.24319231510162354, 0.07270172983407974, 0.09487330913543701, 0.07207771390676498, 0.4722364544868469, 0.7067926526069641, 0.8624283075332642, 0.07399676740169525, 0.0075901346281170845, 0.016478050500154495, 0.12560917437076569, 0.28161293268203735, 0.39586660265922546, 0.35408592224121094, 0.26687130331993103, 0.036089953035116196, 0.12106626480817795, 0.05175312981009483, 0.6374836564064026, 0.06537415832281113, 0.01867927983403206, 0.03261437267065048, 0.05161871388554573, 0.026679201051592827, 0.0063977655954658985, 0.0581950880587101, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.27840110659599304, 0.06363435834646225, 0.3689763844013214, 0.33064448833465576, 0.25749024748802185, 0.1453908383846283, 0.03645810857415199, 0.00836147554218769, 0.3977815508842468, 0.41805213689804077, 0.17756043374538422, 0.05318059027194977, 0.011340576224029064, 0.020938394591212273, 0.05934957042336464, 0.052721865475177765, 0.30848002433776855, 0.24953237175941467, 0.2790854275226593, 0.7654650807380676, 0.6871634125709534, 0.13210926949977875, 0.673875629901886, 0.04467727988958359, 0.018614191561937332, 0.08283445239067078, 0.0906965509057045, 0.06073237210512161, 0.12131030112504959, 0.06997358053922653, 0.3489122688770294, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17816129326820374, 0.10609658807516098, 0.17893879115581512, 0.28182876110076904, 0.15060719847679138, 0.03372456133365631, 0.04276707395911217, 0.050946421921253204, 0.04137968271970749, 0.16634012758731842, 0.16395889222621918, 0.24548840522766113, 0.05229371041059494, 0.09448723495006561, 0.12793652713298798, 0.03943483531475067, 0.28613966703414917, 0.07243800908327103, 0.8744964599609375, 0.029915155842900276, 0.331167072057724, 0.4079437255859375, 0.5431530475616455, 0.3259604275226593, 0.1150238886475563, 0.3324905335903168, 0.44221389293670654, 0.2450132817029953, 0.12577538192272186, 0.11014749854803085, 0.1900990903377533, 0.042790502309799194, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14424489438533783, 0.0705854520201683, 0.24214811623096466, 0.24549053609371185, 0.19939330220222473, 0.02639644220471382, 0.021373553201556206, 0.024115193635225296, 0.08405331522226334, 0.14685925841331482, 0.15661610662937164, 0.06219787895679474, 0.032059792429208755, 0.09036684036254883, 0.15146715939044952, 0.06558705866336823, 0.020870981737971306, 0.007642277050763369, 0.028054187074303627, 0.010532653890550137, 0.10334379225969315, 0.12033270299434662, 0.1911371499300003, 0.30930495262145996, 0.04741071164608002, 0.06516209989786148, 0.09313901513814926, 0.24243950843811035, 0.15116305649280548, 0.09231718629598618, 0.47254911065101624, 0.053373783826828, 0.18162642419338226, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06650430709123611, 0.10705426335334778, 0.3146411180496216, 0.1647443175315857, 0.23945462703704834, 0.035643309354782104, 0.026562364771962166, 0.09605439007282257, 0.19827118515968323, 0.1037423387169838, 0.14283734560012817, 0.08165161311626434, 0.07012972235679626, 0.11072988063097, 0.13417953252792358, 0.017124762758612633, 0.00014164860476739705, 0.01482362300157547, 0.13952724635601044, 0.0008921221597120166, 0.07150562852621078, 0.037848807871341705, 0.0009583857608959079, 0.0160027127712965, 0.01657933183014393, 0.09754330664873123, 0.3402610719203949, 0.02766183763742447, 0.011668790131807327, 0.019427720457315445, 0.01879642903804779, 0.06977814435958862, 0.23379765450954437, 0.41046860814094543, NaN, NaN, NaN, NaN, NaN, NaN], [0.06460674107074738, 0.10897383838891983, 0.18354696035385132, 0.20187535881996155, 0.38844820857048035, 0.04722803831100464, 0.010622762143611908, 0.04332485795021057, 0.31279584765434265, 0.11892355233430862, 0.20366235077381134, 0.1460915356874466, 0.041410893201828, 0.060890424996614456, 0.16885291039943695, 0.0033047832548618317, 0.043024010956287384, 0.009507044218480587, 0.05758155509829521, 0.0012058177962899208, 0.04777836054563522, 0.038867104798555374, 0.0027761561796069145, 0.008453112095594406, 0.011027430184185505, 0.021058345213532448, 0.3453521430492401, 0.05058252438902855, 0.004837945103645325, 0.0014179014833644032, 0.06873936206102371, 0.10687354952096939, 0.21186815202236176, 0.44615596532821655, 0.10872229933738708, NaN, NaN, NaN, NaN, NaN], [0.08445128798484802, 0.07278266549110413, 0.017734743654727936, 0.12906457483768463, 0.17354236543178558, 0.01439378596842289, 0.0032682251185178757, 0.009051240049302578, 0.02403325028717518, 0.17859239876270294, 0.05114053934812546, 0.026160510256886482, 0.17188863456249237, 0.059929899871349335, 0.12745818495750427, 0.05260666832327843, 0.09784732013940811, 0.08957145363092422, 0.40504154562950134, 0.2393025904893875, 0.37446328997612, 0.33926665782928467, 0.06915906071662903, 0.28494811058044434, 0.18951286375522614, 0.21801336109638214, 0.2963850796222687, 0.09700386226177216, 0.02254888415336609, 0.016780056059360504, 0.3380737006664276, 0.17247304320335388, 0.15711140632629395, 0.27414536476135254, 0.12462585419416428, 0.05461693927645683, NaN, NaN, NaN, NaN], [0.6940725445747375, 0.016104217618703842, 0.8427497148513794, 0.8075915575027466, 0.2572270333766937, 0.04667792096734047, 0.07690176367759705, 0.06650352478027344, 0.4641934931278229, 0.7403572797775269, 0.892522931098938, 0.08286882191896439, 0.00509345019236207, 0.009769911877810955, 0.1252693384885788, 0.4168609082698822, 0.5786882042884827, 0.4795728027820587, 0.4880480170249939, 0.07741907238960266, 0.22295767068862915, 0.10229793190956116, 0.7397969365119934, 0.09120289236307144, 0.02111845649778843, 0.040493883192539215, 0.06478337198495865, 0.029333919286727905, 0.01266437117010355, 0.08807221800088882, 0.12442159652709961, 0.019878262653946877, 0.02248454838991165, 0.045759230852127075, 0.02396523579955101, 0.002620323793962598, 0.04143214225769043, NaN, NaN, NaN], [0.47638654708862305, 0.08160793781280518, 0.2188907116651535, 0.3983159363269806, 0.3041192293167114, 0.0773146003484726, 0.041229549795389175, 0.00785501953214407, 0.20719125866889954, 0.6323855519294739, 0.1790589690208435, 0.15920953452587128, 0.005728188902139664, 0.011172757484018803, 0.10331764072179794, 0.05813424289226532, 0.29987069964408875, 0.06046860292553902, 0.2948205769062042, 0.6036045551300049, 0.4684220552444458, 0.10851431638002396, 0.5970842242240906, 0.03630568087100983, 0.009022231213748455, 0.034897517412900925, 0.044963937252759933, 0.06918716430664062, 0.06464210897684097, 0.027029458433389664, 0.39741793274879456, 0.1858920007944107, 0.0860959067940712, 0.03553689271211624, 0.03651457652449608, 0.07401836663484573, 0.02850046567618847, 0.457316130399704, NaN, NaN], [0.3162515461444855, 0.12029282748699188, 0.1898643672466278, 0.3138664960861206, 0.22235795855522156, 0.03812789171934128, 0.07994988560676575, 0.07006566971540451, 0.06856126338243484, 0.2470276951789856, 0.2142392098903656, 0.4667101502418518, 0.07071195542812347, 0.09391427785158157, 0.11791101843118668, 0.011862307786941528, 0.06274299323558807, 0.019264375790953636, 0.7077140212059021, 0.009838010184466839, 0.08938813954591751, 0.2665976285934448, 0.21134285628795624, 0.19931168854236603, 0.029879093170166016, 0.11873869597911835, 0.2187809944152832, 0.10740162432193756, 0.03893040865659714, 0.02778119407594204, 0.17118902504444122, 0.03705315291881561, 0.41107529401779175, 0.3035467863082886, 0.1782693862915039, 0.062172479927539825, 0.04369974508881569, 0.43116021156311035, 0.04090215638279915, NaN], [0.15722334384918213, 0.11492010205984116, 0.22595097124576569, 0.17283931374549866, 0.11246844381093979, 0.07424511015415192, 0.1308857947587967, 0.1509532928466797, 0.12219540029764175, 0.14498494565486908, 0.13763099908828735, 0.16327989101409912, 0.12245305627584457, 0.21428720653057098, 0.12265608459711075, 0.13294808566570282, 0.07747184485197067, 0.06700501590967178, 0.24500344693660736, 0.07035010308027267, 0.06088097393512726, 0.15465889871120453, 0.22422827780246735, 0.20946520566940308, 0.06346394866704941, 0.1416163444519043, 0.10671631991863251, 0.07756247371435165, 0.14874279499053955, 0.2551397681236267, 0.18877547979354858, 0.07302238047122955, 0.24805422127246857, 0.1228112131357193, 0.08095405995845795, 0.12022056430578232, 0.20888803899288177, 0.1654488444328308, 0.07207347452640533, 0.12261014431715012]], [[0.009874092414975166, 0.0475393682718277, 0.0700187012553215, 0.05995699018239975, 0.023110831156373024, 0.04304451867938042, 0.02397323027253151, 0.09104450792074203, 0.13320927321910858, 0.0718994140625, 0.16378211975097656, 0.06306017935276031, 0.03516274318099022, 0.06407153606414795, 0.1927335411310196, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.007679122034460306, 0.008519956842064857, 0.023641018196940422, 0.036320336163043976, 0.005810021422803402, 0.002834178740158677, 0.01027101743966341, 0.005131446290761232, 0.05288401618599892, 0.022729018703103065, 0.02885960415005684, 0.007142365910112858, 0.005423326510936022, 0.00592823838815093, 0.23125353455543518, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.17363575100898743, 0.08529574424028397, 0.018747013062238693, 0.09323837608098984, 0.07366655766963959, 0.2784116566181183, 0.6226999759674072, 0.6422466039657593, 0.18433590233325958, 0.44911590218544006, 0.07703087478876114, 0.23628254234790802, 0.37835898995399475, 0.3362680971622467, 0.10061702132225037, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.039354946464300156, 0.028671007603406906, 0.0009692042949609458, 0.010166235268115997, 0.003592043649405241, 0.024686597287654877, 0.0576656274497509, 0.10543617606163025, 0.069565050303936, 0.23999209702014923, 0.0370241142809391, 0.07099387794733047, 0.08031197637319565, 0.0629396140575409, 0.19831009209156036, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.07821620255708694, 0.07413192838430405, 0.008470119908452034, 0.005837618373334408, 0.016890503466129303, 0.34118980169296265, 0.6424257159233093, 0.5736639499664307, 0.18751046061515808, 0.08286380022764206, 0.013973995111882687, 0.16452431678771973, 0.6265572905540466, 0.24633896350860596, 0.03771306574344635, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08601168543100357, 0.11519530415534973, 0.00501672737300396, 0.0384475477039814, 0.0009856059914454818, 0.020220156759023666, 0.4602939486503601, 0.41334664821624756, 0.011432202532887459, 0.039776530116796494, 0.004202698357403278, 0.012451107613742352, 0.012797003611922264, 0.0109980758279562, 0.22371669113636017, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05821564793586731, 0.2493630200624466, 0.017187682911753654, 0.007334073074162006, 0.002277297666296363, 0.012770043686032295, 0.014771709218621254, 0.06810285151004791, 0.008148171938955784, 0.093966543674469, 0.03078475221991539, 0.016961626708507538, 0.009818210266530514, 0.005369590129703283, 0.2805846929550171, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0315314382314682, 0.006441309116780758, 0.005187691655009985, 0.0023020647931843996, 0.001103160553611815, 0.0010285694152116776, 0.0036586276255548, 0.0034369472414255142, 0.02540425956249237, 0.018933216109871864, 0.011261656880378723, 0.014689027331769466, 0.0047272746451199055, 0.003173592034727335, 0.27608010172843933, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.052501752972602844, 0.03902341425418854, 0.022159013897180557, 0.15980832278728485, 0.04565480723977089, 0.04961955174803734, 0.10487794876098633, 0.03556728735566139, 0.011893571354448795, 0.350600004196167, 0.8153157234191895, 0.696418821811676, 0.19642634689807892, 0.7945331335067749, 0.025074943900108337, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.008775658905506134, 0.0231929961591959, 0.001974506536498666, 0.02221933752298355, 0.002016729209572077, 0.03464629501104355, 0.020560195669531822, 0.015741808339953423, 0.024821357801556587, 0.03194829449057579, 0.062133170664310455, 0.009445058181881905, 0.008440939709544182, 0.031038939952850342, 0.24359388649463654, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.15448324382305145, 0.15535393357276917, 0.0009195139864459634, 0.02347545325756073, 0.010745828039944172, 0.05933469906449318, 0.0886014774441719, 0.09891750663518906, 0.008176282048225403, 0.17814745008945465, 0.04613054543733597, 0.10348650068044662, 0.06132601201534271, 0.10257216542959213, 0.2144334316253662, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1637454628944397, 0.3587695062160492, 0.013175190426409245, 0.027070751413702965, 0.009701711125671864, 0.027045298367738724, 0.06057014688849449, 0.08674251288175583, 0.018084047362208366, 0.012978773564100266, 0.04984384402632713, 0.0746963769197464, 0.21545591950416565, 0.18275731801986694, 0.18403297662734985, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04016833007335663, 0.03071952983736992, 0.0073937661945819855, 0.044594794511795044, 0.005693770945072174, 0.007929249666631222, 0.19023852050304413, 0.12198647856712341, 0.00967123731970787, 0.05747445672750473, 0.006795276887714863, 0.006636326666921377, 0.014849998988211155, 0.02297961339354515, 0.1823122203350067, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08359953761100769, 0.14515268802642822, 0.009139984846115112, 0.10055579245090485, 0.007817201316356659, 0.06191832944750786, 0.24591712653636932, 0.26670339703559875, 0.008127851411700249, 0.05132465437054634, 0.011226493865251541, 0.020721180364489555, 0.025672290474176407, 0.06137499585747719, 0.19538666307926178, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.004038439132273197, 0.01158715970814228, 0.012492671608924866, 0.008604439906775951, 0.0044732466340065, 0.001471644383855164, 0.003622728632763028, 0.005392232909798622, 0.024040954187512398, 0.002572751836851239, 0.011896335519850254, 0.00655994052067399, 0.004419950768351555, 0.0023605322930961847, 0.2578853368759155, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03995227441191673, 0.02612248808145523, 0.09039098769426346, 0.04685363546013832, 0.14171013236045837, 0.3046724796295166, 0.08713044226169586, 0.11726538836956024, 0.3945818245410919, 0.03867875412106514, 0.060879118740558624, 0.3211958110332489, 0.1562168449163437, 0.1954476237297058, 0.12928469479084015, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.138319730758667, 0.1925395429134369, 0.06914161890745163, 0.1830926090478897, 0.22252067923545837, 0.24239645898342133, 0.2738734483718872, 0.3115195333957672, 0.287569522857666, 0.12556934356689453, 0.047479670494794846, 0.1859251707792282, 0.015966184437274933, 0.050888173282146454, 0.04287213087081909, 0.04818185046315193, 0.30147239565849304, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.059622667729854584, 0.19761067628860474, 0.019807182252407074, 0.02911451645195484, 0.11472073942422867, 0.03754669055342674, 0.08183436095714569, 0.09122617542743683, 0.10595303028821945, 0.094895139336586, 0.022252719849348068, 0.087751105427742, 0.015402892604470253, 0.02668953314423561, 0.15029701590538025, 0.000490668579004705, 0.5364181399345398, 0.0016803600592538714, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4440009295940399, 0.5055950880050659, 0.14072291553020477, 0.20776981115341187, 0.24339812994003296, 0.01946749910712242, 0.1477651447057724, 0.24892206490039825, 0.13990418612957, 0.5277839303016663, 0.22113053500652313, 0.7815175652503967, 0.04741470143198967, 0.31336119771003723, 0.318754643201828, 0.17249688506126404, 0.003960400819778442, 1.1815190191555303e-05, 0.00205309153534472, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.003975332248955965, 0.09357346594333649, 0.000580776366405189, 0.001556370290927589, 0.0040078358724713326, 0.00020105167641304433, 0.005314813926815987, 0.0463886484503746, 0.0025405578780919313, 0.008098164573311806, 0.0004367573419585824, 0.0955028310418129, 0.0013312119990587234, 0.008472515270113945, 0.16612127423286438, 0.08659190684556961, 0.2260276973247528, 0.018877657130360603, 0.019257033243775368, 0.9179584980010986, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00713347876444459, 0.11304348707199097, 0.007166451308876276, 0.017305465415120125, 0.01892760582268238, 0.004294875077903271, 0.013284130021929741, 0.05641845986247063, 0.006293897051364183, 0.008091668598353863, 0.004229044076055288, 0.03852742537856102, 0.036073870956897736, 0.030675750225782394, 0.1423715502023697, 2.1155383365112357e-05, 0.00016346832853741944, 0.0004644138098228723, 9.852640505414456e-05, 0.009302367456257343, 0.8758521676063538, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.112990602850914, 0.20299020409584045, 0.29141831398010254, 0.1917479783296585, 0.25626659393310547, 0.40023526549339294, 0.045914653688669205, 0.05403761938214302, 0.3577503561973572, 0.11164049804210663, 0.20054538547992706, 0.23382915556430817, 0.3541012704372406, 0.39880213141441345, 0.05442150682210922, 0.0038963633123785257, 0.11578002572059631, 0.06833135336637497, 0.2930091321468353, 0.06728219240903854, 0.588379442691803, 0.190787211060524, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11769542098045349, 0.22490660846233368, 0.16446754336357117, 0.17726869881153107, 0.24409359693527222, 0.16966795921325684, 0.06426751613616943, 0.1868649125099182, 0.17593497037887573, 0.10732528567314148, 0.1210716962814331, 0.18835949897766113, 0.07820838689804077, 0.12172650545835495, 0.0815061554312706, 0.04113525524735451, 0.03917931765317917, 0.013817446306347847, 0.06874216347932816, 0.027753230184316635, 0.04752122610807419, 0.17637789249420166, 0.2964049279689789, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08801974356174469, 0.2964327037334442, 0.17140379548072815, 0.1086457222700119, 0.1790848970413208, 0.042561717331409454, 0.02568918652832508, 0.12736740708351135, 0.4644424617290497, 0.09952269494533539, 0.1403166949748993, 0.12085206061601639, 0.2499331831932068, 0.14905890822410583, 0.04691213369369507, 0.006397286430001259, 0.008155078627169132, 0.02385183423757553, 0.08218340575695038, 0.09733399748802185, 0.7216709852218628, 0.11420661956071854, 0.028804002329707146, 0.49512770771980286, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.28339406847953796, 0.25363603234291077, 0.49371209740638733, 0.28714650869369507, 0.42171764373779297, 0.03586414083838463, 0.140908345580101, 0.27345338463783264, 0.06897412985563278, 0.24740128219127655, 0.5061832070350647, 0.4192107915878296, 0.43851029872894287, 0.29079654812812805, 0.10071542859077454, 0.007080267183482647, 0.010165071114897728, 0.007166726514697075, 0.04547898843884468, 0.014898931607604027, 0.06153866648674011, 0.05960511788725853, 0.025653565302491188, 0.05574938654899597, 0.5054050087928772, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.049345988780260086, 0.1473262906074524, 0.10952533781528473, 0.16707968711853027, 0.25493475794792175, 0.03866606950759888, 0.046480532735586166, 0.16288119554519653, 0.06614720076322556, 0.0629507377743721, 0.07218940556049347, 0.3448391556739807, 0.06943795084953308, 0.058807674795389175, 0.135455921292305, 0.12821261584758759, 0.09823491424322128, 0.2407415509223938, 0.03722868487238884, 0.07500484585762024, 0.23719841241836548, 0.08696958422660828, 0.10033686459064484, 0.08637046813964844, 0.05946339666843414, 0.17889682948589325, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05557708069682121, 0.024377070367336273, 0.171014666557312, 0.1548214852809906, 0.21205416321754456, 0.29049578309059143, 0.08155391365289688, 0.2053205668926239, 0.09979691356420517, 0.11640740185976028, 0.23155182600021362, 0.4772811830043793, 0.2134055644273758, 0.3209300637245178, 0.0739695355296135, 0.018611561506986618, 0.530681848526001, 0.37442806363105774, 0.09326046705245972, 0.039934538304805756, 0.607749342918396, 0.1011725440621376, 0.041957128793001175, 0.061673425137996674, 0.012941170483827591, 0.012897199019789696, 0.02531522512435913, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.046621087938547134, 0.02855776995420456, 0.11975010484457016, 0.2049850970506668, 0.16244490444660187, 0.14614170789718628, 0.03785347566008568, 0.2537410259246826, 0.3719625771045685, 0.1159287542104721, 0.23734091222286224, 0.26474830508232117, 0.04938332363963127, 0.17566856741905212, 0.034675102680921555, 0.025258230045437813, 0.013820141553878784, 0.020238902419805527, 0.20186173915863037, 0.008764497935771942, 0.044081512838602066, 0.11685895919799805, 0.12131167203187943, 0.03466574102640152, 0.0033257410395890474, 0.009427645243704319, 0.00932170171290636, 0.6215367317199707, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08535599708557129, 0.01230260543525219, 0.28460273146629333, 0.3323705196380615, 0.13364574313163757, 0.14216013252735138, 0.16550986468791962, 0.36634352803230286, 0.3233327269554138, 0.13755354285240173, 0.6341029405593872, 0.1276889443397522, 0.0818048045039177, 0.2633805274963379, 0.10007897019386292, 0.0027034373488277197, 0.008653531782329082, 0.0021412167698144913, 0.02395743690431118, 0.06537352502346039, 0.05110874027013779, 0.050060901790857315, 0.023448945954442024, 0.0059632728807628155, 0.0016337132547050714, 0.0060929651372134686, 0.00957516860216856, 0.05008334666490555, 0.696637749671936, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014263293705880642, 0.07173046469688416, 0.01932992786169052, 0.01909404993057251, 0.16755935549736023, 0.2271488904953003, 0.1093294620513916, 0.14342457056045532, 0.0580194853246212, 0.01671113632619381, 0.03395597264170647, 0.0692841187119484, 0.07175575196743011, 0.04972841590642929, 0.12856654822826385, 5.63129390229733e-07, 0.00027805642457678914, 1.7160025890916586e-05, 5.958595011179568e-06, 0.00078710971865803, 1.2566613349918043e-06, 9.03528507478768e-06, 2.1993335394654423e-05, 4.528845238382928e-06, 1.0594538935038145e-06, 2.375837993895402e-06, 1.0765622391772922e-05, 0.00012861557479482144, 0.000270194374024868, 0.4203896224498749, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06590985506772995, 0.1636172980070114, 0.09935098141431808, 0.20126965641975403, 0.4101002812385559, 0.21936923265457153, 0.26084569096565247, 0.3593950569629669, 0.014820259064435959, 0.05201014503836632, 0.03426084294915199, 0.38774317502975464, 0.1401163786649704, 0.3782513439655304, 0.13036324083805084, 0.19651824235916138, 0.009276115335524082, 0.0007576652569696307, 0.02043321169912815, 0.000937489268835634, 0.0014158851699903607, 0.02691410481929779, 0.025149332359433174, 0.015754513442516327, 0.002638434525579214, 0.03568584471940994, 0.28478676080703735, 0.08937329053878784, 0.04057440906763077, 0.41798362135887146, 0.02812151424586773, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05128908529877663, 0.11090300232172012, 0.24501535296440125, 0.07115167379379272, 0.3950805068016052, 0.2010982632637024, 0.08927696198225021, 0.2923780679702759, 0.11195118725299835, 0.05971711874008179, 0.14540457725524902, 0.4000069797039032, 0.2374461144208908, 0.47139719128608704, 0.10731440782546997, 0.0009883381426334381, 0.005475975573062897, 0.017872320488095284, 0.0038598645478487015, 0.01383217889815569, 0.1060260757803917, 0.010558119975030422, 0.0004280287539586425, 0.011488020420074463, 0.004323506727814674, 0.015877770259976387, 0.025533713400363922, 0.06758329272270203, 0.005362953990697861, 0.03033292666077614, 0.3987913429737091, 0.22715723514556885, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014083221554756165, 0.029302498325705528, 0.019839908927679062, 0.019802037626504898, 0.11310776323080063, 0.014347831718623638, 0.013065088540315628, 0.0404186025261879, 0.14103254675865173, 0.01056672353297472, 0.02028844505548477, 0.4335528016090393, 0.019943613559007645, 0.08491621166467667, 0.15365199744701385, 0.025437461212277412, 0.027387555688619614, 0.0211916733533144, 0.0013409400125965476, 0.0016278955154120922, 0.0205780491232872, 0.006606978829950094, 0.005105526186525822, 0.008417481556534767, 0.008475488983094692, 0.016475802287459373, 0.021865585818886757, 0.04041945934295654, 0.001965513452887535, 0.030297037214040756, 0.018051480874419212, 0.2940014600753784, 0.09546513855457306, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04251990094780922, 0.025738505646586418, 0.19788101315498352, 0.08900192379951477, 0.20504283905029297, 0.36725619435310364, 0.05852765589952469, 0.12635937333106995, 0.07596885412931442, 0.055006030946969986, 0.1975020170211792, 0.39253395795822144, 0.2602497935295105, 0.3791850209236145, 0.11310473829507828, 0.014116446487605572, 0.6685785055160522, 0.40577325224876404, 0.09365412592887878, 0.008716625161468983, 0.504762589931488, 0.11037815362215042, 0.03693895787000656, 0.066362664103508, 0.025546396151185036, 0.030971869826316833, 0.07333581149578094, 0.21910515427589417, 0.03128749132156372, 0.013437384739518166, 0.06674141436815262, 0.055549826472997665, 0.02615067921578884, 0.05289305001497269, NaN, NaN, NaN, NaN, NaN, NaN], [0.06150972843170166, 0.049163203686475754, 0.14174170792102814, 0.13322500884532928, 0.16170991957187653, 0.21354396641254425, 0.04667104035615921, 0.26311540603637695, 0.32218027114868164, 0.0809161439538002, 0.18361496925354004, 0.23948682844638824, 0.09133663028478622, 0.25973111391067505, 0.07212682068347931, 0.01752244122326374, 0.013681006617844105, 0.015325021930038929, 0.15400148928165436, 0.0017620606813579798, 0.03783759847283363, 0.07285356521606445, 0.042190372943878174, 0.019725583493709564, 0.004497688263654709, 0.010335608385503292, 0.023485884070396423, 0.5969190001487732, 0.22785267233848572, 0.05655405670404434, 0.05765213817358017, 0.006416310556232929, 0.029401889070868492, 0.022928474470973015, 0.6468356251716614, NaN, NaN, NaN, NaN, NaN], [0.12382826954126358, 0.035204268991947174, 0.3469122052192688, 0.27821084856987, 0.12485836446285248, 0.1130678728222847, 0.12963837385177612, 0.3451126217842102, 0.16417652368545532, 0.12570835649967194, 0.5000419616699219, 0.09880878776311874, 0.042446259409189224, 0.2635292708873749, 0.16834798455238342, 0.003705248236656189, 0.09392052888870239, 0.0011726000811904669, 0.042238909751176834, 0.07787514477968216, 0.11800158768892288, 0.09318403154611588, 0.018972182646393776, 0.022339271381497383, 0.02290215529501438, 0.009648749604821205, 0.020298194140195847, 0.09632600843906403, 0.6665039658546448, 0.01913357712328434, 0.016501925885677338, 0.01550414226949215, 0.014767719432711601, 0.035943012684583664, 0.1298983097076416, 0.7307590246200562, NaN, NaN, NaN, NaN], [0.010800065472722054, 0.04851265624165535, 0.01629789173603058, 0.013155121356248856, 0.14412836730480194, 0.10944324731826782, 0.08000180870294571, 0.10409139841794968, 0.054843056946992874, 0.011575616896152496, 0.02017728053033352, 0.044063322246074677, 0.04816943034529686, 0.03936787694692612, 0.1280953288078308, 3.2450822118335054e-07, 0.0001958437787834555, 1.195628647110425e-05, 3.192948497598991e-06, 0.00034392892848700285, 1.3818779507346335e-06, 6.319523890851997e-06, 9.25252061279025e-06, 3.2897685287025524e-06, 1.041492623699014e-06, 2.450263082209858e-06, 1.1291336704744026e-05, 9.216016042046249e-05, 0.00025747373001649976, 0.3770022690296173, 7.494814053643495e-05, 0.00011931787594221532, 5.454379424918443e-05, 3.481862586340867e-05, 0.0001493972522439435, 6.532184488605708e-05, 0.4379080533981323, NaN, NaN, NaN], [0.03501533716917038, 0.12365423142910004, 0.058643028140068054, 0.026187611743807793, 0.2106953263282776, 0.09627192467451096, 0.1373300403356552, 0.209503173828125, 0.00544273667037487, 0.010177833028137684, 0.00795654021203518, 0.17826952040195465, 0.06280092895030975, 0.2785777747631073, 0.15446779131889343, 0.11172444373369217, 0.00812594499439001, 0.000803561822976917, 0.011673782020807266, 0.00013412271800916642, 0.002435607835650444, 0.021002406254410744, 0.009926681406795979, 0.014218374155461788, 0.0044799866154789925, 0.03462693840265274, 0.49634605646133423, 0.1610735058784485, 0.03537029027938843, 0.3717024624347687, 0.0470024012029171, 0.0025306264869868755, 0.08426976948976517, 0.5137573480606079, 0.047759927809238434, 0.008752438239753246, 0.5270217657089233, 0.020567137748003006, NaN, NaN], [0.055331505835056305, 0.14680130779743195, 0.22850985825061798, 0.040600359439849854, 0.2299574315547943, 0.21366852521896362, 0.10291176289319992, 0.2649042010307312, 0.07482050359249115, 0.04207760840654373, 0.11352740973234177, 0.22353075444698334, 0.2551318407058716, 0.4900997579097748, 0.11985023319721222, 0.00039373920299112797, 0.00142151047475636, 0.016346368938684464, 0.0038184949662536383, 0.00426360173150897, 0.10012070834636688, 0.007060237228870392, 0.00022489627008326352, 0.006389277055859566, 0.0014407823327928782, 0.01344740204513073, 0.019176417961716652, 0.04953484237194061, 0.003102741902694106, 0.017501499503850937, 0.25968801975250244, 0.12805432081222534, 0.03450275957584381, 0.03214799612760544, 0.06495527178049088, 0.007038496434688568, 0.018200475722551346, 0.2228115350008011, 0.24082934856414795, NaN], [0.04223596677184105, 0.14613933861255646, 0.08112313598394394, 0.04192597419023514, 0.11981905251741409, 0.18680673837661743, 0.07695262134075165, 0.14058402180671692, 0.1875196099281311, 0.05864474177360535, 0.0581248439848423, 0.23554684221744537, 0.21983209252357483, 0.1619952768087387, 0.12595340609550476, 0.004585978575050831, 0.008592751808464527, 0.20804427564144135, 0.003501898143440485, 0.01809401623904705, 0.0088487658649683, 0.01839679665863514, 0.009930659085512161, 0.019693726673722267, 0.015943868085741997, 0.06719032675027847, 0.03678698092699051, 0.03292753919959068, 0.02313893660902977, 0.023240724578499794, 0.03294161707162857, 0.24390928447246552, 0.10472099483013153, 0.0623757429420948, 0.06489475816488266, 0.03424002602696419, 0.03615953400731087, 0.05666068568825722, 0.29077935218811035, 0.20903274416923523]], [[0.020951254293322563, 0.19576001167297363, 0.05422525107860565, 0.000516751199029386, 0.0576050765812397, 0.039616964757442474, 0.0011584623716771603, 0.06260760873556137, 0.05524995177984238, 5.760174462920986e-05, 0.0005486492882482708, 0.01856253668665886, 0.008022493682801723, 0.0032547120936214924, 0.1980074942111969, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.15878187119960785, 0.5755441188812256, 0.073322594165802, 0.006848999299108982, 0.04221894592046738, 0.057610929012298584, 0.01498481910675764, 0.15564584732055664, 0.02557745948433876, 0.010493909008800983, 0.04444737732410431, 0.10564734041690826, 0.04703369736671448, 0.007807346060872078, 0.10371111333370209, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0667557343840599, 0.5756934881210327, 0.02783285267651081, 0.001271323417313397, 0.13096383213996887, 0.007863562554121017, 0.0004880728665739298, 0.00786207988858223, 0.030193913727998734, 0.0004458925104700029, 0.0008183285826817155, 0.003005507169291377, 0.008833326399326324, 0.014566708356142044, 0.09050195664167404, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.006902126595377922, 0.22582471370697021, 0.027240794152021408, 0.000252248632023111, 0.08146748691797256, 0.008376134559512138, 0.0017193618696182966, 0.010283069685101509, 0.09191752970218658, 1.873078872449696e-05, 0.0001427968527423218, 0.0006295929779298604, 0.016630304977297783, 0.005029548890888691, 0.17517179250717163, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.46813952922821045, 0.7474208474159241, 0.04419572278857231, 0.039987821131944656, 0.07900705188512802, 0.010286353528499603, 0.008277984336018562, 0.21022778749465942, 0.018339863047003746, 0.003122991183772683, 0.0047759185545146465, 0.0031952662393450737, 0.0037801233120262623, 0.005526377819478512, 0.11187370121479034, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.08057912439107895, 0.09254536032676697, 0.26037144660949707, 0.04459136351943016, 0.19053104519844055, 0.18187369406223297, 0.04494835063815117, 0.08866222947835922, 0.05515718460083008, 0.011219717562198639, 0.041749756783246994, 0.13417255878448486, 0.43527963757514954, 0.4240920841693878, 0.05903848633170128, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.005677447654306889, 0.1104632169008255, 0.17886187136173248, 0.06816153228282928, 0.31320425868034363, 0.08580746501684189, 0.044242095202207565, 0.4031389355659485, 0.13310441374778748, 8.991359209176153e-05, 0.00051962147699669, 0.017516016960144043, 0.02517649158835411, 0.02827705629169941, 0.13873830437660217, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.009441166184842587, 0.04568161070346832, 0.08503290265798569, 0.055850934237241745, 0.15800173580646515, 0.09921947866678238, 0.2719998359680176, 0.7131122350692749, 0.12690743803977966, 0.0015569856623187661, 0.019959524273872375, 0.06398878246545792, 0.1124982088804245, 0.07506788522005081, 0.06075114384293556, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1778930425643921, 0.41812169551849365, 0.05459700897336006, 0.015388981439173222, 0.296997606754303, 0.041353121399879456, 0.1696915328502655, 0.1226804181933403, 0.3453136682510376, 0.006036087870597839, 0.008416525088250637, 0.004891113843768835, 0.003974124789237976, 0.0023401544895023108, 0.04184575751423836, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0018550200620666146, 0.2628808617591858, 0.0018376001389697194, 9.925621998263523e-05, 0.008250601589679718, 0.11965687572956085, 0.011913565918803215, 0.3649533987045288, 0.12527383863925934, 0.0011617891723290086, 0.002173396060243249, 0.011088940314948559, 0.02579125389456749, 0.004398738034069538, 0.18079015612602234, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0033212341368198395, 0.4786561131477356, 0.00019389556837268174, 4.100392834516242e-05, 0.03255903348326683, 0.004482456482946873, 0.0018638258334249258, 0.04032744839787483, 0.151435986161232, 0.0011174781247973442, 0.0008650964009575546, 0.049343932420015335, 0.013284855522215366, 0.009702197276055813, 0.17111515998840332, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.015286837704479694, 0.17760051786899567, 0.012107143178582191, 0.004069492220878601, 0.40114596486091614, 0.005856915842741728, 0.025313973426818848, 0.23595470190048218, 0.5599475502967834, 0.019674712792038918, 0.01789786107838154, 0.0449712835252285, 0.024323459714651108, 0.008310162462294102, 0.10516723990440369, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.013816175982356071, 0.10832668840885162, 0.014126134105026722, 0.0044770012609660625, 0.18972823023796082, 0.04144473373889923, 0.013167506083846092, 0.0398833267390728, 0.08117146790027618, 0.03379456326365471, 0.04336484149098396, 0.6766878366470337, 0.6025072932243347, 0.24042664468288422, 0.05677386373281479, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.010657100938260555, 0.1729527860879898, 0.006031150463968515, 0.006062258500605822, 0.10042858123779297, 0.007653414737433195, 0.0031583579257130623, 0.014785557985305786, 0.13275322318077087, 0.05689838156104088, 0.04302775487303734, 0.36964303255081177, 0.3870774507522583, 0.31299954652786255, 0.07590257376432419, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.014769526198506355, 0.05199434980750084, 0.11582475155591965, 0.14804258942604065, 0.05702318996191025, 0.3275434374809265, 0.3759170472621918, 0.3329218327999115, 0.027774346992373466, 0.12548163533210754, 0.13219930231571198, 0.029332099482417107, 0.2028164267539978, 0.518939197063446, 4.3280975660309196e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.24939602613449097, 0.0921018123626709, 0.20195554196834564, 0.25931593775749207, 0.24976609647274017, 0.08025927096605301, 0.10602997988462448, 0.08455296605825424, 0.038250602781772614, 0.34039628505706787, 0.2528480887413025, 0.17168891429901123, 0.12038858979940414, 0.16591216623783112, 0.05973837152123451, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04881530627608299, 0.07757209986448288, 0.080610491335392, 0.047049663960933685, 0.2744564712047577, 0.18291208148002625, 0.11781244724988937, 0.130965456366539, 0.16412131488323212, 0.049904536455869675, 0.10192018002271652, 0.46385079622268677, 0.23078110814094543, 0.23192283511161804, 0.17445482313632965, 0.15880486369132996, 0.04734092205762863, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11153621971607208, 0.27696484327316284, 0.0350787453353405, 0.011731116101145744, 0.08945441246032715, 0.2750371992588043, 0.07341955602169037, 0.12011690437793732, 0.026965567842125893, 0.023494159802794456, 0.015654105693101883, 0.05704642832279205, 0.11022293567657471, 0.0463077574968338, 0.1307818740606308, 0.22883240878582, 0.015307039953768253, 0.023610780015587807, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06216026097536087, 0.123567596077919, 0.044055916368961334, 0.012494971975684166, 0.045035671442747116, 0.18137943744659424, 0.1501520872116089, 0.0996006652712822, 0.05310875549912453, 0.11289763450622559, 0.05045852065086365, 0.055306825786828995, 0.3424266576766968, 0.1600506752729416, 0.04121629521250725, 0.15376803278923035, 0.17623378336429596, 0.16427822411060333, 0.018553992733359337, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03470996022224426, 0.38486456871032715, 0.007671448867768049, 0.014272118918597698, 0.01295357197523117, 0.001353065250441432, 0.035229261964559555, 0.10929086059331894, 0.03641098737716675, 0.08741087466478348, 0.01870635710656643, 0.10011491179466248, 0.03142678365111351, 0.12343490868806839, 0.15971165895462036, 0.12576976418495178, 0.44071146845817566, 0.38860467076301575, 0.12043511122465134, 0.027116619050502777, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03053746558725834, 0.24113330245018005, 0.009466315619647503, 0.01980357989668846, 0.04114365205168724, 0.05523357167840004, 0.027042368426918983, 0.10979101061820984, 0.004461985547095537, 0.04689180105924606, 0.04529552906751633, 0.1364448219537735, 0.054305437952280045, 0.06579019129276276, 0.13895106315612793, 0.03928220644593239, 0.42239660024642944, 0.2546820342540741, 0.22367709875106812, 0.1215892881155014, 0.001983387628570199, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3289671242237091, 0.3443813920021057, 0.38217487931251526, 0.32642021775245667, 0.12515123188495636, 0.04144418612122536, 0.06740343570709229, 0.024584289640188217, 0.007359183859080076, 0.39375364780426025, 0.38123685121536255, 0.3035361170768738, 0.18788036704063416, 0.13260427117347717, 0.09976762533187866, 0.17152060568332672, 0.49365419149398804, 0.08085957914590836, 0.02207508496940136, 0.19231174886226654, 0.008304901421070099, 0.03878962993621826, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1711268573999405, 0.1900682896375656, 0.20778892934322357, 0.08847668021917343, 0.39589688181877136, 0.3955995440483093, 0.3348483741283417, 0.11133389919996262, 0.10861264914274216, 0.14033687114715576, 0.26926568150520325, 0.4846358299255371, 0.23405344784259796, 0.4343181252479553, 0.08998383581638336, 0.13843253254890442, 0.07047099620103836, 0.2525072991847992, 0.13487939536571503, 0.27911728620529175, 0.11727599054574966, 0.022392159327864647, 0.1764850914478302, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4154844284057617, 0.4073733687400818, 0.5541329383850098, 0.43809109926223755, 0.11503908038139343, 0.02849700301885605, 0.025097709149122238, 0.014711813069880009, 0.006424109451472759, 0.39197838306427, 0.4694826304912567, 0.17039237916469574, 0.16142874956130981, 0.19919125735759735, 0.054951149970293045, 0.10915631055831909, 0.30942168831825256, 0.19657404720783234, 0.031007295474410057, 0.23716343939304352, 0.05435822904109955, 0.08149112015962601, 0.6613667011260986, 0.11670006066560745, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24498042464256287, 0.277620404958725, 0.060333866626024246, 0.030503980815410614, 0.04090564325451851, 0.4659561812877655, 0.2110646367073059, 0.11101182550191879, 0.028219982981681824, 0.10508411377668381, 0.025386929512023926, 0.0648839995265007, 0.13676653802394867, 0.07622335106134415, 0.09164498746395111, 0.0640818402171135, 0.41535088419914246, 0.29784247279167175, 0.05657188221812248, 0.036311421543359756, 0.08192699402570724, 0.16688455641269684, 0.10144203901290894, 0.346017450094223, 0.15466110408306122, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4220424294471741, 0.21296784281730652, 0.10483475774526596, 0.11319100856781006, 0.14396990835666656, 0.1309618502855301, 0.13656088709831238, 0.2097199261188507, 0.1397993415594101, 0.263439804315567, 0.10735370218753815, 0.27457332611083984, 0.26051631569862366, 0.18891198933124542, 0.10100831091403961, 0.04877842590212822, 0.16450235247612, 0.23761717975139618, 0.0720985159277916, 0.12954245507717133, 0.08035153150558472, 0.18124118447303772, 0.05973014980554581, 0.26483285427093506, 0.39028850197792053, 0.05098416656255722, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12607140839099884, 0.08847615122795105, 0.09191321581602097, 0.06030821427702904, 0.21649383008480072, 0.10438336431980133, 0.07331530004739761, 0.1330888420343399, 0.04176999628543854, 0.06727378815412521, 0.06257567554712296, 0.21110908687114716, 0.09018781781196594, 0.09389244765043259, 0.13621515035629272, 0.11044558137655258, 0.08550350368022919, 0.2513507902622223, 0.28401821851730347, 0.12441904842853546, 0.05029991641640663, 0.42405593395233154, 0.08374682813882828, 0.43869927525520325, 0.14253327250480652, 0.10876792669296265, 0.09369473904371262, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.062066610902547836, 0.07845254987478256, 0.24838510155677795, 0.16541223227977753, 0.16867581009864807, 0.019677892327308655, 0.021460779011249542, 0.018530650064349174, 0.023010587319731712, 0.10349667817354202, 0.16099916398525238, 0.3089703619480133, 0.08426959812641144, 0.16459643840789795, 0.06073381006717682, 0.08764015138149261, 0.46941375732421875, 0.23278135061264038, 0.11763583868741989, 0.0354606918990612, 0.16624747216701508, 0.2793619632720947, 0.1965668648481369, 0.23052528500556946, 0.3914787769317627, 0.08669382333755493, 0.10678009688854218, 0.08708767592906952, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11642084270715714, 0.11190053075551987, 0.12368596345186234, 0.04549993947148323, 0.3567850887775421, 0.06569506227970123, 0.07286660373210907, 0.03259556367993355, 0.09530685096979141, 0.19273261725902557, 0.06463074684143066, 0.7640278339385986, 0.06371455639600754, 0.1593337506055832, 0.2193848341703415, 0.2116944044828415, 0.06720030307769775, 0.29984304308891296, 0.010844358243048191, 0.051072586327791214, 0.15023349225521088, 0.04554526135325432, 0.1560167670249939, 0.03609438240528107, 0.026584016159176826, 0.14512087404727936, 0.05890262499451637, 0.015816861763596535, 0.07422769069671631, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11034999042749405, 0.03210863843560219, 0.010996339842677116, 0.026450032368302345, 0.051475513726472855, 0.02743532694876194, 0.3610350787639618, 0.20538736879825592, 0.017281753942370415, 0.05300014466047287, 0.012052728794515133, 0.08001075685024261, 0.0069017065688967705, 0.010893179103732109, 0.13085691630840302, 0.056502565741539, 0.15541820228099823, 0.07158821076154709, 0.00490804947912693, 0.015012365765869617, 0.06302572786808014, 0.01116714347153902, 0.22065599262714386, 0.021468764171004295, 0.01365464273840189, 0.022816751152276993, 0.019708380103111267, 0.0059420084580779076, 0.0700121819972992, 0.287899911403656, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07615644484758377, 0.1536630541086197, 0.1253354847431183, 0.048576656728982925, 0.05276811867952347, 0.1611642986536026, 0.12317243963479996, 0.32385867834091187, 0.012925365939736366, 0.0864856168627739, 0.08918802440166473, 0.23886144161224365, 0.20351386070251465, 0.20744860172271729, 0.13318131864070892, 0.058403778821229935, 0.0693131536245346, 0.04999461770057678, 0.004054869059473276, 0.0624610111117363, 0.018093721941113472, 0.07961009442806244, 0.1545858234167099, 0.3008257746696472, 0.14455094933509827, 0.09800520539283752, 0.09531621634960175, 0.27401015162467957, 0.4782770574092865, 0.11211755871772766, 0.01358953770250082, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.051417503505945206, 0.1600690335035324, 0.08639511466026306, 0.02997625432908535, 0.08503448963165283, 0.32695260643959045, 0.06822863221168518, 0.16364485025405884, 0.06138167902827263, 0.07786902785301208, 0.04443247988820076, 0.0585777647793293, 0.1263807862997055, 0.10769001394510269, 0.13808733224868774, 0.1399688720703125, 0.5559014678001404, 0.20350231230258942, 0.042011573910713196, 0.020507201552391052, 0.03915366902947426, 0.4243565797805786, 0.11376935243606567, 0.31140708923339844, 0.051479678601026535, 0.07416504621505737, 0.2654426097869873, 0.3960915207862854, 0.5790604948997498, 0.18063338100910187, 0.1939544379711151, 0.04191381484270096, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1321558654308319, 0.24967153370380402, 0.0761917233467102, 0.044561922550201416, 0.12028387933969498, 0.19908402860164642, 0.04708404839038849, 0.10076720267534256, 0.09921064227819443, 0.18345412611961365, 0.09404058009386063, 0.21650025248527527, 0.11625839024782181, 0.1530369222164154, 0.12011245638132095, 0.027515297755599022, 0.0486784465610981, 0.06845460832118988, 0.023408811539411545, 0.008863206952810287, 0.008533195592463017, 0.24178741872310638, 0.01229054294526577, 0.25817692279815674, 0.6869812607765198, 0.049950506538152695, 0.12178820371627808, 0.0564231351017952, 0.02026011236011982, 0.004908477421849966, 0.03562311828136444, 0.12746450304985046, 0.0016219470417127013, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10757170617580414, 0.1042957603931427, 0.13590699434280396, 0.06331591308116913, 0.24158470332622528, 0.09161848574876785, 0.0633605495095253, 0.13977625966072083, 0.03925082087516785, 0.07121878862380981, 0.1023484393954277, 0.26378345489501953, 0.10990181565284729, 0.12030858546495438, 0.1261080652475357, 0.11620164662599564, 0.09937138110399246, 0.17538107931613922, 0.40406307578086853, 0.043817292898893356, 0.05759625509381294, 0.49306368827819824, 0.09120260924100876, 0.36450278759002686, 0.08042807132005692, 0.1856311559677124, 0.1376025527715683, 0.1998283714056015, 0.3654005527496338, 0.15910619497299194, 0.4969707429409027, 0.08565060794353485, 0.02514367550611496, 0.090617336332798, NaN, NaN, NaN, NaN, NaN, NaN], [0.06512168049812317, 0.13837532699108124, 0.3250073194503784, 0.16753129661083221, 0.21647527813911438, 0.04118574038147926, 0.03336784988641739, 0.029927842319011688, 0.03334499150514603, 0.08782976865768433, 0.17631417512893677, 0.3171449303627014, 0.10520178824663162, 0.15139654278755188, 0.0914224162697792, 0.0739481970667839, 0.5182103514671326, 0.19721719622612, 0.21118015050888062, 0.015751224011182785, 0.12249443680047989, 0.5174803733825684, 0.17075838148593903, 0.30025264620780945, 0.29246312379837036, 0.0875946432352066, 0.2326347827911377, 0.13986286520957947, 0.511695921421051, 0.12602318823337555, 0.03662485629320145, 0.1263200044631958, 0.0166145209223032, 0.19702456891536713, 0.09621746093034744, NaN, NaN, NaN, NaN, NaN], [0.06382797658443451, 0.2566763758659363, 0.11056842654943466, 0.028001734986901283, 0.2813059389591217, 0.24806144833564758, 0.07807287573814392, 0.05373501405119896, 0.21183612942695618, 0.09658068418502808, 0.05084875971078873, 0.501965343952179, 0.06208595260977745, 0.10913741588592529, 0.26912179589271545, 0.3052336871623993, 0.37224864959716797, 0.45515015721321106, 0.04986808821558952, 0.05332064628601074, 0.13846120238304138, 0.15990367531776428, 0.20659208297729492, 0.06640873104333878, 0.035323526710271835, 0.30340465903282166, 0.10174556821584702, 0.02102985605597496, 0.11508277803659439, 0.09203195571899414, 0.0029288395307958126, 0.023838462308049202, 0.004605103749781847, 0.052648112177848816, 0.006431906949728727, 0.026736242696642876, NaN, NaN, NaN, NaN], [0.08548272401094437, 0.017544403672218323, 0.011271107010543346, 0.022962557151913643, 0.05241750180721283, 0.02648325450718403, 0.3057800531387329, 0.19772306084632874, 0.025625178590416908, 0.03652432560920715, 0.006945622619241476, 0.05576859414577484, 0.00584550853818655, 0.008180957287549973, 0.12917736172676086, 0.047024402767419815, 0.1257133185863495, 0.052377521991729736, 0.009844984859228134, 0.015597687102854252, 0.06965665519237518, 0.01849394477903843, 0.1603521853685379, 0.02587857097387314, 0.00957732368260622, 0.023523790761828423, 0.020081259310245514, 0.008425970561802387, 0.10955916345119476, 0.35300737619400024, 0.023505402728915215, 0.00786643661558628, 0.007557017263025045, 0.013908758759498596, 0.004675114993005991, 0.035296451300382614, 0.3261549174785614, NaN, NaN, NaN], [0.03209112584590912, 0.1926622986793518, 0.09989916533231735, 0.02044818177819252, 0.04127199947834015, 0.22930434346199036, 0.09912838786840439, 0.3779822289943695, 0.007566491607576609, 0.046152934432029724, 0.04734500125050545, 0.35250937938690186, 0.10047939419746399, 0.16575956344604492, 0.13635975122451782, 0.11014947295188904, 0.08461853116750717, 0.02981843426823616, 0.004099451471120119, 0.009237504564225674, 0.011130756698548794, 0.132149338722229, 0.11619938164949417, 0.22203940153121948, 0.02292616292834282, 0.06793706119060516, 0.07227552682161331, 0.3262397348880768, 0.40601006150245667, 0.08270477503538132, 0.013506797142326832, 0.03135772421956062, 0.07034049183130264, 0.09623772650957108, 0.20842698216438293, 0.2752794623374939, 0.1234828308224678, 0.04129752516746521, NaN, NaN], [0.05301084369421005, 0.1661737710237503, 0.08216799795627594, 0.025789698585867882, 0.07900767773389816, 0.3054123520851135, 0.08738221228122711, 0.17720931768417358, 0.06289011240005493, 0.06967967748641968, 0.05491774156689644, 0.02886299602687359, 0.10253670811653137, 0.09415244311094284, 0.129754438996315, 0.1182219609618187, 0.7384620308876038, 0.11492461711168289, 0.09884578734636307, 0.012010940350592136, 0.038200050592422485, 0.4905328154563904, 0.23439669609069824, 0.2528713345527649, 0.015177865512669086, 0.07817362248897552, 0.33532261848449707, 0.4971323609352112, 0.7384514212608337, 0.2383432686328888, 0.2306600660085678, 0.025716517120599747, 0.023198120296001434, 0.3352215886116028, 0.4797173738479614, 0.5688640475273132, 0.2555003762245178, 0.1890360713005066, 0.06237812712788582, NaN], [0.1895110011100769, 0.09308972954750061, 0.1887637972831726, 0.14927715063095093, 0.3653167188167572, 0.1686658412218094, 0.1126369759440422, 0.17013703286647797, 0.0685301423072815, 0.15278968214988708, 0.19327588379383087, 0.18825437128543854, 0.143904447555542, 0.143670454621315, 0.1203024610877037, 0.13153354823589325, 0.5476850867271423, 0.27465543150901794, 0.27658137679100037, 0.5121651291847229, 0.3939417600631714, 0.2527337968349457, 0.41937416791915894, 0.2437492311000824, 0.1485103964805603, 0.10651403665542603, 0.241710364818573, 0.34289923310279846, 0.3691290616989136, 0.108230821788311, 0.32214298844337463, 0.08876177668571472, 0.03369928151369095, 0.23942533135414124, 0.302080899477005, 0.3531237244606018, 0.09724070131778717, 0.19267186522483826, 0.06874143332242966, 0.052875734865665436]], [[0.5917359590530396, 0.12410512566566467, 0.24872945249080658, 0.20040015876293182, 0.21720361709594727, 0.11561702191829681, 0.58521568775177, 0.41413450241088867, 0.22558750212192535, 0.117314413189888, 0.3378458619117737, 0.10710897296667099, 0.0625920221209526, 0.24034489691257477, 0.0060951621271669865, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03933318331837654, 0.17479471862316132, 0.1999012678861618, 0.1507989913225174, 0.2344110906124115, 0.41628938913345337, 0.19733835756778717, 0.42009472846984863, 0.32125937938690186, 0.09302358329296112, 0.29758843779563904, 0.2500022351741791, 0.15192696452140808, 0.19621950387954712, 0.06078135594725609, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03998054191470146, 0.02165106125175953, 0.5779209733009338, 0.4094802737236023, 0.3219829499721527, 0.23359909653663635, 0.15223096311092377, 0.0776560828089714, 0.11850404739379883, 0.1752316802740097, 0.7765606641769409, 0.15624035894870758, 0.19448350369930267, 0.3389243483543396, 0.015656093135476112, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.2606712579727173, 0.23122362792491913, 0.33188652992248535, 0.327752023935318, 0.0930425301194191, 0.13157396018505096, 0.5079332590103149, 0.15524731576442719, 0.2039693295955658, 0.336448073387146, 0.7406277656555176, 0.11173539608716965, 0.03980698063969612, 0.2757716476917267, 0.009055807255208492, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.03992704302072525, 0.03562299162149429, 0.05761631205677986, 0.04593607783317566, 0.747100830078125, 0.13848423957824707, 0.25807130336761475, 0.11098858714103699, 0.025020861998200417, 0.027831630781292915, 0.07712040096521378, 0.5344594120979309, 0.28488224744796753, 0.37143638730049133, 0.060307834297418594, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.146702840924263, 0.5779150128364563, 0.04704871401190758, 0.12512727081775665, 0.05839477851986885, 0.5817644596099854, 0.2541782557964325, 0.167904794216156, 0.020014837384223938, 0.0557471327483654, 0.1778557300567627, 0.29983726143836975, 0.34978994727134705, 0.3759990334510803, 0.07532685250043869, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.14372284710407257, 0.20398879051208496, 0.060162752866744995, 0.022449441254138947, 0.15882903337478638, 0.12907396256923676, 0.7781419157981873, 0.20689332485198975, 0.023098474368453026, 0.02567201852798462, 0.04225016012787819, 0.05647281929850578, 0.5644452571868896, 0.8062969446182251, 0.0037398021668195724, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.09274263679981232, 0.19406189024448395, 0.18035270273685455, 0.18292436003684998, 0.2674761116504669, 0.1057504341006279, 0.5214765071868896, 0.1765710562467575, 0.15375129878520966, 0.08563723415136337, 0.35003283619880676, 0.12250327318906784, 0.4574505388736725, 0.6043637990951538, 0.046846963465213776, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3136129081249237, 0.10648278146982193, 0.02492944709956646, 0.07937752455472946, 0.16382691264152527, 0.40212482213974, 0.2148500233888626, 0.5046796798706055, 0.25625455379486084, 0.10382789373397827, 0.027611082419753075, 0.07138189673423767, 0.1265101283788681, 0.05298655480146408, 0.01642199046909809, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.7252353429794312, 0.23862500488758087, 0.17466871440410614, 0.2584758698940277, 0.15821219980716705, 0.41019105911254883, 0.4795793294906616, 0.2558479905128479, 0.061036378145217896, 0.5831483006477356, 0.23237691819667816, 0.36767491698265076, 0.07294586300849915, 0.0734395682811737, 0.006080146878957748, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.18402060866355896, 0.2199273407459259, 0.10670217871665955, 0.36498934030532837, 0.37264159321784973, 0.5975290536880493, 0.641157865524292, 0.4798426032066345, 0.07047704607248306, 0.30389490723609924, 0.6835307478904724, 0.29959914088249207, 0.32009243965148926, 0.2076108753681183, 0.015385132282972336, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.18547095358371735, 0.1046445369720459, 0.17664410173892975, 0.031107882037758827, 0.4872691333293915, 0.6876094937324524, 0.29805243015289307, 0.2697339355945587, 0.03289056569337845, 0.04577193781733513, 0.2390383929014206, 0.650258481502533, 0.6253164410591125, 0.2719551920890808, 0.042574722319841385, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.06026101112365723, 0.4596063494682312, 0.11362233757972717, 0.050736263394355774, 0.47900232672691345, 0.8146356344223022, 0.23428170382976532, 0.5258204936981201, 0.07407079637050629, 0.24087238311767578, 0.04631686583161354, 0.04097185283899307, 0.24002470076084137, 0.051092784851789474, 0.10185284167528152, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.05915316566824913, 0.3385859429836273, 0.23845957219600677, 0.13520635664463043, 0.49372056126594543, 0.8321547508239746, 0.47351959347724915, 0.4942004382610321, 0.11661165207624435, 0.273796945810318, 0.09639480710029602, 0.07113680988550186, 0.3545372784137726, 0.3069557547569275, 0.026768943294882774, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6326229572296143, 0.28129494190216064, 0.2424720972776413, 0.23961131274700165, 0.1532977670431137, 0.03248026221990585, 0.07237446308135986, 0.03991716355085373, 0.058106135576963425, 0.6791825294494629, 0.4868316352367401, 0.4841252863407135, 0.1838759332895279, 0.16229771077632904, 0.03779346123337746, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.20045556128025055, 0.06346653401851654, 0.1246497705578804, 0.132145956158638, 0.18068760633468628, 0.0611145943403244, 0.3011611998081207, 0.09648064523935318, 0.3848741054534912, 0.20776434242725372, 0.09024091809988022, 0.10095226764678955, 0.05726093426346779, 0.17784324288368225, 0.06983170658349991, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06639314442873001, 0.03837187588214874, 0.306266725063324, 0.09758531302213669, 0.10875808447599411, 0.20901371538639069, 0.0894559919834137, 0.21620051562786102, 0.13805773854255676, 0.07912127673625946, 0.3521624505519867, 0.036526914685964584, 0.1551785171031952, 0.14622288942337036, 0.19236178696155548, 0.03290099650621414, 0.3365767002105713, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03379146009683609, 0.11666905134916306, 0.02791847102344036, 0.04754703491926193, 0.02039634808897972, 0.23185299336910248, 0.07985613495111465, 0.3240954875946045, 0.04561735317111015, 0.061520081013441086, 0.18156962096691132, 0.10860903561115265, 0.3409081995487213, 0.3218340575695038, 0.13103368878364563, 0.003547579748556018, 0.004082763101905584, 0.4616691768169403, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06278766691684723, 0.001863734913058579, 0.30563783645629883, 0.056017640978097916, 0.245498925447464, 0.11060530692338943, 0.09064232558012009, 0.004372697789222002, 0.007118886336684227, 0.06251134723424911, 0.17941752076148987, 0.004394095856696367, 0.11450538039207458, 0.046043287962675095, 0.021101655438542366, 0.03595791012048721, 0.1313885897397995, 0.007101066876202822, 0.42131781578063965, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11553236097097397, 0.0885467380285263, 0.2750205993652344, 0.21104735136032104, 0.3459762930870056, 0.07976578176021576, 0.218110129237175, 0.05760955810546875, 0.09680842608213425, 0.2662138342857361, 0.21090076863765717, 0.41520535945892334, 0.21548694372177124, 0.2248467653989792, 0.10481394827365875, 0.007601147051900625, 0.014137630350887775, 0.01938864029943943, 0.2572920322418213, 0.0011994435917586088, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03112325258553028, 0.08175794035196304, 0.035110849887132645, 0.038375336676836014, 0.2468937784433365, 0.060934457927942276, 0.0843387246131897, 0.03423367813229561, 0.02026834897696972, 0.07970783859491348, 0.08959806710481644, 0.1693299561738968, 0.16057033836841583, 0.21660663187503815, 0.13329552114009857, 0.00011468974116723984, 0.0032473355531692505, 0.00037737423554062843, 0.2793608605861664, 0.003465541172772646, 5.061212868895382e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09539461880922318, 0.058681365102529526, 0.01674766093492508, 0.02866855263710022, 0.012030106969177723, 0.21465063095092773, 0.034089475870132446, 0.04479566961526871, 0.014019637368619442, 0.035355255007743835, 0.1569557934999466, 0.01038492750376463, 0.06631091982126236, 0.1547483503818512, 0.19284123182296753, 0.21311266720294952, 0.10434294492006302, 0.011484598740935326, 0.0013334749964997172, 0.03845251351594925, 0.028238367289304733, 0.05654546618461609, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04954487085342407, 0.07065968960523605, 0.07275094836950302, 0.040997497737407684, 0.07946129143238068, 0.17300859093666077, 0.03222974017262459, 0.02469809167087078, 0.18557047843933105, 0.13542628288269043, 0.26776814460754395, 0.056715987622737885, 0.15973475575447083, 0.19029632210731506, 0.17610958218574524, 0.052184704691171646, 0.499632865190506, 0.005138374865055084, 0.10169705748558044, 0.09997230768203735, 0.036990027874708176, 0.07566682249307632, 0.32418423891067505, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.047577280551195145, 0.02606579288840294, 0.0165295097976923, 0.04137043654918671, 0.013305035419762135, 0.32835593819618225, 0.026565413922071457, 0.06772360950708389, 0.010228256694972515, 0.041277337819337845, 0.1336892545223236, 0.008326719515025616, 0.10322394222021103, 0.1976388841867447, 0.21077491343021393, 0.23645982146263123, 0.016864946112036705, 0.013305210508406162, 0.0007752762176096439, 0.017555342987179756, 0.03100133314728737, 0.04085567593574524, 0.029846351593732834, 0.010373883880674839, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.043893925845623016, 0.021177353337407112, 0.028366681188344955, 0.07016126066446304, 0.07573862373828888, 0.22699910402297974, 0.055615294724702835, 0.07980518788099289, 0.009269739501178265, 0.09460800141096115, 0.16427507996559143, 0.20832805335521698, 0.1427353024482727, 0.2680304944515228, 0.13907650113105774, 0.18805328011512756, 0.046367619186639786, 0.10314629226922989, 0.018223291262984276, 0.27720585465431213, 0.3798944056034088, 0.09291481226682663, 0.09293034672737122, 0.04290880635380745, 0.03370373696088791, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03411688283085823, 0.056632235646247864, 0.07365043461322784, 0.10934542864561081, 0.09185239672660828, 0.5077250003814697, 0.05141168087720871, 0.047258101403713226, 0.053326722234487534, 0.13365329802036285, 0.28296661376953125, 0.041020717471838, 0.08861301094293594, 0.13371184468269348, 0.11519401520490646, 0.028641005977988243, 0.03295213729143143, 0.0065453751012682915, 0.16686026751995087, 0.028714975342154503, 0.015397193841636181, 0.02003423683345318, 0.019093815237283707, 0.020523719489574432, 0.016172079369425774, 0.3490104377269745, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04096442833542824, 0.07374820858240128, 0.07300861179828644, 0.10121195018291473, 0.051522452384233475, 0.3508135676383972, 0.03948133811354637, 0.047985587269067764, 0.06340529769659042, 0.06765846908092499, 0.281475692987442, 0.05536516010761261, 0.1822110116481781, 0.22272904217243195, 0.13150985538959503, 0.10839971899986267, 0.004465002100914717, 0.016082070767879486, 0.035488102585077286, 0.015600458718836308, 0.012030484154820442, 0.015872180461883545, 0.01552913524210453, 0.03533920273184776, 0.11401902139186859, 0.31523072719573975, 0.20448055863380432, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07982534170150757, 0.06016559898853302, 0.03820561617612839, 0.02410227432847023, 0.006901262793689966, 0.42442968487739563, 0.02364957146346569, 0.07835549116134644, 0.027230771258473396, 0.12123586237430573, 0.15446297824382782, 0.018115278333425522, 0.21087171137332916, 0.29417684674263, 0.08362340182065964, 0.18776558339595795, 0.0060520414263010025, 0.017473671585321426, 0.005528539884835482, 0.0027145782951265574, 0.012176988646388054, 0.0031525399535894394, 0.004637573380023241, 0.011988476850092411, 0.06979440897703171, 0.38327983021736145, 0.020156072452664375, 0.010166948661208153, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05696694925427437, 0.014171368442475796, 0.06200120970606804, 0.021368764340877533, 0.012162269093096256, 0.0841592326760292, 0.03827953711152077, 0.07895056158304214, 0.01159723848104477, 0.05937046930193901, 0.023348387330770493, 0.008824712596833706, 0.13521961867809296, 0.23698511719703674, 0.03196632117033005, 0.3064975440502167, 0.004262991715222597, 0.009997943416237831, 0.00034317225799895823, 0.013912403024733067, 0.02852706052362919, 0.004078225698322058, 0.001928618410602212, 0.006367305759340525, 0.035507142543792725, 0.050674788653850555, 0.007057875394821167, 0.0049485149793326855, 0.0049379738047719, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11678174138069153, 0.8205142617225647, 0.01038320455700159, 0.023903295397758484, 0.21764065325260162, 0.2580764889717102, 0.20165181159973145, 0.2900886535644531, 0.03504627197980881, 0.10256802290678024, 0.03713424876332283, 0.7063723206520081, 0.8779962062835693, 0.8367014527320862, 0.0919082760810852, 0.14988604187965393, 0.015584584325551987, 0.137997567653656, 0.0031439096201211214, 0.5546696782112122, 0.01658078096807003, 0.0025873971171677113, 0.0010246702004224062, 0.019667595624923706, 0.012580120004713535, 0.015491531230509281, 0.029023459181189537, 0.021588340401649475, 0.25595030188560486, 0.02325037308037281, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.038494985550642014, 0.05109047889709473, 0.07501792907714844, 0.04001014679670334, 0.021166233345866203, 0.03079657442867756, 0.01494709774851799, 0.010983827523887157, 0.0029027159325778484, 0.0995086133480072, 0.350593626499176, 0.02021479234099388, 0.34575650095939636, 0.21952421963214874, 0.05450797453522682, 0.07357528805732727, 0.007756352424621582, 0.002724927617236972, 0.001402079127728939, 0.0004431438574101776, 0.00010925461538136005, 0.0029409730341285467, 0.005563507787883282, 0.012139370664954185, 0.03890732303261757, 0.05558362230658531, 0.03318313509225845, 0.4270496368408203, 0.07112571597099304, 0.15036046504974365, 0.020786603912711143, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.028108511120080948, 0.08174566179513931, 0.03328564018011093, 0.03230520337820053, 0.012646276503801346, 0.1872790902853012, 0.025206655263900757, 0.06737280637025833, 0.033121660351753235, 0.08641302585601807, 0.2848047614097595, 0.059273794293403625, 0.18425194919109344, 0.15244826674461365, 0.1352420449256897, 0.012120572850108147, 0.0003307444858364761, 0.009640182368457317, 0.00017808230768423527, 0.0021490382496267557, 0.0008148089982569218, 0.0008481521508656442, 0.0019973982125520706, 0.005024890415370464, 0.01719486527144909, 0.044799502938985825, 0.006444229744374752, 0.018026985228061676, 0.0067391968332231045, 0.061299871653318405, 0.01281613577157259, 0.3084925711154938, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07509021461009979, 0.05027765780687332, 0.23718997836112976, 0.11438266932964325, 0.11051909625530243, 0.431958943605423, 0.046987809240818024, 0.021854011341929436, 0.15366314351558685, 0.1928708851337433, 0.2900879681110382, 0.052021902054548264, 0.11538787186145782, 0.25173547863960266, 0.10233873873949051, 0.011204708367586136, 0.0033799665980041027, 0.008117830380797386, 0.1567971557378769, 0.012545537203550339, 0.002854604972526431, 0.0037395430263131857, 0.0003391341888345778, 0.002928558737039566, 0.004266565665602684, 0.28180748224258423, 0.005543314386159182, 0.0059068226255476475, 0.004401014186441898, 0.09436267614364624, 0.003524675266817212, 0.09697568416595459, 0.3818984925746918, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03257948160171509, 0.08023553341627121, 0.06238585337996483, 0.06856023520231247, 0.02927098423242569, 0.2968010902404785, 0.03317389637231827, 0.04758336395025253, 0.07943073660135269, 0.053982626646757126, 0.21416282653808594, 0.05025764927268028, 0.14347779750823975, 0.19969123601913452, 0.13921964168548584, 0.1085091158747673, 0.0013132937019690871, 0.011304548010230064, 0.014309195801615715, 0.009265521541237831, 0.00682368129491806, 0.01179590355604887, 0.005223054438829422, 0.01697726733982563, 0.05782441794872284, 0.2522926330566406, 0.16053971648216248, 0.020927468314766884, 0.02051178365945816, 0.1114674061536789, 0.014847181737422943, 0.40623563528060913, 0.12017090618610382, 0.2281613051891327, NaN, NaN, NaN, NaN, NaN, NaN], [0.07817428559064865, 0.11046875268220901, 0.040724072605371475, 0.024797527119517326, 0.004808576311916113, 0.5141928791999817, 0.024754824116826057, 0.080713652074337, 0.03179122135043144, 0.12244449555873871, 0.22665926814079285, 0.013305582106113434, 0.23485711216926575, 0.323343425989151, 0.10171245783567429, 0.23926517367362976, 0.007461922243237495, 0.015478387475013733, 0.02120528556406498, 0.0046339076943695545, 0.01287792343646288, 0.005305645987391472, 0.0037130024284124374, 0.011430526152253151, 0.10132863372564316, 0.42019084095954895, 0.03134358674287796, 0.006659360136836767, 0.0015345009742304683, 0.05340040102601051, 0.0021821516565978527, 0.15366847813129425, 0.09343723207712173, 0.04055917635560036, 0.009410854429006577, NaN, NaN, NaN, NaN, NaN], [0.03765244409441948, 0.0463164821267128, 0.06456112116575241, 0.05319739878177643, 0.010156691074371338, 0.1155625581741333, 0.02458079345524311, 0.07648347318172455, 0.019683409482240677, 0.06488858163356781, 0.09342794120311737, 0.059032924473285675, 0.15581923723220825, 0.2894386053085327, 0.04157077521085739, 0.3882482349872589, 0.012203006073832512, 0.008404962718486786, 0.0008633172838017344, 0.07213836163282394, 0.03903299570083618, 0.006879106629639864, 0.0025245456490665674, 0.011604986153542995, 0.1302306056022644, 0.05970751494169235, 0.005057368893176317, 0.0025832061655819416, 0.003548768814653158, 0.03821956738829613, 0.0041786422953009605, 0.029319334775209427, 0.009258194826543331, 0.010013489983975887, 0.0024901984725147486, 0.009316755458712578, NaN, NaN, NaN, NaN], [0.14924734830856323, 0.8862696886062622, 0.013125438243150711, 0.033269379287958145, 0.22599543631076813, 0.33975404500961304, 0.25561264157295227, 0.36481109261512756, 0.05327271297574043, 0.09902165085077286, 0.03598061203956604, 0.754990816116333, 0.9104278087615967, 0.8631682395935059, 0.10125402361154556, 0.08333727717399597, 0.009125825949013233, 0.12352871894836426, 0.0034849271178245544, 0.49194949865341187, 0.008760062977671623, 0.002427457133308053, 0.0004761714953929186, 0.014378424733877182, 0.007653949782252312, 0.010163314640522003, 0.018072640523314476, 0.014914281666278839, 0.33540958166122437, 0.012212751433253288, 0.050671979784965515, 0.08942927420139313, 0.0058481828309595585, 0.02088618278503418, 0.013520943000912666, 0.3026564419269562, 0.011637967079877853, NaN, NaN, NaN], [0.03672042489051819, 0.12888115644454956, 0.1578092873096466, 0.056865133345127106, 0.03288109228014946, 0.1379515379667282, 0.021150214597582817, 0.013284055516123772, 0.003249341854825616, 0.08646353334188461, 0.5471532940864563, 0.0361909456551075, 0.5093809366226196, 0.39931434392929077, 0.07520455867052078, 0.019913960248231888, 0.003490668721497059, 0.00020567848696373403, 0.00036819992237724364, 0.00019341551524121314, 3.8652269722661003e-05, 0.0008544524316675961, 0.002890991745516658, 0.001110991695895791, 0.005157719366252422, 0.008338885381817818, 0.0030357406940311193, 0.14557099342346191, 0.021602485328912735, 0.04367346689105034, 0.0015647107502445579, 0.009655454196035862, 0.14827704429626465, 0.008163533173501492, 0.49237948656082153, 0.06938102096319199, 0.08394628763198853, 0.049248531460762024, NaN, NaN], [0.03492635861039162, 0.09938696771860123, 0.028945090249180794, 0.03084651380777359, 0.012707062065601349, 0.15071596205234528, 0.029011720791459084, 0.05455483868718147, 0.03256314992904663, 0.07100401073694229, 0.2587825059890747, 0.05546442046761513, 0.17298617959022522, 0.15517692267894745, 0.13362783193588257, 0.010580360889434814, 0.00023049254377838224, 0.00745873898267746, 0.00016025979130063206, 0.002226235345005989, 0.0004258991975802928, 0.000578688399400562, 0.0014760587364435196, 0.002039685845375061, 0.0048048608005046844, 0.019996320828795433, 0.0029125709552317858, 0.006709430366754532, 0.0017099445685744286, 0.02097223326563835, 0.0024284888058900833, 0.10361000150442123, 0.022238893434405327, 0.009704988449811935, 0.017071064561605453, 0.011506098322570324, 0.0406200997531414, 0.0063119689002633095, 0.36112311482429504, NaN], [0.050736088305711746, 0.10139954090118408, 0.08949553966522217, 0.0938185378909111, 0.06053004041314125, 0.18139560520648956, 0.0767659917473793, 0.11340610682964325, 0.19499026238918304, 0.11419404298067093, 0.23666803538799286, 0.05730360746383667, 0.07293370366096497, 0.11558260023593903, 0.12613430619239807, 0.07011571526527405, 0.029766615480184555, 0.05616272985935211, 0.02569880336523056, 0.02553572878241539, 0.010698755271732807, 0.02022577077150345, 0.01824677176773548, 0.03918607532978058, 0.034657131880521774, 0.11515442281961441, 0.05569382756948471, 0.035370998084545135, 0.047812946140766144, 0.1140216588973999, 0.018943075090646744, 0.09709078818559647, 0.08172454684972763, 0.04602199047803879, 0.02941049635410309, 0.031383853405714035, 0.10708537697792053, 0.012693268246948719, 0.07050468772649765, 0.25427982211112976]], [[0.04456469416618347, 0.016716457903385162, 0.08688971400260925, 0.23432573676109314, 0.12769784033298492, 0.0498066172003746, 0.10501405596733093, 0.14398211240768433, 0.3055479824542999, 0.0823235884308815, 0.23467087745666504, 0.6305257678031921, 0.08790664374828339, 0.14063040912151337, 0.13028757274150848, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.04107241332530975, 0.03620494529604912, 0.07322828471660614, 0.1027759537100792, 0.08743055909872055, 0.016458408907055855, 0.09779228270053864, 0.014780157245695591, 0.09821301698684692, 0.025402111932635307, 0.0808086097240448, 0.08257035166025162, 0.07231960445642471, 0.0895148441195488, 0.19708459079265594, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1263897716999054, 0.01533158216625452, 0.08717449009418488, 0.22571881115436554, 0.06928549706935883, 0.16778334975242615, 0.06136450543999672, 0.07180161774158478, 0.2525678873062134, 0.32249853014945984, 0.08566119521856308, 0.48726531863212585, 0.2929263114929199, 0.21127133071422577, 0.12448348850011826, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1481804996728897, 0.04817945510149002, 0.03058626689016819, 0.13171793520450592, 0.10783855617046356, 0.24912205338478088, 0.1342363804578781, 0.28650397062301636, 0.25943103432655334, 0.2756144404411316, 0.08422903716564178, 0.7444766163825989, 0.7611673474311829, 0.5739472508430481, 0.11213001608848572, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1744699776172638, 0.050404343754053116, 0.018338145688176155, 0.11463086307048798, 0.02370826154947281, 0.09417468309402466, 0.04503462836146355, 0.0389062762260437, 0.1780962496995926, 0.7825090885162354, 0.15977078676223755, 0.2598268687725067, 0.05674973130226135, 0.2742767333984375, 0.15589554607868195, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.26428407430648804, 0.0871720165014267, 0.015494171530008316, 0.31054598093032837, 0.31179672479629517, 0.05687993764877319, 0.05327969416975975, 0.14049863815307617, 0.03721972927451134, 0.33735793828964233, 0.06669215857982635, 0.44665512442588806, 0.1105320155620575, 0.07633788883686066, 0.13637836277484894, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.27871736884117126, 0.07987862080335617, 0.06999076902866364, 0.3873903453350067, 0.3669894337654114, 0.0245819091796875, 0.02483827993273735, 0.08571609854698181, 0.04856930300593376, 0.2826782464981079, 0.10519464313983917, 0.8515737056732178, 0.24991582334041595, 0.08752243965864182, 0.1076057106256485, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.18780259788036346, 0.02093103528022766, 0.1730981320142746, 0.27918383479118347, 0.32355740666389465, 0.05090703070163727, 0.030107326805591583, 0.015694553032517433, 0.08293543756008148, 0.11989035457372665, 0.1594303995370865, 0.6402391195297241, 0.08334839344024658, 0.13423335552215576, 0.16886292397975922, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.23048973083496094, 0.05534357205033302, 0.15910016000270844, 0.5473513603210449, 0.11114095151424408, 0.060548413544893265, 0.23547381162643433, 0.0231330469250679, 0.22654443979263306, 0.16574865579605103, 0.03383632004261017, 0.05167527496814728, 0.026772163808345795, 0.028301218524575233, 0.08144620060920715, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.126570925116539, 0.0055835917592048645, 0.7687394022941589, 0.6136845350265503, 0.7887718677520752, 0.24027548730373383, 0.25543272495269775, 0.017155619338154793, 0.01121050026267767, 0.02180907502770424, 0.06387564539909363, 0.04227403923869133, 0.004662328865379095, 0.0204116590321064, 0.16526305675506592, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.3619309663772583, 0.022692076861858368, 0.8739812970161438, 0.5600091814994812, 0.4330839216709137, 0.27864721417427063, 0.1654776781797409, 0.02327956072986126, 0.003977042157202959, 0.0664801374077797, 0.12084753066301346, 0.16815124452114105, 0.07773539423942566, 0.17824198305606842, 0.05263833701610565, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.29354482889175415, 0.16078433394432068, 0.705570638179779, 0.44417092204093933, 0.02176845259964466, 0.15997210144996643, 0.4057019054889679, 0.11617531627416611, 0.010741903446614742, 0.06882698833942413, 0.07046788930892944, 0.041601523756980896, 0.011864392086863518, 0.06714706867933273, 0.14988133311271667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.5400083065032959, 0.2319646179676056, 0.6198285818099976, 0.2858767509460449, 0.1694929450750351, 0.06001640111207962, 0.26940232515335083, 0.06411167979240417, 0.02847147174179554, 0.18856319785118103, 0.05879069119691849, 0.03795049339532852, 0.009596540592610836, 0.023393897339701653, 0.14663995802402496, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.6488012075424194, 0.15997910499572754, 0.6486002802848816, 0.4859846830368042, 0.34752336144447327, 0.028076842427253723, 0.12281371653079987, 0.019826101139187813, 0.023531395941972733, 0.15743687748908997, 0.059922393411397934, 0.08707788586616516, 0.005486410576850176, 0.025385212153196335, 0.15706156194210052, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.037294961512088776, 0.2018004208803177, 0.33537882566452026, 0.19571122527122498, 0.0998593419790268, 0.48263466358184814, 0.11429780721664429, 0.20324908196926117, 0.7053001523017883, 0.01905757561326027, 0.1765546351671219, 0.10779165476560593, 0.18456625938415527, 0.16855330765247345, 0.014784654602408409, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.1489560306072235, 0.2212677150964737, 0.055408962070941925, 0.03110104240477085, 0.02513720653951168, 0.07830048352479935, 0.05067736655473709, 0.06611648201942444, 0.02238955721259117, 0.03719142824411392, 0.025896798819303513, 0.04350690543651581, 0.11618120968341827, 0.08714473247528076, 0.15466241538524628, 0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002932992298156023, 0.307859867811203, 0.008187332190573215, 0.003677746979519725, 0.0005738585605286062, 0.0008406178676523268, 0.0005446207360364497, 0.00039283244404941797, 0.0009221792570315301, 0.000758469570428133, 0.003933709114789963, 0.0009352274937555194, 0.001059120986610651, 0.0020118390675634146, 0.010183396749198437, 0.1627129465341568, 0.03836298733949661, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.37297555804252625, 0.09208715707063675, 0.16802547872066498, 0.11860792338848114, 0.08042033761739731, 0.18612971901893616, 0.45423436164855957, 0.07133221626281738, 0.13892753422260284, 0.3810507357120514, 0.291797935962677, 0.16154640913009644, 0.050885219126939774, 0.10468144714832306, 0.10335776954889297, 0.23664157092571259, 0.02332315407693386, 0.0017523575806990266, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.028274476528167725, 0.018124615773558617, 0.13954800367355347, 0.03560209274291992, 0.08428613841533661, 0.17491763830184937, 0.13035845756530762, 0.0214189775288105, 0.009060325101017952, 0.012400318868458271, 0.031279344111680984, 0.011209131218492985, 0.19533281028270721, 0.012452301569283009, 0.020085560157895088, 0.14284735918045044, 0.19342879951000214, 0.5212197303771973, 0.028613613918423653, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11180772632360458, 0.012462746351957321, 0.04844700172543526, 0.06198285147547722, 0.06685204058885574, 0.44600817561149597, 0.30352795124053955, 0.1519387811422348, 0.003835479263216257, 0.08384031802415848, 0.027865614742040634, 0.159846231341362, 0.46423590183258057, 0.09249147027730942, 0.09178084880113602, 0.022152410820126534, 0.06252314150333405, 0.005122532602399588, 0.24202540516853333, 0.0027534610126167536, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04840230569243431, 0.026793736964464188, 0.1120820939540863, 0.09037120640277863, 0.2328549474477768, 0.1063276007771492, 0.14073747396469116, 0.19612964987754822, 0.1904316544532776, 0.10354755818843842, 0.10268037766218185, 0.13820117712020874, 0.3374333083629608, 0.15443934500217438, 0.12536528706550598, 0.04657726734876633, 0.23517371714115143, 0.03296450525522232, 0.2014523595571518, 0.06359406560659409, 0.0884864553809166, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.36786824464797974, 0.056283749639987946, 0.03846094757318497, 0.07181648164987564, 0.03666122257709503, 0.04024837538599968, 0.5659748911857605, 0.2338860183954239, 0.11518415063619614, 0.3659259080886841, 0.04107162728905678, 0.012827688828110695, 0.0609581284224987, 0.02837788313627243, 0.060403015464544296, 0.05186963453888893, 0.02286554127931595, 0.21517929434776306, 0.12055587023496628, 0.1711670458316803, 0.27492430806159973, 0.27398592233657837, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0033490851055830717, 0.001678164815530181, 0.02563566155731678, 0.028815647587180138, 0.007257265504449606, 0.04370535537600517, 0.026118090376257896, 0.435838907957077, 0.005564961116760969, 0.014266176149249077, 0.018343305215239525, 0.0009297388605773449, 0.03809681162238121, 0.020595146343111992, 0.03566184639930725, 0.020278872922062874, 0.02308776043355465, 0.022820638492703438, 0.18259893357753754, 0.3133871257305145, 0.08183155953884125, 0.35655686259269714, 0.17295894026756287, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.34718528389930725, 0.028826624155044556, 0.05378839746117592, 0.0680842474102974, 0.0254778191447258, 0.1994519978761673, 0.7739751935005188, 0.28213825821876526, 0.24756361544132233, 0.3363908529281616, 0.08445209264755249, 0.0067241075448691845, 0.09118638187646866, 0.04656682163476944, 0.0331079363822937, 0.057175230234861374, 0.2799927890300751, 0.10977934300899506, 0.4680712819099426, 0.08838099986314774, 0.05264464393258095, 0.21108192205429077, 0.08241217583417892, 0.0764400064945221, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06212884560227394, 0.013463910669088364, 0.024143628776073456, 0.025745615363121033, 0.12165382504463196, 0.04105379059910774, 0.21918880939483643, 0.12444313615560532, 0.7241542935371399, 0.2624671459197998, 0.05330171436071396, 0.026902005076408386, 0.04947282373905182, 0.06268218904733658, 0.04105047509074211, 0.17679302394390106, 0.30970489978790283, 0.042192552238702774, 0.2463400512933731, 0.032756272703409195, 0.05394153669476509, 0.02321716584265232, 0.30038926005363464, 0.023974716663360596, 0.0257905051112175, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23139908909797668, 0.12510670721530914, 0.062008026987314224, 0.06357982009649277, 0.21447335183620453, 0.06672460585832596, 0.5059712529182434, 0.23151132464408875, 0.3211345672607422, 0.29274967312812805, 0.07394816726446152, 0.12323616445064545, 0.33240705728530884, 0.13292434811592102, 0.0974365845322609, 0.1864403486251831, 0.03811780363321304, 0.18074536323547363, 0.08396673202514648, 0.026499373838305473, 0.05736878141760826, 0.274480402469635, 0.10284627228975296, 0.15606749057769775, 0.017497936263680458, 0.09719526022672653, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3976813554763794, 0.24336650967597961, 0.030069073662161827, 0.04866141080856323, 0.061815883964300156, 0.023062149062752724, 0.2837987542152405, 0.10572359710931778, 0.42220908403396606, 0.47088485956192017, 0.06114182993769646, 0.05295940861105919, 0.04274435341358185, 0.033208493143320084, 0.07069624215364456, 0.1767420768737793, 0.017465414479374886, 0.034512054175138474, 0.0999627411365509, 0.011741198599338531, 0.022724410519003868, 0.04408577084541321, 0.03894393891096115, 0.018038587644696236, 0.058924250304698944, 0.2522818148136139, 0.12782295048236847, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6213744282722473, 0.08501708507537842, 0.08457361906766891, 0.0819045826792717, 0.02008524350821972, 0.02321169711649418, 0.5481746196746826, 0.17061969637870789, 0.19314314424991608, 0.48946020007133484, 0.08799289166927338, 0.009451461024582386, 0.1643926501274109, 0.03458939492702484, 0.0487554594874382, 0.042104240506887436, 0.022070694714784622, 0.04743226245045662, 0.13338083028793335, 0.020831480622291565, 0.031267598271369934, 0.024703562259674072, 0.041907425969839096, 0.006121364887803793, 0.02875565178692341, 0.13002096116542816, 0.36194902658462524, 0.021867850795388222, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11498570442199707, 0.014700047671794891, 0.04425002261996269, 0.027370423078536987, 0.031341005116701126, 0.11119254678487778, 0.2834031581878662, 0.24822625517845154, 0.387948602437973, 0.17188440263271332, 0.026020031422376633, 0.003112945705652237, 0.1680845320224762, 0.013143973425030708, 0.05647796019911766, 0.12623563408851624, 0.6370776891708374, 0.07802888005971909, 0.06076015904545784, 0.015353387221693993, 0.0031011439859867096, 0.031844403594732285, 0.5665289163589478, 0.013176449574530125, 0.025442441925406456, 0.05083877220749855, 0.08586791157722473, 0.03281332179903984, 0.0019294946687296033, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00710845272988081, 0.009718026034533978, 0.08296849578619003, 0.05356726795434952, 0.20372402667999268, 0.20898059010505676, 0.07373131066560745, 0.07588774710893631, 0.33318811655044556, 0.09730548411607742, 0.031877510249614716, 0.04629351943731308, 0.026428943499922752, 0.05165233090519905, 0.12934288382530212, 0.010483458638191223, 0.10243765264749527, 0.013204336166381836, 0.1070198118686676, 0.001742976950481534, 0.0011925535509362817, 0.03764529153704643, 0.023008054122328758, 0.09038762003183365, 0.1208486333489418, 0.06097627431154251, 0.11476689577102661, 0.17706690728664398, 0.4447736442089081, 0.005561552010476589, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.092291921377182, 0.13057716190814972, 0.11971572786569595, 0.09643372148275375, 0.0971774011850357, 0.03882397338747978, 0.30341219902038574, 0.06688009947538376, 0.5493715405464172, 0.21897412836551666, 0.10454282909631729, 0.09917838126420975, 0.19730664789676666, 0.0889393612742424, 0.0462181456387043, 0.03962688520550728, 0.412600040435791, 0.1027907133102417, 0.011060677468776703, 0.04006139934062958, 0.005457504652440548, 0.17391063272953033, 0.009697728790342808, 0.08243320137262344, 0.1504840850830078, 0.029468167573213577, 0.29366523027420044, 0.04788699373602867, 0.17640100419521332, 0.04229334741830826, 0.3300667107105255, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3365032970905304, 0.06134270504117012, 0.11965256929397583, 0.08703643828630447, 0.08615697175264359, 0.01610170491039753, 0.289604127407074, 0.16905160248279572, 0.690265953540802, 0.5125291347503662, 0.11020015180110931, 0.05034353584051132, 0.04973014071583748, 0.04155145213007927, 0.06180096045136452, 0.20544184744358063, 0.06503231078386307, 0.21778742969036102, 0.04011436551809311, 0.2470238208770752, 0.03102266602218151, 0.027881061658263206, 0.06887322664260864, 0.023802783340215683, 0.2166331559419632, 0.06618232280015945, 0.058350641280412674, 0.04297764599323273, 0.06574989855289459, 0.02652076631784439, 0.08339553326368332, 0.09817715734243393, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.25151577591896057, 0.0737723708152771, 0.11452356725931168, 0.07270905375480652, 0.27380475401878357, 0.046423640102148056, 0.6668940782546997, 0.60158771276474, 0.286392480134964, 0.2904633581638336, 0.07359147071838379, 0.040276750922203064, 0.2706137001514435, 0.15532110631465912, 0.051646988838911057, 0.09466058760881424, 0.0047309016808867455, 0.1481417566537857, 0.06127317249774933, 0.015202163718640804, 0.011932089924812317, 0.31230586767196655, 0.04852164536714554, 0.039501819759607315, 0.001117925625294447, 0.06312739849090576, 0.023924386128783226, 0.02860989049077034, 0.007241260260343552, 0.11453913897275925, 0.012237192131578922, 0.2803768217563629, 0.0480632521212101, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4344438314437866, 0.2159019559621811, 0.0411386713385582, 0.059745997190475464, 0.08364511281251907, 0.02960371784865856, 0.3908357322216034, 0.17347759008407593, 0.4736940562725067, 0.5831181406974792, 0.08143209666013718, 0.05496616289019585, 0.0508774034678936, 0.03704635798931122, 0.07529113441705704, 0.02001449465751648, 0.0017837424529716372, 0.005722085013985634, 0.04321253299713135, 0.00430489843711257, 0.009005578234791756, 0.010736249387264252, 0.0058517144061625, 0.003792154835537076, 0.008828205987811089, 0.0838593989610672, 0.029530486091971397, 0.015579215250909328, 0.010320665314793587, 0.016853220760822296, 0.017335176467895508, 0.12552303075790405, 0.42354699969291687, 0.08326870948076248, NaN, NaN, NaN, NaN, NaN, NaN], [0.6010525822639465, 0.07716702669858932, 0.12942874431610107, 0.11651009321212769, 0.029510293155908585, 0.025635747238993645, 0.564699649810791, 0.20346374809741974, 0.1942133754491806, 0.5329980254173279, 0.09726559370756149, 0.006782675161957741, 0.1884276419878006, 0.02957840822637081, 0.046941183507442474, 0.001771818962879479, 0.000807587115559727, 0.0031146325636655092, 0.023062998428940773, 0.0018312688916921616, 0.007724495604634285, 0.002569216303527355, 0.003803644794970751, 0.00041838324978016317, 0.001987496856600046, 0.012477965094149113, 0.04809670150279999, 0.0016458284808322787, 0.00020838514319621027, 0.005814890842884779, 0.018183711916208267, 0.30546146631240845, 0.4703490138053894, 0.15369661152362823, 0.012250960804522038, NaN, NaN, NaN, NaN, NaN], [0.07098641246557236, 0.02088714949786663, 0.0536419078707695, 0.04874833673238754, 0.1357380896806717, 0.10192368179559708, 0.22615019977092743, 0.3848302960395813, 0.3569928705692291, 0.19976821541786194, 0.030237246304750443, 0.012232640758156776, 0.14491091668605804, 0.01217038556933403, 0.025625383481383324, 0.02520398050546646, 0.2818087637424469, 0.007948609068989754, 0.07590723037719727, 0.01867567002773285, 0.006826441269367933, 0.011762343347072601, 0.5987983345985413, 0.0045673479326069355, 0.01173742488026619, 0.03130093589425087, 0.03894692659378052, 0.016236862167716026, 0.0014989122282713652, 0.0009245824767276645, 0.025562506169080734, 0.5276230573654175, 0.32699310779571533, 0.1864093542098999, 0.0933799296617508, 0.0060149896889925, NaN, NaN, NaN, NaN], [0.007031308952718973, 0.007269172929227352, 0.08423776179552078, 0.053896792232990265, 0.21268267929553986, 0.2456619292497635, 0.0817742720246315, 0.07338020205497742, 0.2872445285320282, 0.08955906331539154, 0.02503780461847782, 0.043076977133750916, 0.024157537147402763, 0.05127491056919098, 0.1281031221151352, 0.0011320068733766675, 0.011502433568239212, 0.0017513524508103728, 0.020418671891093254, 0.0003008104977197945, 0.00031320590642280877, 0.0053228470496833324, 0.0022876623552292585, 0.011736828833818436, 0.017109515145421028, 0.010937619023025036, 0.015238909050822258, 0.025703608989715576, 0.10705357789993286, 0.0009204442030750215, 0.02667400799691677, 0.16934601962566376, 0.08647502958774567, 0.028284918516874313, 0.06841914355754852, 0.39870724081993103, 0.0010592876933515072, NaN, NaN, NaN], [0.06564409285783768, 0.10634885728359222, 0.14713656902313232, 0.07514703273773193, 0.3204736113548279, 0.07143916934728622, 0.4829144775867462, 0.2612879276275635, 0.7603816986083984, 0.17889906466007233, 0.07189968973398209, 0.10938191413879395, 0.2776612341403961, 0.08681799471378326, 0.052979547530412674, 0.02631283551454544, 0.29101136326789856, 0.042160265147686005, 0.009721376933157444, 0.02933679334819317, 0.014515053480863571, 0.18161341547966003, 0.016545770689845085, 0.03647695854306221, 0.0840071588754654, 0.02240183763206005, 0.1055113896727562, 0.037331126630306244, 0.17535105347633362, 0.010923052206635475, 0.2594170868396759, 0.5064816474914551, 0.06657205522060394, 0.130835622549057, 0.0483754500746727, 0.2870587110519409, 0.010685333050787449, 0.21122200787067413, NaN, NaN], [0.28806957602500916, 0.05887402966618538, 0.12616868317127228, 0.10481040924787521, 0.19247829914093018, 0.033351678401231766, 0.39873749017715454, 0.22540906071662903, 0.7029480338096619, 0.5013188719749451, 0.10523373633623123, 0.08320688456296921, 0.0816955640912056, 0.04881281033158302, 0.09282685816287994, 0.21289733052253723, 0.10400458425283432, 0.2843308448791504, 0.11722961068153381, 0.31265783309936523, 0.07705509662628174, 0.050357937812805176, 0.1631784737110138, 0.04547655209898949, 0.37539371848106384, 0.07925810664892197, 0.07719646394252777, 0.043498191982507706, 0.04735783487558365, 0.022911155596375465, 0.20965908467769623, 0.2452480047941208, 0.05793433263897896, 0.07357832789421082, 0.03363368287682533, 0.041085004806518555, 0.014093895442783833, 0.05045074224472046, 0.0570731945335865, NaN], [0.2559513747692108, 0.07615252584218979, 0.11904845386743546, 0.07934627681970596, 0.09980516135692596, 0.14371442794799805, 0.3059750497341156, 0.09035829454660416, 0.22693291306495667, 0.32864776253700256, 0.08986205607652664, 0.1614997386932373, 0.17624114453792572, 0.16325940191745758, 0.119119793176651, 0.02115148864686489, 0.018139760941267014, 0.03536282852292061, 0.06259438395500183, 0.00901759136468172, 0.014575985260307789, 0.12521256506443024, 0.12870429456233978, 0.09162478893995285, 0.06363746523857117, 0.1348179280757904, 0.07700010389089584, 0.05158444121479988, 0.01101324986666441, 0.03299920633435249, 0.163722425699234, 0.13794326782226562, 0.18303781747817993, 0.117555633187294, 0.08103907853364944, 0.012191864661872387, 0.032527241855859756, 0.16104964911937714, 0.12187117338180542, 0.22321484982967377]]]], \"bot_text\": [\"The_\", \"animal_\", \"didn_\", \"'_\", \"t_\", \"cross_\", \"the_\", \"street_\", \"because_\", \"it_\", \"was_\", \"too_\", \"tire\", \"d_\", \"Das_\", \"Tier\", \"_\", \"\\u00fcber\", \"quer\", \"te_\", \"die_\", \"Stra\\u00dfe_\", \"nicht_\", \", _\", \"weil_\", \"es_\", \"zu_\", \"m\\u00fc\", \"de_\", \"war_\", \", _\", \"weil_\", \"es_\", \"zu_\", \"m\\u00fc\", \"de_\", \"war_\", \"._\"]}, \"out_out\": {\"top_text\": [\"Das_\", \"Tier\", \"_\", \"\\u00fcber\", \"quer\", \"te_\", \"die_\", \"Stra\\u00dfe_\", \"nicht_\", \", _\", \"weil_\", \"es_\", \"zu_\", \"m\\u00fc\", \"de_\", \"war_\", \", _\", \"weil_\", \"es_\", \"zu_\", \"m\\u00fc\", \"de_\", \"war_\", \"._\"], \"att\": [[[[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.33067038655281067, 0.02820705994963646, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.43891066312789917, 0.3106566071510315, 0.006947982590645552, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.8740342259407043, 0.6547167897224426, 0.0062981778755784035, 0.46666401624679565, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.009682492353022099, 0.17458303272724152, 0.7120969891548157, 0.10496775060892105, 0.0038010317366570234, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.31054121255874634, 0.41146165132522583, 0.4573209881782532, 0.639615535736084, 0.038498248904943466, 0.06232544779777527, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2996446192264557, 0.18095439672470093, 0.8072441220283508, 0.6008384227752686, 0.045412980020046234, 0.09029265493154526, 0.15878555178642273, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07671086490154266, 0.13175785541534424, 0.032809216529130936, 0.06887537240982056, 0.32570284605026245, 0.22846734523773193, 0.06983717530965805, 0.07415641844272614, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4443431496620178, 0.2924090623855591, 0.09237049520015717, 0.07077033072710037, 0.05661908909678459, 0.1886560618877411, 0.5792031288146973, 0.23326165974140167, 0.024399278685450554, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0045473226346075535, 0.015263181179761887, 0.11153102666139603, 0.01091472152620554, 0.07137833535671234, 0.14599360525608063, 0.24649137258529663, 0.2676219940185547, 0.14942915737628937, 0.03359955921769142, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0021246292162686586, 0.019146723672747612, 0.0190261360257864, 0.004887872841209173, 0.032842181622982025, 0.009469296783208847, 0.015122202225029469, 0.056959331035614014, 0.014146327041089535, 0.2864534854888916, 0.028167642652988434, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007321672048419714, 0.06949152052402496, 0.18409577012062073, 0.05168240889906883, 0.5332358479499817, 0.12983477115631104, 0.020923368632793427, 0.015086837112903595, 0.05491120368242264, 0.38865622878074646, 0.036598365753889084, 0.02645716816186905, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004608431365340948, 0.07759333401918411, 0.05611182749271393, 0.031112710013985634, 0.06043193116784096, 0.023203425109386444, 0.01299421489238739, 0.011212858371436596, 0.2615091800689697, 0.5089370608329773, 0.22289350628852844, 0.10276756435632706, 0.03959360718727112, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.012221934273838997, 0.040381401777267456, 0.0694599524140358, 0.0800129845738411, 0.023234205320477486, 0.003881127340719104, 0.03062801994383335, 0.024260450154542923, 0.012832778505980968, 0.01656900905072689, 0.2333584874868393, 0.3572527766227722, 0.0072386497631669044, 0.014752739109098911, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09144259989261627, 0.1256924569606781, 0.6557105779647827, 0.1641494482755661, 0.04417502135038376, 0.42902442812919617, 0.377028226852417, 0.1956152766942978, 0.27481555938720703, 0.37677863240242004, 0.4323487877845764, 0.6219720244407654, 0.3997260332107544, 0.1145903542637825, 0.041462015360593796, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5997433662414551, 0.1045081838965416, 0.10960735380649567, 0.047688476741313934, 0.31575047969818115, 0.1532202959060669, 0.4197675585746765, 0.16546213626861572, 0.31973955035209656, 0.23332525789737701, 0.15541672706604004, 0.05988143011927605, 0.5733460187911987, 0.8565582036972046, 0.009604076854884624, 0.030047349631786346, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02339007519185543, 0.01581897959113121, 0.02374129369854927, 0.02252129279077053, 0.08995510637760162, 0.0626068115234375, 0.27313846349716187, 0.036778680980205536, 0.22608895599842072, 0.06801939755678177, 0.035735905170440674, 0.022851483896374702, 0.06078701093792915, 0.42404335737228394, 0.41984546184539795, 0.08353053033351898, 0.058427464216947556, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.034203190356492996, 0.23458202183246613, 0.15632590651512146, 0.02520577609539032, 0.26413342356681824, 0.06292548030614853, 0.06378099322319031, 0.08676797896623611, 0.02988903410732746, 0.3430734872817993, 0.007843950763344765, 0.03405369073152542, 0.01887335814535618, 0.39618176221847534, 0.2528276741504669, 0.10531513392925262, 0.12583006918430328, 0.09389571845531464, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.009769688360393047, 0.056299567222595215, 0.11172951757907867, 0.02802591770887375, 0.3647110164165497, 0.09813904017210007, 0.016619421541690826, 0.006417513824999332, 0.016537560150027275, 0.15495160222053528, 0.023067951202392578, 0.011397394351661205, 0.029141509905457497, 0.0527399443089962, 0.2784731984138489, 0.059669919312000275, 0.5969582796096802, 0.09549567103385925, 0.03235183656215668, NaN, NaN, NaN, NaN, NaN, NaN], [0.00987912341952324, 0.12349259853363037, 0.037169262766838074, 0.01944275200366974, 0.06324917078018188, 0.02598830871284008, 0.020618943497538567, 0.009103300981223583, 0.1360517293214798, 0.09789924323558807, 0.06809242814779282, 0.12332575768232346, 0.034675393253564835, 0.16954950988292694, 0.010956126265227795, 0.11111389100551605, 0.1871008574962616, 0.2434563934803009, 0.10274684429168701, 0.0379486046731472, NaN, NaN, NaN, NaN, NaN], [0.010987702757120132, 0.03791751340031624, 0.03792046010494232, 0.0400051474571228, 0.008841714821755886, 0.002161285374313593, 0.031619150191545486, 0.01907121017575264, 0.0057282340712845325, 0.002385619329288602, 0.03308374434709549, 0.11032091826200485, 0.0044158026576042175, 0.05701944977045059, 0.0651637390255928, 0.027267253026366234, 0.3151875138282776, 0.17881636321544647, 0.3164456784725189, 0.005250148009508848, 0.011875288560986519, NaN, NaN, NaN, NaN], [0.08034691959619522, 0.1792650669813156, 0.6813479661941528, 0.11697664856910706, 0.022037051618099213, 0.4362119436264038, 0.3332834541797638, 0.16648675501346588, 0.3133866786956787, 0.21180157363414764, 0.22306133806705475, 0.5634312033653259, 0.2539531886577606, 0.28583550453186035, 0.0421890914440155, 0.24185270071029663, 0.9185315370559692, 0.5444227457046509, 0.7130873799324036, 0.36675870418548584, 0.1082441657781601, 0.02894955314695835, NaN, NaN, NaN], [0.3316553831100464, 0.07297243922948837, 0.18084223568439484, 0.0543624572455883, 0.141310915350914, 0.15985439717769623, 0.22593949735164642, 0.09976530820131302, 0.2670679986476898, 0.12590403854846954, 0.10189743340015411, 0.06066418066620827, 0.14688965678215027, 0.6279550790786743, 0.004891595803201199, 0.013660040684044361, 0.19539086520671844, 0.13336770236492157, 0.11226529628038406, 0.4554508626461029, 0.7914823293685913, 0.007615156006067991, 0.015521766617894173, NaN, NaN], [0.010082974098622799, 0.009416572749614716, 0.026376336812973022, 0.021534079685807228, 0.041008636355400085, 0.028814975172281265, 0.09862472116947174, 0.019531887024641037, 0.1915404349565506, 0.055525705218315125, 0.03489372506737709, 0.035597167909145355, 0.017297467216849327, 0.13875839114189148, 0.18795406818389893, 0.13025526702404022, 0.03705297037959099, 0.016517892479896545, 0.028779756277799606, 0.02632485330104828, 0.36631691455841064, 0.4771501123905182, 0.10461407899856567, 0.07566797733306885, NaN], [0.00671275844797492, 0.019956005737185478, 0.15321078896522522, 0.00987993273884058, 0.1430601179599762, 0.02432059310376644, 0.007838046178221703, 0.016839532181620598, 0.017622128129005432, 0.03075602278113365, 0.01907699555158615, 0.30206096172332764, 0.010013632476329803, 0.06018203869462013, 0.19546428322792053, 0.020215312018990517, 0.04091925173997879, 0.022548291832208633, 0.26572445034980774, 0.010653333738446236, 0.1212434321641922, 0.3668496906757355, 0.1586136817932129, 0.14579400420188904, 0.04911552369594574]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00017037145153153688, 0.1837475299835205, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [4.619961600837996e-06, 0.00011092388740507886, 0.19595862925052643, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [7.402049959637225e-07, 0.0014410031726583838, 0.15330694615840912, 0.0009438465931452811, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [6.564930572494632e-07, 1.2471617083065212e-05, 0.0012651559663936496, 1.2094314115529414e-05, 0.2683168947696686, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.960849710438197e-07, 2.835777740983758e-05, 0.0015905762556940317, 5.72201497561764e-05, 0.20671997964382172, 0.03618929535150528, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.613545777625404e-05, 4.069158967467956e-05, 0.0019799659494310617, 4.598083614837378e-05, 0.28016433119773865, 0.1021510660648346, 0.0019787675701081753, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03414154052734375, 0.018152736127376556, 0.002861178945749998, 0.0031036457512527704, 0.2743661403656006, 0.08905426412820816, 0.058365415781736374, 0.2834230065345764, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0001288916973862797, 0.0019113116431981325, 0.0011359998025000095, 2.5460678443778306e-05, 0.0018093753606081009, 0.008086470887064934, 0.005666371434926987, 0.0014489549212157726, 0.27176737785339355, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0013363973703235388, 0.015213730745017529, 0.019847076386213303, 0.0016770424554124475, 0.6085457801818848, 0.051846977323293686, 0.06904839724302292, 0.023163089528679848, 0.0024616841692477465, 0.4075135886669159, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.5705205441918224e-05, 0.00011942459968850017, 3.308789018774405e-05, 0.00047703171730972826, 1.5581523257424124e-05, 3.566192026482895e-05, 0.000621139828581363, 0.002513762330636382, 0.0013953398447483778, 0.001656065694987774, 0.6708395481109619, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0009777048835530877, 0.006719581317156553, 0.017090875655412674, 0.007835427299141884, 0.0003081739123445004, 0.0027951891534030437, 0.0031432590913027525, 0.011542102321982384, 0.01903962530195713, 0.032312098890542984, 0.23448777198791504, 0.18604722619056702, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0010771078523248434, 0.00013067253166809678, 0.0004810431564692408, 0.0005832655006088316, 0.27172601222991943, 0.023587899282574654, 0.0011203349567949772, 0.0001570776366861537, 3.2636336982250214e-05, 0.008125105872750282, 0.3860749900341034, 0.011222672648727894, 0.4488545358181, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0018897228874266148, 0.00010004806244978681, 0.040837980806827545, 0.0009045379119925201, 0.4036760926246643, 0.033945482224226, 0.0009020724683068693, 2.477952148183249e-05, 0.0006147518288344145, 2.3498352675233036e-05, 0.0003015661786776036, 0.00019162058015353978, 0.0013656887458637357, 0.9207848906517029, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.0049262932152487e-05, 0.00032340767211280763, 0.0004620190302375704, 1.456133759347722e-05, 0.4214256703853607, 0.00038119935197755694, 2.2086916942498647e-05, 5.437946310848929e-05, 0.0005922063137404621, 0.0002251591213280335, 4.171442924416624e-05, 0.0011568808695301414, 6.667344860034063e-05, 0.004539569839835167, 0.07099039107561111, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0001142411565524526, 0.001007341779768467, 0.5582761764526367, 0.0006983705679886043, 0.04208780825138092, 0.07311324775218964, 0.011010478250682354, 0.00018356108921580017, 0.11227726191282272, 1.5535662896581925e-05, 7.865564111853018e-05, 8.497068483848125e-05, 0.007107958197593689, 0.04726947844028473, 0.03816111385822296, 0.7400538921356201, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [9.270196460420266e-05, 0.00014002913667354733, 0.006266205105930567, 8.287983655463904e-05, 0.029540851712226868, 0.019505193457007408, 0.0002005908900173381, 0.0002361711667617783, 0.002089217072352767, 0.0007247799658216536, 0.0003387654141988605, 3.3522373996675014e-05, 0.00015295531193260103, 0.005682599265128374, 0.01914886385202408, 0.006167547311633825, 0.6065680980682373, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.017243418842554092, 0.0717378556728363, 0.015470567159354687, 0.14577892422676086, 0.003815611358731985, 0.01656431145966053, 0.21609994769096375, 0.24452562630176544, 0.07360902428627014, 0.020440302789211273, 0.9522358775138855, 0.0012982342159375548, 0.00034142163349315524, 4.905217429040931e-05, 0.0002677988959476352, 0.0020047405268996954, 0.013444142416119576, 0.5238149166107178, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006589227356016636, 0.025933612138032913, 0.05151839554309845, 0.019538801163434982, 0.000567624403629452, 0.011064885184168816, 0.018599001690745354, 0.0389220230281353, 0.03263486549258232, 0.03920944407582283, 0.309482604265213, 0.18455958366394043, 0.0028949796687811613, 0.0009189100819639862, 0.01304793544113636, 0.01903691701591015, 0.0013186958385631442, 0.1459255963563919, 0.2617945969104767, NaN, NaN, NaN, NaN, NaN, NaN], [0.000940846570301801, 6.996696902206168e-05, 0.0001185448418254964, 0.00013115631008986384, 0.04620806872844696, 0.009408986195921898, 0.0010798430303111672, 0.00010642426059348509, 1.4586596989829559e-05, 0.0008147742482833564, 0.049950405955314636, 0.0020658469293266535, 0.020368386059999466, 0.0015965981874614954, 0.0005227082292549312, 8.089001494226977e-05, 0.42970454692840576, 0.3893451988697052, 0.006195466499775648, 0.2630486488342285, NaN, NaN, NaN, NaN, NaN], [0.0015646422980353236, 5.644361226586625e-05, 0.015588155947625637, 0.0004337269929237664, 0.061090677976608276, 0.015012362040579319, 0.0009935805574059486, 3.2441483199363574e-05, 0.0006383971776813269, 7.901599929027725e-06, 0.00011085882579209283, 2.031324947893154e-05, 0.0001886440732050687, 0.1558367908000946, 2.918860081990715e-05, 0.00031420652521774173, 3.769064642256126e-05, 0.000311522075207904, 8.488001913065091e-05, 0.001447036280296743, 0.9016569256782532, NaN, NaN, NaN, NaN], [6.329882307909429e-05, 0.0007932570297271013, 0.0008974742377176881, 3.545067738741636e-05, 0.41645264625549316, 0.0012166639789938927, 5.162824527360499e-05, 0.00016062096983660012, 0.0028807471971958876, 0.0007734368555247784, 0.0001738688733894378, 0.0017386887921020389, 8.449772576568648e-05, 0.008313576690852642, 0.04833607003092766, 5.605717160506174e-05, 0.000497612461913377, 0.00019103533122688532, 0.0018799308454617858, 0.000193181011127308, 0.010939341969788074, 0.11687301844358444, NaN, NaN, NaN], [2.7039888664148748e-05, 0.0002653435221873224, 0.3520841896533966, 0.0011641159653663635, 0.017258664593100548, 0.13898366689682007, 0.004804374184459448, 0.0001136215214501135, 0.10132589936256409, 1.9021857951884158e-05, 0.00018713112513069063, 5.577637057285756e-05, 0.0021825090516358614, 0.016621561720967293, 0.003813497256487608, 0.05257569998502731, 7.136658678064123e-05, 0.00013083907833788544, 8.304342918563634e-05, 0.009517401456832886, 0.07102376222610474, 0.0242641419172287, 0.791592538356781, NaN, NaN], [1.8426982933306135e-05, 6.735812348779291e-05, 0.005383457988500595, 0.0002568464260548353, 0.03709089383482933, 0.05173188075423241, 0.00015440442075487226, 0.00026214553508907557, 0.0031172526068985462, 0.0018413036596029997, 0.001364374067634344, 0.0001026472236844711, 0.00015940713637974113, 0.00464483629912138, 0.007250420283526182, 0.006640422623604536, 0.10042263567447662, 0.00037284562131389976, 5.502302519744262e-05, 0.00017516437219455838, 0.013823487795889378, 0.028728578239679337, 0.014491567388176918, 0.5602642297744751, NaN], [1.3810687960358337e-05, 0.0002572945086285472, 0.008041280321776867, 0.00040080497274175286, 0.00010326507617719471, 0.0013340600999072194, 0.00019016038277186453, 0.00019489554688334465, 0.0007417663000524044, 0.0012533330591395497, 0.0032668926287442446, 0.001072657760232687, 5.286548912408762e-05, 4.225512952871213e-07, 1.0035311788669787e-05, 2.1279807697283104e-05, 0.0006032216479070485, 0.00048016011714935303, 0.00037273563793860376, 3.447151175350882e-05, 9.715819260236458e-07, 2.8930742701049894e-05, 0.0003854547976516187, 0.005018792115151882, 0.4505775570869446]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [4.347301455709385e-06, 0.18382565677165985, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0001576173526700586, 0.00605444610118866, 0.19315025210380554, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0015271879965439439, 0.2696094512939453, 0.0976908802986145, 0.19172586500644684, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018620789051055908, 0.1513659805059433, 0.1261996626853943, 0.04123798385262489, 0.18324223160743713, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [7.739824650343508e-05, 0.0007302183075807989, 0.0020413347519934177, 0.0010007238015532494, 0.20195050537586212, 0.04546361416578293, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0007431988487951458, 0.330532044172287, 0.08558935672044754, 0.06556878238916397, 0.10690004378557205, 0.1145712360739708, 0.06475446373224258, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015635214745998383, 0.050190601497888565, 0.02352251298725605, 0.24284599721431732, 0.06325101107358932, 0.02171560376882553, 0.015677697956562042, 0.4775830805301666, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03602181747555733, 0.2262161672115326, 0.11374488472938538, 0.22297167778015137, 0.018925879150629044, 0.2400040328502655, 0.13629396259784698, 0.14897051453590393, 0.11721047759056091, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.001669732853770256, 0.0008830919396132231, 0.007873992435634136, 0.004793200176209211, 0.032567575573921204, 0.019068563356995583, 0.01167156733572483, 0.006520072463899851, 0.001765590044669807, 0.479371041059494, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04264334216713905, 0.01628556102514267, 0.012549073435366154, 0.1270730197429657, 0.09553729742765427, 0.12904676795005798, 0.28088441491127014, 0.08353402465581894, 0.19219043850898743, 0.1467161476612091, 0.04815742373466492, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006975929252803326, 0.05510300025343895, 0.007132354192435741, 0.0349782258272171, 0.02191060781478882, 0.018211986869573593, 0.026551326736807823, 0.03648876026272774, 0.06464254856109619, 0.049987878650426865, 0.05908217281103134, 0.5448521375656128, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.000807860866189003, 0.00374230626039207, 0.004482839722186327, 0.005506760906428099, 0.000447272410383448, 0.003816538956016302, 0.03234753757715225, 0.014306235127151012, 0.01718331128358841, 0.04840204864740372, 0.06595310568809509, 0.18900929391384125, 0.0723472312092781, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00447529973462224, 0.019966747611761093, 0.03737834841012955, 0.3797287940979004, 0.010614297352731228, 0.05463654175400734, 0.32780376076698303, 0.0739898681640625, 0.25606051087379456, 0.8621841073036194, 0.2645638585090637, 0.25103500485420227, 0.016027942299842834, 0.004609693773090839, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0010164460400119424, 0.011448963545262814, 0.03378765657544136, 0.02785181999206543, 0.056788451969623566, 0.07099426537752151, 0.008927138522267342, 0.01755385287106037, 0.039185769855976105, 0.09313513338565826, 0.027632856741547585, 0.12282836437225342, 0.017955774441361427, 0.02453978732228279, 0.267269104719162, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09903331845998764, 0.854941725730896, 0.020280463621020317, 0.8786925673484802, 0.37992238998413086, 0.20425425469875336, 0.32038459181785583, 0.8171603083610535, 0.2503354549407959, 0.7644308805465698, 0.7474347949028015, 0.935006856918335, 0.36836859583854675, 0.03383934497833252, 0.0021248040720820427, 0.21007098257541656, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09584157168865204, 0.00421579135581851, 0.0017077650409191847, 0.0670090913772583, 0.10943465679883957, 0.05715145170688629, 0.03694647178053856, 0.04514404758810997, 0.04956913739442825, 0.07195062190294266, 0.4566742479801178, 0.20942343771457672, 0.1548582911491394, 0.3906869888305664, 0.03925589844584465, 0.005858495831489563, 0.23115697503089905, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10393274575471878, 0.03258725255727768, 0.01998279243707657, 0.13928532600402832, 0.08602269738912582, 0.139993816614151, 0.2561682462692261, 0.08122693002223969, 0.28790318965911865, 0.34215468168258667, 0.023110536858439445, 0.8003224730491638, 0.11519370973110199, 0.5406965613365173, 0.2252652645111084, 0.07071924954652786, 0.03988110274076462, 0.09249765425920486, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006400381214916706, 0.03668399527668953, 0.006957556586712599, 0.024804070591926575, 0.013962345197796822, 0.010118995793163776, 0.014814852736890316, 0.02360437996685505, 0.038752347230911255, 0.10996780544519424, 0.24877001345157623, 0.7050904035568237, 0.103914275765419, 0.0656881257891655, 0.03925013542175293, 0.0268316138535738, 0.009403076022863388, 0.042995911091566086, 0.38370969891548157, NaN, NaN, NaN, NaN, NaN, NaN], [0.0005728903925046325, 0.0018518416909500957, 0.003297911025583744, 0.002339646453037858, 0.0003125199000351131, 0.0013706001918762922, 0.011640608310699463, 0.005699110683053732, 0.00646078959107399, 0.029403753578662872, 0.09435103088617325, 0.4532504379749298, 0.1454003006219864, 0.08155784755945206, 0.1478416919708252, 0.06988534331321716, 0.07031917572021484, 0.08092489838600159, 0.16178953647613525, 0.09959835559129715, NaN, NaN, NaN, NaN, NaN], [0.007587960455566645, 0.01947515644133091, 0.06775914877653122, 0.37032291293144226, 0.014833947643637657, 0.04509717598557472, 0.2979332506656647, 0.08052700757980347, 0.2017516791820526, 0.8817963004112244, 0.3514429032802582, 0.3636293411254883, 0.14158478379249573, 0.09958238899707794, 0.13573585450649261, 0.27771836519241333, 0.47418463230133057, 0.36210212111473083, 0.2140081375837326, 0.022566867992281914, 0.004614678677171469, NaN, NaN, NaN, NaN], [0.0009141381597146392, 0.00906511303037405, 0.026196878403425217, 0.011460180394351482, 0.03924085199832916, 0.05833837762475014, 0.004696658346801996, 0.009781464003026485, 0.029306253418326378, 0.06398104876279831, 0.017127037048339844, 0.0922316163778305, 0.03436172753572464, 0.12105685472488403, 0.475220263004303, 0.20121201872825623, 0.0066191148944199085, 0.018271028995513916, 0.05732923001050949, 0.018915977329015732, 0.019877590239048004, 0.23682713508605957, NaN, NaN, NaN], [0.14320576190948486, 0.892350971698761, 0.030759859830141068, 0.8051734566688538, 0.7149769067764282, 0.4937312602996826, 0.3181091248989105, 0.8743517994880676, 0.3442763686180115, 0.8711729049682617, 0.7545801997184753, 0.9297782182693481, 0.6998263001441956, 0.17287810146808624, 0.008261360228061676, 0.9148194789886475, 0.7390273213386536, 0.743715763092041, 0.8801547288894653, 0.47275617718696594, 0.02699747122824192, 0.002916275057941675, 0.1803632229566574, NaN, NaN], [0.0431031733751297, 0.0034584910608828068, 0.0008681766339577734, 0.032780423760414124, 0.11873625963926315, 0.03893061354756355, 0.019801655784249306, 0.03132590278983116, 0.05763043835759163, 0.06388700753450394, 0.3317660689353943, 0.16543246805667877, 0.10311393439769745, 0.4146954417228699, 0.09686555713415146, 0.06189668923616409, 0.5733434557914734, 0.2515217959880829, 0.17396190762519836, 0.13145960867404938, 0.40639445185661316, 0.07709264755249023, 0.007335619535297155, 0.2446187138557434, NaN], [0.046706411987543106, 0.31744489073753357, 0.6429179310798645, 0.4889025092124939, 0.43930482864379883, 0.3055577576160431, 0.6935683488845825, 0.25992196798324585, 0.7758384346961975, 0.2076689600944519, 0.8320663571357727, 0.39907822012901306, 0.8469056487083435, 0.5997118353843689, 0.31635957956314087, 0.36650604009628296, 0.2247273474931717, 0.7608639597892761, 0.37947097420692444, 0.8680096864700317, 0.5816919803619385, 0.19056683778762817, 0.27210569381713867, 0.06685535609722137, 0.040061503648757935]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17503570020198822, 0.10145211219787598, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002467370592057705, 0.014373218640685081, 0.18901397287845612, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [4.782021278515458e-05, 0.0002036100922850892, 0.15351639688014984, 0.001678619533777237, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015930648893117905, 0.006582066882401705, 0.10560829937458038, 0.3465193808078766, 0.012144939973950386, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.010950141586363316, 0.003185260808095336, 0.03380253165960312, 0.13516294956207275, 0.16374172270298004, 0.0833682045340538, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [4.016391176264733e-05, 0.0003202538937330246, 0.0050767818465828896, 1.7212016246048734e-05, 0.5176156759262085, 0.003749872324988246, 0.00026106167933903635, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13457109034061432, 0.07774609327316284, 0.006220821291208267, 0.0008077693055383861, 0.2509746253490448, 0.17662860453128815, 0.13796226680278778, 0.053514063358306885, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06553670763969421, 0.09473168104887009, 0.013516419567167759, 0.0013789478689432144, 0.03089364431798458, 0.0676402598619461, 0.03963227570056915, 0.17151857912540436, 0.1338733434677124, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07379595190286636, 0.1714182198047638, 0.13684017956256866, 0.00734432740136981, 0.0039545828476548195, 0.09408346563577652, 0.0452522449195385, 0.2525797188282013, 0.15314188599586487, 0.008748584426939487, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006909683812409639, 0.034793343394994736, 0.13824458420276642, 0.0004423256032168865, 0.38493895530700684, 0.12702688574790955, 0.0007700703572481871, 0.005257567390799522, 0.3978818655014038, 0.028774550184607506, 0.016022928059101105, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15589091181755066, 0.059809040278196335, 0.2019805759191513, 0.006274765357375145, 0.053891621530056, 0.38889890909194946, 0.024021193385124207, 0.016828669235110283, 0.09206627309322357, 0.15270450711250305, 0.10960505902767181, 0.14381197094917297, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0011966965394094586, 0.0013769377255812287, 0.0006101150647737086, 4.0936538425739855e-05, 0.008213219232857227, 0.03395655378699303, 0.0003392287762835622, 0.00015790743054822087, 0.000944053172133863, 0.0007261222926899791, 0.011664116755127907, 0.22049497067928314, 0.0034024016931653023, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2470119595527649, 0.22662757337093353, 0.086290642619133, 0.0011605313047766685, 0.20862528681755066, 0.31339770555496216, 0.007298772688955069, 0.00864456407725811, 0.010568802244961262, 0.01924213580787182, 0.034804634749889374, 0.16789764165878296, 0.11296499520540237, 0.017940307036042213, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3800778388977051, 0.4679488241672516, 0.19362112879753113, 0.18464821577072144, 0.046723559498786926, 0.160307839512825, 0.24654103815555573, 0.2610638439655304, 0.07595612108707428, 0.1325986683368683, 0.022732526063919067, 0.1294456422328949, 0.2688123285770416, 0.12097980827093124, 0.12297553569078445, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005153980106115341, 0.0002073257346637547, 0.12819816172122955, 0.00011319551413180307, 0.08506736904382706, 0.013190183788537979, 0.0028314462397247553, 0.00016588614380452782, 0.009067418053746223, 0.0008525841985829175, 0.00018506577180232853, 0.0002737078757490963, 0.0002474631182849407, 0.04919072240591049, 0.1850043386220932, 0.0018668848788365722, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4235798418521881, 0.8363600969314575, 0.13292381167411804, 0.03160996362566948, 0.6294970512390137, 0.3827916085720062, 0.01768689975142479, 0.031598031520843506, 0.05291707068681717, 0.004268768709152937, 0.01666090451180935, 0.0017059938982129097, 0.03961870074272156, 0.006749838124960661, 0.2787548303604126, 0.12898604571819305, 0.00984524842351675, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.001200420199893415, 0.004923743661493063, 0.03312471881508827, 7.996988279046491e-05, 0.2118730992078781, 0.0288531631231308, 0.00010192030458711088, 0.0002958755649160594, 0.007303019054234028, 0.00011155433458043262, 2.6572593014861923e-06, 0.00035481253871694207, 2.4723947262828005e-06, 2.6933960270980606e-06, 0.017764916643500328, 0.0003658832865767181, 0.25218549370765686, 0.002238432876765728, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16854390501976013, 0.046801913529634476, 0.18834064900875092, 0.005545254796743393, 0.10321269929409027, 0.3906272351741791, 0.03742265701293945, 0.024458711966872215, 0.05521516501903534, 0.07171308994293213, 0.021107476204633713, 0.025199010968208313, 0.0027974944096058607, 0.0025010560639202595, 0.02306896261870861, 0.15930885076522827, 0.06242140382528305, 0.11754277348518372, 0.21403564512729645, NaN, NaN, NaN, NaN, NaN, NaN], [0.0004002669302280992, 0.00040952101699076593, 0.00012874403910245746, 8.880775567376986e-06, 0.005201425869017839, 0.007163480389863253, 0.0002137795090675354, 0.00012960725871380419, 0.0005550362984649837, 0.0001244707527803257, 0.0006415210082195699, 0.03161805495619774, 4.1008814150700346e-05, 0.000599265971686691, 0.00399716105312109, 5.7038221711991355e-05, 0.0033261284697800875, 0.006950944196432829, 0.22392861545085907, 0.0028074102010577917, NaN, NaN, NaN, NaN, NaN], [0.22722585499286652, 0.18426381051540375, 0.07697561383247375, 0.0012757674558088183, 0.23254786431789398, 0.14769063889980316, 0.013780240900814533, 0.02735842764377594, 0.04001649469137192, 0.031179115176200867, 0.015889445319771767, 0.062248069792985916, 0.013498637825250626, 0.0052745710127055645, 0.2219674438238144, 0.0031969451811164618, 0.0037056237924844027, 0.028058722615242004, 0.22486938536167145, 0.09661445021629333, 0.02616964653134346, NaN, NaN, NaN, NaN], [0.27366653084754944, 0.354305237531662, 0.16368547081947327, 0.1598840057849884, 0.02900015190243721, 0.10581760108470917, 0.21902981400489807, 0.27043354511260986, 0.19813168048858643, 0.2514232099056244, 0.025616073980927467, 0.12471329420804977, 0.09682969748973846, 0.07310353219509125, 0.02883375994861126, 0.09285400807857513, 0.013515813276171684, 0.021914459764957428, 0.14159631729125977, 0.3238908648490906, 0.1783936321735382, 0.11570748686790466, NaN, NaN, NaN], [0.0030968550126999617, 7.297070260392502e-05, 0.1371629387140274, 0.00018204482330475003, 0.04798782989382744, 0.01213640347123146, 0.0023585439193993807, 0.00011540603009052575, 0.016970379278063774, 0.0015150568215176463, 0.0003718302759807557, 0.00044133648043498397, 0.00012143531785113737, 0.021671650931239128, 0.023021340370178223, 0.00010860650218091905, 0.0005334930610843003, 0.000257489358773455, 0.0005856966599822044, 0.00045311596477404237, 0.09709983319044113, 0.18528476357460022, 0.0029071324970573187, NaN, NaN], [0.49188995361328125, 0.918917715549469, 0.2054058462381363, 0.08403602242469788, 0.6967929005622864, 0.5653088688850403, 0.03772272169589996, 0.04957969859242439, 0.18319177627563477, 0.012161915190517902, 0.07060753554105759, 0.009896048344671726, 0.1126827672123909, 0.010653471574187279, 0.1938174068927765, 0.1352803260087967, 0.0021707522682845592, 0.030638370662927628, 0.003963022027164698, 0.03303877264261246, 0.004082953091710806, 0.20578816533088684, 0.11854958534240723, 0.02041587606072426, NaN], [0.001465475419536233, 0.00045102695003151894, 0.017218099907040596, 0.00030212500132620335, 0.11662620306015015, 0.017841650173068047, 0.00014393724268302321, 0.0003088460653088987, 0.006560556124895811, 0.0005491081974469125, 5.78465114813298e-05, 0.0019656207878142595, 0.00016285650781355798, 0.0002489366161171347, 0.011378495953977108, 0.0017521223053336143, 0.00787137821316719, 8.434856863459572e-05, 0.0012881350703537464, 7.287580228876323e-05, 0.00021561238099820912, 0.020317554473876953, 0.04195580258965492, 0.24219898879528046, 0.0017395684262737632]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.39058852195739746, 8.28505744721042e-06, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [2.7811127438326366e-05, 0.4158080220222473, 0.0005852450849488378, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [9.039229868085252e-13, 4.1926887206500396e-05, 0.15358270704746246, 0.00044542484101839364, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.9216391628896996e-16, 4.9363904963684035e-08, 0.0004218998074065894, 0.40449434518814087, 4.695959432865493e-06, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.7349648803667746e-14, 5.141012060505545e-09, 3.7822364902240224e-06, 0.0002717413299251348, 0.22465285658836365, 2.698016260183067e-06, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.6696812255598843e-09, 2.368522711293508e-09, 3.1902116006676806e-06, 9.520445587440918e-08, 9.990107355406508e-05, 0.2170185148715973, 0.019131841138005257, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [2.292660354896725e-07, 1.4062491449085002e-10, 1.0373556180720556e-11, 2.945570870549474e-11, 1.3987125901948616e-09, 1.1205498822164373e-06, 0.3382871150970459, 0.0008390913717448711, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [2.3133984541345853e-06, 0.00017511146143078804, 1.441240442545677e-06, 3.064446918443764e-09, 3.097617096159411e-08, 7.23518027712089e-08, 0.0017295092111453414, 0.39626115560531616, 0.00019915253506042063, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [8.689644937311981e-15, 2.8357308110571466e-06, 5.0946681540153804e-08, 2.0269605438549831e-10, 1.289949813632063e-10, 3.375676821404383e-11, 8.602300205495794e-09, 4.5097981455910485e-06, 0.29888245463371277, 6.641173968091607e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [2.8127108337250475e-18, 1.3557467148928026e-08, 7.431774662336466e-08, 2.301476165200711e-08, 1.1707952315975767e-11, 7.274678689300762e-12, 7.034611066401852e-13, 5.257664963120856e-13, 3.4044413041556254e-05, 0.32336506247520447, 4.600838292390108e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [6.300134025583048e-13, 5.676838910062543e-08, 1.822371018533886e-06, 2.3448223146260716e-05, 2.5415656068616954e-07, 3.417801153204891e-08, 5.353474885616549e-10, 2.141239963115993e-11, 3.762530198514469e-08, 6.24434178462252e-05, 0.33693620562553406, 3.183486114721745e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.5877897954763576e-12, 1.2288996487086479e-09, 3.458522428445576e-07, 9.462546586291865e-06, 7.457422907464206e-05, 0.0005706463125534356, 1.4425116212635203e-08, 4.5430816769144455e-13, 2.616490357709722e-12, 3.545688542772041e-08, 0.00016559385403525084, 0.22770871222019196, 0.0009294600458815694, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [2.579016999959549e-10, 1.5412886245069757e-10, 5.557828156033118e-11, 1.2367832313842086e-09, 3.3751638284229557e-07, 4.776334208145272e-07, 1.75399406998622e-07, 9.608910021829953e-12, 7.499024594652057e-14, 2.8573548556528813e-14, 3.2670008191793e-12, 4.494925178732956e-06, 0.37381958961486816, 3.638648195192218e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.090227983193472e-05, 8.430293382843956e-05, 4.32313208875712e-05, 1.6493000885020592e-06, 8.794136192591395e-06, 0.0005616153357550502, 0.0013158570509403944, 0.0005267951055429876, 3.675571861094795e-05, 2.42239195813454e-07, 8.356466074666002e-10, 2.3424906885338714e-06, 0.0012797197559848428, 0.6210904717445374, 0.0014036636566743255, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [7.67247776423119e-09, 2.954437938740284e-08, 8.54147774731473e-09, 2.011255162415182e-09, 5.265776792384713e-08, 1.4630668898618637e-09, 2.2913241082278546e-06, 3.266295323101076e-08, 1.6124132571349037e-06, 1.13081211061683e-11, 2.6358108895513247e-15, 7.728456763445024e-11, 2.3767283696685126e-09, 2.1271845980663784e-05, 0.19462287425994873, 6.456446044467157e-06, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [4.312543703220706e-13, 2.1705271535665815e-07, 1.1365986551936658e-07, 1.9739390211270802e-07, 7.690645453806155e-09, 4.219609994748907e-09, 9.716764060030414e-10, 3.915795687703394e-08, 3.0873563900968293e-06, 5.5168204227129536e-08, 1.0056843552375128e-10, 6.254387632798064e-12, 4.318517331930449e-12, 1.5618051990573534e-11, 6.033264071447775e-05, 0.4116440713405609, 1.8908482161350548e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.797858697974407e-17, 3.5553746058347713e-10, 1.0377114723070235e-09, 5.157609006545272e-09, 5.5740526777592336e-11, 3.675403037473046e-11, 3.015720268992328e-12, 1.2632186895361434e-14, 3.2584634990229233e-09, 2.7093712162695738e-08, 2.733851353305984e-15, 2.0347772078377346e-10, 7.802066534575867e-16, 1.702402683943053e-16, 1.8298086656987067e-10, 6.30185184036236e-08, 0.2592085301876068, 3.469779585429933e-06, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.386366187463352e-10, 1.5587464474720036e-07, 5.430682108453766e-07, 1.926859113154933e-05, 2.7584928830037825e-06, 5.553058031182445e-07, 6.554741815989473e-08, 7.146391256540596e-10, 4.225638150501254e-08, 2.0539353045023745e-06, 0.00010312868107575923, 2.5505174860995794e-08, 1.3659710695890226e-08, 4.206753695390475e-11, 5.200286035123014e-11, 3.842067428649898e-07, 1.4282905794971157e-05, 0.31164512038230896, 0.00011869923037011176, NaN, NaN, NaN, NaN, NaN, NaN], [3.098006018387167e-10, 3.2388165482899467e-09, 1.8609943808201024e-08, 5.099297482047405e-07, 4.603737033903599e-05, 0.00016448901442345232, 1.6998721719119203e-07, 1.7718410072475876e-11, 2.5886336477154437e-11, 9.218055652127077e-09, 1.2046231745443947e-07, 7.304957398446277e-05, 2.3164133111652774e-10, 2.8952129582648922e-09, 2.9085676575557606e-11, 8.895827650901023e-12, 8.14965606110718e-09, 8.762691868469119e-05, 0.2280847281217575, 0.0004104141262359917, NaN, NaN, NaN, NaN, NaN], [1.3149543676149733e-09, 1.080373679407387e-09, 5.5150013028582023e-11, 7.800748935693491e-10, 1.7859061074432248e-07, 2.183157299384675e-08, 2.5236221290469985e-07, 2.35878039323012e-10, 9.060349692724401e-12, 1.4339956088890715e-12, 1.7799637631876752e-12, 2.9941787715870305e-08, 6.0217857935640495e-06, 3.1683756313016787e-11, 4.5713120788715145e-11, 3.4124135808721867e-13, 3.591858459424911e-15, 1.3559961530365539e-12, 3.119595021416899e-06, 0.35679423809051514, 3.964137067669071e-05, NaN, NaN, NaN, NaN], [4.326914222474443e-06, 0.00023807807883713394, 0.00026310785324312747, 8.714396244613454e-06, 1.617559973965399e-05, 0.0001319001312367618, 0.0005945482989773154, 0.000823884445708245, 0.0008506007143296301, 1.7805428797146305e-05, 2.734714854568665e-08, 2.8855724849563558e-06, 4.891938442597166e-05, 0.0011682395124807954, 8.529372053089901e-07, 0.00017029111040756106, 1.0359013202787537e-07, 7.06834313302096e-10, 1.0861956525332062e-06, 0.0008713650749996305, 0.596385657787323, 0.0009257638594135642, NaN, NaN, NaN], [1.4773272882795396e-10, 2.3448599506536993e-08, 6.434380566133768e-07, 3.8027360460546333e-07, 2.454226432746509e-06, 5.541529457531169e-09, 3.5226184991188347e-06, 2.5443886997322807e-08, 1.7749154721968807e-05, 1.8393259137994278e-09, 4.026108439691978e-12, 6.382850692432385e-09, 1.7809153263215194e-08, 8.996512974590587e-07, 0.00010512088192626834, 1.1464897607671443e-11, 2.794342757184154e-09, 2.4549680847631107e-15, 9.933188299671158e-11, 7.3009864820505754e-09, 8.105817687464878e-05, 0.2077004611492157, 2.0097606466151774e-05, NaN, NaN], [1.1257004341538607e-14, 1.3137036347643516e-08, 4.6611327775281097e-07, 3.0405328743654536e-06, 1.5423474053477548e-07, 2.520166120234535e-08, 3.4643394819511286e-09, 1.1558090484697914e-08, 1.417677253812144e-06, 9.112129362165433e-08, 4.2694305868451465e-09, 3.7723260626343347e-10, 4.1450526344632976e-10, 2.7357388923676673e-11, 6.112880441833113e-07, 3.9687514799879864e-05, 8.382351063263016e-11, 8.293656039715103e-11, 4.97465783844131e-12, 4.144883221368634e-12, 1.4191136113450575e-11, 2.5566061594872735e-05, 0.4056495428085327, 4.4409513066057116e-05, NaN], [9.215334861117716e-19, 2.6557794852166694e-10, 5.799645919069008e-07, 1.003176621633406e-11, 7.217926736302616e-07, 4.876178394397357e-08, 8.254863459455919e-11, 1.424103456687531e-12, 1.1857503423584603e-08, 1.3074058502482444e-09, 8.580362115262474e-12, 5.829819293978744e-09, 1.8017319407259702e-12, 9.234832950427707e-14, 3.576115098491428e-11, 1.9265784523270213e-09, 1.8997316146851517e-06, 1.949248054633479e-11, 8.860704392432694e-10, 2.8198800851872777e-14, 5.674391451236226e-15, 1.0258181110112119e-10, 6.93914080329705e-06, 0.25534507632255554, 2.742740150551981e-07]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0002614231198094785, 0.183704674243927, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.3331101555991154e-08, 0.003119559260085225, 0.19454506039619446, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.1244888353800775e-09, 0.0005117341643199325, 0.15345418453216553, 0.0018621939234435558, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [2.882708471929618e-08, 0.0006895777769386768, 0.008299488574266434, 0.004234161227941513, 0.26378652453422546, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [6.507164653157815e-05, 0.0030905166640877724, 0.269605815410614, 0.06594818085432053, 0.07055308669805527, 0.24370616674423218, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [5.806248736917041e-05, 0.0008924558642320335, 0.00047033390728756785, 0.003593915607780218, 0.044251326471567154, 0.18547922372817993, 0.19724349677562714, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03321969881653786, 0.1786998063325882, 0.0021111152600497007, 0.00015362887643277645, 0.0013223892310634255, 0.01674751006066799, 0.27181917428970337, 0.0704144611954689, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0005316429305821657, 0.0021434861700981855, 0.0005638045258820057, 2.0347550162114203e-05, 8.372889715246856e-05, 0.0012170294066891074, 0.0006328476592898369, 0.0015302025713026524, 0.2731996476650238, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.384976253073546e-06, 0.0032942681573331356, 0.003179847961291671, 0.0003072107210755348, 3.0923787562642246e-05, 0.0003082206822000444, 0.0026841319631785154, 0.011449099518358707, 0.2928124964237213, 0.0015787724405527115, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [4.910896677756682e-05, 0.01189705915749073, 0.0036808690056204796, 0.006090851966291666, 0.0029882052913308144, 0.006760776974260807, 0.0002592294185888022, 0.0001972121826838702, 0.15788163244724274, 0.14973512291908264, 0.14614373445510864, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [7.539001671830192e-05, 0.036947283893823624, 0.01112621370702982, 0.04119950905442238, 0.06979847699403763, 0.01383589580655098, 0.008948443457484245, 9.020609286380932e-05, 0.0005221512983553112, 0.34183818101882935, 0.12104173004627228, 0.027292484417557716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [5.4811065638205037e-05, 0.015359039418399334, 0.005874635651707649, 0.024854328483343124, 0.16572602093219757, 0.13195344805717468, 0.08553953468799591, 0.00124072446487844, 0.0008515206864103675, 0.0025517549365758896, 0.03817262500524521, 0.1957935392856598, 0.020919298753142357, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.401398498681374e-05, 0.0008079431718215346, 0.00045223115012049675, 0.00013304724416229874, 0.0006849576020613313, 0.009534466080367565, 0.010466179810464382, 0.00030334663460962474, 0.00033610902028158307, 2.1021634893259034e-05, 6.891421071486548e-05, 0.0028196852654218674, 0.3685440421104431, 0.0008976467652246356, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0012722803512588143, 0.07485485821962357, 0.004568059463053942, 0.008557068184018135, 0.04491077736020088, 0.010689688846468925, 0.010801602154970169, 0.015439217910170555, 0.001288879313506186, 0.032191790640354156, 9.430324280401692e-05, 0.0010071481810882688, 0.03593403846025467, 0.015365669503808022, 0.28865233063697815, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0003195737663190812, 0.0016381103778257966, 0.001899963477626443, 0.000450764549896121, 0.0029568641912192106, 0.0004077073244843632, 0.006739944685250521, 5.316005626809783e-05, 0.000977654941380024, 0.00033480822457931936, 1.5544836060144007e-05, 5.177688763069455e-06, 0.000280524865956977, 8.569184137741104e-05, 0.19435854256153107, 0.0009946423815563321, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0004552309401333332, 0.00916277151554823, 0.2859989106655121, 0.028668222948908806, 0.004703177139163017, 0.013283651322126389, 0.011935138143599033, 0.00041849465924315155, 0.021506765857338905, 0.0005354905733838677, 2.3408898414345458e-05, 5.557515123655321e-06, 4.006853941973532e-06, 0.000782388960942626, 0.032734211534261703, 0.33600685000419617, 0.05645810067653656, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.001615832676179707, 0.0592908076941967, 0.004439341835677624, 0.0221478920429945, 0.05761101841926575, 0.08599329739809036, 0.009327156469225883, 0.0014337823959067464, 0.22479815781116486, 0.007599419914186001, 0.00010282513540005311, 0.003995772451162338, 0.0007532926392741501, 0.0001985877170227468, 0.042725738137960434, 0.609107255935669, 0.032340146601200104, 0.2600889503955841, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0007894318550825119, 0.08912800997495651, 0.00870462041348219, 0.062210533767938614, 0.21669252216815948, 0.04955689236521721, 0.12036743760108948, 0.001276280265301466, 0.002290783217176795, 0.4637441337108612, 0.041003014892339706, 0.007595454342663288, 0.0049859327264130116, 0.030789200216531754, 0.01441932376474142, 0.02666427381336689, 0.013092019595205784, 0.22824719548225403, 0.07290598005056381, NaN, NaN, NaN, NaN, NaN, NaN], [4.2991967347916216e-05, 0.006631283089518547, 0.0006027332856319845, 0.004053125157952309, 0.03894652798771858, 0.031787656247615814, 0.10168109834194183, 0.004267984535545111, 0.002045443281531334, 0.0010633694473654032, 0.005091637372970581, 0.031351421028375626, 6.663963722530752e-05, 0.09428737312555313, 0.0008465268765576184, 0.00024849644978530705, 0.002269570017233491, 0.01905866153538227, 0.2164839655160904, 0.010082208551466465, NaN, NaN, NaN, NaN, NaN], [1.1191940757271368e-05, 0.0006002296577207744, 0.0002709901600610465, 9.913583926390857e-05, 0.0001758227008394897, 0.0029332106932997704, 0.008675863035023212, 0.0011328428518027067, 0.0023299665190279484, 6.693489558529109e-05, 0.00013525204849429429, 0.0013442488852888346, 0.022858861833810806, 2.321010106243193e-05, 0.0010626229923218489, 2.5993340386776254e-05, 3.972689592046663e-05, 5.326797690941021e-05, 0.0033412689808756113, 0.35271701216697693, 0.0008956229430623353, NaN, NaN, NaN, NaN], [0.00036489564809016883, 0.07616367936134338, 0.00673737283796072, 0.011110173538327217, 0.021392904222011566, 0.010494116693735123, 0.006134945899248123, 0.015969248488545418, 0.005187375005334616, 0.12039955705404282, 0.0005341891082935035, 0.0022901638876646757, 0.027128320187330246, 0.005907480139285326, 0.033119603991508484, 0.002176248235628009, 0.0003625153622124344, 6.369769835146144e-05, 0.0007003483478911221, 0.03456505015492439, 0.01570759527385235, 0.28412890434265137, NaN, NaN, NaN], [3.192616713931784e-05, 0.00035208670306019485, 0.002478531561791897, 0.0006564928335137665, 0.0008886585710570216, 0.0005662215990014374, 0.0016915983287617564, 1.3900444173486903e-05, 0.0009738726075738668, 0.00042995362309738994, 8.639829320600256e-05, 1.4000924238644075e-05, 0.00033226466621272266, 2.9785558581352234e-05, 0.00921203475445509, 3.390025085536763e-06, 5.1574592362158e-05, 2.3835823412809987e-06, 1.9022172637050971e-06, 0.00016878120368346572, 9.063100151252002e-05, 0.20696188509464264, 0.001649125711992383, NaN, NaN], [0.00019471753330435604, 0.003537738462910056, 0.2800489366054535, 0.036592625081539154, 0.002127013634890318, 0.024595409631729126, 0.008275463245809078, 0.00023266732750926167, 0.021680369973182678, 0.0005173377576284111, 7.175304199336097e-05, 2.6857771445065737e-05, 1.6371919627999887e-05, 0.0012281013187021017, 0.011112956330180168, 0.058813560754060745, 0.0009629606502130628, 1.1531898962857667e-05, 4.947432444168953e-06, 2.475359451636905e-06, 0.0005685617215931416, 0.0267820842564106, 0.3296748399734497, 0.06147307902574539, NaN], [3.20236104300875e-08, 0.00013383101031649858, 0.00029007354169152677, 0.002788462908938527, 0.0014709108509123325, 0.0009710633894428611, 0.0001290659129153937, 2.0881772798020393e-05, 7.236683813971467e-06, 3.12792144541163e-05, 7.099155482137576e-05, 3.213396485080011e-05, 3.9666349039180204e-05, 0.00022854047711007297, 0.0037343965377658606, 1.487573445047019e-05, 0.00019343644089531153, 8.10168421594426e-05, 1.1448363693489227e-05, 3.5921341350331204e-06, 2.216967368440237e-05, 0.0017730530817061663, 0.0001526248233858496, 0.009769736789166927, 0.4419056475162506]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07662782073020935, 0.14776498079299927, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0006832284270785749, 0.003495789598673582, 0.19430121779441833, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00020953372586518526, 0.007476589176803827, 0.1521030217409134, 0.003494996577501297, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00048688906827010214, 0.0011088894680142403, 0.0024602855555713177, 0.0005520267877727747, 0.26744863390922546, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0004194685607217252, 0.0005068383179605007, 0.026896899566054344, 0.0004147894505877048, 0.006156287621706724, 0.4387049376964569, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.0518371709622443e-05, 5.5142045312095433e-05, 0.016997506842017174, 3.693701364682056e-05, 0.0006244040559977293, 0.21657241880893707, 0.01345360092818737, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3619365394115448, 0.25655418634414673, 0.3611752688884735, 0.14710570871829987, 0.018539972603321075, 0.21814967691898346, 0.09323819726705551, 0.01780291646718979, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004012200981378555, 0.004658036399632692, 0.017421945929527283, 0.0026806569658219814, 0.590861439704895, 0.051964171230793, 0.007618917152285576, 0.0007336572161875665, 0.12340892106294632, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.44725751876831055, 0.6053639054298401, 0.07041247189044952, 0.07085516303777695, 0.003138674655929208, 0.2879992425441742, 0.049135204404592514, 0.14297868311405182, 0.06008363142609596, 0.06304289400577545, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.7072809338569641, 0.7582566142082214, 0.16150887310504913, 0.18586905300617218, 0.015776842832565308, 0.08385244756937027, 0.32581770420074463, 0.5540359020233154, 0.13379113376140594, 0.0028463751077651978, 0.051922835409641266, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4378974437713623, 0.10523661971092224, 0.014314417727291584, 0.30093127489089966, 0.06324318051338196, 0.08432605862617493, 0.2594241797924042, 0.6188808083534241, 0.3929617404937744, 0.00827555637806654, 0.07725780457258224, 0.06407154351472855, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2013174593448639, 0.5200937390327454, 0.3190821707248688, 0.5249915719032288, 0.18779213726520538, 0.1779765784740448, 0.29882070422172546, 0.5049118399620056, 0.06443758308887482, 0.007539320737123489, 0.16998757421970367, 0.031686559319496155, 0.3610091209411621, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5546301603317261, 0.5397829413414001, 0.43089261651039124, 0.08987504988908768, 0.3114354610443115, 0.4812281131744385, 0.11215226352214813, 0.17198431491851807, 0.5790820121765137, 0.03648975491523743, 0.0541677288711071, 0.04165489599108696, 0.07749651372432709, 0.030232839286327362, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005376005079597235, 0.010858614929020405, 0.02991071715950966, 0.029742157086730003, 0.04020260274410248, 0.1695990264415741, 0.0604972317814827, 0.10318762809038162, 0.48727869987487793, 0.07163358479738235, 0.025501595810055733, 0.05125340074300766, 0.22269804775714874, 0.08394679427146912, 0.19870582222938538, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0006954512791708112, 0.0002132337394868955, 0.037006676197052, 0.0018452922813594341, 0.16118928790092468, 0.5505160689353943, 0.028353480622172356, 0.0021746368147432804, 0.027092093601822853, 0.0001434519508620724, 0.0029707583598792553, 4.2726576793938875e-05, 0.0012847317848354578, 0.0010433235438540578, 0.18891005218029022, 0.014656933024525642, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.013874622993171215, 0.0695175901055336, 0.005752294324338436, 0.005697373300790787, 0.0021822804119437933, 0.02415846660733223, 0.00723307253792882, 0.3120453357696533, 0.016472192481160164, 0.004319194238632917, 0.041901107877492905, 0.7052133083343506, 0.0035930864978581667, 0.020578961819410324, 0.0021869041956961155, 0.0003597450559027493, 0.0005889505264349282, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.29724666476249695, 0.30918487906455994, 0.0693497508764267, 0.04026606306433678, 0.00593132060021162, 0.04497085511684418, 0.07199602574110031, 0.16270284354686737, 0.058071933686733246, 0.0005904879071749747, 0.0013724194141104817, 0.013050474226474762, 0.002609569113701582, 0.013482913374900818, 0.089314766228199, 0.03341012820601463, 0.21929660439491272, 0.006776490714401007, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3422777056694031, 0.07256462424993515, 0.012822822667658329, 0.21187257766723633, 0.060081083327531815, 0.09390594810247421, 0.19744858145713806, 0.5327264666557312, 0.3024030029773712, 0.013231869786977768, 0.1601967215538025, 0.04191795364022255, 0.5788960456848145, 0.791706383228302, 0.2698511779308319, 0.26516515016555786, 0.2890409529209137, 0.032140959054231644, 0.02436642162501812, NaN, NaN, NaN, NaN, NaN, NaN], [0.15722303092479706, 0.44676893949508667, 0.24300073087215424, 0.3980245292186737, 0.29666030406951904, 0.21130049228668213, 0.31708449125289917, 0.45276522636413574, 0.04954151436686516, 0.006070373114198446, 0.23888874053955078, 0.06321726739406586, 0.48237892985343933, 0.09136107563972473, 0.571183979511261, 0.36026179790496826, 0.0799446776509285, 0.1583012342453003, 0.025381257757544518, 0.5154083371162415, NaN, NaN, NaN, NaN, NaN], [0.6566299200057983, 0.6752134561538696, 0.5489535927772522, 0.1520741730928421, 0.6433172821998596, 0.7151104211807251, 0.290630042552948, 0.3418242335319519, 0.686417818069458, 0.046654678881168365, 0.09611856192350388, 0.0634889155626297, 0.4891318380832672, 0.46607306599617004, 0.5581225156784058, 0.4337400496006012, 0.06152508407831192, 0.08386452496051788, 0.0397774837911129, 0.11068917065858841, 0.04009125009179115, NaN, NaN, NaN, NaN], [0.0024060788564383984, 0.006098441779613495, 0.013975032605230808, 0.014695755206048489, 0.022452646866440773, 0.10514718294143677, 0.04751533642411232, 0.0609392412006855, 0.31799331307411194, 0.04427095875144005, 0.01951766200363636, 0.04202713817358017, 0.3371936082839966, 0.2731744647026062, 0.3478449583053589, 0.03363266587257385, 0.011759405955672264, 0.01767517626285553, 0.024101490154862404, 0.19511322677135468, 0.05518092215061188, 0.2097322940826416, NaN, NaN, NaN], [0.000109505133877974, 2.9198725314927287e-05, 0.01053665205836296, 0.0007290886132977903, 0.055462777614593506, 0.18011406064033508, 0.013305839151144028, 0.0007181179826147854, 0.008689867332577705, 4.760328374686651e-05, 0.0016827695071697235, 2.2867327061248943e-05, 0.000821226101834327, 0.0012459746794775128, 0.2353316843509674, 0.004575389437377453, 0.003901307238265872, 0.0009429306373931468, 1.1980442650383338e-05, 0.0003497266152407974, 0.00027309934375807643, 0.1965111494064331, 0.005757085047662258, NaN, NaN], [0.0017744784709066153, 0.012578981928527355, 0.0015974465059116483, 0.002320722443982959, 0.0008557687979191542, 0.004459704738110304, 0.00322481500916183, 0.13683773577213287, 0.010506929829716682, 0.0027294831816107035, 0.03936534747481346, 0.7146239876747131, 0.0021277000196278095, 0.014929071068763733, 0.003117389976978302, 0.0010002683848142624, 0.0005979579291306436, 0.037009548395872116, 0.6984097361564636, 0.0021584301721304655, 0.012162267230451107, 0.002483450109139085, 0.00014705986541230232, 0.0003713203768711537, NaN], [0.10933294892311096, 0.0594157911837101, 0.01442565955221653, 0.027944112196564674, 0.24928514659404755, 0.3314722180366516, 0.036283038556575775, 0.01824975199997425, 0.03247179090976715, 0.02741291932761669, 0.0011664694175124168, 0.03365480154752731, 0.10097742080688477, 0.021067792549729347, 0.42791858315467834, 0.11242418736219406, 0.11434369534254074, 0.000791618600487709, 0.02291581965982914, 0.07201644033193588, 0.02081850729882717, 0.39859694242477417, 0.2763477563858032, 0.13874487578868866, 0.003258609212934971]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.026641450822353363, 0.17128966748714447, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5577486157417297, 0.24638143181800842, 0.025497647002339363, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1241803988814354, 0.06599891930818558, 0.13004763424396515, 0.33318501710891724, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.9552784562110901, 0.6656578779220581, 0.04364815354347229, 0.097982257604599, 0.0012550450628623366, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6779462695121765, 0.5809971690177917, 0.2087380737066269, 0.15752893686294556, 0.08772724121809006, 0.09023962169885635, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.6994673609733582, 0.48720496892929077, 0.08263873308897018, 0.3298986256122589, 0.0049313209019601345, 0.07016509026288986, 0.5443912744522095, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3437848389148712, 0.28689879179000854, 0.5712999105453491, 0.5371078252792358, 0.06584293395280838, 0.2492358684539795, 0.014812931418418884, 0.02226697839796543, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.44942334294319153, 0.3777551054954529, 0.7612449526786804, 0.7021526098251343, 0.30080679059028625, 0.4424319267272949, 0.22922295331954956, 0.04627525433897972, 0.055941756814718246, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.47138965129852295, 0.18856076896190643, 0.6503154039382935, 0.9041082859039307, 0.2803841233253479, 0.4006999135017395, 0.5757170915603638, 0.295682817697525, 0.04142303764820099, 0.006079117301851511, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24097655713558197, 0.15950126945972443, 0.6649572849273682, 0.6751598119735718, 0.46790093183517456, 0.6438081860542297, 0.3765251934528351, 0.2975021302700043, 0.10267924517393112, 0.060453154146671295, 0.03869982063770294, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.39086097478866577, 0.6666929125785828, 0.5642580389976501, 0.557075023651123, 0.25761184096336365, 0.3620971143245697, 0.656988263130188, 0.301082581281662, 0.3758563995361328, 0.026163028553128242, 0.024990877136588097, 0.0074356794357299805, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.7909376621246338, 0.3817039430141449, 0.6133569478988647, 0.41290101408958435, 0.30558884143829346, 0.6049348711967468, 0.5688384175300598, 0.4680134057998657, 0.6550416946411133, 0.42371857166290283, 0.10508850961923599, 0.021316751837730408, 0.05294431000947952, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17973686754703522, 0.17233335971832275, 0.334688276052475, 0.4481850564479828, 0.04172942414879799, 0.10337609797716141, 0.5107487440109253, 0.7207926511764526, 0.1405051052570343, 0.0654703825712204, 0.41273486614227295, 0.17914383113384247, 0.042542651295661926, 0.010745447129011154, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5207539200782776, 0.308788537979126, 0.08189663290977478, 0.5850351452827454, 0.3457651734352112, 0.15844188630580902, 0.2948668897151947, 0.4065589904785156, 0.12084604799747467, 0.29343682527542114, 0.49164822697639465, 0.07233413308858871, 0.0535273477435112, 0.014947501011192799, 0.008541097864508629, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2949400544166565, 0.03748409450054169, 0.14473117887973785, 0.0705113336443901, 0.013025683350861073, 0.005298166535794735, 0.21091029047966003, 0.014800299890339375, 0.2805088758468628, 0.000897476973477751, 0.0938984826207161, 0.004705057479441166, 0.04936474934220314, 0.011992034502327442, 0.18721424043178558, 0.00230285432189703, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.44276589155197144, 0.06478449702262878, 0.543609619140625, 0.8444110155105591, 0.13468694686889648, 0.4405028522014618, 0.6528593897819519, 0.5737791061401367, 0.6313535571098328, 0.8501816987991333, 0.4486657381057739, 0.06076665595173836, 0.7409859299659729, 0.15147589147090912, 0.20801351964473724, 0.027446726337075233, 0.036936238408088684, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5445577502250671, 0.2876933515071869, 0.7013069987297058, 0.627236008644104, 0.37061285972595215, 0.6206991076469421, 0.38252583146095276, 0.4230470061302185, 0.31842562556266785, 0.28603002429008484, 0.015331648290157318, 0.14692452549934387, 0.8622261881828308, 0.049388445913791656, 0.37183380126953125, 0.17907747626304626, 0.05781394988298416, 0.020684318616986275, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.4656296670436859, 0.6725881099700928, 0.6199259161949158, 0.6479836702346802, 0.24076998233795166, 0.34658652544021606, 0.5947279930114746, 0.37259459495544434, 0.5521662831306458, 0.14718003571033478, 0.19626900553703308, 0.024240192025899887, 0.27736979722976685, 0.05565635487437248, 0.3618892729282379, 0.44332295656204224, 0.027751203626394272, 0.0260067880153656, 0.010717106983065605, NaN, NaN, NaN, NaN, NaN, NaN], [0.830940842628479, 0.42077580094337463, 0.7156820893287659, 0.57599937915802, 0.5493759512901306, 0.7128159999847412, 0.5476810932159424, 0.527928352355957, 0.8053308725357056, 0.8646240234375, 0.542984127998352, 0.2950981855392456, 0.3170693516731262, 0.5610483884811401, 0.26465174555778503, 0.45835256576538086, 0.22733505070209503, 0.10187508910894394, 0.03538959100842476, 0.07069608569145203, NaN, NaN, NaN, NaN, NaN], [0.09599269181489944, 0.08247342705726624, 0.25253206491470337, 0.4357891380786896, 0.039192523807287216, 0.0719948410987854, 0.3563676178455353, 0.5300538539886475, 0.06311739236116409, 0.037909455597400665, 0.5032193064689636, 0.39894816279411316, 0.3283153772354126, 0.21619060635566711, 0.017918655648827553, 0.2577371895313263, 0.14531975984573364, 0.346793532371521, 0.2014700472354889, 0.0539211668074131, 0.0146569162607193, NaN, NaN, NaN, NaN], [0.6422337889671326, 0.3740711212158203, 0.10689651221036911, 0.6858291029930115, 0.4494076073169708, 0.2826421856880188, 0.3886936604976654, 0.475405216217041, 0.13226336240768433, 0.3073323965072632, 0.7139697670936584, 0.17356495559215546, 0.25040003657341003, 0.23144030570983887, 0.024455448612570763, 0.4280460476875305, 0.048713963478803635, 0.3974619209766388, 0.06130422651767731, 0.05969162657856941, 0.015271119773387909, 0.00685582309961319, NaN, NaN, NaN], [0.5218734741210938, 0.03395698964595795, 0.2861349880695343, 0.13773199915885925, 0.02211177349090576, 0.014614011161029339, 0.43378758430480957, 0.02492188662290573, 0.26067787408828735, 0.0009113854030147195, 0.1411941796541214, 0.009023642167448997, 0.14982649683952332, 0.15959703922271729, 0.7153633832931519, 0.014257365837693214, 0.06102409213781357, 0.12158294767141342, 0.006897313520312309, 0.06130388379096985, 0.012951835058629513, 0.16874605417251587, 0.002189028775319457, NaN, NaN], [0.45293620228767395, 0.05202305316925049, 0.4803192913532257, 0.8224762082099915, 0.10338833183050156, 0.2861584722995758, 0.8321961760520935, 0.7622299790382385, 0.5323314070701599, 0.8633370995521545, 0.5219312310218811, 0.07432084530591965, 0.7646023631095886, 0.4150907099246979, 0.4998815357685089, 0.606073796749115, 0.2854492664337158, 0.6639280319213867, 0.09482558071613312, 0.806840717792511, 0.19665148854255676, 0.18194931745529175, 0.01953776553273201, 0.037144362926483154, NaN], [0.8357685804367065, 0.6023411154747009, 0.16389556229114532, 0.4697819948196411, 0.05014880374073982, 0.3185025751590729, 0.2618474066257477, 0.7044641375541687, 0.16675803065299988, 0.7323283553123474, 0.14429442584514618, 0.2621355652809143, 0.041847843676805496, 0.3185603618621826, 0.04513467848300934, 0.49906620383262634, 0.611339807510376, 0.21515053510665894, 0.3302164673805237, 0.04920952767133713, 0.2760073244571686, 0.0218669306486845, 0.25043201446533203, 0.13627314567565918, 0.01334126852452755]]], [[[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13569742441177368, 0.0376364141702652, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05053132027387619, 0.5417848825454712, 0.07814626395702362, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03762863576412201, 0.4749486744403839, 0.013701170682907104, 0.053301598876714706, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10598134994506836, 0.16776065528392792, 0.11929589509963989, 0.16846179962158203, 0.40715572237968445, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05147748813033104, 0.203742116689682, 0.11462464928627014, 0.46246808767318726, 0.01836300455033779, 0.02458924613893032, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17594558000564575, 0.17753779888153076, 0.024665912613272667, 0.19817322492599487, 0.008797828108072281, 0.022263213992118835, 0.29173722863197327, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.016114797443151474, 0.0061007170006632805, 0.028504224494099617, 0.017245782539248466, 0.08753485232591629, 0.11264273524284363, 0.6154332160949707, 0.029144972562789917, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027042992413043976, 0.032212790101766586, 0.019619816914200783, 0.014702342450618744, 0.06721275299787521, 0.2560867667198181, 0.5545244216918945, 0.40561506152153015, 0.037922732532024384, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1654873937368393, 0.013622531667351723, 0.0656571239233017, 0.09179358184337616, 0.03440919890999794, 0.08533406257629395, 0.16269220411777496, 0.1151970624923706, 0.09265416115522385, 0.028269361704587936, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2598540484905243, 0.010173649527132511, 0.004170349799096584, 0.003479698905721307, 0.0014636714477092028, 0.0011101020500063896, 0.001677120802924037, 0.034040722995996475, 0.0041177538223564625, 0.024958845227956772, 0.016315795481204987, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17492477595806122, 0.010013026185333729, 0.005800239276140928, 0.0069971769116818905, 0.0036480696871876717, 0.001016399241052568, 0.0060493675991892815, 0.0034581662621349096, 0.00659980857744813, 0.0047594537027180195, 0.3941299021244049, 0.2407994568347931, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06559828668832779, 0.005602334160357714, 0.0005807551206089556, 0.0005322807701304555, 0.004617360420525074, 0.00354054500348866, 0.005599506665021181, 0.011434626765549183, 0.006905066315084696, 0.009602343663573265, 0.11027393490076065, 0.36931946873664856, 0.06368503719568253, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015983520075678825, 0.012168757617473602, 0.0015684146201238036, 0.0005484889261424541, 0.00233695306815207, 0.0038106110878288746, 0.005947766825556755, 0.04194773733615875, 0.014443459920585155, 0.06465759128332138, 0.14989611506462097, 0.5095774531364441, 0.1882752925157547, 0.02387852594256401, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11159919947385788, 0.06036144495010376, 0.06681493669748306, 0.0798669382929802, 0.03668922558426857, 0.018710536882281303, 0.029976846650242805, 0.0675768032670021, 0.03372039645910263, 0.057603828608989716, 0.14515243470668793, 0.25060775876045227, 0.23181115090847015, 0.14262832701206207, 0.33286023139953613, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018035059794783592, 0.02341379225254059, 0.0019442361081019044, 0.004369894042611122, 0.00136191223282367, 0.00017434914479963481, 0.0011034610215574503, 0.06787250190973282, 0.060198791325092316, 0.12004764378070831, 0.11878902465105057, 0.2063554972410202, 0.28332868218421936, 0.35319504141807556, 0.008158767595887184, 0.26057863235473633, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17278411984443665, 0.007028562016785145, 0.010641193017363548, 0.013809186406433582, 0.0005732428980991244, 0.001056239241734147, 0.0005258666351437569, 0.03639528155326843, 0.02256075292825699, 0.01660884916782379, 0.1527748554944992, 0.1477358043193817, 0.2577149271965027, 0.03867224231362343, 0.04304511100053787, 0.11759469658136368, 0.0762997567653656, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.38573285937309265, 0.0028330886270850897, 0.0014278099406510592, 0.0009824484586715698, 9.371336636831984e-05, 0.00015483389142900705, 6.760591350030154e-05, 0.0035791138652712107, 0.0002520910056773573, 0.0005180046427994967, 0.00024238335026893765, 0.011901103891432285, 0.011019378900527954, 0.006276060827076435, 0.0026990415062755346, 0.016820058226585388, 0.03330027312040329, 0.047877803444862366, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21399648487567902, 0.008264300413429737, 0.0051351506263017654, 0.005111425183713436, 0.0020249083172529936, 0.00047485672985203564, 0.0018332998733967543, 0.0008904117858037353, 0.0017731828847900033, 0.000539442349690944, 0.03944296017289162, 0.039767228066921234, 0.00580678740516305, 0.004312179517000914, 0.003937484696507454, 0.00913114845752716, 0.006211036816239357, 0.3553882837295532, 0.3024981617927551, NaN, NaN, NaN, NaN, NaN, NaN], [0.05261809378862381, 0.004144520964473486, 0.00047606538282707334, 0.0003396419051568955, 0.002880769083276391, 0.0015178520698100328, 0.0018901955336332321, 0.0029504895210266113, 0.0017174717504531145, 0.0006908842478878796, 0.0046035549603402615, 0.09042679518461227, 0.0032755613792687654, 0.007712012622505426, 0.032594844698905945, 0.02268057130277157, 0.033856723457574844, 0.07955116033554077, 0.4074561595916748, 0.07153668999671936, NaN, NaN, NaN, NaN, NaN], [0.019381573423743248, 0.012705344706773758, 0.0019882190972566605, 0.0005741973291151226, 0.0020475401543080807, 0.0023934554774314165, 0.004172713495790958, 0.021013854071497917, 0.005879250820726156, 0.006729640066623688, 0.00632414361461997, 0.09735815972089767, 0.01909361220896244, 0.00100265524815768, 0.003452989971265197, 0.008203250356018543, 0.05971603840589523, 0.11904174834489822, 0.5188009142875671, 0.2541559338569641, 0.029506316408514977, NaN, NaN, NaN, NaN], [0.10572486370801926, 0.04525948688387871, 0.055838145315647125, 0.050681136548519135, 0.027844024822115898, 0.014026278629899025, 0.025656970217823982, 0.0361209474503994, 0.017075760290026665, 0.01003955863416195, 0.016965145245194435, 0.04991300031542778, 0.01522271428257227, 0.007584442384541035, 0.03757705166935921, 0.03609456866979599, 0.10922907292842865, 0.19329114258289337, 0.2903786897659302, 0.29551932215690613, 0.1564989984035492, 0.3518115282058716, NaN, NaN, NaN], [0.017342884093523026, 0.024629754945635796, 0.0017386168474331498, 0.003977979999035597, 0.0011948446044698358, 0.0001711023651296273, 0.0019097719341516495, 0.050265345722436905, 0.048485398292541504, 0.025773482397198677, 0.011941587552428246, 0.02582539990544319, 0.014500979334115982, 0.011088544502854347, 0.0004536270862445235, 0.001346826204098761, 0.09912228584289551, 0.03899921476840973, 0.19399496912956238, 0.33165985345840454, 0.3351045250892639, 0.007158405613154173, 0.26822295784950256, NaN, NaN], [0.15815527737140656, 0.009173951111733913, 0.012453499250113964, 0.01756284572184086, 0.0007500716019421816, 0.0020462200045585632, 0.00166225153952837, 0.05335438624024391, 0.037105023860931396, 0.009711050428450108, 0.05516523867845535, 0.04893142729997635, 0.03887411952018738, 0.002221355913206935, 0.004346344619989395, 0.004376854281872511, 0.001785764587111771, 0.09844812005758286, 0.14674220979213715, 0.34636548161506653, 0.04763580113649368, 0.057022612541913986, 0.12166893482208252, 0.13556897640228271, NaN], [0.16895240545272827, 0.0006144722574390471, 0.0027162963524460793, 0.0007400937611237168, 0.0007253509247675538, 0.0007097159395925701, 0.000199983871425502, 0.0005034026107750833, 0.0002540702698752284, 0.0002154638059437275, 0.0004817947919946164, 0.0019994170870631933, 0.0003459753352217376, 6.575404404429719e-05, 0.004540599416941404, 0.00010029276745626703, 0.0005050064064562321, 0.003569946391507983, 0.008527955040335655, 0.003213587449863553, 0.0022120880894362926, 0.11142478138208389, 0.01313241571187973, 0.055687084794044495, 0.21235007047653198]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13440807163715363, 0.048166193068027496, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14904144406318665, 0.03273539990186691, 0.03615117073059082, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17614386975765228, 0.0854690745472908, 0.038236960768699646, 0.12011754512786865, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14069411158561707, 0.1466522365808487, 0.07941046357154846, 0.06070372834801674, 0.045592159032821655, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15778480470180511, 0.11167039722204208, 0.20017755031585693, 0.10082826018333435, 0.013994856737554073, 0.07346371561288834, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15305520594120026, 0.26692208647727966, 0.1222626119852066, 0.14178596436977386, 0.012799645774066448, 0.019025815650820732, 0.14782781898975372, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.050227321684360504, 0.49922510981559753, 0.2564227879047394, 0.37594476342201233, 0.05222875997424126, 0.019398091360926628, 0.07475102692842484, 0.13636687397956848, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1278427243232727, 0.4489462971687317, 0.09382158517837524, 0.09914611279964447, 0.11451858282089233, 0.14035384356975555, 0.0858180820941925, 0.1395546793937683, 0.05027398467063904, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06907324492931366, 0.44302117824554443, 0.21607427299022675, 0.21861647069454193, 0.14559195935726166, 0.12854896485805511, 0.21420170366764069, 0.5056769251823425, 0.05036870762705803, 0.14160890877246857, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08832916617393494, 0.4917650520801544, 0.16961733996868134, 0.21240676939487457, 0.17275941371917725, 0.13381528854370117, 0.1763075888156891, 0.3443826735019684, 0.022638684138655663, 0.14659351110458374, 0.05034468695521355, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10765255987644196, 0.1569133847951889, 0.14696621894836426, 0.12414205074310303, 0.1321374922990799, 0.32589367032051086, 0.09939466416835785, 0.15668180584907532, 0.035531532019376755, 0.18526552617549896, 0.100669264793396, 0.1766001582145691, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0920143872499466, 0.03631591796875, 0.10338561236858368, 0.13865944743156433, 0.14365890622138977, 0.19164490699768066, 0.08302215486764908, 0.17053648829460144, 0.20418454706668854, 0.4243081212043762, 0.23730118572711945, 0.11353020370006561, 0.062482837587594986, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14247462153434753, 0.10275112092494965, 0.08782284706830978, 0.07633533328771591, 0.09427531808614731, 0.2382509559392929, 0.11237408220767975, 0.1274290829896927, 0.09234490990638733, 0.29983192682266235, 0.19681134819984436, 0.09119200706481934, 0.1394888311624527, 0.02876400761306286, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14126147329807281, 0.06271495670080185, 0.09029032289981842, 0.10313913226127625, 0.08530516922473907, 0.05194256827235222, 0.09853952378034592, 0.05407971888780594, 0.10021005570888519, 0.14394013583660126, 0.19472479820251465, 0.17138735949993134, 0.055624835193157196, 0.022259291261434555, 0.010825252160429955, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15579406917095184, 0.5571659207344055, 0.09220181405544281, 0.09424383193254471, 0.2893342971801758, 0.14449337124824524, 0.08881417661905289, 0.09621196240186691, 0.05768556892871857, 0.34467604756355286, 0.16894927620887756, 0.32070621848106384, 0.32385867834091187, 0.08616255223751068, 0.0030245021916925907, 0.011462957598268986, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06543286889791489, 0.3303832709789276, 0.1981877088546753, 0.17906354367733002, 0.08578304201364517, 0.12075137346982956, 0.09918820112943649, 0.14948950707912445, 0.0696079283952713, 0.2870473861694336, 0.2037079930305481, 0.20505982637405396, 0.415317177772522, 0.18504147231578827, 0.05944397673010826, 0.03780561313033104, 0.06350213289260864, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08806300163269043, 0.5073549151420593, 0.15216797590255737, 0.1779468059539795, 0.08599209040403366, 0.038353316485881805, 0.05095306783914566, 0.13815101981163025, 0.05531492829322815, 0.3680262565612793, 0.045964885503053665, 0.5803228616714478, 0.2365681380033493, 0.10053237527608871, 0.016326427459716797, 0.011199035681784153, 0.02849578857421875, 0.09785498678684235, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10047968477010727, 0.17735490202903748, 0.1303417980670929, 0.1233980730175972, 0.11124629527330399, 0.27208706736564636, 0.09057758748531342, 0.20949512720108032, 0.0595981664955616, 0.32820063829421997, 0.19304482638835907, 0.3008245825767517, 0.24370267987251282, 0.0977335274219513, 0.0604717954993248, 0.08826017379760742, 0.05976974964141846, 0.11658596247434616, 0.26095637679100037, NaN, NaN, NaN, NaN, NaN, NaN], [0.08956606686115265, 0.03296149522066116, 0.07127847522497177, 0.10275094956159592, 0.12852256000041962, 0.15250688791275024, 0.05763629823923111, 0.13953621685504913, 0.2147330343723297, 0.3297017514705658, 0.25630685687065125, 0.3529660999774933, 0.05266188457608223, 0.19866161048412323, 0.08034973591566086, 0.16050152480602264, 0.12120798975229263, 0.21796129643917084, 0.13665789365768433, 0.05867582932114601, NaN, NaN, NaN, NaN, NaN], [0.16931524872779846, 0.06866136193275452, 0.058377113193273544, 0.054153572767972946, 0.06997817754745483, 0.17294903099536896, 0.06504172086715698, 0.09800923615694046, 0.07601338624954224, 0.22323867678642273, 0.17471107840538025, 0.20914696156978607, 0.32561469078063965, 0.04201642796397209, 0.014874166809022427, 0.043757203966379166, 0.11901038885116577, 0.15924809873104095, 0.08216992020606995, 0.13305248320102692, 0.031323518604040146, NaN, NaN, NaN, NaN], [0.14597494900226593, 0.05063166096806526, 0.07245789468288422, 0.08537694066762924, 0.07253167033195496, 0.03945168852806091, 0.07488631457090378, 0.04114159941673279, 0.09447583556175232, 0.11984950304031372, 0.21245841681957245, 0.24130037426948547, 0.053050536662340164, 0.036372195929288864, 0.012788524851202965, 0.05413965508341789, 0.17548364400863647, 0.18113258481025696, 0.17045176029205322, 0.056165628135204315, 0.023532675579190254, 0.007599800359457731, NaN, NaN, NaN], [0.20880575478076935, 0.4742221236228943, 0.0684090405702591, 0.07499475032091141, 0.22897963225841522, 0.11411925405263901, 0.06380540132522583, 0.06602712720632553, 0.04886250197887421, 0.25098055601119995, 0.16695836186408997, 0.41882073879241943, 0.45364588499069214, 0.19780457019805908, 0.004864717833697796, 0.007611281704157591, 0.23698794841766357, 0.08390159159898758, 0.28844529390335083, 0.28151822090148926, 0.0680297240614891, 0.0018790157046169043, 0.008693840354681015, NaN, NaN], [0.06649312376976013, 0.2272576093673706, 0.15548978745937347, 0.13675269484519958, 0.06747769564390182, 0.09888236224651337, 0.07679145783185959, 0.09811051189899445, 0.059132058173418045, 0.16564641892910004, 0.1534833461046219, 0.21299242973327637, 0.46317315101623535, 0.18783308565616608, 0.06707606464624405, 0.07066023349761963, 0.038238298147916794, 0.13390158116817474, 0.1738123893737793, 0.3894510865211487, 0.199345201253891, 0.05267143249511719, 0.03450411930680275, 0.0674150139093399, NaN], [0.13068987429141998, 0.5177554488182068, 0.21822108328342438, 0.17411521077156067, 0.11371950805187225, 0.10282127559185028, 0.14754493534564972, 0.10529720038175583, 0.04059072583913803, 0.1422514021396637, 0.16688787937164307, 0.3468432128429413, 0.07328897714614868, 0.033892080187797546, 0.005811289418488741, 0.006848806049674749, 0.033459149301052094, 0.08608346432447433, 0.29348817467689514, 0.07146795839071274, 0.05563248693943024, 0.008248405531048775, 0.00942459236830473, 0.03898181766271591, 0.13983668386936188]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13037645816802979, 0.08109150826931, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14859925210475922, 0.02925589494407177, 0.0505123995244503, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21387919783592224, 0.03206360712647438, 0.012896520085632801, 0.06630519032478333, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15968731045722961, 0.046736959367990494, 0.014681101776659489, 0.01418250147253275, 0.011044399812817574, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22570300102233887, 0.051045093685388565, 0.020206425338983536, 0.021926334127783775, 0.008406145498156548, 0.0702541247010231, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.28555917739868164, 0.03329295665025711, 0.036049578338861465, 0.038853298872709274, 0.007190736476331949, 0.006643606815487146, 0.08228380233049393, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2511760890483856, 0.07463249564170837, 0.04988643527030945, 0.0701586976647377, 0.028143733739852905, 0.007391677238047123, 0.02261284738779068, 0.0737045407295227, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15217745304107666, 0.19177564978599548, 0.125013530254364, 0.1473270058631897, 0.20325084030628204, 0.10669662803411484, 0.07946557551622391, 0.027662983164191246, 0.09494684636592865, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13806378841400146, 0.2514709234237671, 0.17176732420921326, 0.21858137845993042, 0.17882317304611206, 0.16198168694972992, 0.20351995527744293, 0.07158615440130234, 0.0266498401761055, 0.23213928937911987, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17152094841003418, 0.15314172208309174, 0.15820659697055817, 0.19208288192749023, 0.19640566408634186, 0.061033159494400024, 0.12321671098470688, 0.07748300582170486, 0.07906179875135422, 0.032524362206459045, 0.08073069155216217, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11935991793870926, 0.25889015197753906, 0.181893989443779, 0.2521744966506958, 0.2510518431663513, 0.1320696324110031, 0.17421388626098633, 0.10352174937725067, 0.13144756853580475, 0.06071629375219345, 0.07381404936313629, 0.11898738145828247, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11384479701519012, 0.12307179719209671, 0.17695116996765137, 0.21105043590068817, 0.2652710974216461, 0.1994313895702362, 0.5530626177787781, 0.33474239706993103, 0.11353342235088348, 0.20157715678215027, 0.12058570981025696, 0.02405776083469391, 0.20302970707416534, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1661912202835083, 0.3088836967945099, 0.3049609959125519, 0.34614017605781555, 0.3287224769592285, 0.19484750926494598, 0.49978625774383545, 0.2471936047077179, 0.14924246072769165, 0.2264283001422882, 0.11719675362110138, 0.028577886521816254, 0.03125511854887009, 0.04683076590299606, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1382068395614624, 0.14312644302845, 0.15027517080307007, 0.2806132137775421, 0.10704077035188675, 0.15715429186820984, 0.3545873463153839, 0.2772214114665985, 0.11900671571493149, 0.16433128714561462, 0.08395379036664963, 0.0337035246193409, 0.08286106586456299, 0.029390821233391762, 0.07092607021331787, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.31265145540237427, 0.17018769681453705, 0.42172688245773315, 0.3373875319957733, 0.26503118872642517, 0.3668123483657837, 0.6080453991889954, 0.3421963155269623, 0.29850897192955017, 0.22005639970302582, 0.08626232296228409, 0.05660916119813919, 0.04967416450381279, 0.020023291930556297, 0.01626538299024105, 0.03365384787321091, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11847452819347382, 0.5065410137176514, 0.4161456227302551, 0.44356557726860046, 0.358999639749527, 0.34202155470848083, 0.6410406231880188, 0.5693260431289673, 0.3344528377056122, 0.3382241725921631, 0.16963228583335876, 0.12081613391637802, 0.09492655098438263, 0.06781262904405594, 0.059771545231342316, 0.013083304278552532, 0.15846344828605652, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14143924415111542, 0.33810776472091675, 0.4273369610309601, 0.4442084729671478, 0.4867575168609619, 0.40271657705307007, 0.7919159531593323, 0.5796146988868713, 0.41502290964126587, 0.19611117243766785, 0.2659074366092682, 0.0590454526245594, 0.09533000737428665, 0.06579555571079254, 0.049002423882484436, 0.011413656175136566, 0.05989237129688263, 0.0694013461470604, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06363721936941147, 0.3402014374732971, 0.30108359456062317, 0.3598821461200714, 0.356340229511261, 0.2955020070075989, 0.3913557827472687, 0.34592464566230774, 0.3881937265396118, 0.23078370094299316, 0.49122318625450134, 0.3432621657848358, 0.1563359946012497, 0.12668228149414062, 0.1534397453069687, 0.06296171993017197, 0.07472987473011017, 0.07419107109308243, 0.08810260146856308, NaN, NaN, NaN, NaN, NaN, NaN], [0.06025628373026848, 0.1445734202861786, 0.2208743691444397, 0.22917300462722778, 0.34805941581726074, 0.30598515272140503, 0.6932811141014099, 0.6030279994010925, 0.2491629421710968, 0.46458470821380615, 0.5228609442710876, 0.2136632800102234, 0.610046923160553, 0.25265923142433167, 0.14038830995559692, 0.07342293113470078, 0.22653138637542725, 0.10003089159727097, 0.02225746400654316, 0.14559555053710938, NaN, NaN, NaN, NaN, NaN], [0.0902293398976326, 0.5066702961921692, 0.45472872257232666, 0.45485398173332214, 0.5058757662773132, 0.3594079613685608, 0.7028806209564209, 0.5180745720863342, 0.25713953375816345, 0.5372852683067322, 0.6213670372962952, 0.2659974694252014, 0.3181111812591553, 0.5259383916854858, 0.33730512857437134, 0.13441412150859833, 0.36266574263572693, 0.10496268421411514, 0.02362431399524212, 0.020191077142953873, 0.04590708762407303, NaN, NaN, NaN, NaN], [0.1059701219201088, 0.2303982675075531, 0.21762119233608246, 0.3580361306667328, 0.17096057534217834, 0.24843183159828186, 0.5131583213806152, 0.47260501980781555, 0.21650557219982147, 0.38561707735061646, 0.416827529668808, 0.1716565638780594, 0.3172723054885864, 0.29216328263282776, 0.47280052304267883, 0.38235870003700256, 0.1798420399427414, 0.1762932986021042, 0.04000748321413994, 0.08066289126873016, 0.03975420445203781, 0.08505715429782867, NaN, NaN, NaN], [0.2317487895488739, 0.2560827136039734, 0.5102789998054504, 0.4199059009552002, 0.44283756613731384, 0.5258800983428955, 0.732390284538269, 0.4491574466228485, 0.4244932234287262, 0.5298821926116943, 0.43037980794906616, 0.2800268232822418, 0.3093121647834778, 0.4250229299068451, 0.19317308068275452, 0.2640416920185089, 0.38813653588294983, 0.11181202530860901, 0.054203763604164124, 0.037284549325704575, 0.018739882856607437, 0.014264266937971115, 0.035236652940511703, NaN, NaN], [0.08032029122114182, 0.6358892321586609, 0.5042787194252014, 0.5074477195739746, 0.5223307013511658, 0.5343775749206543, 0.703619122505188, 0.6657658815383911, 0.45647403597831726, 0.602655827999115, 0.5387927889823914, 0.39006462693214417, 0.39567169547080994, 0.43596506118774414, 0.41000646352767944, 0.269907683134079, 0.5412885546684265, 0.2038634866476059, 0.10306636989116669, 0.05501747503876686, 0.04515310004353523, 0.04695969074964523, 0.008877278305590153, 0.09985174983739853, NaN], [0.03129265457391739, 0.2636677324771881, 0.3672870099544525, 0.438161164522171, 0.7497870922088623, 0.43876102566719055, 0.6747432947158813, 0.5918557643890381, 0.5535795092582703, 0.7133825421333313, 0.7440239787101746, 0.3780657947063446, 0.4423457384109497, 0.6450315713882446, 0.5939705967903137, 0.7279283404350281, 0.4253756105899811, 0.4950290024280548, 0.13756991922855377, 0.08432447165250778, 0.11775307357311249, 0.12791647017002106, 0.07922011613845825, 0.04417572543025017, 0.3473970592021942]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13398022949695587, 0.051660239696502686, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14254364371299744, 0.023038247600197792, 0.14531654119491577, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17795929312705994, 0.024941343814134598, 0.06730933487415314, 0.21388311684131622, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09399491548538208, 0.3603954315185547, 0.2704434394836426, 0.1475897580385208, 0.18568314611911774, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14775781333446503, 0.19919507205486298, 0.14170727133750916, 0.05924544855952263, 0.05067846551537514, 0.45942243933677673, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14211317896842957, 0.055850330740213394, 0.31645503640174866, 0.16900919377803802, 0.038168299943208694, 0.07897188514471054, 0.2625669240951538, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08848852664232254, 0.1616290658712387, 0.37575462460517883, 0.24721546471118927, 0.16591095924377441, 0.06889674067497253, 0.052010323852300644, 0.12634019553661346, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0747382640838623, 0.14914710819721222, 0.6135430335998535, 0.5929751992225647, 0.35069379210472107, 0.2108047604560852, 0.11502823978662491, 0.02365955151617527, 0.17759312689304352, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02855301834642887, 0.21659326553344727, 0.4310435652732849, 0.40604472160339355, 0.3670090436935425, 0.48140615224838257, 0.27167943120002747, 0.09097199141979218, 0.1627163589000702, 0.1288144737482071, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03365316241979599, 0.14809295535087585, 0.3644290566444397, 0.4046455919742584, 0.26744210720062256, 0.32108214497566223, 0.1678413599729538, 0.190241739153862, 0.22121649980545044, 0.03444775566458702, 0.46765974164009094, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.038216885179281235, 0.2552680969238281, 0.4071650505065918, 0.3936895430088043, 0.4416206479072571, 0.38015541434288025, 0.1657901555299759, 0.15260477364063263, 0.22771137952804565, 0.10614379495382309, 0.0724361315369606, 0.1760038137435913, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07068492472171783, 0.07818713039159775, 0.3302493095397949, 0.299561083316803, 0.46339741349220276, 0.48102065920829773, 0.15714748203754425, 0.27301517128944397, 0.38065311312675476, 0.19789563119411469, 0.11113718152046204, 0.05171056091785431, 0.13386131823062897, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05115865543484688, 0.44867002964019775, 0.49208834767341614, 0.477664977312088, 0.4642978608608246, 0.46059542894363403, 0.25649622082710266, 0.406831830739975, 0.27858051657676697, 0.2405669242143631, 0.11958811432123184, 0.1450459510087967, 0.0628136694431305, 0.09898709505796432, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04031704366207123, 0.6707005500793457, 0.529548704624176, 0.4586588144302368, 0.3106471002101898, 0.6713098287582397, 0.4458201229572296, 0.5507155060768127, 0.6255134344100952, 0.5032600164413452, 0.18919125199317932, 0.2968505918979645, 0.3902440667152405, 0.16804949939250946, 0.088200144469738, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13188821077346802, 0.1971314549446106, 0.3902590274810791, 0.4961083233356476, 0.37017205357551575, 0.46889960765838623, 0.2874276340007782, 0.1815745085477829, 0.39618349075317383, 0.17909032106399536, 0.26052209734916687, 0.13463276624679565, 0.11223814636468887, 0.05094114691019058, 0.030694767832756042, 0.23131275177001953, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.029627619311213493, 0.0727827325463295, 0.2382729947566986, 0.16726669669151306, 0.3644602298736572, 0.47072863578796387, 0.2034798413515091, 0.1723088026046753, 0.43477845191955566, 0.18565386533737183, 0.3540991544723511, 0.2379947453737259, 0.07713616639375687, 0.19858470559120178, 0.17015229165554047, 0.0891638696193695, 0.22899208962917328, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01839388906955719, 0.10223808884620667, 0.244280606508255, 0.22035017609596252, 0.2828108072280884, 0.41914066672325134, 0.09010869264602661, 0.14338640868663788, 0.35142722725868225, 0.12073972821235657, 0.6723650693893433, 0.17433631420135498, 0.20010362565517426, 0.17566151916980743, 0.17214345932006836, 0.06743419170379639, 0.08234895765781403, 0.4274884760379791, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02117752842605114, 0.17625343799591064, 0.2448491007089615, 0.23410049080848694, 0.3357784152030945, 0.2992798388004303, 0.09099920094013214, 0.1110134869813919, 0.20308172702789307, 0.1763213574886322, 0.1646280288696289, 0.23259523510932922, 0.3615821301937103, 0.32664546370506287, 0.296549916267395, 0.2726198732852936, 0.07387500256299973, 0.07587912678718567, 0.14093360304832458, NaN, NaN, NaN, NaN, NaN, NaN], [0.05486638844013214, 0.06597498804330826, 0.2194771021604538, 0.1927901804447174, 0.37433308362960815, 0.412477970123291, 0.07100911438465118, 0.1499587744474411, 0.3056679368019104, 0.16932857036590576, 0.15193165838718414, 0.19111526012420654, 0.291239857673645, 0.37710845470428467, 0.510109543800354, 0.47089657187461853, 0.17204606533050537, 0.09759342670440674, 0.05198577418923378, 0.1557197868824005, NaN, NaN, NaN, NaN, NaN], [0.03942986950278282, 0.2940163016319275, 0.3192412853240967, 0.3550935387611389, 0.28974649310112, 0.35144588351249695, 0.111830934882164, 0.2212614268064499, 0.1942923218011856, 0.16557106375694275, 0.12293191254138947, 0.3516637980937958, 0.22679129242897034, 0.3504909574985504, 0.4427362084388733, 0.6422855854034424, 0.29741936922073364, 0.17250965535640717, 0.13341550529003143, 0.05469499155879021, 0.0792233869433403, NaN, NaN, NaN, NaN], [0.03949292004108429, 0.6095755696296692, 0.4376317858695984, 0.4024345874786377, 0.24819140136241913, 0.555855929851532, 0.2881583273410797, 0.40402302145957947, 0.5775710940361023, 0.42070186138153076, 0.22824901342391968, 0.4547353982925415, 0.567461371421814, 0.5762937664985657, 0.33163049817085266, 0.41951635479927063, 0.37286072969436646, 0.25620296597480774, 0.25266289710998535, 0.3395143151283264, 0.13239842653274536, 0.07333662360906601, NaN, NaN, NaN], [0.11607979983091354, 0.18507249653339386, 0.30528268218040466, 0.41669708490371704, 0.22673273086547852, 0.3321194052696228, 0.17922396957874298, 0.1181870847940445, 0.299829363822937, 0.11785572022199631, 0.23005077242851257, 0.1731709986925125, 0.17971253395080566, 0.2448451966047287, 0.15796169638633728, 0.701153576374054, 0.1659945547580719, 0.4861533045768738, 0.20215842127799988, 0.13506482541561127, 0.058445703238248825, 0.03114200383424759, 0.21790345013141632, NaN, NaN], [0.017429474741220474, 0.04190561920404434, 0.14842365682125092, 0.09654705971479416, 0.16489917039871216, 0.24686570465564728, 0.09686223417520523, 0.09368213266134262, 0.2918589413166046, 0.08991989493370056, 0.18521137535572052, 0.19666530191898346, 0.06316249072551727, 0.222347229719162, 0.3215444087982178, 0.3288835287094116, 0.38603323698043823, 0.4142700135707855, 0.25910744071006775, 0.0714699923992157, 0.2130158245563507, 0.1895158588886261, 0.07420682162046432, 0.2235250473022461, NaN], [0.011625233106315136, 0.13701221346855164, 0.3079974055290222, 0.17742200195789337, 0.10538481175899506, 0.17213597893714905, 0.08605048805475235, 0.13507568836212158, 0.2275547832250595, 0.07923908531665802, 0.07705283164978027, 0.2479921281337738, 0.3453103303909302, 0.2883259654045105, 0.36409828066825867, 0.18068012595176697, 0.4896908700466156, 0.399289608001709, 0.5261627435684204, 0.6339481472969055, 0.6382991671562195, 0.5417840480804443, 0.2542280852794647, 0.330732524394989, 0.21995915472507477]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04915444552898407, 0.7444152235984802, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10270431637763977, 0.20103313028812408, 0.23083212971687317, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1558120846748352, 0.09243088960647583, 0.02280065417289734, 0.32627996802330017, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1265193670988083, 0.1639627069234848, 0.12297425419092178, 0.08557231724262238, 0.1833999902009964, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11118379235267639, 0.23907560110092163, 0.16732671856880188, 0.1982172429561615, 0.02825341187417507, 0.15412425994873047, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06564534455537796, 0.4107542335987091, 0.09891282767057419, 0.3507450222969055, 0.0021941487211734056, 0.004341787192970514, 0.11288701742887497, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09254656732082367, 0.17870496213436127, 0.11882538348436356, 0.2565489113330841, 0.06709786504507065, 0.020701991394162178, 0.05621851608157158, 0.571487307548523, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12130707502365112, 0.06869146227836609, 0.052872415632009506, 0.07373122870922089, 0.03967232629656792, 0.019552208483219147, 0.024196362122893333, 0.1570335328578949, 0.3329051434993744, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12370187789201736, 0.027735348790884018, 0.007442266680300236, 0.018701551482081413, 0.04923407360911369, 0.022976329550147057, 0.06834850460290909, 0.13354788720607758, 0.13089321553707123, 0.41554775834083557, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08012630045413971, 0.020899765193462372, 0.032236725091934204, 0.011631320230662823, 0.1322554349899292, 0.13739252090454102, 0.3272823691368103, 0.10228703171014786, 0.16136890649795532, 0.12631160020828247, 0.3315902352333069, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07002493739128113, 0.03239390626549721, 0.05209453031420708, 0.033656563609838486, 0.10301846265792847, 0.08080227673053741, 0.10908480733633041, 0.10694557428359985, 0.2992934286594391, 0.26628223061561584, 0.1579413264989853, 0.18216297030448914, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23901967704296112, 0.02059122547507286, 0.03393668681383133, 0.04736512154340744, 0.05927135422825813, 0.02361929975450039, 0.006761881057173014, 0.05556455999612808, 0.1379650980234146, 0.12424714863300323, 0.191926509141922, 0.01547694206237793, 0.05743350088596344, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0662187710404396, 0.02669837884604931, 0.008789082989096642, 0.004751283209770918, 0.0528719425201416, 0.011242655105888844, 0.018989307805895805, 0.07620660215616226, 0.012969521805644035, 0.039284493774175644, 0.22954939305782318, 0.04563957825303078, 0.029234008863568306, 0.7488549947738647, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10826153308153152, 0.014460555277764797, 0.0725417360663414, 0.03217141702771187, 0.06698039174079895, 0.08051858842372894, 0.05872708931565285, 0.022866755723953247, 0.06705553829669952, 0.07034263759851456, 0.3507814407348633, 0.05356235057115555, 0.08709309250116348, 0.23604632914066315, 0.324868768453598, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13878783583641052, 0.02536645717918873, 0.06943535804748535, 0.05891912057995796, 0.006977759767323732, 0.003910682164132595, 0.004916978534311056, 0.04463541880249977, 0.07985055446624756, 0.07872368395328522, 0.291103333234787, 0.21302121877670288, 0.16995804011821747, 0.19893744587898254, 0.01890285685658455, 0.3838881254196167, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04579493775963783, 0.04550570994615555, 0.013287660665810108, 0.023886512964963913, 0.024052713066339493, 0.017023656517267227, 0.04836693033576012, 0.030526861548423767, 0.017645621672272682, 0.03170713782310486, 0.09266000241041183, 0.23106807470321655, 0.03557471185922623, 0.12432269752025604, 0.10334902256727219, 0.3233395516872406, 0.3770029842853546, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0394071489572525, 0.011173942126333714, 0.019201254472136497, 0.012027204036712646, 0.1043756976723671, 0.09629304707050323, 0.044260744005441666, 0.010774374939501286, 0.027033720165491104, 0.01529898401349783, 0.004158060997724533, 0.03471178933978081, 0.3574643135070801, 0.04469288885593414, 0.27014297246932983, 0.10925178974866867, 0.34427598118782043, 0.2875407040119171, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08343059569597244, 0.043180350214242935, 0.0767669752240181, 0.06360654532909393, 0.1271795630455017, 0.0800960585474968, 0.06889919936656952, 0.05648425221443176, 0.1521727591753006, 0.09240606427192688, 0.03566697984933853, 0.03560119867324829, 0.1492718607187271, 0.18653850257396698, 0.3474813401699066, 0.3278762698173523, 0.10706853121519089, 0.127774178981781, 0.1299499273300171, NaN, NaN, NaN, NaN, NaN, NaN], [0.23721955716609955, 0.02343675307929516, 0.03610215708613396, 0.05973569303750992, 0.07488072663545609, 0.026813305914402008, 0.0050082337111234665, 0.03149579092860222, 0.06251367926597595, 0.02305557392537594, 0.025774041190743446, 0.007636546157300472, 0.004965651780366898, 0.09922869503498077, 0.133448526263237, 0.1956746131181717, 0.04676169902086258, 0.27956491708755493, 0.021136147901415825, 0.057313986122608185, NaN, NaN, NaN, NaN, NaN], [0.0697786882519722, 0.028010839596390724, 0.012634677812457085, 0.007894599810242653, 0.0697624459862709, 0.015741104260087013, 0.01737123914062977, 0.05471426621079445, 0.0063003492541611195, 0.009287585504353046, 0.02825707383453846, 0.016440505161881447, 0.0038715004920959473, 0.07019948214292526, 0.02518516778945923, 0.041359793394804, 0.06545242667198181, 0.29174378514289856, 0.05010553449392319, 0.020036837086081505, 0.7549301981925964, NaN, NaN, NaN, NaN], [0.12042609602212906, 0.016146911308169365, 0.09666067361831665, 0.04101520776748657, 0.09386932849884033, 0.11830881983041763, 0.08227012306451797, 0.02001151442527771, 0.0443122573196888, 0.028465820476412773, 0.11253371834754944, 0.02299223281443119, 0.013287386856973171, 0.043506089597940445, 0.09705191105604172, 0.08899306505918503, 0.14267200231552124, 0.1414598524570465, 0.04555709660053253, 0.08242949843406677, 0.2358742356300354, 0.30384859442710876, NaN, NaN, NaN], [0.14026813209056854, 0.02709769457578659, 0.07936792075634003, 0.07383942604064941, 0.01026969589293003, 0.007506935391575098, 0.01013263501226902, 0.043357811868190765, 0.054843299090862274, 0.032377004623413086, 0.07885654270648956, 0.05951513722538948, 0.021026868373155594, 0.029062975198030472, 0.004067933652549982, 0.00896876398473978, 0.031901001930236816, 0.2457016408443451, 0.1949184089899063, 0.16180625557899475, 0.23649972677230835, 0.020314330235123634, 0.390868216753006, NaN, NaN], [0.036581799387931824, 0.048626694828271866, 0.015552042052149773, 0.027681825682520866, 0.03610476478934288, 0.033903565257787704, 0.10816461592912674, 0.038128215819597244, 0.015381437726318836, 0.020138615742325783, 0.04596110060811043, 0.12391334027051926, 0.008882056921720505, 0.017164889723062515, 0.019657107070088387, 0.039318498224020004, 0.012226631864905357, 0.12883862853050232, 0.2578184902667999, 0.03228205814957619, 0.13855229318141937, 0.08962707966566086, 0.32015570998191833, 0.32621434330940247, NaN], [0.16620944440364838, 0.03880922496318817, 0.027515552937984467, 0.018877340480685234, 0.019147777929902077, 0.2389368712902069, 0.02623477764427662, 0.012871777638792992, 0.013969821855425835, 0.021991701796650887, 0.0026013199239969254, 0.00741098215803504, 0.01774594374001026, 0.003101027337834239, 0.007316285278648138, 0.009464021772146225, 0.007634901907294989, 0.005969886668026447, 0.011287253350019455, 0.04429420828819275, 0.016200777143239975, 0.03440575301647186, 0.14183124899864197, 0.1436305195093155, 0.03402799740433693]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13823550939559937, 0.01690824329853058, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1366243064403534, 0.10029595345258713, 0.03309698402881622, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14204008877277374, 0.17578311264514923, 0.058153361082077026, 0.03275991603732109, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15378697216510773, 0.06811928749084473, 0.031730279326438904, 0.02174059860408306, 0.06419884413480759, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2336570769548416, 0.05475717782974243, 0.004165933933109045, 0.0025384188629686832, 0.005177688784897327, 0.12858138978481293, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1292651742696762, 0.01662198081612587, 0.01174056064337492, 0.002378111705183983, 0.04036910459399223, 0.6038607358932495, 0.053664252161979675, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13257111608982086, 0.0015173845458775759, 0.11979293078184128, 0.025075461715459824, 0.17128729820251465, 0.38108551502227783, 0.04533570259809494, 0.02173132263123989, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12533389031887054, 0.01691550202667713, 0.03341663256287575, 0.04296481981873512, 0.13898836076259613, 0.21484552323818207, 0.09921174496412277, 0.178620383143425, 0.08540544658899307, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19628551602363586, 0.0262758769094944, 0.06177970767021179, 0.020167797803878784, 0.21508394181728363, 0.05243970826268196, 0.05236654728651047, 0.019688904285430908, 0.04470491781830788, 0.03636182099580765, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10685201734304428, 0.1520930975675583, 0.22691352665424347, 0.1206204891204834, 0.20647111535072327, 0.3387817144393921, 0.17652125656604767, 0.14866295456886292, 0.058651361614465714, 0.13512541353702545, 0.029732942581176758, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14931687712669373, 0.17397953569889069, 0.045104723423719406, 0.029273295775055885, 0.009919327683746815, 0.05321130529046059, 0.40632039308547974, 0.053491849452257156, 0.10154163092374802, 0.08916116505861282, 0.038379959762096405, 0.050926242023706436, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1467411071062088, 0.6613936424255371, 0.30691561102867126, 0.27473992109298706, 0.05103013291954994, 0.09803401678800583, 0.18992389738559723, 0.012332501821219921, 0.08918186277151108, 0.009687116369605064, 0.01925584301352501, 0.0046735359355807304, 0.006799460854381323, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23535212874412537, 0.03722311928868294, 0.0383867472410202, 0.06886720657348633, 0.040591221302747726, 0.07368911802768707, 0.09838991612195969, 0.052333034574985504, 0.3684787154197693, 0.05692664161324501, 0.030762571841478348, 0.0074586388655006886, 0.017855344340205193, 0.004115242511034012, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17482686042785645, 0.020169643685221672, 0.038628242909908295, 0.03409411385655403, 0.011309999041259289, 0.013418656773865223, 0.010934274643659592, 0.0036632094997912645, 0.017374617978930473, 0.023464469239115715, 0.0031370571814477444, 0.004764250945299864, 0.022831382229924202, 0.0012565170181915164, 0.01132481824606657, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2204812914133072, 0.0262058824300766, 0.011961801908910275, 0.00864139012992382, 0.033310361206531525, 0.014301336370408535, 0.009627565741539001, 0.26419174671173096, 0.09070254862308502, 0.04369048774242401, 0.05080936849117279, 0.022543352097272873, 0.012377972714602947, 0.030277462676167488, 0.2341402769088745, 0.01971697248518467, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.253863126039505, 0.004828702192753553, 0.05376851186156273, 0.11550138890743256, 0.1064227893948555, 0.03894256055355072, 0.006152869202196598, 0.03161965310573578, 0.06215812265872955, 0.10950783640146255, 0.01032247580587864, 0.005066303536295891, 0.011880352161824703, 0.09494113177061081, 0.06700112670660019, 0.10617008060216904, 0.020382743328809738, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04813924431800842, 0.008662978187203407, 0.10469061881303787, 0.06787187606096268, 0.02962217852473259, 0.04144993796944618, 0.019078848883509636, 0.10597121715545654, 0.0923849567770958, 0.24696239829063416, 0.010940729640424252, 0.060362689197063446, 0.059540145099163055, 0.36283043026924133, 0.1817280501127243, 0.2542697787284851, 0.10456714779138565, 0.017782384529709816, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10143542289733887, 0.13917230069637299, 0.040259018540382385, 0.030723553150892258, 0.006155712995678186, 0.031952716410160065, 0.3338092863559723, 0.06915750354528427, 0.1324792504310608, 0.11542332917451859, 0.05764009431004524, 0.04023035988211632, 0.03596781566739082, 0.1495574563741684, 0.02840258926153183, 0.049019940197467804, 0.4096885919570923, 0.03150010108947754, 0.02953496389091015, NaN, NaN, NaN, NaN, NaN, NaN], [0.1521255224943161, 0.6490614414215088, 0.39427587389945984, 0.3861289620399475, 0.05361294746398926, 0.09808307886123657, 0.16810499131679535, 0.014004985801875591, 0.1451900601387024, 0.008040589280426502, 0.022555561736226082, 0.013471563346683979, 0.006859058979898691, 0.05312783271074295, 0.04058152437210083, 0.023753749206662178, 0.3811529278755188, 0.052651502192020416, 0.007359141018241644, 0.007947265170514584, NaN, NaN, NaN, NaN, NaN], [0.2650813162326813, 0.032561566680669785, 0.05222610384225845, 0.09714324027299881, 0.038093939423561096, 0.08016244322061539, 0.09171951562166214, 0.056265611201524734, 0.42980653047561646, 0.0462084598839283, 0.03524700179696083, 0.017182864248752594, 0.04137876257300377, 0.007372017949819565, 0.08077534288167953, 0.07507885992527008, 0.050101280212402344, 0.02560576982796192, 0.006666052620857954, 0.016142593696713448, 0.003943128511309624, NaN, NaN, NaN, NaN], [0.186274453997612, 0.02024305984377861, 0.052268851548433304, 0.04830823838710785, 0.011142827570438385, 0.015970220789313316, 0.01383616030216217, 0.004258061293512583, 0.024750858545303345, 0.02320612221956253, 0.004944193176925182, 0.006908308248966932, 0.022138824686408043, 0.002315782941877842, 0.022694725543260574, 0.010753386653959751, 0.0032616793178021908, 0.0013332129456102848, 0.0031688748858869076, 0.015737321227788925, 0.00092066585784778, 0.009911282919347286, NaN, NaN, NaN], [0.2620354890823364, 0.032388050109148026, 0.01473915670067072, 0.01008685864508152, 0.03682388737797737, 0.017798764631152153, 0.012407293543219566, 0.2692665457725525, 0.10958822816610336, 0.03793380409479141, 0.07735131680965424, 0.03087974339723587, 0.01817244663834572, 0.0740593820810318, 0.5664002895355225, 0.01639901101589203, 0.07361851632595062, 0.02498074807226658, 0.01953950524330139, 0.011185318231582642, 0.024920325726270676, 0.19407986104488373, 0.01722806692123413, NaN, NaN], [0.27593934535980225, 0.005811678245663643, 0.07111961394548416, 0.13982559740543365, 0.1345955729484558, 0.06462955474853516, 0.009384723380208015, 0.03974011912941933, 0.0818282812833786, 0.09768332540988922, 0.015042337588965893, 0.006764655001461506, 0.01590757444500923, 0.11177312582731247, 0.1289886087179184, 0.2743605673313141, 0.018859822303056717, 0.01428449247032404, 0.0072670611552894115, 0.013756940141320229, 0.08787993341684341, 0.08323681354522705, 0.09635237604379654, 0.025643613189458847, NaN], [0.17263205349445343, 0.01194645743817091, 0.02866498939692974, 0.16296441853046417, 0.0019488729303702712, 0.034664519131183624, 0.05397665500640869, 0.1285821497440338, 0.10828299820423126, 0.02950196899473667, 0.008275950327515602, 0.008977574296295643, 0.09588290750980377, 0.01758315972983837, 0.00981396809220314, 0.06520896404981613, 0.03634792938828468, 0.007794357370585203, 0.007516053505241871, 0.0633511170744896, 0.016588596627116203, 0.008872142061591148, 0.04887184873223305, 0.025813041254878044, 0.0022019031457602978]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13826748728752136, 0.016647184267640114, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12115656584501266, 0.053111400455236435, 0.35221540927886963, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06620940566062927, 0.0874415934085846, 0.3174281120300293, 0.09698687493801117, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05510773882269859, 0.045387670397758484, 0.35701045393943787, 0.5011870265007019, 0.0787656381726265, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05231153964996338, 0.1393265277147293, 0.34751832485198975, 0.15474379062652588, 0.1892920285463333, 0.06652400642633438, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04669328033924103, 0.038986966013908386, 0.38860636949539185, 0.09904015064239502, 0.3339899182319641, 0.027963249012827873, 0.04134462773799896, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20758312940597534, 0.07789289951324463, 0.047907259315252304, 0.006299893371760845, 0.2608397901058197, 0.044556185603141785, 0.061705876141786575, 0.034865181893110275, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18052776157855988, 0.08179321140050888, 0.059846919029951096, 0.02793782763183117, 0.062999427318573, 0.04310278594493866, 0.024987775832414627, 0.015387488529086113, 0.132792130112648, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03587701544165611, 0.020078828558325768, 0.04571571201086044, 0.02593454346060753, 0.007220670115202665, 0.03280382603406906, 0.012364541180431843, 0.04736338183283806, 0.48638036847114563, 0.015403805300593376, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.010417330078780651, 0.019508572295308113, 0.03964173421263695, 0.041229844093322754, 0.021899865940213203, 0.0029071751050651073, 0.010124437510967255, 0.08508285880088806, 0.40291228890419006, 0.4734281599521637, 0.015163381583988667, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08744391798973083, 0.1107466071844101, 0.15557123720645905, 0.13837403059005737, 0.05803389474749565, 0.026755833998322487, 0.03754325956106186, 0.4220706820487976, 0.16102783381938934, 0.2859216034412384, 0.1457504779100418, 0.03281670808792114, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21633882820606232, 0.07441287487745285, 0.04740259423851967, 0.026924576610326767, 0.012407396920025349, 0.002398786135017872, 0.0038467273116111755, 0.13835540413856506, 0.06710492819547653, 0.026295386254787445, 0.17057135701179504, 0.013244924135506153, 0.46883779764175415, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027107199653983116, 0.05742119997739792, 0.06533583253622055, 0.024222400039434433, 0.014050583355128765, 0.013653005473315716, 0.0030738371424376965, 0.04425956308841705, 0.06826918572187424, 0.011929179541766644, 0.14959540963172913, 0.16161218285560608, 0.5212987065315247, 0.041249219328165054, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12232528626918793, 0.02327316626906395, 0.043996360152959824, 0.010462167672812939, 0.05786772817373276, 0.006097386125475168, 0.001271827262826264, 0.022651376202702522, 0.03627351298928261, 0.030646052211523056, 0.03145253658294678, 0.18536151945590973, 0.10030946880578995, 0.3235938847064972, 0.09760642796754837, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01696004532277584, 0.0005225083441473544, 0.012039890512824059, 0.0003213977033738047, 0.024568837136030197, 0.0005492557538673282, 6.035636397427879e-05, 0.0032521369867026806, 0.016784805804491043, 0.013033770024776459, 0.023488081991672516, 0.04594254866242409, 0.04732683673501015, 0.2366781234741211, 0.2578820288181305, 0.02447950839996338, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.016271475702524185, 0.026037830859422684, 0.05988215655088425, 0.04065781086683273, 0.0548781082034111, 0.0059303357265889645, 0.000490839418489486, 0.009792556054890156, 0.05564826726913452, 0.029693011194467545, 0.015783851966261864, 0.050408631563186646, 0.10483089834451675, 0.18894171714782715, 0.4590488076210022, 0.24355939030647278, 0.03408684581518173, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011992339976131916, 0.02786487340927124, 0.025577154010534286, 0.02912752889096737, 0.009845648892223835, 0.0007121131638996303, 0.001387864351272583, 0.015649031847715378, 0.05334821715950966, 0.05039743706583977, 0.0003855754912365228, 0.07798124849796295, 0.03745294734835625, 0.16697214543819427, 0.29521557688713074, 0.2776513993740082, 0.29445046186447144, 0.031993161886930466, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11517049372196198, 0.11416894942522049, 0.19162771105766296, 0.14611610770225525, 0.060761958360672, 0.02055470645427704, 0.021888524293899536, 0.20655019581317902, 0.047658227384090424, 0.055987950414419174, 0.01683689095079899, 0.005808014422655106, 0.045862384140491486, 0.09340663254261017, 0.10908356308937073, 0.18944555521011353, 0.26804569363594055, 0.20485185086727142, 0.037772081792354584, NaN, NaN, NaN, NaN, NaN, NaN], [0.24184046685695648, 0.07921410351991653, 0.056290365755558014, 0.026794791221618652, 0.016941547393798828, 0.0021516080014407635, 0.0023830668069422245, 0.05685606598854065, 0.02070370689034462, 0.003236053278669715, 0.01165463775396347, 0.004370343871414661, 0.030780060216784477, 0.00907946564257145, 0.06188458576798439, 0.04407832771539688, 0.006142587400972843, 0.14762946963310242, 0.013672620058059692, 0.4999893307685852, NaN, NaN, NaN, NaN, NaN], [0.03566991165280342, 0.0538097508251667, 0.09943600744009018, 0.028607800602912903, 0.020965654402971268, 0.013461945578455925, 0.002478980924934149, 0.02911236882209778, 0.02446376532316208, 0.0022762087173759937, 0.010774179361760616, 0.04047773778438568, 0.06471210718154907, 0.0026813328731805086, 0.07523855566978455, 0.030470186844468117, 0.0345987044274807, 0.1238497719168663, 0.17781274020671844, 0.4970780611038208, 0.04515520855784416, NaN, NaN, NaN, NaN], [0.12716706097126007, 0.02434932254254818, 0.05787394568324089, 0.013031681068241596, 0.06681805849075317, 0.007088592275977135, 0.0018475945107638836, 0.021072670817375183, 0.024636711925268173, 0.010089303366839886, 0.0076353950425982475, 0.05158482864499092, 0.009980393573641777, 0.034229546785354614, 0.01627102866768837, 0.008032353594899178, 0.013575052842497826, 0.04940066114068031, 0.19428585469722748, 0.10819438844919205, 0.2976790964603424, 0.08516447991132736, NaN, NaN, NaN], [0.01713084802031517, 0.000499976216815412, 0.019638467580080032, 0.00048709739348851144, 0.03356647491455078, 0.0008144291932694614, 0.00011953162174904719, 0.003664336632937193, 0.013800683431327343, 0.004805452190339565, 0.004433726891875267, 0.011711561121046543, 0.003556638490408659, 0.01588965393602848, 0.025807680562138557, 0.00022126971452962607, 0.004036479629576206, 0.00837762001901865, 0.04655361920595169, 0.04086336866021156, 0.22630761563777924, 0.2765483856201172, 0.02425519935786724, NaN, NaN], [0.010901566594839096, 0.020337969064712524, 0.07802019268274307, 0.0504593625664711, 0.06312800198793411, 0.009868033230304718, 0.000861799344420433, 0.010114955715835094, 0.052247028797864914, 0.012602821923792362, 0.005399123765528202, 0.01934058591723442, 0.013776490464806557, 0.010564911179244518, 0.04300173744559288, 0.008748980239033699, 0.0006391598144546151, 0.006108305882662535, 0.05087457224726677, 0.09035929292440414, 0.18751013278961182, 0.4462290108203888, 0.28552356362342834, 0.05451636388897896, NaN], [0.1367119550704956, 0.02979014255106449, 0.04602046683430672, 0.022530242800712585, 0.009278235025703907, 0.01184787880629301, 0.010125648230314255, 0.02445557340979576, 0.052750833332538605, 0.013119504787027836, 0.0006633299053646624, 0.007243738044053316, 0.02398994006216526, 0.00908573716878891, 0.013761860318481922, 0.007176807615906, 0.00677318312227726, 0.0021949538495391607, 0.01309704128652811, 0.09677710384130478, 0.12711098790168762, 0.1613820642232895, 0.37058699131011963, 0.3504316806793213, 0.02586444839835167]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13988038897514343, 0.003474950324743986, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14879919588565826, 0.018745053559541702, 0.07372914999723434, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.030327370390295982, 0.02692173607647419, 0.46947386860847473, 0.09036581218242645, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.164228156208992, 0.0009850627975538373, 0.0044541023671627045, 0.0005622706958092749, 0.024160074070096016, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.020124448463320732, 0.0011880549136549234, 0.0042731426656246185, 3.242780803702772e-05, 0.6858344078063965, 0.023040860891342163, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0017230550292879343, 3.356653905939311e-05, 0.001307086437009275, 1.4968540199333802e-05, 0.5564903616905212, 0.236929789185524, 0.007688341196626425, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1612924486398697, 0.00029754414572380483, 0.0029063820838928223, 0.0015110797248780727, 0.16695675253868103, 0.3453270196914673, 0.07193248718976974, 0.006359610706567764, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1910298615694046, 0.01051796693354845, 0.0018660163041204214, 0.0012154864380136132, 0.022663934156298637, 0.008557457476854324, 0.016767704859375954, 0.05246622860431671, 0.08816055208444595, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24295811355113983, 0.0012021175352856517, 0.0005200211890041828, 0.00015996988804545254, 0.002627951791509986, 0.03450923040509224, 0.014827161096036434, 0.015967652201652527, 0.005632439162582159, 0.001854590023867786, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2492469847202301, 0.004325273912400007, 0.004784590099006891, 0.013903478160500526, 0.0013026667293161154, 0.003877879586070776, 0.017029188573360443, 0.01781909167766571, 0.05003270506858826, 0.026610376313328743, 0.008462576195597649, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.25306010246276855, 0.0017952719936147332, 0.005404005758464336, 0.021692873910069466, 0.0005702165653929114, 9.544018394080922e-05, 0.001603480544872582, 0.001225438085384667, 0.036846794188022614, 0.001749897957779467, 0.016878794878721237, 0.021703237667679787, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.055758021771907806, 0.000425096252001822, 0.0005783061496913433, 0.0011671994579955935, 0.00034630659501999617, 0.00031045774812810123, 0.0006358043756335974, 0.004018810577690601, 0.0004720573779195547, 0.006387148518115282, 0.038948215544223785, 0.40798652172088623, 0.0038703898899257183, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.29551389813423157, 0.006183725781738758, 0.0010477532632648945, 0.001470124931074679, 0.0028535614255815744, 0.003910644445568323, 0.004942604340612888, 0.003798475954681635, 0.01567114144563675, 0.060374900698661804, 0.006600319407880306, 0.010896215215325356, 0.009779008105397224, 0.007320093456655741, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1632017195224762, 0.00519327400252223, 0.00790441408753395, 0.0009941658936440945, 0.3241596221923828, 0.0008480648975819349, 0.0001429034018656239, 0.0012253100285306573, 0.0008457236108370125, 0.006411578040570021, 0.0016067628748714924, 0.003762597683817148, 0.029224932193756104, 0.07677540183067322, 0.06338826566934586, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005401996895670891, 6.3005199990584515e-06, 0.0004310416697990149, 8.47076989884954e-06, 0.009243682958185673, 0.0008590375073254108, 4.37394373875577e-06, 6.523932825075462e-05, 8.531090134056285e-05, 0.0006816720124334097, 7.644478318979964e-05, 0.00018924157484434545, 0.0012375408550724387, 0.023784970864653587, 0.4309314787387848, 0.034907225519418716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.29775136709213257, 0.006892140489071608, 0.009814155288040638, 0.016249310225248337, 0.004830268211662769, 0.0035455955658107996, 0.0007549467263743281, 0.000541276705916971, 0.0031480982434004545, 0.001557780895382166, 0.0010192448971793056, 0.0018504501786082983, 0.002619183622300625, 0.1016833484172821, 0.03818811476230621, 0.06928347051143646, 0.0412699431180954, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.26683223247528076, 0.0017643374158069491, 0.02531762421131134, 0.047485485672950745, 0.0005023732082918286, 0.0011795219033956528, 0.002227108459919691, 0.0028741960413753986, 0.005215880926698446, 0.001946018310263753, 3.592624852899462e-05, 0.001338632428087294, 0.0025214410852640867, 0.07723907381296158, 0.012742026709020138, 0.25196006894111633, 0.052669085562229156, 0.020061112940311432, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3006725609302521, 0.0014043879928067327, 0.009936605580151081, 0.037061650305986404, 0.0005129858036525548, 5.274279828881845e-05, 0.0006371501949615777, 0.00048446646542288363, 0.015043019317090511, 0.0003374778898432851, 0.0015171451959758997, 0.001911269617266953, 0.0014702629996463656, 0.015123972669243813, 0.0006335150101222098, 0.0006853189552202821, 0.0006114236894063652, 0.013829384930431843, 0.010252222418785095, NaN, NaN, NaN, NaN, NaN, NaN], [0.11150761693716049, 0.0006332705961540341, 0.0012255925685167313, 0.0022868558298796415, 0.0007688697660341859, 0.00046408100752159953, 0.0006869957433082163, 0.0021696356125175953, 0.0003113164857495576, 0.0013619231758639216, 0.004312699660658836, 0.1263500303030014, 0.0001710234791971743, 0.0024227115791291, 0.0006429344066418707, 0.008991677314043045, 0.01230061985552311, 0.025017380714416504, 0.33947470784187317, 0.0032216052059084177, NaN, NaN, NaN, NaN, NaN], [0.31111404299736023, 0.0035644923336803913, 0.0013678895775228739, 0.0016790243098512292, 0.0035299588926136494, 0.004438228905200958, 0.004504224751144648, 0.0015486004995182157, 0.006104794796556234, 0.009403211995959282, 0.00038756802678108215, 0.001732571516185999, 0.00042684219079092145, 0.00029873420135118067, 0.02043243870139122, 0.02443091571331024, 0.011036018840968609, 0.0030384601559489965, 0.007405058480799198, 0.004648045636713505, 0.010011163540184498, NaN, NaN, NaN, NaN], [0.16896948218345642, 0.0033956619445234537, 0.009647470898926258, 0.0011160745052620769, 0.30864211916923523, 0.0008666384965181351, 0.0001862353819888085, 0.0007671809289604425, 0.0006719603552483022, 0.002030742121860385, 0.00038655498065054417, 0.0009093419066630304, 0.0015865613240748644, 0.007534818258136511, 0.009185722097754478, 0.00011195908882655203, 0.003075815038755536, 0.000886340974830091, 0.0034873690456151962, 0.021776562556624413, 0.11334169656038284, 0.0832705944776535, NaN, NaN, NaN], [0.006588279269635677, 7.165617716964334e-06, 0.0005450915195979178, 1.0953889614029322e-05, 0.01959507167339325, 0.001590097788721323, 1.1096496564277913e-05, 7.439414184773341e-05, 9.72584675764665e-05, 0.00039174238918349147, 2.7912905352422968e-05, 4.964227991877124e-05, 7.256279786815867e-05, 0.00222678086720407, 0.04727102443575859, 0.0002576226834207773, 0.00020273383415769786, 7.391278631985188e-05, 0.00018598776659928262, 0.000617648009210825, 0.03195251524448395, 0.45461374521255493, 0.037591490894556046, NaN, NaN], [0.35417911410331726, 0.010997277684509754, 0.014662563800811768, 0.023722819983959198, 0.01071385107934475, 0.009427045471966267, 0.002653747797012329, 0.0011037624208256602, 0.005973298568278551, 0.0016420705942437053, 0.0009447215707041323, 0.001327668083831668, 0.0005524749867618084, 0.012130306102335453, 0.005379356909543276, 0.0037436189595609903, 0.0009285339619964361, 0.0002853046462405473, 0.0013114019529893994, 0.0012977200094610453, 0.08090774714946747, 0.034737478941679, 0.058711227029561996, 0.0672648623585701, NaN], [0.18188641965389252, 0.00040442554745823145, 0.0015771333128213882, 0.005189571529626846, 8.387575689994264e-06, 0.0001226859458256513, 0.0011242604814469814, 0.0013583728577941656, 0.0030172227416187525, 0.00029841059586033225, 1.2829146726289764e-05, 0.001467264024540782, 0.001090237987227738, 0.002914785873144865, 0.0006871690275147557, 0.002592542441561818, 0.00021328746515791863, 6.871169898658991e-05, 0.002350796014070511, 0.0026233955286443233, 0.02620280720293522, 0.005966363474726677, 0.08270465582609177, 0.010547555983066559, 0.018362630158662796]]], [[[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13007116317749023, 0.035988736897706985, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17991511523723602, 0.05124381557106972, 0.013642107136547565, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16831281781196594, 0.043814778327941895, 0.0950295478105545, 0.07350433617830276, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13759823143482208, 0.14112484455108643, 0.20577600598335266, 0.13910864293575287, 0.034107428044080734, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11619941890239716, 0.038306448608636856, 0.06045802682638168, 0.03494013100862503, 0.374624639749527, 0.22046393156051636, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08332619816064835, 0.009484739042818546, 0.012810231186449528, 0.0027760458178818226, 0.3268325924873352, 0.26342087984085083, 0.17634892463684082, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.057563915848731995, 0.01992173306643963, 0.03713805601000786, 0.014863312244415283, 0.25726908445358276, 0.14832180738449097, 0.402090460062027, 0.06479739397764206, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21478669345378876, 0.15359601378440857, 0.26770198345184326, 0.12653663754463196, 0.09151764959096909, 0.07003500312566757, 0.19363711774349213, 0.014233908616006374, 0.023967349901795387, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2834857702255249, 0.07559704780578613, 0.07655511796474457, 0.16202391684055328, 0.08316012471914291, 0.11911017447710037, 0.0204884335398674, 0.011816238984465599, 0.13204774260520935, 0.039266277104616165, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23006244003772736, 0.03933367133140564, 0.07187695801258087, 0.04476522281765938, 0.01073860377073288, 0.0032203071750700474, 0.00176758982706815, 0.018770985305309296, 0.12121162563562393, 0.18536020815372467, 0.01582610420882702, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18067117035388947, 0.009833509102463722, 0.03744787722826004, 0.016920698806643486, 0.05744745582342148, 0.04540643468499184, 0.008024180307984352, 0.012110988609492779, 0.09370782226324081, 0.08820194005966187, 0.06259123980998993, 0.025030089542269707, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11523616313934326, 0.03200709819793701, 0.050564926117658615, 0.010618647560477257, 0.09430865943431854, 0.018685024231672287, 0.022438397631049156, 0.017720744013786316, 0.1592920571565628, 0.21717989444732666, 0.2463550567626953, 0.2194516956806183, 0.0009421245777048171, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09747911244630814, 0.1645127683877945, 0.1875433474779129, 0.09478750824928284, 0.08721300214529037, 0.02294742316007614, 0.02039182186126709, 0.07351931929588318, 0.1815827339887619, 0.5564144849777222, 0.41975197196006775, 0.2698606848716736, 0.05650324374437332, 0.05821085348725319, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14833268523216248, 0.1209164559841156, 0.08990822732448578, 0.0656033307313919, 0.23720099031925201, 0.11782333254814148, 0.04633651673793793, 0.16808320581912994, 0.06126163899898529, 0.43528908491134644, 0.3754012882709503, 0.13757933676242828, 0.05596579611301422, 0.16984672844409943, 0.002737722359597683, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19258342683315277, 0.05838138237595558, 0.04652376100420952, 0.017318567261099815, 0.23482391238212585, 0.16333334147930145, 0.02100907638669014, 0.048424359411001205, 0.06841404736042023, 0.3133482038974762, 0.07921069860458374, 0.021035969257354736, 0.03291412815451622, 0.18175286054611206, 0.1566929817199707, 0.053215935826301575, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17641158401966095, 0.15294750034809113, 0.15352487564086914, 0.10843643546104431, 0.08260629326105118, 0.016529222950339317, 0.012650150805711746, 0.07893627882003784, 0.1388573795557022, 0.19094663858413696, 0.03751035034656525, 0.05650494620203972, 0.2426995038986206, 0.16961677372455597, 0.07263431698083878, 0.152814581990242, 0.018521834164857864, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.25574439764022827, 0.04364950954914093, 0.05707173049449921, 0.02453112043440342, 0.016254547983407974, 0.0026636396069079638, 0.0035282839089632034, 0.015699811279773712, 0.03404982015490532, 0.04375504329800606, 0.001423283712938428, 0.05359426140785217, 0.1740386039018631, 0.10691730678081512, 0.03620539605617523, 0.04950953647494316, 0.022295303642749786, 0.025807255879044533, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.216966450214386, 0.016096990555524826, 0.08351551741361618, 0.02645382098853588, 0.05811392888426781, 0.04091750830411911, 0.014506897889077663, 0.015038754791021347, 0.07221462577581406, 0.08585365861654282, 0.059816163033246994, 0.04502185434103012, 0.00397779606282711, 0.041175276041030884, 0.04448581859469414, 0.10983181744813919, 0.01911303587257862, 0.07987141609191895, 0.062483180314302444, NaN, NaN, NaN, NaN, NaN, NaN], [0.11257521063089371, 0.027663733810186386, 0.023284420371055603, 0.0038690094370394945, 0.053685132414102554, 0.008445030078291893, 0.014706910587847233, 0.009755544364452362, 0.06406830251216888, 0.10475295782089233, 0.08554040640592575, 0.16072620451450348, 0.00029980239924043417, 0.03509804978966713, 0.03031017631292343, 0.04435117170214653, 0.06420817226171494, 0.2780051827430725, 0.2271702140569687, 0.0013584558619186282, NaN, NaN, NaN, NaN, NaN], [0.10895614326000214, 0.15509657561779022, 0.19682957231998444, 0.07681374996900558, 0.06229116767644882, 0.016663551330566406, 0.015513443388044834, 0.04232686012983322, 0.0986364334821701, 0.35070890188217163, 0.19941051304340363, 0.163076713681221, 0.026361489668488503, 0.018140846863389015, 0.016411108896136284, 0.03203867748379707, 0.053678009659051895, 0.19773079454898834, 0.3572796881198883, 0.059515852481126785, 0.04298213869333267, NaN, NaN, NaN, NaN], [0.15568822622299194, 0.11876019835472107, 0.09203660488128662, 0.059780094772577286, 0.24089980125427246, 0.06525673717260361, 0.029934749007225037, 0.11168782413005829, 0.03211824223399162, 0.30118685960769653, 0.22822384536266327, 0.08190999180078506, 0.018841415643692017, 0.1366286426782608, 0.0017427116399630904, 0.02601366490125656, 0.09386949241161346, 0.19522085785865784, 0.1546826809644699, 0.06491755694150925, 0.19679579138755798, 0.0025137634947896004, NaN, NaN, NaN], [0.26271528005599976, 0.07045364379882812, 0.0520184300839901, 0.023400958627462387, 0.11433269083499908, 0.07895253598690033, 0.012276851572096348, 0.023823700845241547, 0.04200353845953941, 0.16687022149562836, 0.05654531344771385, 0.038080912083387375, 0.012698299251496792, 0.10473722219467163, 0.0643644630908966, 0.015445034019649029, 0.014234953559935093, 0.06144930049777031, 0.05821693688631058, 0.0568128302693367, 0.1767931431531906, 0.1402994990348816, 0.07714083790779114, NaN, NaN], [0.1969611942768097, 0.16093717515468597, 0.1609625220298767, 0.11138524115085602, 0.026131147518754005, 0.00619129091501236, 0.005407778546214104, 0.04104578495025635, 0.06517186760902405, 0.06833471357822418, 0.020616043359041214, 0.03467438742518425, 0.095084547996521, 0.06247802451252937, 0.022057469934225082, 0.06569864600896835, 0.0052108620293438435, 0.03032413311302662, 0.0838729590177536, 0.3427644968032837, 0.19215865433216095, 0.08116735517978668, 0.14785417914390564, 0.015012684278190136, NaN], [0.1272672563791275, 0.008308093063533306, 0.030398543924093246, 0.02721896767616272, 0.016537277027964592, 0.021588556468486786, 0.002818688517436385, 0.010970782488584518, 0.01434051152318716, 0.012293173000216484, 0.04184769093990326, 0.03683166950941086, 0.023453323170542717, 0.020430248230695724, 0.03333409130573273, 0.068024642765522, 0.02648366242647171, 0.1640448421239853, 0.109919473528862, 0.1576652079820633, 0.14138163626194, 0.16884489357471466, 0.30372628569602966, 0.2283693552017212, 0.17022481560707092]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12451039254665375, 0.1335938721895218, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18396444618701935, 0.017508728429675102, 0.02471269853413105, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18453162908554077, 0.038695670664310455, 0.04155581444501877, 0.05072518810629845, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14826133847236633, 0.04252630099654198, 0.08689215034246445, 0.08308856934309006, 0.015247097238898277, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1348571479320526, 0.07033194601535797, 0.10030655562877655, 0.13752251863479614, 0.030713800340890884, 0.1331333965063095, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20671042799949646, 0.05809834972023964, 0.1630101054906845, 0.06033356115221977, 0.07501133531332016, 0.017328333109617233, 0.028450097888708115, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15813153982162476, 0.14090144634246826, 0.26030233502388, 0.10773709416389465, 0.16133210062980652, 0.04816069453954697, 0.01304988656193018, 0.13335363566875458, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3033713400363922, 0.22469042241573334, 0.4264413118362427, 0.3422197103500366, 0.14910078048706055, 0.06983038783073425, 0.023690486326813698, 0.010566752403974533, 0.05880258232355118, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.25368839502334595, 0.33459752798080444, 0.3829180896282196, 0.2782860994338989, 0.2427205741405487, 0.08768615871667862, 0.031752120703458786, 0.02143564634025097, 0.03798065707087517, 0.07379034906625748, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14200474321842194, 0.2391311228275299, 0.18728229403495789, 0.11236919462680817, 0.20923744142055511, 0.13365258276462555, 0.052715059369802475, 0.134474515914917, 0.14480768144130707, 0.06683899462223053, 0.104619100689888, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09595079720020294, 0.2752297520637512, 0.21842314302921295, 0.13660691678524017, 0.35477691888809204, 0.37130749225616455, 0.20556269586086273, 0.35276445746421814, 0.31008264422416687, 0.11074709892272949, 0.19841141998767853, 0.07199764251708984, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15323933959007263, 0.4611065983772278, 0.07869336754083633, 0.03600241616368294, 0.47375282645225525, 0.7350273132324219, 0.297486275434494, 0.6052883863449097, 0.4953201115131378, 0.144621342420578, 0.3493393063545227, 0.04881289228796959, 0.10520726442337036, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12003841996192932, 0.2704387903213501, 0.20063650608062744, 0.23778890073299408, 0.36254584789276123, 0.5319709777832031, 0.4483972191810608, 0.15058189630508423, 0.11134153604507446, 0.09426670521497726, 0.21241672337055206, 0.10488338023424149, 0.049764484167099, 0.15823495388031006, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15233570337295532, 0.21891875565052032, 0.13215333223342896, 0.2837490439414978, 0.08042775094509125, 0.43866410851478577, 0.2773631513118744, 0.12773916125297546, 0.3155127763748169, 0.07932031899690628, 0.1219707503914833, 0.11212008446455002, 0.1944955438375473, 0.07170752435922623, 0.004313962999731302, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2607015371322632, 0.3645761013031006, 0.37828943133354187, 0.3385462462902069, 0.2960833013057709, 0.5598280429840088, 0.544554591178894, 0.47054967284202576, 0.3477361798286438, 0.13701467216014862, 0.14822737872600555, 0.030188634991645813, 0.05528556555509567, 0.058441486209630966, 0.03410256654024124, 0.17273126542568207, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1872977614402771, 0.29805198311805725, 0.5206820368766785, 0.33024296164512634, 0.6395015716552734, 0.7210167050361633, 0.353913813829422, 0.406305193901062, 0.5096184015274048, 0.26257815957069397, 0.07301049679517746, 0.03464117646217346, 0.0787002444267273, 0.10916904360055923, 0.3557807505130768, 0.08364078402519226, 0.08538500964641571, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13269101083278656, 0.2835436165332794, 0.47488275170326233, 0.24851854145526886, 0.694171130657196, 0.6760384440422058, 0.2759343385696411, 0.29058361053466797, 0.7136873602867126, 0.20711864531040192, 0.04295802861452103, 0.07691331952810287, 0.11943909525871277, 0.1323360651731491, 0.20847304165363312, 0.05967296287417412, 0.12062160670757294, 0.09502720832824707, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.058743223547935486, 0.276242733001709, 0.29826071858406067, 0.20218241214752197, 0.4631478488445282, 0.48415693640708923, 0.2865871787071228, 0.3694051504135132, 0.4054408073425293, 0.19627220928668976, 0.2907293438911438, 0.09057808667421341, 0.11348091810941696, 0.21781016886234283, 0.38082650303840637, 0.3570795953273773, 0.22612451016902924, 0.09323522448539734, 0.03618632256984711, NaN, NaN, NaN, NaN, NaN, NaN], [0.07694489508867264, 0.41184449195861816, 0.038429711014032364, 0.018668875098228455, 0.5307568907737732, 0.7476497888565063, 0.4137455224990845, 0.6917499303817749, 0.6703397035598755, 0.3623183071613312, 0.579600989818573, 0.12613137066364288, 0.20100651681423187, 0.40998968482017517, 0.46115902066230774, 0.575211763381958, 0.35096046328544617, 0.163946270942688, 0.021770814433693886, 0.09986086189746857, NaN, NaN, NaN, NaN, NaN], [0.0834016501903534, 0.33346420526504517, 0.238715261220932, 0.28079062700271606, 0.5652539134025574, 0.6881173849105835, 0.5534363985061646, 0.22000034153461456, 0.1979052871465683, 0.3127084970474243, 0.4257359504699707, 0.18722867965698242, 0.1397658735513687, 0.3447277843952179, 0.13513657450675964, 0.31811001896858215, 0.32070791721343994, 0.12404847145080566, 0.05496959760785103, 0.04215753450989723, 0.16014836728572845, NaN, NaN, NaN, NaN], [0.13260646164417267, 0.29362690448760986, 0.18431688845157623, 0.38109344244003296, 0.20342527329921722, 0.5946046113967896, 0.4558189809322357, 0.26072001457214355, 0.5455912351608276, 0.2635512351989746, 0.31394094228744507, 0.23975242674350739, 0.36583349108695984, 0.2753828167915344, 0.01127256266772747, 0.41475725173950195, 0.29836422204971313, 0.2503683567047119, 0.10983213782310486, 0.21767295897006989, 0.0692884549498558, 0.003035380970686674, NaN, NaN, NaN], [0.2068602293729782, 0.4467880427837372, 0.4564751386642456, 0.4485791325569153, 0.45999279618263245, 0.6740500330924988, 0.7906107902526855, 0.6832103133201599, 0.5420533418655396, 0.4096798300743103, 0.3950984477996826, 0.13646338880062103, 0.10497336834669113, 0.17230592668056488, 0.07012390345335007, 0.27583980560302734, 0.3079235553741455, 0.1555996537208557, 0.038740403950214386, 0.05588690564036369, 0.03859011456370354, 0.02352789230644703, 0.12950412929058075, NaN, NaN], [0.16561447083950043, 0.3958832919597626, 0.5531814098358154, 0.4040684700012207, 0.7809365391731262, 0.8175305128097534, 0.5712264180183411, 0.6113651394844055, 0.6668697595596313, 0.4850655198097229, 0.18787693977355957, 0.08608534932136536, 0.19115354120731354, 0.2498423308134079, 0.6246696710586548, 0.31422460079193115, 0.373276948928833, 0.049351077526807785, 0.046956032514572144, 0.08076699078083038, 0.09392194449901581, 0.3349837362766266, 0.062239501625299454, 0.10001940280199051, NaN], [0.06568613648414612, 0.36780038475990295, 0.6246912479400635, 0.7116879820823669, 0.754679262638092, 0.7714072465896606, 0.7616819739341736, 0.5837911367416382, 0.9111838936805725, 0.8262851238250732, 0.6737059354782104, 0.5146453380584717, 0.7674095630645752, 0.7359525561332703, 0.5679676532745361, 0.7213301062583923, 0.6703079342842102, 0.5636342167854309, 0.38883939385414124, 0.5560528635978699, 0.518941342830658, 0.3739706873893738, 0.32013192772865295, 0.3743935525417328, 0.3977084755897522]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1305680274963379, 0.02726716920733452, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002169837476685643, 0.0032534021884202957, 0.5694547891616821, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1568225622177124, 0.12336109578609467, 0.028200775384902954, 0.03890102356672287, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008017625659704208, 0.013223886489868164, 0.04581261798739433, 0.017950134351849556, 0.8790656328201294, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08130903542041779, 0.2643316090106964, 0.5756329894065857, 0.29882851243019104, 0.31516125798225403, 0.09644471108913422, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20484277606010437, 0.3443664610385895, 0.0019387316424399614, 0.017399819567799568, 0.0004214652581140399, 0.00013534165918827057, 0.01563790813088417, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1571786254644394, 0.5643889307975769, 0.13441002368927002, 0.09036820381879807, 0.02947377972304821, 0.015878956764936447, 0.022048691287636757, 0.14189693331718445, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005826869048178196, 0.13292454183101654, 0.00521356426179409, 0.005004087463021278, 0.10703893005847931, 0.26877719163894653, 0.1785666048526764, 0.23197543621063232, 0.007970587350428104, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03136341646313667, 0.08873608708381653, 0.009185479953885078, 0.03043411858379841, 0.3010490834712982, 0.36070317029953003, 0.178965762257576, 0.21872122585773468, 0.005464768502861261, 0.06020791083574295, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07854610681533813, 0.03772095590829849, 0.016643106937408447, 0.02832828275859356, 0.0785825327038765, 0.09336084127426147, 0.24177083373069763, 0.2718014717102051, 0.12932275235652924, 0.08437053114175797, 0.24188947677612305, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17239268124103546, 0.029533302411437035, 0.030515655875205994, 0.026403654366731644, 0.05037287250161171, 0.13986584544181824, 0.11416076123714447, 0.08228978514671326, 0.26975753903388977, 0.020502708852291107, 0.030797043815255165, 0.006723156664520502, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.35662412643432617, 0.005917226430028677, 0.00044432797585614026, 0.00022813511895947158, 0.0073361690156161785, 0.0027237480971962214, 0.007987208664417267, 0.021625559777021408, 0.010472757741808891, 0.0008755659800954163, 0.012584702111780643, 0.000526397256180644, 0.01033733133226633, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.189227893948555, 0.01606086827814579, 0.0030457540415227413, 0.005861388053745031, 0.04963670298457146, 0.004091562703251839, 0.01225967425853014, 0.037419673055410385, 0.01020084973424673, 0.003108290024101734, 0.01512740459293127, 0.006679146084934473, 0.014098022133111954, 0.03816642239689827, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00965302623808384, 0.0035168000031262636, 0.03902876377105713, 0.0158648993819952, 0.32648226618766785, 0.0038036927580833435, 0.002248003613203764, 0.002372291637584567, 0.014672092162072659, 0.007728067692369223, 0.022481968626379967, 0.028911879286170006, 0.044244468212127686, 0.021532919257879257, 0.6417658925056458, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.037641312927007675, 0.005557402968406677, 0.0006393054500222206, 0.006437606643885374, 0.007460788358002901, 0.0009530181414447725, 0.0016025539953261614, 0.0067516821436584, 0.02322007343173027, 0.018459537997841835, 0.011051125824451447, 0.006488891318440437, 0.04039585590362549, 0.18200218677520752, 0.0006002468289807439, 0.6243939995765686, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.01615065336227417, 0.01699231006205082, 0.00012957912986166775, 0.016060354188084602, 0.0006264564581215382, 0.0012908404460176826, 0.002684527076780796, 0.027531128376722336, 0.015566377900540829, 0.003692139405757189, 0.5753727555274963, 0.5145941376686096, 0.03750383481383324, 0.009545800276100636, 0.0034461882896721363, 0.005381980445235968, 0.00046628122800029814, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.021861553192138672, 0.01695878431200981, 0.0018149337265640497, 0.015764223411679268, 0.007719711866229773, 0.0034752548672258854, 0.007653116714209318, 0.03472340479493141, 0.038436826318502426, 0.014262136071920395, 0.8426622748374939, 0.36256304383277893, 0.21876515448093414, 0.019672129303216934, 0.020847154781222343, 0.00781619269400835, 0.005409067030996084, 0.16073459386825562, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18507197499275208, 0.027911728248000145, 0.014699580147862434, 0.025536103174090385, 0.014524195343255997, 0.045023027807474136, 0.031167738139629364, 0.07539253681898117, 0.22652071714401245, 0.011904416605830193, 0.08752688765525818, 0.03955431655049324, 0.2908211648464203, 0.03612781688570976, 0.00514488760381937, 0.017019467428326607, 0.07116629183292389, 0.03509910777211189, 0.02026083506643772, NaN, NaN, NaN, NaN, NaN, NaN], [0.40259334444999695, 0.005078054964542389, 0.00017122419376391917, 9.21270766411908e-05, 0.002624903805553913, 0.0009363252320326865, 0.00360113475471735, 0.01331485528498888, 0.008243494667112827, 0.0007176694343797863, 0.019634194672107697, 0.002027983544394374, 0.02349759265780449, 0.030203014612197876, 0.000993669149465859, 0.0008422310347668827, 0.013102295808494091, 0.025159381330013275, 0.0006507099606096745, 0.018182074651122093, NaN, NaN, NaN, NaN, NaN], [0.2579963207244873, 0.021157346665859222, 0.002921733073890209, 0.006211739499121904, 0.031850416213274, 0.0022005264181643724, 0.0070661455392837524, 0.036871425807476044, 0.012320333160459995, 0.005331193562597036, 0.033889420330524445, 0.020235266536474228, 0.07458563148975372, 0.1398555487394333, 0.008059950545430183, 0.0405682735145092, 0.03368399292230606, 0.012085597030818462, 0.010676471516489983, 0.03411625698208809, 0.08152885735034943, NaN, NaN, NaN, NaN], [0.005019576288759708, 0.001437423750758171, 0.014701779931783676, 0.005876661743968725, 0.15098156034946442, 0.001037455745972693, 0.0006782425916753709, 0.0010664333822205663, 0.006170186679810286, 0.004750464111566544, 0.015587885864078999, 0.020612932741642, 0.024904461577534676, 0.027292385697364807, 0.6522603631019592, 0.02780178189277649, 0.009980881586670876, 0.010863273404538631, 0.016993993893265724, 0.026612548157572746, 0.013426730409264565, 0.6643192768096924, NaN, NaN, NaN], [0.023952102288603783, 0.0025056565646082163, 0.0002975048264488578, 0.0031560298521071672, 0.002087814500555396, 0.00019765450269915164, 0.00028781042783521116, 0.0023521913681179285, 0.009429593570530415, 0.010675383731722832, 0.013774069957435131, 0.012372920289635658, 0.030660077929496765, 0.3810364305973053, 0.0006224916432984173, 0.6039706468582153, 0.2701583206653595, 0.012816790491342545, 0.005745226051658392, 0.052403513342142105, 0.18411211669445038, 0.00043697847286239266, 0.6234135627746582, NaN, NaN], [0.007988094352185726, 0.006256349850445986, 4.065780740347691e-05, 0.006692530121654272, 0.00010113247117260471, 0.0002641561150085181, 0.0006015493418090045, 0.009669815190136433, 0.00486318813636899, 0.0012557843001559377, 0.43231210112571716, 0.35852983593940735, 0.01959061808884144, 0.007567983586341143, 0.0019125458784401417, 0.00857639778405428, 0.0005027590086683631, 0.41286540031433105, 0.4292365312576294, 0.01753525249660015, 0.005813234485685825, 0.00216498039662838, 0.003382693277671933, 0.00027526391204446554, NaN], [0.1387476772069931, 0.027318276464939117, 0.00785337295383215, 0.019197843968868256, 0.013794281519949436, 0.020801816135644913, 0.013009469024837017, 0.07068510353565216, 0.020734209567308426, 0.024748992174863815, 0.04673967882990837, 0.025586238130927086, 0.01648368127644062, 0.06557000428438187, 0.022920427843928337, 0.013843921944499016, 0.04100487753748894, 0.0375630147755146, 0.023956134915351868, 0.018727701157331467, 0.05957711860537529, 0.020177751779556274, 0.007389482576400042, 0.027843382209539413, 0.025224220007658005]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1319446712732315, 0.003103907685726881, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004627853631973267, 0.8189921975135803, 0.006355744786560535, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0004822930786758661, 0.5574855208396912, 0.0058120423927903175, 0.014268792234361172, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15055440366268158, 0.0014966451562941074, 0.1733904629945755, 0.05038055405020714, 0.0057296124286949635, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1304439753293991, 0.00022060537594370544, 0.03428095951676369, 0.0157721396535635, 0.20856629312038422, 0.2746620774269104, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.017820989713072777, 1.0936159014818259e-05, 0.0006241680239327252, 4.3406893382780254e-05, 0.2565733790397644, 0.5255003571510315, 0.040596142411231995, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2143511176109314, 3.818454570136964e-05, 0.0006476931739598513, 0.00012842394062317908, 0.007853559218347073, 0.008102592080831528, 0.0005345920799300075, 0.00793861411511898, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00014670012751594186, 7.536429620813578e-06, 0.0001294321846216917, 0.00024457855033688247, 0.00022483686916530132, 0.001284220488741994, 0.0014163334853947163, 0.5552030801773071, 0.006061996798962355, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09223808348178864, 0.004348577931523323, 0.013163902796804905, 0.018216131255030632, 0.035016678273677826, 0.11075899004936218, 0.1728493720293045, 0.19621391594409943, 0.029301786795258522, 0.46166056394577026, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11309938877820969, 0.004489036742597818, 0.0485633909702301, 0.021462395787239075, 0.4192940890789032, 0.26214849948883057, 0.22032421827316284, 0.0067114257253706455, 0.010406548157334328, 0.11692964285612106, 0.23004111647605896, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14281870424747467, 0.000545236689504236, 0.003893920686095953, 0.0005153689999133348, 0.01790653169155121, 0.004868220537900925, 0.0031487985979765654, 0.0011714915744960308, 0.0043698386289179325, 0.020373020321130753, 0.02358497679233551, 0.2682037353515625, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09794370085000992, 0.0018320194212719798, 0.000285644200630486, 3.260145604144782e-05, 0.00041393720312044024, 0.0043053096160292625, 0.002047628629952669, 0.0003047001373488456, 0.002447759034112096, 0.0016152235912159085, 0.024524936452507973, 0.29461416602134705, 0.014563476666808128, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13817672431468964, 0.0034516772720962763, 0.002911344636231661, 0.0003800573176704347, 0.001462712767533958, 0.001961951842531562, 0.0040230052545666695, 0.0023086154833436012, 0.002483226591721177, 0.028553131967782974, 0.014239847660064697, 0.18359807133674622, 0.09542248398065567, 0.2067933827638626, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14011409878730774, 0.01466476172208786, 0.09487155824899673, 0.03769487887620926, 0.062972791492939, 0.003495296463370323, 0.0004466120735742152, 0.0044098952785134315, 0.056031279265880585, 0.12585759162902832, 0.04736572876572609, 0.02727479301393032, 0.06542934477329254, 0.563940703868866, 0.024195805191993713, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05395817384123802, 6.747527368133888e-05, 0.0018676340114325285, 0.0002809480356518179, 0.03275269269943237, 0.005758063402026892, 9.199039777740836e-05, 0.00011598093260545284, 0.0015754709020256996, 0.026104740798473358, 0.009686414152383804, 0.001081737456843257, 0.0017741151386871934, 0.49180474877357483, 0.007121484261006117, 0.013531914912164211, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03839295729994774, 0.0002068357716780156, 0.006204192526638508, 0.0054313126020133495, 0.011207946576178074, 0.0013116636546328664, 0.008276019245386124, 0.002269806107506156, 0.004080863669514656, 0.01488969475030899, 0.0006726597202941775, 0.009391524828970432, 0.039596475660800934, 0.19840312004089355, 0.043704546988010406, 0.31202515959739685, 0.23529505729675293, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07469534128904343, 0.001304430770687759, 0.0239309910684824, 0.008060658350586891, 0.021029237657785416, 0.015191669575870037, 0.006979105528444052, 0.0016427322989329696, 0.002132130553945899, 0.015241370536386967, 0.0018563566263765097, 0.035101406276226044, 0.06515936553478241, 0.27313047647476196, 0.10352547466754913, 0.2570805549621582, 0.45083746314048767, 0.1295340657234192, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19253067672252655, 0.0008209676598198712, 0.004669400863349438, 0.00047802351764403284, 0.013135433197021484, 0.0034620855003595352, 0.0016354827675968409, 0.0008273401763290167, 0.0018895546672865748, 0.009773027151823044, 0.006215384230017662, 0.2356690764427185, 0.01036232803016901, 0.06144833192229271, 0.008870624005794525, 0.024212215095758438, 0.008509873412549496, 0.01347219105809927, 0.35532569885253906, NaN, NaN, NaN, NaN, NaN, NaN], [0.10910779982805252, 0.002221200615167618, 0.0001436042075511068, 1.1848528629343491e-05, 0.0001887700636871159, 0.0020721519831568003, 0.0009632316650822759, 0.00014056939107831568, 0.0007320817094296217, 0.0006829273188486695, 0.007395991589874029, 0.2889891564846039, 0.007074101362377405, 0.0002627878566272557, 0.004363438580185175, 0.0018575063440948725, 0.00557676050812006, 0.012322820723056793, 0.31134024262428284, 0.027276715263724327, NaN, NaN, NaN, NaN, NaN], [0.18170765042304993, 0.003209297079592943, 0.0023912524338811636, 0.00020479358499869704, 0.0009326079743914306, 0.0013757160631939769, 0.0021110770758241415, 0.0008730489062145352, 0.000792569131590426, 0.01825624145567417, 0.0059272306971251965, 0.11984144151210785, 0.05654650926589966, 0.08423373848199844, 0.024963613599538803, 0.027966396883130074, 0.1777324080467224, 0.005578523967415094, 0.14623191952705383, 0.11331525444984436, 0.2157108038663864, NaN, NaN, NaN, NaN], [0.1515214741230011, 0.008395697921514511, 0.0657893642783165, 0.019086696207523346, 0.05097401514649391, 0.0016111076110973954, 0.00021851839846931398, 0.002003778237849474, 0.01669292151927948, 0.06321260333061218, 0.015100682154297829, 0.010209205560386181, 0.015906400978565216, 0.30131736397743225, 0.012282183393836021, 0.09666845202445984, 0.00808996893465519, 0.03798958286643028, 0.013879657723009586, 0.047733187675476074, 0.5371345281600952, 0.020763304084539413, NaN, NaN, NaN], [0.07945924997329712, 4.7485355025855824e-05, 0.0020416006445884705, 0.00022757358965463936, 0.013386114500463009, 0.001981395063921809, 3.6917605029884726e-05, 2.620528539409861e-05, 0.0003202208608854562, 0.009042860940098763, 0.0030785591807216406, 0.0011855574557557702, 0.0005728560499846935, 0.20002734661102295, 0.00213914574123919, 0.002927121240645647, 0.004968173801898956, 0.0065933396108448505, 0.002585601294413209, 0.002817549044266343, 0.547335147857666, 0.006171087268739939, 0.018697692081332207, NaN, NaN], [0.059381648898124695, 0.00026094831991940737, 0.007586375344544649, 0.006061093881726265, 0.0039266073144972324, 0.0004965912085026503, 0.003665223019197583, 0.0008195870905183256, 0.0014654117403551936, 0.0045553394593298435, 0.00032001128420233727, 0.004615657962858677, 0.017150992527604103, 0.07922492176294327, 0.012805018573999405, 0.1320599913597107, 0.09461667388677597, 0.003555287839844823, 0.019601207226514816, 0.047796737402677536, 0.29085052013397217, 0.04383813217282295, 0.32529252767562866, 0.24933147430419922, NaN], [0.13618361949920654, 0.0007103006355464458, 0.025071904063224792, 0.004419561009854078, 0.001962232170626521, 0.0023795748129487038, 0.002366183791309595, 0.0003890783409588039, 0.00022811641974840313, 0.0010611300822347403, 0.001608739490620792, 0.028126444667577744, 0.005591525696218014, 0.0024579197634011507, 0.004123267717659473, 0.0409882515668869, 0.010364435613155365, 0.010518459603190422, 0.09771004319190979, 0.037823982536792755, 0.019979961216449738, 0.018303534016013145, 0.22492042183876038, 0.09256016463041306, 0.005498841404914856]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11621169000864029, 0.2792567312717438, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16788142919540405, 0.08717074245214462, 0.024576181545853615, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14762163162231445, 0.09094145894050598, 0.023598572239279747, 0.2273045778274536, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10424397885799408, 0.7145561575889587, 0.21233327686786652, 0.5272893309593201, 0.04291817173361778, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11001076549291611, 0.4734446108341217, 0.06134912371635437, 0.2925608456134796, 0.02150837518274784, 0.19962187111377716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17212024331092834, 0.1419786959886551, 0.05631781369447708, 0.2185172289609909, 0.002532752463594079, 0.0032626313623040915, 0.18381445109844208, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09107878059148788, 0.12160263955593109, 0.2150201052427292, 0.3705081045627594, 0.07164584845304489, 0.05021890252828598, 0.14392021298408508, 0.39638784527778625, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2121918499469757, 0.20806513726711273, 0.15205760300159454, 0.38131871819496155, 0.1009124368429184, 0.09936784207820892, 0.07077471911907196, 0.05006752535700798, 0.14871110022068024, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21685828268527985, 0.23333710432052612, 0.06609098613262177, 0.12803798913955688, 0.1004808098077774, 0.025170300155878067, 0.04069148004055023, 0.10828333348035812, 0.10351972281932831, 0.29450517892837524, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05205162987112999, 0.22306090593338013, 0.049221184104681015, 0.061203524470329285, 0.09776578843593597, 0.06183243915438652, 0.17444021999835968, 0.321644127368927, 0.054029058665037155, 0.2629997134208679, 0.2757931053638458, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05800137668848038, 0.32540804147720337, 0.13333332538604736, 0.05756821855902672, 0.12640602886676788, 0.11846329271793365, 0.2918737828731537, 0.3632459342479706, 0.18816226720809937, 0.6433262228965759, 0.3291742205619812, 0.12170911580324173, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11078674346208572, 0.40781712532043457, 0.06261185556650162, 0.05779192969202995, 0.18194560706615448, 0.1120922714471817, 0.5645142793655396, 0.33037880063056946, 0.18058234453201294, 0.6155731678009033, 0.21430827677249908, 0.044265877455472946, 0.20548948645591736, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08288691937923431, 0.2962968051433563, 0.2819015085697174, 0.19574381411075592, 0.1136796846985817, 0.07755676656961441, 0.20596812665462494, 0.3330870270729065, 0.21944326162338257, 0.22804425656795502, 0.1688224822282791, 0.2872299253940582, 0.13759873807430267, 0.09907422959804535, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11118441820144653, 0.6110438108444214, 0.6292654871940613, 0.5805363655090332, 0.22765980660915375, 0.4274957776069641, 0.6573506593704224, 0.6816673278808594, 0.5361799597740173, 0.320940226316452, 0.3845328688621521, 0.6242536306381226, 0.41633498668670654, 0.12922972440719604, 0.01991792768239975, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10675505548715591, 0.1912444829940796, 0.23975566029548645, 0.32351911067962646, 0.046362437307834625, 0.08004549145698547, 0.3363644778728485, 0.2706483006477356, 0.26792168617248535, 0.2952979505062103, 0.4496033787727356, 0.1126319095492363, 0.5116660594940186, 0.015820369124412537, 0.030236991122364998, 0.03603934869170189, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2233639359474182, 0.0911012589931488, 0.12918633222579956, 0.17958812415599823, 0.037158817052841187, 0.06043876335024834, 0.43303725123405457, 0.3349981904029846, 0.09061599522829056, 0.23225362598896027, 0.1514965295791626, 0.09056703746318817, 0.2480165809392929, 0.056160230189561844, 0.015552842989563942, 0.007365798112004995, 0.17054231464862823, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09585364907979965, 0.22669152915477753, 0.08040254563093185, 0.0638674795627594, 0.15364862978458405, 0.13237975537776947, 0.3887532651424408, 0.5357696413993835, 0.07155110687017441, 0.4139500856399536, 0.05426981300115585, 0.1238613948225975, 0.07816720753908157, 0.14353296160697937, 0.021915707737207413, 0.02897939831018448, 0.22262324392795563, 0.4835837185382843, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05190133675932884, 0.3522363007068634, 0.14802464842796326, 0.07656959444284439, 0.12417534738779068, 0.17628712952136993, 0.33604755997657776, 0.38481405377388, 0.20552395284175873, 0.5797679424285889, 0.3262830972671509, 0.19466114044189453, 0.045280374586582184, 0.2712458372116089, 0.041196610778570175, 0.08666794002056122, 0.3327068090438843, 0.1922111064195633, 0.10969121754169464, NaN, NaN, NaN, NaN, NaN, NaN], [0.10818891227245331, 0.3937702178955078, 0.030490810051560402, 0.030189264565706253, 0.11243001371622086, 0.07142115384340286, 0.3648340702056885, 0.2467786818742752, 0.13009557127952576, 0.5037410855293274, 0.18716548383235931, 0.08825942128896713, 0.23451530933380127, 0.24434491991996765, 0.03496113047003746, 0.04431905224919319, 0.3934983015060425, 0.31427451968193054, 0.05462265387177467, 0.2524711489677429, NaN, NaN, NaN, NaN, NaN], [0.06088699772953987, 0.23725801706314087, 0.2046121060848236, 0.14171433448791504, 0.06688592582941055, 0.06064169481396675, 0.14286598563194275, 0.21723276376724243, 0.13491223752498627, 0.2083195000886917, 0.15285742282867432, 0.34066644310951233, 0.18166381120681763, 0.10532425343990326, 0.06318715214729309, 0.052211396396160126, 0.20970472693443298, 0.20715771615505219, 0.28281068801879883, 0.13935938477516174, 0.11923542618751526, NaN, NaN, NaN, NaN], [0.09884612262248993, 0.5530695915222168, 0.6301063299179077, 0.5187459588050842, 0.28427499532699585, 0.33059176802635193, 0.49595603346824646, 0.6107674241065979, 0.387560099363327, 0.3283739984035492, 0.3905918300151825, 0.5949583053588867, 0.2912430167198181, 0.19163259863853455, 0.03091937117278576, 0.3911139667034149, 0.3233675956726074, 0.421701043844223, 0.6310504674911499, 0.4068542718887329, 0.13317596912384033, 0.02126597985625267, NaN, NaN, NaN], [0.07192745804786682, 0.09934075176715851, 0.15662430226802826, 0.18248029053211212, 0.021172231063246727, 0.037516966462135315, 0.12766626477241516, 0.09711621701717377, 0.09662153571844101, 0.1303528994321823, 0.3114719092845917, 0.1600099802017212, 0.265144020318985, 0.011710498481988907, 0.02471126988530159, 0.012725233100354671, 0.12533646821975708, 0.446529746055603, 0.11092787981033325, 0.45893827080726624, 0.011159577406942844, 0.028070949018001556, 0.024378135800361633, NaN, NaN], [0.21178482472896576, 0.0713806003332138, 0.12116114795207977, 0.16551871597766876, 0.025692136958241463, 0.03932836279273033, 0.255863755941391, 0.20887790620326996, 0.05500240623950958, 0.14075487852096558, 0.158308207988739, 0.10016348958015442, 0.22940821945667267, 0.06542190909385681, 0.016673747450113297, 0.011679067276418209, 0.21266934275627136, 0.27460965514183044, 0.08977667987346649, 0.1985965520143509, 0.05640871822834015, 0.014301197603344917, 0.004748867359012365, 0.1251523643732071, NaN], [0.11377177387475967, 0.4656391441822052, 0.26672884821891785, 0.20802536606788635, 0.1860857605934143, 0.16829806566238403, 0.19711202383041382, 0.3023360073566437, 0.035885076969861984, 0.11114621162414551, 0.21048156917095184, 0.27827921509742737, 0.11178875714540482, 0.13154125213623047, 0.3096882104873657, 0.09530708193778992, 0.2201821655035019, 0.1989239901304245, 0.27841058373451233, 0.15223632752895355, 0.2206900417804718, 0.34536775946617126, 0.09229245036840439, 0.24595825374126434, 0.2865155339241028]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13124778866767883, 0.015335792675614357, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19323189556598663, 0.005229663103818893, 0.005805561784654856, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06695510447025299, 0.08997365087270737, 0.32878753542900085, 0.35321861505508423, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1452476531267166, 0.07996584475040436, 0.2002653181552887, 0.13149262964725494, 0.005022347904741764, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1274433135986328, 0.13577045500278473, 0.16066212952136993, 0.1959238052368164, 0.04180024936795235, 0.06788772344589233, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14809708297252655, 0.29017606377601624, 0.22457490861415863, 0.17088554799556732, 0.041788797825574875, 0.013634788803756237, 0.02984887920320034, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21402230858802795, 0.012405444867908955, 0.0014808804262429476, 0.0009161182679235935, 0.0035427443217486143, 0.0017166208708658814, 0.001927618752233684, 0.015056394040584564, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10794443637132645, 0.13477572798728943, 0.046750620007514954, 0.03419584408402443, 0.30604344606399536, 0.11879221349954605, 0.08022946119308472, 0.11745522916316986, 0.21712547540664673, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06259628385305405, 0.21873348951339722, 0.248628169298172, 0.2344663441181183, 0.09133727103471756, 0.05752522125840187, 0.03945200890302658, 0.39403918385505676, 0.15040725469589233, 0.009099425747990608, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06400181353092194, 0.3208324611186981, 0.5040323138237, 0.6282902359962463, 0.04389061778783798, 0.08030739426612854, 0.10539824515581131, 0.1485716998577118, 0.08085520565509796, 0.13963551819324493, 0.0947280004620552, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0935494601726532, 0.3055664598941803, 0.46751275658607483, 0.6914730072021484, 0.12860655784606934, 0.15726737678050995, 0.2987912595272064, 0.1529359668493271, 0.062232255935668945, 0.041881486773490906, 0.03399288281798363, 0.026789270341396332, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.012478480115532875, 0.051689472049474716, 0.7194163799285889, 0.8485123515129089, 0.006671697832643986, 0.03636787086725235, 0.05433559790253639, 0.01463489979505539, 0.0011851346353068948, 0.0010049004340544343, 0.012586181983351707, 0.0039429632015526295, 0.0029262336902320385, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16095376014709473, 0.10161679983139038, 0.15561290085315704, 0.27214428782463074, 0.06339859217405319, 0.047669682651758194, 0.16775988042354584, 0.30333516001701355, 0.29585903882980347, 0.026492541655898094, 0.03390856087207794, 0.020966142416000366, 0.027538424357771873, 0.040642742067575455, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1701768934726715, 0.015393235720694065, 0.0020776872988790274, 0.011533004231750965, 0.013215321116149426, 0.004845780786126852, 0.011772604659199715, 0.006262979004532099, 0.00390799343585968, 0.007256041280925274, 0.0014780729543417692, 0.007152961101382971, 0.1450572907924652, 0.009833375923335552, 0.004788131918758154, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.27953270077705383, 0.3106633424758911, 0.3078516721725464, 0.2835734188556671, 0.23220741748809814, 0.10028243064880371, 0.059542566537857056, 0.10900203883647919, 0.24247398972511292, 0.19294817745685577, 0.04455278813838959, 0.032558612525463104, 0.2623904049396515, 0.04071282595396042, 0.07101175934076309, 0.01397540420293808, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15828359127044678, 0.26215362548828125, 0.1828027367591858, 0.3383132517337799, 0.14976613223552704, 0.17187725007534027, 0.16098640859127045, 0.10713529586791992, 0.2253616452217102, 0.27887699007987976, 0.0991593673825264, 0.1987481713294983, 0.2010713517665863, 0.24892166256904602, 0.09143882989883423, 0.028894133865833282, 0.0226773452013731, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08621957898139954, 0.39239373803138733, 0.32060059905052185, 0.6169360876083374, 0.04211895540356636, 0.07954877614974976, 0.28241875767707825, 0.1073535904288292, 0.10431969910860062, 0.28138864040374756, 0.05428503826260567, 0.29005417227745056, 0.2829020619392395, 0.1771886944770813, 0.12728992104530334, 0.029228007420897484, 0.09527892619371414, 0.030012397095561028, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10387677699327469, 0.28899070620536804, 0.34778735041618347, 0.5978891849517822, 0.08856049180030823, 0.11093756556510925, 0.2773001492023468, 0.1387036144733429, 0.05535874143242836, 0.040542375296354294, 0.057020239531993866, 0.08593740314245224, 0.3575255870819092, 0.1780063509941101, 0.03115975111722946, 0.05683879926800728, 0.20087137818336487, 0.022991398349404335, 0.024780578911304474, NaN, NaN, NaN, NaN, NaN, NaN], [0.027872784063220024, 0.11975038051605225, 0.8484699726104736, 0.9221431016921997, 0.010032964870333672, 0.05817321315407753, 0.14408904314041138, 0.03149182349443436, 0.0027255630120635033, 0.003546576714143157, 0.054592132568359375, 0.03846639767289162, 0.0179138146340847, 0.04004756733775139, 0.0025625908747315407, 0.006073353346437216, 0.017890095710754395, 0.006128084380179644, 0.0035659971181303263, 0.005842072889208794, NaN, NaN, NaN, NaN, NaN], [0.21095024049282074, 0.16082847118377686, 0.2551726996898651, 0.40046265721321106, 0.07841236889362335, 0.05558479577302933, 0.20925307273864746, 0.4381427764892578, 0.47918838262557983, 0.07096414268016815, 0.11106863617897034, 0.09138666838407516, 0.1393880993127823, 0.1506565660238266, 0.07743309438228607, 0.06943798065185547, 0.09801105409860611, 0.017720624804496765, 0.015859564766287804, 0.029157793149352074, 0.0392736941576004, NaN, NaN, NaN, NaN], [0.17935752868652344, 0.014263968914747238, 0.0022281131241470575, 0.011617614887654781, 0.022433524951338768, 0.0047986325807869434, 0.013686214573681355, 0.007696506567299366, 0.004939754959195852, 0.012488129548728466, 0.002878576284274459, 0.013457567431032658, 0.23303280770778656, 0.030022362247109413, 0.013181640766561031, 0.027029545977711678, 0.010247751139104366, 0.0006795030203647912, 0.0032072996255010366, 0.1104368045926094, 0.006663828622549772, 0.003364446572959423, NaN, NaN, NaN], [0.3113161623477936, 0.29550519585609436, 0.2834082841873169, 0.292662650346756, 0.1380799263715744, 0.055221766233444214, 0.0487985797226429, 0.10219268500804901, 0.25612032413482666, 0.2569950222969055, 0.10279092192649841, 0.16084249317646027, 0.5340818166732788, 0.10305190831422806, 0.16831228137016296, 0.03310799598693848, 0.10521702468395233, 0.008185362443327904, 0.02029210887849331, 0.2447529286146164, 0.0189062412828207, 0.051586367189884186, 0.011271311901509762, NaN, NaN], [0.21913117170333862, 0.2667233347892761, 0.15068072080612183, 0.2934513986110687, 0.11010763049125671, 0.11770202964544296, 0.1548316478729248, 0.10880382359027863, 0.19848009943962097, 0.2926469147205353, 0.17939361929893494, 0.38748762011528015, 0.38622626662254333, 0.4369211196899414, 0.14473943412303925, 0.11290202289819717, 0.11878126114606857, 0.013051117770373821, 0.18458649516105652, 0.15622372925281525, 0.14840805530548096, 0.06742489337921143, 0.01624887064099312, 0.028317920863628387, NaN], [0.13670727610588074, 0.11102687567472458, 0.008893890306353569, 0.008979070000350475, 0.01785319298505783, 0.008134939707815647, 0.02043774165213108, 0.030145585536956787, 0.014907605946063995, 0.021436721086502075, 0.020207075402140617, 0.10284662246704102, 0.06823904067277908, 0.04208305850625038, 0.03810393810272217, 0.04656955599784851, 0.025087369605898857, 0.005296032875776291, 0.07358870655298233, 0.057817310094833374, 0.033472564071416855, 0.02220221422612667, 0.01758744567632675, 0.012124869041144848, 0.052647966891527176]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1301431953907013, 0.0347244068980217, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19350707530975342, 0.0006586865638382733, 0.008110460825264454, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07742509245872498, 0.025898784399032593, 0.46813124418258667, 0.21566073596477509, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15508510172367096, 0.002848779782652855, 0.006727630738168955, 0.01290579792112112, 0.0019038956379517913, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1506490558385849, 0.0018329949816688895, 0.0011812039883807302, 0.010563074611127377, 0.0007367127691395581, 0.0007524989196099341, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0463392436504364, 0.0861721858382225, 0.5342088341712952, 0.5262086987495422, 0.252642959356308, 0.014757110737264156, 0.02778990939259529, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08082517981529236, 0.10121051222085953, 0.3481808602809906, 0.41374534368515015, 0.38359278440475464, 0.07890304177999496, 0.1096968874335289, 0.1685827672481537, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1433362513780594, 0.13670213520526886, 0.10138670355081558, 0.1093992069363594, 0.236768901348114, 0.09415888041257858, 0.011134332977235317, 0.019298367202281952, 0.5348934531211853, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.024931270629167557, 0.02871265634894371, 0.20136752724647522, 0.1457405984401703, 0.13753218948841095, 0.13171687722206116, 0.07031083852052689, 0.04771474376320839, 0.5403124690055847, 0.04482616111636162, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.026511939242482185, 0.12058579176664352, 0.09381356090307236, 0.09726550430059433, 0.13490843772888184, 0.36408668756484985, 0.19949088990688324, 0.09435784071683884, 0.45831772685050964, 0.1274537742137909, 0.014095090329647064, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12624163925647736, 0.03293433412909508, 0.07055910676717758, 0.06304988265037537, 0.23899653553962708, 0.15645378828048706, 0.07000429183244705, 0.02516351453959942, 0.06797400116920471, 0.07094329595565796, 0.1311238706111908, 0.21208471059799194, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1118171289563179, 0.015469676814973354, 0.08768722414970398, 0.046650953590869904, 0.23542486131191254, 0.09032069146633148, 0.05012429133057594, 0.004171812906861305, 0.15006321668624878, 0.017805932089686394, 0.049085501581430435, 0.035517167299985886, 0.6428134441375732, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09301143884658813, 0.13257478177547455, 0.1489255279302597, 0.18642880022525787, 0.318376362323761, 0.31357452273368835, 0.1382697969675064, 0.07457731664180756, 0.17392435669898987, 0.00920780934393406, 0.020603884011507034, 0.049020376056432724, 0.322329580783844, 0.3050764203071594, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17444664239883423, 0.0007958812057040632, 5.6854176364140585e-05, 0.0004179355164524168, 0.00013179269444663078, 0.00024977640714496374, 0.0001107741700252518, 7.639485556865111e-05, 0.0008396806661039591, 0.00030287212575785816, 0.00023763117496855557, 0.003834246192127466, 0.003433886216953397, 0.00015348535089287907, 0.00014843019016552716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00841783918440342, 0.03505324944853783, 0.02469123899936676, 0.026689309626817703, 0.1500382125377655, 0.08861804753541946, 0.006530162878334522, 0.060150377452373505, 0.04669034481048584, 0.007807246409356594, 0.02131708152592182, 0.012364925816655159, 0.041818197816610336, 0.02841370552778244, 0.6981374621391296, 0.06836962699890137, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0009672276792116463, 0.0037913541309535503, 0.00524782482534647, 0.006044968497008085, 0.07807419449090958, 0.026950905099511147, 0.0024354930501431227, 0.005482541862875223, 0.013836389407515526, 0.002816400956362486, 0.0006559633184224367, 0.002845867071300745, 0.018497759476304054, 0.19704575836658478, 0.41393977403640747, 0.4024144113063812, 0.00308317132294178, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0023347423411905766, 0.018236415460705757, 0.011423468589782715, 0.014267664402723312, 0.06272618472576141, 0.09006785601377487, 0.023437032476067543, 0.008957883343100548, 0.03532397374510765, 0.006200278177857399, 0.0002018583327298984, 0.016960909590125084, 0.04933774098753929, 0.1362536996603012, 0.47770828008651733, 0.5670948624610901, 0.06992122530937195, 0.03068283386528492, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0730348452925682, 0.024321116507053375, 0.06646358221769333, 0.0630527138710022, 0.23201428353786469, 0.1378810703754425, 0.04738042131066322, 0.010255109518766403, 0.0316733755171299, 0.07226394861936569, 0.06345586478710175, 0.13366159796714783, 0.1651405692100525, 0.1875276118516922, 0.475235253572464, 0.34701114892959595, 0.106105737388134, 0.17074023187160492, 0.14835108816623688, NaN, NaN, NaN, NaN, NaN, NaN], [0.1317213624715805, 0.02603350207209587, 0.05892709270119667, 0.02498493157327175, 0.2902502715587616, 0.11121267080307007, 0.057563167065382004, 0.004654969088733196, 0.12363925576210022, 0.02343585342168808, 0.03682887554168701, 0.054189957678318024, 0.5043657422065735, 0.23388440907001495, 0.46154457330703735, 0.32561513781547546, 0.055846668779850006, 0.06476935744285583, 0.026345595717430115, 0.5623452067375183, NaN, NaN, NaN, NaN, NaN], [0.037178635597229004, 0.08259578794240952, 0.0920928493142128, 0.09107104688882828, 0.19359135627746582, 0.17535823583602905, 0.06819135695695877, 0.03716395050287247, 0.07458745688199997, 0.0064619481563568115, 0.009060872718691826, 0.02094256319105625, 0.1461041122674942, 0.11104261875152588, 0.6685899496078491, 0.4500047266483307, 0.029085516929626465, 0.03437849134206772, 0.03590574488043785, 0.20188003778457642, 0.23542997241020203, NaN, NaN, NaN, NaN], [0.18516498804092407, 0.0009336460498161614, 7.266629108926281e-05, 0.00041225351742468774, 0.00023152375069912523, 0.0002865330025088042, 0.00012637366307899356, 8.909442112781107e-05, 0.0006568549433723092, 0.0003727772564161569, 0.00021836791711393744, 0.0030449857003986835, 0.002062517451122403, 0.0001740154402796179, 0.00019746039470192045, 0.0010639599058777094, 3.738106170203537e-05, 0.00018948569777421653, 0.0017019548686221242, 0.0021623496431857347, 7.414143328787759e-05, 0.00010166682477574795, NaN, NaN, NaN], [0.014717604033648968, 0.07327108085155487, 0.049021750688552856, 0.04824157431721687, 0.2509053647518158, 0.1518847495317459, 0.011399514973163605, 0.08240412920713425, 0.052963949739933014, 0.012185328640043736, 0.03166860342025757, 0.029948236420750618, 0.0332757867872715, 0.026646502315998077, 0.6691258549690247, 0.05157328397035599, 0.010373775847256184, 0.027277877554297447, 0.022091276943683624, 0.06386284530162811, 0.02213944122195244, 0.7486419677734375, 0.1026511937379837, NaN, NaN], [0.0010381464380770922, 0.0033105257898569107, 0.005275417119264603, 0.005129440221935511, 0.05292869359254837, 0.018404772505164146, 0.0016328096389770508, 0.0039754449389874935, 0.007563540246337652, 0.0015294092008844018, 0.00038045260589569807, 0.0016144785331562161, 0.00974529329687357, 0.09415796399116516, 0.176291361451149, 0.35064396262168884, 0.0026081653777509928, 0.0026635529939085245, 0.004589376971125603, 0.028667066246271133, 0.20089752972126007, 0.45412325859069824, 0.4352543354034424, 0.005037708207964897, NaN], [0.1408424973487854, 0.01142195239663124, 0.027654578909277916, 0.018255943432450294, 0.00871819257736206, 0.007302883546799421, 0.002508251927793026, 0.0010894191218540072, 0.002539109904319048, 0.0016572934109717607, 0.002274427330121398, 0.00915378425270319, 0.004932411015033722, 0.000505969044752419, 0.0064278775826096535, 0.013472460210323334, 0.0009905033512040973, 0.004150861874222755, 0.015419019386172295, 0.013300818391144276, 0.00147106999065727, 0.01399929728358984, 0.03311459720134735, 0.0035406623501330614, 0.008275571279227734]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10530310869216919, 0.47072935104370117, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07470229268074036, 0.01594272069633007, 0.3473423421382904, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19784890115261078, 0.02982909232378006, 0.008884507231414318, 0.026416730135679245, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15099161863327026, 0.004257611930370331, 0.06880252063274384, 0.03778434172272682, 0.016005711629986763, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14908726513385773, 0.01576131209731102, 0.006129090208560228, 0.013888919726014137, 0.006888655014336109, 0.007033796049654484, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1207430437207222, 0.0697125568985939, 0.0065151299349963665, 0.0038357542362064123, 0.04419673979282379, 0.16196060180664062, 0.49751368165016174, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02684849314391613, 0.03953110799193382, 0.00281998747959733, 0.001733462675474584, 0.08529012650251389, 0.6486974358558655, 0.306731641292572, 0.07198647409677505, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.012395885773003101, 0.009238478727638721, 0.0003186498652212322, 0.0010813054395839572, 0.008392964489758015, 0.2777543067932129, 0.44055092334747314, 0.0011997584952041507, 0.00246741552837193, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.034838397055864334, 0.015937600284814835, 0.002090656431391835, 0.002794815693050623, 0.008703295141458511, 0.10732896625995636, 0.4454900026321411, 0.001775766140781343, 0.0009654808673076332, 0.016644174233078957, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.293722003698349, 0.0148458918556571, 0.02856721729040146, 0.006315621547400951, 0.005582483485341072, 0.0013911855639889836, 0.004092940129339695, 0.0036679452750831842, 0.0010494120651856065, 0.016411608085036278, 0.023008037358522415, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13037414848804474, 0.020949387922883034, 0.03831411898136139, 0.007462172769010067, 0.02548721246421337, 0.006367610301822424, 0.008434200659394264, 0.010317808948457241, 0.003713584039360285, 0.00402417778968811, 0.19032441079616547, 0.26746228337287903, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.041874390095472336, 0.024160701781511307, 0.00029624058515764773, 0.00016299582784995437, 0.00014630405348725617, 0.0004776908899657428, 0.0010664566652849317, 0.005874973721802235, 0.000636687153019011, 0.0013240330154076219, 0.0912160873413086, 0.35286882519721985, 0.01772063784301281, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11822566390037537, 0.015047432854771614, 0.019423136487603188, 0.00686526857316494, 0.0036870460025966167, 0.00022719512344338, 0.002930518239736557, 0.025171050801873207, 0.005165010690689087, 0.05391281098127365, 0.11512911319732666, 0.07776232063770294, 0.2967449426651001, 0.09380093216896057, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09375648200511932, 0.01475021056830883, 0.012638024985790253, 0.0046005831100046635, 0.051909249275922775, 0.0036223391070961952, 0.004371740389615297, 0.009388775564730167, 0.01159447617828846, 0.023305783048272133, 0.046531662344932556, 0.058873143047094345, 0.07503876090049744, 0.0337555818259716, 0.30213212966918945, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.060409948229789734, 0.03445665165781975, 0.000381257850676775, 0.0036348046269267797, 0.0002713070425670594, 0.0011815812904387712, 0.03030458651483059, 0.03435760363936424, 0.0019682012498378754, 0.00901943538337946, 0.2363511621952057, 0.7836493253707886, 0.05375572293996811, 0.0010517562041059136, 0.002096510259434581, 0.017742546275258064, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19913224875926971, 0.17475517094135284, 0.0022224360145628452, 0.015882516279816628, 0.001058473251760006, 0.0005846276762895286, 0.02601638250052929, 0.037341512739658356, 0.002062901621684432, 0.01394632738083601, 0.062121838331222534, 0.09270716458559036, 0.13391432166099548, 0.011137665249407291, 0.003502808278426528, 0.007463122718036175, 0.4640289545059204, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.33059969544410706, 0.017222048714756966, 0.029873082414269447, 0.008054245263338089, 0.002331576542928815, 0.0006345488945953548, 0.011296147480607033, 0.005269323009997606, 0.0004991231253370643, 0.01808379590511322, 0.0023433570750057697, 0.0409514382481575, 0.01219080574810505, 0.010968736372888088, 0.004035044461488724, 0.000618473335634917, 0.01301309373229742, 0.04461785778403282, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11787470430135727, 0.013379373587667942, 0.03657921776175499, 0.007838133722543716, 0.006328434217721224, 0.0013346761697903275, 0.005374525673687458, 0.005563441663980484, 0.0013783610193058848, 0.003622437361627817, 0.10895299166440964, 0.17491653561592102, 0.013411260209977627, 0.006658618804067373, 0.013080593198537827, 0.0013389869127422571, 0.03540230169892311, 0.3923792839050293, 0.2429211437702179, NaN, NaN, NaN, NaN, NaN, NaN], [0.03099578432738781, 0.01363852247595787, 8.312943100463599e-05, 4.0873743273550645e-05, 3.1056373700266704e-05, 8.971957868197933e-05, 0.0004970009904354811, 0.0021136843133717775, 0.00015606316446792334, 0.0008045462891459465, 0.029241982847452164, 0.24120952188968658, 0.011327153071761131, 0.006169632077217102, 0.004105421248823404, 0.0017298789462074637, 0.09891722351312637, 0.13539430499076843, 0.3545337915420532, 0.03266340494155884, NaN, NaN, NaN, NaN, NaN], [0.05892227217555046, 0.006390280555933714, 0.00726453959941864, 0.002730957930907607, 0.0007821861072443426, 5.8160956541541964e-05, 0.0015625637024641037, 0.007388831116259098, 0.0016573512693867087, 0.027249574661254883, 0.062049947679042816, 0.056622181087732315, 0.2355845421552658, 0.04601869359612465, 0.006218506023287773, 0.00966239720582962, 0.07739637047052383, 0.4012998342514038, 0.09626632183790207, 0.38049787282943726, 0.10569068044424057, NaN, NaN, NaN, NaN], [0.09179559350013733, 0.00951253343373537, 0.010748236440122128, 0.0033872865606099367, 0.04677930101752281, 0.0018132117111235857, 0.0035809800028800964, 0.005968866869807243, 0.0062707834877073765, 0.02606387436389923, 0.033457815647125244, 0.03605461120605469, 0.04817588999867439, 0.03754975646734238, 0.2781437933444977, 0.015551367774605751, 0.2560427486896515, 0.08298799395561218, 0.06865174323320389, 0.12361031025648117, 0.04344068095088005, 0.28463616967201233, NaN, NaN, NaN], [0.02905191108584404, 0.012088212184607983, 0.00011298860044917092, 0.0012518719304352999, 4.317293132771738e-05, 0.0001948956778505817, 0.008923283778131008, 0.008874665014445782, 0.00048750368296168745, 0.0041984752751886845, 0.08557221293449402, 0.46109655499458313, 0.018593793734908104, 0.0004841866611968726, 0.0006005582981742918, 0.004410868044942617, 0.1617877185344696, 0.2815479040145874, 0.7414005398750305, 0.06452517956495285, 0.0009642028599046171, 0.0012653517769649625, 0.012943175621330738, NaN, NaN], [0.1381005197763443, 0.0952477678656578, 0.0011117071844637394, 0.007693122606724501, 0.0001761779421940446, 8.233776316046715e-05, 0.0067709037102758884, 0.015442474745213985, 0.0005836034542880952, 0.005857429001480341, 0.020792629569768906, 0.02682901732623577, 0.05164036154747009, 0.0043857707642018795, 0.0008507486782036722, 0.004215322434902191, 0.19233396649360657, 0.21357974410057068, 0.14138071238994598, 0.12764914333820343, 0.011541306972503662, 0.001996394479647279, 0.004979089833796024, 0.4768531322479248, NaN], [0.14079369604587555, 0.0077750058844685555, 0.008707624860107899, 0.002215370535850525, 0.0003697987995110452, 8.685041393619031e-05, 6.568676326423883e-05, 0.0005928067839704454, 0.00018151948461309075, 0.0013713521184399724, 0.003134837606921792, 0.004530616104602814, 0.0021016064565628767, 0.0014590725768357515, 0.01743447594344616, 0.0004639088874682784, 0.00557903666049242, 0.015868593007326126, 0.012156624346971512, 0.006375743541866541, 0.004486390855163336, 0.037133798003196716, 0.0008373309392482042, 0.015209782868623734, 0.053904592990875244]]], [[[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12363631278276443, 0.14845161139965057, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14363405108451843, 0.021847352385520935, 0.10135873407125473, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13959342241287231, 0.059129536151885986, 0.04632453992962837, 0.0506979376077652, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1401052325963974, 0.20328059792518616, 0.08711162209510803, 0.021569250151515007, 0.06437158584594727, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14849096536636353, 0.24162742495536804, 0.13733072578907013, 0.023916935548186302, 0.4261094033718109, 0.034874048084020615, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1122843325138092, 0.27548718452453613, 0.3164171576499939, 0.11597670614719391, 0.521038293838501, 0.1305568367242813, 0.04802507162094116, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13016629219055176, 0.2326299250125885, 0.3132029175758362, 0.32591310143470764, 0.1516764611005783, 0.09795279055833817, 0.02053435519337654, 0.1865263283252716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.121080182492733, 0.4840172827243805, 0.47487083077430725, 0.3000609576702118, 0.5299880504608154, 0.09183567762374878, 0.057097259908914566, 0.12967270612716675, 0.04215369373559952, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08035996556282043, 0.5049515962600708, 0.21779249608516693, 0.22551923990249634, 0.48642098903656006, 0.17451445758342743, 0.14853931963443756, 0.2973877787590027, 0.02990546263754368, 0.12922555208206177, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15412510931491852, 0.24815845489501953, 0.21706829965114594, 0.15909965336322784, 0.3919820487499237, 0.2097313106060028, 0.05961627885699272, 0.10788830369710922, 0.04644578695297241, 0.008778278715908527, 0.1666601300239563, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1319347769021988, 0.07332690805196762, 0.3709748387336731, 0.10343886911869049, 0.2416648119688034, 0.273651659488678, 0.142499178647995, 0.032821010798215866, 0.08169299364089966, 0.04221141338348389, 0.04960552975535393, 0.14849121868610382, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15117543935775757, 0.09085448831319809, 0.23665060102939606, 0.09974268078804016, 0.5293540358543396, 0.2969721853733063, 0.0923411101102829, 0.04701923578977585, 0.47750627994537354, 0.31436240673065186, 0.11817371100187302, 0.08098391443490982, 0.05702001228928566, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2022491842508316, 0.0666579008102417, 0.032761361449956894, 0.03407268971204758, 0.3113752603530884, 0.5905517935752869, 0.21839523315429688, 0.043745849281549454, 0.02789805829524994, 0.042396336793899536, 0.08724991232156754, 0.07408890873193741, 0.010044119320809841, 0.12108539044857025, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14857184886932373, 0.38842764496803284, 0.16100677847862244, 0.1839173436164856, 0.03719957172870636, 0.5251989364624023, 0.25831982493400574, 0.06345110386610031, 0.01966739259660244, 0.013820506632328033, 0.10135386884212494, 0.06285497546195984, 0.037499457597732544, 0.09235794097185135, 0.06518241763114929, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15810954570770264, 0.08897967636585236, 0.2754043936729431, 0.11542505025863647, 0.7166418433189392, 0.6856120824813843, 0.15602687001228333, 0.03588242083787918, 0.10233978182077408, 0.06907100230455399, 0.13906386494636536, 0.06064911186695099, 0.02474391460418701, 0.09316151589155197, 0.5409220457077026, 0.18577302992343903, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07972963899374008, 0.06995329260826111, 0.2565014958381653, 0.11985079944133759, 0.5429201126098633, 0.3072132468223572, 0.04467121511697769, 0.06233014911413193, 0.06391221284866333, 0.06306523084640503, 0.04008801653981209, 0.16940940916538239, 0.21208623051643372, 0.3237960636615753, 0.4987465739250183, 0.14530567824840546, 0.42085787653923035, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.057688161730766296, 0.05957844480872154, 0.09227755665779114, 0.06308872997760773, 0.6051628589630127, 0.41719216108322144, 0.06513097882270813, 0.11441777646541595, 0.2576654255390167, 0.039566945284605026, 0.04989808052778244, 0.41204503178596497, 0.6269510388374329, 0.0653882622718811, 0.2309982180595398, 0.05030554160475731, 0.12162061780691147, 0.2016562819480896, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08513950556516647, 0.05776134505867958, 0.44855204224586487, 0.15441171824932098, 0.37962910532951355, 0.43142464756965637, 0.21386101841926575, 0.07478547096252441, 0.22071515023708344, 0.1727379858493805, 0.06471506506204605, 0.1414414495229721, 0.20356127619743347, 0.23849359154701233, 0.28116941452026367, 0.22387196123600006, 0.24124523997306824, 0.10411572456359863, 0.14086224138736725, NaN, NaN, NaN, NaN, NaN, NaN], [0.09857918322086334, 0.08268877118825912, 0.17155912518501282, 0.08326277136802673, 0.3910389840602875, 0.23102693259716034, 0.0706368237733841, 0.04062340036034584, 0.34264665842056274, 0.40400993824005127, 0.14310938119888306, 0.07597656548023224, 0.059025220572948456, 0.46083009243011475, 0.6441643834114075, 0.8002472519874573, 0.34466618299484253, 0.10859531164169312, 0.04317509010434151, 0.042760394513607025, NaN, NaN, NaN, NaN, NaN], [0.07982634007930756, 0.027687683701515198, 0.01305405143648386, 0.01568622700870037, 0.15395750105381012, 0.36470726132392883, 0.09429053217172623, 0.02618592418730259, 0.00988653302192688, 0.03718657046556473, 0.057223062962293625, 0.036843542009592056, 0.008861655369400978, 0.039983998984098434, 0.5628355145454407, 0.5858935713768005, 0.11540589481592178, 0.07112369686365128, 0.022479010745882988, 0.0049066911451518536, 0.07443748414516449, NaN, NaN, NaN, NaN], [0.13230623304843903, 0.39635705947875977, 0.12619565427303314, 0.23844560980796814, 0.04749276116490364, 0.5552228093147278, 0.304650217294693, 0.16151569783687592, 0.05923860892653465, 0.03940735384821892, 0.37161606550216675, 0.13852664828300476, 0.1098584458231926, 0.421970933675766, 0.059641290456056595, 0.35413044691085815, 0.2336989790201187, 0.21869167685508728, 0.04408164322376251, 0.03093402087688446, 0.08392708003520966, 0.038801465183496475, NaN, NaN, NaN], [0.06938444077968597, 0.08034616708755493, 0.1555827558040619, 0.07347460091114044, 0.4763748347759247, 0.40589335560798645, 0.07265187799930573, 0.022002995014190674, 0.0527057945728302, 0.07314148545265198, 0.11090734601020813, 0.03504399210214615, 0.0172868762165308, 0.14030121266841888, 0.3467526137828827, 0.21038202941417694, 0.6312639117240906, 0.1208876520395279, 0.020520374178886414, 0.014591614715754986, 0.03736459091305733, 0.22129306197166443, 0.05682671070098877, NaN, NaN], [0.08218587934970856, 0.08353152126073837, 0.244074746966362, 0.15340235829353333, 0.5709766745567322, 0.4268343448638916, 0.06391507387161255, 0.13458560407161713, 0.14046461880207062, 0.13024689257144928, 0.043825987726449966, 0.1802380084991455, 0.2593124508857727, 0.4235299825668335, 0.23401854932308197, 0.23376718163490295, 0.4458163380622864, 0.1644086241722107, 0.22351105511188507, 0.25077733397483826, 0.28149890899658203, 0.3320602774620056, 0.05098887160420418, 0.4388013482093811, NaN], [0.13887250423431396, 0.1972966492176056, 0.3352757692337036, 0.30585116147994995, 0.6380553841590881, 0.5158089995384216, 0.3850407004356384, 0.3912012279033661, 0.2877788245677948, 0.30187875032424927, 0.20025724172592163, 0.34020906686782837, 0.47167572379112244, 0.3815076947212219, 0.5385518074035645, 0.20663535594940186, 0.37741178274154663, 0.29376763105392456, 0.3577961027622223, 0.21765607595443726, 0.14290691912174225, 0.3544510304927826, 0.07646653801202774, 0.1391337811946869, 0.019570577889680862]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10658828914165497, 0.44162610173225403, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14346696436405182, 0.1105659008026123, 0.04705679044127464, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14569434523582458, 0.006359750870615244, 0.06321832537651062, 0.009962446056306362, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14614860713481903, 0.0770370289683342, 0.14572308957576752, 0.11918944120407104, 0.003047030884772539, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16211360692977905, 0.1199408695101738, 0.008137544617056847, 0.026895001530647278, 0.022997038438916206, 0.0004772362008225173, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1276824176311493, 0.05415544658899307, 0.008876973763108253, 0.006533092353492975, 0.16286829113960266, 0.4191088378429413, 0.11241274327039719, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1310766041278839, 0.09720440953969955, 0.005617472343146801, 0.018550021573901176, 0.07474999874830246, 0.03211009502410889, 0.01561786886304617, 0.5897646546363831, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07142644375562668, 0.019657818600535393, 0.044225241988897324, 0.006672952324151993, 0.015112369321286678, 0.03715437650680542, 0.012035970576107502, 0.08684496581554413, 0.5578015446662903, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06384367495775223, 0.009399783797562122, 0.06692944467067719, 0.013825987465679646, 0.01438650768250227, 0.11814092099666595, 0.025182364508509636, 0.04756484180688858, 0.4922580420970917, 0.010614832863211632, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21570175886154175, 0.004600263200700283, 0.0039491499774158, 0.0010213260538876057, 0.00511409854516387, 0.00780195789411664, 0.0035460677463561296, 0.06005942076444626, 0.002209970960393548, 0.0011990047059953213, 0.010184505954384804, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15804870426654816, 0.10358668118715286, 0.018792977556586266, 0.0036350360605865717, 0.02226737141609192, 0.007843486964702606, 0.002713214373216033, 0.3624168336391449, 0.00397031893953681, 0.013842551037669182, 0.05391863361001015, 0.040338534861803055, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0703621581196785, 0.01676221750676632, 0.03283774480223656, 0.005265639629215002, 0.016811830922961235, 0.008307189680635929, 0.0008217993890866637, 0.06662888079881668, 0.006444453727453947, 0.0015952866524457932, 0.03341786190867424, 0.28674793243408203, 0.09830270707607269, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00274313404224813, 0.01220498327165842, 0.001565106911584735, 0.014617281965911388, 0.0015394951915368438, 0.00014163085143081844, 0.0032730719540268183, 0.04253724217414856, 0.01929563470184803, 0.0011092370841652155, 0.008900013752281666, 0.14250728487968445, 0.44352540373802185, 0.012739983387291431, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12441921979188919, 0.09727630764245987, 0.031539320945739746, 0.0390433706343174, 0.004017204977571964, 0.003718326799571514, 0.06902258098125458, 0.21229486167430878, 0.1692674309015274, 0.507585346698761, 0.24224399030208588, 0.4713107943534851, 0.22175242006778717, 0.1071210727095604, 0.001354279462248087, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11131177842617035, 0.045754965394735336, 0.13187335431575775, 0.021390099078416824, 0.2008819729089737, 0.1753949522972107, 0.029810786247253418, 0.1191062182188034, 0.0330519825220108, 0.021209293976426125, 0.007793682627379894, 0.004569755867123604, 0.21031485497951508, 0.08390634506940842, 0.11696453392505646, 0.2920413017272949, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.28942060470581055, 0.004874760750681162, 0.02575746178627014, 0.03629674017429352, 0.0339069589972496, 0.06067432835698128, 0.06949229538440704, 0.17600718140602112, 0.04042575880885124, 0.0021073101088404655, 0.002125136088579893, 0.0013297069817781448, 0.013164625503122807, 0.019647862762212753, 0.0625171884894371, 0.003036472015082836, 0.15673543512821198, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.29843398928642273, 0.006499151699244976, 0.002175502711907029, 0.00474061444401741, 0.012194045819342136, 0.024305779486894608, 0.05332900583744049, 0.20892387628555298, 0.06725459545850754, 0.0056669809855520725, 0.023831704631447792, 0.0038352743722498417, 0.008001168258488178, 0.00692057004198432, 0.006051996257156134, 0.0008782879449427128, 0.0244371946901083, 0.05294432491064072, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19362471997737885, 0.05030333995819092, 0.012831996195018291, 0.0028119448106735945, 0.011659904383122921, 0.0070129260420799255, 0.002673238283023238, 0.1857692450284958, 0.0015845311572775245, 0.003893241984769702, 0.009055504575371742, 0.013083641417324543, 0.009338575415313244, 0.007860029116272926, 0.009482803754508495, 0.019751103594899178, 0.03845033049583435, 0.03947525471448898, 0.03009573556482792, NaN, NaN, NaN, NaN, NaN, NaN], [0.08181142061948776, 0.013090993277728558, 0.025600923225283623, 0.0045991819351911545, 0.007844633422791958, 0.0066622160375118256, 0.0006054755649529397, 0.01805841363966465, 0.0025927021633833647, 0.0006796378293074667, 0.012531430460512638, 0.18806973099708557, 0.04688132554292679, 0.005460845306515694, 0.053047653287649155, 0.013497358188033104, 0.040136244148015976, 0.022071214392781258, 0.31691932678222656, 0.07654344290494919, NaN, NaN, NaN, NaN, NaN], [0.003571689361706376, 0.007330529857426882, 0.0009176949388347566, 0.011351491324603558, 0.0005700239562429488, 0.0001114286933443509, 0.0023790227714926004, 0.011217805556952953, 0.004490875173360109, 0.00038650527130812407, 0.0025467458181083202, 0.048559535294771194, 0.22723886370658875, 0.0019670024048537016, 0.0002542402071412653, 0.027445662766695023, 0.015111691318452358, 0.029036840423941612, 0.2144545316696167, 0.4208240211009979, 0.013829981908202171, NaN, NaN, NaN, NaN], [0.11162849515676498, 0.06633912026882172, 0.017337389290332794, 0.030477523803710938, 0.0024834000505506992, 0.001867939718067646, 0.03932232782244682, 0.1628599613904953, 0.14192035794258118, 0.2944621741771698, 0.21811458468437195, 0.42557209730148315, 0.2638176381587982, 0.14630424976348877, 0.0005040403339080513, 0.32521945238113403, 0.2411627173423767, 0.28287336230278015, 0.40539565682411194, 0.1682160645723343, 0.08244442939758301, 0.001218001707457006, NaN, NaN, NaN], [0.20973265171051025, 0.07712213695049286, 0.20427735149860382, 0.025535617023706436, 0.4053865373134613, 0.41131824254989624, 0.030548784881830215, 0.060146916657686234, 0.012079673819243908, 0.01592317223548889, 0.0048461491242051125, 0.0021770852617919445, 0.09957096725702286, 0.1170588806271553, 0.13386258482933044, 0.16141492128372192, 0.004613581579178572, 0.015190798789262772, 0.003683852730318904, 0.1389266699552536, 0.07006954401731491, 0.1815212517976761, 0.17825333774089813, NaN, NaN], [0.3360293209552765, 0.0046190484426915646, 0.024437543004751205, 0.03736568242311478, 0.023848971351981163, 0.05927197262644768, 0.0542423352599144, 0.09209144860506058, 0.023972967639565468, 0.000766670098528266, 0.0006589474505744874, 0.0007115502958185971, 0.00637162895873189, 0.012912634760141373, 0.014624576084315777, 0.0019432539120316505, 0.05897590517997742, 0.0038116518408060074, 0.0016802565660327673, 0.011611220426857471, 0.025170182809233665, 0.04455949738621712, 0.0020357028115540743, 0.14134161174297333, NaN], [0.187117338180542, 0.005916869733482599, 0.020901108160614967, 0.0559980571269989, 0.0324174202978611, 0.008547084406018257, 0.044511571526527405, 0.04880741238594055, 0.05289075896143913, 0.038245368748903275, 0.003611604683101177, 0.002279189880937338, 0.01790045015513897, 0.008863909170031548, 0.01127588003873825, 0.005861865822225809, 0.17173975706100464, 0.009364882484078407, 0.005221609957516193, 0.012455414980649948, 0.007264893501996994, 0.016177698969841003, 0.008824422955513, 0.18642237782478333, 0.0006185321253724396]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12484697252511978, 0.1276315450668335, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15841424465179443, 0.03031034581363201, 0.02654799446463585, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13769303262233734, 0.09575259685516357, 0.025977646932005882, 0.052591271698474884, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15085087716579437, 0.15096567571163177, 0.09222358465194702, 0.028469638898968697, 0.0012114758137613535, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16431185603141785, 0.07204771786928177, 0.05053501948714256, 0.012478960677981377, 0.05114812031388283, 0.00039714027661830187, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1666734665632248, 0.06891340762376785, 0.013632094487547874, 0.018171580508351326, 0.002599227475002408, 0.0009873181115835905, 0.0006481229793280363, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14423918724060059, 0.12251336872577667, 0.10176724940538406, 0.33380815386772156, 0.1583750993013382, 0.023372141644358635, 0.026839546859264374, 0.06730155646800995, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2790219187736511, 0.15446610748767853, 0.015893638134002686, 0.03619629144668579, 0.003051391802728176, 0.00038247412885539234, 0.0007123185787349939, 0.010222047567367554, 0.0010863485513255, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.26870372891426086, 0.10405707359313965, 0.00916238222271204, 0.058617573231458664, 0.0049601029604673386, 0.0005682760966010392, 0.004407011903822422, 0.03309918940067291, 0.0036104319151490927, 0.12174393236637115, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05985519662499428, 0.14893494546413422, 0.09544339030981064, 0.18974637985229492, 0.1120084673166275, 0.28269606828689575, 0.4275827407836914, 0.12184610962867737, 0.40095797181129456, 0.08120625466108322, 0.27448615431785583, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06809581816196442, 0.09586934000253677, 0.10229554027318954, 0.057183876633644104, 0.25635847449302673, 0.19582371413707733, 0.4237477481365204, 0.37648820877075195, 0.48733898997306824, 0.20777222514152527, 0.24944597482681274, 0.45371755957603455, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05513762682676315, 0.16880887746810913, 0.02300925739109516, 0.03029457852244377, 0.032050080597400665, 0.0745139941573143, 0.08332593739032745, 0.5048279166221619, 0.051856089383363724, 0.16889351606369019, 0.22218117117881775, 0.29087209701538086, 0.03443009778857231, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07503295689821243, 0.22708888351917267, 0.011672623455524445, 0.03240634873509407, 0.051372844725847244, 0.0555996336042881, 0.1055832952260971, 0.27455389499664307, 0.019383858889341354, 0.29115474224090576, 0.25329896807670593, 0.3762655258178711, 0.06596359610557556, 0.027243560180068016, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15851522982120514, 0.22386471927165985, 0.13473065197467804, 0.10273782163858414, 0.539568305015564, 0.23089595139026642, 0.2947250008583069, 0.2566256523132324, 0.08758009225130081, 0.04963833838701248, 0.026406293734908104, 0.02359875850379467, 0.06999926269054413, 0.014701825566589832, 0.008440684527158737, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1888987272977829, 0.22277534008026123, 0.06621028482913971, 0.04940320923924446, 0.013609242625534534, 0.012980671599507332, 0.0275713000446558, 0.5000426769256592, 0.025658253580331802, 0.28077542781829834, 0.21061377227306366, 0.1005047932267189, 0.0123829934746027, 0.005874408408999443, 0.04495157673954964, 0.007559731602668762, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10630622506141663, 0.1130438968539238, 0.04711592569947243, 0.14829613268375397, 0.0012987125664949417, 0.0009870391804724932, 0.002409427659586072, 0.10731083154678345, 0.010861101560294628, 0.02266101725399494, 0.22295407950878143, 0.37738272547721863, 0.21324896812438965, 0.09625840187072754, 0.01478838175535202, 0.004724964965134859, 0.13376930356025696, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0042772903107106686, 0.006450775545090437, 0.00791113544255495, 0.01871791109442711, 0.02349945716559887, 0.036059893667697906, 0.09560179710388184, 0.01157363597303629, 0.020316841080784798, 0.002858342370018363, 0.0015840751584619284, 0.03869258984923363, 0.04008479043841362, 0.0456826388835907, 0.061234306544065475, 0.32812535762786865, 0.4548730254173279, 0.048923686146736145, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.034464891999959946, 0.04304976761341095, 0.0730237364768982, 0.07959159463644028, 0.156441330909729, 0.14927342534065247, 0.37836754322052, 0.2500280439853668, 0.265838086605072, 0.038285933434963226, 0.0458042174577713, 0.2175784856081009, 0.055615901947021484, 0.32925114035606384, 0.23017114400863647, 0.5254709720611572, 0.3807608187198639, 0.4477500319480896, 0.3941081464290619, NaN, NaN, NaN, NaN, NaN, NaN], [0.024431752040982246, 0.057854264974594116, 0.009785568341612816, 0.015689833089709282, 0.010099711827933788, 0.022971261292696, 0.026158222928643227, 0.08270542323589325, 0.00771379703655839, 0.023359954357147217, 0.06216609850525856, 0.1452798992395401, 0.010090651921927929, 0.13497084379196167, 0.023736534640192986, 0.06422590464353561, 0.2799428105354309, 0.34307411313056946, 0.27198341488838196, 0.018816450610756874, NaN, NaN, NaN, NaN, NaN], [0.032250434160232544, 0.07008427381515503, 0.003495490411296487, 0.011726448312401772, 0.013232100754976273, 0.021211393177509308, 0.02240551821887493, 0.050749149173498154, 0.0020511853508651257, 0.034987252205610275, 0.05167752131819725, 0.10231753438711166, 0.017492327839136124, 0.0036121474113315344, 0.0030979528091847897, 0.14347726106643677, 0.4107814431190491, 0.18759746849536896, 0.28042495250701904, 0.02327493391931057, 0.023935986682772636, NaN, NaN, NaN, NaN], [0.17385193705558777, 0.24280618131160736, 0.0901411697268486, 0.1509939581155777, 0.5964542627334595, 0.18189039826393127, 0.25377142429351807, 0.39126867055892944, 0.11990400403738022, 0.04869762808084488, 0.06967514008283615, 0.0491257943212986, 0.1536286324262619, 0.04553663358092308, 0.006321897264569998, 0.008409527130424976, 0.01950901933014393, 0.028066763654351234, 0.039955586194992065, 0.08575458079576492, 0.02489100769162178, 0.0107131227850914, NaN, NaN, NaN], [0.18693126738071442, 0.25040745735168457, 0.07803116738796234, 0.06071358174085617, 0.018153348937630653, 0.012512190267443657, 0.012858238071203232, 0.18478038907051086, 0.008756724186241627, 0.14063727855682373, 0.16963867843151093, 0.06472224742174149, 0.008233368396759033, 0.010625114664435387, 0.04533438757061958, 0.004584541078656912, 0.04685693234205246, 0.3269248306751251, 0.13935554027557373, 0.022706659510731697, 0.015514994971454144, 0.09856907278299332, 0.009564985521137714, NaN, NaN], [0.10220125317573547, 0.06584151834249496, 0.046970706433057785, 0.16499453783035278, 0.0008504274883307517, 0.000721337681170553, 0.0015187861863523722, 0.050142802298069, 0.005332621280103922, 0.005509581416845322, 0.0572623535990715, 0.172898530960083, 0.12213093042373657, 0.0640687644481659, 0.004657925106585026, 0.002522988012060523, 0.028443191200494766, 0.29674383997917175, 0.3544806241989136, 0.20916549861431122, 0.09151047468185425, 0.014975211583077908, 0.0019209993770346045, 0.07398010790348053, NaN], [0.014319260604679585, 0.019726725295186043, 0.010809341445565224, 0.06728478521108627, 0.024899542331695557, 0.06927011907100677, 0.2726534307003021, 0.06849226355552673, 0.06274150311946869, 0.0032663261517882347, 0.007571991998702288, 0.011041088029742241, 0.0653790682554245, 0.06552072614431381, 0.10165777057409286, 0.05923810228705406, 0.20752549171447754, 0.1128133162856102, 0.041725482791662216, 0.12833572924137115, 0.10405165702104568, 0.2233171910047531, 0.10715138167142868, 0.3742898404598236, 0.43902406096458435]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12878015637397766, 0.05999259278178215, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16734670102596283, 0.0018487111665308475, 0.002184537472203374, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06620991975069046, 0.4480140209197998, 0.42379117012023926, 0.3748236298561096, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1498516947031021, 0.091057188808918, 0.11073686927556992, 0.05954570695757866, 0.00012444167805369943, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15789009630680084, 0.05178086459636688, 0.2272004932165146, 0.05532779544591904, 0.002530630910769105, 0.00011625503975665197, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05158510431647301, 0.42307329177856445, 0.4962795376777649, 0.6637455821037292, 0.11636865884065628, 0.027691489085555077, 0.059323750436306, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1440366506576538, 0.37752795219421387, 0.42684903740882874, 0.13104133307933807, 0.0449170246720314, 0.0360451340675354, 0.007316120434552431, 0.03281773626804352, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018571142107248306, 0.11001976579427719, 0.16728174686431885, 0.33147770166397095, 0.29621925950050354, 0.11174014210700989, 0.46736985445022583, 0.18467408418655396, 0.05186863988637924, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0193540807813406, 0.11997552216053009, 0.4339123070240021, 0.4291674792766571, 0.22741732001304626, 0.21840345859527588, 0.4310562014579773, 0.16546283662319183, 0.05634206160902977, 0.03477246314287186, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07166115939617157, 0.34385329484939575, 0.5272834300994873, 0.4769807457923889, 0.34829023480415344, 0.19288644194602966, 0.1752767115831375, 0.3240547180175781, 0.026788396760821342, 0.09653788805007935, 0.14339366555213928, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09568949043750763, 0.2010803371667862, 0.1452081948518753, 0.13633964955806732, 0.13264110684394836, 0.11369673907756805, 0.18754418194293976, 0.10573749244213104, 0.12209529429674149, 0.3772747814655304, 0.4260762333869934, 0.1448964774608612, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1600937843322754, 0.32966408133506775, 0.46643200516700745, 0.2761552929878235, 0.1128716766834259, 0.16030451655387878, 0.13808301091194153, 0.12019707262516022, 0.08980843424797058, 0.23569302260875702, 0.18699060380458832, 0.06252679228782654, 0.02190866880118847, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09671676903963089, 0.3181785047054291, 0.5044789910316467, 0.5311775803565979, 0.43058764934539795, 0.24623769521713257, 0.546705424785614, 0.20948244631290436, 0.5971428155899048, 0.15125280618667603, 0.21692372858524323, 0.08393274247646332, 0.0805632621049881, 0.11463441699743271, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17538371682167053, 0.005170984659343958, 0.01562126912176609, 0.012803001329302788, 0.0004321248270571232, 0.003303500125184655, 0.010391591116786003, 0.0083633316680789, 0.001453742035664618, 0.0005911564221605659, 0.001968160504475236, 0.018067756667733192, 0.0012553221313282847, 0.0006174716982059181, 0.0014710418181493878, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00964878499507904, 0.07296860218048096, 0.1732037365436554, 0.2482636272907257, 0.018695944920182228, 0.04061494395136833, 0.019565006718039513, 0.048743683844804764, 0.15582872927188873, 0.0506676621735096, 0.08059392869472504, 0.2691291868686676, 0.4701274335384369, 0.05269847437739372, 0.15863555669784546, 0.011098350398242474, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.023792432621121407, 0.42975902557373047, 0.3812340199947357, 0.23295366764068604, 0.2699258625507355, 0.32472288608551025, 0.04527096822857857, 0.2556793987751007, 0.5905154347419739, 0.8116171360015869, 0.684613823890686, 0.13916483521461487, 0.05671815946698189, 0.0401710644364357, 0.30002903938293457, 0.014873968437314034, 0.1109585389494896, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07327478379011154, 0.42313894629478455, 0.7821765542030334, 0.6752634048461914, 0.18926696479320526, 0.27897483110427856, 0.1972714066505432, 0.26650866866111755, 0.21928414702415466, 0.6610813736915588, 0.8023169040679932, 0.32853400707244873, 0.043605707585811615, 0.04177317023277283, 0.5147100687026978, 0.014965414069592953, 0.041893746703863144, 0.10476090759038925, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09543995559215546, 0.1369307041168213, 0.1906978189945221, 0.1367466300725937, 0.17180036008358002, 0.12260185182094574, 0.13847540318965912, 0.1559406965970993, 0.13510896265506744, 0.4644373655319214, 0.6843520402908325, 0.2938932180404663, 0.08134166151285172, 0.16692468523979187, 0.35020914673805237, 0.0983358696103096, 0.26928237080574036, 0.11322443932294846, 0.14002281427383423, NaN, NaN, NaN, NaN, NaN, NaN], [0.17294523119926453, 0.44891712069511414, 0.5596615076065063, 0.3151743412017822, 0.15508009493350983, 0.20398668944835663, 0.18162229657173157, 0.14380685985088348, 0.09279182553291321, 0.25614914298057556, 0.37145668268203735, 0.2047339379787445, 0.05775143578648567, 0.06389063596725464, 0.19947569072246552, 0.07508620619773865, 0.162083700299263, 0.036575064063072205, 0.05963924527168274, 0.02704720012843609, NaN, NaN, NaN, NaN, NaN], [0.09450869262218475, 0.5263407230377197, 0.5685468316078186, 0.6246378421783447, 0.5457862615585327, 0.4288109838962555, 0.7265884876251221, 0.4213257133960724, 0.7441360354423523, 0.37028953433036804, 0.4906199276447296, 0.24940308928489685, 0.2854059636592865, 0.25606390833854675, 0.06486664712429047, 0.03651905804872513, 0.215606689453125, 0.16494624316692352, 0.07126681506633759, 0.0978088453412056, 0.18553400039672852, NaN, NaN, NaN, NaN], [0.19233128428459167, 0.0069253402762115, 0.019198253750801086, 0.024288823828101158, 0.0006626379326917231, 0.0032825330272316933, 0.012745865620672703, 0.02121213637292385, 0.004573441576212645, 0.001344278221949935, 0.010449343360960484, 0.07998955249786377, 0.008849495090544224, 0.005957764107733965, 0.00281895836815238, 0.0006993816932663321, 0.0011300387559458613, 0.0034355262760072947, 0.006048144306987524, 0.0007683978183194995, 0.00029024321702308953, 0.0009215899626724422, NaN, NaN, NaN], [0.00490582175552845, 0.09978753328323364, 0.17523892223834991, 0.18201382458209991, 0.025161702185869217, 0.0351867638528347, 0.008898423984646797, 0.033712878823280334, 0.06612548977136612, 0.044598400592803955, 0.0818907842040062, 0.31783777475357056, 0.6522275805473328, 0.26521986722946167, 0.31609129905700684, 0.0543142631649971, 0.07028744369745255, 0.06436092406511307, 0.12702754139900208, 0.4257008731365204, 0.05356784537434578, 0.20406562089920044, 0.022904740646481514, NaN, NaN], [0.02933959849178791, 0.5456263422966003, 0.4945109188556671, 0.26123103499412537, 0.3237256109714508, 0.3705388903617859, 0.04209306091070175, 0.3351372182369232, 0.658141016960144, 0.8126230239868164, 0.8673186898231506, 0.28273773193359375, 0.11254162341356277, 0.17348313331604004, 0.7003386616706848, 0.1474425047636032, 0.36997753381729126, 0.41849759221076965, 0.091117262840271, 0.03724836930632591, 0.036747273057699203, 0.47380825877189636, 0.017722588032484055, 0.0920308530330658, NaN], [0.1429738998413086, 0.11406568437814713, 0.30407312512397766, 0.04420004412531853, 0.050888776779174805, 0.009020227938890457, 0.026264725252985954, 0.20154790580272675, 0.284900963306427, 0.16813665628433228, 0.6384625434875488, 0.35198092460632324, 0.0041788192465901375, 0.017796171829104424, 0.06702794879674911, 0.017356209456920624, 0.11703062057495117, 0.363391250371933, 0.08829980343580246, 0.0006652214215137064, 0.002063008025288582, 0.01232101023197174, 0.0010344748152419925, 0.005295889917761087, 0.10532692819833755]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1283751130104065, 0.06695841252803802, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [5.319380943547003e-05, 9.114345448324457e-05, 0.7905611991882324, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10777772217988968, 0.19019582867622375, 0.12566408514976501, 0.295462429523468, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [2.4899240088416263e-05, 2.9243250537547283e-05, 0.0014855118934065104, 3.888772698701359e-05, 0.9169090986251831, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [3.5349924587535497e-07, 4.689470642915694e-06, 0.02691131830215454, 1.3325815416465048e-05, 0.19568589329719543, 0.956480085849762, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08490768820047379, 0.04920955002307892, 0.012384464032948017, 0.04339546710252762, 0.010612337850034237, 0.05702771991491318, 0.7263003587722778, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16491760313510895, 0.04815620183944702, 0.0007595600909553468, 0.006606678944081068, 0.0006115635624155402, 0.0007167417788878083, 0.0015418223338201642, 0.0024032427463680506, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.012053201906383038, 0.18336322903633118, 0.0033893296495079994, 0.22584111988544464, 0.004534169565886259, 0.003455487545579672, 0.30805450677871704, 0.5499533414840698, 0.13390673696994781, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02224119007587433, 0.09969844669103622, 0.01827961951494217, 0.1828235685825348, 0.009660250507295132, 0.005268027540296316, 0.13511976599693298, 0.39505934715270996, 0.1772008240222931, 0.6222725510597229, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19008594751358032, 0.025696618482470512, 0.004118501208722591, 0.03605509176850319, 0.002144730417057872, 0.0023362801875919104, 0.16961191594600677, 0.015426162630319595, 0.016875047236680984, 0.017404966056346893, 0.032629188150167465, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1594686657190323, 0.03835373371839523, 0.021387629210948944, 0.028402678668498993, 0.12163796275854111, 0.1348690688610077, 0.027878204360604286, 0.016979072242975235, 0.009301519952714443, 0.047045812010765076, 0.103324294090271, 0.0978349894285202, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08206925541162491, 0.0482555516064167, 0.03066202998161316, 0.14434732496738434, 0.10149279236793518, 0.1536794900894165, 0.16425268352031708, 0.00592045346274972, 0.002011190867051482, 0.030538976192474365, 0.015422381460666656, 0.0400862954556942, 0.6933969259262085, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11962933838367462, 0.08867897093296051, 0.023231033235788345, 0.019267449155449867, 0.06578893214464188, 0.01314490009099245, 0.028238458558917046, 0.2009190320968628, 0.005505711771547794, 0.024347275495529175, 0.005847027525305748, 0.13606473803520203, 0.11386173218488693, 0.6883828639984131, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004133098293095827, 0.007605875376611948, 0.380069762468338, 0.01569206453859806, 0.3162667751312256, 0.06185031309723854, 0.003268925240263343, 0.007663627155125141, 0.00711404625326395, 0.0016827658982947469, 0.002885768422856927, 0.009058460593223572, 0.0104479705914855, 0.0013903286308050156, 0.9176042079925537, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19946889579296112, 0.004915847908705473, 0.0015343156410381198, 0.012221671640872955, 0.003153382334858179, 0.0001576353097334504, 0.0020530277397483587, 0.003957398701459169, 0.010446527041494846, 0.012547693215310574, 0.03473197668790817, 0.06650777161121368, 0.014228541404008865, 0.02601468935608864, 0.0018418998224660754, 0.08826413750648499, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14040440320968628, 0.29221969842910767, 0.09665771573781967, 0.2947876751422882, 0.00611721258610487, 0.012681002728641033, 0.7610099911689758, 0.27993685007095337, 0.19895455241203308, 0.07963719218969345, 0.025141140446066856, 0.30299919843673706, 0.4374280273914337, 0.12315846234560013, 0.011889583431184292, 0.00027308438438922167, 0.03226177766919136, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22362156212329865, 0.19648011028766632, 0.02122899703681469, 0.12822405993938446, 0.013841216452419758, 0.009505078196525574, 0.4746513366699219, 0.1753886640071869, 0.09167484194040298, 0.038334570825099945, 0.04122844338417053, 0.14653263986110687, 0.17874038219451904, 0.023550381883978844, 0.014212163165211678, 0.001423373818397522, 0.0059451088309288025, 0.09707646816968918, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.167328879237175, 0.06208498775959015, 0.010482249781489372, 0.03574186563491821, 0.0675959512591362, 0.06477286666631699, 0.04995346441864967, 0.05412250757217407, 0.009984727017581463, 0.03347667679190636, 0.11074735969305038, 0.16135196387767792, 0.07774785906076431, 0.01735900156199932, 0.007863441482186317, 0.019525114446878433, 0.005842071026563644, 0.1275986284017563, 0.0955328494310379, NaN, NaN, NaN, NaN, NaN, NaN], [0.05032582953572273, 0.03989394009113312, 0.02223959006369114, 0.07248460501432419, 0.04305185005068779, 0.04872481897473335, 0.09144517779350281, 0.0032577940728515387, 0.000561918190214783, 0.015125684440135956, 0.018474824726581573, 0.0519116036593914, 0.7149417400360107, 0.023930398747324944, 0.005549557972699404, 0.0027118371799588203, 0.08418004959821701, 0.22684048116207123, 0.052481237798929214, 0.7548789381980896, NaN, NaN, NaN, NaN, NaN], [0.14971917867660522, 0.12296220660209656, 0.03256092593073845, 0.015910452231764793, 0.08324312418699265, 0.010959222912788391, 0.03249981626868248, 0.2630986273288727, 0.0023772413842380047, 0.021863164380192757, 0.014683729968965054, 0.3797665238380432, 0.26638853549957275, 0.6724205613136292, 0.015757206827402115, 0.01569446735084057, 0.01732691004872322, 0.06738004088401794, 0.17602917551994324, 0.12501026690006256, 0.6636221408843994, NaN, NaN, NaN, NaN], [0.0045495470985770226, 0.007598123978823423, 0.48235079646110535, 0.017675379291176796, 0.30638325214385986, 0.03773635998368263, 0.0025513810105621815, 0.013349749147891998, 0.011474208906292915, 0.002688285429030657, 0.009704438969492912, 0.024301802739501, 0.030528949573636055, 0.006023744586855173, 0.9289764761924744, 0.008095184341073036, 0.015121471136808395, 0.003912394400686026, 0.005678378511220217, 0.005922055337578058, 0.0012866485631093383, 0.9431078433990479, NaN, NaN, NaN], [0.25144028663635254, 0.013477480970323086, 0.004043558146804571, 0.02197866141796112, 0.005731666926294565, 0.00035365403164178133, 0.0028230457101017237, 0.003569219959899783, 0.00616231607273221, 0.023324957117438316, 0.07691453397274017, 0.11847300082445145, 0.025281671434640884, 0.05239935964345932, 0.002384425140917301, 0.16120819747447968, 0.011955172754824162, 0.09212952852249146, 0.03993848338723183, 0.017148757353425026, 0.01459744293242693, 0.0018050760263577104, 0.08139479160308838, NaN, NaN], [0.08713241666555405, 0.22884246706962585, 0.12139283120632172, 0.21789073944091797, 0.00419022049754858, 0.011025986634194851, 0.8093750476837158, 0.24520863592624664, 0.11868450790643692, 0.037659380584955215, 0.014297883957624435, 0.35379931330680847, 0.4382935166358948, 0.17632676661014557, 0.006937071681022644, 0.0007303177262656391, 0.027538392692804337, 0.0690605565905571, 0.3237524628639221, 0.41753751039505005, 0.09520361572504044, 0.013310365378856659, 0.0003602981742005795, 0.032565031200647354, NaN], [0.01268855668604374, 0.009620537050068378, 0.0011078648967668414, 0.01395372860133648, 0.00034480926115065813, 0.0002369812864344567, 0.14032205939292908, 0.12187758088111877, 0.004498081747442484, 6.632315489696339e-05, 0.01873306930065155, 0.07693066447973251, 0.06357964873313904, 0.012718681246042252, 0.02489433065056801, 0.4312428832054138, 0.013737366534769535, 0.0326746366918087, 0.34456172585487366, 0.0668448805809021, 0.006646350026130676, 0.04233057424426079, 0.4123155176639557, 0.007851892150938511, 0.43338367342948914]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13150663673877716, 0.013105388730764389, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16704899072647095, 0.0014066778821870685, 0.003860085504129529, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14769184589385986, 0.005059333052486181, 0.0053715878166258335, 0.026609797030687332, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15381431579589844, 0.05056624114513397, 0.015615872107446194, 0.004382571205496788, 0.00015187788812909275, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16606314480304718, 0.03878505155444145, 0.01631396822631359, 0.011268166825175285, 0.00036908386391587555, 0.00010962320084217936, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16556474566459656, 0.059035927057266235, 0.018687130883336067, 0.020593103021383286, 0.0006985706277191639, 0.0006753651541657746, 0.01174053642898798, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16100119054317474, 0.03705580160021782, 0.08672276139259338, 0.05696912482380867, 0.00507472176104784, 0.006951047107577324, 0.0023692583199590445, 0.004235508386045694, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.288095086812973, 0.011840847320854664, 0.005622565280646086, 0.00535928551107645, 0.0008760345517657697, 0.0004899614141322672, 0.001179057639092207, 0.0010409504175186157, 0.0012723063118755817, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2984195351600647, 0.024577315896749496, 0.008883590810000896, 0.0237559974193573, 0.001871026586741209, 0.002048116410151124, 0.00452006608247757, 0.0067189703695476055, 0.002311990363523364, 0.0035932722967118025, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19755195081233978, 0.08605571836233139, 0.04371126368641853, 0.045333728194236755, 0.005393510684370995, 0.006479238625615835, 0.018500106409192085, 0.012994848191738129, 0.011254888959228992, 0.03004884347319603, 0.011813223361968994, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05165635421872139, 0.44527125358581543, 0.31059694290161133, 0.6649516224861145, 0.027770839631557465, 0.02873762883245945, 0.17512862384319305, 0.06940869987010956, 0.1633579134941101, 0.028000785037875175, 0.003091411432251334, 0.016245586797595024, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19151811301708221, 0.1383962333202362, 0.13229386508464813, 0.35712042450904846, 0.18756243586540222, 0.2871147096157074, 0.5138459801673889, 0.22405852377414703, 0.28785935044288635, 0.04021993279457092, 0.0012617700267583132, 0.004019713494926691, 0.003964945673942566, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24189773201942444, 0.08955204486846924, 0.32067012786865234, 0.20245005190372467, 0.11740265786647797, 0.08460556715726852, 0.044664137065410614, 0.025831788778305054, 0.07413194328546524, 0.0068964180536568165, 0.002961511956527829, 0.005619046278297901, 0.0014741680352017283, 0.00546230049803853, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1724659651517868, 0.13219435513019562, 0.15014058351516724, 0.12075512856245041, 0.0006761215627193451, 0.10174072533845901, 0.19516822695732117, 0.009559075348079205, 0.057678524404764175, 0.08239483833312988, 0.0039215064607560635, 0.0027616096194833517, 0.013109313324093819, 0.002305442001670599, 0.00021083203318994492, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19843007624149323, 0.15979865193367004, 0.14398488402366638, 0.41609427332878113, 0.010126790963113308, 0.04840107262134552, 0.7232485413551331, 0.22829605638980865, 0.34322667121887207, 0.08224418759346008, 0.03167981281876564, 0.020198417827486992, 0.013381149619817734, 0.0009459191933274269, 0.006438484415411949, 0.008794432505965233, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.30347728729248047, 0.04726674035191536, 0.010849116370081902, 0.12094812840223312, 0.0013257962418720126, 0.0025908409152179956, 0.0014983253786340356, 0.03437754884362221, 0.009621781297028065, 0.006184253375977278, 0.00671237800270319, 0.0018636187305673957, 0.01123903226107359, 0.0035993149504065514, 0.0012990115210413933, 0.00021464838937390596, 0.001025065197609365, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2792417109012604, 0.26782968640327454, 0.03489779308438301, 0.07551994919776917, 0.018111348152160645, 0.04002813994884491, 0.03850500285625458, 0.11152958869934082, 0.21995633840560913, 0.07949108630418777, 0.0037619988434016705, 0.03436713665723801, 0.020695386454463005, 0.017524488270282745, 0.010141805745661259, 0.003556826151907444, 0.0020958345849066973, 0.0058519174344837666, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05386974662542343, 0.6086578965187073, 0.22683310508728027, 0.5828835964202881, 0.02668178826570511, 0.03663201630115509, 0.14977867901325226, 0.2173178791999817, 0.2744499444961548, 0.08338183909654617, 0.008825525641441345, 0.06588608771562576, 0.5592238306999207, 0.17532478272914886, 0.006846817210316658, 0.028904464095830917, 0.01721598580479622, 0.006393561605364084, 0.010461881756782532, NaN, NaN, NaN, NaN, NaN, NaN], [0.24167264997959137, 0.2504684031009674, 0.15247754752635956, 0.4417489171028137, 0.37691444158554077, 0.47509273886680603, 0.6227271556854248, 0.6949021220207214, 0.5199849605560303, 0.14203055202960968, 0.006932773161679506, 0.02713918127119541, 0.026524275541305542, 0.28478434681892395, 0.05304509028792381, 0.03063105419278145, 0.007391192018985748, 0.001299944007769227, 0.0022179351653903723, 0.0017378581687808037, NaN, NaN, NaN, NaN, NaN], [0.3587647080421448, 0.13152657449245453, 0.3170546591281891, 0.1872878074645996, 0.17338471114635468, 0.16099165380001068, 0.050314128398895264, 0.07316549867391586, 0.1506616473197937, 0.027928102761507034, 0.013985591009259224, 0.03077181987464428, 0.00928373821079731, 0.01458327379077673, 0.34401679039001465, 0.1675042062997818, 0.008024912327528, 0.00340651860460639, 0.001158604514785111, 0.0004595925274770707, 0.0022153020836412907, NaN, NaN, NaN, NaN], [0.18021628260612488, 0.21554027497768402, 0.22428971529006958, 0.28362634778022766, 0.0019759181886911392, 0.19364571571350098, 0.3129161596298218, 0.05571373924612999, 0.43670228123664856, 0.5364305973052979, 0.045233964920043945, 0.02291695959866047, 0.15668357908725739, 0.03788933902978897, 0.0009749932214617729, 0.15011590719223022, 0.009233620017766953, 0.023490505293011665, 0.0018092861864715815, 0.01433361042290926, 0.002351803006604314, 0.00025271173217333853, NaN, NaN, NaN], [0.18984580039978027, 0.30305740237236023, 0.22004783153533936, 0.5488721132278442, 0.023633448407053947, 0.10360189527273178, 0.8517335653305054, 0.6748489141464233, 0.77315753698349, 0.4876308739185333, 0.2048063576221466, 0.14540305733680725, 0.08473058044910431, 0.012403973378241062, 0.06795734912157059, 0.17164894938468933, 0.18992502987384796, 0.12247806042432785, 0.011528578586876392, 0.009636401198804379, 0.0008312705904245377, 0.013430905528366566, 0.011612125672399998, NaN, NaN], [0.3384567201137543, 0.062264904379844666, 0.014819102361798286, 0.14853152632713318, 0.0019540644716471434, 0.003596463706344366, 0.001872691442258656, 0.11878995597362518, 0.02639206312596798, 0.009769541211426258, 0.011811794713139534, 0.006684192456305027, 0.045877717435359955, 0.019279729574918747, 0.005480214022099972, 0.003932234365493059, 0.006437724456191063, 0.0240105502307415, 0.0011211916571483016, 0.004233745392411947, 0.001469226786866784, 0.0013713098596781492, 0.00014342667418532073, 0.0008160521974787116, NaN], [0.1837155818939209, 0.5941455364227295, 0.2251758873462677, 0.3662757873535156, 0.039659783244132996, 0.3226933479309082, 0.014135366305708885, 0.028798755258321762, 0.10863638669252396, 0.34925851225852966, 0.03930900990962982, 0.08864527195692062, 0.10118203610181808, 0.05801505595445633, 0.11320658773183823, 0.05595846846699715, 0.0026757779996842146, 0.007132661063224077, 0.010286321863532066, 0.015962811186909676, 0.004528969060629606, 0.01888921484351158, 0.004036444239318371, 0.00027040645363740623, 0.0002387895801803097]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12851747870445251, 0.06451001763343811, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16148854792118073, 0.04709945246577263, 0.0016553826862946153, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12575848400592804, 0.13552792370319366, 0.1085570901632309, 0.11512085795402527, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14333586394786835, 0.24668441712856293, 0.19262480735778809, 0.13920731842517853, 0.0020065978169441223, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1578390896320343, 0.19358907639980316, 0.02251395769417286, 0.04702039062976837, 0.018520673736929893, 0.0005939522525295615, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14088943600654602, 0.05360155552625656, 0.043673839420080185, 0.0087194312363863, 0.14876413345336914, 0.3311525881290436, 0.029076436534523964, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11886978894472122, 0.08032860606908798, 0.053777631372213364, 0.06359982490539551, 0.49348562955856323, 0.7690801620483398, 0.032007213681936264, 0.00921344943344593, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.013988303020596504, 0.031309448182582855, 0.021422432735562325, 0.015959911048412323, 0.13852538168430328, 0.7482463121414185, 0.1306946873664856, 0.0026366086676716805, 0.006285007111728191, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02759428508579731, 0.1341203898191452, 0.1143924742937088, 0.04895513132214546, 0.2507959306240082, 0.47495928406715393, 0.24884849786758423, 0.04048554226756096, 0.06435439735651016, 0.02207104302942753, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08376637101173401, 0.08644555509090424, 0.08414626121520996, 0.08246676623821259, 0.09393073618412018, 0.2536129355430603, 0.09570588916540146, 0.057335685938596725, 0.27625876665115356, 0.23640654981136322, 0.22554923593997955, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16592197120189667, 0.037314873188734055, 0.020350072532892227, 0.005164262373000383, 0.009123047813773155, 0.005826999898999929, 0.003451529424637556, 0.017567342147231102, 0.055315494537353516, 0.2317170798778534, 0.05933540314435959, 0.06010079011321068, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07053745537996292, 0.19491763412952423, 0.06705262511968613, 0.08265279233455658, 0.006405644118785858, 0.0031596925109624863, 0.005410268437117338, 0.030676638707518578, 0.08307406306266785, 0.20774710178375244, 0.4213918149471283, 0.23337899148464203, 0.08583765476942062, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13580749928951263, 0.17484943568706512, 0.09017936140298843, 0.11502011120319366, 0.015199831686913967, 0.008567527867853642, 0.04639086127281189, 0.16773870587348938, 0.16907723248004913, 0.43436557054519653, 0.2870768904685974, 0.10786425322294235, 0.08931463956832886, 0.011009148322045803, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1727631837129593, 0.039101891219615936, 0.0065339612774550915, 0.0278339721262455, 0.004674504045397043, 0.014613990671932697, 0.03457005321979523, 0.04850766807794571, 0.02412491664290428, 0.009369020350277424, 0.022906647995114326, 0.04899173229932785, 0.01023520715534687, 0.0022774694953113794, 7.664388976991177e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08213489502668381, 0.3905046880245209, 0.07204636186361313, 0.08312273025512695, 0.02625700645148754, 0.02937941811978817, 0.04131421819329262, 0.05289716273546219, 0.16493423283100128, 0.290347158908844, 0.47713640332221985, 0.44352003931999207, 0.11574649810791016, 0.0847686156630516, 0.047198787331581116, 0.1300322264432907, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.056048911064863205, 0.04177262261509895, 0.18134142458438873, 0.04556399583816528, 0.1435631662607193, 0.2900937497615814, 0.07549438625574112, 0.08105770498514175, 0.08377190679311752, 0.011481991037726402, 0.017289845272898674, 0.006863615941256285, 0.013694294728338718, 0.13657283782958984, 0.0735873132944107, 0.3659329116344452, 0.0919225886464119, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06230737641453743, 0.038521286100149155, 0.05914388969540596, 0.03398321941494942, 0.13657090067863464, 0.19265799224376678, 0.07424072921276093, 0.08660972863435745, 0.10718739032745361, 0.16533604264259338, 0.0767570361495018, 0.03204379230737686, 0.028188396245241165, 0.21943823993206024, 0.11997849494218826, 0.2698959410190582, 0.12308003753423691, 0.45223531126976013, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18667352199554443, 0.0350969135761261, 0.030425790697336197, 0.0065561928786337376, 0.028277983888983727, 0.010725672356784344, 0.005219776649028063, 0.03378060460090637, 0.04241056367754936, 0.18939200043678284, 0.06338198482990265, 0.08136797696352005, 0.004227515775710344, 0.024540461599826813, 0.057830944657325745, 0.038525767624378204, 0.0177453625947237, 0.06933332234621048, 0.08866386860609055, NaN, NaN, NaN, NaN, NaN, NaN], [0.04736897721886635, 0.0950922816991806, 0.05233628675341606, 0.0639958381652832, 0.009022187441587448, 0.002768130972981453, 0.005348078906536102, 0.016458049416542053, 0.03350484371185303, 0.1584910899400711, 0.3849281072616577, 0.30566492676734924, 0.08282434195280075, 0.02534077689051628, 0.01897522434592247, 0.013481524772942066, 0.08136109262704849, 0.25969398021698, 0.2513872981071472, 0.07361149042844772, NaN, NaN, NaN, NaN, NaN], [0.15279658138751984, 0.09928575158119202, 0.0573631152510643, 0.10790141671895981, 0.026906443759799004, 0.012519991025328636, 0.06774256378412247, 0.1448669582605362, 0.07826853543519974, 0.4991803467273712, 0.34429702162742615, 0.12145370990037918, 0.10719165205955505, 0.008088642731308937, 0.007662023417651653, 0.013441860675811768, 0.13362208008766174, 0.34251537919044495, 0.10342243313789368, 0.07045409828424454, 0.010391364805400372, NaN, NaN, NaN, NaN], [0.1865139603614807, 0.02971193566918373, 0.005512321833521128, 0.039164237678050995, 0.007472363766282797, 0.012969624251127243, 0.03476016968488693, 0.0836154893040657, 0.050758667290210724, 0.017821883782744408, 0.08676476776599884, 0.13045690953731537, 0.03245873004198074, 0.009119128808379173, 7.800521416356787e-05, 0.0006276130443438888, 0.0024839011020958424, 0.06682475656270981, 0.06347990781068802, 0.009879485704004765, 0.0017003080574795604, 6.444661266868934e-05, NaN, NaN, NaN], [0.029208103194832802, 0.15452517569065094, 0.02615012601017952, 0.034968301653862, 0.030517179518938065, 0.023491270840168, 0.02012590691447258, 0.01683984510600567, 0.047155413776636124, 0.1569623053073883, 0.34555378556251526, 0.29876279830932617, 0.06633269041776657, 0.090775266289711, 0.05117363482713699, 0.14964616298675537, 0.024973956868052483, 0.22028914093971252, 0.5953715443611145, 0.10930891335010529, 0.05826140195131302, 0.08348876982927322, 0.2024080604314804, NaN, NaN], [0.023966457694768906, 0.008770916610956192, 0.0534873865544796, 0.015555462799966335, 0.07408829033374786, 0.12750747799873352, 0.026930494233965874, 0.023400133475661278, 0.02665247581899166, 0.00316479685716331, 0.004739005118608475, 0.002742160577327013, 0.006070322822779417, 0.09564805775880814, 0.029174519702792168, 0.5144217014312744, 0.05911846086382866, 0.020064763724803925, 0.0023497287184000015, 0.004584830719977617, 0.10225256532430649, 0.05520752817392349, 0.4466201066970825, 0.09660884737968445, NaN], [0.18986307084560394, 0.036011889576911926, 0.08335232734680176, 0.12826237082481384, 0.08758756518363953, 0.027860891073942184, 0.10198243707418442, 0.0981309786438942, 0.17985263466835022, 0.11864234507083893, 0.08274368196725845, 0.1066904067993164, 0.051979877054691315, 0.06548189371824265, 0.03337343409657478, 0.0824524462223053, 0.012718076817691326, 0.0349668525159359, 0.03024965338408947, 0.01082769688218832, 0.0127665214240551, 0.014164488762617111, 0.01925024762749672, 0.0028478982858359814, 0.0007362329051829875]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12774905562400818, 0.07772441953420639, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.058547187596559525, 0.7868303656578064, 0.02677525207400322, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12958122789859772, 0.05996095389127731, 0.20109553635120392, 0.07473170012235641, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11586850136518478, 0.18037959933280945, 0.354478657245636, 0.6275972127914429, 0.01217791810631752, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04329086095094681, 0.2822243273258209, 0.5110569596290588, 0.8230794668197632, 0.28263914585113525, 0.006951561663299799, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15041278302669525, 0.01652364432811737, 0.09004879742860794, 0.1228649914264679, 0.03705046698451042, 0.03279988467693329, 0.012472960166633129, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005692727863788605, 0.004583822097629309, 0.011303454637527466, 0.06351188570261002, 0.07110948860645294, 0.03377191722393036, 0.8937738537788391, 0.1077374666929245, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1957636922597885, 0.00532554043456912, 0.2672942280769348, 0.07843183726072311, 0.01169322058558464, 0.006695515010505915, 0.022856300696730614, 0.03495524823665619, 0.2056257426738739, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21948350965976715, 0.003219911362975836, 0.13064762949943542, 0.017335020005702972, 0.004487968049943447, 0.006097455509006977, 0.0023269150406122208, 0.014221499674022198, 0.1740167737007141, 0.05570632219314575, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027252521365880966, 0.05625513195991516, 0.024279700592160225, 0.009296371601521969, 0.04113621264696121, 0.04445572942495346, 0.05016031116247177, 0.300394743680954, 0.219209223985672, 0.5284181833267212, 0.13528388738632202, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16918426752090454, 0.005196947604417801, 0.010393726639449596, 0.0008839815272949636, 0.18853645026683807, 0.23955073952674866, 0.03703731670975685, 0.018581384792923927, 0.07692746073007584, 0.05213537812232971, 0.05520249530673027, 0.03837481513619423, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21910618245601654, 0.012340836226940155, 0.011061819270253181, 0.004421355202794075, 0.01345156505703926, 0.015948239713907242, 0.001919197733514011, 0.0006712953327223659, 0.0014401280786842108, 0.0009498890140093863, 0.0011606297921389341, 0.0013843519845977426, 0.005138876382261515, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12592341005802155, 0.022789308801293373, 0.01544136367738247, 0.05098855495452881, 0.006733328104019165, 0.0011512627825140953, 0.0067494111135602, 0.03519098460674286, 0.08756479620933533, 0.04847756400704384, 0.13774195313453674, 0.07365753501653671, 0.19525301456451416, 0.019442297518253326, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04374772310256958, 0.10635814815759659, 0.1203576922416687, 0.4972172677516937, 0.09716533124446869, 0.05867829546332359, 0.13453392684459686, 0.39353471994400024, 0.6331138610839844, 0.33491814136505127, 0.5983138680458069, 0.3633559048175812, 0.6357010006904602, 0.7792285084724426, 0.005659972317516804, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05199728533625603, 0.014302223920822144, 0.13574257493019104, 0.05407930538058281, 0.010633953846991062, 0.007459194865077734, 0.0004102779785171151, 0.01107444055378437, 0.16451390087604523, 0.19313758611679077, 0.018386593088507652, 0.03492085263133049, 0.1390746384859085, 0.6526300311088562, 0.08304706960916519, 0.27643677592277527, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0008206118363887072, 0.0011099595576524734, 0.0005428412696346641, 0.0013029578840360045, 0.0009422241128049791, 0.001036918954923749, 0.00015340711979661137, 0.003300317795947194, 0.0019372785463929176, 0.003245894331485033, 0.0010756017873063684, 0.0009867959888651967, 0.04242069274187088, 0.25679609179496765, 0.03714281693100929, 0.46563825011253357, 0.052469443529844284, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0011551693314686418, 0.0015016108518466353, 0.00018865184392780066, 0.0004620797117240727, 0.001353209256194532, 0.001276124152354896, 0.001269699539989233, 0.02504812367260456, 0.016660472378134727, 0.007664685603231192, 0.000621759332716465, 0.0039494638331234455, 0.05373308062553406, 0.5797222256660461, 0.04267296567559242, 0.3308492600917816, 0.22605444490909576, 0.03655111417174339, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18345873057842255, 0.006115049123764038, 0.007153322920203209, 0.00125643250066787, 0.15791349112987518, 0.17755654454231262, 0.06167090684175491, 0.028255566954612732, 0.04990806803107262, 0.014394938945770264, 0.013118196278810501, 0.02539716847240925, 0.00894339382648468, 0.04024626687169075, 0.05642623454332352, 0.04561464861035347, 0.029457826167345047, 0.09210912138223648, 0.1002524197101593, NaN, NaN, NaN, NaN, NaN, NaN], [0.2828649580478668, 0.011994204483926296, 0.006339475512504578, 0.0030444697476923466, 0.006948052905499935, 0.008767204359173775, 0.0014567734906449914, 0.00018795454525388777, 0.00020330831466708332, 0.0001539710647193715, 0.0004007722018286586, 0.0012242270167917013, 0.001961026806384325, 0.0007920600473880768, 0.002005743095651269, 0.00011892847396666184, 0.00023868663993198425, 0.0018499011639505625, 0.002196513582020998, 0.004604275804013014, NaN, NaN, NaN, NaN, NaN], [0.128562331199646, 0.014782274141907692, 0.007007280830293894, 0.02549830637872219, 0.0029198189731687307, 0.0006880113505758345, 0.0037798655685037374, 0.009390356950461864, 0.008127862587571144, 0.00817851535975933, 0.024966517463326454, 0.0308842696249485, 0.07813727855682373, 0.003280356992036104, 0.001509596244432032, 0.010023933835327625, 0.08412036299705505, 0.1339937299489975, 0.13076454401016235, 0.2572615444660187, 0.02603374607861042, NaN, NaN, NaN, NaN], [0.018602287396788597, 0.034721970558166504, 0.034974802285432816, 0.21532808244228363, 0.037075310945510864, 0.013384592719376087, 0.039282385259866714, 0.11046459525823593, 0.17542847990989685, 0.05914776027202606, 0.1884417086839676, 0.12911023199558258, 0.24417443573474884, 0.327198326587677, 0.0006843891460448503, 0.1527024656534195, 0.4776603579521179, 0.37270504236221313, 0.4335513412952423, 0.6841917634010315, 0.8031085133552551, 0.004920803010463715, NaN, NaN, NaN], [0.05855157971382141, 0.021276630461215973, 0.13662834465503693, 0.05244326964020729, 0.015041220933198929, 0.007642571348696947, 0.00036013865610584617, 0.004098850768059492, 0.033856965601444244, 0.05778159946203232, 0.005442364141345024, 0.017580043524503708, 0.04633626714348793, 0.3112163841724396, 0.03644357994198799, 0.0868009626865387, 0.020123973488807678, 0.03773906081914902, 0.06257405877113342, 0.2619801461696625, 0.7497928738594055, 0.19582624733448029, 0.4370352327823639, NaN, NaN], [0.0006882869056425989, 0.0005033394554629922, 0.00030677669565193355, 0.001028614118695259, 0.00036578672006726265, 0.0005035633221268654, 5.2447539928834885e-05, 0.0006442382582463324, 0.0003597578906919807, 0.0002600657753646374, 8.536354289390147e-05, 0.00018848010222427547, 0.00940172839909792, 0.03475101292133331, 0.004768407437950373, 0.09523987770080566, 0.0036924693267792463, 0.0034024319611489773, 0.001987446565181017, 0.06484154611825943, 0.36614781618118286, 0.06470755487680435, 0.48020803928375244, 0.12385622411966324, NaN], [0.13044977188110352, 0.023216107860207558, 0.019304566085338593, 0.018173998221755028, 0.12614674866199493, 0.04656239226460457, 0.015089727938175201, 0.04114385321736336, 0.018700774759054184, 0.020505733788013458, 0.009310846216976643, 0.02222343534231186, 0.22412429749965668, 0.3900958001613617, 0.1100122332572937, 0.14125461876392365, 0.09716113656759262, 0.14588865637779236, 0.12185929715633392, 0.5472521185874939, 0.7197717428207397, 0.31834876537323, 0.37092098593711853, 0.2838878929615021, 0.0011011400492861867]]], [[[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16810710728168488, 0.017288343980908394, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12647151947021484, 0.25301796197891235, 0.03169602155685425, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15976493060588837, 0.03159531578421593, 0.05609510838985443, 0.007400199305266142, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16021955013275146, 0.26433131098747253, 0.07329617440700531, 0.11257290840148926, 0.001577433431521058, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22870834171772003, 0.043985288590192795, 0.04075293987989426, 0.0035545979626476765, 0.0075324228964746, 0.00014864112017676234, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.047688793390989304, 0.14664201438426971, 0.03658692538738251, 0.6408759355545044, 0.43873438239097595, 0.20478755235671997, 0.00511742290109396, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07761336117982864, 0.07061085104942322, 0.041570939123630524, 0.1916733682155609, 0.159084752202034, 0.3477410674095154, 0.5968326330184937, 0.004175147507339716, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07191380113363266, 0.05497179180383682, 0.3517811894416809, 0.9035707116127014, 0.14233137667179108, 0.1767667979001999, 0.04289708659052849, 0.00892895832657814, 0.001834895578213036, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21536989510059357, 0.19956108927726746, 0.3517906069755554, 0.458966463804245, 0.09842110425233841, 0.08277469873428345, 0.03296331316232681, 0.04812879115343094, 0.009344152174890041, 0.006280441302806139, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24051256477832794, 0.10134825110435486, 0.04672827199101448, 0.021085558459162712, 0.02245912328362465, 0.026835136115550995, 0.005604758393019438, 0.028772464022040367, 0.01708872988820076, 0.008745603263378143, 0.02540087327361107, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18141932785511017, 0.024432087317109108, 0.0408032201230526, 0.004596539307385683, 0.0778040885925293, 0.025828123092651367, 0.04467899724841118, 0.0885351300239563, 0.026468785479664803, 0.030213410034775734, 0.16925157606601715, 0.003915028180927038, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0821177139878273, 0.0264634620398283, 0.01841210387647152, 0.010007970035076141, 0.006691556889563799, 0.0167625043541193, 0.0005595253896899521, 0.020632673054933548, 0.0021230748388916254, 0.10790054500102997, 0.5654488801956177, 0.3003200888633728, 0.01571945659816265, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0726943239569664, 0.09770844131708145, 0.050709616392850876, 0.04594658315181732, 0.009083828888833523, 0.024983327835798264, 0.021837929263710976, 0.11926575750112534, 0.11382617056369781, 0.22249171137809753, 0.3826439678668976, 0.22458447515964508, 0.24531354010105133, 0.05176876112818718, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.28158777952194214, 0.045097555965185165, 0.02117414027452469, 0.05809389799833298, 0.0014524150174111128, 0.006964406464248896, 0.010582090355455875, 0.011965163983404636, 0.02265000529587269, 0.020484870299696922, 0.019729144871234894, 0.028731632977724075, 0.004907289054244757, 0.0051048253662884235, 0.00039794077747501433, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18024474382400513, 0.03336771950125694, 0.025161737576127052, 0.03788529708981514, 0.010167604312300682, 0.0039537386037409306, 3.701886089402251e-05, 0.046124417334795, 0.08654022216796875, 0.06664562225341797, 0.11276466399431229, 0.09791301190853119, 0.08758807182312012, 0.277656227350235, 0.5478507876396179, 0.06896418333053589, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10793236643075943, 0.04864804446697235, 0.0019557650666683912, 0.14817607402801514, 0.0378977507352829, 0.049347102642059326, 0.0036467635072767735, 0.0038541490212082863, 0.0034904496278613806, 0.0012115711579099298, 0.047197386622428894, 0.05697714909911156, 0.11328870058059692, 0.8784908056259155, 0.019691603258252144, 0.23420120775699615, 0.004765921737998724, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1524984985589981, 0.08107080310583115, 0.005865868646651506, 0.00971321389079094, 0.007243088912218809, 0.011549782939255238, 0.00268083019182086, 0.03457775339484215, 0.0031127233523875475, 0.000510410696733743, 0.009807620197534561, 0.008875550702214241, 0.023541534319519997, 0.527433454990387, 0.015368063934147358, 0.16288210451602936, 0.20708848536014557, 0.014573587104678154, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16305263340473175, 0.020936982706189156, 0.020989498123526573, 0.007437185384333134, 0.034894589334726334, 0.016221558675169945, 0.04928300529718399, 0.02460765466094017, 0.006940784398466349, 0.010303718037903309, 0.11923910677433014, 0.002430608496069908, 0.020191287621855736, 0.019723495468497276, 0.015607062727212906, 0.14493703842163086, 0.29023703932762146, 0.2954525649547577, 0.024419967085123062, NaN, NaN, NaN, NaN, NaN, NaN], [0.04235544800758362, 0.014461617916822433, 0.006770138628780842, 0.009241613559424877, 0.002999901305884123, 0.0037356300745159388, 0.00043396188993938267, 0.005936506669968367, 0.00027135247364640236, 0.00836905650794506, 0.38652852177619934, 0.1805782914161682, 0.00859912484884262, 0.13720881938934326, 0.026457296684384346, 0.044793374836444855, 0.41905051469802856, 0.48846107721328735, 0.271888792514801, 0.02787640690803528, NaN, NaN, NaN, NaN, NaN], [0.03824670985341072, 0.05110237002372742, 0.016365332528948784, 0.027689939364790916, 0.004054062534123659, 0.0016762956511229277, 0.0059990487061440945, 0.061629924923181534, 0.02193543128669262, 0.004144957754760981, 0.11336920410394669, 0.0855039581656456, 0.16943661868572235, 0.007511935196816921, 0.0029296777211129665, 0.005633122753351927, 0.04470856487751007, 0.19621509313583374, 0.1449754536151886, 0.4407651424407959, 0.012849990278482437, NaN, NaN, NaN, NaN], [0.29710885882377625, 0.04157622903585434, 0.022785142064094543, 0.06820578873157501, 0.0019051277777180076, 0.004196317866444588, 0.012664434500038624, 0.010533612221479416, 0.00958634540438652, 0.006948783528059721, 0.024731770157814026, 0.04424457997083664, 0.0092665059491992, 0.008317369967699051, 0.00025302590802311897, 0.03921425715088844, 0.024433301761746407, 0.005475904326885939, 0.02041386440396309, 0.005526822991669178, 0.006030899006873369, 0.000147900907904841, NaN, NaN, NaN], [0.15116539597511292, 0.029300624504685402, 0.014213098213076591, 0.04858435317873955, 0.008192096836864948, 0.0029929669108241796, 0.00010039177868748084, 0.02851700410246849, 0.014845605008304119, 0.01335279829800129, 0.07330357283353806, 0.08230004459619522, 0.06801280379295349, 0.12962418794631958, 0.38807213306427, 0.021973537281155586, 0.0005578201962634921, 0.13413770496845245, 0.18835364282131195, 0.15109674632549286, 0.5815849900245667, 0.6008182764053345, 0.10515720397233963, NaN, NaN], [0.05911188945174217, 0.013889956288039684, 0.00048160224105231464, 0.10393460839986801, 0.009916743263602257, 0.013972792774438858, 0.0005543273873627186, 0.0008135904208756983, 0.0005866698920726776, 0.00012856724788434803, 0.016669562086462975, 0.022332170978188515, 0.03126570209860802, 0.39481881260871887, 0.0021035531535744667, 0.09696949273347855, 0.0003469766234047711, 0.012058700434863567, 0.1351245492696762, 0.1276140809059143, 0.8529128432273865, 0.013427066616714, 0.3029053509235382, 0.0016288348706439137, NaN], [0.22241219878196716, 0.00997188687324524, 0.004307668190449476, 0.0318865031003952, 0.026490027084946632, 0.04937301576137543, 0.016565896570682526, 0.0013930558925494552, 0.01958940364420414, 0.015218929387629032, 0.1830211728811264, 0.11458480358123779, 0.1729872077703476, 0.047152113169431686, 0.017883911728858948, 0.118315190076828, 0.07728181034326553, 0.31889867782592773, 0.1497264951467514, 0.2596881091594696, 0.15263305604457855, 0.024473916739225388, 0.19167250394821167, 0.12363447993993759, 0.010316992178559303]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12855423986911774, 0.11611904203891754, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1812644749879837, 0.04049589857459068, 0.04480821266770363, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14001408219337463, 0.11702272295951843, 0.5616602897644043, 0.021032487973570824, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17309650778770447, 0.011261633597314358, 0.0023054813500493765, 0.0014516497030854225, 0.17103753983974457, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21775518357753754, 0.1599237471818924, 0.031671781092882156, 0.0027859890833497047, 0.1030324175953865, 0.009803196415305138, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1265520304441452, 0.2245447188615799, 0.3357183039188385, 0.19591355323791504, 0.030100535601377487, 0.11038237810134888, 0.012957160361111164, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12113019824028015, 0.07331034541130066, 0.073086217045784, 0.038516201078891754, 0.16168329119682312, 0.12152494490146637, 0.1929183006286621, 0.11648087203502655, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15162895619869232, 0.16000056266784668, 0.47010278701782227, 0.008242717012763023, 0.016423694789409637, 0.19619418680667877, 0.014187236316502094, 0.2187093049287796, 0.3917299807071686, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1371021270751953, 0.24055053293704987, 0.39826682209968567, 0.0653936043381691, 0.06886317580938339, 0.1729464828968048, 0.02453671395778656, 0.2748231589794159, 0.23215962946414948, 0.03306089714169502, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05615014582872391, 0.17226241528987885, 0.4426397681236267, 0.534454345703125, 0.0034056571312248707, 0.0038566330913454294, 0.24011781811714172, 0.31882721185684204, 0.4456172287464142, 0.1489524245262146, 0.03087311051785946, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.037336766719818115, 0.065662682056427, 0.18869149684906006, 0.795316219329834, 0.14649540185928345, 0.021824514493346214, 0.13452036678791046, 0.026823654770851135, 0.35548609495162964, 0.18523786962032318, 0.020790524780750275, 0.09485815465450287, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17983746528625488, 0.09746579825878143, 0.46259593963623047, 0.706605851650238, 0.09193093329668045, 0.2823830544948578, 0.007526541594415903, 0.10234087705612183, 0.24847157299518585, 0.2038285881280899, 0.012590465135872364, 0.002493936335667968, 0.04428662359714508, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1421777307987213, 0.23310348391532898, 0.2705342471599579, 0.5351002812385559, 0.02795390971004963, 0.06031421944499016, 0.012775074690580368, 0.20022329688072205, 0.6570897698402405, 0.2668534517288208, 0.033325545489788055, 0.023841219022870064, 0.1455993354320526, 0.03172359615564346, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11665362864732742, 0.1886645257472992, 0.03897944837808609, 0.07137740403413773, 0.15634050965309143, 0.15400150418281555, 0.13745756447315216, 0.05537642911076546, 0.2729690372943878, 0.04749782383441925, 0.05948880687355995, 0.014797642827033997, 0.11365658044815063, 0.002582019427791238, 0.20324750244617462, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.29635345935821533, 0.04781435802578926, 0.41243496537208557, 0.03004680573940277, 0.13952067494392395, 0.045467544347047806, 4.634694050764665e-05, 0.20948387682437897, 0.002634957665577531, 0.005124728661030531, 0.0019075855379924178, 0.0009838729165494442, 0.0013485344825312495, 0.004148871172219515, 0.03574635088443756, 0.23113909363746643, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22071197628974915, 0.019423967227339745, 0.06694509834051132, 0.2386176735162735, 0.015943216159939766, 0.14270655810832977, 0.039743710309267044, 0.014324809424579144, 0.581375777721405, 0.040944233536720276, 0.011615565046668053, 0.02482481673359871, 0.06486763060092926, 0.002298883395269513, 0.009274494834244251, 0.012798607349395752, 0.009606687352061272, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04979729279875755, 0.005993144121021032, 0.05621323734521866, 0.3196869492530823, 0.0036542851012200117, 0.006608159281313419, 0.07202935218811035, 0.023804083466529846, 0.08581908792257309, 0.002907529706135392, 0.0022882334887981415, 0.155064657330513, 0.6752456426620483, 0.19066885113716125, 0.033486951142549515, 0.1545412391424179, 0.3257397711277008, 0.07836033403873444, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02027127519249916, 0.036089565604925156, 0.0908525288105011, 0.6094546914100647, 0.035198476165533066, 0.01578100211918354, 0.08828305453062057, 0.00740778585895896, 0.08938029408454895, 0.055872198194265366, 0.01406459603458643, 0.05842210724949837, 0.7085317969322205, 0.04043729975819588, 0.00861792266368866, 0.05839632451534271, 0.306302547454834, 0.11257344484329224, 0.09490343183279037, NaN, NaN, NaN, NaN, NaN, NaN], [0.2219613641500473, 0.0726998969912529, 0.3657586872577667, 0.6172192692756653, 0.07194076478481293, 0.17607101798057556, 0.009873087517917156, 0.09032700955867767, 0.1240842267870903, 0.06592906266450882, 0.021971723064780235, 0.004476875066757202, 0.04292584955692291, 0.013240871019661427, 0.03868407383561134, 0.0364602766931057, 0.007298360578715801, 0.02817610278725624, 0.0009550384129397571, 0.033005379140377045, NaN, NaN, NaN, NaN, NaN], [0.2832254469394684, 0.40537261962890625, 0.25111812353134155, 0.4335843026638031, 0.05173255130648613, 0.02949104830622673, 0.00834138598293066, 0.5043417811393738, 0.45271721482276917, 0.10732957720756531, 0.08741836994886398, 0.06616821885108948, 0.1252485066652298, 0.04288535565137863, 0.0027607728261500597, 0.11496254801750183, 0.007436650805175304, 0.04789961501955986, 0.014611729420721531, 0.05419020354747772, 0.013982507400214672, NaN, NaN, NaN, NaN], [0.1133793368935585, 0.2190774381160736, 0.04727642610669136, 0.08785698562860489, 0.22799502313137054, 0.1395695060491562, 0.17899513244628906, 0.05776361748576164, 0.19579172134399414, 0.03426501154899597, 0.08577524870634079, 0.027239171788096428, 0.22711482644081116, 0.005856664851307869, 0.3394412696361542, 0.03666312247514725, 0.053877539932727814, 0.02460121363401413, 0.02095765992999077, 0.08733106404542923, 0.0007995758787728846, 0.19509249925613403, NaN, NaN, NaN], [0.32134389877319336, 0.08582156896591187, 0.36053547263145447, 0.06279635429382324, 0.1449708491563797, 0.041098933666944504, 0.0002254477294627577, 0.3326246738433838, 0.0031729326583445072, 0.011426791548728943, 0.00305219367146492, 0.0021134610287845135, 0.0029090954922139645, 0.0035086346324533224, 0.0884322077035904, 0.7275413274765015, 4.6366836613742635e-05, 0.004567307885736227, 0.00048746803076937795, 0.0006845259922556579, 0.00036436106893233955, 0.0336419902741909, 0.19370199739933014, NaN, NaN], [0.2431764006614685, 0.00993723887950182, 0.023469794541597366, 0.12711890041828156, 0.013049022294580936, 0.09880916029214859, 0.014819139614701271, 0.015189954079687595, 0.19677633047103882, 0.012298321351408958, 0.006653454154729843, 0.017306946218013763, 0.044382814317941666, 0.005554118659347296, 0.008197239600121975, 0.025704391300678253, 0.01238576602190733, 0.005520223639905453, 0.018611198291182518, 0.07344726473093033, 0.00026948421145789325, 0.012129159644246101, 0.01222553662955761, 0.005697384011000395, NaN], [0.018590128049254417, 0.012204503640532494, 0.0029425490647554398, 0.01610950194299221, 0.024503106251358986, 0.04006015509366989, 0.018976394087076187, 0.006591797806322575, 0.002320006489753723, 0.001339062349870801, 0.028667215257883072, 0.03959575667977333, 0.00960585381835699, 0.009797154925763607, 0.022796805948019028, 0.1637655347585678, 0.20084494352340698, 0.05620957538485527, 0.12549559772014618, 0.022888751700520515, 0.037492163479328156, 0.04711981862783432, 0.44462573528289795, 0.3949664235115051, 0.3300856053829193]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16815106570720673, 0.017178548499941826, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2022658735513687, 0.005017802584916353, 0.01763225719332695, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16166983544826508, 0.033678483217954636, 0.014520054683089256, 0.003462842432782054, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10712886601686478, 0.3422684967517853, 0.05748933553695679, 0.2768969237804413, 0.004922540858387947, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.047501806169748306, 0.48201972246170044, 0.4827657639980316, 0.48466482758522034, 0.022285524755716324, 0.00022009640815667808, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1517350822687149, 0.04445230960845947, 0.09343461692333221, 0.05873756855726242, 0.07171032577753067, 0.22849556803703308, 0.05614512786269188, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.25680339336395264, 0.00010820403986144811, 0.0123103903606534, 0.007049524690955877, 0.001952940714545548, 0.027401963248848915, 0.0028134624008089304, 0.00041907382546924055, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005559808574616909, 0.007462772540748119, 0.013313480652868748, 0.017376750707626343, 0.0038542840629816055, 0.006728595122694969, 0.5333897471427917, 0.03155524656176567, 0.15571120381355286, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004124458413571119, 0.004751718603074551, 0.016015900298953056, 0.01742120459675789, 0.032125748693943024, 0.010460411198437214, 0.45809611678123474, 0.07138781994581223, 0.5171095728874207, 0.17626723647117615, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.24881334602832794, 0.005821824539452791, 0.031170587986707687, 0.009853766299784184, 0.027254868298768997, 0.01885347068309784, 0.02900754101574421, 0.013663586229085922, 0.012090054340660572, 0.0009272377355955541, 0.0030740045476704836, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19627800583839417, 0.054823894053697586, 0.1886557787656784, 0.00739922234788537, 0.09451853483915329, 0.01572227105498314, 0.0010023268405348063, 0.0061036646366119385, 0.0014733865391463041, 0.0003654434985946864, 0.006776102818548679, 0.0027319795917719603, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07900664210319519, 0.04510375112295151, 0.002657376928254962, 0.0032053724862635136, 0.0027717212215065956, 0.008140889927744865, 0.0011833005119115114, 0.04105996713042259, 0.0017470002640038729, 0.008194361813366413, 0.019470002502202988, 0.3834601640701294, 0.013146632350981236, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06578069925308228, 0.08975866436958313, 0.022234706208109856, 0.015388325788080692, 0.006578383035957813, 0.011582762002944946, 0.014906905591487885, 0.04645423963665962, 0.008417387492954731, 0.0318351611495018, 0.024524353444576263, 0.5050408244132996, 0.1078883558511734, 0.09876319766044617, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.010224410332739353, 0.16048979759216309, 0.09242240339517593, 0.259725958108902, 0.06779038906097412, 0.007232773117721081, 0.09601377695798874, 0.28109633922576904, 0.2723717987537384, 0.1275584101676941, 0.06318827718496323, 0.25179460644721985, 0.2496732771396637, 0.6837621927261353, 0.0018262360244989395, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04991341754794121, 0.05319196358323097, 0.14821480214595795, 0.020963814109563828, 0.03095317631959915, 0.024693654850125313, 0.008621936663985252, 0.14259999990463257, 0.042305052280426025, 0.09002435952425003, 0.005839803721755743, 0.061309609562158585, 0.23589004576206207, 0.30903181433677673, 0.18008928000926971, 0.49815359711647034, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.015294999815523624, 0.03185835853219032, 0.0202027577906847, 0.03976168856024742, 0.0711589902639389, 0.13473857939243317, 0.0059967683628201485, 0.0031582280062139034, 0.003374348394572735, 0.002362155122682452, 0.015532899647951126, 0.038825590163469315, 0.08611883223056793, 0.03844507411122322, 0.009673628956079483, 0.7068554162979126, 0.013729983940720558, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2531464695930481, 0.013071080669760704, 0.035546887665987015, 0.020458703860640526, 0.01740572415292263, 0.009577612392604351, 0.014396607875823975, 0.05952044576406479, 0.013841827400028706, 0.0003843819722533226, 0.0024746267590671778, 0.007157978601753712, 0.013787134550511837, 0.033782534301280975, 0.003469215938821435, 0.007898973301053047, 0.05525756999850273, 0.003914556000381708, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20273520052433014, 0.05025332421064377, 0.2335304319858551, 0.009442931972444057, 0.13508503139019012, 0.0181263517588377, 0.0010557285277172923, 0.003822105238214135, 0.0018545370548963547, 0.0003744752029888332, 0.0046313730999827385, 0.0008518796530552208, 0.006319030188024044, 0.014203540980815887, 0.0018540708115324378, 0.003058186499401927, 0.002516325796023011, 0.001575352856889367, 0.0014869269216433167, NaN, NaN, NaN, NaN, NaN, NaN], [0.059709664434194565, 0.021975213661789894, 0.002582199638709426, 0.002308695577085018, 0.00240446999669075, 0.004605048336088657, 0.0013587460853159428, 0.04497997462749481, 0.0009150391560979187, 0.0030208472162485123, 0.016492530703544617, 0.2572183907032013, 0.006429646629840136, 0.013558420352637768, 0.06110598146915436, 0.03728436306118965, 0.019318275153636932, 0.03907725587487221, 0.4492114782333374, 0.01579420454800129, NaN, NaN, NaN, NaN, NaN], [0.025836847722530365, 0.04185229912400246, 0.017175624147057533, 0.005038154777139425, 0.006518983747810125, 0.0043221269734203815, 0.004393702372908592, 0.03134007006883621, 0.002082354621961713, 0.00246719503775239, 0.00855192355811596, 0.28023120760917664, 0.0558621920645237, 0.020582975819706917, 0.00264686718583107, 0.052114877849817276, 0.01051351334899664, 0.0282430537045002, 0.640393853187561, 0.11605942994356155, 0.042242906987667084, NaN, NaN, NaN, NaN], [0.00790853425860405, 0.07249781489372253, 0.09275110065937042, 0.13612288236618042, 0.0654025748372078, 0.0028184219263494015, 0.039562828838825226, 0.11378230899572372, 0.08281006664037704, 0.029445864260196686, 0.03387679159641266, 0.16786670684814453, 0.2288694977760315, 0.6801032423973083, 0.0008468713494949043, 0.32477572560310364, 0.20243169367313385, 0.04291461780667305, 0.2565927505493164, 0.2435160130262375, 0.8255255222320557, 0.0008029205491766334, NaN, NaN, NaN], [0.06791312247514725, 0.034157127141952515, 0.26634278893470764, 0.01933334954082966, 0.08246968686580658, 0.03419587388634682, 0.019395295530557632, 0.1259232461452484, 0.02923283353447914, 0.07644251734018326, 0.00482177222147584, 0.03381035849452019, 0.2429695725440979, 0.4201262295246124, 0.21319957077503204, 0.1469077318906784, 0.005101305432617664, 0.05322602018713951, 0.08754345029592514, 0.4596864581108093, 0.32625797390937805, 0.2286616712808609, 0.6285872459411621, NaN, NaN], [0.0236026793718338, 0.032931454479694366, 0.018642868846654892, 0.052601076662540436, 0.09147398918867111, 0.11555580049753189, 0.00512799434363842, 0.006684163119643927, 0.005264784675091505, 0.0023014512844383717, 0.005628940649330616, 0.03778252378106117, 0.09737572073936462, 0.12753169238567352, 0.00698094442486763, 0.6853439807891846, 0.02319822832942009, 0.018658116459846497, 0.08199534565210342, 0.18709556758403778, 0.07321563363075256, 0.027500100433826447, 0.6534799337387085, 0.01572287082672119, NaN], [0.24674107134342194, 0.007728901691734791, 0.010779940523207188, 0.01413859985768795, 0.08573849499225616, 0.014258946292102337, 0.014431791380047798, 0.00199147523380816, 0.006254997570067644, 0.003036148613318801, 0.015209752134978771, 0.015118316747248173, 0.05811062082648277, 0.01987045258283615, 0.012226228602230549, 0.021392136812210083, 0.08141177892684937, 0.016042163595557213, 0.01565614528954029, 0.05352389067411423, 0.01607833430171013, 0.014641694724559784, 0.020306598395109177, 0.06722531467676163, 0.005379782523959875]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.147435262799263, 0.06894105672836304, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18660759925842285, 0.013697005808353424, 0.050341442227363586, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14907698333263397, 0.12682567536830902, 0.14014844596385956, 0.024977339431643486, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20074230432510376, 0.11179281026124954, 0.012457489967346191, 0.01455892063677311, 0.011106430552899837, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20768699049949646, 0.16985096037387848, 0.19526726007461548, 0.016829432919621468, 0.05647609382867813, 0.022808711975812912, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14349573850631714, 0.41078659892082214, 0.5100967288017273, 0.04046756774187088, 0.2924310266971588, 0.07987978309392929, 0.007180717773735523, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11146429926156998, 0.3579395115375519, 0.7730652093887329, 0.5723751783370972, 0.2817910611629486, 0.25461745262145996, 0.060240793973207474, 0.08399515599012375, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13904383778572083, 0.44345301389694214, 0.1345542073249817, 0.05706587806344032, 0.7818705439567566, 0.04436418041586876, 0.015915511175990105, 0.31926584243774414, 0.26167550683021545, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12236351519823074, 0.40148651599884033, 0.12099923938512802, 0.38539087772369385, 0.6352627873420715, 0.0574735552072525, 0.027495326474308968, 0.25199854373931885, 0.07788273692131042, 0.1824284791946411, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0776049941778183, 0.26076433062553406, 0.12800094485282898, 0.15216867625713348, 0.36678510904312134, 0.31404268741607666, 0.13151897490024567, 0.1709745228290558, 0.2591820955276489, 0.18929390609264374, 0.08235450834035873, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08287283033132553, 0.26698997616767883, 0.29562729597091675, 0.13922370970249176, 0.3693794012069702, 0.22139106690883636, 0.612119734287262, 0.1618482619524002, 0.40734153985977173, 0.10604425519704819, 0.2217203825712204, 0.14197519421577454, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0676846131682396, 0.5803259611129761, 0.47128230333328247, 0.2430339902639389, 0.43893957138061523, 0.5822793245315552, 0.9563859105110168, 0.5092246532440186, 0.7397804260253906, 0.6675750613212585, 0.2242172360420227, 0.046741336584091187, 0.09371624141931534, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16273218393325806, 0.4245251417160034, 0.44257473945617676, 0.1064363345503807, 0.22264361381530762, 0.638583779335022, 0.7456080913543701, 0.17856015264987946, 0.09681503474712372, 0.3901955187320709, 0.4154786765575409, 0.10903800278902054, 0.0281606987118721, 0.027353502810001373, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2541956901550293, 0.2554672658443451, 0.13483673334121704, 0.33163735270500183, 0.11067650467157364, 0.3400806486606598, 0.4272999167442322, 0.2955835163593292, 0.293487548828125, 0.2820315957069397, 0.17141510546207428, 0.08369391411542892, 0.012903732247650623, 0.010530934669077396, 0.015047149732708931, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07456009835004807, 0.09125705808401108, 0.20381297171115875, 0.09053967893123627, 0.6734579801559448, 0.8927901983261108, 0.9854956865310669, 0.19160649180412292, 0.848483681678772, 0.3795100748538971, 0.0351644828915596, 0.06069617718458176, 0.0190274715423584, 0.13319239020347595, 0.1618155688047409, 0.029784632846713066, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13663174211978912, 0.5250937938690186, 0.20416004955768585, 0.37758082151412964, 0.7281314134597778, 0.24714940786361694, 0.006291824858635664, 0.029336191713809967, 0.258807897567749, 0.17944614589214325, 0.2768983840942383, 0.49996671080589294, 0.6760725975036621, 0.0684136375784874, 0.9500845074653625, 0.04427658021450043, 0.027829600498080254, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05520259216427803, 0.4062710404396057, 0.11698392778635025, 0.09814880043268204, 0.8328142166137695, 0.46247926354408264, 0.07190129905939102, 0.3418641984462738, 0.14486591517925262, 0.025201991200447083, 0.042143724858760834, 0.4074908196926117, 0.1494714319705963, 0.17342594265937805, 0.908286988735199, 0.5950636863708496, 0.14296366274356842, 0.20851416885852814, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08497714251279831, 0.5087416172027588, 0.4508724510669708, 0.33144411444664, 0.600685715675354, 0.523800790309906, 0.4743403494358063, 0.10964386910200119, 0.6009643077850342, 0.29714730381965637, 0.1661888062953949, 0.10026849061250687, 0.19036318361759186, 0.07889659702777863, 0.29447081685066223, 0.5917950868606567, 0.05482999235391617, 0.0994495078921318, 0.08629819005727768, NaN, NaN, NaN, NaN, NaN, NaN], [0.04716389998793602, 0.6635201573371887, 0.5744545459747314, 0.33429521322250366, 0.755266010761261, 0.7800281643867493, 0.9541771411895752, 0.5776658058166504, 0.8714791536331177, 0.9158549308776855, 0.2818737030029297, 0.06938906759023666, 0.10379814356565475, 0.3064776659011841, 0.7474142909049988, 0.7715258002281189, 0.37782159447669983, 0.057383324950933456, 0.013433223590254784, 0.03400390222668648, NaN, NaN, NaN, NaN, NaN], [0.1486319750547409, 0.22267495095729828, 0.42902871966362, 0.07982667535543442, 0.5459871888160706, 0.9060689210891724, 0.8350642919540405, 0.10920917987823486, 0.4773065447807312, 0.7826967239379883, 0.5733710527420044, 0.26356616616249084, 0.040332335978746414, 0.031653065234422684, 0.8572309613227844, 0.5636150240898132, 0.07464684545993805, 0.03465104475617409, 0.03009859099984169, 0.008700854144990444, 0.005375253036618233, NaN, NaN, NaN, NaN], [0.25873932242393494, 0.5196211338043213, 0.3300914764404297, 0.5837901830673218, 0.4101006090641022, 0.7175306677818298, 0.6572118401527405, 0.6919461488723755, 0.6594171524047852, 0.7066829204559326, 0.46555259823799133, 0.3380126953125, 0.05317035689949989, 0.053740378469228745, 0.031323984265327454, 0.30507126450538635, 0.1422475129365921, 0.03319966048002243, 0.08714800328016281, 0.01252773217856884, 0.006611488293856382, 0.007115270011126995, NaN, NaN, NaN], [0.011579165235161781, 0.05381239950656891, 0.044945720583200455, 0.035533830523490906, 0.6624263525009155, 0.8997865319252014, 0.9679857492446899, 0.17051655054092407, 0.940772533416748, 0.6132625341415405, 0.01721411757171154, 0.04632151871919632, 0.010550450533628464, 0.08354383707046509, 0.12839946150779724, 0.02755529060959816, 0.44050073623657227, 0.04286862909793854, 0.01342833787202835, 0.003870438551530242, 0.026607532054185867, 0.02663758397102356, 0.005111980251967907, NaN, NaN], [0.13300661742687225, 0.5851269960403442, 0.20284885168075562, 0.5700805187225342, 0.7479174137115479, 0.39722636342048645, 0.004733124747872353, 0.0698152482509613, 0.6515945196151733, 0.5409151315689087, 0.25820717215538025, 0.4583084285259247, 0.6744768619537354, 0.3421478569507599, 0.9633424878120422, 0.1852269172668457, 0.04996338114142418, 0.5482219457626343, 0.296283096075058, 0.48366567492485046, 0.06441208720207214, 0.9149421453475952, 0.02780383825302124, 0.0073219588957726955, NaN], [0.14593175053596497, 0.2687321603298187, 0.04604685679078102, 0.30660173296928406, 0.3806478679180145, 0.38105660676956177, 0.15303322672843933, 0.014211257919669151, 0.05383581668138504, 0.20604565739631653, 0.2462100237607956, 0.5718756914138794, 0.5113963484764099, 0.21981710195541382, 0.4276719391345978, 0.5577609539031982, 0.4118191599845886, 0.31598320603370667, 0.5468451976776123, 0.4359907805919647, 0.2059280127286911, 0.3916337192058563, 0.2548142671585083, 0.2198532670736313, 0.026425611227750778]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1684475541114807, 0.01643766649067402, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20323613286018372, 0.02236698381602764, 0.0030780781526118517, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15523119270801544, 0.029148569330573082, 0.04869325831532478, 0.027081435546278954, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20906439423561096, 0.016835892572999, 0.005647255107760429, 0.004844226874411106, 0.00019458922906778753, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19736447930335999, 0.01826038584113121, 0.012854915112257004, 0.09684289991855621, 0.0006958578014746308, 4.3345058656996116e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16369424760341644, 0.023256592452526093, 0.01855486072599888, 0.06154748797416687, 0.06098903343081474, 0.10795246064662933, 0.023746412247419357, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19143380224704742, 0.11398851871490479, 0.03716170787811279, 0.07628969103097916, 0.38886839151382446, 0.24263328313827515, 0.13712459802627563, 0.02201412245631218, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2130274772644043, 0.007986752316355705, 0.02235114760696888, 0.0019427334191277623, 0.005593507084995508, 0.012699572369456291, 0.006745419930666685, 0.06126464158296585, 0.14077326655387878, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22579564154148102, 0.013292824849486351, 0.10215212404727936, 0.005943832919001579, 0.013894540257751942, 0.01404587086290121, 0.02319374494254589, 0.10344905406236649, 0.1325504034757614, 0.008661924861371517, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1733061671257019, 0.07715445756912231, 0.2302267998456955, 0.05804288014769554, 0.07560069113969803, 0.23177897930145264, 0.2901765704154968, 0.042333029210567474, 0.08450006693601608, 0.04456959664821625, 0.015471314080059528, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.16428759694099426, 0.01361166127026081, 0.2167942076921463, 0.03707392141222954, 0.09917350113391876, 0.2872558534145355, 0.08793877810239792, 0.03127053380012512, 0.051127880811691284, 0.02603980340063572, 0.12251178920269012, 0.06466985493898392, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2214493751525879, 0.0034381633158773184, 0.025536755099892616, 0.005642351228743792, 0.0024517737329006195, 0.00733930105343461, 0.0003064426709897816, 0.024970028549432755, 0.0009503457695245743, 0.0013023557839915156, 0.012362079694867134, 0.002213133964687586, 0.0037243058905005455, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21803884208202362, 0.044672977179288864, 0.15033316612243652, 0.24480289220809937, 0.0010314357932657003, 0.006885815411806107, 0.017953861504793167, 0.09280995279550552, 0.09214792400598526, 0.01309943851083517, 0.026278402656316757, 0.029330603778362274, 0.10137840360403061, 0.0009828503243625164, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.28474918007850647, 0.005827821791172028, 0.0010850036051124334, 0.005180059466511011, 0.00018831032502930611, 0.002925402717664838, 0.0029562395066022873, 0.005281978752464056, 0.002952893264591694, 0.013548285700380802, 0.01663871854543686, 0.02234998345375061, 0.001472283387556672, 0.00024227210087701678, 9.911999950418249e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11472342163324356, 0.017006950452923775, 0.03429265320301056, 0.05351921543478966, 0.010289198718965054, 0.02545105293393135, 0.002036151010543108, 0.08590202778577805, 0.007977829314768314, 0.008050770498812199, 0.02079172432422638, 0.07815419882535934, 0.25072064995765686, 0.11726108938455582, 0.04080193489789963, 0.020839283242821693, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.25351014733314514, 0.018978603184223175, 0.013279697857797146, 0.14657457172870636, 0.0005683518829755485, 0.003044809214770794, 0.0003673452010843903, 0.0009085922501981258, 0.00026260188315063715, 6.703466351609677e-05, 0.00393629027530551, 0.0411190427839756, 0.014572926796972752, 0.0009043514728546143, 0.001453216653317213, 0.001335341832600534, 0.0036634530406445265, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2249869406223297, 0.0773954764008522, 0.10561174154281616, 0.3267342746257782, 0.011780736967921257, 0.03227663040161133, 0.09185110032558441, 0.03840579837560654, 0.01289159432053566, 0.002641883445903659, 0.03386297821998596, 0.16820214688777924, 0.06345225125551224, 0.027306171134114265, 0.007737002335488796, 0.018253128975629807, 0.0508209764957428, 0.015562118031084538, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17073971033096313, 0.01119090337306261, 0.07090220600366592, 0.026190776377916336, 0.04357914999127388, 0.10384812206029892, 0.05681576952338219, 0.008270802907645702, 0.011212479323148727, 0.016114890575408936, 0.1306251734495163, 0.04437248408794403, 0.022720789536833763, 0.0017881430685520172, 0.005742507986724377, 0.03271590173244476, 0.12170897424221039, 0.18442584574222565, 0.07238933444023132, NaN, NaN, NaN, NaN, NaN, NaN], [0.2460513859987259, 0.004599481821060181, 0.030415518209338188, 0.006707339081913233, 0.001940727117471397, 0.0018293699249625206, 0.0002438600640743971, 0.021702459082007408, 0.00019114103633910418, 0.0004616644873749465, 0.02795419655740261, 0.007376548834145069, 0.009364028461277485, 0.0008695388678461313, 0.027626920491456985, 0.002984545426443219, 0.0021758046932518482, 0.005276597570627928, 0.0015223525697365403, 0.0046029179356992245, NaN, NaN, NaN, NaN, NaN], [0.1682240217924118, 0.15532228350639343, 0.17499232292175293, 0.31528380513191223, 0.0016938054468482733, 0.0013859918108209968, 0.0071086762472987175, 0.08609996736049652, 0.02145048975944519, 0.00334079097956419, 0.08546027541160583, 0.16909679770469666, 0.5000762343406677, 0.012536582536995411, 0.0033327846322208643, 0.01681024581193924, 0.01291667390614748, 0.11205089092254639, 0.06917328387498856, 0.24062496423721313, 0.003104837378486991, NaN, NaN, NaN, NaN], [0.30163663625717163, 0.008585775271058083, 0.0018221536884084344, 0.004949942696839571, 0.0002661931503098458, 0.0017199779395014048, 0.00286088977009058, 0.004591777920722961, 0.0013412131229415536, 0.009152509272098541, 0.029603971168398857, 0.059182800352573395, 0.004352512303739786, 0.0009281163802370429, 0.00013420419418253005, 0.0015637356555089355, 0.004895435180515051, 0.0020298720337450504, 0.016267914324998856, 0.0014363413210958242, 0.00015049855574034154, 4.989441003999673e-05, NaN, NaN, NaN], [0.1420876681804657, 0.030559053644537926, 0.035777460783720016, 0.0549585185945034, 0.010907668620347977, 0.018195953220129013, 0.005288956221193075, 0.07946551591157913, 0.003352995030581951, 0.00945360492914915, 0.03057919070124626, 0.20277532935142517, 0.5438944697380066, 0.2487112432718277, 0.11027072370052338, 0.03672702983021736, 0.009589559398591518, 0.03681262582540512, 0.12653782963752747, 0.3100517988204956, 0.04488144814968109, 0.07299992442131042, 0.024292031303048134, NaN, NaN], [0.2571920156478882, 0.012253361754119396, 0.00982633139938116, 0.09085621684789658, 0.00026428516139276326, 0.001174133620224893, 0.00010905979434028268, 0.0006958161829970777, 9.435929678147659e-05, 1.889842314994894e-05, 0.0019355103140696883, 0.03233037516474724, 0.014144179411232471, 0.0034062752965837717, 0.0014896523207426071, 0.0032966958824545145, 0.0043079969473183155, 0.002425077836960554, 0.0237245112657547, 0.017915409058332443, 0.0004631538176909089, 0.0033925946336239576, 0.0019653798080980778, 0.0010656031081452966, NaN], [0.25252944231033325, 0.012149164453148842, 0.019892947748303413, 0.013666713610291481, 0.05940697342157364, 0.04882493242621422, 0.025430571287870407, 0.00045668394886888564, 0.0054928152821958065, 0.005623141769319773, 0.004253733437508345, 0.014798035845160484, 0.012909402139484882, 0.011927488259971142, 0.007018915377557278, 0.021986471489071846, 0.016502689570188522, 0.002887164242565632, 0.006932961288839579, 0.007926056161522865, 0.015145027078688145, 0.005945136770606041, 0.016453862190246582, 0.011257275938987732, 0.0009747393196448684]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14568212628364563, 0.073321633040905, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07740449905395508, 0.019538799300789833, 0.31676185131073, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11254165321588516, 0.04977253079414368, 0.12113941460847855, 0.18998825550079346, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09693466126918793, 0.12094055861234665, 0.48810020089149475, 0.07605772465467453, 0.10663138329982758, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.002718105213716626, 0.037000641226768494, 0.1506986916065216, 0.012303436174988747, 0.09212689101696014, 0.5217995047569275, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17887507379055023, 0.10589989274740219, 0.004075651057064533, 0.0014342612121254206, 0.00521382549777627, 0.031908128410577774, 0.003124895039945841, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23519471287727356, 0.3653021454811096, 0.05512593686580658, 0.10675911605358124, 0.0014886436983942986, 0.001230676076374948, 0.003634560154750943, 0.00975269265472889, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19171930849552155, 0.3204987347126007, 0.0060858046635985374, 0.010409774258732796, 0.003722283523529768, 0.0010954621247947216, 0.0028676562942564487, 0.35306307673454285, 0.01622932404279709, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.25555557012557983, 0.13076956570148468, 0.003832729533314705, 0.0447237528860569, 0.014599477872252464, 0.0024878191761672497, 0.0016443775966763496, 0.20187559723854065, 0.0005508072790689766, 0.0029457835480570793, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13948844373226166, 0.2463626265525818, 0.09502393007278442, 0.197096586227417, 0.47678983211517334, 0.3142886161804199, 0.09103813022375107, 0.10499368607997894, 0.07698603719472885, 0.026083102449774742, 0.3110981583595276, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1511228382587433, 0.027682308107614517, 0.014322453178465366, 0.0030328254215419292, 0.04723867028951645, 0.30981165170669556, 0.025852922350168228, 0.018514074385166168, 0.01515920553356409, 0.009253463707864285, 0.10175863653421402, 0.16996310651302338, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1847103387117386, 0.05052594095468521, 0.005765186157077551, 0.018545929342508316, 0.00881477165967226, 0.0375242680311203, 0.027162199839949608, 0.09025334566831589, 0.0028228689916431904, 0.0033718899358063936, 0.1103500947356224, 0.0837099552154541, 0.0044236015528440475, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.27341794967651367, 0.03427007421851158, 0.008004172705113888, 0.009254892356693745, 0.005621441174298525, 0.00972525030374527, 0.005248658824712038, 0.02184745855629444, 0.0006181569187901914, 0.0005494534852914512, 0.06994801014661789, 0.02213645726442337, 0.004287416115403175, 0.0008399627404287457, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008804291486740112, 0.07617928832769394, 0.47516930103302, 0.07513945549726486, 0.5241973400115967, 0.4384346902370453, 0.06213618069887161, 0.06345370411872864, 0.0682281106710434, 0.15877418220043182, 0.023486817255616188, 0.026526909321546555, 0.0028373831883072853, 0.001617963775061071, 0.37629759311676025, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.26533833146095276, 0.10994716733694077, 0.010266831144690514, 0.037150826305150986, 0.009969023987650871, 0.00030588259687647223, 8.988264016807079e-05, 0.07940464466810226, 0.00027601365582086146, 0.0013282618019729853, 0.009904097765684128, 0.03278518095612526, 0.0630892813205719, 0.10911130160093307, 0.016624033451080322, 0.011541539803147316, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2451263964176178, 0.014867580495774746, 0.0005470102187246084, 0.0054298522882163525, 0.0004450916312634945, 0.0006575370789505541, 3.8741818570997566e-05, 0.0010275153908878565, 0.0013172366889193654, 0.0019110681023448706, 0.13600468635559082, 0.29138538241386414, 0.011091821826994419, 0.0002334356977371499, 0.0002162840828532353, 0.0001727231137920171, 0.004782650154083967, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18341027200222015, 0.31211209297180176, 0.08544175326824188, 0.17215219140052795, 0.07786234468221664, 0.033002957701683044, 0.028957894071936607, 0.08467604964971542, 0.018818018957972527, 0.0016417433507740498, 0.15075404942035675, 0.1522863805294037, 0.03350237384438515, 0.006119633559137583, 0.022573737427592278, 0.03810621052980423, 0.13675758242607117, 0.1992093175649643, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1540856957435608, 0.05453011393547058, 0.023697303608059883, 0.003979950677603483, 0.014029269106686115, 0.1104540005326271, 0.019629694521427155, 0.011429534293711185, 0.010672842152416706, 0.00807265006005764, 0.1843080371618271, 0.19234825670719147, 0.0017768212128430605, 0.006891301833093166, 0.08265318721532822, 0.014878016896545887, 0.09550431370735168, 0.1691773235797882, 0.20674942433834076, NaN, NaN, NaN, NaN, NaN, NaN], [0.21139073371887207, 0.06409671157598495, 0.007977590896189213, 0.017582383006811142, 0.004139575641602278, 0.008497070521116257, 0.024324562400579453, 0.12332659959793091, 0.0006915424601174891, 0.0006991134723648429, 0.09821731597185135, 0.18821127712726593, 0.009975801222026348, 0.024784373119473457, 0.009686794131994247, 0.0016004297649487853, 0.006526788230985403, 0.04246864095330238, 0.05479469522833824, 0.004482009913772345, NaN, NaN, NaN, NaN, NaN], [0.33224669098854065, 0.07294216006994247, 0.01592269167304039, 0.006994656287133694, 0.003661615075543523, 0.0007586313877254725, 0.0006907262722961605, 0.022764746099710464, 0.000276167003903538, 9.849678463069722e-05, 0.08613532781600952, 0.07070992141962051, 0.03258151933550835, 0.002256957348436117, 0.00035050295991823077, 0.002809839555993676, 0.005992868449538946, 0.14088936150074005, 0.024111032485961914, 0.015468394383788109, 0.000736193498596549, NaN, NaN, NaN, NaN], [0.00368693470954895, 0.0603332445025444, 0.389295369386673, 0.03955860063433647, 0.26089394092559814, 0.125760018825531, 0.029167605563998222, 0.03710402920842171, 0.03377004712820053, 0.08135493099689484, 0.01946301944553852, 0.033920928835868835, 0.00409010099247098, 0.0020981510169804096, 0.4028157889842987, 0.01821253076195717, 0.03254074230790138, 0.005954912398010492, 0.016414301469922066, 0.0033934058155864477, 0.0012025205651298165, 0.37666910886764526, NaN, NaN, NaN], [0.30478137731552124, 0.23805196583271027, 0.009743728674948215, 0.02953244559466839, 0.005627358797937632, 0.00013927526015322655, 0.00016958850028458983, 0.09182754158973694, 0.00019882968626916409, 0.0018803260754793882, 0.01743759773671627, 0.09691343456506729, 0.09625609964132309, 0.0949849784374237, 0.057061683386564255, 0.028116967529058456, 0.00013736996334046125, 0.022905906662344933, 0.02515738271176815, 0.029101604595780373, 0.01233749371021986, 0.027021989226341248, 0.012159456498920918, NaN, NaN], [0.2508227825164795, 0.013127491809427738, 0.0004774215049110353, 0.005875048227608204, 0.00014762053615413606, 0.0003128673997707665, 1.7799626220948994e-05, 0.0017815351020544767, 0.0009225650574080646, 0.0009481729357503355, 0.09391504526138306, 0.24316561222076416, 0.008820290677249432, 0.0015348505694419146, 0.0002856143401004374, 0.00038499117363244295, 0.010248353704810143, 0.0923430323600769, 0.1539699137210846, 0.0089821582660079, 0.00013843990745954216, 0.0004539538058452308, 6.709429726470262e-05, 0.0014084051363170147, NaN], [0.06230561435222626, 0.051613274961709976, 0.02077883668243885, 0.04204944148659706, 0.07247611880302429, 0.11675790697336197, 0.004215644672513008, 0.00555834174156189, 0.008976897224783897, 0.017200933769345284, 0.007355507928878069, 0.06492317467927933, 0.04215962812304497, 0.02968345396220684, 0.23223130404949188, 0.03253115341067314, 0.08794146776199341, 0.025323374196887016, 0.08459514379501343, 0.05644838511943817, 0.04970480501651764, 0.3588789105415344, 0.028869707137346268, 0.11940079927444458, 0.27181047201156616]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04884753376245499, 0.31528204679489136, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [7.444373295584228e-06, 4.17321571148932e-05, 0.5221405029296875, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09023705869913101, 0.59262615442276, 0.038057319819927216, 0.1896824985742569, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0001943353418027982, 0.004992108792066574, 0.35714879631996155, 0.028785984963178635, 0.7041940689086914, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.0879062756430358e-05, 5.022298137191683e-05, 0.0836932584643364, 0.0041815838776528835, 0.7177854776382446, 0.4451410174369812, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.003986984025686979, 0.03902542591094971, 0.00027279910864308476, 0.00016326647892128676, 0.09999275952577591, 0.23601794242858887, 0.8888784646987915, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0004483810334932059, 0.01581367664039135, 0.00053547159768641, 0.005416989792138338, 0.0004931549192406237, 1.743426764733158e-06, 0.0002464183489792049, 0.38669928908348083, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0014915558276697993, 0.0036082565784454346, 0.0005674233543686569, 0.0010717788245528936, 0.04321836307644844, 0.5446166396141052, 0.38359156250953674, 0.006869717035442591, 0.0028910271357744932, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [8.035104838199914e-05, 0.005924052093178034, 0.005847892723977566, 0.020417997613549232, 0.11436353623867035, 0.6555760502815247, 0.4247216582298279, 0.04553407058119774, 0.00039129320066422224, 0.013846640475094318, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0012459981953725219, 0.12171746790409088, 0.022806251421570778, 0.021380947902798653, 0.018195364624261856, 0.08835338801145554, 0.20732422173023224, 0.30439698696136475, 0.09951408952474594, 0.2512991428375244, 0.4290468692779541, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007976139895617962, 0.03435874730348587, 0.026849543675780296, 0.002102706115692854, 0.13315419852733612, 0.1177494078874588, 0.08904305100440979, 0.576798677444458, 0.140389084815979, 0.6266443729400635, 0.32779327034950256, 0.5110495090484619, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0015641784993931651, 0.09294694662094116, 0.006881145294755697, 0.0020365919917821884, 0.4301930069923401, 0.06383264064788818, 0.0045266724191606045, 0.17422647774219513, 0.00404678238555789, 0.006469257641583681, 0.052995309233665466, 0.1725381463766098, 0.668171763420105, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.004304439760744572, 0.05993141233921051, 0.054169829934835434, 0.025809768587350845, 0.7262899279594421, 0.2466905415058136, 0.15344326198101044, 0.33606013655662537, 0.02952432446181774, 0.07010773569345474, 0.008777104318141937, 0.03394261747598648, 0.032566726207733154, 0.6152393221855164, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [1.0540320545260329e-05, 0.0013190202880650759, 0.20101842284202576, 0.004686327185481787, 0.13271625339984894, 0.04526880756020546, 0.0007031870190985501, 0.0011485026916489005, 0.002882149303331971, 0.0005991549696773291, 0.0030197217129170895, 0.004800362046808004, 0.004403174854815006, 0.002436757553368807, 0.4002683460712433, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0003210107679478824, 0.5876501798629761, 0.16318874061107635, 0.7096263766288757, 0.11595475673675537, 0.007003267295658588, 0.001205803593620658, 0.1902448534965515, 0.011727835983037949, 0.44888344407081604, 0.8117052912712097, 0.45698752999305725, 0.023960944265127182, 0.010929742828011513, 0.005293603055179119, 0.00987145397812128, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.020372437313199043, 0.3410835862159729, 0.6929088234901428, 0.04383905977010727, 0.1458517462015152, 0.4223538339138031, 0.9439106583595276, 0.9473816156387329, 0.15120889246463776, 0.7730743288993835, 0.5082507133483887, 0.0460858978331089, 0.032336097210645676, 0.011211436241865158, 0.009573124349117279, 0.0003536108124535531, 0.06564418971538544, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.020423829555511475, 0.09150233864784241, 0.593336284160614, 0.050333935767412186, 0.04262891411781311, 0.44151586294174194, 0.7098277807235718, 0.36869171261787415, 0.7183430194854736, 0.3146522641181946, 0.5934929251670837, 0.08962199836969376, 0.01141325756907463, 0.0268073882907629, 0.008290876634418964, 0.022364463657140732, 0.0520397312939167, 0.3134966492652893, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008604546077549458, 0.07562410086393356, 0.10463645309209824, 0.003217896446585655, 0.1296835094690323, 0.21162182092666626, 0.30799001455307007, 0.7962209582328796, 0.27782267332077026, 0.5974112749099731, 0.3643631041049957, 0.5975222587585449, 0.032379183918237686, 0.8344925045967102, 0.5903766751289368, 0.1521190106868744, 0.10492946952581406, 0.10503242909908295, 0.5022279620170593, NaN, NaN, NaN, NaN, NaN, NaN], [0.0010157334618270397, 0.08574047684669495, 0.010654903016984463, 0.003869200125336647, 0.15051355957984924, 0.02434478886425495, 0.005829520523548126, 0.10341739654541016, 0.0023463659454137087, 0.00469975033774972, 0.1621563881635666, 0.27765417098999023, 0.6246147155761719, 0.44377410411834717, 0.0757245346903801, 0.08620554953813553, 0.08146335929632187, 0.32109129428863525, 0.1958039551973343, 0.5327519178390503, NaN, NaN, NaN, NaN, NaN], [0.0009064326295629144, 0.04867112636566162, 0.09537991136312485, 0.12993541359901428, 0.38632717728614807, 0.056282784789800644, 0.13602504134178162, 0.18383464217185974, 0.024170320481061935, 0.09972675889730453, 0.022063996642827988, 0.042059145867824554, 0.01842264086008072, 0.8592916131019592, 0.1306053251028061, 0.06485681235790253, 0.048735883086919785, 0.037178389728069305, 0.017466288059949875, 0.006924192421138287, 0.8764364123344421, NaN, NaN, NaN, NaN], [1.2418378219081205e-06, 0.0003037750138901174, 0.10264009237289429, 0.0010840333998203278, 0.03004724159836769, 0.00720690144225955, 0.00017297905287705362, 0.00021026108879595995, 0.0005732537247240543, 0.00013229742762632668, 0.0014890850288793445, 0.0027206502854824066, 0.0022100789938122034, 0.0018764312844723463, 0.22427155077457428, 0.0012303950497880578, 0.0001426686649210751, 0.0015814924845471978, 0.00487141590565443, 0.0029599322006106377, 0.003610847517848015, 0.41901907324790955, NaN, NaN, NaN], [0.00015546051145065576, 0.5271192193031311, 0.2684091329574585, 0.7487277388572693, 0.0846778005361557, 0.003557654097676277, 0.0064069912768900394, 0.16770148277282715, 0.008421340025961399, 0.27412623167037964, 0.8534677624702454, 0.5243650078773499, 0.02665238454937935, 0.01776440255343914, 0.013793676160275936, 0.00868560466915369, 0.08064579218626022, 0.69512540102005, 0.49261555075645447, 0.010526523925364017, 0.0028473760467022657, 0.008281596936285496, 0.007198471110314131, NaN, NaN], [0.03285643830895424, 0.3327244818210602, 0.7442528605461121, 0.049526505172252655, 0.13722854852676392, 0.37294694781303406, 0.9746374487876892, 0.9050161242485046, 0.144730344414711, 0.44314900040626526, 0.6168692708015442, 0.18840178847312927, 0.12898683547973633, 0.1250022053718567, 0.01759251020848751, 0.0030696040485054255, 0.6704888939857483, 0.3205258250236511, 0.28675025701522827, 0.09770815074443817, 0.0085873082280159, 0.028106005862355232, 0.0015327840810641646, 0.12156207114458084, NaN], [0.027913866564631462, 0.6360336542129517, 0.8947576880455017, 0.5603421926498413, 0.3501611351966858, 0.3494046926498413, 0.7655782103538513, 0.9696423411369324, 0.8922762274742126, 0.42980051040649414, 0.4555767774581909, 0.17016178369522095, 0.1410100758075714, 0.652664303779602, 0.2781027853488922, 0.07839874923229218, 0.11400053650140762, 0.10023999214172363, 0.04957454651594162, 0.07193805277347565, 0.5185664892196655, 0.15356925129890442, 0.02747632935643196, 0.046240244060754776, 0.017650051042437553]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02477514185011387, 0.37543168663978577, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02274254709482193, 0.6458237767219543, 0.013541627675294876, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03146426007151604, 0.019330549985170364, 0.019686071202158928, 0.5363749265670776, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05261930450797081, 0.12757715582847595, 0.003555318573489785, 0.48483166098594666, 0.00033596818684600294, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09825422614812851, 0.08890903741121292, 0.0022953739389777184, 0.3788372278213501, 6.525879871333018e-05, 3.547202504705638e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1839720457792282, 0.005392392631620169, 0.0012601928319782019, 0.000860364583786577, 0.0008281354093924165, 0.0005760629428550601, 0.002849774667993188, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.005911883432418108, 0.0029267233330756426, 0.007144090253859758, 0.001919957809150219, 0.004637785721570253, 0.004848909098654985, 0.006189228966832161, 0.3764636814594269, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2256152480840683, 0.0020181250292807817, 0.0012439934071153402, 0.00031968209077604115, 0.0029859780333936214, 0.017534615471959114, 0.0004058087943121791, 0.00034323628642596304, 0.029154805466532707, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03960844501852989, 0.0036635666619986296, 0.00109457119833678, 0.0017422186210751534, 0.022469639778137207, 0.004235065542161465, 0.007348764222115278, 0.00280297570861876, 0.030011437833309174, 0.576508641242981, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0628783106803894, 0.014568633399903774, 0.003403500886633992, 0.005917230620980263, 0.009509358555078506, 0.0019911406561732292, 0.005211993586272001, 0.01603839360177517, 0.00502167409285903, 0.3301290273666382, 0.10268117487430573, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.178706556558609, 0.5124386548995972, 0.028256116434931755, 0.011254883371293545, 0.03223628178238869, 0.0004171380714979023, 0.004843876231461763, 0.09010603278875351, 0.0025540743954479694, 0.016201328486204147, 0.029397757723927498, 0.010837158188223839, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18362975120544434, 0.10373001545667648, 0.006869313772767782, 0.010921900160610676, 0.01820673979818821, 0.0017379705095663667, 0.002349345711991191, 0.03729201853275299, 5.792165029561147e-05, 0.0013579311780631542, 0.0025659396778792143, 0.008523254655301571, 0.1568114459514618, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.060853905975818634, 0.016029829159379005, 0.001439533894881606, 0.017260756343603134, 0.0007974627078510821, 0.0012342276750132442, 0.028226196765899658, 0.0047790613025426865, 0.0015612602001056075, 0.004867547657340765, 0.039023980498313904, 0.05208572745323181, 0.33480554819107056, 0.17332881689071655, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.043774526566267014, 0.2669547498226166, 0.035314492881298065, 0.1941595822572708, 0.006638282909989357, 0.005091785918921232, 0.2628510892391205, 0.2860943675041199, 0.06445851922035217, 0.34950578212738037, 0.6430334448814392, 0.5673049688339233, 0.6101463437080383, 0.29372307658195496, 0.0028161092195659876, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018545497208833694, 0.059764593839645386, 0.0026272537652403116, 0.020267995074391365, 0.009687644429504871, 0.00033462722785770893, 0.0024671528954058886, 0.054633729159832, 5.4464391723740846e-05, 0.00043273900519125164, 0.0019224031129851937, 0.21117039024829865, 0.3183750510215759, 0.03866858780384064, 0.011778384447097778, 0.1297062188386917, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0004199208051431924, 4.603992783813737e-05, 8.09443406524224e-07, 2.029701317951549e-05, 3.386533080629306e-06, 2.203315261795069e-06, 4.220597020321293e-06, 8.901660294213798e-06, 0.00016298270202241838, 0.000983458710834384, 0.0005640776362270117, 0.0008154786773957312, 0.001651398022659123, 2.400618996034609e-06, 3.3168395020766184e-05, 6.549440058734035e-06, 0.8699775338172913, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06053417548537254, 0.012584012933075428, 0.0010002547642216086, 0.0027718576602637768, 0.006610550452023745, 0.0029896856285631657, 0.008355176076292992, 0.048459943383932114, 0.002307809190824628, 0.65205979347229, 0.1651758849620819, 0.011300449259579182, 0.029586348682641983, 0.014456091448664665, 0.0007872084970586002, 0.0008902085828594863, 0.029332326725125313, 0.16636918485164642, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19553376734256744, 0.2426333725452423, 0.004519153386354446, 0.00883245188742876, 0.006844275165349245, 0.00014635240950156003, 0.00260242260992527, 0.03859727829694748, 0.0011520206462591887, 0.014703472144901752, 0.016579829156398773, 0.003783928230404854, 0.01771795004606247, 0.0035672299563884735, 0.000677697011269629, 0.002100451150909066, 0.023971345275640488, 0.03231354430317879, 0.011524699628353119, NaN, NaN, NaN, NaN, NaN, NaN], [0.17035169899463654, 0.07290639728307724, 0.0013864204520359635, 0.008776376023888588, 0.010795027948915958, 0.0008890280150808394, 0.00375909055583179, 0.03264426812529564, 2.1074760297778994e-05, 0.0009656226029619575, 0.004805654752999544, 0.015095297247171402, 0.19429266452789307, 0.060086220502853394, 0.013300183229148388, 0.019145654514431953, 0.08634541183710098, 0.018065713346004486, 0.012390428222715855, 0.3474832773208618, NaN, NaN, NaN, NaN, NaN], [0.002681915881112218, 0.0020622191950678825, 1.740588413667865e-05, 0.001647116499952972, 2.462047996232286e-05, 1.4256034774007276e-05, 0.0023770714178681374, 0.0007797144935466349, 6.146806117612869e-05, 0.00019536878971848637, 0.023629816249012947, 0.022664623335003853, 0.058040015399456024, 0.02328144572675228, 0.00014305225340649486, 0.1791975051164627, 0.7950490117073059, 0.40287262201309204, 0.05916967615485191, 0.11726692318916321, 0.045271970331668854, NaN, NaN, NaN, NaN], [0.017539121210575104, 0.07800457626581192, 0.013338283635675907, 0.07843150943517685, 0.003389358287677169, 0.0011982140131294727, 0.07936429977416992, 0.08406823873519897, 0.016710255295038223, 0.13201765716075897, 0.339507520198822, 0.3268124461174011, 0.4709261357784271, 0.24707961082458496, 0.0009133804705925286, 0.27326905727386475, 0.539431095123291, 0.8842423558235168, 0.5773340463638306, 0.643308699131012, 0.15606866776943207, 0.0011033734772354364, NaN, NaN, NaN], [0.0009739195229485631, 0.0011780881322920322, 3.265493069193326e-05, 0.0005334040033631027, 0.0007281061843968928, 3.2774634746601805e-05, 0.0004276044783182442, 0.00342408730648458, 2.9227990125946235e-06, 5.522280844161287e-05, 0.00012372780474834144, 0.011400841176509857, 0.008755120448768139, 0.0017365129897370934, 0.0007705622701905668, 0.0024924452882260084, 0.4634210169315338, 0.010356471873819828, 0.06587640196084976, 0.03498200699687004, 0.005118835251778364, 0.0019369632937014103, 0.023791478946805, NaN, NaN], [0.00023119446996133775, 9.065014637599234e-06, 3.0932378081161005e-07, 7.128239758458221e-06, 2.417179757685517e-06, 1.9917408735636855e-06, 1.0686825362427044e-06, 3.5747166293731425e-06, 3.038432441826444e-05, 0.00024045849568210542, 0.00012102597975172102, 0.0003720777458511293, 0.0005474414792843163, 4.2138731259910855e-06, 8.004362825886346e-06, 4.010584234492853e-06, 0.22906039655208588, 0.00024706448311917484, 0.003541025100275874, 0.0035716970451176167, 1.1338630656609894e-06, 4.888530747848563e-05, 2.00755093828775e-05, 0.8455927968025208, NaN], [0.023575956001877785, 0.001566409133374691, 0.0004935376346111298, 0.015205318108201027, 0.0005761805805377662, 0.00026375881861895323, 0.0017682479228824377, 0.00015503005124628544, 0.011253873817622662, 0.321735680103302, 0.05970581993460655, 0.008942467160522938, 0.051820773631334305, 0.009087985381484032, 0.002068085130304098, 0.00584985688328743, 0.01019755844026804, 0.16441591084003448, 0.021173937246203423, 0.09159599989652634, 0.004452125634998083, 0.0037374526727944613, 0.01578103005886078, 0.01742226630449295, 0.3373567461967468]]], [[[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1729947179555893, 0.014742943458259106, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11518532782793045, 0.28854820132255554, 0.0005498379468917847, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12768876552581787, 0.007979520596563816, 0.05741023272275925, 0.14377589523792267, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.25598737597465515, 0.03471918776631355, 0.08263758569955826, 0.03616967797279358, 0.0012629067059606314, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.29742351174354553, 0.10481993854045868, 0.07552393525838852, 0.008401650935411453, 0.3407011330127716, 0.028353586792945862, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17861823737621307, 0.07256677001714706, 0.1795390099287033, 0.04586997628211975, 0.27750420570373535, 0.0032322825863957405, 0.09472999721765518, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1281835287809372, 0.008169662207365036, 0.10209551453590393, 0.22781534492969513, 0.13339588046073914, 0.022249281406402588, 0.2580547630786896, 0.0071509419940412045, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19490991532802582, 0.0105251120403409, 0.07082764059305191, 0.07746586948633194, 0.10047772526741028, 0.007984980009496212, 0.045915842056274414, 0.030714787542819977, 0.09154831618070602, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2116595059633255, 0.006228659767657518, 0.09237925708293915, 0.33000993728637695, 0.06037600710988045, 0.06468494236469269, 0.028822004795074463, 0.015993207693099976, 0.023504862561821938, 0.014777855016291142, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11546289920806885, 0.0627092570066452, 0.1015198826789856, 0.17440570890903473, 0.11644574254751205, 0.15138378739356995, 0.17151175439357758, 0.07174428552389145, 0.1994275599718094, 0.20994937419891357, 0.08254047483205795, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13584046065807343, 0.09117304533720016, 0.15590398013591766, 0.10968183726072311, 0.5585501790046692, 0.07535546272993088, 0.2762793302536011, 0.32588398456573486, 0.3246583938598633, 0.41251155734062195, 0.043567951768636703, 0.0185235645622015, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1674133688211441, 0.12648360431194305, 0.27492284774780273, 0.24355122447013855, 0.8769406676292419, 0.6096609234809875, 0.4704851806163788, 0.055198147892951965, 0.6140321493148804, 0.2705269455909729, 0.07450747489929199, 0.04471021145582199, 0.05369797348976135, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.035074394196271896, 0.012203776277601719, 0.2713678479194641, 0.27628132700920105, 0.5399907231330872, 0.3242804706096649, 0.5765586495399475, 0.02925838902592659, 0.3159044086933136, 0.11935708671808243, 0.16010764241218567, 0.31936678290367126, 0.22831447422504425, 0.09149928390979767, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1354324370622635, 0.08839684724807739, 0.010535157285630703, 0.3809414505958557, 0.006101538427174091, 0.04204240441322327, 0.6714356541633606, 0.02054513990879059, 0.44751474261283875, 0.5217893123626709, 0.16833685338497162, 0.4138224124908447, 0.5945862531661987, 0.14406909048557281, 0.000551112403627485, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.26645413041114807, 0.038747917860746384, 0.15441381931304932, 0.6166976094245911, 0.04416924715042114, 0.07849516719579697, 0.41569313406944275, 0.018940549343824387, 0.18770581483840942, 0.11268321424722672, 0.0962471142411232, 0.028718965128064156, 0.019747000187635422, 0.011864973232150078, 0.07090434432029724, 0.02976600080728531, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.26584282517433167, 0.03641113266348839, 0.24681606888771057, 0.03326011076569557, 0.5612249970436096, 0.11044078320264816, 0.038705065846443176, 0.07638699561357498, 0.20042885839939117, 0.41367095708847046, 0.16446417570114136, 0.05500950291752815, 0.0458536334335804, 0.038293108344078064, 0.05886702984571457, 0.005421455018222332, 0.03447017818689346, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.052208781242370605, 0.10399425774812698, 0.2661847770214081, 0.06582632660865784, 0.5218088626861572, 0.41107869148254395, 0.18652401864528656, 0.10915308445692062, 0.2499890774488449, 0.21385571360588074, 0.11996328830718994, 0.2169666439294815, 0.17541900277137756, 0.34852319955825806, 0.29904353618621826, 0.3583068549633026, 0.0660485103726387, 0.0772518739104271, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1452419012784958, 0.08285138756036758, 0.20162978768348694, 0.10332676023244858, 0.7324197292327881, 0.1815183311700821, 0.27558720111846924, 0.41944485902786255, 0.4614993929862976, 0.7035390734672546, 0.14779764413833618, 0.07484183460474014, 0.09274464100599289, 0.1956741362810135, 0.4027537703514099, 0.17018413543701172, 0.15845544636249542, 0.03217604011297226, 0.027846908196806908, NaN, NaN, NaN, NaN, NaN, NaN], [0.06803880631923676, 0.0777740478515625, 0.3149954080581665, 0.17862020432949066, 0.9274848103523254, 0.6797788739204407, 0.28538215160369873, 0.04841757193207741, 0.524702250957489, 0.33268001675605774, 0.06556227803230286, 0.08207366615533829, 0.08443650603294373, 0.19301387667655945, 0.68314129114151, 0.7843886613845825, 0.24039600789546967, 0.0983721911907196, 0.035574402660131454, 0.04086223617196083, NaN, NaN, NaN, NaN, NaN], [0.004222579766064882, 0.012189013883471489, 0.38177239894866943, 0.23501808941364288, 0.3822557032108307, 0.273560494184494, 0.28252631425857544, 0.039307549595832825, 0.41269388794898987, 0.3037600517272949, 0.1617780327796936, 0.33094146847724915, 0.37525615096092224, 0.1388353556394577, 0.8142803907394409, 0.5916069149971008, 0.18943282961845398, 0.08566068857908249, 0.11778654158115387, 0.1818830519914627, 0.04465563967823982, NaN, NaN, NaN, NaN], [0.0780838280916214, 0.07355974614620209, 0.01093215774744749, 0.22770193219184875, 0.008550305850803852, 0.06503485888242722, 0.5060688257217407, 0.02145100012421608, 0.43843212723731995, 0.6872871518135071, 0.1969044953584671, 0.45010682940483093, 0.7415768504142761, 0.3103433847427368, 0.001054091495461762, 0.20113487541675568, 0.21400661766529083, 0.41673052310943604, 0.3260871469974518, 0.620118260383606, 0.12724098563194275, 0.0004952864837832749, NaN, NaN, NaN], [0.3314567506313324, 0.06341477483510971, 0.5618032217025757, 0.642646074295044, 0.27415919303894043, 0.23788774013519287, 0.38833677768707275, 0.08984735608100891, 0.42147237062454224, 0.6564009785652161, 0.2928015887737274, 0.1047874391078949, 0.1023104265332222, 0.06365151703357697, 0.39097070693969727, 0.14560170471668243, 0.23420175909996033, 0.08592629432678223, 0.02493405155837536, 0.011453422717750072, 0.006046658381819725, 0.1451905518770218, 0.005812718998640776, NaN, NaN], [0.21756824851036072, 0.03937938064336777, 0.3266570568084717, 0.05877631530165672, 0.5281912088394165, 0.11102446913719177, 0.03890432044863701, 0.10487684607505798, 0.2815292179584503, 0.4750865697860718, 0.3058159351348877, 0.11602579057216644, 0.12021853774785995, 0.06692790240049362, 0.1190272718667984, 0.019106050953269005, 0.21307361125946045, 0.15337608754634857, 0.06824280321598053, 0.040861621499061584, 0.032932352274656296, 0.052440475672483444, 0.005818615201860666, 0.0524408333003521, NaN], [0.21100056171417236, 0.13406150043010712, 0.10563220083713531, 0.15389345586299896, 0.10192565619945526, 0.07836726307868958, 0.22881029546260834, 0.05055452138185501, 0.24765580892562866, 0.48160815238952637, 0.2201593518257141, 0.1761431246995926, 0.21236160397529602, 0.20979638397693634, 0.10962515324354172, 0.09009265154600143, 0.0623038187623024, 0.17415094375610352, 0.13285446166992188, 0.11576873064041138, 0.10801524668931961, 0.0743527039885521, 0.03413216769695282, 0.027520645409822464, 0.06626196205615997]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0702696219086647, 0.2507307231426239, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.028418319299817085, 0.003963488154113293, 0.4144974946975708, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13786309957504272, 0.03506092354655266, 0.02415982447564602, 0.10726116597652435, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011229841969907284, 0.008138949982821941, 0.04613415151834488, 0.2518063187599182, 0.013397655449807644, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0016812672838568687, 0.012760624289512634, 0.002261990448459983, 0.2769384980201721, 0.03090759925544262, 0.0014064738061279058, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11822758615016937, 0.07095540314912796, 0.030966516584157944, 0.03516996279358864, 0.2070395052433014, 0.02684318646788597, 0.2317354679107666, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23311708867549896, 0.026411496102809906, 0.011159970425069332, 0.03808103874325752, 0.017219573259353638, 0.006694006733596325, 0.001702688867226243, 0.009211051277816296, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1427604705095291, 0.06787170469760895, 0.04101337492465973, 0.04024908319115639, 0.2669386863708496, 0.04579312726855278, 0.07587221264839172, 0.10059545934200287, 0.18715938925743103, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.059837497770786285, 0.10673120617866516, 0.06554628908634186, 0.047321293503046036, 0.26084935665130615, 0.05379262939095497, 0.09055614471435547, 0.09319713711738586, 0.334230899810791, 0.23545128107070923, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06699422001838684, 0.48348554968833923, 0.10470042377710342, 0.2643885016441345, 0.49639153480529785, 0.11732041090726852, 0.061902400106191635, 0.1530170738697052, 0.11711295694112778, 0.23237623274326324, 0.09402092546224594, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.050390250980854034, 0.2627623975276947, 0.057036180049180984, 0.10587681084871292, 0.22481703758239746, 0.07078704982995987, 0.028480585664510727, 0.47086307406425476, 0.03990349546074867, 0.16108965873718262, 0.02393723465502262, 0.06960758566856384, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.29633763432502747, 0.1570599228143692, 0.07358378916978836, 0.08321648091077805, 0.01657349243760109, 0.02100137248635292, 0.019902318716049194, 0.5162196755409241, 0.03987365961074829, 0.018146652728319168, 0.026169516146183014, 0.00614600395783782, 0.07103840261697769, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1833065152168274, 0.0826280415058136, 0.06509751826524734, 0.017351830378174782, 0.08598462492227554, 0.028223805129528046, 0.03195580840110779, 0.045467328280210495, 0.041934747248888016, 0.016390223056077957, 0.05298775061964989, 0.05077003315091133, 0.2718433141708374, 0.04039132222533226, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09722712635993958, 0.09857381135225296, 0.2290657013654709, 0.162257120013237, 0.3208743929862976, 0.7083525657653809, 0.08285251259803772, 0.05820265784859657, 0.14296579360961914, 0.06442547589540482, 0.3963678479194641, 0.1963234394788742, 0.13509824872016907, 0.0551372766494751, 0.1773844212293625, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1786596029996872, 0.03035295568406582, 0.011360704898834229, 0.0041356864385306835, 0.02253635786473751, 0.032254207879304886, 0.05765725299715996, 0.06512543559074402, 0.26075252890586853, 0.14487245678901672, 0.06064848601818085, 0.02561355009675026, 0.06785233318805695, 0.08367668837308884, 0.11658230423927307, 0.21664968132972717, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02336198277771473, 0.027563903480768204, 0.02503703534603119, 0.002219978952780366, 0.024155667051672935, 0.005802824627608061, 0.011775066144764423, 0.03527237847447395, 0.0438326895236969, 0.16127318143844604, 0.07829897105693817, 0.04636809974908829, 0.16168944537639618, 0.17395752668380737, 0.5116502642631531, 0.11367138475179672, 0.24585914611816406, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14312313497066498, 0.6151867508888245, 0.2511911392211914, 0.34089455008506775, 0.21357816457748413, 0.06974375993013382, 0.04017443582415581, 0.4436698257923126, 0.0627409890294075, 0.029346130788326263, 0.06214871257543564, 0.07426106929779053, 0.37162381410598755, 0.1908751130104065, 0.2730017304420471, 0.09601876139640808, 0.07787502557039261, 0.1985486000776291, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05929486081004143, 0.1356429159641266, 0.08288607001304626, 0.1716676652431488, 0.17707081139087677, 0.11502664536237717, 0.023076828569173813, 0.41179341077804565, 0.03153251111507416, 0.08080360293388367, 0.03793509677052498, 0.0956316813826561, 0.40457794070243835, 0.3355584144592285, 0.2116643786430359, 0.2117510586977005, 0.0911363810300827, 0.13469243049621582, 0.08244834095239639, NaN, NaN, NaN, NaN, NaN, NaN], [0.34530380368232727, 0.14280815422534943, 0.08469259738922119, 0.20386184751987457, 0.018106382340192795, 0.025206930935382843, 0.03376462310552597, 0.665645956993103, 0.06945709139108658, 0.030968131497502327, 0.031062953174114227, 0.015101979486644268, 0.10170532017946243, 0.03453005850315094, 0.05652596056461334, 0.028510402888059616, 0.036133769899606705, 0.04489430412650108, 0.010548176243901253, 0.07425779104232788, NaN, NaN, NaN, NaN, NaN], [0.21361097693443298, 0.09641434252262115, 0.0472431480884552, 0.030436551198363304, 0.12823571264743805, 0.024378983303904533, 0.03781319037079811, 0.04478050768375397, 0.04302188381552696, 0.031242409721016884, 0.06916327774524689, 0.08240062743425369, 0.2609483301639557, 0.04106062278151512, 0.01303931511938572, 0.014160559512674809, 0.011109860613942146, 0.034855347126722336, 0.10407929867506027, 0.21024775505065918, 0.08525354415178299, NaN, NaN, NaN, NaN], [0.056013792753219604, 0.04104574769735336, 0.13420559465885162, 0.14404895901679993, 0.30753612518310547, 0.5552563667297363, 0.06356479972600937, 0.02527950517833233, 0.09324341267347336, 0.03306487947702408, 0.2522013187408447, 0.14255186915397644, 0.09901494532823563, 0.06439376622438431, 0.10042564570903778, 0.43083739280700684, 0.20968028903007507, 0.35324180126190186, 0.2700602114200592, 0.23262809216976166, 0.11776822060346603, 0.14138048887252808, NaN, NaN, NaN], [0.1699744164943695, 0.02438814751803875, 0.00377153092995286, 0.0020952692721039057, 0.017941365018486977, 0.009907160885632038, 0.04197421669960022, 0.08005423098802567, 0.16825814545154572, 0.08759146183729172, 0.037892259657382965, 0.02378804422914982, 0.12696562707424164, 0.21072204411029816, 0.039158232510089874, 0.12900760769844055, 0.018357207998633385, 0.09957201033830643, 0.024237502366304398, 0.12091250717639923, 0.2524404227733612, 0.044468626379966736, 0.19958341121673584, NaN, NaN], [0.016944430768489838, 0.011726072989404202, 0.017351148650050163, 0.0028529188130050898, 0.013441222719848156, 0.005811003036797047, 0.010734970681369305, 0.020825698971748352, 0.04144507274031639, 0.0777476355433464, 0.07330787181854248, 0.0589311420917511, 0.1305314600467682, 0.09686601907014847, 0.49986732006073, 0.09861493855714798, 0.24486178159713745, 0.2709232568740845, 0.08328418433666229, 0.1665872186422348, 0.2741791903972626, 0.5570544600486755, 0.09308093041181564, 0.18428745865821838, NaN], [0.043635401874780655, 0.027883753180503845, 0.11735352873802185, 0.09225393831729889, 0.11462916433811188, 0.1478782296180725, 0.04645288363099098, 0.049018505960702896, 0.08540874719619751, 0.16189652681350708, 0.081883005797863, 0.13365384936332703, 0.17616337537765503, 0.16547891497612, 0.3400772511959076, 0.14388780295848846, 0.2768324613571167, 0.1609276533126831, 0.18515954911708832, 0.2950800061225891, 0.32982173562049866, 0.4366631507873535, 0.3681013882160187, 0.34051525592803955, 0.05319627374410629]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1755252629518509, 0.00892956368625164, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18403629958629608, 0.12486936897039413, 0.01289399154484272, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07995349168777466, 0.1140136644244194, 0.16089488565921783, 0.271826833486557, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19368642568588257, 0.20833823084831238, 0.38513559103012085, 0.0724099725484848, 0.026710418984293938, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2920932173728943, 0.20408804714679718, 0.47836723923683167, 0.009784400463104248, 0.41401228308677673, 0.0022880665492266417, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2459677904844284, 0.013399376533925533, 0.165635347366333, 0.0016970435390248895, 0.00861914549022913, 0.0019094902090728283, 0.006659353617578745, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1659669429063797, 0.3024148941040039, 0.4638516902923584, 0.19814886152744293, 0.06386706978082657, 0.37022748589515686, 0.096834197640419, 0.004976118449121714, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23605915904045105, 0.015010624192655087, 0.29689958691596985, 0.002272083656862378, 0.02557971514761448, 0.04829570651054382, 0.03933914750814438, 0.012097989208996296, 0.005491157062351704, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2229652851819992, 0.011020033620297909, 0.07613904774188995, 0.00492003234103322, 0.11613531410694122, 0.12462546676397324, 0.03799906745553017, 0.029671484604477882, 0.022334527224302292, 0.003809461137279868, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.30055463314056396, 0.03860635682940483, 0.08235271275043488, 0.12519411742687225, 0.07496307790279388, 0.24307869374752045, 0.02970520593225956, 0.043270040303468704, 0.01804984174668789, 0.008444367907941341, 0.04573319852352142, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.361846923828125, 0.0072926427237689495, 0.07028269022703171, 0.038334887474775314, 0.02117738127708435, 0.035939738154411316, 0.03011121228337288, 0.01985063962638378, 0.03699057549238205, 0.0448327511548996, 0.07655268162488937, 0.03217002749443054, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18510019779205322, 0.0857149139046669, 0.2959531545639038, 0.10870446264743805, 0.034602705389261246, 0.04019882157444954, 0.02403290942311287, 0.05409723520278931, 0.04566982761025429, 0.19149497151374817, 0.23549742996692657, 0.074503093957901, 0.01255789864808321, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03710656613111496, 0.054964251816272736, 0.037898506969213486, 0.3724515438079834, 0.058691613376140594, 0.03363177552819252, 0.06933214515447617, 0.05247700959444046, 0.15643684566020966, 0.589249849319458, 0.349843829870224, 0.29659491777420044, 0.2287619560956955, 0.05358140170574188, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2688547670841217, 0.1434442549943924, 0.18350595235824585, 0.07485228031873703, 0.0647219642996788, 0.04773847386240959, 0.14254990220069885, 0.03905782103538513, 0.2126167118549347, 0.24802155792713165, 0.30339401960372925, 0.17472584545612335, 0.03891041502356529, 0.02338952198624611, 0.026767900213599205, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1340402513742447, 0.12347351759672165, 0.42842522263526917, 0.0631304681301117, 0.06392616778612137, 0.1770109236240387, 0.11116458475589752, 0.04706185683608055, 0.09571156650781631, 0.3872493505477905, 0.5415271520614624, 0.14801958203315735, 0.013348261825740337, 0.016769861802458763, 0.019784821197390556, 0.012107723392546177, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3128407299518585, 0.02314484678208828, 0.20690661668777466, 0.0038596922531723976, 0.10119188576936722, 0.375572144985199, 0.077932208776474, 0.16011959314346313, 0.07805528491735458, 0.020400837063789368, 0.2237216979265213, 0.1006372720003128, 0.022764090448617935, 0.005061473231762648, 0.0205483790487051, 0.0018506759079173207, 0.001139476546086371, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5802629590034485, 0.17577120661735535, 0.22907592356204987, 0.3224048614501953, 0.21584153175354004, 0.3719359040260315, 0.08852899819612503, 0.18978306651115417, 0.06894023716449738, 0.008546161465346813, 0.34136468172073364, 0.44251179695129395, 0.07915834337472916, 0.27557075023651123, 0.0915302038192749, 0.0036887326277792454, 0.0038842300418764353, 0.015524323098361492, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.5194967985153198, 0.010316978208720684, 0.10247951745986938, 0.03023943491280079, 0.02351299114525318, 0.05376119539141655, 0.03751303628087044, 0.02858700230717659, 0.03933052346110344, 0.026450933888554573, 0.16396890580654144, 0.08825679868459702, 0.01957540772855282, 0.02957809716463089, 0.0652899444103241, 0.003373907646164298, 0.007670924998819828, 0.004321575630456209, 0.024295708164572716, NaN, NaN, NaN, NaN, NaN, NaN], [0.2508450150489807, 0.1962328553199768, 0.3596697747707367, 0.1504865288734436, 0.029224414378404617, 0.0663013905286789, 0.043777331709861755, 0.06269483268260956, 0.06556038558483124, 0.2250475436449051, 0.35171735286712646, 0.22191122174263, 0.018188640475273132, 0.026326660066843033, 0.017122289165854454, 0.0037187051493674517, 0.024730468168854713, 0.035062648355960846, 0.09351257234811783, 0.011442800983786583, NaN, NaN, NaN, NaN, NaN], [0.007168593350797892, 0.033368390053510666, 0.00873665139079094, 0.16062632203102112, 0.028196215629577637, 0.02527499757707119, 0.06866460293531418, 0.0198657363653183, 0.1544157713651657, 0.2752910256385803, 0.14698350429534912, 0.1242247000336647, 0.13061578571796417, 0.010920656844973564, 0.0055906628258526325, 0.006986986380070448, 0.030699225142598152, 0.36674854159355164, 0.2189747393131256, 0.2510429620742798, 0.04264682158827782, NaN, NaN, NaN, NaN], [0.317547470331192, 0.16016888618469238, 0.1976199448108673, 0.10644932836294174, 0.09830258786678314, 0.07801979035139084, 0.301817923784256, 0.05034731701016426, 0.32512444257736206, 0.2241876721382141, 0.4657731354236603, 0.2891538441181183, 0.08093820512294769, 0.06031876429915428, 0.06730521470308304, 0.14267991483211517, 0.289673775434494, 0.1076083853840828, 0.2949788272380829, 0.0365237332880497, 0.015645001083612442, 0.03993191570043564, NaN, NaN, NaN], [0.17233391106128693, 0.22507980465888977, 0.300968736410141, 0.03457535058259964, 0.06539295613765717, 0.2556630074977875, 0.12555503845214844, 0.08745130896568298, 0.10011813044548035, 0.13041436672210693, 0.501103937625885, 0.14929187297821045, 0.03132137656211853, 0.02265048772096634, 0.03383776918053627, 0.006481703836470842, 0.011523596942424774, 0.35894638299942017, 0.1662973165512085, 0.034177642315626144, 0.02702290564775467, 0.036704160273075104, 0.014952532015740871, NaN, NaN], [0.4115316569805145, 0.042032964527606964, 0.21366682648658752, 0.010602481663227081, 0.11737099289894104, 0.5779745578765869, 0.13523340225219727, 0.2636784315109253, 0.170937180519104, 0.020469455048441887, 0.3112620711326599, 0.17165400087833405, 0.044973500072956085, 0.006653682328760624, 0.053596071898937225, 0.008654352277517319, 0.002382548525929451, 0.02675137296319008, 0.09427332878112793, 0.01890433207154274, 0.002222384326159954, 0.018390605226159096, 0.0013299400452524424, 0.0009657714981585741, NaN], [0.38502925634384155, 0.1563987135887146, 0.13578397035598755, 0.1404726654291153, 0.14828255772590637, 0.28480827808380127, 0.15350891649723053, 0.09994281083345413, 0.06321649998426437, 0.030282480642199516, 0.13266463577747345, 0.1722954362630844, 0.07113035768270493, 0.024887708947062492, 0.016665330156683922, 0.03949398547410965, 0.020136239007115364, 0.01368448045104742, 0.09379612654447556, 0.030771953985095024, 0.011002926155924797, 0.007083212956786156, 0.009242233820259571, 0.007993990555405617, 0.018528543412685394]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17860974371433258, 0.0018437139224261045, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20284786820411682, 0.0034877806901931763, 0.08334594964981079, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1494244486093521, 0.3379342555999756, 0.0649241954088211, 0.006597604602575302, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2969810962677002, 0.005403619725257158, 0.054099179804325104, 0.0006044544279575348, 0.009600944817066193, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.32280662655830383, 0.01735025830566883, 0.15535852313041687, 0.00028658873634412885, 0.016427762806415558, 0.001579301548190415, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.016787199303507805, 0.10643576830625534, 0.24800433218479156, 0.4802894592285156, 0.03762362524867058, 0.06816797703504562, 0.10676699876785278, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22070105373859406, 0.03063296526670456, 0.12860903143882751, 0.04803713783621788, 0.06528759002685547, 0.3172104060649872, 0.012414618395268917, 0.008628717623651028, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0170818492770195, 0.2921580374240875, 0.24774892628192902, 0.2979756295681, 0.16657015681266785, 0.03825104981660843, 0.39123743772506714, 0.0541624091565609, 0.01715947687625885, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06952934712171555, 0.09443160146474838, 0.3155873417854309, 0.2511345446109772, 0.20146684348583221, 0.17959536612033844, 0.500001072883606, 0.3407229483127594, 0.15127938985824585, 0.026401039212942123, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12491581588983536, 0.08139167726039886, 0.045777399092912674, 0.07585746794939041, 0.05243801325559616, 0.09790124744176865, 0.17415514588356018, 0.44996151328086853, 0.13761505484580994, 0.06580806523561478, 0.1016187071800232, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03772348165512085, 0.0006561332265846431, 0.04040418565273285, 0.23337695002555847, 0.0037602160591632128, 0.1251135915517807, 0.07994246482849121, 0.0032252452801913023, 0.044697076082229614, 0.05314825102686882, 0.16676445305347443, 0.42838534712791443, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.008380687795579433, 0.11938491463661194, 0.03761400282382965, 0.10612092912197113, 0.004111893475055695, 0.07536520808935165, 0.06150262430310249, 0.010061400011181831, 0.01712355576455593, 0.026476707309484482, 0.05440329760313034, 0.37643373012542725, 0.12204637378454208, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0973815768957138, 0.1330094188451767, 0.2356250286102295, 0.23801013827323914, 0.16962124407291412, 0.3808935284614563, 0.19062454998493195, 0.12487400323152542, 0.4241224527359009, 0.1858355700969696, 0.1843334436416626, 0.17186462879180908, 0.1674181967973709, 0.03679514676332474, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.28161293268203735, 0.39586660265922546, 0.35408592224121094, 0.26687130331993103, 0.036089953035116196, 0.12106626480817795, 0.05175312981009483, 0.6374836564064026, 0.06537415832281113, 0.01867927983403206, 0.03261437267065048, 0.05161871388554573, 0.026679201051592827, 0.0063977655954658985, 0.0581950880587101, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.052721865475177765, 0.30848002433776855, 0.24953237175941467, 0.2790854275226593, 0.7654650807380676, 0.6871634125709534, 0.13210926949977875, 0.673875629901886, 0.04467727988958359, 0.018614191561937332, 0.08283445239067078, 0.0906965509057045, 0.06073237210512161, 0.12131030112504959, 0.06997358053922653, 0.3489122688770294, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03943483531475067, 0.28613966703414917, 0.07243800908327103, 0.8744964599609375, 0.029915155842900276, 0.331167072057724, 0.4079437255859375, 0.5431530475616455, 0.3259604275226593, 0.1150238886475563, 0.3324905335903168, 0.44221389293670654, 0.2450132817029953, 0.12577538192272186, 0.11014749854803085, 0.1900990903377533, 0.042790502309799194, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.06558705866336823, 0.020870981737971306, 0.007642277050763369, 0.028054187074303627, 0.010532653890550137, 0.10334379225969315, 0.12033270299434662, 0.1911371499300003, 0.30930495262145996, 0.04741071164608002, 0.06516209989786148, 0.09313901513814926, 0.24243950843811035, 0.15116305649280548, 0.09231718629598618, 0.47254911065101624, 0.053373783826828, 0.18162642419338226, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.017124762758612633, 0.00014164860476739705, 0.01482362300157547, 0.13952724635601044, 0.0008921221597120166, 0.07150562852621078, 0.037848807871341705, 0.0009583857608959079, 0.0160027127712965, 0.01657933183014393, 0.09754330664873123, 0.3402610719203949, 0.02766183763742447, 0.011668790131807327, 0.019427720457315445, 0.01879642903804779, 0.06977814435958862, 0.23379765450954437, 0.41046860814094543, NaN, NaN, NaN, NaN, NaN, NaN], [0.0033047832548618317, 0.043024010956287384, 0.009507044218480587, 0.05758155509829521, 0.0012058177962899208, 0.04777836054563522, 0.038867104798555374, 0.0027761561796069145, 0.008453112095594406, 0.011027430184185505, 0.021058345213532448, 0.3453521430492401, 0.05058252438902855, 0.004837945103645325, 0.0014179014833644032, 0.06873936206102371, 0.10687354952096939, 0.21186815202236176, 0.44615596532821655, 0.10872229933738708, NaN, NaN, NaN, NaN, NaN], [0.05260666832327843, 0.09784732013940811, 0.08957145363092422, 0.40504154562950134, 0.2393025904893875, 0.37446328997612, 0.33926665782928467, 0.06915906071662903, 0.28494811058044434, 0.18951286375522614, 0.21801336109638214, 0.2963850796222687, 0.09700386226177216, 0.02254888415336609, 0.016780056059360504, 0.3380737006664276, 0.17247304320335388, 0.15711140632629395, 0.27414536476135254, 0.12462585419416428, 0.05461693927645683, NaN, NaN, NaN, NaN], [0.4168609082698822, 0.5786882042884827, 0.4795728027820587, 0.4880480170249939, 0.07741907238960266, 0.22295767068862915, 0.10229793190956116, 0.7397969365119934, 0.09120289236307144, 0.02111845649778843, 0.040493883192539215, 0.06478337198495865, 0.029333919286727905, 0.01266437117010355, 0.08807221800088882, 0.12442159652709961, 0.019878262653946877, 0.02248454838991165, 0.045759230852127075, 0.02396523579955101, 0.002620323793962598, 0.04143214225769043, NaN, NaN, NaN], [0.05813424289226532, 0.29987069964408875, 0.06046860292553902, 0.2948205769062042, 0.6036045551300049, 0.4684220552444458, 0.10851431638002396, 0.5970842242240906, 0.03630568087100983, 0.009022231213748455, 0.034897517412900925, 0.044963937252759933, 0.06918716430664062, 0.06464210897684097, 0.027029458433389664, 0.39741793274879456, 0.1858920007944107, 0.0860959067940712, 0.03553689271211624, 0.03651457652449608, 0.07401836663484573, 0.02850046567618847, 0.457316130399704, NaN, NaN], [0.011862307786941528, 0.06274299323558807, 0.019264375790953636, 0.7077140212059021, 0.009838010184466839, 0.08938813954591751, 0.2665976285934448, 0.21134285628795624, 0.19931168854236603, 0.029879093170166016, 0.11873869597911835, 0.2187809944152832, 0.10740162432193756, 0.03893040865659714, 0.02778119407594204, 0.17118902504444122, 0.03705315291881561, 0.41107529401779175, 0.3035467863082886, 0.1782693862915039, 0.062172479927539825, 0.04369974508881569, 0.43116021156311035, 0.04090215638279915, NaN], [0.13294808566570282, 0.07747184485197067, 0.06700501590967178, 0.24500344693660736, 0.07035010308027267, 0.06088097393512726, 0.15465889871120453, 0.22422827780246735, 0.20946520566940308, 0.06346394866704941, 0.1416163444519043, 0.10671631991863251, 0.07756247371435165, 0.14874279499053955, 0.2551397681236267, 0.18877547979354858, 0.07302238047122955, 0.24805422127246857, 0.1228112131357193, 0.08095405995845795, 0.12022056430578232, 0.20888803899288177, 0.1654488444328308, 0.07207347452640533, 0.12261014431715012]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04818185046315193, 0.30147239565849304, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.000490668579004705, 0.5364181399345398, 0.0016803600592538714, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17249688506126404, 0.003960400819778442, 1.1815190191555303e-05, 0.00205309153534472, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08659190684556961, 0.2260276973247528, 0.018877657130360603, 0.019257033243775368, 0.9179584980010986, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [2.1155383365112357e-05, 0.00016346832853741944, 0.0004644138098228723, 9.852640505414456e-05, 0.009302367456257343, 0.8758521676063538, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0038963633123785257, 0.11578002572059631, 0.06833135336637497, 0.2930091321468353, 0.06728219240903854, 0.588379442691803, 0.190787211060524, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04113525524735451, 0.03917931765317917, 0.013817446306347847, 0.06874216347932816, 0.027753230184316635, 0.04752122610807419, 0.17637789249420166, 0.2964049279689789, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.006397286430001259, 0.008155078627169132, 0.02385183423757553, 0.08218340575695038, 0.09733399748802185, 0.7216709852218628, 0.11420661956071854, 0.028804002329707146, 0.49512770771980286, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007080267183482647, 0.010165071114897728, 0.007166726514697075, 0.04547898843884468, 0.014898931607604027, 0.06153866648674011, 0.05960511788725853, 0.025653565302491188, 0.05574938654899597, 0.5054050087928772, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12821261584758759, 0.09823491424322128, 0.2407415509223938, 0.03722868487238884, 0.07500484585762024, 0.23719841241836548, 0.08696958422660828, 0.10033686459064484, 0.08637046813964844, 0.05946339666843414, 0.17889682948589325, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.018611561506986618, 0.530681848526001, 0.37442806363105774, 0.09326046705245972, 0.039934538304805756, 0.607749342918396, 0.1011725440621376, 0.041957128793001175, 0.061673425137996674, 0.012941170483827591, 0.012897199019789696, 0.02531522512435913, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.025258230045437813, 0.013820141553878784, 0.020238902419805527, 0.20186173915863037, 0.008764497935771942, 0.044081512838602066, 0.11685895919799805, 0.12131167203187943, 0.03466574102640152, 0.0033257410395890474, 0.009427645243704319, 0.00932170171290636, 0.6215367317199707, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0027034373488277197, 0.008653531782329082, 0.0021412167698144913, 0.02395743690431118, 0.06537352502346039, 0.05110874027013779, 0.050060901790857315, 0.023448945954442024, 0.0059632728807628155, 0.0016337132547050714, 0.0060929651372134686, 0.00957516860216856, 0.05008334666490555, 0.696637749671936, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [5.63129390229733e-07, 0.00027805642457678914, 1.7160025890916586e-05, 5.958595011179568e-06, 0.00078710971865803, 1.2566613349918043e-06, 9.03528507478768e-06, 2.1993335394654423e-05, 4.528845238382928e-06, 1.0594538935038145e-06, 2.375837993895402e-06, 1.0765622391772922e-05, 0.00012861557479482144, 0.000270194374024868, 0.4203896224498749, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.19651824235916138, 0.009276115335524082, 0.0007576652569696307, 0.02043321169912815, 0.000937489268835634, 0.0014158851699903607, 0.02691410481929779, 0.025149332359433174, 0.015754513442516327, 0.002638434525579214, 0.03568584471940994, 0.28478676080703735, 0.08937329053878784, 0.04057440906763077, 0.41798362135887146, 0.02812151424586773, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0009883381426334381, 0.005475975573062897, 0.017872320488095284, 0.0038598645478487015, 0.01383217889815569, 0.1060260757803917, 0.010558119975030422, 0.0004280287539586425, 0.011488020420074463, 0.004323506727814674, 0.015877770259976387, 0.025533713400363922, 0.06758329272270203, 0.005362953990697861, 0.03033292666077614, 0.3987913429737091, 0.22715723514556885, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.025437461212277412, 0.027387555688619614, 0.0211916733533144, 0.0013409400125965476, 0.0016278955154120922, 0.0205780491232872, 0.006606978829950094, 0.005105526186525822, 0.008417481556534767, 0.008475488983094692, 0.016475802287459373, 0.021865585818886757, 0.04041945934295654, 0.001965513452887535, 0.030297037214040756, 0.018051480874419212, 0.2940014600753784, 0.09546513855457306, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.014116446487605572, 0.6685785055160522, 0.40577325224876404, 0.09365412592887878, 0.008716625161468983, 0.504762589931488, 0.11037815362215042, 0.03693895787000656, 0.066362664103508, 0.025546396151185036, 0.030971869826316833, 0.07333581149578094, 0.21910515427589417, 0.03128749132156372, 0.013437384739518166, 0.06674141436815262, 0.055549826472997665, 0.02615067921578884, 0.05289305001497269, NaN, NaN, NaN, NaN, NaN, NaN], [0.01752244122326374, 0.013681006617844105, 0.015325021930038929, 0.15400148928165436, 0.0017620606813579798, 0.03783759847283363, 0.07285356521606445, 0.042190372943878174, 0.019725583493709564, 0.004497688263654709, 0.010335608385503292, 0.023485884070396423, 0.5969190001487732, 0.22785267233848572, 0.05655405670404434, 0.05765213817358017, 0.006416310556232929, 0.029401889070868492, 0.022928474470973015, 0.6468356251716614, NaN, NaN, NaN, NaN, NaN], [0.003705248236656189, 0.09392052888870239, 0.0011726000811904669, 0.042238909751176834, 0.07787514477968216, 0.11800158768892288, 0.09318403154611588, 0.018972182646393776, 0.022339271381497383, 0.02290215529501438, 0.009648749604821205, 0.020298194140195847, 0.09632600843906403, 0.6665039658546448, 0.01913357712328434, 0.016501925885677338, 0.01550414226949215, 0.014767719432711601, 0.035943012684583664, 0.1298983097076416, 0.7307590246200562, NaN, NaN, NaN, NaN], [3.2450822118335054e-07, 0.0001958437787834555, 1.195628647110425e-05, 3.192948497598991e-06, 0.00034392892848700285, 1.3818779507346335e-06, 6.319523890851997e-06, 9.25252061279025e-06, 3.2897685287025524e-06, 1.041492623699014e-06, 2.450263082209858e-06, 1.1291336704744026e-05, 9.216016042046249e-05, 0.00025747373001649976, 0.3770022690296173, 7.494814053643495e-05, 0.00011931787594221532, 5.454379424918443e-05, 3.481862586340867e-05, 0.0001493972522439435, 6.532184488605708e-05, 0.4379080533981323, NaN, NaN, NaN], [0.11172444373369217, 0.00812594499439001, 0.000803561822976917, 0.011673782020807266, 0.00013412271800916642, 0.002435607835650444, 0.021002406254410744, 0.009926681406795979, 0.014218374155461788, 0.0044799866154789925, 0.03462693840265274, 0.49634605646133423, 0.1610735058784485, 0.03537029027938843, 0.3717024624347687, 0.0470024012029171, 0.0025306264869868755, 0.08426976948976517, 0.5137573480606079, 0.047759927809238434, 0.008752438239753246, 0.5270217657089233, 0.020567137748003006, NaN, NaN], [0.00039373920299112797, 0.00142151047475636, 0.016346368938684464, 0.0038184949662536383, 0.00426360173150897, 0.10012070834636688, 0.007060237228870392, 0.00022489627008326352, 0.006389277055859566, 0.0014407823327928782, 0.01344740204513073, 0.019176417961716652, 0.04953484237194061, 0.003102741902694106, 0.017501499503850937, 0.25968801975250244, 0.12805432081222534, 0.03450275957584381, 0.03214799612760544, 0.06495527178049088, 0.007038496434688568, 0.018200475722551346, 0.2228115350008011, 0.24082934856414795, NaN], [0.004585978575050831, 0.008592751808464527, 0.20804427564144135, 0.003501898143440485, 0.01809401623904705, 0.0088487658649683, 0.01839679665863514, 0.009930659085512161, 0.019693726673722267, 0.015943868085741997, 0.06719032675027847, 0.03678698092699051, 0.03292753919959068, 0.02313893660902977, 0.023240724578499794, 0.03294161707162857, 0.24390928447246552, 0.10472099483013153, 0.0623757429420948, 0.06489475816488266, 0.03424002602696419, 0.03615953400731087, 0.05666068568825722, 0.29077935218811035, 0.20903274416923523]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15880486369132996, 0.04734092205762863, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.22883240878582, 0.015307039953768253, 0.023610780015587807, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.15376803278923035, 0.17623378336429596, 0.16427822411060333, 0.018553992733359337, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12576976418495178, 0.44071146845817566, 0.38860467076301575, 0.12043511122465134, 0.027116619050502777, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03928220644593239, 0.42239660024642944, 0.2546820342540741, 0.22367709875106812, 0.1215892881155014, 0.001983387628570199, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17152060568332672, 0.49365419149398804, 0.08085957914590836, 0.02207508496940136, 0.19231174886226654, 0.008304901421070099, 0.03878962993621826, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.13843253254890442, 0.07047099620103836, 0.2525072991847992, 0.13487939536571503, 0.27911728620529175, 0.11727599054574966, 0.022392159327864647, 0.1764850914478302, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10915631055831909, 0.30942168831825256, 0.19657404720783234, 0.031007295474410057, 0.23716343939304352, 0.05435822904109955, 0.08149112015962601, 0.6613667011260986, 0.11670006066560745, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.0640818402171135, 0.41535088419914246, 0.29784247279167175, 0.05657188221812248, 0.036311421543359756, 0.08192699402570724, 0.16688455641269684, 0.10144203901290894, 0.346017450094223, 0.15466110408306122, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04877842590212822, 0.16450235247612, 0.23761717975139618, 0.0720985159277916, 0.12954245507717133, 0.08035153150558472, 0.18124118447303772, 0.05973014980554581, 0.26483285427093506, 0.39028850197792053, 0.05098416656255722, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11044558137655258, 0.08550350368022919, 0.2513507902622223, 0.28401821851730347, 0.12441904842853546, 0.05029991641640663, 0.42405593395233154, 0.08374682813882828, 0.43869927525520325, 0.14253327250480652, 0.10876792669296265, 0.09369473904371262, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.08764015138149261, 0.46941375732421875, 0.23278135061264038, 0.11763583868741989, 0.0354606918990612, 0.16624747216701508, 0.2793619632720947, 0.1965668648481369, 0.23052528500556946, 0.3914787769317627, 0.08669382333755493, 0.10678009688854218, 0.08708767592906952, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.2116944044828415, 0.06720030307769775, 0.29984304308891296, 0.010844358243048191, 0.051072586327791214, 0.15023349225521088, 0.04554526135325432, 0.1560167670249939, 0.03609438240528107, 0.026584016159176826, 0.14512087404727936, 0.05890262499451637, 0.015816861763596535, 0.07422769069671631, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.056502565741539, 0.15541820228099823, 0.07158821076154709, 0.00490804947912693, 0.015012365765869617, 0.06302572786808014, 0.01116714347153902, 0.22065599262714386, 0.021468764171004295, 0.01365464273840189, 0.022816751152276993, 0.019708380103111267, 0.0059420084580779076, 0.0700121819972992, 0.287899911403656, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.058403778821229935, 0.0693131536245346, 0.04999461770057678, 0.004054869059473276, 0.0624610111117363, 0.018093721941113472, 0.07961009442806244, 0.1545858234167099, 0.3008257746696472, 0.14455094933509827, 0.09800520539283752, 0.09531621634960175, 0.27401015162467957, 0.4782770574092865, 0.11211755871772766, 0.01358953770250082, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1399688720703125, 0.5559014678001404, 0.20350231230258942, 0.042011573910713196, 0.020507201552391052, 0.03915366902947426, 0.4243565797805786, 0.11376935243606567, 0.31140708923339844, 0.051479678601026535, 0.07416504621505737, 0.2654426097869873, 0.3960915207862854, 0.5790604948997498, 0.18063338100910187, 0.1939544379711151, 0.04191381484270096, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.027515297755599022, 0.0486784465610981, 0.06845460832118988, 0.023408811539411545, 0.008863206952810287, 0.008533195592463017, 0.24178741872310638, 0.01229054294526577, 0.25817692279815674, 0.6869812607765198, 0.049950506538152695, 0.12178820371627808, 0.0564231351017952, 0.02026011236011982, 0.004908477421849966, 0.03562311828136444, 0.12746450304985046, 0.0016219470417127013, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.11620164662599564, 0.09937138110399246, 0.17538107931613922, 0.40406307578086853, 0.043817292898893356, 0.05759625509381294, 0.49306368827819824, 0.09120260924100876, 0.36450278759002686, 0.08042807132005692, 0.1856311559677124, 0.1376025527715683, 0.1998283714056015, 0.3654005527496338, 0.15910619497299194, 0.4969707429409027, 0.08565060794353485, 0.02514367550611496, 0.090617336332798, NaN, NaN, NaN, NaN, NaN, NaN], [0.0739481970667839, 0.5182103514671326, 0.19721719622612, 0.21118015050888062, 0.015751224011182785, 0.12249443680047989, 0.5174803733825684, 0.17075838148593903, 0.30025264620780945, 0.29246312379837036, 0.0875946432352066, 0.2326347827911377, 0.13986286520957947, 0.511695921421051, 0.12602318823337555, 0.03662485629320145, 0.1263200044631958, 0.0166145209223032, 0.19702456891536713, 0.09621746093034744, NaN, NaN, NaN, NaN, NaN], [0.3052336871623993, 0.37224864959716797, 0.45515015721321106, 0.04986808821558952, 0.05332064628601074, 0.13846120238304138, 0.15990367531776428, 0.20659208297729492, 0.06640873104333878, 0.035323526710271835, 0.30340465903282166, 0.10174556821584702, 0.02102985605597496, 0.11508277803659439, 0.09203195571899414, 0.0029288395307958126, 0.023838462308049202, 0.004605103749781847, 0.052648112177848816, 0.006431906949728727, 0.026736242696642876, NaN, NaN, NaN, NaN], [0.047024402767419815, 0.1257133185863495, 0.052377521991729736, 0.009844984859228134, 0.015597687102854252, 0.06965665519237518, 0.01849394477903843, 0.1603521853685379, 0.02587857097387314, 0.00957732368260622, 0.023523790761828423, 0.020081259310245514, 0.008425970561802387, 0.10955916345119476, 0.35300737619400024, 0.023505402728915215, 0.00786643661558628, 0.007557017263025045, 0.013908758759498596, 0.004675114993005991, 0.035296451300382614, 0.3261549174785614, NaN, NaN, NaN], [0.11014947295188904, 0.08461853116750717, 0.02981843426823616, 0.004099451471120119, 0.009237504564225674, 0.011130756698548794, 0.132149338722229, 0.11619938164949417, 0.22203940153121948, 0.02292616292834282, 0.06793706119060516, 0.07227552682161331, 0.3262397348880768, 0.40601006150245667, 0.08270477503538132, 0.013506797142326832, 0.03135772421956062, 0.07034049183130264, 0.09623772650957108, 0.20842698216438293, 0.2752794623374939, 0.1234828308224678, 0.04129752516746521, NaN, NaN], [0.1182219609618187, 0.7384620308876038, 0.11492461711168289, 0.09884578734636307, 0.012010940350592136, 0.038200050592422485, 0.4905328154563904, 0.23439669609069824, 0.2528713345527649, 0.015177865512669086, 0.07817362248897552, 0.33532261848449707, 0.4971323609352112, 0.7384514212608337, 0.2383432686328888, 0.2306600660085678, 0.025716517120599747, 0.023198120296001434, 0.3352215886116028, 0.4797173738479614, 0.5688640475273132, 0.2555003762245178, 0.1890360713005066, 0.06237812712788582, NaN], [0.13153354823589325, 0.5476850867271423, 0.27465543150901794, 0.27658137679100037, 0.5121651291847229, 0.3939417600631714, 0.2527337968349457, 0.41937416791915894, 0.2437492311000824, 0.1485103964805603, 0.10651403665542603, 0.241710364818573, 0.34289923310279846, 0.3691290616989136, 0.108230821788311, 0.32214298844337463, 0.08876177668571472, 0.03369928151369095, 0.23942533135414124, 0.302080899477005, 0.3531237244606018, 0.09724070131778717, 0.19267186522483826, 0.06874143332242966, 0.052875734865665436]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03290099650621414, 0.3365767002105713, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.003547579748556018, 0.004082763101905584, 0.4616691768169403, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03595791012048721, 0.1313885897397995, 0.007101066876202822, 0.42131781578063965, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.007601147051900625, 0.014137630350887775, 0.01938864029943943, 0.2572920322418213, 0.0011994435917586088, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.00011468974116723984, 0.0032473355531692505, 0.00037737423554062843, 0.2793608605861664, 0.003465541172772646, 5.061212868895382e-05, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.21311266720294952, 0.10434294492006302, 0.011484598740935326, 0.0013334749964997172, 0.03845251351594925, 0.028238367289304733, 0.05654546618461609, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.052184704691171646, 0.499632865190506, 0.005138374865055084, 0.10169705748558044, 0.09997230768203735, 0.036990027874708176, 0.07566682249307632, 0.32418423891067505, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23645982146263123, 0.016864946112036705, 0.013305210508406162, 0.0007752762176096439, 0.017555342987179756, 0.03100133314728737, 0.04085567593574524, 0.029846351593732834, 0.010373883880674839, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18805328011512756, 0.046367619186639786, 0.10314629226922989, 0.018223291262984276, 0.27720585465431213, 0.3798944056034088, 0.09291481226682663, 0.09293034672737122, 0.04290880635380745, 0.03370373696088791, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.028641005977988243, 0.03295213729143143, 0.0065453751012682915, 0.16686026751995087, 0.028714975342154503, 0.015397193841636181, 0.02003423683345318, 0.019093815237283707, 0.020523719489574432, 0.016172079369425774, 0.3490104377269745, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.10839971899986267, 0.004465002100914717, 0.016082070767879486, 0.035488102585077286, 0.015600458718836308, 0.012030484154820442, 0.015872180461883545, 0.01552913524210453, 0.03533920273184776, 0.11401902139186859, 0.31523072719573975, 0.20448055863380432, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.18776558339595795, 0.0060520414263010025, 0.017473671585321426, 0.005528539884835482, 0.0027145782951265574, 0.012176988646388054, 0.0031525399535894394, 0.004637573380023241, 0.011988476850092411, 0.06979440897703171, 0.38327983021736145, 0.020156072452664375, 0.010166948661208153, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.3064975440502167, 0.004262991715222597, 0.009997943416237831, 0.00034317225799895823, 0.013912403024733067, 0.02852706052362919, 0.004078225698322058, 0.001928618410602212, 0.006367305759340525, 0.035507142543792725, 0.050674788653850555, 0.007057875394821167, 0.0049485149793326855, 0.0049379738047719, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14988604187965393, 0.015584584325551987, 0.137997567653656, 0.0031439096201211214, 0.5546696782112122, 0.01658078096807003, 0.0025873971171677113, 0.0010246702004224062, 0.019667595624923706, 0.012580120004713535, 0.015491531230509281, 0.029023459181189537, 0.021588340401649475, 0.25595030188560486, 0.02325037308037281, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.07357528805732727, 0.007756352424621582, 0.002724927617236972, 0.001402079127728939, 0.0004431438574101776, 0.00010925461538136005, 0.0029409730341285467, 0.005563507787883282, 0.012139370664954185, 0.03890732303261757, 0.05558362230658531, 0.03318313509225845, 0.4270496368408203, 0.07112571597099304, 0.15036046504974365, 0.020786603912711143, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.012120572850108147, 0.0003307444858364761, 0.009640182368457317, 0.00017808230768423527, 0.0021490382496267557, 0.0008148089982569218, 0.0008481521508656442, 0.0019973982125520706, 0.005024890415370464, 0.01719486527144909, 0.044799502938985825, 0.006444229744374752, 0.018026985228061676, 0.0067391968332231045, 0.061299871653318405, 0.01281613577157259, 0.3084925711154938, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.011204708367586136, 0.0033799665980041027, 0.008117830380797386, 0.1567971557378769, 0.012545537203550339, 0.002854604972526431, 0.0037395430263131857, 0.0003391341888345778, 0.002928558737039566, 0.004266565665602684, 0.28180748224258423, 0.005543314386159182, 0.0059068226255476475, 0.004401014186441898, 0.09436267614364624, 0.003524675266817212, 0.09697568416595459, 0.3818984925746918, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1085091158747673, 0.0013132937019690871, 0.011304548010230064, 0.014309195801615715, 0.009265521541237831, 0.00682368129491806, 0.01179590355604887, 0.005223054438829422, 0.01697726733982563, 0.05782441794872284, 0.2522926330566406, 0.16053971648216248, 0.020927468314766884, 0.02051178365945816, 0.1114674061536789, 0.014847181737422943, 0.40623563528060913, 0.12017090618610382, 0.2281613051891327, NaN, NaN, NaN, NaN, NaN, NaN], [0.23926517367362976, 0.007461922243237495, 0.015478387475013733, 0.02120528556406498, 0.0046339076943695545, 0.01287792343646288, 0.005305645987391472, 0.0037130024284124374, 0.011430526152253151, 0.10132863372564316, 0.42019084095954895, 0.03134358674287796, 0.006659360136836767, 0.0015345009742304683, 0.05340040102601051, 0.0021821516565978527, 0.15366847813129425, 0.09343723207712173, 0.04055917635560036, 0.009410854429006577, NaN, NaN, NaN, NaN, NaN], [0.3882482349872589, 0.012203006073832512, 0.008404962718486786, 0.0008633172838017344, 0.07213836163282394, 0.03903299570083618, 0.006879106629639864, 0.0025245456490665674, 0.011604986153542995, 0.1302306056022644, 0.05970751494169235, 0.005057368893176317, 0.0025832061655819416, 0.003548768814653158, 0.03821956738829613, 0.0041786422953009605, 0.029319334775209427, 0.009258194826543331, 0.010013489983975887, 0.0024901984725147486, 0.009316755458712578, NaN, NaN, NaN, NaN], [0.08333727717399597, 0.009125825949013233, 0.12352871894836426, 0.0034849271178245544, 0.49194949865341187, 0.008760062977671623, 0.002427457133308053, 0.0004761714953929186, 0.014378424733877182, 0.007653949782252312, 0.010163314640522003, 0.018072640523314476, 0.014914281666278839, 0.33540958166122437, 0.012212751433253288, 0.050671979784965515, 0.08942927420139313, 0.0058481828309595585, 0.02088618278503418, 0.013520943000912666, 0.3026564419269562, 0.011637967079877853, NaN, NaN, NaN], [0.019913960248231888, 0.003490668721497059, 0.00020567848696373403, 0.00036819992237724364, 0.00019341551524121314, 3.8652269722661003e-05, 0.0008544524316675961, 0.002890991745516658, 0.001110991695895791, 0.005157719366252422, 0.008338885381817818, 0.0030357406940311193, 0.14557099342346191, 0.021602485328912735, 0.04367346689105034, 0.0015647107502445579, 0.009655454196035862, 0.14827704429626465, 0.008163533173501492, 0.49237948656082153, 0.06938102096319199, 0.08394628763198853, 0.049248531460762024, NaN, NaN], [0.010580360889434814, 0.00023049254377838224, 0.00745873898267746, 0.00016025979130063206, 0.002226235345005989, 0.0004258991975802928, 0.000578688399400562, 0.0014760587364435196, 0.002039685845375061, 0.0048048608005046844, 0.019996320828795433, 0.0029125709552317858, 0.006709430366754532, 0.0017099445685744286, 0.02097223326563835, 0.0024284888058900833, 0.10361000150442123, 0.022238893434405327, 0.009704988449811935, 0.017071064561605453, 0.011506098322570324, 0.0406200997531414, 0.0063119689002633095, 0.36112311482429504, NaN], [0.07011571526527405, 0.029766615480184555, 0.05616272985935211, 0.02569880336523056, 0.02553572878241539, 0.010698755271732807, 0.02022577077150345, 0.01824677176773548, 0.03918607532978058, 0.034657131880521774, 0.11515442281961441, 0.05569382756948471, 0.035370998084545135, 0.047812946140766144, 0.1140216588973999, 0.018943075090646744, 0.09709078818559647, 0.08172454684972763, 0.04602199047803879, 0.02941049635410309, 0.031383853405714035, 0.10708537697792053, 0.012693268246948719, 0.07050468772649765, 0.25427982211112976]], [[0.125, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1627129465341568, 0.03836298733949661, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.23664157092571259, 0.02332315407693386, 0.0017523575806990266, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.14284735918045044, 0.19342879951000214, 0.5212197303771973, 0.028613613918423653, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.022152410820126534, 0.06252314150333405, 0.005122532602399588, 0.24202540516853333, 0.0027534610126167536, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.04657726734876633, 0.23517371714115143, 0.03296450525522232, 0.2014523595571518, 0.06359406560659409, 0.0884864553809166, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.05186963453888893, 0.02286554127931595, 0.21517929434776306, 0.12055587023496628, 0.1711670458316803, 0.27492430806159973, 0.27398592233657837, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.020278872922062874, 0.02308776043355465, 0.022820638492703438, 0.18259893357753754, 0.3133871257305145, 0.08183155953884125, 0.35655686259269714, 0.17295894026756287, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.057175230234861374, 0.2799927890300751, 0.10977934300899506, 0.4680712819099426, 0.08838099986314774, 0.05264464393258095, 0.21108192205429077, 0.08241217583417892, 0.0764400064945221, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.17679302394390106, 0.30970489978790283, 0.042192552238702774, 0.2463400512933731, 0.032756272703409195, 0.05394153669476509, 0.02321716584265232, 0.30038926005363464, 0.023974716663360596, 0.0257905051112175, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1864403486251831, 0.03811780363321304, 0.18074536323547363, 0.08396673202514648, 0.026499373838305473, 0.05736878141760826, 0.274480402469635, 0.10284627228975296, 0.15606749057769775, 0.017497936263680458, 0.09719526022672653, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.1767420768737793, 0.017465414479374886, 0.034512054175138474, 0.0999627411365509, 0.011741198599338531, 0.022724410519003868, 0.04408577084541321, 0.03894393891096115, 0.018038587644696236, 0.058924250304698944, 0.2522818148136139, 0.12782295048236847, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.042104240506887436, 0.022070694714784622, 0.04743226245045662, 0.13338083028793335, 0.020831480622291565, 0.031267598271369934, 0.024703562259674072, 0.041907425969839096, 0.006121364887803793, 0.02875565178692341, 0.13002096116542816, 0.36194902658462524, 0.021867850795388222, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.12623563408851624, 0.6370776891708374, 0.07802888005971909, 0.06076015904545784, 0.015353387221693993, 0.0031011439859867096, 0.031844403594732285, 0.5665289163589478, 0.013176449574530125, 0.025442441925406456, 0.05083877220749855, 0.08586791157722473, 0.03281332179903984, 0.0019294946687296033, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.010483458638191223, 0.10243765264749527, 0.013204336166381836, 0.1070198118686676, 0.001742976950481534, 0.0011925535509362817, 0.03764529153704643, 0.023008054122328758, 0.09038762003183365, 0.1208486333489418, 0.06097627431154251, 0.11476689577102661, 0.17706690728664398, 0.4447736442089081, 0.005561552010476589, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.03962688520550728, 0.412600040435791, 0.1027907133102417, 0.011060677468776703, 0.04006139934062958, 0.005457504652440548, 0.17391063272953033, 0.009697728790342808, 0.08243320137262344, 0.1504840850830078, 0.029468167573213577, 0.29366523027420044, 0.04788699373602867, 0.17640100419521332, 0.04229334741830826, 0.3300667107105255, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.20544184744358063, 0.06503231078386307, 0.21778742969036102, 0.04011436551809311, 0.2470238208770752, 0.03102266602218151, 0.027881061658263206, 0.06887322664260864, 0.023802783340215683, 0.2166331559419632, 0.06618232280015945, 0.058350641280412674, 0.04297764599323273, 0.06574989855289459, 0.02652076631784439, 0.08339553326368332, 0.09817715734243393, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.09466058760881424, 0.0047309016808867455, 0.1481417566537857, 0.06127317249774933, 0.015202163718640804, 0.011932089924812317, 0.31230586767196655, 0.04852164536714554, 0.039501819759607315, 0.001117925625294447, 0.06312739849090576, 0.023924386128783226, 0.02860989049077034, 0.007241260260343552, 0.11453913897275925, 0.012237192131578922, 0.2803768217563629, 0.0480632521212101, NaN, NaN, NaN, NaN, NaN, NaN, NaN], [0.02001449465751648, 0.0017837424529716372, 0.005722085013985634, 0.04321253299713135, 0.00430489843711257, 0.009005578234791756, 0.010736249387264252, 0.0058517144061625, 0.003792154835537076, 0.008828205987811089, 0.0838593989610672, 0.029530486091971397, 0.015579215250909328, 0.010320665314793587, 0.016853220760822296, 0.017335176467895508, 0.12552303075790405, 0.42354699969291687, 0.08326870948076248, NaN, NaN, NaN, NaN, NaN, NaN], [0.001771818962879479, 0.000807587115559727, 0.0031146325636655092, 0.023062998428940773, 0.0018312688916921616, 0.007724495604634285, 0.002569216303527355, 0.003803644794970751, 0.00041838324978016317, 0.001987496856600046, 0.012477965094149113, 0.04809670150279999, 0.0016458284808322787, 0.00020838514319621027, 0.005814890842884779, 0.018183711916208267, 0.30546146631240845, 0.4703490138053894, 0.15369661152362823, 0.012250960804522038, NaN, NaN, NaN, NaN, NaN], [0.02520398050546646, 0.2818087637424469, 0.007948609068989754, 0.07590723037719727, 0.01867567002773285, 0.006826441269367933, 0.011762343347072601, 0.5987983345985413, 0.0045673479326069355, 0.01173742488026619, 0.03130093589425087, 0.03894692659378052, 0.016236862167716026, 0.0014989122282713652, 0.0009245824767276645, 0.025562506169080734, 0.5276230573654175, 0.32699310779571533, 0.1864093542098999, 0.0933799296617508, 0.0060149896889925, NaN, NaN, NaN, NaN], [0.0011320068733766675, 0.011502433568239212, 0.0017513524508103728, 0.020418671891093254, 0.0003008104977197945, 0.00031320590642280877, 0.0053228470496833324, 0.0022876623552292585, 0.011736828833818436, 0.017109515145421028, 0.010937619023025036, 0.015238909050822258, 0.025703608989715576, 0.10705357789993286, 0.0009204442030750215, 0.02667400799691677, 0.16934601962566376, 0.08647502958774567, 0.028284918516874313, 0.06841914355754852, 0.39870724081993103, 0.0010592876933515072, NaN, NaN, NaN], [0.02631283551454544, 0.29101136326789856, 0.042160265147686005, 0.009721376933157444, 0.02933679334819317, 0.014515053480863571, 0.18161341547966003, 0.016545770689845085, 0.03647695854306221, 0.0840071588754654, 0.02240183763206005, 0.1055113896727562, 0.037331126630306244, 0.17535105347633362, 0.010923052206635475, 0.2594170868396759, 0.5064816474914551, 0.06657205522060394, 0.130835622549057, 0.0483754500746727, 0.2870587110519409, 0.010685333050787449, 0.21122200787067413, NaN, NaN], [0.21289733052253723, 0.10400458425283432, 0.2843308448791504, 0.11722961068153381, 0.31265783309936523, 0.07705509662628174, 0.050357937812805176, 0.1631784737110138, 0.04547655209898949, 0.37539371848106384, 0.07925810664892197, 0.07719646394252777, 0.043498191982507706, 0.04735783487558365, 0.022911155596375465, 0.20965908467769623, 0.2452480047941208, 0.05793433263897896, 0.07357832789421082, 0.03363368287682533, 0.041085004806518555, 0.014093895442783833, 0.05045074224472046, 0.0570731945335865, NaN], [0.02115148864686489, 0.018139760941267014, 0.03536282852292061, 0.06259438395500183, 0.00901759136468172, 0.014575985260307789, 0.12521256506443024, 0.12870429456233978, 0.09162478893995285, 0.06363746523857117, 0.1348179280757904, 0.07700010389089584, 0.05158444121479988, 0.01101324986666441, 0.03299920633435249, 0.163722425699234, 0.13794326782226562, 0.18303781747817993, 0.117555633187294, 0.08103907853364944, 0.012191864661872387, 0.032527241855859756, 0.16104964911937714, 0.12187117338180542, 0.22321484982967377]]]], \"bot_text\": [\"Das_\", \"Tier\", \"_\", \"\\u00fcber\", \"quer\", \"te_\", \"die_\", \"Stra\\u00dfe_\", \"nicht_\", \", _\", \"weil_\", \"es_\", \"zu_\", \"m\\u00fc\", \"de_\", \"war_\", \", _\", \"weil_\", \"es_\", \"zu_\", \"m\\u00fc\", \"de_\", \"war_\", \"._\"]}, \"inp_inp\": {\"top_text\": [\"The_\", \"animal_\", \"didn_\", \"'_\", \"t_\", \"cross_\", \"the_\", \"street_\", \"because_\", \"it_\", \"was_\", \"too_\", \"tire\", \"d_\"], \"att\": [[[[0.04540494084358215, 0.009098929353058338, 0.06841860711574554, 0.050027038902044296, 0.1867244392633438, 0.20893266797065735, 0.15536439418792725, 0.2501838803291321, 0.03253718465566635, 0.045193806290626526, 0.01405471283942461, 0.15126678347587585, 0.5554144382476807, 0.07120772451162338, 0.21479088068008423], [0.010880604386329651, 0.008569094352424145, 0.3644530475139618, 0.032524824142456055, 0.15862980484962463, 0.2895345985889435, 0.007411073427647352, 0.03074379824101925, 0.23678991198539734, 0.04092710092663765, 0.21633881330490112, 0.10217994451522827, 0.5741018652915955, 0.08794906735420227, 0.15811748802661896], [0.1548197716474533, 0.04407857358455658, 0.04267416149377823, 0.14390510320663452, 0.39150071144104004, 0.10470721870660782, 0.21010224521160126, 0.37398451566696167, 0.24677534401416779, 0.3071460425853729, 0.12511251866817474, 0.37053829431533813, 0.34731435775756836, 0.21468856930732727, 0.22426171600818634], [0.01666487753391266, 0.070415198802948, 0.13558338582515717, 0.030082950368523598, 0.17114414274692535, 0.20995233952999115, 0.018852930516004562, 0.2688913345336914, 0.024380644783377647, 0.01614876091480255, 0.058318838477134705, 0.003357462352141738, 0.22233186662197113, 0.08606056123971939, 0.08522026240825653], [0.26702794432640076, 0.10013092309236526, 0.15535299479961395, 0.01822819747030735, 0.19259323179721832, 0.1620739996433258, 0.06925511360168457, 0.14121465384960175, 0.30160874128341675, 0.138941690325737, 0.14571446180343628, 0.1845642775297165, 0.3172887861728668, 0.1378965824842453, 0.15321676433086395], [0.05774107202887535, 0.08979255706071854, 0.15777261555194855, 0.0986839085817337, 0.04042482376098633, 0.02364284358918667, 0.006265458185225725, 0.20312650501728058, 0.04589210823178291, 0.2705432176589966, 0.29482388496398926, 0.25277185440063477, 0.21941334009170532, 0.09023746848106384, 0.12374064326286316], [0.10808208584785461, 0.08377770334482193, 0.3031982481479645, 0.08575166761875153, 0.1659224033355713, 0.02410510927438736, 0.024052061140537262, 0.06346622854471207, 0.012278172187507153, 0.033475130796432495, 0.02865537814795971, 0.2309909611940384, 0.5272806286811829, 0.058207638561725616, 0.12589795887470245], [0.2848440408706665, 0.04557379335165024, 0.07043055444955826, 0.13887976109981537, 0.25104182958602905, 0.08729252219200134, 0.03900376707315445, 0.06159999966621399, 0.07028467953205109, 0.1360185593366623, 0.12163159996271133, 0.4339398145675659, 0.18035274744033813, 0.13636742532253265, 0.35040098428726196], [0.03364454582333565, 0.06385143101215363, 0.4650610089302063, 0.13847006857395172, 0.12132523953914642, 0.23606915771961212, 0.02828356996178627, 0.17786316573619843, 0.0068073878064751625, 0.0032905752304941416, 0.04716186597943306, 0.060036350041627884, 0.5867005586624146, 0.23594366014003754, 0.05739189311861992], [0.04961356148123741, 0.4571499228477478, 0.32633671164512634, 0.044803813099861145, 0.12193554639816284, 0.15620054304599762, 0.031114954501390457, 0.37925899028778076, 0.023853085935115814, 0.007363635115325451, 0.0625552162528038, 0.04359081760048866, 0.12771400809288025, 0.10945692658424377, 0.03218715265393257], [0.054336514323949814, 0.12682472169399261, 0.28572455048561096, 0.7098703384399414, 0.04356186464428902, 0.036012813448905945, 0.12616953253746033, 0.12438997626304626, 0.06097114831209183, 0.011340769939124584, 0.00453603221103549, 0.02511424943804741, 0.15918391942977905, 0.004009802360087633, 0.1337292641401291], [0.029656492173671722, 0.11861541867256165, 0.25968441367149353, 0.6952800154685974, 0.06073199212551117, 0.3734285235404968, 0.030824951827526093, 0.09641394764184952, 0.0529148206114769, 0.01715172454714775, 0.01323915645480156, 0.055627286434173584, 0.11593649536371231, 0.04441850632429123, 0.04630020260810852], [0.10554661601781845, 0.6362442970275879, 0.6959939002990723, 0.018170323222875595, 0.40134888887405396, 0.15823723375797272, 0.1629355400800705, 0.11358990520238876, 0.24731940031051636, 0.23558683693408966, 0.07505767047405243, 0.03725680336356163, 0.014009351842105389, 0.03713200241327286, 0.09585387259721756], [0.4055319130420685, 0.2534714341163635, 0.44874629378318787, 0.14194901287555695, 0.3008168041706085, 0.20029903948307037, 0.07248799502849579, 0.26174047589302063, 0.1826024055480957, 0.0982341319322586, 0.09884719550609589, 0.22728654742240906, 0.04277953878045082, 0.06280668079853058, 0.09454112499952316], [0.025013893842697144, 0.013348683714866638, 0.22353146970272064, 0.0037027201615273952, 0.14888618886470795, 0.22346094250679016, 0.021921563893556595, 0.6342950463294983, 0.03356323391199112, 0.06236502528190613, 0.03522828221321106, 0.17797930538654327, 0.04731723666191101, 0.06786928325891495, 0.042550042271614075]], [[0.1577349603176117, 0.09554319828748703, 0.02016325853765011, 0.08440300822257996, 0.33925309777259827, 0.35353752970695496, 0.49755600094795227, 0.2782062292098999, 0.2544572949409485, 0.6230229735374451, 0.04059281200170517, 0.12019311636686325, 0.2659685015678406, 0.3508304953575134, 0.10784413665533066], [0.053030457347631454, 0.00926118716597557, 0.08361255377531052, 0.1587543487548828, 0.42493122816085815, 0.0713140144944191, 0.05032603442668915, 0.790120005607605, 0.4618776738643646, 0.3647898733615875, 0.20375682413578033, 0.2847990393638611, 0.20242592692375183, 0.33538198471069336, 0.174686461687088], [0.08703262358903885, 0.32554149627685547, 0.013934381306171417, 0.05831753462553024, 0.13550086319446564, 0.24707834422588348, 0.10738440603017807, 0.2015978991985321, 0.20393061637878418, 0.3176687955856323, 0.11071985214948654, 0.18533341586589813, 0.23293758928775787, 0.34885379672050476, 0.5850104689598083], [0.10977373272180557, 0.1966770738363266, 0.08552326261997223, 0.3559982180595398, 0.025181425735354424, 0.05637436732649803, 0.04466243088245392, 0.30799123644828796, 0.24855823814868927, 0.13041310012340546, 0.16531962156295776, 0.11238406598567963, 0.33737656474113464, 0.08863592892885208, 0.043888676911592484], [0.5166918635368347, 0.35558366775512695, 0.01755080744624138, 0.011931763030588627, 0.556053638458252, 0.21828243136405945, 0.17387567460536957, 0.11686032265424728, 0.22141756117343903, 0.6036979556083679, 0.3235246241092682, 0.21816273033618927, 0.20258961617946625, 0.7225815653800964, 0.3817636966705322], [0.34899845719337463, 0.35567307472229004, 0.2643766403198242, 0.12664493918418884, 0.18397535383701324, 0.012551958672702312, 0.056629326194524765, 0.06369142234325409, 0.252005010843277, 0.3601645529270172, 0.3771168887615204, 0.4479873776435852, 0.13717319071292877, 0.6667386293411255, 0.1451762467622757], [0.5782451629638672, 0.6189379096031189, 0.11758852005004883, 0.3125992715358734, 0.3504111170768738, 0.10631152987480164, 0.16217094659805298, 0.04177623987197876, 0.10916820168495178, 0.3274877965450287, 0.10721725970506668, 0.11595069617033005, 0.11270644515752792, 0.32787472009658813, 0.13412055373191833], [0.2553749084472656, 0.5479037165641785, 0.3395489752292633, 0.13140854239463806, 0.07771788537502289, 0.06743729114532471, 0.04718935862183571, 0.022107038646936417, 0.2706955075263977, 0.06462319940328598, 0.20574931800365448, 0.08401398360729218, 0.11249610781669617, 0.20925462245941162, 0.07354141771793365], [0.15992610156536102, 0.4297313988208771, 0.11996463686227798, 0.29957810044288635, 0.19940054416656494, 0.6192947030067444, 0.07005859166383743, 0.4058174192905426, 0.0451255701482296, 0.02480492927134037, 0.052432600408792496, 0.13078351318836212, 0.14195236563682556, 0.12686756253242493, 0.10959619283676147], [0.13202522695064545, 0.3311104476451874, 0.12707853317260742, 0.06901858001947403, 0.13186469674110413, 0.37057942152023315, 0.1482420712709427, 0.21941475570201874, 0.1949346363544464, 0.11534072458744049, 0.011536079458892345, 0.018882060423493385, 0.16279305517673492, 0.07962523400783539, 0.11737312376499176], [0.0604790523648262, 0.5140921473503113, 0.37517040967941284, 0.060462601482868195, 0.14644990861415863, 0.49839717149734497, 0.08009912073612213, 0.3367377519607544, 0.0785842090845108, 0.043956201523542404, 0.0826396569609642, 0.015624956227838993, 0.10417986661195755, 0.07971351593732834, 0.018050679937005043], [0.10509271919727325, 0.5468136072158813, 0.2136838436126709, 0.13898353278636932, 0.11654751002788544, 0.1982421725988388, 0.03731672093272209, 0.5618436336517334, 0.37511539459228516, 0.015668287873268127, 0.07859797775745392, 0.026544239372015, 0.11879771202802658, 0.051024846732616425, 0.03191406652331352], [0.2583395540714264, 0.306291788816452, 0.15283380448818207, 0.48663485050201416, 0.24239543080329895, 0.6472541093826294, 0.11895711719989777, 0.7050262093544006, 0.43789902329444885, 0.07257331907749176, 0.1529301553964615, 0.07237879186868668, 0.029207568615674973, 0.031136667355895042, 0.04320577159523964], [0.37997886538505554, 0.3090342879295349, 0.09529577195644379, 0.06091787666082382, 0.5611693859100342, 0.5351426005363464, 0.5250707268714905, 0.4058402180671692, 0.08284364640712738, 0.7192233204841614, 0.12988585233688354, 0.24924960732460022, 0.016598563641309738, 0.6531801819801331, 0.22117754817008972], [0.31734058260917664, 0.02799793891608715, 0.08435621112585068, 0.4273812472820282, 0.37900310754776, 0.1551857888698578, 0.12445898354053497, 0.02975497953593731, 0.13922178745269775, 0.25836795568466187, 0.3142063617706299, 0.5329877138137817, 0.020000692456960678, 0.19246473908424377, 0.34441179037094116]], [[0.022252710536122322, 0.017558962106704712, 0.12289869785308838, 0.01514213066548109, 0.04983796179294586, 0.160098597407341, 0.09159664064645767, 0.03634485974907875, 0.27353572845458984, 0.14908282458782196, 0.8423851132392883, 0.33708906173706055, 0.03012021631002426, 0.05972116440534592, 0.2686574459075928], [0.13637107610702515, 0.02899317629635334, 0.09026061743497849, 0.22582301497459412, 0.09117049723863602, 0.19661013782024384, 0.30083417892456055, 0.13528303802013397, 0.1352328211069107, 0.18504901230335236, 0.3621358573436737, 0.504258930683136, 0.10044156759977341, 0.37106865644454956, 0.36433035135269165], [0.10935092717409134, 0.06271693855524063, 0.044740546494722366, 0.1709805577993393, 0.22382155060768127, 0.2615796625614166, 0.3429900109767914, 0.02677186205983162, 0.39723172783851624, 0.1559167355298996, 0.6381150484085083, 0.34350308775901794, 0.14388519525527954, 0.322640985250473, 0.07209958881139755], [0.11123806983232498, 0.14550834894180298, 0.12841136753559113, 0.013620064593851566, 0.006130752619355917, 0.025231752544641495, 0.11538708955049515, 0.09429272264242172, 0.3855685293674469, 0.016912028193473816, 0.3869503438472748, 0.1961694061756134, 0.15352581441402435, 0.019190048798918724, 0.4291467070579529], [0.1283823847770691, 0.33987957239151, 0.06837885081768036, 0.03946131095290184, 0.03139644116163254, 0.11983324587345123, 0.12062173336744308, 0.46404916048049927, 0.24212448298931122, 0.1594262570142746, 0.4298713207244873, 0.5236353278160095, 0.2188095897436142, 0.049411591142416, 0.10146455466747284], [0.010564678348600864, 0.32722386717796326, 0.19864077866077423, 0.015389330685138702, 0.0028029000386595726, 0.007416849955916405, 0.003262599464505911, 0.23795713484287262, 0.05000551417469978, 0.075996033847332, 0.049679387360811234, 0.21265098452568054, 0.2097157984972, 0.01007634773850441, 0.03895873948931694], [0.10390599817037582, 0.04329453781247139, 0.42168325185775757, 0.06385642290115356, 0.04340887442231178, 0.029213739559054375, 0.036663200706243515, 0.0028809772338718176, 0.19718152284622192, 0.16335125267505646, 0.6605148315429688, 0.17834524810314178, 0.08135847747325897, 0.05741032958030701, 0.24636343121528625], [0.010566278360784054, 0.32608217000961304, 0.34194469451904297, 0.08201102167367935, 0.036688148975372314, 0.12155891954898834, 0.015490439720451832, 0.05858473479747772, 0.1731383204460144, 0.12207219004631042, 0.0636284351348877, 0.2239474654197693, 0.2988812327384949, 0.033257871866226196, 0.04593053460121155], [0.26241976022720337, 0.0378817655146122, 0.10770448297262192, 0.11944369971752167, 0.367754727602005, 0.041288651525974274, 0.25914207100868225, 0.061461515724658966, 0.061867646872997284, 0.08977923542261124, 0.03797370195388794, 0.2101898193359375, 0.035329420119524, 0.38835543394088745, 0.3324989080429077], [0.3753410875797272, 0.031615160405635834, 0.1074504628777504, 0.07966858148574829, 0.16393397748470306, 0.01204571221023798, 0.36072632670402527, 0.026240641251206398, 0.09493876993656158, 0.12203314155340195, 0.0640302300453186, 0.13458214700222015, 0.19451306760311127, 0.3176366686820984, 0.19878560304641724], [0.19523903727531433, 0.1090913861989975, 0.11059779673814774, 0.03402426466345787, 0.4491459131240845, 0.1729225516319275, 0.3482173979282379, 0.01764478161931038, 0.14307594299316406, 0.22771455347537994, 0.04787566140294075, 0.14714154601097107, 0.028272001072764397, 0.23823784291744232, 0.19700175523757935], [0.1428564339876175, 0.03585843741893768, 0.023294193670153618, 0.1143055409193039, 0.07461919635534286, 0.13578416407108307, 0.4153969883918762, 0.03374828025698662, 0.10746961832046509, 0.17216910421848297, 0.02314077876508236, 0.02450137585401535, 0.06497504562139511, 0.381274551153183, 0.14229674637317657], [0.5444629788398743, 0.049506742507219315, 0.09827632457017899, 0.29229700565338135, 0.06650383025407791, 0.11397240310907364, 0.597455620765686, 0.1362738311290741, 0.15222173929214478, 0.2562837302684784, 0.13646292686462402, 0.38294121623039246, 0.030382927507162094, 0.038297515362501144, 0.465526819229126], [0.12950241565704346, 0.2834409177303314, 0.40745216608047485, 0.040315985679626465, 0.09126543253660202, 0.16738829016685486, 0.24838824570178986, 0.2707839906215668, 0.5177856087684631, 0.1416875720024109, 0.6573355793952942, 0.4225574731826782, 0.02239617332816124, 0.07502269744873047, 0.07588320225477219], [0.00751910824328661, 0.5024122595787048, 0.38239815831184387, 0.016937274485826492, 0.039716992527246475, 0.11479316651821136, 0.004478333052247763, 0.02017248421907425, 0.011771232821047306, 0.0035600941628217697, 0.03807784244418144, 0.07125832885503769, 0.1964063048362732, 0.0026467873249202967, 0.00302477041259408]], [[0.06952784210443497, 0.0770183801651001, 0.23747292160987854, 0.022874178364872932, 0.14143598079681396, 0.08435114473104477, 0.0795491486787796, 0.054600730538368225, 0.015159118920564651, 0.06120437756180763, 0.02771361917257309, 0.06765643507242203, 0.013518131338059902, 0.15485556423664093, 0.21279898285865784], [0.2531612813472748, 0.03241151198744774, 0.04793045297265053, 0.13835468888282776, 0.05921119078993797, 0.20751594007015228, 0.5453532934188843, 0.021712571382522583, 0.07093679159879684, 0.2689567506313324, 0.13515745103359222, 0.05570060759782791, 0.04099860414862633, 0.03517309948801994, 0.11268090456724167], [0.35043928027153015, 0.18572849035263062, 0.0481790192425251, 0.19426384568214417, 0.018465382978320122, 0.2676069438457489, 0.3000488579273224, 0.2726097106933594, 0.08134563267230988, 0.10164237022399902, 0.05787196010351181, 0.03694695979356766, 0.21335498988628387, 0.0815601795911789, 0.051584985107183456], [0.10967924445867538, 0.047143928706645966, 0.06498727947473526, 0.0161599051207304, 0.08311080187559128, 0.25361040234565735, 0.2589581310749054, 0.0646943673491478, 0.11701063811779022, 0.7398742437362671, 0.11236728727817535, 0.4240334630012512, 0.09019055217504501, 0.1980810910463333, 0.08526580780744553], [0.0050394656136631966, 0.005000656470656395, 0.01952306181192398, 0.4184519350528717, 0.012662295252084732, 0.015614073723554611, 0.006089636590331793, 0.027387546375393867, 0.007885311730206013, 0.009227052330970764, 0.015002718195319176, 0.002679894445464015, 0.040426015853881836, 0.023895790800452232, 0.031263262033462524], [0.1104135811328888, 0.16341662406921387, 0.10040471702814102, 0.15014782547950745, 0.22085179388523102, 0.07417210936546326, 0.08140900731086731, 0.21936744451522827, 0.12380684167146683, 0.030364450067281723, 0.008148477412760258, 0.040405042469501495, 0.016740301623940468, 0.05651557818055153, 0.03777482733130455], [0.021739037707448006, 0.025255737826228142, 0.041796568781137466, 0.028582973405718803, 0.06361079961061478, 0.10603900998830795, 0.04079660773277283, 0.23573672771453857, 0.031395647674798965, 0.17699679732322693, 0.11518478393554688, 0.12758946418762207, 0.029195530340075493, 0.19761133193969727, 0.24158287048339844], [0.1121676117181778, 0.056780170649290085, 0.05766424164175987, 0.4753672778606415, 0.17093990743160248, 0.055545274168252945, 0.23774300515651703, 0.047642335295677185, 0.2396271675825119, 0.07084424793720245, 0.05071293190121651, 0.15200014412403107, 0.17973174154758453, 0.16349640488624573, 0.16329222917556763], [0.08155515789985657, 0.04415197670459747, 0.09395420551300049, 0.06736686080694199, 0.009449290111660957, 0.007789341267198324, 0.08313233405351639, 0.018231436610221863, 0.2736586928367615, 0.12516330182552338, 0.14283257722854614, 0.03993181511759758, 0.11735112965106964, 0.037545330822467804, 0.095799021422863], [0.07989984005689621, 0.019307896494865417, 0.05061032995581627, 0.29983657598495483, 0.009587445296347141, 0.23453857004642487, 0.06259765475988388, 0.014452173374593258, 0.026213111355900764, 0.03952796012163162, 0.12968890368938446, 0.019515926018357277, 0.23016268014907837, 0.18980233371257782, 0.14884653687477112], [0.042069002985954285, 0.007410319056361914, 0.027750220149755478, 0.14348776638507843, 0.190275177359581, 0.0696464255452156, 0.09576459228992462, 0.08924749493598938, 0.16830699145793915, 0.14098002016544342, 0.2945949137210846, 0.08460760116577148, 0.11812892556190491, 0.2108343094587326, 0.28860458731651306], [0.509858250617981, 0.07021021842956543, 0.044154465198516846, 0.005825423635542393, 0.5241404175758362, 0.030089300125837326, 0.19222509860992432, 0.02549084462225437, 0.1939508020877838, 0.09437919408082962, 0.10883274674415588, 0.13631868362426758, 0.08004569262266159, 0.04784407094120979, 0.14005501568317413], [0.029798628762364388, 0.0011461747344583273, 0.00650657806545496, 0.02902117185294628, 0.007348767947405577, 0.012432223185896873, 0.018553903326392174, 0.006125486921519041, 0.008405826054513454, 0.057926055043935776, 0.04542696848511696, 0.21123111248016357, 0.05352021008729935, 0.2931033968925476, 0.1833699345588684], [0.01627730205655098, 0.0057758791372179985, 0.013731835409998894, 0.6289489269256592, 0.011782719753682613, 0.006108477246016264, 0.005309773609042168, 0.023312430828809738, 0.012817217037081718, 0.00939176045358181, 0.04320970177650452, 0.012798959389328957, 0.1585281491279602, 0.11795029044151306, 0.13285225629806519], [0.39748579263687134, 0.10528232902288437, 0.006042438093572855, 0.07306646555662155, 0.020484283566474915, 0.09288878738880157, 0.6331413388252258, 0.03478514030575752, 0.016230005770921707, 0.039869412779808044, 0.10224607586860657, 0.005181388463824987, 0.007975003682076931, 0.01008305512368679, 0.026732152327895164]], [[0.2484879046678543, 0.12593188881874084, 0.11472177505493164, 0.6318025588989258, 0.009745504707098007, 0.030495919287204742, 0.054615989327430725, 0.004801109898835421, 0.23875823616981506, 0.011562658473849297, 0.02087206020951271, 0.059635717421770096, 0.011483770795166492, 0.07716090232133865, 0.041850361973047256], [0.3294946551322937, 0.17723912000656128, 0.041080135852098465, 0.30134642124176025, 0.0073102316819131374, 0.049291279166936874, 0.0495959147810936, 0.0037847748026251793, 0.014987694099545479, 0.07676513493061066, 0.039059415459632874, 0.006041571032255888, 0.011380840092897415, 0.011979957111179829, 0.02782473713159561], [0.008675806224346161, 0.016726570203900337, 0.19906938076019287, 0.3167073726654053, 0.022006884217262268, 0.014510865323245525, 0.00237266905605793, 0.00938868336379528, 0.004848333541303873, 0.00305117666721344, 0.042285457253456116, 0.0026737553998827934, 0.017337674275040627, 0.0016427191440016031, 0.0027906473260372877], [0.06292864680290222, 0.010060630738735199, 0.07846219092607498, 0.3009726405143738, 0.09911586344242096, 0.3769649565219879, 0.290684312582016, 0.048859626054763794, 0.015964722260832787, 0.02972962148487568, 0.25837212800979614, 0.050403933972120285, 0.052831199020147324, 0.44793814420700073, 0.12096201628446579], [0.0647541731595993, 0.06744952499866486, 0.010754776187241077, 0.15598785877227783, 0.08916914463043213, 0.4045051634311676, 0.5958212018013, 0.10594789683818817, 0.12025819718837738, 0.04822946712374687, 0.02913811057806015, 0.014846491627395153, 0.17111137509346008, 0.049513354897499084, 0.14188753068447113], [0.07069405168294907, 0.0006015333347022533, 0.0017680496675893664, 0.0010985832195729017, 0.0012869784841313958, 0.22278346121311188, 0.4465882480144501, 0.06128238886594772, 0.02642727456986904, 0.03756114840507507, 0.002607540925964713, 0.0018699204083532095, 0.0059012919664382935, 0.020283877849578857, 0.03355809301137924], [0.0861939862370491, 0.03346291184425354, 0.009915103204548359, 0.35010838508605957, 0.03437130153179169, 0.18394741415977478, 0.5006390810012817, 0.0633198693394661, 0.36160194873809814, 0.07578127831220627, 0.038500167429447174, 0.08213403075933456, 0.026455186307430267, 0.12013117223978043, 0.1146865040063858], [0.2484544962644577, 0.00790119543671608, 0.004407763481140137, 0.02700735628604889, 0.015422074124217033, 0.015295883640646935, 0.40846768021583557, 0.10706920176744461, 0.06367217004299164, 0.22094424068927765, 0.21221157908439636, 0.006999517325311899, 0.054566796869039536, 0.124799944460392, 0.09114839136600494], [0.1237153485417366, 0.029043834656476974, 0.07521974295377731, 0.04068650305271149, 0.002623512176796794, 0.008706655353307724, 0.03832445293664932, 0.14616532623767853, 0.1701044738292694, 0.20599642395973206, 0.11677426844835281, 0.2341107875108719, 0.06235762685537338, 0.003964806441217661, 0.15731573104858398], [0.034962959587574005, 0.023077068850398064, 0.034600574523210526, 0.14041800796985626, 0.0021679585333913565, 0.009290770627558231, 0.07274696230888367, 0.014187950640916824, 0.1371506154537201, 0.39440277218818665, 0.2198760211467743, 0.19940708577632904, 0.11203428357839584, 0.08552268147468567, 0.11737436801195145], [0.015330069698393345, 0.007386082783341408, 0.017500948160886765, 0.01906486414372921, 0.010120063088834286, 0.05364372953772545, 0.043298348784446716, 0.12658876180648804, 0.06039673835039139, 0.02238147333264351, 0.16429400444030762, 0.06984445452690125, 0.3043651580810547, 0.055543575435876846, 0.11423089355230331], [0.09644094854593277, 0.0058854687958955765, 0.03721459209918976, 0.0025620406959205866, 0.062300242483615875, 0.003563062520697713, 0.07219880819320679, 0.03924282267689705, 0.025451356545090675, 0.06598387658596039, 0.026776403188705444, 0.07250863313674927, 0.45021528005599976, 0.08199745416641235, 0.4220075309276581], [0.01460834126919508, 0.0005662022740580142, 0.0013911814894527197, 0.05315173417329788, 0.008028149604797363, 0.016604119911789894, 0.011740745045244694, 0.008678588084876537, 0.0025609249714761972, 0.01638207584619522, 0.018210044130682945, 0.014119945466518402, 0.06550943106412888, 0.34254926443099976, 0.04794229939579964], [0.05372002348303795, 0.14061135053634644, 0.018787089735269547, 0.0958278551697731, 0.0019092779839411378, 0.03348369151353836, 0.13957257568836212, 0.031220966950058937, 0.19735871255397797, 0.017847368493676186, 0.0589337982237339, 0.01900595612823963, 0.1276925951242447, 0.04769464209675789, 0.4384888708591461], [0.08416850119829178, 0.1088641807436943, 0.0573052242398262, 0.27551695704460144, 0.030813831835985184, 0.18022866547107697, 0.10468263924121857, 0.09972096234560013, 0.31189021468162537, 0.3315774202346802, 0.2321816384792328, 0.034622836858034134, 0.14143656194210052, 0.04640315845608711, 0.09621720016002655]], [[0.130781888961792, 0.31469303369522095, 0.10550640523433685, 0.05234318599104881, 0.073336161673069, 0.022349786013364792, 0.04807984083890915, 0.1931842416524887, 0.06399697810411453, 0.042083337903022766, 0.026750531047582626, 0.11997608095407486, 0.008983415551483631, 0.03431839123368263, 0.019280044361948967], [0.1582711637020111, 0.14862558245658875, 0.20016248524188995, 0.08876624703407288, 0.11006557196378708, 0.14632253348827362, 0.04025046527385712, 0.010204354301095009, 0.017868297174572945, 0.059372395277023315, 0.02111685276031494, 0.04181571304798126, 0.025184988975524902, 0.09681157767772675, 0.11611668020486832], [0.23875439167022705, 0.3084685802459717, 0.14188633859157562, 0.026331612840294838, 0.0149313323199749, 0.09176106750965118, 0.03131069242954254, 0.10051372647285461, 0.03149634972214699, 0.11085867136716843, 0.014410188421607018, 0.02796255424618721, 0.034816499799489975, 0.025807565078139305, 0.01846306212246418], [0.3404518961906433, 0.24260303378105164, 0.15383434295654297, 0.17020593583583832, 0.011800014413893223, 0.014385397545993328, 0.09441643208265305, 0.12204645574092865, 0.13843503594398499, 0.045293405652046204, 0.010667533613741398, 0.19693949818611145, 0.10281307995319366, 0.01422606036067009, 0.06984427571296692], [0.002873742487281561, 0.008706165477633476, 0.35573768615722656, 0.0015586970839649439, 0.015496796928346157, 0.003392455168068409, 0.01149011217057705, 0.01891980692744255, 0.016394488513469696, 0.003960000351071358, 0.0035995631478726864, 0.008501716889441013, 0.018164046108722687, 0.004727588500827551, 0.013562880456447601], [0.044807154685258865, 0.02788197249174118, 0.03947468474507332, 0.1271299421787262, 0.17640650272369385, 0.25110092759132385, 0.08349309861660004, 0.02069718949496746, 0.45751577615737915, 0.039922621101140976, 0.1781769096851349, 0.002931024879217148, 0.16567888855934143, 0.1177627220749855, 0.5156693458557129], [0.005990047473460436, 0.04782475531101227, 0.01399919856339693, 0.010489771142601967, 0.06132129579782486, 0.030459748581051826, 0.010153756476938725, 0.3387801945209503, 0.06446883827447891, 0.007243711035698652, 0.00693717272952199, 0.020023254677653313, 0.007285784464329481, 0.009139767847955227, 0.0044054011814296246], [0.020405659452080727, 0.00729386368766427, 0.06661678105592728, 0.08295443654060364, 0.20373474061489105, 0.3448184132575989, 0.04295210912823677, 0.20947468280792236, 0.03081577830016613, 0.010805373080074787, 0.17521467804908752, 0.06567652523517609, 0.012400656938552856, 0.10652147233486176, 0.07385163754224777], [0.21573591232299805, 0.13175059854984283, 0.04085814207792282, 0.04119405150413513, 0.03551999852061272, 0.023009058088064194, 0.2751774191856384, 0.047030266374349594, 0.14272502064704895, 0.20153193175792694, 0.09575672447681427, 0.11327007412910461, 0.008532780222594738, 0.053245026618242264, 0.08952803909778595], [0.2778390347957611, 0.11423225700855255, 0.3034791946411133, 0.34643107652664185, 0.5395972728729248, 0.06785042583942413, 0.13029156625270844, 0.18737749755382538, 0.029348008334636688, 0.16667678952217102, 0.021040884777903557, 0.008728248998522758, 0.037633832544088364, 0.02033349499106407, 0.03947347402572632], [0.4898838996887207, 0.08082167059183121, 0.07362432777881622, 0.02171795442700386, 0.1333591789007187, 0.09000474214553833, 0.13501934707164764, 0.03979193791747093, 0.19113953411579132, 0.13522492349147797, 0.16557832062244415, 0.16255514323711395, 0.07687958329916, 0.15948235988616943, 0.09843874722719193], [0.045906297862529755, 0.18602333962917328, 0.4082620143890381, 0.010370302945375443, 0.04507172852754593, 0.19693265855312347, 0.04021843150258064, 0.027866821736097336, 0.1546991914510727, 0.33766424655914307, 0.09260500222444534, 0.05066358670592308, 0.05655887722969055, 0.13157807290554047, 0.06850539147853851], [0.020344020798802376, 0.0030158585868775845, 0.004445259924978018, 0.022628312930464745, 0.030150510370731354, 0.027700912207365036, 0.026311388239264488, 0.012862108647823334, 0.07009940594434738, 0.24656175076961517, 0.10596039146184921, 0.1143152266740799, 0.3679012656211853, 0.0068145813420414925, 0.04171491786837578], [0.004749340936541557, 0.00182742765173316, 0.0021293568424880505, 0.00394084258005023, 0.004750867374241352, 5.3125138947507367e-05, 0.0026011874433606863, 0.000718552153557539, 0.002356230979785323, 0.00125187449157238, 0.0021339249797165394, 0.00044074622564949095, 0.2141493707895279, 0.0029175111558288336, 0.00477015832439065], [0.12991508841514587, 0.06724811345338821, 0.06397818773984909, 0.15923364460468292, 0.2566852867603302, 0.07963784784078598, 0.09182894974946976, 0.040824584662914276, 0.21298912167549133, 0.2517295181751251, 0.2285410314798355, 0.11115844547748566, 0.1010512113571167, 0.3968040943145752, 0.1870165765285492]], [[0.06147387623786926, 0.0657946914434433, 0.22564710676670074, 0.1299343705177307, 0.021580645814538002, 0.08992400765419006, 0.025479430332779884, 0.04823821783065796, 0.05891237407922745, 0.016958819702267647, 0.0021926285699009895, 0.017513686791062355, 0.09859969466924667, 0.16368542611598969, 0.038398925215005875], [0.029852252453565598, 0.26626214385032654, 0.14803646504878998, 0.038784727454185486, 0.07803148031234741, 0.006210723891854286, 0.0026457132771611214, 0.006018034182488918, 0.05453306809067726, 0.002730109030380845, 0.015730326995253563, 0.0017557059181854129, 0.034912969917058945, 0.03208531066775322, 0.03983413055539131], [0.01053018867969513, 0.02744918502867222, 0.2530466914176941, 0.05846027657389641, 0.1744728684425354, 0.011957419104874134, 0.003304906887933612, 0.00205883732996881, 0.00874510407447815, 0.0014524421421810985, 0.0009729861048981547, 0.0026561047416180372, 0.0023208027705550194, 0.0038251704536378384, 0.005045189522206783], [0.016039762645959854, 0.05755838379263878, 0.10756286233663559, 0.03799062967300415, 0.5738711953163147, 0.061907339841127396, 0.128611221909523, 0.01847657933831215, 0.06501789391040802, 0.015564735978841782, 0.0016139671206474304, 0.014343881979584694, 0.020734043791890144, 0.14008449018001556, 0.13515408337116241], [0.005847899243235588, 0.11914067715406418, 0.01715121790766716, 0.3517457842826843, 0.0661543607711792, 0.07493122667074203, 0.012425812892615795, 0.11745280772447586, 0.08440648764371872, 0.020029406994581223, 0.05165768414735794, 0.04094480350613594, 0.024548601359128952, 0.005826729815453291, 0.13841456174850464], [0.015926362946629524, 0.007578620687127113, 0.1226087138056755, 0.030128292739391327, 0.03851892054080963, 0.3367418944835663, 0.01694057136774063, 0.09829536825418472, 0.0361555740237236, 0.10537439584732056, 0.007450005039572716, 0.029753634706139565, 0.22920416295528412, 0.01793695241212845, 0.05258304625749588], [0.01326388493180275, 0.05337870866060257, 0.047661036252975464, 0.08615607023239136, 0.12425915151834488, 0.4180251955986023, 0.04702466353774071, 0.0717325434088707, 0.05138256773352623, 0.06877672672271729, 0.0152205191552639, 0.0719875767827034, 0.1666427105665207, 0.13322126865386963, 0.053655143827199936], [0.026802292093634605, 0.003955241292715073, 0.0206829272210598, 0.02742936834692955, 0.06016179919242859, 0.15127348899841309, 0.06774158030748367, 0.2981398105621338, 0.05239749699831009, 0.09365928173065186, 0.035629644989967346, 0.020771589130163193, 0.13655303418636322, 0.012941722758114338, 0.05640798062086105], [0.06469012051820755, 0.1851334124803543, 0.08788572251796722, 0.19977343082427979, 0.00846380740404129, 0.03702360764145851, 0.0876760184764862, 0.046302031725645065, 0.11564433574676514, 0.05180440843105316, 0.49518024921417236, 0.1649368405342102, 0.030481798574328423, 0.10461966693401337, 0.07739346474409103], [0.020106524229049683, 0.01925482228398323, 0.006043681409209967, 0.01652396097779274, 0.001572003006003797, 0.005779887083917856, 0.015335858799517155, 0.03537710756063461, 0.009967570193111897, 0.09144406765699387, 0.43651703000068665, 0.2613205015659332, 0.0483890138566494, 0.06553913652896881, 0.055434126406908035], [0.07980967313051224, 0.14815203845500946, 0.09271827340126038, 0.004086778499186039, 0.010790406726300716, 0.0747552439570427, 0.10995902121067047, 0.04728228971362114, 0.1809520274400711, 0.025821411982178688, 0.06657237559556961, 0.1431768387556076, 0.19449584186077118, 0.20780201256275177, 0.10148976743221283], [0.05537823587656021, 0.008725662715733051, 0.0058344281278550625, 0.029011448845267296, 0.048424966633319855, 0.047911662608385086, 0.16901308298110962, 0.17019973695278168, 0.011648884043097496, 0.08953043073415756, 0.5360274910926819, 0.10330803692340851, 0.078437939286232, 0.12202966213226318, 0.11905822902917862], [0.01546903420239687, 0.0005347061669453979, 0.0015839362749829888, 0.053056132048368454, 0.23614321649074554, 0.013318118639290333, 0.051473915576934814, 0.011966699734330177, 0.007302975282073021, 0.09275621920824051, 0.06646261364221573, 0.010813506320118904, 0.13289499282836914, 0.22826357185840607, 0.04386172071099281], [0.009458722546696663, 0.0058342707343399525, 0.012789146974682808, 0.005895438138395548, 0.026010286062955856, 0.057482823729515076, 0.005663284566253424, 0.005727604031562805, 0.0033144087065011263, 0.011671853251755238, 0.00424896739423275, 0.056589994579553604, 0.20401620864868164, 0.03777612745761871, 0.03114682249724865], [0.0012354525970295072, 0.034024473279714584, 0.10020612925291061, 0.02267461270093918, 0.08676987141370773, 0.14216794073581696, 0.0033775768242776394, 0.07320579141378403, 0.07390473037958145, 0.0168889332562685, 0.00386308366432786, 0.02569040097296238, 0.24664165079593658, 0.2674221694469452, 0.014589445665478706]], [[0.2643359303474426, 0.2943609654903412, 0.10517127066850662, 0.013473477214574814, 0.17808614671230316, 0.05031028389930725, 0.0477585569024086, 0.13444076478481293, 0.0626431554555893, 0.05089121311903, 0.025438696146011353, 0.12666909396648407, 0.015911895781755447, 0.08822031319141388, 0.09637932479381561], [0.02893858775496483, 0.3286381959915161, 0.024464154615998268, 0.015645690262317657, 0.07065004110336304, 0.03320073336362839, 0.0035833900328725576, 0.002133443485945463, 0.0077736834064126015, 0.0014096481027081609, 0.006704544182866812, 0.0034484381321817636, 0.010553284548223019, 0.029550330713391304, 0.0064092278480529785], [0.0403970405459404, 0.029290249571204185, 0.2564694881439209, 0.03103366494178772, 0.01930038072168827, 0.0007984130643308163, 0.0024861868005245924, 0.013074777089059353, 0.025626862421631813, 0.0022637112997472286, 0.010511897504329681, 0.03038576804101467, 0.00803295336663723, 0.000980974524281919, 0.040744345635175705], [0.23322375118732452, 0.23003342747688293, 0.24563531577587128, 0.07496963441371918, 0.029645830392837524, 0.0015733843902125955, 0.048427432775497437, 0.07474764436483383, 0.005064227152615786, 0.006064139772206545, 0.00639030896127224, 0.0023683567997068167, 0.0201968252658844, 0.0057837339118123055, 0.030518243089318275], [0.009382463060319424, 0.004108777269721031, 0.355550616979599, 0.0026344929356127977, 0.036474164575338364, 0.0013674235669896007, 0.010420771315693855, 0.008167937397956848, 0.005904712714254856, 0.0164882093667984, 0.0014915319625288248, 0.00666471105068922, 0.007061991840600967, 0.006146776955574751, 0.03842667490243912], [0.340854674577713, 0.027831802144646645, 0.11495380103588104, 0.4507772624492645, 0.33573275804519653, 0.07158998399972916, 0.3054116368293762, 0.09558256715536118, 0.008191889151930809, 0.08007357269525528, 0.08199689537286758, 0.011630101129412651, 0.016172919422388077, 0.020448284223675728, 0.05253906920552254], [0.0825798362493515, 0.09406770020723343, 0.044158000499010086, 0.06245531886816025, 0.15669509768486023, 0.1018981784582138, 0.17849969863891602, 0.1823071539402008, 0.1725231111049652, 0.14688736200332642, 0.027769910171628, 0.1729786992073059, 0.04907526820898056, 0.09640378504991531, 0.07928813993930817], [0.04138464853167534, 0.0045098732225596905, 0.098704032599926, 0.034942083060741425, 0.1842936873435974, 0.1567782759666443, 0.14141200482845306, 0.1953822374343872, 0.09936889261007309, 0.281032919883728, 0.13522183895111084, 0.012650868855416775, 0.02501768246293068, 0.2133605033159256, 0.14542686939239502], [0.05831298604607582, 0.07845572382211685, 0.00935202743858099, 0.09348727762699127, 0.2554629147052765, 0.026818757876753807, 0.15820558369159698, 0.09712891280651093, 0.18406683206558228, 0.297629177570343, 0.011888068169355392, 0.04674078896641731, 0.01729435659945011, 0.04945852607488632, 0.08047669380903244], [0.030211733654141426, 0.004252443555742502, 0.044400423765182495, 0.0032993308268487453, 0.029341043904423714, 0.14371474087238312, 0.17894455790519714, 0.12369092553853989, 0.48359414935112, 0.06321088969707489, 0.05475561320781708, 0.3139732778072357, 0.086760014295578, 0.13208359479904175, 0.2905256450176239], [0.06285266578197479, 0.0062216646037995815, 0.016913438215851784, 0.007285475265234709, 0.01629750058054924, 0.004617355298250914, 0.06147269159555435, 0.21831700205802917, 0.11657348275184631, 0.39258062839508057, 0.17390909790992737, 0.3519352376461029, 0.014494672417640686, 0.04437657818198204, 0.04845427721738815], [0.014810703694820404, 0.027867808938026428, 0.00787208043038845, 0.003661711234599352, 0.06816401332616806, 0.014048570767045021, 0.04280591011047363, 0.04519394412636757, 0.07874996215105057, 0.2074531614780426, 0.12078044563531876, 0.53052818775177, 0.035032909363508224, 0.1398327797651291, 0.02986292913556099], [0.011430865153670311, 0.002694258699193597, 0.03896895423531532, 0.04504057392477989, 0.00808126013725996, 0.01048098411411047, 0.012571780942380428, 0.0054772221483290195, 0.07419075071811676, 0.02193005569279194, 0.3994891941547394, 0.15694338083267212, 0.3065741956233978, 0.022703034803271294, 0.07852455973625183], [0.0007813395350240171, 4.470362910069525e-06, 0.0010683261789381504, 0.022204171866178513, 0.0022952572908252478, 4.198186070425436e-05, 0.0009061718010343611, 0.0006557627930305898, 0.0009219115017913282, 0.0006920882733538747, 0.005404994357377291, 0.012070748023688793, 0.21383939683437347, 0.0026518681552261114, 0.0011399114737287164], [0.03732156753540039, 0.14082211256027222, 0.08218222856521606, 0.02148711122572422, 0.037640467286109924, 0.011636778712272644, 0.01611051708459854, 0.06724098324775696, 0.20042963325977325, 0.035641491413116455, 0.045655738562345505, 0.041121501475572586, 0.23917138576507568, 0.01630677469074726, 0.2854580283164978]]], [[[0.00028402332100085914, 1.9304454923485537e-08, 1.5483598847509938e-09, 7.885660006923256e-12, 2.7246130684943637e-08, 2.9440096113830805e-05, 4.3406546978985716e-07, 3.7434634236888087e-07, 3.9264233464564313e-07, 1.911867819615054e-08, 6.894639170695882e-08, 1.9322192201798316e-06, 1.594805780769093e-06, 1.097217136702966e-06, 0.25163131952285767], [0.8221166729927063, 0.0031213052570819855, 7.842657214496285e-05, 5.977510153520882e-10, 6.043178735204435e-10, 7.336016096815001e-07, 0.0001510237343609333, 0.000765863514970988, 0.0003504687047097832, 5.704807790607447e-07, 3.8402351520971933e-08, 3.7901799032624695e-07, 1.534954208182171e-05, 4.934078606311232e-05, 0.00023439944197889417], [0.0023944040294736624, 0.796754002571106, 0.004422985017299652, 9.068900226338883e-07, 5.795331436964091e-10, 1.0343059742012883e-08, 4.4964113499190717e-07, 0.0014743957435712218, 0.00028717826353386045, 7.994436600711197e-05, 3.3569827451174206e-07, 1.215876466176269e-07, 7.940250839055807e-07, 4.835407253267476e-06, 2.585098854979151e-07], [4.3931080995207594e-11, 0.0005229745293036103, 0.5791732668876648, 0.0002632129180710763, 3.316774765949049e-08, 1.7754019825469425e-12, 1.4596207272357664e-14, 1.5350217763554497e-09, 1.2882580335826788e-07, 7.457471838279162e-06, 1.2410231420290074e-06, 2.736720361440348e-08, 3.621486097116211e-11, 3.919724787804224e-12, 2.306477925317907e-12], [3.994035801418473e-14, 1.3595737036187217e-10, 5.270875135465758e-06, 0.5513067841529846, 0.00020578903786372393, 1.9226330039145978e-07, 1.181193272532799e-12, 2.80986930771554e-13, 9.120337812881449e-14, 1.37843805814164e-10, 7.154308718781976e-07, 1.5133276747292257e-06, 7.425698944629744e-10, 2.2010659354171347e-13, 1.8997327582565005e-12], [2.3444651168352815e-12, 2.1774425253313912e-13, 1.857566878094019e-09, 0.00030468025943264365, 0.9472002983093262, 0.00010681805724743754, 2.00606624645161e-08, 5.2167251502746245e-14, 1.354494091723496e-15, 5.737065011425513e-13, 8.729777456473187e-10, 3.2425006793346256e-05, 7.676636641917867e-07, 1.870739785303499e-09, 2.3914221713994266e-09], [3.644098217625569e-11, 3.867062572937563e-11, 4.1057553190615437e-11, 1.5412249254609378e-09, 0.018834512680768967, 0.505605936050415, 0.0010763276368379593, 5.434728933551014e-08, 2.6194791127864825e-11, 6.074670846504876e-15, 3.814499497517554e-12, 1.2291486939375318e-07, 9.572526323609054e-06, 4.437842653715052e-05, 7.18067713023629e-06], [5.002242687623948e-05, 2.445471238843311e-07, 7.217475506138271e-09, 2.943958878759423e-12, 1.391844648424012e-07, 0.0035048718564212322, 0.755942702293396, 0.0011242764303460717, 1.4866960555082187e-05, 9.753278740198823e-11, 3.792431321238132e-13, 1.6398679289486573e-11, 1.3850768709744443e-07, 0.0002873632765840739, 2.565975592005998e-05], [7.748224284398475e-09, 3.667011867491965e-07, 1.7906526261768363e-09, 1.001209222569038e-16, 4.707358499311462e-15, 2.921879960204876e-10, 4.77575849799905e-06, 0.9355171918869019, 1.7088919776142575e-05, 1.5246609308405823e-08, 1.546373502880373e-14, 1.9256968477537417e-16, 2.8356877952137637e-15, 6.199032398512827e-10, 3.679770266273863e-09], [6.04271771509346e-11, 2.349539499846287e-06, 6.254656170767703e-08, 2.0915530592191534e-12, 3.303753013789688e-16, 1.0466700578893717e-14, 7.288482968201282e-13, 0.0006303040427155793, 0.47335511445999146, 8.928982424549758e-05, 1.5872458902776998e-08, 1.3611594998645584e-14, 1.3777586457132233e-16, 1.589055302510104e-15, 8.100658338561217e-11], [3.812023474658588e-10, 1.421315573679749e-06, 2.2867025109007955e-06, 2.6682736020688935e-08, 3.632111755455525e-12, 1.6831340872913367e-14, 3.240909670081289e-14, 1.4920277635610546e-07, 0.0005182845052331686, 0.39297640323638916, 0.0007259719423018396, 1.2580667174688642e-08, 3.7229049595736974e-13, 2.157145159519631e-15, 1.0612778433838344e-09], [6.84109713322556e-10, 1.9775532322796607e-08, 5.041609938416514e-07, 0.00017906920402310789, 1.631619738873269e-06, 2.0158734681530177e-09, 9.65507530290054e-15, 4.2181228128435055e-12, 8.564649545128589e-10, 0.00023218656133394688, 0.6439363956451416, 0.000818322179839015, 1.3831699163802114e-07, 2.1358659198916774e-12, 5.4572883101400294e-08], [1.4084274191361601e-08, 2.1930364191291574e-09, 7.004614666072939e-09, 2.0828078959311824e-06, 6.64705439703539e-05, 3.6118690331932157e-06, 4.0857584676645686e-11, 1.0090924406833124e-12, 5.430448080009356e-15, 6.815135122906213e-09, 0.0007384128402918577, 0.9033229351043701, 0.0037223652470856905, 5.428325380307797e-07, 5.097080588711833e-07], [3.370899046006848e-11, 1.5044922772877722e-12, 1.903236411786996e-13, 5.2399131041103164e-12, 5.3600892613303586e-09, 3.287689196440624e-07, 1.293990137263279e-09, 3.2395277866498207e-13, 8.98320316581696e-19, 7.591717251043266e-18, 2.4333673097343134e-12, 7.08575316821225e-05, 0.3025490641593933, 0.00011370918218744919, 1.7842703314840946e-08], [0.0009491983219049871, 3.734114216058515e-05, 0.00010643315181368962, 4.299266220186837e-05, 0.0019948105327785015, 0.012520392425358295, 0.0005770812276750803, 0.00013455892622005194, 0.0002518744731787592, 0.0005399127840064466, 0.0017743584467098117, 0.004756112117320299, 0.00398082984611392, 0.002925803419202566, 0.1746407300233841]], [[0.1577264666557312, 0.03251823037862778, 0.4939506947994232, 0.8334789872169495, 0.6927971243858337, 0.3147047460079193, 0.7604361176490784, 0.11822030693292618, 0.7022377848625183, 0.6516091823577881, 0.14691989123821259, 0.2232232689857483, 0.14339210093021393, 0.3761228322982788, 0.014605461619794369], [0.028655482456088066, 0.14083503186702728, 0.08485368639230728, 0.8299343585968018, 0.8304422497749329, 0.5664599537849426, 0.834579586982727, 0.7438958287239075, 0.8452481031417847, 0.8614712953567505, 0.3640905022621155, 0.805733323097229, 0.3481642007827759, 0.795884370803833, 0.05269646272063255], [0.02106422185897827, 0.010846637189388275, 0.073356993496418, 0.017661061137914658, 0.8741048574447632, 0.5687165856361389, 0.5249210000038147, 0.5693489909172058, 0.5103186368942261, 0.5253384709358215, 0.6472406387329102, 0.4561024308204651, 0.1524587720632553, 0.45141565799713135, 0.034538887441158295], [0.2203565090894699, 0.02154199220240116, 0.007279311306774616, 0.003464027540758252, 0.18461424112319946, 0.07773485034704208, 0.7297388315200806, 0.2260110229253769, 0.6848539113998413, 0.2328294813632965, 0.22646839916706085, 0.3173597455024719, 0.10388152301311493, 0.06158056855201721, 0.11330780386924744], [0.1574045568704605, 0.12516136467456818, 0.04707150533795357, 0.0032313871197402477, 0.19444315135478973, 0.046962298452854156, 0.48863229155540466, 0.8290899991989136, 0.892469584941864, 0.6836395859718323, 0.83636474609375, 0.47956424951553345, 0.034452617168426514, 0.38761135935783386, 0.055785421282052994], [0.4389230012893677, 0.6133158802986145, 0.4783843159675598, 0.11230780929327011, 0.006951127201318741, 0.0644199401140213, 0.03406795859336853, 0.33251792192459106, 0.9552598595619202, 0.8827710747718811, 0.9276224970817566, 0.8325800895690918, 0.737617552280426, 0.745059609413147, 0.05149900168180466], [0.3395847976207733, 0.09897124767303467, 0.16763220727443695, 0.1671983003616333, 0.049412358552217484, 0.007114487700164318, 0.3340696394443512, 0.018166696652770042, 0.7235669493675232, 0.9639523029327393, 0.851059079170227, 0.7306914925575256, 0.5801126956939697, 0.8017169237136841, 0.08099871873855591], [0.44394704699516296, 0.6082286238670349, 0.37166181206703186, 0.3715074956417084, 0.35315781831741333, 0.10853563994169235, 0.013190319761633873, 0.07092351466417313, 0.03435605764389038, 0.25131845474243164, 0.921750545501709, 0.8745512366294861, 0.7473158240318298, 0.834020733833313, 0.1216883435845375], [0.18251584470272064, 0.8759727478027344, 0.1439245641231537, 0.06640342622995377, 0.060579828917980194, 0.2710072100162506, 0.011089610867202282, 0.034396518021821976, 0.1700025051832199, 0.043876904994249344, 0.14450228214263916, 0.9449294805526733, 0.9689385294914246, 0.939329981803894, 0.07954179495573044], [0.32071176171302795, 0.7452729344367981, 0.11999625712633133, 0.08053360879421234, 0.3748469650745392, 0.31863275170326233, 0.028054066002368927, 0.2197551280260086, 0.01771731488406658, 0.23943577706813812, 0.01906767673790455, 0.8113164901733398, 0.9739595055580139, 0.9691897630691528, 0.21732129156589508], [0.6261264085769653, 0.6649302244186401, 0.5194191336631775, 0.6324451565742493, 0.6771988272666931, 0.7814968228340149, 0.4118405878543854, 0.3728334903717041, 0.03296521306037903, 0.008678224869072437, 0.6047253012657166, 0.11251461505889893, 0.21560458838939667, 0.9244948625564575, 0.10127653181552887], [0.3176693320274353, 0.5172579884529114, 0.1793123036623001, 0.37762320041656494, 0.23678036034107208, 0.5621929168701172, 0.08773050457239151, 0.24525783956050873, 0.010828782804310322, 0.025829488411545753, 0.0057976157404482365, 0.08708162605762482, 0.04166324809193611, 0.5714256167411804, 0.16898052394390106], [0.6460146307945251, 0.8194199800491333, 0.48921409249305725, 0.6910595297813416, 0.5259124636650085, 0.6389046311378479, 0.3241840600967407, 0.7817367911338806, 0.17853572964668274, 0.1606196016073227, 0.06383053213357925, 0.007355134002864361, 0.02128707617521286, 0.02206379547715187, 0.23354344069957733], [0.5992116332054138, 0.6358246803283691, 0.47243836522102356, 0.5617506504058838, 0.6971379518508911, 0.6431114673614502, 0.39991113543510437, 0.8182389140129089, 0.2704472243785858, 0.20400457084178925, 0.059529319405555725, 0.06732083112001419, 0.008503233082592487, 0.06121496111154556, 0.2071741670370102], [0.2342938333749771, 0.5683650374412537, 0.6037701964378357, 0.7331977486610413, 0.7349027395248413, 0.6651985049247742, 0.23853524029254913, 0.2293619066476822, 0.48426058888435364, 0.7077944874763489, 0.5918195843696594, 0.8169012665748596, 0.7005065679550171, 0.4784330725669861, 0.015931207686662674]], [[0.04383472725749016, 0.02773081697523594, 0.016415273770689964, 0.024880478158593178, 0.005487722344696522, 0.14834517240524292, 0.010061212815344334, 0.013310510665178299, 0.03559315577149391, 0.022788431495428085, 0.016539618372917175, 0.022621937096118927, 0.3853665292263031, 0.02895752713084221, 0.21785423159599304], [0.02212689444422722, 0.0360226184129715, 0.0007962794625200331, 0.005733562167733908, 0.0017349227564409375, 0.011109595187008381, 0.02015179581940174, 0.048344310373067856, 0.003794114338234067, 0.016348786652088165, 0.0018908409401774406, 0.010183308273553848, 0.04822028428316116, 0.011540568433701992, 0.21287554502487183], [0.19621919095516205, 0.02568935602903366, 0.012553256005048752, 0.05958101898431778, 0.0049527534283697605, 0.009129918180406094, 0.035662900656461716, 0.006033026147633791, 0.01979534700512886, 0.016174430027604103, 0.025959551334381104, 0.017891131341457367, 0.21532145142555237, 0.010915487073361874, 0.2776879370212555], [0.22681212425231934, 0.26364389061927795, 0.1368870735168457, 0.07472710311412811, 0.004966794513165951, 0.17209400236606598, 0.07595591247081757, 0.10330677032470703, 0.009879215620458126, 0.30214887857437134, 0.027453631162643433, 0.07928238064050674, 0.6068928837776184, 0.0009245484252460301, 0.41711828112602234], [0.03220081329345703, 0.07110226154327393, 0.19687172770500183, 0.32465922832489014, 0.06123804301023483, 0.009123058058321476, 0.008925903588533401, 0.001694322214461863, 0.009767607785761356, 0.012425252236425877, 0.021234901621937752, 0.006749649532139301, 0.022427640855312347, 0.00419656652957201, 0.11337225884199142], [0.1499132513999939, 0.1588381826877594, 0.006192722357809544, 0.06905046850442886, 0.021936854347586632, 0.04223879054188728, 0.01654554158449173, 0.012800824828445911, 0.001194898271933198, 0.011350413784384727, 0.0011690479004755616, 0.03650015965104103, 0.0330234132707119, 0.032408226281404495, 0.30060991644859314], [0.10197536647319794, 0.32784661650657654, 0.22266407310962677, 0.37194594740867615, 0.4840903878211975, 0.2562866806983948, 0.20682689547538757, 0.01685171388089657, 0.02662164717912674, 0.01744299754500389, 0.07043293118476868, 0.06053447723388672, 0.13449640572071075, 0.0437617152929306, 0.15905345976352692], [0.04155902937054634, 0.02725875750184059, 0.06621034443378448, 0.15740959346294403, 0.22226983308792114, 0.11737026274204254, 0.021176597103476524, 0.037896860390901566, 0.001983239781111479, 0.07737525552511215, 0.040612466633319855, 0.036445699632167816, 0.04206009954214096, 0.005294053349643946, 0.22695806622505188], [0.3731417655944824, 0.020610323175787926, 0.04687204957008362, 0.19942151010036469, 0.0219199787825346, 0.023319954052567482, 0.607546865940094, 0.0038317576982080936, 0.05746426433324814, 0.0039819530211389065, 0.0020286834333091974, 0.023514816537499428, 0.0007224131841212511, 0.0017132725333794951, 0.31377115845680237], [0.007707278709858656, 0.04994801804423332, 0.0602150596678257, 0.1843070536851883, 0.023052150383591652, 0.00867108628153801, 0.0030793596524745226, 0.008175634779036045, 0.3707427382469177, 0.032583341002464294, 0.030614105984568596, 0.003414844162762165, 0.0027733321767300367, 0.00039667857345193624, 0.06665757298469543], [0.06275568902492523, 0.15385569632053375, 0.07121506333351135, 0.04657430946826935, 0.08974524587392807, 0.017753345891833305, 0.09537442773580551, 0.08409535884857178, 0.4617481529712677, 0.05371565744280815, 0.051210206001996994, 0.014556556940078735, 0.0261379461735487, 0.0015151489060372114, 0.25993233919143677], [0.037524934858083725, 0.08964382112026215, 0.11503562331199646, 0.2385229468345642, 0.14595970511436462, 0.01507873460650444, 0.07354842126369476, 0.014194677583873272, 0.01029899064451456, 0.3145633935928345, 0.08443433046340942, 0.02799280546605587, 0.006364578381180763, 0.0011598452692851424, 0.25597554445266724], [0.03498825803399086, 0.003427299438044429, 0.012860815972089767, 0.00960747804492712, 0.0073430403135716915, 0.002194140339270234, 0.020218953490257263, 0.04016692563891411, 0.0035721054300665855, 0.11439335346221924, 0.03179614990949631, 0.0055262502282857895, 0.08811097592115402, 0.0019241927657276392, 0.31578439474105835], [0.0003122057532891631, 0.0005657155998051167, 0.0003099576279055327, 0.018182117491960526, 8.608390635345131e-05, 0.00029685357003472745, 0.00030423246789723635, 0.0039575002156198025, 0.00041145391878671944, 0.0009832053910940886, 0.0007515411707572639, 0.006357411853969097, 0.3007054328918457, 0.00010537439811741933, 0.00161165336612612], [0.052370160818099976, 0.019386928528547287, 0.0404941625893116, 0.16087706387043, 0.14014431834220886, 0.0561581589281559, 0.1907973736524582, 0.027806226164102554, 0.022970959544181824, 0.05846026912331581, 0.09902504831552505, 0.038958851248025894, 0.016928229480981827, 0.04114920645952225, 0.14461401104927063]], [[0.1774463951587677, 0.26868411898612976, 0.03527391701936722, 0.01705012284219265, 0.00047759010340087116, 0.006241941824555397, 0.0031507122330367565, 0.2944689095020294, 0.038735195994377136, 0.003944840747863054, 0.004385389853268862, 0.004225992131978273, 0.03986744210124016, 0.00549504067748785, 0.07870971411466599], [0.00027908835909329355, 0.005506355315446854, 0.001626787707209587, 0.13775548338890076, 0.0008261757320724428, 0.00028156363987363875, 0.0002459189563523978, 0.0025131029542535543, 0.0009445812902413309, 0.001017659087665379, 0.002250042976811528, 0.0015115974238142371, 0.0017954352078959346, 0.0006745054270140827, 0.21780018508434296], [0.021244889125227928, 0.1178143173456192, 0.008956437930464745, 0.14321640133857727, 0.023635229095816612, 0.3068733811378479, 0.15845780074596405, 0.3092327415943146, 0.0024783278349786997, 0.06481246650218964, 0.008965774439275265, 0.019083118066191673, 0.04005150496959686, 0.01112168189138174, 0.19139143824577332], [0.00042023108107969165, 0.0008873279439285398, 0.0019056870369240642, 0.007766622584313154, 0.23140135407447815, 0.5036463141441345, 0.015440672636032104, 0.008361338637769222, 0.001879698014818132, 0.0006688520661555231, 0.01133010908961296, 0.09722423553466797, 0.03314661607146263, 0.006971372757107019, 0.02285030484199524], [0.002678314223885536, 0.004764833487570286, 0.0003137744788546115, 0.0006636036559939384, 0.07552827149629593, 0.36051952838897705, 0.21059149503707886, 0.11911091953516006, 0.00013829045929014683, 0.00018005385936703533, 0.00021675217431038618, 0.007453517522662878, 0.004449300933629274, 0.03708551451563835, 0.13281597197055817], [0.008487393148243427, 0.014329447411000729, 0.005103611387312412, 0.0017902699764817953, 0.00018748251022771, 0.07080603390932083, 0.1865091174840927, 0.03389747440814972, 0.0026728338561952114, 0.00012369015894364566, 0.0001717496052151546, 0.0016556874616071582, 0.0035823825746774673, 0.018341869115829468, 0.2051384449005127], [0.0016413311241194606, 0.0038119314704090357, 0.0005628983490169048, 6.117233715485781e-05, 0.00011399950017221272, 0.0007454796577803791, 0.054881561547517776, 0.30246245861053467, 0.15667226910591125, 0.0004453254514373839, 0.0002609542279969901, 0.0001120980887208134, 0.0006856885738670826, 0.00573006272315979, 0.011146760545670986], [0.001007524086162448, 0.0022212164476513863, 0.00036003260174766183, 2.8946307793376036e-05, 1.0167077562073246e-05, 0.00012231878645252436, 0.00022786400222685188, 0.03619853034615517, 0.005354967433959246, 0.003357505425810814, 0.0005030903848819435, 5.3131421736907214e-05, 4.2532476072665304e-05, 0.00010396525613032281, 0.2518664300441742], [0.004948427900671959, 0.0037361346185207367, 0.0040338728576898575, 0.0015943445032462478, 3.9753424061927944e-05, 0.00016846440848894417, 0.00017597683472558856, 0.003258961718529463, 0.06328149139881134, 0.43567389249801636, 0.03252503648400307, 0.006277996581047773, 3.634384847828187e-05, 2.672040500328876e-05, 0.030029548332095146], [0.00322673749178648, 0.017767680808901787, 0.0033617434091866016, 0.029219835996627808, 0.0009114073473028839, 0.002889687195420265, 0.00012576105655170977, 0.01574547402560711, 0.0018639388727024198, 0.6032934188842773, 0.1301620751619339, 0.04121570661664009, 0.0035096178762614727, 0.00032833084696903825, 0.3004224896430969], [0.033899419009685516, 0.07324357330799103, 0.00985381193459034, 0.017461512237787247, 0.019165849313139915, 0.07006029784679413, 0.01799222268164158, 0.013579626567661762, 0.00021177329472266138, 0.026033537462353706, 0.13102787733078003, 0.2077469676733017, 0.7029638886451721, 0.029135672375559807, 0.05414650961756706], [0.0015424743760377169, 0.007544125430285931, 0.010602829977869987, 0.0016127177514135838, 0.006006686482578516, 0.08514653891324997, 0.003129118587821722, 0.0036380700767040253, 1.298951519856928e-05, 6.919799488969147e-05, 0.0003367147874087095, 0.031529009342193604, 0.36636054515838623, 0.21289798617362976, 0.04463290795683861], [0.005653384607285261, 0.005221519153565168, 0.010438429191708565, 0.0023121859412640333, 0.0034771040081977844, 0.01156994141638279, 0.006321457680314779, 0.006196276750415564, 2.671167931111995e-05, 0.00012823205906897783, 0.00023895784397609532, 0.0015353390481323004, 0.06888392567634583, 0.3010466396808624, 0.05789510905742645], [0.0025978884659707546, 0.0011408268474042416, 0.0005907863960601389, 0.0073682027868926525, 5.514698841579957e-06, 0.0001586068101460114, 0.0016139426734298468, 0.002635698765516281, 2.2516995159094222e-05, 7.803570952091832e-06, 4.170422926108586e-06, 4.799172893399373e-05, 8.148160122800618e-05, 0.006126015912741423, 0.363029420375824], [0.018444720655679703, 0.036891017109155655, 0.08301377296447754, 0.04485299810767174, 0.0371856652200222, 0.0472157783806324, 0.022677546367049217, 0.017107300460338593, 0.03217196837067604, 0.03369837626814842, 0.021089907735586166, 0.018274538218975067, 0.020997297018766403, 0.034321803599596024, 0.1648317128419876]], [[0.2133164256811142, 0.025492815300822258, 0.20653849840164185, 0.07043907791376114, 0.10411863774061203, 0.3043566346168518, 0.06760577112436295, 0.5064103603363037, 0.08081910014152527, 0.27507925033569336, 0.5432406663894653, 0.27881479263305664, 0.16320040822029114, 0.2653813064098358, 0.11116068065166473], [0.015402763150632381, 0.2444494515657425, 0.0030522451270371675, 0.00048490799963474274, 0.0026600188575685024, 0.06905494630336761, 0.012269481085240841, 0.014592616818845272, 0.004205085337162018, 0.0039128707721829414, 0.0037959537003189325, 0.012499181553721428, 0.02713301219046116, 0.00563135975971818, 0.19437076151371002], [0.04805738478899002, 0.007929358631372452, 0.4969516396522522, 0.08109094947576523, 0.008613435551524162, 0.06128339096903801, 0.020970679819583893, 0.014624540694057941, 0.001800250494852662, 0.04372387006878853, 0.036881472915410995, 0.022519467398524284, 0.032134752720594406, 0.17586740851402283, 0.15428785979747772], [0.021660206839442253, 0.06483402103185654, 0.07990853488445282, 0.8655576705932617, 0.10770212858915329, 0.042777951806783676, 0.004243527539074421, 0.04141073673963547, 0.0011197980493307114, 0.0010354480473324656, 0.007620980031788349, 0.009411019273102283, 0.023886993527412415, 0.8532692193984985, 0.009252375923097134], [0.03802541270852089, 0.5626884698867798, 0.3869370222091675, 0.012873617932200432, 0.11968709528446198, 0.014900745823979378, 0.02957817167043686, 0.018288375809788704, 0.005979553796350956, 0.03379013389348984, 0.016338851302862167, 0.01766209304332733, 0.8086205720901489, 0.08052025735378265, 0.13067808747291565], [0.0663566142320633, 0.02082742564380169, 0.009716741740703583, 0.003548208624124527, 0.0008020728128030896, 0.4547119140625, 0.03523911535739899, 0.0031006578356027603, 0.006736437324434519, 0.0009184986702166498, 0.0011584048625081778, 0.04212343320250511, 0.019468490034341812, 0.001240313402377069, 0.20631356537342072], [0.004470710642635822, 0.02006937935948372, 0.020011691376566887, 0.019766854122281075, 0.12330501526594162, 0.15558527410030365, 0.04160740226507187, 0.1780312955379486, 0.014384130015969276, 0.005233153235167265, 0.004123131278902292, 0.05227937176823616, 0.013469746336340904, 0.022578507661819458, 0.07922197878360748], [0.17898443341255188, 0.006772744003683329, 0.041487641632556915, 0.009575014933943748, 0.016729410737752914, 0.2668032944202423, 0.12321095168590546, 0.6781973838806152, 0.0025635806377977133, 0.01087682880461216, 0.002732365159317851, 0.020299792289733887, 0.0031363710295408964, 0.0008204782498069108, 0.05180227383971214], [0.12461799383163452, 0.013122161850333214, 0.02311752177774906, 0.0762406587600708, 0.09383975714445114, 0.007501720450818539, 0.07133012264966965, 0.008159258402884007, 0.13900579512119293, 0.006521029397845268, 0.021471921354532242, 0.012502939440310001, 0.0014349960256367922, 0.011674328707158566, 0.3848530650138855], [0.014992507174611092, 0.010756749659776688, 0.10129547864198685, 0.15213072299957275, 0.1363232582807541, 0.16603931784629822, 0.0040587568655610085, 0.505429208278656, 0.0025213102344423532, 0.05678342655301094, 0.20746274292469025, 0.04314066469669342, 0.0019582516979426146, 0.01985819824039936, 0.18090446293354034], [0.11427638679742813, 0.0123747568577528, 0.020808644592761993, 0.1336503028869629, 0.008563186042010784, 0.09643486887216568, 0.15193390846252441, 0.050255559384822845, 0.0023536821827292442, 0.3208443820476532, 0.021319447085261345, 0.003293143818154931, 0.027340535074472427, 0.01197835523635149, 0.09007034450769424], [0.15923485159873962, 0.11477550864219666, 0.21969333291053772, 0.09681756794452667, 0.07061057537794113, 0.1670638769865036, 0.1398637294769287, 0.059452954679727554, 0.00850652251392603, 0.062244825065135956, 0.03212086483836174, 0.10482167452573776, 0.05658517777919769, 0.03675027936697006, 0.24718202650547028], [0.004966236650943756, 0.001515651005320251, 0.002549123717471957, 0.006106496322900057, 0.00036676786839962006, 0.0014838402858003974, 0.008350875228643417, 0.003760475432500243, 9.004020830616355e-05, 0.003012964967638254, 0.000879374798387289, 0.0023141989950090647, 0.5349817276000977, 0.00013737898552790284, 0.18041089177131653], [3.0577066354453564e-05, 0.00011073229688918218, 0.0002722943318076432, 0.00012968607188668102, 3.925479541067034e-05, 9.284611587645486e-05, 1.1375399481039494e-05, 0.00013649655738845468, 2.160583608201705e-05, 3.872126853821101e-06, 4.776401965500554e-06, 5.892393892281689e-05, 0.3018791675567627, 0.0016873051645234227, 0.00020723984926007688], [0.0053407615050673485, 0.002270790981128812, 0.015077341347932816, 0.008943013846874237, 0.01947944425046444, 0.013856526464223862, 0.021029049530625343, 0.011522401124238968, 0.019980257377028465, 0.021877266466617584, 0.03018842823803425, 0.06539047509431839, 0.04945596680045128, 0.008784771896898746, 0.1688213050365448]], [[0.09667091816663742, 0.08969368785619736, 0.16646768152713776, 0.01428181305527687, 0.1262292116880417, 0.03015410713851452, 0.00857650488615036, 0.013287652283906937, 0.013465571217238903, 0.009945754893124104, 0.03584994748234749, 0.07976501435041428, 0.013894102536141872, 0.07191513478755951, 0.16682514548301697], [0.00307486648671329, 0.2169581949710846, 0.015313946641981602, 0.005070009268820286, 0.13766343891620636, 0.036365993320941925, 0.013734312728047371, 0.012890451587736607, 0.00037508379318751395, 0.002069024136289954, 0.0038654597010463476, 0.007793853525072336, 0.006365353707224131, 0.02897111512720585, 0.19472798705101013], [0.013033762574195862, 0.0016745100729167461, 0.09789733588695526, 0.11557573825120926, 0.070904940366745, 0.039959780871868134, 0.06112189590930939, 0.005926545709371567, 0.05931684747338295, 0.06562750041484833, 0.015556245110929012, 0.2949027419090271, 0.09280899167060852, 0.18960142135620117, 0.2321171909570694], [0.0009253448224626482, 0.0011463494738563895, 0.0022407870274037123, 0.022192178294062614, 0.18083734810352325, 0.18906380236148834, 0.06340676546096802, 0.5556718111038208, 0.008876022882759571, 0.00195835973136127, 0.009641225449740887, 0.13488754630088806, 0.03692271187901497, 0.0069083282724022865, 0.19416382908821106], [0.020195724442601204, 0.0026999269612133503, 0.0047158133238554, 0.017117822542786598, 0.22690622508525848, 0.009801734238862991, 0.18513473868370056, 0.000916039280127734, 0.006044555455446243, 0.006021710112690926, 0.010346228256821632, 0.04500352963805199, 0.008295656181871891, 0.1122727021574974, 0.4271945357322693], [0.02983868308365345, 0.03651329129934311, 0.005064305383712053, 0.00043434457620605826, 0.001774297677911818, 0.10316617041826248, 0.10274261981248856, 0.570116400718689, 0.0018607155652716756, 0.004884766880422831, 0.0001192242925753817, 0.01004798710346222, 0.011760696768760681, 0.020220324397087097, 0.036799319088459015], [0.020830435678362846, 0.04066089913249016, 0.01340602245181799, 0.0007146665593609214, 0.05329689383506775, 0.010700137354433537, 0.06310626864433289, 0.1416247934103012, 0.059007443487644196, 0.009734428487718105, 0.023192377761006355, 0.030464952811598778, 0.011454294435679913, 0.06458231806755066, 0.29838618636131287], [0.04047420993447304, 0.05575861781835556, 0.0035385461524128914, 0.00047053993330337107, 0.010776028037071228, 0.0002634078555274755, 0.006466362159699202, 0.09768779575824738, 0.011305907741189003, 0.6455902457237244, 0.005685864482074976, 0.009437574073672295, 0.0014128481270745397, 0.0036261524073779583, 0.1994941532611847], [0.001968077849596739, 0.00013096239126753062, 0.014192181639373302, 0.0025808673817664385, 1.1752749742299784e-05, 7.090794679243118e-05, 8.489128958899528e-05, 7.501097570639104e-05, 0.005588378757238388, 0.00024033378576859832, 0.7911840081214905, 0.0006417080294340849, 0.00012212486763019115, 0.0026151463389396667, 0.024830428883433342], [0.007711799815297127, 0.006852409336715937, 0.005409319419413805, 0.029324712231755257, 0.0012151957489550114, 0.0014427780406549573, 0.0002848623844329268, 0.0011284908978268504, 0.00042831210885196924, 0.0035933239851146936, 0.2853389084339142, 0.04352247342467308, 0.0011324246879667044, 0.0015205255476757884, 0.05924868583679199], [0.06333743035793304, 0.004831443540751934, 0.017261236906051636, 0.05893971398472786, 0.005950291641056538, 0.002105317311361432, 0.003185122972354293, 0.0028415010310709476, 0.004572128411382437, 0.007815520279109478, 0.07613655924797058, 0.10669270157814026, 0.027066918089985847, 0.03207901865243912, 0.4743220806121826], [0.10327208787202835, 0.004544916562736034, 0.05445469170808792, 0.010814311914145947, 0.026858847588300705, 0.011217474937438965, 0.07071709632873535, 0.05960191786289215, 0.0010665962472558022, 0.025403864681720734, 0.006131312809884548, 0.5720618963241577, 0.029676837846636772, 0.17520834505558014, 0.23297326266765594], [0.011414228938519955, 0.002735550981014967, 0.015156290493905544, 0.0027777000796049833, 0.009832575917243958, 0.015552453696727753, 0.017305195331573486, 0.004722784738987684, 4.7792200348339975e-05, 0.0034479873720556498, 0.0004017044266220182, 0.0011886333813890815, 0.18307994306087494, 0.2786843478679657, 0.04159880056977272], [0.0032662157900631428, 0.004168938845396042, 0.0016457620076835155, 0.0005059303948655725, 0.0003206630062777549, 0.000853654695674777, 0.010604765266180038, 0.005784912034869194, 0.00014833646127954125, 0.0001704594906186685, 5.580573997576721e-05, 0.0004662217397708446, 0.0009024841128848493, 0.025914611294865608, 0.3543371260166168], [0.057395875453948975, 0.01834016665816307, 0.017516011372208595, 0.011936328373849392, 0.010095582343637943, 0.018046732991933823, 0.24530914425849915, 0.01257838774472475, 0.014466731809079647, 0.027552323415875435, 0.054997242987155914, 0.013960911892354488, 0.0074861980974674225, 0.03251070901751518, 0.14566579461097717]], [[0.3107149600982666, 0.049285680055618286, 0.08128133416175842, 0.03986956924200058, 0.07088969647884369, 0.1961679309606552, 0.15016919374465942, 0.05429982393980026, 0.1291487067937851, 0.03663256764411926, 0.25306442379951477, 0.3913470208644867, 0.2542778253555298, 0.252127081155777, 0.15921251475811005], [0.10834414511919022, 0.3508348762989044, 0.02124197781085968, 0.019397908821702003, 0.026673240587115288, 0.3167271912097931, 0.11886779963970184, 0.17699773609638214, 0.14507175981998444, 0.115145742893219, 0.6241064667701721, 0.1622784435749054, 0.5683063268661499, 0.15724869072437286, 0.12728430330753326], [0.6979861855506897, 0.039286430925130844, 0.3014020621776581, 0.003208757843822241, 0.01772892102599144, 0.014036925509572029, 0.19886529445648193, 0.09335973858833313, 0.4060034155845642, 0.28424081206321716, 0.26539483666419983, 0.1895008385181427, 0.4672236740589142, 0.16107353568077087, 0.10992881655693054], [0.5298255681991577, 0.6474234461784363, 0.19260530173778534, 0.026028962805867195, 0.013013242743909359, 0.01466711051762104, 0.11121421307325363, 0.06523838639259338, 0.29339125752449036, 0.46135157346725464, 0.7174844145774841, 0.3618351221084595, 0.19526919722557068, 0.0703459233045578, 0.24330592155456543], [0.7494951486587524, 0.23358309268951416, 0.3640848398208618, 0.09014757722616196, 0.32190942764282227, 0.0021980239544063807, 0.07713330537080765, 0.030900368466973305, 0.08560045808553696, 0.26394325494766235, 0.11549779027700424, 0.44356539845466614, 0.12175428122282028, 0.3783136308193207, 0.14015373587608337], [0.3064809739589691, 0.15617568790912628, 0.4955383241176605, 0.8125641942024231, 0.02114781178534031, 0.2633197009563446, 0.014569958671927452, 0.04754461348056793, 0.03227522596716881, 0.09995166957378387, 0.0697590634226799, 0.0770602896809578, 0.19454655051231384, 0.18272873759269714, 0.19963966310024261], [0.5314973592758179, 0.5086395144462585, 0.5757231116294861, 0.44031307101249695, 0.2709468603134155, 0.0639616996049881, 0.2984015941619873, 0.0039451331831514835, 0.0197422094643116, 0.0031917106825858355, 0.05093149095773697, 0.12591752409934998, 0.25977155566215515, 0.0615861676633358, 0.3711840510368347], [0.2939777970314026, 0.2997593581676483, 0.5167340040206909, 0.46100836992263794, 0.39705657958984375, 0.5034002065658569, 0.07978513836860657, 0.0779491513967514, 0.012053987942636013, 0.01132633350789547, 0.028715649619698524, 0.059212565422058105, 0.20603224635124207, 0.15584728121757507, 0.14816488325595856], [0.3128078877925873, 0.0864272266626358, 0.7678588032722473, 0.6537591814994812, 0.8236088752746582, 0.6979317665100098, 0.30976778268814087, 0.014760972931981087, 0.5645584464073181, 0.004590533208101988, 0.008271697908639908, 0.012132997624576092, 0.028745530173182487, 0.04464057460427284, 0.1669740080833435], [0.6456499099731445, 0.1693999022245407, 0.7097220420837402, 0.5244839191436768, 0.46365103125572205, 0.5023244023323059, 0.9643971920013428, 0.24913577735424042, 0.13337120413780212, 0.06419410556554794, 0.012416149489581585, 0.0573885552585125, 0.016666844487190247, 0.008706454187631607, 0.1754455268383026], [0.09960467368364334, 0.0907629206776619, 0.36143985390663147, 0.11092879623174667, 0.19937658309936523, 0.03214935213327408, 0.3196737766265869, 0.4763943552970886, 0.497630774974823, 0.1899363249540329, 0.1145005002617836, 0.004749455489218235, 0.0008605146431364119, 0.0007969819707795978, 0.02025206945836544], [0.3807562589645386, 0.26623356342315674, 0.4209006428718567, 0.27443018555641174, 0.5137820839881897, 0.1592678278684616, 0.6250110864639282, 0.6178545951843262, 0.9692861437797546, 0.5716569423675537, 0.22724294662475586, 0.17567582428455353, 0.008769324980676174, 0.002557128667831421, 0.05025441572070122], [0.2969632148742676, 0.16767999529838562, 0.46978121995925903, 0.28813451528549194, 0.45300158858299255, 0.33029136061668396, 0.6236194968223572, 0.1634167730808258, 0.8177276253700256, 0.718397855758667, 0.9021148681640625, 0.07875741273164749, 0.09992827475070953, 0.004932410083711147, 0.1707668900489807], [0.3945808410644531, 0.3581867516040802, 0.5247420072555542, 0.4120633900165558, 0.3024104833602905, 0.35548633337020874, 0.5872392654418945, 0.15815261006355286, 0.7289484143257141, 0.7948301434516907, 0.9396543502807617, 0.9256777167320251, 0.08537369966506958, 0.03166399896144867, 0.03224433213472366], [0.004588960204273462, 0.041907694190740585, 0.17755450308322906, 0.039724841713905334, 0.047663237899541855, 0.09274838864803314, 0.010110240429639816, 0.014862497337162495, 0.11161036789417267, 0.0490046888589859, 0.18517035245895386, 0.029471391811966896, 0.05094437301158905, 0.002971563721075654, 0.16300250589847565]], [[6.113462859502761e-06, 0.5065946578979492, 7.261813152581453e-05, 5.1066386498122354e-14, 1.0490246824277965e-15, 1.4956003015903496e-12, 2.5734427609724886e-13, 2.1143946469237562e-06, 9.544867651811728e-08, 4.2543565892394497e-10, 6.215519418595328e-12, 1.687761909396901e-11, 1.6993320528513323e-08, 1.0583119935958507e-09, 9.857150189418462e-07], [4.727198188447801e-08, 0.002272214274853468, 0.8730366826057434, 0.0016238681273534894, 9.849362297975617e-11, 6.310171162720105e-14, 1.3311845115798748e-12, 1.350557283785747e-07, 1.07800769910682e-05, 3.4101576602552086e-05, 7.529693561991735e-07, 3.7022258592145363e-09, 3.1551092294357375e-10, 8.851498527195911e-12, 1.024629546009237e-05], [6.003397223786067e-10, 5.335852165444521e-06, 0.00445933174341917, 0.5796651840209961, 5.976808097329922e-05, 2.377180230439535e-09, 1.7792844021063958e-12, 1.2140626282075573e-09, 6.417224529542409e-09, 2.601910637167748e-06, 1.1842810181406094e-06, 1.8266834445057611e-07, 1.3081095096012518e-09, 1.5776791765370612e-12, 4.7676843678345904e-05], [2.4071971206038626e-15, 2.3560551770727793e-14, 9.98394700246763e-11, 1.7167060661904543e-07, 0.2774648666381836, 1.6012703781598248e-05, 9.760837530760607e-15, 4.654387315338889e-18, 8.039692137064508e-20, 2.1508527635127157e-16, 1.789740057545064e-11, 2.4233797191186568e-08, 2.7592322870972907e-10, 4.956549239646573e-15, 1.5411848153235042e-06], [1.9919477308935618e-13, 5.266535346254387e-16, 1.2917133013982517e-14, 7.221083175856791e-10, 8.195231930585578e-05, 0.5564944744110107, 4.117699063499458e-06, 5.438900198273533e-13, 2.4172004338169554e-20, 9.57835365503234e-22, 9.376302678036402e-17, 3.235451073724249e-10, 6.101883442966027e-09, 9.971044129253315e-11, 1.6162671201414014e-08], [9.771466125130246e-08, 3.17872256294649e-11, 3.1429036890379125e-13, 5.901367481980172e-16, 4.2342058748090494e-09, 0.0012305855052545667, 0.6103256940841675, 2.2161180822877213e-05, 7.972257402844019e-12, 6.481494664823834e-19, 5.35928561114305e-19, 7.863773244772346e-14, 1.1593314752644801e-07, 8.808668212623161e-07, 1.1730364235518209e-07], [2.6939844799400703e-10, 3.892770337188267e-07, 2.2438891023046637e-10, 2.095593632707407e-18, 1.8655412772298346e-14, 2.206185598652155e-07, 3.0316745323943906e-05, 0.33891788125038147, 5.437008439912461e-06, 1.3213468337612382e-14, 2.5347562276209975e-18, 1.0659246862729562e-18, 2.6392999114346893e-13, 9.868956762915104e-10, 1.6170986327779246e-06], [1.3015508670832787e-09, 4.1474245904282725e-07, 7.619819371029735e-06, 9.079691751061325e-13, 5.725895077835787e-16, 1.0568446176517903e-14, 8.978999488373773e-11, 2.253716047562193e-05, 0.9323674440383911, 0.0001553743495605886, 1.1094852814252931e-10, 4.251380123255501e-17, 3.4548606558270072e-18, 1.563022274271835e-14, 1.7832363141678798e-07], [1.2218349942916262e-10, 4.9370779464652514e-08, 1.0212672805209877e-06, 3.802215486903293e-11, 4.1323817879847246e-16, 3.8503187577578586e-16, 6.2032051316354e-15, 3.2203126920649083e-07, 8.202762546716258e-05, 0.5051153898239136, 1.6483796571264975e-05, 2.317061202194298e-13, 9.134085045449695e-19, 4.959048342554486e-21, 1.9839136555788173e-08], [3.5615963439117673e-14, 6.311461336200308e-12, 7.572167781688677e-09, 7.864790063649707e-08, 5.871175941252194e-13, 4.399392566282849e-15, 3.6105855357745724e-20, 8.408651243829376e-14, 2.915925279012299e-09, 2.7294316168990918e-05, 0.31493836641311646, 1.4271394093157141e-06, 7.57530499374999e-14, 1.0444343699767344e-21, 5.65783730976932e-09], [1.619628042792698e-10, 6.862534152052291e-11, 7.238428190170509e-10, 5.1994692995549485e-08, 8.193378420173758e-08, 6.734891755399985e-09, 1.47457238341411e-14, 5.793711288450045e-15, 1.5065480465795492e-14, 1.167909147170576e-08, 0.0003541565383784473, 0.5504465699195862, 2.5677532903500833e-05, 4.9321430864142715e-14, 1.3459792569392448e-07], [8.003913504195381e-11, 5.626729984720136e-12, 4.9737857062137625e-12, 1.4365373474101162e-11, 1.165467935493325e-07, 3.263785401941277e-05, 9.4434834951862e-11, 2.6144878938953817e-15, 6.540743544149476e-19, 2.5930401594030658e-17, 1.8366722587259687e-09, 1.8794700736179948e-05, 0.49058014154434204, 8.066950840657228e-07, 1.3585024589701788e-06], [1.0801989728040362e-12, 2.2359935084037552e-13, 1.1691597126203823e-12, 1.0214807062303036e-16, 2.4270561688882752e-12, 4.4484740890915475e-10, 1.1468358207533669e-10, 1.5131759777478604e-13, 3.7208958865722007e-20, 6.888861115537483e-21, 1.5888746801787275e-18, 3.2241334168431335e-12, 5.685043561243219e-06, 0.3912107050418854, 3.0407140694244106e-10], [5.397048425948014e-07, 2.3629811494174646e-06, 8.614414923613367e-07, 8.006720286779512e-13, 4.92412575016192e-14, 2.066644277931573e-08, 0.00031528103863820434, 0.011093947105109692, 3.7555511767095595e-07, 1.151808547627739e-13, 5.505821095062543e-16, 1.6971218267519683e-12, 5.383023108151974e-06, 0.8731740117073059, 0.04139598086476326], [0.6266164779663086, 0.3128010928630829, 0.06246759742498398, 0.00042505442979745567, 0.008534153923392296, 0.09425555169582367, 0.2709643542766571, 0.686626672744751, 0.3142872750759125, 0.10107265412807465, 0.015935143455863, 0.012286541052162647, 0.14970052242279053, 0.3989029824733734, 0.022492708638310432]]], [[[0.1393769532442093, 0.0735321119427681, 0.701509952545166, 0.10650816559791565, 0.05110495164990425, 0.021589145064353943, 0.0033319133799523115, 0.0014166238252073526, 0.01486207265406847, 0.006584684830158949, 0.002582702785730362, 0.0004108685825485736, 0.010701421648263931, 0.009390643797814846, 0.06290604919195175], [0.0030957262497395277, 0.0237117987126112, 0.7945073246955872, 0.09792613238096237, 0.2614360749721527, 0.179405078291893, 0.011310527101159096, 0.009954328648746014, 0.009489532560110092, 0.0005609119543805718, 0.000751268700696528, 0.0001462608779547736, 0.004604416899383068, 0.004964352585375309, 0.019775664433836937], [0.002461136318743229, 0.024594180285930634, 0.009559455327689648, 0.055053047835826874, 0.30010533332824707, 0.4690517783164978, 0.03334644436836243, 0.0075769852846860886, 0.007821744307875633, 0.004109389614313841, 0.0022267017047852278, 0.000916018383577466, 0.0037954216822981834, 0.0007741246954537928, 0.004415341652929783], [0.0019876149017363787, 0.0012237336486577988, 0.00015556006110273302, 0.0003553472051862627, 0.4419420659542084, 0.6252713799476624, 0.02062046155333519, 0.0028509902767837048, 0.00548406969755888, 0.0003452444798313081, 0.0001962203241419047, 0.0008938669925555587, 0.0009214308229275048, 1.2216354662086815e-05, 0.0019377138232812285], [0.00020824302919209003, 0.00021322975226212293, 4.6913473852328025e-06, 0.00017657040734775364, 0.0005752452998422086, 0.5289100408554077, 0.1970362812280655, 0.12947966158390045, 0.0005265067447908223, 0.000227929005632177, 6.233566091395915e-05, 0.0001991882745642215, 0.00032238851417787373, 0.0003627484547905624, 0.0016414258861914277], [0.0010278578847646713, 0.0029486939311027527, 0.00014835220645181835, 0.00036925319000147283, 0.00742883887141943, 0.03272741660475731, 0.8576475977897644, 0.03500620648264885, 0.2982224225997925, 0.0003585784579627216, 5.663683623424731e-05, 0.0011889662127941847, 0.00576341338455677, 0.003998933359980583, 0.03130826726555824], [0.002113666385412216, 0.004151111003011465, 0.002428078791126609, 0.002119476906955242, 0.001100956811569631, 0.003687644377350807, 0.13543397188186646, 0.11922256648540497, 0.7567945718765259, 0.2570010721683502, 0.004903816152364016, 0.0001005519661703147, 0.000830159813631326, 0.001259618904441595, 0.14076685905456543], [0.0010344160255044699, 0.00660368800163269, 0.0025270660407841206, 0.00023567670723423362, 0.0004021638887934387, 0.0030120171140879393, 0.0016376315616071224, 0.0524386465549469, 0.7797302007675171, 0.1269131302833557, 0.004214781802147627, 0.0002750723797362298, 0.002267329953610897, 0.001067862962372601, 0.16698867082595825], [0.0009750229655764997, 0.0120720649138093, 0.0038384809158742428, 0.0036232813727110624, 0.004431525245308876, 0.0007613649941049516, 5.662842158926651e-05, 0.01338160876184702, 0.041878536343574524, 0.7091978788375854, 0.2535402476787567, 0.13969287276268005, 0.026510832831263542, 0.0006678565987385809, 0.015569130890071392], [0.0002093962684739381, 0.00030164673808030784, 0.00010105424007633701, 5.030819465901004e-06, 0.001411793869920075, 0.003664590884000063, 0.00017403968377038836, 0.0011218853760510683, 0.011106000281870365, 0.003924186807125807, 0.07315385341644287, 0.3008219599723816, 0.36353737115859985, 0.025737306103110313, 0.0060785748064517975], [0.0001716838014544919, 0.0008840822265483439, 4.3183892557863146e-05, 3.6494086543825688e-06, 0.0005770743009634316, 0.010045445524156094, 0.00010205945727648214, 6.57988857710734e-05, 0.0006949909729883075, 0.004452799912542105, 0.009000658988952637, 0.49080607295036316, 0.17717383801937103, 0.11174798011779785, 0.021669577807188034], [0.019416164606809616, 0.0014941463014110923, 0.001027028076350689, 0.001502541359513998, 0.0085412273183465, 0.12493651360273361, 0.0035243057645857334, 0.0026196581311523914, 0.0008317703031934798, 0.0015569254755973816, 0.060888972133398056, 0.06929422169923782, 0.3396435081958771, 0.387500524520874, 0.017253199592232704], [0.04994890093803406, 0.15025374293327332, 0.024391163140535355, 0.00227133696898818, 0.012616162188351154, 0.2894521951675415, 0.4185648262500763, 0.19089959561824799, 0.027421748265624046, 0.001001756638288498, 0.0036985764745622873, 0.06802930682897568, 0.02484762854874134, 0.057649459689855576, 0.1606004238128662], [0.03736208751797676, 0.11793919652700424, 0.0180205088108778, 0.0001436693564755842, 0.0030756669584661722, 0.08228655159473419, 0.12110688537359238, 0.09650447964668274, 0.015347721055150032, 0.0004259537090547383, 0.00022625335259363055, 0.001013986300677061, 0.0784289613366127, 0.2240448147058487, 0.18707746267318726], [0.7529165148735046, 0.7075774073600769, 0.6068683862686157, 0.3852986991405487, 0.6197313666343689, 0.6735447645187378, 0.6598724722862244, 0.7226093411445618, 0.31395286321640015, 0.2518909275531769, 0.07010441273450851, 0.21793116629123688, 0.4325476884841919, 0.7029338479042053, 0.06848814338445663]], [[0.0006553527782671154, 0.5631614327430725, 0.0008777088369242847, 0.00020331511041149497, 0.0014234310947358608, 0.013944034464657307, 9.958680493582506e-06, 0.01898920349776745, 0.00014103656576480716, 1.4779416233068332e-06, 1.1701366275929104e-07, 1.195983372781484e-06, 0.00012817273091059178, 3.365538941579871e-05, 0.00028557839686982334], [0.00638999929651618, 0.7093943953514099, 0.004974186420440674, 0.06159398332238197, 0.003979360219091177, 0.06536109745502472, 0.005324128083884716, 0.02885170467197895, 0.0003847253101412207, 0.0002721542550716549, 4.3882369936909527e-05, 0.00024302180099766701, 0.00612376956269145, 0.006710950285196304, 0.0343138724565506], [0.109707772731781, 0.1680740863084793, 0.05170662701129913, 0.04158816486597061, 0.026700180023908615, 0.23248757421970367, 0.5156019330024719, 0.3799504041671753, 0.02909121848642826, 0.009008231572806835, 0.0013055672170594335, 0.0032788640819489956, 0.0791734829545021, 0.010587821714580059, 0.06850002706050873], [0.04004191607236862, 0.02257939800620079, 0.01325287576764822, 0.14834734797477722, 0.0700073167681694, 0.12831416726112366, 0.47980472445487976, 0.3121630549430847, 0.05984592065215111, 0.015101294964551926, 0.002668763743713498, 0.0007187540177255869, 0.04004915803670883, 0.0007627750164829195, 0.05523831769824028], [0.0007188548916019499, 0.006864115130156279, 0.00033292395528405905, 0.000431404507253319, 0.0152564262971282, 0.2775210440158844, 0.03714991733431816, 0.7278205156326294, 0.004819776862859726, 0.00047404138604179025, 0.0003997469611931592, 0.0001266899926122278, 0.0201359074562788, 0.0027800032403320074, 0.042311206459999084], [0.00020999301341362298, 0.0025689874310046434, 3.502765650864603e-07, 6.610702985199168e-05, 0.00024143110204022378, 0.018905406817793846, 0.033397458493709564, 0.4650881290435791, 0.004783111158758402, 0.00013528004637919366, 5.751344360760413e-06, 7.93816871009767e-05, 0.0039043116848915815, 0.0005016719806008041, 0.07914639264345169], [0.00019393693946767598, 0.07456899434328079, 1.429513213224709e-05, 4.6383509470615536e-05, 6.820548151154071e-05, 0.004400796256959438, 0.0021800962276756763, 0.45963534712791443, 0.00143687822856009, 0.0008175616967491806, 6.983020284678787e-05, 3.49152869603131e-05, 0.0030698180198669434, 0.0006545006763190031, 0.001625033444724977], [0.004301158711314201, 0.013502174988389015, 4.788395017385483e-05, 0.00021532995742745697, 7.713190279901028e-05, 0.001439842046238482, 0.005622516851872206, 0.121849425137043, 0.006593172438442707, 0.006624745205044746, 0.0006814572843722999, 0.0002721978526096791, 0.0009267745190300047, 0.0016606011195108294, 0.2357456088066101], [0.0064394231885671616, 0.03409593552350998, 0.0025135872419923544, 0.0008376456098631024, 0.0004409599641803652, 0.0026055865455418825, 0.005634414032101631, 0.014003962278366089, 0.2343187928199768, 0.08099395036697388, 0.23927520215511322, 0.01715606264770031, 0.10332414507865906, 0.021894987672567368, 0.1941189020872116], [0.0004975660121999681, 0.0015548047376796603, 6.826691333117196e-06, 1.0557592986515374e-06, 2.731301538005937e-05, 0.0005447702133096755, 0.00042012380436062813, 0.0503113828599453, 0.0053693996742367744, 0.0012762928381562233, 0.0017790982965379953, 0.019809026271104813, 0.47653263807296753, 0.008869247511029243, 0.017010610550642014], [0.00012974163109902292, 0.005610004533082247, 2.3442629753844813e-05, 1.8520654521125834e-06, 3.9678394387010485e-05, 0.0016583451069891453, 0.00029088594601489604, 0.004530484322458506, 0.0021493860986083746, 0.00029196502873674035, 0.0005848451401107013, 0.0028240433894097805, 0.4590959846973419, 0.22978197038173676, 0.0020738127641379833], [0.00021855060185771435, 0.005491270218044519, 1.9927349057979882e-05, 7.633860150235705e-06, 0.0004071943403687328, 0.008836714550852776, 7.301902951439843e-05, 0.011723233386874199, 1.7278060113312677e-05, 0.0001269245840376243, 0.00022235361393541098, 0.016586007550358772, 0.41012606024742126, 0.37776312232017517, 0.0024871949572116137], [0.02619638666510582, 0.18392468988895416, 0.0003054745029658079, 0.00016413358389399946, 0.0015171386767178774, 0.004799532704055309, 0.004810427315533161, 0.058836404234170914, 0.0003794554795604199, 0.0017285931389778852, 0.000568193441722542, 0.003299211384728551, 0.6178385019302368, 0.5079926252365112, 0.05467592179775238], [0.03445081040263176, 0.14193737506866455, 0.0007241201237775385, 0.0002892682678066194, 0.0003202178922947496, 0.003702279180288315, 0.01134149543941021, 0.12129464000463486, 0.0006569268880411983, 0.0008894759230315685, 8.523569704266265e-05, 0.00030898841214366257, 0.7088924646377563, 0.10790188610553741, 0.05374660715460777], [0.04547691345214844, 0.010678221471607685, 0.0016328264027833939, 0.024403419345617294, 0.012795579619705677, 0.004323439672589302, 0.06414945423603058, 0.014008321799337864, 0.011475995182991028, 0.00871653389185667, 0.012156924232840538, 0.0147528275847435, 0.009472412057220936, 0.0331418551504612, 0.1366012692451477]], [[0.3143080472946167, 0.014564945362508297, 0.07743841409683228, 0.19665417075157166, 0.23130221664905548, 0.03274351730942726, 0.23599109053611755, 0.04763320833444595, 0.20168107748031616, 0.7521476149559021, 0.7922006249427795, 0.840878427028656, 0.6463541388511658, 0.6008138656616211, 0.0070990691892802715], [0.05880431830883026, 0.004086965229362249, 0.06557433307170868, 0.4476080536842346, 0.32179930806159973, 0.2046266496181488, 0.5952353477478027, 0.20483972132205963, 0.7834360599517822, 0.27592822909355164, 0.5900363922119141, 0.6986290812492371, 0.3548848032951355, 0.36629796028137207, 0.07452832907438278], [0.4484235942363739, 0.0712433010339737, 0.09740526974201202, 0.49982836842536926, 0.18807044625282288, 0.007537430617958307, 0.2073078453540802, 0.015238385647535324, 0.18028782308101654, 0.6095888018608093, 0.4225178062915802, 0.6769288778305054, 0.3957397937774658, 0.7102670669555664, 0.05611870437860489], [0.4341801106929779, 0.05481646955013275, 0.17834456264972687, 0.2579769194126129, 0.326920747756958, 0.0030261597130447626, 0.03147314488887787, 0.003279186552390456, 0.09941483289003372, 0.5679370760917664, 0.8480010032653809, 0.8133074045181274, 0.4710683822631836, 0.9189481139183044, 0.04321537911891937], [0.559230387210846, 0.08983521163463593, 0.16111011803150177, 0.14667965471744537, 0.32596829533576965, 0.008685072883963585, 0.1111784353852272, 0.02690659649670124, 0.06770152598619461, 0.18340016901493073, 0.4614297151565552, 0.502476155757904, 0.42325475811958313, 0.5992166996002197, 0.05437220633029938], [0.367906779050827, 0.21432256698608398, 0.3548191487789154, 0.2603428363800049, 0.22096140682697296, 0.0013341127196326852, 0.021726170554757118, 0.005543001927435398, 0.5389296412467957, 0.818263828754425, 0.919593095779419, 0.8187286257743835, 0.4823090434074402, 0.4897681474685669, 0.07018090784549713], [0.7116888761520386, 0.17206020653247833, 0.6874114871025085, 0.19288089871406555, 0.20990870893001556, 0.011273512616753578, 0.2026582807302475, 0.004371582996100187, 0.10976968705654144, 0.4432500898838043, 0.7022042274475098, 0.8704607486724854, 0.721519947052002, 0.7422701716423035, 0.025589054450392723], [0.7674684524536133, 0.20032620429992676, 0.42808812856674194, 0.11714937537908554, 0.32732346653938293, 0.009955272078514099, 0.05444686487317085, 0.0040375906974077225, 0.12078685313463211, 0.6266691088676453, 0.5163981914520264, 0.8307003378868103, 0.32096055150032043, 0.24524804949760437, 0.04717922583222389], [0.7549813389778137, 0.15439504384994507, 0.33331331610679626, 0.24930144846439362, 0.2927357852458954, 0.04936225712299347, 0.44933974742889404, 0.06466211378574371, 0.09519664198160172, 0.08716140687465668, 0.058296240866184235, 0.09990595281124115, 0.5117565989494324, 0.1508449912071228, 0.039490822702646255], [0.654628574848175, 0.3205694854259491, 0.5841068029403687, 0.21299651265144348, 0.365792840719223, 0.0401315838098526, 0.18686936795711517, 0.05883712321519852, 0.05069931596517563, 0.33667507767677307, 0.3354107439517975, 0.22027519345283508, 0.05277648940682411, 0.09031395614147186, 0.015531455166637897], [0.3366456627845764, 0.1530359387397766, 0.41866233944892883, 0.39775165915489197, 0.7769761681556702, 0.06979230791330338, 0.41583842039108276, 0.02130916155874729, 0.14617334306240082, 0.25815388560295105, 0.1423572301864624, 0.18894770741462708, 0.041056301444768906, 0.026175418868660927, 0.03888533264398575], [0.24913249909877777, 0.0818726196885109, 0.5426726341247559, 0.1687711775302887, 0.8305720090866089, 0.26261457800865173, 0.39635857939720154, 0.1712585836648941, 0.1158638522028923, 0.17366157472133636, 0.12521226704120636, 0.5298976302146912, 0.041029125452041626, 0.02415779046714306, 0.1170416921377182], [0.3567614257335663, 0.035316068679094315, 0.3819185495376587, 0.10469090938568115, 0.3454773426055908, 0.09596268832683563, 0.3821227550506592, 0.17425164580345154, 0.40528857707977295, 0.1745157092809677, 0.10956539213657379, 0.5078453421592712, 0.0026470222510397434, 0.016186503693461418, 0.08932095021009445], [0.330766886472702, 0.039845019578933716, 0.6981685757637024, 0.09713104367256165, 0.8411048650741577, 0.16356231272220612, 0.3630223274230957, 0.1627381145954132, 0.6954487562179565, 0.17326875030994415, 0.1752558946609497, 0.24479816854000092, 0.026946308091282845, 0.016200177371501923, 0.06702017039060593], [0.07683827728033066, 0.07034450024366379, 0.21707428991794586, 0.2902449369430542, 0.1834353357553482, 0.01726321130990982, 0.13144701719284058, 0.005189047660678625, 0.150242418050766, 0.1182665303349495, 0.4041094183921814, 0.12062898278236389, 0.05959685891866684, 0.1186181977391243, 0.1283060759305954]], [[0.06827192008495331, 0.0036808219738304615, 0.005701950751245022, 0.005157816223800182, 0.003777393838390708, 0.024757172912359238, 0.0020165019668638706, 0.010267351754009724, 0.013163687661290169, 0.001690453034825623, 0.00837681908160448, 0.00522418599575758, 0.061038240790367126, 0.015438525006175041, 0.325132817029953], [0.7422951459884644, 0.028774140402674675, 0.06394203752279282, 0.00887901522219181, 0.04345611855387688, 0.027670713141560555, 0.0295904241502285, 0.01398912351578474, 0.025535697117447853, 0.02094031311571598, 0.022182827815413475, 0.009663421660661697, 0.049684178084135056, 0.026225639507174492, 0.13834334909915924], [0.20897099375724792, 0.21868035197257996, 0.23815643787384033, 0.005872054491192102, 0.0010661164997145534, 0.0017293300479650497, 0.00042713910806924105, 0.002609806600958109, 0.016046296805143356, 0.009100147522985935, 0.014420107938349247, 0.0022624030243605375, 0.010553905740380287, 0.007111164275556803, 0.25332581996917725], [0.2508500814437866, 0.20390872657299042, 0.7329782247543335, 0.07117453217506409, 0.016424261033535004, 0.021444672718644142, 0.001510130357928574, 0.004098558332771063, 0.0484151765704155, 0.02061472274363041, 0.001126835006289184, 0.0022107160184532404, 0.007578131277114153, 0.004504901356995106, 0.1403624713420868], [0.27370113134384155, 0.8174626231193542, 0.7193068861961365, 0.7076587677001953, 0.07771007716655731, 0.01620337925851345, 0.004001453518867493, 0.004182097036391497, 0.03681829199194908, 0.09453201293945312, 0.026799198240041733, 0.006044679321348667, 0.03725922852754593, 0.016391301527619362, 0.04474738612771034], [0.3889567255973816, 0.4487122893333435, 0.5870586037635803, 0.6609426140785217, 0.6319714188575745, 0.10676700621843338, 0.009257740341126919, 0.0017087672604247928, 0.027955975383520126, 0.07590407133102417, 0.006841681431978941, 0.08621303737163544, 0.05063363164663315, 0.016846608370542526, 0.05719457566738129], [0.00991373136639595, 0.0983041524887085, 0.15667210519313812, 0.19277995824813843, 0.5809133052825928, 0.7996482253074646, 0.06316149979829788, 0.004939877428114414, 0.023352928459644318, 0.010926214046776295, 0.008795071393251419, 0.006998055148869753, 0.0765714943408966, 0.006783204153180122, 0.05886436253786087], [0.07887525111436844, 0.017153050750494003, 0.2216421663761139, 0.13068468868732452, 0.5295770764350891, 0.35302138328552246, 0.8493326902389526, 0.04265422001481056, 0.052519019693136215, 0.027357611805200577, 0.01357424259185791, 0.004279646556824446, 0.026089098304510117, 0.04089489206671715, 0.014124121516942978], [0.03465811163187027, 0.15351061522960663, 0.2825109362602234, 0.08174889534711838, 0.19755861163139343, 0.5825939774513245, 0.37084007263183594, 0.7892780900001526, 0.1287456750869751, 0.006381133571267128, 0.001940184272825718, 0.00047384126810356975, 0.011903955601155758, 0.003972942009568214, 0.06710142642259598], [0.013788340613245964, 0.006632686126977205, 0.02207767777144909, 0.0785517543554306, 0.014113685116171837, 0.048156753182411194, 0.1944313496351242, 0.22155866026878357, 0.49656373262405396, 0.009422117844223976, 0.004702835343778133, 0.0007582302205264568, 0.00014129001647233963, 0.00033574484405107796, 0.23994654417037964], [0.00469209672883153, 0.015491061843931675, 0.035103749483823776, 0.009631682187318802, 0.008573818951845169, 0.051444172859191895, 0.04315423220396042, 0.05495374649763107, 0.6859460473060608, 0.5370080471038818, 0.06784479320049286, 0.004556083586066961, 0.001035997993312776, 0.0006345660076476634, 0.13974453508853912], [0.02668480947613716, 0.016245348379015923, 0.01112398225814104, 0.008507933467626572, 0.02067524567246437, 0.17763113975524902, 0.05662769451737404, 0.04544723033905029, 0.7948054671287537, 0.7384940385818481, 0.5224500298500061, 0.1060851439833641, 0.014122114516794682, 0.0019289307529106736, 0.08371670544147491], [0.02394592948257923, 0.04371663182973862, 0.028385786339640617, 0.007640721742063761, 0.014576996676623821, 0.08887659758329391, 0.017377078533172607, 0.020801657810807228, 0.187345951795578, 0.5047414302825928, 0.6342922449111938, 0.3672487437725067, 0.04719087854027748, 0.10966072231531143, 0.08543073385953903], [0.009629062376916409, 0.020042795687913895, 0.006009343545883894, 0.001406975439749658, 0.0026742229238152504, 0.006072318647056818, 0.006495587062090635, 0.0032924923580139875, 0.034326668828725815, 0.5998041033744812, 0.7456773519515991, 0.7204623818397522, 0.012111457996070385, 0.018825965002179146, 0.008305574767291546], [0.08114123344421387, 0.05478224158287048, 0.11802507936954498, 0.1980995535850525, 0.15338915586471558, 0.11414031684398651, 0.06528255343437195, 0.04494854062795639, 0.26375874876976013, 0.30061599612236023, 0.26960447430610657, 0.5329554677009583, 0.4288364350795746, 0.12292250245809555, 0.12395624816417694]], [[0.09139528125524521, 0.1232069656252861, 0.06926427036523819, 0.03596228361129761, 0.08677947521209717, 0.3523865342140198, 0.17220446467399597, 0.3048216700553894, 0.24129998683929443, 0.008230631239712238, 0.012852879241108894, 0.0024019270204007626, 0.003931952640414238, 0.002576343482360244, 0.13348431885242462], [0.005495021585375071, 0.009821278043091297, 0.006606503389775753, 0.0009270968730561435, 0.022634856402873993, 0.02637101709842682, 0.03666122257709503, 0.003247066168114543, 0.03138025477528572, 0.0023785934317857027, 0.007012520916759968, 0.0027185468934476376, 0.001623710268177092, 0.009003029204905033, 0.24841202795505524], [0.004891206510365009, 0.01856830157339573, 0.01660238206386566, 0.05400720611214638, 0.2678459584712982, 0.21548990905284882, 0.0901486948132515, 0.14165979623794556, 0.4387242794036865, 0.0060303402133286, 0.03774549812078476, 0.022296983748674393, 0.014843892306089401, 0.003844154067337513, 0.0701230987906456], [0.009136357344686985, 0.005524215288460255, 0.002000550739467144, 0.004360574297606945, 0.06230698525905609, 0.032116882503032684, 0.14447683095932007, 0.11250873655080795, 0.12456412613391876, 0.017903752624988556, 0.03641437739133835, 0.030236193910241127, 0.03817100450396538, 0.0020203718449920416, 0.24235397577285767], [0.011458649300038815, 0.0028747334145009518, 0.0048751854337751865, 0.0034302298445254564, 0.032581884413957596, 0.009492963552474976, 0.29646721482276917, 0.024549754336476326, 0.5199102163314819, 0.07497825473546982, 0.039336495101451874, 0.23366358876228333, 0.2855432629585266, 0.0047793262638151646, 0.131587415933609], [0.0048281243070960045, 0.014400148764252663, 0.00021499136346392334, 0.00015902110317256302, 0.0008502291166223586, 0.005816742777824402, 0.03721616789698601, 0.31765323877334595, 0.006985681131482124, 9.90723492577672e-05, 0.0015535155544057488, 0.002471775049343705, 0.00966054666787386, 0.002636645222082734, 0.15553238987922668], [0.01824354939162731, 0.02838711440563202, 0.0006440957658924162, 0.00040316785452887416, 0.00041587575105950236, 0.0021029487252235413, 0.07766012847423553, 0.3384210765361786, 0.005884509067982435, 0.02229108288884163, 0.02292727865278721, 0.00326070049777627, 0.002748187631368637, 0.004811563994735479, 0.08466839045286179], [0.0009052195237018168, 0.00028935770387761295, 0.00010135041520697996, 4.4237076508579776e-05, 9.765469440026209e-05, 0.0003226006228942424, 0.0006174442823976278, 0.003764552064239979, 0.001191335148178041, 0.0005841490346938372, 0.001988127361983061, 0.0019700597040355206, 0.0006354944198392332, 0.0011416736524552107, 0.25631290674209595], [0.007226317655295134, 0.015471585094928741, 0.027516253292560577, 0.0063530029729008675, 0.015222059562802315, 0.004327190574258566, 0.010739101096987724, 0.0023785619996488094, 0.053105201572179794, 0.0674574077129364, 0.31870341300964355, 0.4986713230609894, 0.027042971923947334, 0.0736011192202568, 0.116986483335495], [0.015794623643159866, 0.009404269978404045, 0.017993446439504623, 0.003823975333943963, 0.004969433881342411, 0.03679484874010086, 0.04242165759205818, 0.017222637310624123, 0.1201641708612442, 0.016131659969687462, 0.3518509864807129, 0.3061373829841614, 0.0458594486117363, 0.15943044424057007, 0.17968055605888367], [0.006380036938935518, 0.028477374464273453, 0.006851766724139452, 0.005024573765695095, 0.02579522877931595, 0.052536945790052414, 0.0111169358715415, 0.0038714397232979536, 0.008046599105000496, 0.008921324275434017, 0.011395278386771679, 0.10255969315767288, 0.21638940274715424, 0.44467252492904663, 0.05895284563302994], [0.010142950341105461, 0.001643709372729063, 0.002422438468784094, 0.0009472724632360041, 0.0033483330626040697, 0.003415578044950962, 0.03889569267630577, 0.005287462379783392, 0.00042015319922938943, 0.0010667687747627497, 0.00740370387211442, 0.00895014964044094, 0.0067735291086137295, 0.017782215029001236, 0.26753443479537964], [0.11724554747343063, 0.0023070531897246838, 0.004510094877332449, 0.0014967885799705982, 0.007825762964785099, 0.00018500315491110086, 0.013543304987251759, 0.0012864026939496398, 0.0007778326398693025, 0.00044295378029346466, 0.001640060218051076, 0.0014512997586280107, 0.002360806567594409, 0.2112705558538437, 0.19457924365997314], [0.09882069379091263, 0.014871560037136078, 0.005077258683741093, 0.0014827846316620708, 0.005620975513011217, 0.0024449406191706657, 0.07368315756320953, 0.06950978189706802, 0.0017206794582307339, 0.00039900749106891453, 0.0006052122334949672, 0.0005968212499283254, 0.004762541502714157, 0.0232950821518898, 0.2500154376029968], [0.001020739320665598, 0.001402992638759315, 0.0006185534875839949, 0.0003395593084860593, 0.0013021298218518496, 0.0008022591937333345, 0.003452729433774948, 0.0026675688568502665, 0.0021077031269669533, 0.0008018113439902663, 0.0017594166565686464, 0.0005115982494316995, 0.0007778447470627725, 0.0008368113776668906, 0.13888627290725708]], [[0.04622220993041992, 0.12740419805049896, 0.05372706800699234, 0.5582705140113831, 0.030120277777314186, 0.3703221380710602, 0.020304178819060326, 0.3357560634613037, 0.11819478869438171, 0.0765489861369133, 0.09261158853769302, 0.03858334198594093, 0.13079233467578888, 0.0447748564183712, 0.11706516146659851], [0.0919138491153717, 0.05798470228910446, 0.02827676385641098, 0.34965166449546814, 0.05504997447133064, 0.1526506543159485, 0.09941896051168442, 0.4367760419845581, 0.061004042625427246, 0.5390062928199768, 0.28723591566085815, 0.15840129554271698, 0.2018149495124817, 0.11561664938926697, 0.1249081939458847], [0.032068803906440735, 0.0549696609377861, 0.018587671220302582, 0.2202640324831009, 0.0011182812741026282, 0.03810814768075943, 0.027008401229977608, 0.3763306438922882, 0.11146998405456543, 0.16719762980937958, 0.13283231854438782, 0.014421377331018448, 0.07254088670015335, 0.007401765324175358, 0.20662666857242584], [0.10753453522920609, 0.479284405708313, 0.009764611721038818, 0.0431443527340889, 0.0008862981921993196, 0.03188035264611244, 0.00600279588252306, 0.43093177676200867, 0.08460848033428192, 0.18502341210842133, 0.038902610540390015, 0.030237559229135513, 0.1820157915353775, 0.03367093205451965, 0.14427724480628967], [0.013928310945630074, 0.032752107828855515, 0.0024797581136226654, 0.10617181658744812, 0.0002726189268287271, 0.011333486996591091, 0.005626056343317032, 0.05421115458011627, 0.020341530442237854, 0.0548044852912426, 0.027503041550517082, 0.005752534605562687, 0.033552803099155426, 0.008454940281808376, 0.388910174369812], [0.15046736598014832, 0.296213299036026, 0.044096194207668304, 0.05168119817972183, 0.02727358601987362, 0.04717152938246727, 0.0016543868696317077, 0.035376399755477905, 0.027143586426973343, 0.0870317667722702, 0.05812281742691994, 0.06705813109874725, 0.3147181272506714, 0.39039844274520874, 0.23394177854061127], [0.14644725620746613, 0.5605929493904114, 0.11812092363834381, 0.5902084112167358, 0.021858595311641693, 0.10718227922916412, 0.007383488584309816, 0.019886687397956848, 0.06570647656917572, 0.10820640623569489, 0.1357717514038086, 0.025582531467080116, 0.077891044318676, 0.061965201050043106, 0.164744034409523], [0.049012791365385056, 0.35138410329818726, 0.26388463377952576, 0.7301797866821289, 0.014552393928170204, 0.24720129370689392, 0.0041521950624883175, 0.07795857638120651, 0.014070906676352024, 0.04667593538761139, 0.1480453461408615, 0.010990227572619915, 0.20039354264736176, 0.17517414689064026, 0.0717916414141655], [0.09980960935354233, 0.4834202826023102, 0.20237547159194946, 0.5161312222480774, 0.2011035680770874, 0.31254804134368896, 0.023049525916576385, 0.09284620732069016, 0.030714770779013634, 0.009841320104897022, 0.03625232353806496, 0.02249438874423504, 0.030981028452515602, 0.01249231118708849, 0.19809871912002563], [0.2242409735918045, 0.5898000001907349, 0.2996082305908203, 0.6961580514907837, 0.3950251638889313, 0.824604332447052, 0.0551396869122982, 0.5436567068099976, 0.06683327257633209, 0.03568824753165245, 0.060814060270786285, 0.00592254800722003, 0.012778226286172867, 0.017990900203585625, 0.1082865446805954], [0.03427329286932945, 0.7018846869468689, 0.18350760638713837, 0.5559015274047852, 0.03810380771756172, 0.7226935029029846, 0.05184842646121979, 0.881024181842804, 0.06315085291862488, 0.03384441137313843, 0.014913397841155529, 0.002015632577240467, 0.008405282162129879, 0.0011906703002750874, 0.2768104076385498], [0.022437993437051773, 0.7336767315864563, 0.2893984615802765, 0.7315550446510315, 0.021726222708821297, 0.3247562646865845, 0.05117126554250717, 0.7097986340522766, 0.03149837628006935, 0.017582548782229424, 0.017906883731484413, 0.004864181391894817, 0.0014982494758442044, 0.0005988480988889933, 0.17147301137447357], [0.279982328414917, 0.427709698677063, 0.4798988997936249, 0.811837911605835, 0.5607104301452637, 0.3233453035354614, 0.03364620357751846, 0.48738226294517517, 0.20507316291332245, 0.2806957960128784, 0.20560167729854584, 0.021487781777977943, 0.0051806773990392685, 0.018182942643761635, 0.10378202050924301], [0.15081651508808136, 0.5779510736465454, 0.21354816854000092, 0.8126901984214783, 0.041816346347332, 0.5376638174057007, 0.02729017473757267, 0.45972490310668945, 0.1708957701921463, 0.17148789763450623, 0.06268936395645142, 0.0045938147231936455, 0.0036332160234451294, 0.0009066996863111854, 0.10311751067638397], [0.009540104307234287, 0.03889232128858566, 0.016071060672402382, 0.08366316556930542, 0.004574422258883715, 0.029401082545518875, 0.00834547821432352, 0.0893266350030899, 0.14732055366039276, 0.09065960347652435, 0.14173488318920135, 0.042114999145269394, 0.004022075328975916, 0.003513866104185581, 0.1347859650850296]], [[0.009570755064487457, 0.005546795669943094, 0.006825579330325127, 0.033384330570697784, 0.3769712448120117, 0.15916845202445984, 0.5290282368659973, 0.24695992469787598, 0.2377869039773941, 0.0913546234369278, 0.07570143043994904, 0.06522544473409653, 0.12397455424070358, 0.2645682692527771, 0.1787039041519165], [0.0061562443152070045, 0.040286894887685776, 0.0029807272367179394, 0.016133036464452744, 0.1151214987039566, 0.07519882172346115, 0.10128971189260483, 0.046498823910951614, 0.04111110791563988, 0.11845260113477707, 0.08915312588214874, 0.10556784272193909, 0.16933780908584595, 0.3531811535358429, 0.21578538417816162], [0.14712950587272644, 0.04435151070356369, 0.015454337000846863, 0.01427951455116272, 0.08342041075229645, 0.005383625626564026, 0.10468690097332001, 0.05861024558544159, 0.08666124939918518, 0.15304753184318542, 0.23543620109558105, 0.2374279797077179, 0.10751555860042572, 0.10399115085601807, 0.23440681397914886], [0.0859314426779747, 0.15731151401996613, 0.005385389551520348, 0.04620514437556267, 0.010708490386605263, 0.006711416877806187, 0.012445325031876564, 0.056288186460733414, 0.097142793238163, 0.07020799815654755, 0.02479076385498047, 0.0890590250492096, 0.22972674667835236, 0.034618109464645386, 0.28529092669487], [0.07441635429859161, 0.018118128180503845, 0.016377849504351616, 0.003080169903114438, 0.20936372876167297, 0.0007255859090946615, 0.03578657656908035, 0.00550744216889143, 0.1172742024064064, 0.5684130191802979, 0.3980042636394501, 0.15252694487571716, 0.10817506164312363, 0.23486874997615814, 0.2619861364364624], [0.05188249424099922, 0.0069924332201480865, 0.0009591103880666196, 0.0061192926950752735, 0.002253405749797821, 0.006572761107236147, 0.004667140077799559, 0.11107926070690155, 0.03415685519576073, 0.010113962925970554, 0.006655086297541857, 0.010832482948899269, 0.03651394695043564, 0.040573474019765854, 0.2686486840248108], [0.08095332235097885, 0.02014574408531189, 0.011188640259206295, 0.0037319576367735863, 0.024485761299729347, 0.0018746056593954563, 0.04114176332950592, 0.034570205956697464, 0.009728988632559776, 0.07755846530199051, 0.09898480027914047, 0.0613434873521328, 0.09528356045484543, 0.1511603444814682, 0.2821846306324005], [0.04335615411400795, 0.026033984497189522, 0.03572213277220726, 0.017578190192580223, 0.05956277251243591, 0.01715734601020813, 0.011929154396057129, 0.28936532139778137, 0.0027683174703270197, 0.061091482639312744, 0.23734883964061737, 0.10397756844758987, 0.16337142884731293, 0.37352773547172546, 0.18409839272499084], [0.06077902019023895, 0.031166722998023033, 0.11759120225906372, 0.1409873068332672, 0.24215947091579437, 0.009796793572604656, 0.10265856236219406, 0.01014934666454792, 0.2757207751274109, 0.023714441806077957, 0.038815632462501526, 0.15303847193717957, 0.14991649985313416, 0.6824791431427002, 0.13190437853336334], [0.06505369395017624, 0.006089756730943918, 0.036541152745485306, 0.005829536356031895, 0.20233574509620667, 0.029401954263448715, 0.49993017315864563, 0.030510973185300827, 0.01976127363741398, 0.07993583381175995, 0.017815636470913887, 0.04079095646739006, 0.022992853075265884, 0.6425142288208008, 0.26567763090133667], [0.6054520010948181, 0.07051455229520798, 0.2702813744544983, 0.029061302542686462, 0.13962645828723907, 0.07908772677183151, 0.4563634395599365, 0.02414957620203495, 0.02722080610692501, 0.03215296193957329, 0.015534932725131512, 0.009437407366931438, 0.0218642745167017, 0.08506882190704346, 0.4000338017940521], [0.3943043351173401, 0.11258544027805328, 0.12088752537965775, 0.0732470229268074, 0.030587676912546158, 0.056065596640110016, 0.2533946633338928, 0.04020307958126068, 0.03702285513281822, 0.018525324761867523, 0.009753274731338024, 0.01584538072347641, 0.006842197384685278, 0.013304048217833042, 0.2415902465581894], [0.09087645262479782, 0.0733630359172821, 0.03259122744202614, 0.05433432757854462, 0.028730718418955803, 0.026890264824032784, 0.0992540791630745, 0.042951032519340515, 0.1659460812807083, 0.017093859612941742, 0.006921885069459677, 0.0007972968742251396, 0.010357401333749294, 0.037234287708997726, 0.1852690428495407], [0.2766205668449402, 0.06249983608722687, 0.03302843123674393, 0.08374682813882828, 0.07296875864267349, 0.016804786399006844, 0.2612326145172119, 0.06074067950248718, 0.06402052938938141, 0.021471360698342323, 0.00216249143704772, 0.001582604949362576, 0.0037338242400437593, 0.005314995069056749, 0.23526467382907867], [0.005338736344128847, 0.013486125506460667, 0.016210375353693962, 0.00714905746281147, 0.01115293800830841, 0.008639699779450893, 0.009605110622942448, 0.01017976924777031, 0.008433598093688488, 0.06244685873389244, 0.040223702788352966, 0.009117859415709972, 0.005228321999311447, 0.0028589563444256783, 0.13790398836135864]], [[0.3301994204521179, 0.08890271931886673, 0.08465498685836792, 0.06385943293571472, 0.21852104365825653, 0.02508896216750145, 0.03711355850100517, 0.034155964851379395, 0.1728704422712326, 0.06344152241945267, 0.01567375846207142, 0.047274719923734665, 0.023079151287674904, 0.06240373104810715, 0.17532315850257874], [0.08584976941347122, 0.12593986093997955, 0.03313801810145378, 0.017280908301472664, 0.17652282118797302, 0.268716037273407, 0.12116961926221848, 0.2558431923389435, 0.04765854403376579, 0.04246087744832039, 0.0035840249620378017, 0.02463056705892086, 0.2119264155626297, 0.11800020188093185, 0.14393316209316254], [0.046346988528966904, 0.39951857924461365, 0.5525277853012085, 0.10910754650831223, 0.13167327642440796, 0.030212268233299255, 0.021472660824656487, 0.018023721873760223, 0.1298973113298416, 0.04191790521144867, 0.1535157859325409, 0.04246748238801956, 0.3158371150493622, 0.15602277219295502, 0.1064835637807846], [0.0703379437327385, 0.07535148411989212, 0.05811825022101402, 0.428435742855072, 0.07080380618572235, 0.15123498439788818, 0.3036666214466095, 0.07787945121526718, 0.48052453994750977, 0.12286645174026489, 0.04789941385388374, 0.033336445689201355, 0.030469346791505814, 0.005462532863020897, 0.08732402324676514], [0.0663379579782486, 0.03187985718250275, 0.09551261365413666, 0.0323714055120945, 0.33827176690101624, 0.1471284031867981, 0.3127540946006775, 0.02734280750155449, 0.23260797560214996, 0.02317011170089245, 0.046465177088975906, 0.0992102101445198, 0.09175661206245422, 0.13314616680145264, 0.07444406300783157], [0.034720633178949356, 0.01384154986590147, 0.012703170999884605, 0.020319687202572823, 0.10901976376771927, 0.7807050347328186, 0.03443336486816406, 0.028544975444674492, 0.061822760850191116, 0.00809338316321373, 0.007171421777456999, 0.01342758722603321, 0.09649696201086044, 0.05527613312005997, 0.10404697060585022], [0.030445659533143044, 0.041789710521698, 0.023520270362496376, 0.01782963052392006, 0.16124852001667023, 0.06983006745576859, 0.4703807234764099, 0.01895260065793991, 0.027326058596372604, 0.07994905114173889, 0.026343191042542458, 0.032219063490629196, 0.022085823118686676, 0.031095484271645546, 0.24155765771865845], [0.055046502500772476, 0.3847074508666992, 0.04798666015267372, 0.003912709187716246, 0.06840738654136658, 0.36789029836654663, 0.07226144522428513, 0.4079316258430481, 0.022340288385748863, 0.10408379882574081, 0.07774890959262848, 0.04753485694527626, 0.285355806350708, 0.16128498315811157, 0.02375940792262554], [0.03513112664222717, 0.11586778610944748, 0.03034079447388649, 0.001017131027765572, 0.04634808376431465, 0.03800477832555771, 0.03768199309706688, 0.013300161808729172, 0.14031966030597687, 0.015252463519573212, 0.053176701068878174, 0.06856708973646164, 0.13856393098831177, 0.054046642035245895, 0.2367301732301712], [0.025786809623241425, 0.06564735621213913, 0.039564721286296844, 0.0026341548655182123, 0.016324089840054512, 0.016701271757483482, 0.020613567903637886, 0.0767805427312851, 0.22950275242328644, 0.51694655418396, 0.1544727236032486, 0.1054847463965416, 0.025381706655025482, 0.05480813980102539, 0.1677880734205246], [0.012255452573299408, 0.02410232275724411, 0.08552651852369308, 0.002623841166496277, 0.010307574644684792, 0.0127415731549263, 0.021285703405737877, 0.010095748119056225, 0.06661782413721085, 0.12517453730106354, 0.7383688688278198, 0.19885332882404327, 0.07497892528772354, 0.10072800517082214, 0.06182975694537163], [0.2776626944541931, 0.046990759670734406, 0.032447993755340576, 0.015461347065865993, 0.08414210379123688, 0.04174359515309334, 0.19995476305484772, 0.013662091456353664, 0.019540153443813324, 0.048985805362463, 0.25616249442100525, 0.2484772503376007, 0.1799653023481369, 0.17696446180343628, 0.09890354424715042], [0.05504303798079491, 0.08340897411108017, 0.04799877479672432, 0.017563870176672935, 0.028545444831252098, 0.1704884171485901, 0.030681313946843147, 0.02359093725681305, 0.007767115719616413, 0.019779905676841736, 0.03771185874938965, 0.029841119423508644, 0.28957709670066833, 0.04182300344109535, 0.12634176015853882], [0.06153338775038719, 0.02491314895451069, 0.02542346529662609, 0.0031092099379748106, 0.03241894021630287, 0.1874629557132721, 0.1358277052640915, 0.02619485929608345, 0.017582973465323448, 0.03225348889827728, 0.01329810544848442, 0.026643214747309685, 0.1614912450313568, 0.6035103797912598, 0.09545250982046127], [0.027727488428354263, 0.10283610969781876, 0.02349940501153469, 0.010801603086292744, 0.0136191351339221, 0.1518852412700653, 0.05784522369503975, 0.11107083410024643, 0.10270816832780838, 0.1666017472743988, 0.06030665338039398, 0.06198698654770851, 0.05951831862330437, 0.015173939988017082, 0.1310720145702362]]], [[[0.042950913310050964, 0.0007196685182861984, 0.027302199974656105, 0.006393556483089924, 0.09642192721366882, 0.01637418009340763, 0.0023990001063793898, 0.0024961719755083323, 0.0020593979861587286, 0.0015603104839101434, 0.03318732604384422, 0.35782966017723083, 0.0989728793501854, 0.061845745891332626, 0.203965961933136], [0.10955026745796204, 0.02388770505785942, 0.04351670667529106, 0.023162608966231346, 0.012142845429480076, 0.035775765776634216, 0.03457501530647278, 0.11992064118385315, 0.01240380760282278, 0.007506475783884525, 0.05337386205792427, 0.6535924673080444, 0.5536571145057678, 0.19680790603160858, 0.140446737408638], [0.005947283003479242, 0.0010204642312601209, 0.18009734153747559, 0.006447697523981333, 0.012463629245758057, 7.613956404384226e-05, 7.241032290039584e-05, 0.00011841111700050533, 0.0034185522235929966, 0.0034766956232488155, 0.002135018352419138, 0.005925178527832031, 0.003751354990527034, 0.0019247139571234584, 0.28479355573654175], [0.014483454637229443, 0.022866876795887947, 0.32726621627807617, 0.007662326563149691, 0.09431912004947662, 0.0004296264669392258, 0.0011131323408335447, 0.0014158609556034207, 0.018019702285528183, 0.01865016296505928, 0.0020740600302815437, 0.0029411758296191692, 0.0016890126280486584, 0.0063899424858391285, 0.12852828204631805], [0.030419446527957916, 0.058438073843717575, 0.3924228250980377, 0.035587672144174576, 0.08137891441583633, 0.010925069451332092, 0.001356365391984582, 0.0012006007600575686, 0.053269751369953156, 0.0027948038186877966, 0.04010261595249176, 0.01993635483086109, 0.004820133093744516, 0.004111820366233587, 0.21765674650669098], [0.07767480611801147, 0.006269918289035559, 0.09326869994401932, 0.6196063756942749, 0.11043263971805573, 0.052975643426179886, 0.02037718892097473, 0.0008919782703742385, 0.008360025472939014, 0.002104781800881028, 0.0179440937936306, 0.10498880594968796, 0.011864815838634968, 0.002359954407438636, 0.24602332711219788], [0.00026913435431197286, 8.159392746165395e-05, 0.007915529422461987, 0.05068095400929451, 0.6570689678192139, 0.32081079483032227, 0.05758208408951759, 0.0006442792946472764, 0.0015821922570466995, 6.469202344305813e-05, 0.003034515306353569, 0.0310077928006649, 0.025656316429376602, 0.0025228438898921013, 0.023106882348656654], [0.0005435149651020765, 0.0005490019102580845, 0.034476928412914276, 0.01287262886762619, 0.25229769945144653, 0.4536571502685547, 0.10281822830438614, 0.012222280725836754, 0.016108570620417595, 0.00031008716905489564, 0.0026372161228209734, 0.0034134499728679657, 0.0248859953135252, 0.017225822433829308, 0.02475895546376705], [0.000726195692550391, 0.00036735343746840954, 0.007114858832210302, 0.0026034389156848192, 0.01250846590846777, 0.009484091773629189, 0.0354158952832222, 0.0016834242269396782, 0.19215336441993713, 0.007594457361847162, 0.003938279580324888, 2.8376112823025323e-05, 0.001137340790592134, 0.00011368053674232215, 0.29228782653808594], [0.0005387092242017388, 0.0003453432582318783, 0.015091696754097939, 0.06184916943311691, 0.003162123030051589, 0.014056581072509289, 0.012467358261346817, 0.009164737537503242, 0.05548334866762161, 0.008076494559645653, 0.005971547681838274, 0.001972777536138892, 0.006774900481104851, 0.001264052465558052, 0.2362799048423767], [0.0025044670328497887, 0.0023456772323697805, 0.07385681569576263, 0.006188494618982077, 0.021690815687179565, 0.0007893598522059619, 0.002135526854544878, 0.006048245821148157, 0.25190338492393494, 0.09442908316850662, 0.19532348215579987, 0.031008923426270485, 0.009561427868902683, 0.0021240306086838245, 0.21234139800071716], [0.015501828864216805, 0.0072255814447999, 0.006012998055666685, 0.008203291334211826, 0.0171041339635849, 0.001770812552422285, 0.00655776634812355, 0.002186145167797804, 0.15154685080051422, 0.5713958144187927, 0.05368567630648613, 0.051326390355825424, 0.01612916588783264, 0.0019418209558352828, 0.18746227025985718], [0.05876695737242699, 0.005032649263739586, 0.05515526235103607, 0.012789947912096977, 0.017388533800840378, 0.00580496434122324, 0.015462081879377365, 0.009339934214949608, 0.0222479198127985, 0.03960718587040901, 0.14906688034534454, 0.2817051410675049, 0.14850065112113953, 0.09505022317171097, 0.10619710385799408], [0.012425977736711502, 0.0006452641100622714, 0.00298808584921062, 0.001349467202089727, 0.014642779715359211, 0.0010115096811205149, 0.0033098396379500628, 0.00038259345456026495, 0.0035037249326705933, 0.008293021470308304, 0.03801131248474121, 0.8317341208457947, 0.018821584060788155, 0.057542454451322556, 0.011905365623533726], [0.04682805389165878, 0.01908799074590206, 0.10485747456550598, 0.060083843767642975, 0.15075230598449707, 0.029059063643217087, 0.04093548655509949, 0.03368941321969032, 0.017014725133776665, 0.011203174479305744, 0.0391479916870594, 0.24882012605667114, 0.37940239906311035, 0.12485622614622116, 0.12782400846481323]], [[0.010500228963792324, 0.7224081754684448, 0.030353030189871788, 0.00683749420568347, 0.007232841569930315, 0.018554184585809708, 0.0004432629211805761, 0.02719983458518982, 0.0006519495509564877, 0.0012597806053236127, 0.006804677192121744, 0.0011734187137335539, 0.003679303452372551, 0.010371293872594833, 0.019012004137039185], [0.0004097823693882674, 0.007568135391920805, 0.05432860180735588, 0.08570658415555954, 0.005480978172272444, 0.0009473124518990517, 0.000799189496319741, 0.0012391285272315145, 0.00044785221689380705, 0.0009745006100274622, 0.013956908136606216, 0.00011593959061428905, 0.004404959734529257, 0.0031790253706276417, 0.20507724583148956], [0.022728245705366135, 0.0194535069167614, 0.024020839482545853, 0.023168254643678665, 0.45748311281204224, 0.5855799913406372, 0.21754446625709534, 0.1001717820763588, 0.0221620611846447, 0.0033511894289404154, 0.03508710116147995, 0.20201759040355682, 0.2973189353942871, 0.04947788640856743, 0.0494859553873539], [0.010499863885343075, 0.004784405697137117, 0.0035181313287466764, 0.007238015066832304, 0.4155227243900299, 0.8333501219749451, 0.07475034892559052, 0.20445603132247925, 0.005854693241417408, 0.001852003508247435, 0.02841898612678051, 0.243921160697937, 0.10275343060493469, 0.13816815614700317, 0.07406751066446304], [0.00768234534189105, 0.012151399627327919, 0.0006104251369833946, 0.0018971813842654228, 0.08389636874198914, 0.7291921973228455, 0.2573831081390381, 0.13359335064888, 0.0011000150116160512, 0.0005446228897199035, 0.036390628665685654, 0.06110000237822533, 0.1527252048254013, 0.14593005180358887, 0.05624886974692345], [0.0037335127126425505, 0.004452059045433998, 0.00018280810036230832, 0.016856878995895386, 0.0016014263965189457, 0.05306785926222801, 0.5318921208381653, 0.2889253497123718, 0.0004385874199215323, 0.007465890143066645, 0.0005691659171134233, 0.008836256340146065, 0.00793292187154293, 0.0033322598319500685, 0.1706118881702423], [0.00023320072796195745, 0.0486629419028759, 0.0005405444535426795, 0.005952970590442419, 0.0009982762858271599, 0.004001363180577755, 0.009125707671046257, 0.6945337057113647, 0.006549985148012638, 0.007807720452547073, 0.003924727905541658, 0.004149672109633684, 0.003537258366122842, 0.001676861196756363, 0.11541670560836792], [0.0021667596884071827, 0.0005287157837301493, 0.009149480611085892, 0.024324318394064903, 0.0018866003956645727, 0.0003624066011980176, 0.0004668526817113161, 0.0064473398961126804, 0.0217228215187788, 0.0031395854894071817, 0.0052951243706047535, 0.004629157949239016, 0.003511544084176421, 0.0017145106103271246, 0.2705381214618683], [0.0036477160174399614, 0.018601393327116966, 0.00400471780449152, 0.016223786398768425, 0.015442389994859695, 0.030637366697192192, 0.04816145822405815, 0.009263478219509125, 0.08580432087182999, 0.07024423778057098, 0.17587034404277802, 0.2670482397079468, 0.10741393268108368, 0.11723090708255768, 0.197556272149086], [0.0067135002464056015, 0.005400336813181639, 0.002429268090054393, 0.0005210567032918334, 0.0009090648964047432, 0.056922394782304764, 0.006305574905127287, 0.02051912061870098, 0.009087055921554565, 0.0029723523184657097, 0.5903128385543823, 0.4623943269252777, 0.5148944854736328, 0.10147220641374588, 0.10177940130233765], [0.016283290460705757, 0.004236595239490271, 0.00024049253261182457, 0.00013081195356789976, 0.004825976211577654, 0.03370611369609833, 0.030076656490564346, 0.006495397537946701, 0.015585500746965408, 0.0006116450531408191, 0.009124655276536942, 0.7220618724822998, 0.5160555839538574, 0.16948190331459045, 0.04205150157213211], [0.04056651145219803, 0.05449386313557625, 0.007923644036054611, 0.00034379694261588156, 0.0072999089024960995, 0.005707062315195799, 0.018278487026691437, 0.00924981851130724, 0.0004191468469798565, 0.0015566512010991573, 0.0019580996595323086, 0.06517467647790909, 0.4938390851020813, 0.1360015720129013, 0.14540629088878632], [0.02595147117972374, 0.0358305424451828, 0.021912503987550735, 0.01559682097285986, 0.0029425774700939655, 0.008820675313472748, 0.259022980928421, 0.24083182215690613, 0.0008326273527927697, 0.009937180206179619, 0.008380424231290817, 0.0008840225636959076, 0.11912944912910461, 0.5976794362068176, 0.17433230578899384], [0.024576334282755852, 0.01131413970142603, 0.0036256120074540377, 0.007047882303595543, 0.015460383147001266, 0.007877636700868607, 0.035456594079732895, 0.017273712903261185, 0.0020541276317089796, 0.005268692504614592, 0.003138576401397586, 0.0058868261985480785, 0.09279357641935349, 0.45485755801200867, 0.2460370808839798], [0.02016485668718815, 0.03839857131242752, 0.0345035195350647, 0.005700604524463415, 0.03111962042748928, 0.03698137030005455, 0.056010663509368896, 0.043163470923900604, 0.004449993837624788, 0.000997284660115838, 0.006035848520696163, 0.0027079761493951082, 0.009604639373719692, 0.02099894918501377, 0.13394789397716522]], [[0.11855445802211761, 0.018203705549240112, 0.014699782244861126, 0.005997231230139732, 0.012317956425249577, 0.005482070613652468, 0.020501872524619102, 0.04173066467046738, 0.028033137321472168, 0.007907108403742313, 0.13633504509925842, 0.11779958009719849, 0.02402079664170742, 0.08686818182468414, 0.19919154047966003], [0.015789268538355827, 0.07802969217300415, 0.024552250280976295, 0.007203033193945885, 0.015197299420833588, 0.0086579704657197, 0.005928180180490017, 0.015956610441207886, 0.019966211169958115, 0.002508557867258787, 0.048071712255477905, 0.0452260747551918, 0.027286410331726074, 0.034357864409685135, 0.19209280610084534], [0.7560696601867676, 0.09646204113960266, 0.24264514446258545, 0.03150765225291252, 0.15196740627288818, 0.027980739250779152, 0.025865402072668076, 0.037002913653850555, 0.02429634891450405, 0.014392002485692501, 0.11331582069396973, 0.2883520722389221, 0.24113057553768158, 0.5529852509498596, 0.13967400789260864], [0.6593953371047974, 0.14735713601112366, 0.007992099039256573, 0.03938791900873184, 0.047611087560653687, 0.002478603972122073, 0.00756214139983058, 0.01120123453438282, 0.017771385610103607, 0.011085578240454197, 0.01766165718436241, 0.07185176759958267, 0.01590064913034439, 0.05699647217988968, 0.22524236142635345], [0.8214750289916992, 0.5506035089492798, 0.04117008298635483, 0.00517136137932539, 0.5628769993782043, 0.013714980334043503, 0.018153639510273933, 0.019494647160172462, 0.02796507254242897, 0.003693098435178399, 0.052905939519405365, 0.024033749476075172, 0.017759546637535095, 0.154443621635437, 0.2181331366300583], [0.47579920291900635, 0.4996025860309601, 0.02201933227479458, 0.032786499708890915, 0.003352785250172019, 0.402157723903656, 0.028392860665917397, 0.03425603359937668, 0.017302367836236954, 0.007774383760988712, 0.03628184646368027, 0.015436487272381783, 0.09682580828666687, 0.09163853526115417, 0.1807471215724945], [0.6324970722198486, 0.5132108926773071, 0.14723047614097595, 0.10531618446111679, 0.14770705997943878, 0.01965152472257614, 0.16446776688098907, 0.023718399927020073, 0.014144167304039001, 0.003392518265172839, 0.03989372402429581, 0.048702552914619446, 0.05385157838463783, 0.06003360450267792, 0.2021118402481079], [0.2804942727088928, 0.4447323679924011, 0.40719398856163025, 0.15280602872371674, 0.5485119223594666, 0.006256175693124533, 0.005905789323151112, 0.0894087627530098, 0.014159541577100754, 0.0037697115913033485, 0.08780182898044586, 0.04568948596715927, 0.08344046771526337, 0.08309336006641388, 0.1791403889656067], [0.38668709993362427, 0.3767029941082001, 0.5765653848648071, 0.14457443356513977, 0.830109715461731, 0.558448314666748, 0.2105703204870224, 0.015437009744346142, 0.0802588015794754, 0.0035789015237241983, 0.009509528055787086, 0.011719968169927597, 0.04601259157061577, 0.015442220494151115, 0.02989899180829525], [0.42374563217163086, 0.4557475447654724, 0.5995064973831177, 0.22240440547466278, 0.8298278450965881, 0.26192477345466614, 0.5618261694908142, 0.2755923569202423, 0.03321446478366852, 0.014314521104097366, 0.030895033851265907, 0.0061126528307795525, 0.0033166268840432167, 0.0021476708352565765, 0.12580153346061707], [0.4742293357849121, 0.32335561513900757, 0.5931060910224915, 0.0772920548915863, 0.3757626712322235, 0.211185023188591, 0.42018893361091614, 0.37329575419425964, 0.26276469230651855, 0.012583179399371147, 0.3317490220069885, 0.002885210793465376, 0.011435287073254585, 0.00757939275354147, 0.1435183733701706], [0.21439705789089203, 0.17853425443172455, 0.32548797130584717, 0.06489395350217819, 0.64824378490448, 0.1159982681274414, 0.19616922736167908, 0.27417391538619995, 0.6047332286834717, 0.1810707151889801, 0.034782104194164276, 0.10310898721218109, 0.0316632017493248, 0.025309519842267036, 0.09833981841802597], [0.19860051572322845, 0.10174965113401413, 0.08606765419244766, 0.053267233073711395, 0.11251617968082428, 0.2378872036933899, 0.16651752591133118, 0.1490997076034546, 0.4605393707752228, 0.18029887974262238, 0.1883857697248459, 0.007075145840644836, 0.25310245156288147, 0.08171047270298004, 0.15088772773742676], [0.2976968586444855, 0.21286718547344208, 0.04716610535979271, 0.025928588584065437, 0.1317281424999237, 0.12927810847759247, 0.2939497232437134, 0.23276808857917786, 0.5986261367797852, 0.05386120826005936, 0.05668044835329056, 0.025143466889858246, 0.007965278811752796, 0.03647890314459801, 0.16275253891944885], [0.34472423791885376, 0.33325105905532837, 0.5841152667999268, 0.8456752300262451, 0.4377557933330536, 0.4159393310546875, 0.33224907517433167, 0.1488359123468399, 0.2203720510005951, 0.7425854206085205, 0.7086009383201599, 0.5293036699295044, 0.2777566909790039, 0.22530661523342133, 0.09936152398586273]], [[0.3582096993923187, 0.12323450297117233, 0.41414904594421387, 0.12697191536426544, 0.2567327618598938, 0.12921607494354248, 0.303745299577713, 0.26060354709625244, 0.2067556530237198, 0.0739586353302002, 0.038356974720954895, 0.018690073862671852, 0.019858568906784058, 0.03828525170683861, 0.09448481351137161], [0.034560851752758026, 0.06147807836532593, 0.09719342738389969, 0.03090484067797661, 0.05040246620774269, 0.10769589245319366, 0.28225648403167725, 0.03959896042943001, 0.04561477154493332, 0.015998149290680885, 0.010396423749625683, 0.0027313604950904846, 0.02088637463748455, 0.02540828473865986, 0.1729334592819214], [0.031599532812833786, 0.03154325857758522, 0.01938430592417717, 0.10300880670547485, 0.07719798386096954, 0.3211115002632141, 0.5488157868385315, 0.6110779047012329, 0.03511836752295494, 0.03874386474490166, 0.02549627609550953, 0.08684590458869934, 0.1071673184633255, 0.10855282843112946, 0.09071482717990875], [0.05947110056877136, 0.046990834176540375, 0.001917339744977653, 0.019972380250692368, 0.14856000244617462, 0.10937333106994629, 0.7613639235496521, 0.43800127506256104, 0.038890283554792404, 0.0702563002705574, 0.052807219326496124, 0.20175476372241974, 0.09827514737844467, 0.19838720560073853, 0.1799801141023636], [0.010548654943704605, 0.056933727115392685, 0.0004277318366803229, 0.0005220972234383225, 0.03427216783165932, 0.15697234869003296, 0.44382861256599426, 0.28639304637908936, 0.1278306096792221, 0.0589531809091568, 0.07240739464759827, 0.21584689617156982, 0.623681902885437, 0.39177897572517395, 0.053747572004795074], [0.012333033606410027, 0.11936485022306442, 0.0015480549773201346, 0.05167163908481598, 0.003915506415069103, 0.05033823475241661, 0.18770258128643036, 0.5247471332550049, 0.13492631912231445, 0.0999734029173851, 0.02801361307501793, 0.04943297058343887, 0.067798912525177, 0.02220618724822998, 0.04863249137997627], [0.023225123062729836, 0.03936318680644035, 0.0654693990945816, 0.0780135840177536, 0.03190883249044418, 0.007237496320158243, 0.3230750560760498, 0.11266676336526871, 0.3152024447917938, 0.12503208220005035, 0.08215073496103287, 0.20814812183380127, 0.054794978350400925, 0.014369799755513668, 0.31165388226509094], [0.021642545238137245, 0.05032852664589882, 0.10916808992624283, 0.14173567295074463, 0.025796422734856606, 0.002176823327317834, 0.004212724044919014, 0.11230720579624176, 0.2761599123477936, 0.18545517325401306, 0.30032697319984436, 0.18456220626831055, 0.1202857494354248, 0.02383211813867092, 0.22383396327495575], [0.014165909960865974, 0.030938388779759407, 0.019327908754348755, 0.025021186098456383, 0.018685894086956978, 0.058899857103824615, 0.05705944076180458, 0.013411193154752254, 0.27564239501953125, 0.14192135632038116, 0.4484158754348755, 0.49174171686172485, 0.42328834533691406, 0.5148258805274963, 0.024227913469076157], [0.030343737453222275, 0.035576362162828445, 0.011198173277080059, 0.0029289661906659603, 0.004656192846596241, 0.19044476747512817, 0.14425727725028992, 0.14593322575092316, 0.02429576776921749, 0.03922351822257042, 0.03158531337976456, 0.3954472541809082, 0.18761666119098663, 0.829915463924408, 0.05755764618515968], [0.07378673553466797, 0.08269044756889343, 0.008506381884217262, 0.004565858747810125, 0.0033621611073613167, 0.47163471579551697, 0.3437289595603943, 0.16293375194072723, 0.0103234788402915, 0.006828381214290857, 0.025515833869576454, 0.13491219282150269, 0.23380780220031738, 0.7675665616989136, 0.06853343546390533], [0.19539110362529755, 0.20751968026161194, 0.012997383251786232, 0.004634191282093525, 0.004486567340791225, 0.10301963984966278, 0.2361651211977005, 0.10510270297527313, 0.007245894055813551, 0.02498149685561657, 0.005201807711273432, 0.12586773931980133, 0.2985144853591919, 0.741521954536438, 0.061252206563949585], [0.3654796779155731, 0.656768798828125, 0.02389511466026306, 0.057929087430238724, 0.025417884811758995, 0.2985052168369293, 0.29244741797447205, 0.15614598989486694, 0.02199239283800125, 0.027919312939047813, 0.024499662220478058, 0.0015409317566081882, 0.18344998359680176, 0.05587974563241005, 0.11099682748317719], [0.24996283650398254, 0.30432745814323425, 0.08651068061590195, 0.27794384956359863, 0.10948572307825089, 0.32318809628486633, 0.40224379301071167, 0.24700750410556793, 0.016620514914393425, 0.03902489319443703, 0.01563531532883644, 0.008603462018072605, 0.029363060370087624, 0.20380347967147827, 0.1635625809431076], [0.08184575289487839, 0.05559774115681648, 0.012900986708700657, 0.004766350146383047, 0.02465618960559368, 0.0658264234662056, 0.16982027888298035, 0.09995799511671066, 0.1946410834789276, 0.03345171734690666, 0.026332948356866837, 0.010880211368203163, 0.01684177853167057, 0.011932285502552986, 0.13059602677822113]], [[0.06378140300512314, 0.013955923728644848, 0.058693334460258484, 0.014864355325698853, 0.02882157638669014, 0.02533077634871006, 0.013877282850444317, 0.02919653430581093, 0.029733512550592422, 0.010929838754236698, 0.2184230536222458, 0.404588907957077, 0.5044611692428589, 0.4171900451183319, 0.18600669503211975], [0.09787620604038239, 0.3741878271102905, 0.1718531847000122, 0.22170154750347137, 0.11211875081062317, 0.06884550303220749, 0.023903023451566696, 0.00765330670401454, 0.043831951916217804, 0.04742401838302612, 0.08705892413854599, 0.19904442131519318, 0.1439688503742218, 0.08975595235824585, 0.124632827937603], [0.024405136704444885, 0.006321595516055822, 0.03571266308426857, 0.0050111510790884495, 0.01807553507387638, 6.11300565651618e-05, 0.0022184934932738543, 0.002461126074194908, 0.00987271312624216, 0.03944821655750275, 0.02587837167084217, 0.009154303930699825, 0.018459370359778404, 0.07083768397569656, 0.2838045060634613], [0.02829434722661972, 0.05303699150681496, 0.03342747688293457, 0.026768406853079796, 0.06776657700538635, 0.0015663451049476862, 0.0066550131887197495, 0.028257621452212334, 0.02201445959508419, 0.024995435029268265, 0.014314326457679272, 0.019762825220823288, 0.019060753285884857, 0.09995586425065994, 0.2721303105354309], [0.011709636077284813, 0.13082386553287506, 0.3091292977333069, 0.012390679679811, 0.06598176062107086, 0.0025066242087632418, 0.008877930231392384, 0.03396160528063774, 0.01681593246757984, 0.01466491911560297, 0.12272557616233826, 0.010357965715229511, 0.009066522121429443, 0.12291242927312851, 0.3062548041343689], [0.05738264322280884, 0.12342102825641632, 0.7862259149551392, 0.20355252921581268, 0.007363088894635439, 0.0717976987361908, 0.032159313559532166, 0.018495721742510796, 0.0034321516286581755, 0.0013732254737988114, 0.006710591726005077, 0.0023603499867022038, 0.007563347462564707, 0.05948156490921974, 0.12037239223718643], [0.015277753584086895, 0.006394209805876017, 0.6686000227928162, 0.29117655754089355, 0.06745831668376923, 0.2462725043296814, 0.06154515966773033, 0.015117062255740166, 0.004134421236813068, 0.0023558081593364477, 0.08952713012695312, 0.04650713875889778, 0.023702487349510193, 0.01321239210665226, 0.09701406955718994], [0.028385812416672707, 0.012191490270197392, 0.27066752314567566, 0.18411272764205933, 0.040896836668252945, 0.48173367977142334, 0.02650352008640766, 0.07071101665496826, 0.007758310064673424, 0.001958101289346814, 0.01839292421936989, 0.023066602647304535, 0.03435399383306503, 0.03657263144850731, 0.029525745660066605], [0.04876675456762314, 0.422792911529541, 0.22041767835617065, 0.2559551000595093, 0.08884847164154053, 0.01230597123503685, 0.025672338902950287, 0.003895203350111842, 0.022659877315163612, 0.0043840305879712105, 0.007982935756444931, 0.010924039408564568, 0.06971067935228348, 0.0061518345028162, 0.21563398838043213], [0.015657104551792145, 0.02366352081298828, 0.07373688369989395, 0.10379613190889359, 0.013535204343497753, 0.07323776930570602, 0.048540983349084854, 0.008235346525907516, 0.01638718694448471, 0.012322558090090752, 0.073370561003685, 0.03809332847595215, 0.021602218970656395, 0.003090204205363989, 0.23272792994976044], [0.018198516219854355, 0.011175387538969517, 0.02189311571419239, 0.012938260100781918, 0.09454065561294556, 0.010837653651833534, 0.04214898869395256, 0.03231353685259819, 0.2788335978984833, 0.02807164192199707, 0.0381515808403492, 0.013884211890399456, 0.014051362872123718, 0.00934662390500307, 0.24102351069450378], [0.01114112138748169, 0.11382883787155151, 0.017900465056300163, 0.008639826439321041, 0.024639632552862167, 0.020821422338485718, 0.022935912013053894, 0.04321465268731117, 0.055257730185985565, 0.0561254657804966, 0.006350866984575987, 0.034159135073423386, 0.001170721254311502, 0.00040716465446166694, 0.2438717484474182], [0.01806582696735859, 0.014762195758521557, 0.02654433250427246, 0.025726040825247765, 0.03240499645471573, 0.020733002573251724, 0.04244884103536606, 0.02047092467546463, 0.13412125408649445, 0.512605607509613, 0.5156171321868896, 0.023306455463171005, 0.0489252470433712, 0.06594526767730713, 0.173824280500412], [0.018763704225420952, 0.010509289801120758, 0.06387435644865036, 0.02487548068165779, 0.10975509881973267, 0.01984621025621891, 0.06460897624492645, 0.03137337416410446, 0.1802622228860855, 0.7354047894477844, 0.7864400148391724, 0.1003832221031189, 0.007522855885326862, 0.14785504341125488, 0.08187610656023026], [0.02117479033768177, 0.061044495552778244, 0.02157888375222683, 0.021421663463115692, 0.04618487507104874, 0.05167240649461746, 0.01054168026894331, 0.009977741166949272, 0.0295058935880661, 0.008349624462425709, 0.02268156036734581, 0.026699911803007126, 0.020697196945548058, 0.013632250018417835, 0.13365623354911804]], [[4.754594192490913e-05, 2.1380438752771624e-08, 2.918067565360616e-08, 2.8621201408896013e-08, 2.499384379461844e-07, 0.0002631827082950622, 5.21495513439163e-10, 2.490414274802788e-08, 1.4592379216082918e-07, 4.660217989282955e-09, 1.3478041793746343e-08, 1.530838318331007e-07, 4.6195887989597395e-05, 8.429636181972455e-06, 0.2157532423734665], [0.6645432114601135, 0.00044607618474401534, 8.70102576300269e-06, 1.056492124007491e-06, 4.43653931370136e-07, 3.5252294310339494e-06, 0.013106754049658775, 0.0008970960625447333, 5.719662112824153e-07, 3.2791810156140855e-08, 1.0544068729245737e-08, 3.57371057191358e-08, 0.00012361648259684443, 0.0008665899513289332, 0.00011794524471042678], [5.6636022236489225e-06, 0.771808385848999, 0.2603715658187866, 7.618767995154485e-05, 2.6443340175319463e-05, 1.448297037853763e-08, 1.7459943213449236e-10, 0.0005545829189941287, 1.3129211993145873e-06, 0.0003596498572733253, 1.3187416243454209e-06, 1.2532552773336647e-08, 5.7067543821176514e-05, 1.4676837054139469e-05, 8.822963764032465e-07], [7.866851170490463e-09, 0.0015575109282508492, 0.5911858677864075, 0.005255529191344976, 0.00012560673349071294, 1.2381517144888221e-08, 1.3975322635251253e-12, 4.631081083061872e-06, 1.8297629367225454e-06, 0.043241821229457855, 0.00025465109501965344, 1.6550380621538352e-07, 1.5873881693551084e-06, 1.3629888329091955e-08, 2.2046858560997862e-08], [1.6020940130090366e-10, 3.2446525892737554e-06, 0.1964423805475235, 0.9067507982254028, 4.244087540428154e-05, 3.027215825568419e-05, 6.154020626425449e-10, 3.570748958736658e-07, 2.493328743469192e-08, 1.327106815551815e-07, 5.116170723340474e-05, 7.67620722541551e-09, 6.538175512105227e-07, 1.6885725528936746e-07, 1.9495971503857845e-09], [4.057985947270026e-09, 1.6926858803500977e-09, 0.00014235911658033729, 0.0026504932902753353, 0.8634750843048096, 1.9555229300749488e-05, 1.294085109293519e-06, 2.6649362894204387e-07, 3.0507638082433175e-10, 5.069419550807197e-09, 1.108148239836737e-07, 1.7377595213474706e-05, 9.726352800498717e-06, 1.823265733946755e-06, 5.869507617717318e-07], [1.9094309466893833e-12, 2.4682887027685507e-13, 6.382604444965523e-10, 6.302604549368596e-10, 1.4692274817207363e-05, 0.3734012544155121, 3.483030241113738e-06, 1.1820202594492457e-08, 1.9522692351614523e-09, 1.394072303342181e-13, 1.7670450172535546e-11, 1.716609077107023e-09, 3.7749509829154704e-06, 2.593782255644328e-06, 3.855710133393586e-07], [8.508453674949124e-08, 1.863478038544031e-09, 1.257351167627263e-10, 5.331373190142763e-11, 3.337832410466035e-08, 1.777973557182122e-05, 0.8244234323501587, 8.755041926633567e-05, 1.7572835409040977e-09, 1.3142270258170718e-11, 7.735358035533546e-13, 4.927841815161038e-11, 5.296478775562719e-07, 0.000259329448454082, 1.8429471282388477e-08], [1.2582735964272729e-09, 2.3675827378610848e-06, 5.770066309196409e-07, 5.0431950282536775e-11, 2.6034334410507398e-11, 1.7287857190240175e-07, 9.084228622668888e-06, 0.8877476453781128, 0.0008898449596017599, 7.2106473680833e-08, 1.9634756043274137e-08, 4.930736808433922e-13, 3.217972377456135e-08, 1.2906410120194778e-05, 9.568290160189008e-09], [2.8039692789860737e-09, 1.3000158105569426e-06, 4.493769978353157e-08, 2.493898698663344e-10, 7.932443764346875e-12, 1.7288407150317653e-08, 2.642636942606913e-10, 3.576151357265189e-05, 0.8324669599533081, 5.240505197434686e-05, 8.11301958947297e-07, 9.422521651814009e-10, 4.6924657937097436e-08, 2.8963553333483105e-08, 6.33739318800508e-08], [2.873091320410026e-09, 7.32139524188824e-05, 1.393846559949452e-05, 2.2707215663331226e-08, 3.602095333121724e-08, 7.893682235637911e-12, 1.2799745258921386e-13, 1.2971109697446082e-07, 4.534097752184607e-05, 0.7187873721122742, 0.0028858170844614506, 4.860597982769832e-06, 3.316463335067965e-06, 6.64895694058032e-08, 4.189383506769673e-09], [3.5802516507033033e-10, 3.3775189312024168e-09, 1.689890041234321e-06, 2.72409181434341e-07, 2.3650377656281307e-08, 3.1582386705863996e-10, 4.773196676235644e-14, 6.179980832632381e-11, 1.0790042637154329e-07, 0.00019566719129215926, 0.8666706681251526, 0.00033315850305370986, 7.101260734998505e-07, 3.226231015673875e-08, 6.780910499770698e-09], [7.800644574729176e-09, 1.700809604265885e-09, 9.215954577257435e-08, 4.046364665555302e-07, 0.00011374137102393433, 5.132134901941754e-06, 5.991689921991394e-10, 9.107053305923429e-11, 5.105777606262407e-11, 3.3974476565390432e-09, 3.904122058884241e-05, 0.65162193775177, 0.00035754009149968624, 6.446759653044865e-05, 8.575011065659055e-07], [5.410449865905775e-10, 1.9016622998524468e-10, 1.651180719930423e-10, 9.184660809680167e-10, 4.749936000081334e-09, 6.8993631430203095e-06, 9.186856830822876e-10, 1.2120262259107673e-11, 1.0679299241797557e-12, 7.136916383397585e-13, 1.9098522763272285e-10, 9.612936082703527e-06, 0.7662882208824158, 0.00778515450656414, 3.0943773765557125e-08], [0.0058370670303702354, 0.00017831011791713536, 6.727457275701454e-06, 4.542615897662472e-06, 0.0008248149533756077, 0.04996809363365173, 0.010534689761698246, 8.931134652812034e-05, 2.4081384708551923e-07, 6.080232139993313e-08, 3.077615701840841e-06, 0.00041306819184683263, 0.062034472823143005, 0.37576472759246826, 0.1323644071817398]], [[0.278582364320755, 0.012074317783117294, 0.4035726487636566, 0.05818924307823181, 0.5308449864387512, 0.7759386301040649, 0.6032847166061401, 0.04120228812098503, 0.6623223423957825, 0.4034832715988159, 0.2541539669036865, 0.023309720680117607, 0.054716046899557114, 0.3570294678211212, 0.004749305546283722], [0.03977029398083687, 0.025161603465676308, 0.4579423666000366, 0.3708552420139313, 0.767479419708252, 0.5835962295532227, 0.5609359741210938, 0.14304085075855255, 0.8166816234588623, 0.848468542098999, 0.5771627426147461, 0.07112090289592743, 0.12416274100542068, 0.618628740310669, 0.06885465234518051], [0.004083612468093634, 0.0006101519684307277, 0.12011494487524033, 0.04229450225830078, 0.17203551530838013, 0.013333754613995552, 0.01874622330069542, 0.021773431450128555, 0.8914079666137695, 0.25239333510398865, 0.2674473226070404, 0.0986163467168808, 0.10968483239412308, 0.05420238524675369, 0.020816486328840256], [0.00974054355174303, 0.009372939355671406, 0.016473596915602684, 0.12944141030311584, 0.06805374473333359, 0.019993484020233154, 0.038472987711429596, 0.21791628003120422, 0.8550615310668945, 0.2646826505661011, 0.7350810766220093, 0.17277619242668152, 0.36265626549720764, 0.3741258382797241, 0.06228891760110855], [0.0007183643756434321, 0.0016902177594602108, 0.0015671673463657498, 0.000663107552099973, 0.015286565758287907, 0.000776923552621156, 0.007700319401919842, 0.11482121050357819, 0.7658083438873291, 0.5443719625473022, 0.22170989215373993, 0.027013972401618958, 0.025342080742120743, 0.049981117248535156, 0.0074298488907516], [0.011776593513786793, 0.00668947771191597, 0.05204532667994499, 0.026732588186860085, 0.007738037500530481, 0.19347773492336273, 0.08661007881164551, 0.02065080776810646, 0.8265263438224792, 0.77967369556427, 0.8155033588409424, 0.7568296194076538, 0.6889008283615112, 0.7797287106513977, 0.04647013917565346], [0.03701920434832573, 0.011276619508862495, 0.026248518377542496, 0.01771446317434311, 0.046063318848609924, 0.020064320415258408, 0.23005641996860504, 0.032302577048540115, 0.6365551948547363, 0.6746889352798462, 0.6497765183448792, 0.5260909199714661, 0.6955898404121399, 0.8770567178726196, 0.04424796253442764], [0.3583561182022095, 0.034818924963474274, 0.1010005921125412, 0.08171684294939041, 0.0902533084154129, 0.0273053590208292, 0.029195906594395638, 0.10516665875911713, 0.5163984894752502, 0.7107389569282532, 0.5390304327011108, 0.6552954316139221, 0.648922324180603, 0.8148984909057617, 0.13771982491016388], [0.04790134355425835, 0.016352321952581406, 0.004838719964027405, 0.039540428668260574, 0.004614146891981363, 0.10033231228590012, 0.05411757901310921, 0.012187371961772442, 0.25466611981391907, 0.4822390675544739, 0.22996564209461212, 0.2013523131608963, 0.3018202781677246, 0.325538694858551, 0.10763657093048096], [0.18817435204982758, 0.007200991734862328, 0.0915139690041542, 0.00800582580268383, 0.007660675328224897, 0.27090781927108765, 0.08786749839782715, 0.014442713931202888, 0.017244037240743637, 0.8212726712226868, 0.22018176317214966, 0.05063365772366524, 0.16457810997962952, 0.059498634189367294, 0.11578860878944397], [0.1423795521259308, 0.008703344501554966, 0.2208349108695984, 0.02527845837175846, 0.027401143684983253, 0.09980836510658264, 0.024800043553113937, 0.009310302324593067, 0.11915526539087296, 0.048824433237314224, 0.23738479614257812, 0.04641610383987427, 0.11649724096059799, 0.03864651918411255, 0.200869619846344], [0.19247660040855408, 0.028833042830228806, 0.1872357279062271, 0.03232081979513168, 0.031028537079691887, 0.3644941747188568, 0.11239293217658997, 0.0803447812795639, 0.13423573970794678, 0.07468846440315247, 0.009079186245799065, 0.19545331597328186, 0.09625646471977234, 0.07526607811450958, 0.1802312582731247], [0.1263553649187088, 0.009648445062339306, 0.47829046845436096, 0.22347994148731232, 0.2749265432357788, 0.23197446763515472, 0.05249631777405739, 0.01617230661213398, 0.3326357305049896, 0.1497221142053604, 0.04782721772789955, 0.011572148650884628, 0.1354474574327469, 0.0791783407330513, 0.15636207163333893], [0.166306734085083, 0.04561271890997887, 0.48400574922561646, 0.31743937730789185, 0.4171416163444519, 0.1806352734565735, 0.04328177124261856, 0.022486848756670952, 0.1779668778181076, 0.03957689553499222, 0.009708160534501076, 0.01422630064189434, 0.013467496261000633, 0.06257133930921555, 0.22838094830513], [0.39438390731811523, 0.20185884833335876, 0.19486168026924133, 0.053202297538518906, 0.29429352283477783, 0.31667405366897583, 0.3313867747783661, 0.37864530086517334, 0.4971301257610321, 0.178373321890831, 0.16689708828926086, 0.16029801964759827, 0.22925321757793427, 0.22496484220027924, 0.11296840012073517]], [[0.12737327814102173, 0.10940374433994293, 0.05123003572225571, 0.7807462215423584, 0.0676276683807373, 0.02884089946746826, 0.05574861168861389, 0.5975708961486816, 0.07044392824172974, 0.5009010434150696, 0.31273892521858215, 0.07660850137472153, 0.29424503445625305, 0.028401609510183334, 0.07683643698692322], [0.03750006482005119, 0.429240882396698, 0.15060469508171082, 0.2604650557041168, 0.037177786231040955, 0.1944778561592102, 0.07849539071321487, 0.6716934442520142, 0.06105323135852814, 0.07711976766586304, 0.20997941493988037, 0.028168758377432823, 0.12550987303256989, 0.030995607376098633, 0.0958443135023117], [0.15516091883182526, 0.07278051972389221, 0.11765316128730774, 0.7884857058525085, 0.11075033247470856, 0.051856692880392075, 0.18673725426197052, 0.2268398553133011, 0.013722711242735386, 0.6478350162506104, 0.5306386947631836, 0.3090885877609253, 0.22243055701255798, 0.16200464963912964, 0.13070979714393616], [0.21811531484127045, 0.7140333652496338, 0.018219277262687683, 0.764274001121521, 0.15804116427898407, 0.03280843421816826, 0.11008237302303314, 0.09874711185693741, 0.0423860140144825, 0.5652360320091248, 0.14938808977603912, 0.2869919240474701, 0.39966318011283875, 0.1259765923023224, 0.0577625073492527], [0.11744663864374161, 0.1893559694290161, 0.05823011323809624, 0.03701714053750038, 0.15626470744609833, 0.08588159829378128, 0.26269999146461487, 0.41053518652915955, 0.007210245821624994, 0.3749772906303406, 0.4537068009376526, 0.6417111158370972, 0.1666039228439331, 0.13084180653095245, 0.14052902162075043], [0.3613002598285675, 0.240200012922287, 0.044567547738552094, 0.04614294692873955, 0.0021214759908616543, 0.17616558074951172, 0.11286458373069763, 0.11203286051750183, 0.009014172479510307, 0.10163455456495285, 0.0949772298336029, 0.06209810823202133, 0.11910365521907806, 0.04125094786286354, 0.1871420443058014], [0.2914785146713257, 0.381010502576828, 0.08399549126625061, 0.4511452913284302, 0.048780620098114014, 0.008560722693800926, 0.1541443020105362, 0.12101723253726959, 0.02183164842426777, 0.18665823340415955, 0.13169258832931519, 0.13539372384548187, 0.14286382496356964, 0.031125182285904884, 0.2064482420682907], [0.3084108829498291, 0.4568510055541992, 0.068343386054039, 0.40243175625801086, 0.04035715013742447, 0.028490515425801277, 0.006473515648394823, 0.6036491990089417, 0.14769236743450165, 0.09462843090295792, 0.04651549458503723, 0.08334364742040634, 0.08459941297769547, 0.022403797134757042, 0.13448290526866913], [0.4981050491333008, 0.13424238562583923, 0.16773013770580292, 0.5160816311836243, 0.029790958389639854, 0.22989192605018616, 0.568993866443634, 0.056374672800302505, 0.08792523294687271, 0.2900378406047821, 0.12431738525629044, 0.017185388132929802, 0.05061684548854828, 0.020683959126472473, 0.13275840878486633], [0.33482691645622253, 0.4720645546913147, 0.20652346312999725, 0.6004944443702698, 0.1402488797903061, 0.13250590860843658, 0.13873517513275146, 0.5260767936706543, 0.01182119082659483, 0.1017654612660408, 0.047682080417871475, 0.04534589499235153, 0.10121697187423706, 0.0026118881069123745, 0.13006491959095], [0.27261805534362793, 0.5674196481704712, 0.08154824376106262, 0.8736060261726379, 0.4724165201187134, 0.1720387041568756, 0.13692085444927216, 0.40960294008255005, 0.06138879805803299, 0.0898643285036087, 0.15986473858356476, 0.04882661625742912, 0.09858791530132294, 0.005254920106381178, 0.09166211634874344], [0.33052578568458557, 0.40956470370292664, 0.44244009256362915, 0.8809638619422913, 0.26719745993614197, 0.38818857073783875, 0.40750059485435486, 0.4857279658317566, 0.04656125605106354, 0.08998580276966095, 0.02227160707116127, 0.42457664012908936, 0.06242617964744568, 0.019552020356059074, 0.08343644440174103], [0.20678018033504486, 0.17620769143104553, 0.3081345558166504, 0.6112105250358582, 0.534289538860321, 0.19626931846141815, 0.17160479724407196, 0.4079393148422241, 0.027630727738142014, 0.07990976423025131, 0.0661839172244072, 0.022294294089078903, 0.11108729988336563, 0.024492109194397926, 0.12739884853363037], [0.2302674651145935, 0.4147239625453949, 0.3118293881416321, 0.3454154133796692, 0.20178626477718353, 0.3381562829017639, 0.1571493148803711, 0.4487079083919525, 0.02096635475754738, 0.11857040971517563, 0.09038619697093964, 0.01401298213750124, 0.06377796083688736, 0.029106009751558304, 0.10548537224531174], [0.0850413590669632, 0.2905830442905426, 0.047175440937280655, 0.009145522490143776, 0.014412813819944859, 0.03387918695807457, 0.04852135106921196, 0.2856408655643463, 0.03688584640622139, 0.02503933012485504, 0.030300520360469818, 0.020876996219158173, 0.004409631714224815, 0.0025441893376410007, 0.1292814165353775]]], [[[0.00039591442327946424, 4.3682277464540675e-05, 1.7448855942348018e-05, 4.859234650211874e-06, 1.1413659422032651e-06, 1.0625568393152207e-05, 1.9137923246148603e-08, 5.615326585939329e-07, 5.487099315359956e-06, 2.1910665282121045e-07, 2.532970881929941e-07, 7.501878940274764e-07, 1.657212578720646e-06, 1.0862070212169783e-06, 0.18717002868652344], [0.6005652546882629, 0.09179380536079407, 0.017407523468136787, 0.009556752629578114, 0.001977206440642476, 0.02417689561843872, 0.001285116421058774, 0.0015866898465901613, 0.0007265046588145196, 0.0008927723974920809, 0.008914382196962833, 0.0016361800953745842, 0.1313493698835373, 0.006872364319860935, 0.052507203072309494], [0.00456381356343627, 0.8302816152572632, 0.11558636277914047, 0.010320104658603668, 0.00024428890901617706, 9.749805758474395e-05, 7.678471774852369e-06, 0.0030259541235864162, 3.9539358112961054e-05, 7.781033491482958e-05, 0.0003711417084559798, 9.1652873379644e-06, 0.0006458949064835906, 0.00023330377007368952, 0.00865631178021431], [0.0011992683866992593, 0.008629350923001766, 0.6251504421234131, 0.015135818161070347, 0.001978840446099639, 0.000745285302400589, 5.708653407054953e-05, 0.00043479635496623814, 0.0005481417756527662, 0.0016355890547856688, 0.0002436988870613277, 5.164237336430233e-06, 4.976044510840438e-05, 3.400173591217026e-05, 0.00024351823958568275], [0.006698334589600563, 0.006304558366537094, 0.34660738706588745, 0.7217360138893127, 0.06864907592535019, 0.0027605369687080383, 0.0006927561480551958, 0.00010832686530193314, 0.0002978279662784189, 0.007849807851016521, 0.0023863124661147594, 8.873132173903286e-06, 2.0952818886144087e-05, 4.62439584225649e-06, 0.000559441396035254], [0.0006861803703941405, 0.036174044013023376, 0.4128260612487793, 0.09897080808877945, 0.6376775503158569, 0.19431157410144806, 0.0007082957308739424, 0.05852581560611725, 0.0003548018867149949, 0.00026609119959175587, 0.0006576658925041556, 0.0007862210040912032, 0.027955245226621628, 0.006076914723962545, 0.0010327105410397053], [1.7293352305713938e-09, 1.4693102912133327e-06, 3.0192679332685657e-05, 1.0152590220968705e-05, 0.005660888738930225, 0.5108420252799988, 0.0005426039570011199, 0.0008102089632302523, 3.168102921335958e-06, 6.12798771726375e-08, 2.5310575324510864e-07, 5.088519174023531e-06, 0.00021843344438821077, 2.5946601454052143e-06, 2.594279294498847e-06], [7.755387923680246e-05, 3.5259185096947476e-05, 0.0012139425380155444, 0.00035162578569725156, 0.00505053298547864, 0.4696201980113983, 0.5859625339508057, 0.009771172888576984, 0.0005853781476616859, 3.0261137453635456e-06, 1.2206013707327656e-05, 2.2465645088232122e-05, 0.013555033132433891, 0.0011026648571714759, 7.656160596525297e-05], [3.390625025190275e-08, 5.7732322602532804e-05, 3.19563605444273e-06, 2.0829493507790175e-07, 5.039521965954918e-06, 0.00017657184798736125, 0.000729007413610816, 0.8331114649772644, 0.0037640428636223078, 1.5948112377373036e-06, 5.8014775277115405e-06, 4.528372699041938e-07, 0.00020723954366985708, 0.00025866259238682687, 1.95706252270611e-06], [2.7739795882553153e-07, 2.501485141692683e-05, 4.778147285833256e-06, 3.7190903867667657e-07, 9.610201523457818e-09, 1.1292572708043735e-06, 1.2355405942798825e-07, 3.984562499681488e-05, 0.6202287077903748, 0.0002610959345474839, 0.00017016819037962705, 9.242457963409834e-07, 2.799387630147976e-06, 3.2760857493485673e-07, 1.038134087139042e-06], [1.2775580216839444e-05, 0.0010497755138203502, 6.564326031366363e-05, 4.172011358605232e-06, 4.676745959386608e-07, 3.6489967669695034e-07, 8.09820832614605e-08, 5.78842673348845e-06, 0.0015375507064163685, 0.7445451617240906, 0.026254041120409966, 8.213486580643803e-05, 1.1159563655382954e-05, 3.0355058697750792e-05, 2.6809220798895694e-06], [1.3068409316474572e-05, 0.00010775982809718698, 0.00024633039720356464, 3.3576598070794716e-05, 4.556980275083333e-05, 1.0597023702985098e-07, 9.86238859468358e-08, 2.1072135041322326e-06, 0.0013669389300048351, 0.5916010141372681, 0.4436832368373871, 0.0013138806680217385, 4.73510908705066e-06, 6.116700660641072e-06, 2.961193558803643e-06], [4.950460061081685e-05, 0.0011237917933613062, 0.017257435247302055, 0.0011414129985496402, 0.025087760761380196, 0.00036485170130617917, 3.213326635886915e-05, 5.293267349770758e-06, 4.4593522034119815e-05, 0.001686945091933012, 0.00823597889393568, 0.8047888278961182, 0.014818375930190086, 0.006413417402654886, 2.281446177221369e-05], [0.000998240546323359, 0.1768636256456375, 0.0663335844874382, 0.02716292440891266, 0.03197554498910904, 0.001621886040084064, 0.00012482069723773748, 7.020989141892642e-05, 0.08078382909297943, 0.1701173484325409, 0.08303841948509216, 0.5506232380867004, 0.06293172389268875, 0.03332124650478363, 0.0033543158788233995], [0.021357281133532524, 0.0013016555458307266, 0.00422634556889534, 0.00104909623041749, 0.012563652358949184, 0.07401228696107864, 0.007866809144616127, 0.0024991247337311506, 0.0011657974682748318, 5.4276370065053925e-06, 0.0024851916823536158, 0.0298884529620409, 0.4522511959075928, 0.2182934284210205, 0.14462554454803467]], [[0.03249572962522507, 0.01680905371904373, 0.01368993055075407, 0.005182549823075533, 0.0014828554121777415, 0.0045396420173347, 0.0006250899168662727, 0.01684878207743168, 0.005824672989547253, 0.007428525947034359, 0.009805276058614254, 0.003550198394805193, 0.007900950498878956, 0.009690256789326668, 0.18011362850666046], [0.11159665137529373, 0.10346578061580658, 0.414338618516922, 0.08694489300251007, 0.2136271595954895, 0.10264819115400314, 0.023593097925186157, 0.0335584320127964, 0.0575689822435379, 0.06024341657757759, 0.1307218372821808, 0.13801440596580505, 0.1756829470396042, 0.14866231381893158, 0.1320090889930725], [0.1948547214269638, 0.038279034197330475, 0.07790879160165787, 0.04177340865135193, 0.004589961376041174, 0.0009778933599591255, 0.002051346004009247, 0.006739486940205097, 0.009280361235141754, 0.0007642557029612362, 0.0012637393083423376, 0.00433916924521327, 0.00236115837469697, 0.008354227058589458, 0.2381056696176529], [0.07799407094717026, 0.10201291739940643, 0.037178199738264084, 0.03369736298918724, 0.035083431750535965, 0.003606606973335147, 0.0009816481033340096, 0.010917055420577526, 0.019562464207410812, 0.004011118784546852, 0.0029224867466837168, 0.0011325542582198977, 0.00486336974427104, 0.007979645393788815, 0.2784355580806732], [0.11467810720205307, 0.4025481641292572, 0.4041208028793335, 0.13489782810211182, 0.520052433013916, 0.013409112580120564, 0.0056337821297347546, 0.04408307746052742, 0.06485209614038467, 0.0023049998562783003, 0.0050890627317130566, 0.004091872368007898, 0.006159461103379726, 0.0242836382240057, 0.07189745455980301], [0.1516697108745575, 0.2241159826517105, 0.5074643492698669, 0.3874017000198364, 0.2519407868385315, 0.032381314784288406, 0.015091626904904842, 0.006451433524489403, 0.09749187529087067, 0.007731522433459759, 0.00912014115601778, 0.029297562316060066, 0.05765664204955101, 0.059585090726614, 0.023513801395893097], [0.01171550527215004, 0.10137046873569489, 0.870269238948822, 0.5154522657394409, 0.6626715660095215, 0.08923148363828659, 0.047533176839351654, 0.015608957968652248, 0.11948943883180618, 0.008091520518064499, 0.008133050054311752, 0.012773845344781876, 0.051611315459012985, 0.01502595841884613, 0.00961183663457632], [0.01722140610218048, 0.036506716161966324, 0.7147647738456726, 0.20675897598266602, 0.8291797637939453, 0.31030455231666565, 0.11803850531578064, 0.03327609598636627, 0.4245462417602539, 0.013293992727994919, 0.008976193144917488, 0.054750751703977585, 0.1754072904586792, 0.04528210312128067, 0.012820743955671787], [0.01982569508254528, 0.15988187491893768, 0.12975367903709412, 0.1326102912425995, 0.6299260258674622, 0.28946900367736816, 0.34108322858810425, 0.11804011464118958, 0.16752222180366516, 0.01777276024222374, 0.0021109972149133682, 0.0006076672580093145, 0.0030632279813289642, 0.00126487051602453, 0.1333881914615631], [0.005461913999170065, 0.03046412020921707, 0.008993657305836678, 0.005659051705151796, 0.004244270734488964, 0.02773391455411911, 0.042834386229515076, 0.13534432649612427, 0.27069228887557983, 0.04962563514709473, 0.015227400697767735, 0.0016283531440421939, 0.0014969720505177975, 0.0027089377399533987, 0.17130999267101288], [0.01672529987990856, 0.10339350253343582, 0.009749630466103554, 0.02030925825238228, 0.017326004803180695, 0.03957638517022133, 0.030999623239040375, 0.10308665037155151, 0.5008098483085632, 0.09767498821020126, 0.09780175238847733, 0.025981366634368896, 0.003117683343589306, 0.00962040200829506, 0.1932818591594696], [0.026731140911579132, 0.05838552862405777, 0.07611822336912155, 0.05796685442328453, 0.5904980301856995, 0.010755263268947601, 0.0517524816095829, 0.055663660168647766, 0.29654714465141296, 0.1307908594608307, 0.1585402488708496, 0.03976760059595108, 0.07525579631328583, 0.16488958895206451, 0.1035238653421402], [0.024593327194452286, 0.12932555377483368, 0.13568159937858582, 0.16021546721458435, 0.3227141201496124, 0.029398979619145393, 0.01611196994781494, 0.016819216310977936, 0.2378186136484146, 0.5602607131004333, 0.7615779638290405, 0.08417549729347229, 0.10783103108406067, 0.2013072967529297, 0.06744378060102463], [0.018169090151786804, 0.26050350069999695, 0.078061044216156, 0.023439347743988037, 0.05254700779914856, 0.0014709478709846735, 0.002907117595896125, 0.009980114176869392, 0.1381266713142395, 0.5626046061515808, 0.5405392646789551, 0.11909772455692291, 0.008021530695259571, 0.06359856575727463, 0.009888176806271076], [0.08646434545516968, 0.009946366772055626, 0.041608210653066635, 0.009163393639028072, 0.12723588943481445, 0.17822976410388947, 0.01437843032181263, 0.0057503837160766125, 0.008486853912472725, 0.002935740165412426, 0.019836073741316795, 0.07525425404310226, 0.02854214422404766, 0.0230310820043087, 0.1518138200044632]], [[0.7472922801971436, 0.06644202023744583, 0.12477048486471176, 0.07691145688295364, 0.17426471412181854, 0.17453429102897644, 0.8713244795799255, 0.22852616012096405, 0.7413471937179565, 0.5253387689590454, 0.16250024735927582, 0.19445888698101044, 0.10716042667627335, 0.2310180366039276, 0.05536508187651634], [0.13811203837394714, 0.40626850724220276, 0.2430061399936676, 0.22277961671352386, 0.18414726853370667, 0.21574343740940094, 0.8225958943367004, 0.5822084546089172, 0.41659367084503174, 0.35776287317276, 0.4909748136997223, 0.39181941747665405, 0.34554892778396606, 0.6003718972206116, 0.043436333537101746], [0.03130434453487396, 0.0024298657663166523, 0.43690061569213867, 0.5043830275535583, 0.07530603557825089, 0.015139158815145493, 0.03498073294758797, 0.012510559521615505, 0.6034607291221619, 0.7801509499549866, 0.8402397036552429, 0.5008089542388916, 0.17657218873500824, 0.11879491806030273, 0.05205746740102768], [0.09661327302455902, 0.049034956842660904, 0.05331439897418022, 0.7222777009010315, 0.25703296065330505, 0.020087046548724174, 0.06235986202955246, 0.0651831179857254, 0.32113927602767944, 0.5460676550865173, 0.7442458271980286, 0.5571728348731995, 0.08091285824775696, 0.059992171823978424, 0.029936296865344048], [0.00972762517631054, 0.007879518903791904, 0.02767527848482132, 0.019306808710098267, 0.22303025424480438, 0.007516835816204548, 0.007440114859491587, 0.022099999710917473, 0.29848337173461914, 0.9075287580490112, 0.5192471742630005, 0.8959035873413086, 0.055479276925325394, 0.04288056865334511, 0.021558567881584167], [0.03836950287222862, 0.05839527025818825, 0.005887853913009167, 0.08494037389755249, 0.012977076694369316, 0.5726994872093201, 0.09935679286718369, 0.13719113171100616, 0.448569655418396, 0.5218547582626343, 0.13800226151943207, 0.1732572466135025, 0.4354798197746277, 0.4542965292930603, 0.12337890267372131], [0.17566490173339844, 0.03925755247473717, 0.01956782303750515, 0.04187121242284775, 0.02149910107254982, 0.049183186143636703, 0.5663522481918335, 0.045388396829366684, 0.45039302110671997, 0.19015204906463623, 0.22913624346256256, 0.10953018814325333, 0.21400360763072968, 0.572381854057312, 0.1667298972606659], [0.2136794924736023, 0.20810233056545258, 0.08830246329307556, 0.27903637290000916, 0.02317022904753685, 0.10591837763786316, 0.15087167918682098, 0.5299598574638367, 0.3452024757862091, 0.15965056419372559, 0.2765912711620331, 0.516273021697998, 0.2846863567829132, 0.3888777792453766, 0.0719258189201355], [0.07398565858602524, 0.04620325192809105, 0.3374384939670563, 0.19415578246116638, 0.025615269318223, 0.010194968432188034, 0.018451105803251266, 0.0005573831731453538, 0.5073301196098328, 0.25312942266464233, 0.15244188904762268, 0.143111914396286, 0.051979612559080124, 0.04884689673781395, 0.12363318353891373], [0.5805832147598267, 0.09438126534223557, 0.24455930292606354, 0.06023820489645004, 0.03943831846117973, 0.021930387243628502, 0.026398053392767906, 0.012488989159464836, 0.011794325895607471, 0.767930269241333, 0.4412824809551239, 0.07896611094474792, 0.01228941697627306, 0.018458310514688492, 0.10866446793079376], [0.1145540103316307, 0.05171298235654831, 0.7072227597236633, 0.4839639961719513, 0.11294537037611008, 0.06211492419242859, 0.021921994164586067, 0.0025394419208168983, 0.0033554628025740385, 0.07357389479875565, 0.7795555591583252, 0.05686911940574646, 0.022035235539078712, 0.034172482788562775, 0.07262071967124939], [0.08121224492788315, 0.025126218795776367, 0.4891066551208496, 0.29065003991127014, 0.20622830092906952, 0.36699986457824707, 0.07864820212125778, 0.014422299340367317, 0.016684990376234055, 0.0649130716919899, 0.07936163991689682, 0.6605017185211182, 0.18783104419708252, 0.08294262737035751, 0.03477967903017998], [0.0700722336769104, 0.1311686784029007, 0.5332850813865662, 0.1558467000722885, 0.36321985721588135, 0.7912644743919373, 0.32202765345573425, 0.1934671401977539, 0.031114375218749046, 0.09986341744661331, 0.08630139380693436, 0.055017780512571335, 0.44781896471977234, 0.42446693778038025, 0.1060790941119194], [0.08875010907649994, 0.06247853487730026, 0.4616371989250183, 0.12711729109287262, 0.3074216842651367, 0.19363558292388916, 0.2020244151353836, 0.0779867023229599, 0.019831692799925804, 0.03570472076535225, 0.07392378151416779, 0.04282142594456673, 0.0921483263373375, 0.3143211603164673, 0.22281906008720398], [0.5682113766670227, 0.1249876543879509, 0.7342633008956909, 0.902918815612793, 0.7035764455795288, 0.3718622326850891, 0.6157594919204712, 0.15625660121440887, 0.8438207507133484, 0.9341241121292114, 0.8159937858581543, 0.6624717712402344, 0.3264457583427429, 0.5970154404640198, 0.003644895739853382]], [[0.0183254461735487, 0.00659788167104125, 0.046570390462875366, 0.04327844828367233, 0.10241857916116714, 0.5407979488372803, 0.0026681027375161648, 0.15349310636520386, 0.0016508381813764572, 0.010916458442807198, 0.036675866693258286, 0.15769276022911072, 0.4073828458786011, 0.04228133708238602, 0.15622197091579437], [0.07985992729663849, 0.06383417546749115, 0.024972105398774147, 0.18746882677078247, 0.11770728975534439, 0.13333363831043243, 0.006719768047332764, 0.04288880154490471, 0.001412510173395276, 0.058754052966833115, 0.14280158281326294, 0.13529875874519348, 0.08268098533153534, 0.02367851696908474, 0.1494951695203781], [0.01403640117496252, 0.014278309419751167, 0.1034439280629158, 0.022417087107896805, 0.10706920921802521, 0.018271848559379578, 0.046350300312042236, 0.04233889281749725, 0.037542134523391724, 0.0005760823260061443, 0.004724643658846617, 0.233056902885437, 0.2574465572834015, 0.1892177164554596, 0.21611936390399933], [0.032590243965387344, 0.14464972913265228, 0.1993260532617569, 0.12327495217323303, 0.27639931440353394, 0.011173157021403313, 0.012838426046073437, 0.0802190750837326, 0.0400678850710392, 0.013469994999468327, 0.025247203186154366, 0.30583158135414124, 0.6397863626480103, 0.258308470249176, 0.08317234367132187], [0.007401467300951481, 0.04209339618682861, 0.1104009672999382, 0.04737341031432152, 0.06253770738840103, 0.0023836863692849874, 0.05026397854089737, 0.01439946424216032, 0.006556188687682152, 0.001721409265883267, 0.01908556930720806, 0.022761031985282898, 0.01600046642124653, 0.22344018518924713, 0.2855986952781677], [0.00031611474696546793, 0.010241325944662094, 0.005327185150235891, 0.007503898814320564, 0.009216651320457458, 0.08986854553222656, 0.0022410263773053885, 0.04830501973628998, 0.013246790505945683, 0.0036830154713243246, 0.001605262397788465, 0.004246865399181843, 0.005818811245262623, 0.00778583250939846, 0.2319662719964981], [0.00028042105259373784, 0.004604758229106665, 0.008834331296384335, 0.010530425235629082, 0.04934454336762428, 0.3239482641220093, 0.02964387647807598, 0.041019540280103683, 0.028070107102394104, 0.002580034313723445, 0.0034616885241121054, 0.006594499107450247, 0.07731658220291138, 0.01784621551632881, 0.10414844751358032], [0.002352550160139799, 0.00811008270829916, 0.007519579492509365, 0.09616736322641373, 0.00784054771065712, 0.06404154002666473, 0.025837063789367676, 0.06720300018787384, 0.008001329377293587, 0.016075177118182182, 0.0036620565224438906, 0.031110821291804314, 0.1529460847377777, 0.03003939613699913, 0.19531111419200897], [0.014062762260437012, 0.03979215770959854, 0.0070105125196278095, 0.010145032778382301, 0.023933248594403267, 0.08613994717597961, 0.027301009744405746, 0.007488427218049765, 0.04610109701752663, 0.00706111453473568, 0.005716769024729729, 0.008516461588442326, 0.04168170318007469, 0.004054774064570665, 0.3198099434375763], [0.0027477010153234005, 0.009237049147486687, 0.005884162615984678, 0.004349177703261375, 0.039300523698329926, 0.06504905968904495, 0.005921225529164076, 0.05048412084579468, 0.004538795445114374, 0.019958311691880226, 0.08035917580127716, 0.1339075267314911, 0.45191076397895813, 0.1108468547463417, 0.15996994078159332], [0.0004566281568259001, 0.0044615683145821095, 0.008062957786023617, 0.0003266451822128147, 0.032452184706926346, 0.004190187435597181, 0.0009983428753912449, 0.0015420016134157777, 0.025539150461554527, 0.0009114624699577689, 0.001308016013354063, 0.11249691247940063, 0.5262115597724915, 0.16036535799503326, 0.02284345217049122], [0.006384413689374924, 0.006966868881136179, 0.013256898149847984, 0.008146845735609531, 0.005910678766667843, 0.005924733821302652, 0.0029809526167809963, 0.004338744096457958, 0.0021091948729008436, 0.02691148780286312, 0.09123647958040237, 0.0904775932431221, 0.10420377552509308, 0.019918829202651978, 0.21981710195541382], [0.004395737312734127, 0.0342060811817646, 0.08344801515340805, 0.012639162130653858, 0.07537969946861267, 0.00383414002135396, 0.007808698806911707, 0.007516762241721153, 0.0023650380317121744, 0.055798787623643875, 0.025632014498114586, 0.040716953575611115, 0.16482838988304138, 0.13848447799682617, 0.17180821299552917], [0.0016022673808038235, 0.013307235203683376, 0.012306403368711472, 0.0029055906925350428, 0.06092625483870506, 0.01653674617409706, 0.008309547789394855, 0.00395687622949481, 0.002493055537343025, 0.0038927635177969933, 0.009680269286036491, 0.23031921684741974, 0.35693949460983276, 0.1708209365606308, 0.050492819398641586], [0.009627100080251694, 0.006502249743789434, 0.0023533182684332132, 0.0021814347710460424, 0.007286426145583391, 0.024909881874918938, 0.01453662570565939, 0.010449647903442383, 0.0028000103775411844, 0.001988302916288376, 0.001580765936523676, 0.013102496974170208, 0.001836722600273788, 0.0008430163725279272, 0.15720587968826294]], [[0.060514166951179504, 0.09119007736444473, 0.5136731863021851, 0.024349171668291092, 0.41056114435195923, 0.043175265192985535, 0.016160618513822556, 0.12711943686008453, 0.029147693887352943, 0.01592664048075676, 0.04504424333572388, 0.03736018016934395, 0.026280265301465988, 0.042564861476421356, 0.13562467694282532], [0.009338664822280407, 0.09596994519233704, 0.12376897037029266, 0.01794583536684513, 0.059337858110666275, 0.04990454390645027, 0.003890786785632372, 0.07171432673931122, 0.0057785604149103165, 0.005389686673879623, 0.009663187898695469, 0.014342015609145164, 0.020640142261981964, 0.04060304909944534, 0.16408833861351013], [0.07689530402421951, 0.027863014489412308, 0.15549975633621216, 0.2693096697330475, 0.73520827293396, 0.03749871999025345, 0.3640631139278412, 0.14002074301242828, 0.16656053066253662, 0.02643253095448017, 0.0061660525389015675, 0.054253485053777695, 0.14240022003650665, 0.14975441992282867, 0.13701564073562622], [0.21953634917736053, 0.22122228145599365, 0.04846278205513954, 0.07968296110630035, 0.3619323670864105, 0.03181222453713417, 0.6669740080833435, 0.3975786566734314, 0.11174946278333664, 0.15518029034137726, 0.004886193200945854, 0.010736972093582153, 0.07725195586681366, 0.09191425889730453, 0.1523013859987259], [0.0740056112408638, 0.054083533585071564, 0.027193741872906685, 0.014972379431128502, 0.04523617774248123, 0.012482533231377602, 0.4212614595890045, 0.25695085525512695, 0.3699147403240204, 0.013461914844810963, 0.08041262626647949, 0.015268572606146336, 0.627507209777832, 0.13811761140823364, 0.19850368797779083], [0.029503263533115387, 0.09333665668964386, 0.016309864819049835, 0.1364656686782837, 0.03873518481850624, 0.019083604216575623, 0.758955180644989, 0.6250144243240356, 0.10551930963993073, 0.0059091635048389435, 0.001959211425855756, 0.004587537609040737, 0.0029548059683293104, 0.011073557659983635, 0.10497581213712692], [0.0038599083200097084, 0.03815716505050659, 0.004112291149795055, 0.0037336996756494045, 0.02896580658853054, 0.003606554586440325, 0.2724342346191406, 0.5795999765396118, 0.041377726942300797, 0.01812332309782505, 0.006642999593168497, 0.006629596464335918, 0.018780261278152466, 0.00801254715770483, 0.11063171178102493], [0.023342538625001907, 0.1589166522026062, 0.01254882663488388, 0.01894153468310833, 0.04743911698460579, 0.015340029262006283, 0.06989605724811554, 0.22605817019939423, 0.016811540350317955, 0.014681086875498295, 0.0061398339457809925, 0.02630683407187462, 0.032653048634529114, 0.05358496680855751, 0.18197578191757202], [0.01728241890668869, 0.12100599706172943, 0.003952578641474247, 0.038103699684143066, 0.00803869217634201, 0.017839567735791206, 0.040644098073244095, 0.014622771181166172, 0.07288665324449539, 0.4550913870334625, 0.18886235356330872, 0.2150641530752182, 0.487347275018692, 0.42817094922065735, 0.12942945957183838], [0.011775199323892593, 0.1349712610244751, 0.005470172502100468, 0.003098055487498641, 0.028361253440380096, 0.03303566575050354, 0.007174484897404909, 0.015601159073412418, 0.006606224924325943, 0.08859884738922119, 0.18040567636489868, 0.31761303544044495, 0.2462366670370102, 0.4818485677242279, 0.12394269555807114], [0.05270439758896828, 0.1637289971113205, 0.009510326199233532, 0.008013473823666573, 0.14090411365032196, 0.011389089748263359, 0.013123652897775173, 0.023534703999757767, 0.009078129194676876, 0.02855684608221054, 0.026650836691260338, 0.39132389426231384, 0.16291603446006775, 0.25967708230018616, 0.10212607681751251], [0.19571052491664886, 0.10246216505765915, 0.02142595686018467, 0.012254489585757256, 0.00365867605432868, 0.007110960781574249, 0.020346596837043762, 0.03192196041345596, 0.00833944883197546, 0.07423693686723709, 0.09786227345466614, 0.08075869083404541, 0.1330210417509079, 0.26891645789146423, 0.17930860817432404], [0.11616674810647964, 0.175978422164917, 0.00425378605723381, 0.017427049577236176, 0.011484457179903984, 0.030517226085066795, 0.08637198060750961, 0.1500588357448578, 0.0009573447750881314, 0.044167183339595795, 0.005869577638804913, 0.0011607500491663814, 0.014711305499076843, 0.027834221720695496, 0.18594378232955933], [0.11675343662500381, 0.17556257545948029, 0.016423039138317108, 0.02097608894109726, 0.06606884300708771, 0.06371303647756577, 0.09760221093893051, 0.2481643557548523, 0.0015754855703562498, 0.03009907715022564, 0.03618617355823517, 0.012020162306725979, 0.17486301064491272, 0.22630257904529572, 0.2108311653137207], [0.004961065016686916, 0.011551961302757263, 0.006318831816315651, 0.002851473866030574, 0.003461753251031041, 0.011111320927739143, 0.004611799493432045, 0.004697122145444155, 0.0026004482060670853, 0.0010426584631204605, 0.0060967751778662205, 0.01239971723407507, 0.004622939508408308, 0.002610035240650177, 0.15716104209423065]], [[0.027552247047424316, 0.013821233063936234, 0.004237555433064699, 0.0007387229125015438, 0.0009859473211690784, 0.001997306477278471, 0.002160864183679223, 0.009250090457499027, 0.0009738927474245429, 0.0009403586154803634, 0.003406830132007599, 0.0010056114988401532, 0.008306043222546577, 0.06191018968820572, 0.18169914186000824], [0.0056476471945643425, 0.0617278628051281, 0.026225095614790916, 0.009516767226159573, 0.019543437287211418, 0.011766157113015652, 0.0015307252760976553, 0.004000868182629347, 0.006223553325980902, 0.02180931344628334, 0.02397397719323635, 0.025289250537753105, 0.01872297003865242, 0.05591608211398125, 0.17309869825839996], [0.5742589831352234, 0.02769068442285061, 0.03131784498691559, 0.008496972732245922, 0.005279624368995428, 0.0009009581408463418, 0.013010378926992416, 0.009255914948880672, 0.08095329999923706, 0.0017015798948705196, 0.0027918636333197355, 0.01474103331565857, 0.07241056859493256, 0.2960302531719208, 0.1991364061832428], [0.3870091140270233, 0.24428580701351166, 0.004871743265539408, 0.01251932606101036, 0.004600874613970518, 0.007045479491353035, 0.011942178010940552, 0.06100638955831528, 0.06223933771252632, 0.00421120086684823, 0.0017708303639665246, 0.010406754910945892, 0.016386834904551506, 0.038040366023778915, 0.25559180974960327], [0.6136646866798401, 0.2692064642906189, 0.043582458049058914, 0.00652115186676383, 0.05291604623198509, 0.006654517259448767, 0.03398957848548889, 0.03886384516954422, 0.13169772922992706, 0.002106831641867757, 0.005907678045332432, 0.01888049766421318, 0.04876947030425072, 0.2226717472076416, 0.22327177226543427], [0.685612678527832, 0.0861489400267601, 0.03236214071512222, 0.16196951270103455, 0.03394145518541336, 0.05551951378583908, 0.027528556063771248, 0.06770895421504974, 0.19389298558235168, 0.03780713677406311, 0.0038191182538866997, 0.05989958345890045, 0.13479465246200562, 0.24111053347587585, 0.15613426268100739], [0.6876600384712219, 0.0606975182890892, 0.05783677101135254, 0.05387236177921295, 0.11914167553186417, 0.004756046459078789, 0.031782086938619614, 0.011465699411928654, 0.1448838710784912, 0.09538520872592926, 0.007872258313000202, 0.033316925168037415, 0.09786565601825714, 0.08940181881189346, 0.23629719018936157], [0.5363585352897644, 0.11579979956150055, 0.10718797892332077, 0.21453110873699188, 0.030864767730236053, 0.026318436488509178, 0.03807519003748894, 0.12262200564146042, 0.08015674352645874, 0.06537020206451416, 0.004594390746206045, 0.015254726633429527, 0.06485987454652786, 0.039039257913827896, 0.16586215794086456], [0.6220377087593079, 0.17304541170597076, 0.23731492459774017, 0.32412996888160706, 0.2203587144613266, 0.09306959062814713, 0.2822628319263458, 0.008407875895500183, 0.14113475382328033, 0.022416740655899048, 0.005183607805520296, 0.0005837879725731909, 0.00799399521201849, 0.006284625735133886, 0.12005029618740082], [0.18509520590305328, 0.21334251761436462, 0.12845394015312195, 0.3693835139274597, 0.41559898853302, 0.19613976776599884, 0.7053389549255371, 0.3886314332485199, 0.06599769741296768, 0.04325481504201889, 0.029052795842289925, 0.001557054347358644, 0.0018087843200191855, 0.0036887156311422586, 0.18107539415359497], [0.612794041633606, 0.24153079092502594, 0.076973557472229, 0.17341682314872742, 0.06242084503173828, 0.2242424041032791, 0.8304246068000793, 0.5655775666236877, 0.4262824058532715, 0.00936043355613947, 0.03881426528096199, 0.0046007027849555016, 0.005786797031760216, 0.020520325750112534, 0.226027712225914], [0.21637925505638123, 0.22487440705299377, 0.19202512502670288, 0.3957260847091675, 0.15970049798488617, 0.16693006455898285, 0.3690066933631897, 0.5193001627922058, 0.6459834575653076, 0.047006867825984955, 0.06868032366037369, 0.043628890067338943, 0.02405296452343464, 0.05333276465535164, 0.08607933670282364], [0.5923737287521362, 0.3536633849143982, 0.08390633016824722, 0.2980528473854065, 0.042989592999219894, 0.026934657245874405, 0.1647067815065384, 0.1620720773935318, 0.6647022366523743, 0.13678880035877228, 0.10115252435207367, 0.012052871286869049, 0.2444845736026764, 0.1799331158399582, 0.10357851535081863], [0.3260110914707184, 0.10825559496879578, 0.040669191628694534, 0.08903322368860245, 0.055108752101659775, 0.014200238510966301, 0.06877616047859192, 0.07561883330345154, 0.7116665244102478, 0.08518233895301819, 0.13964912295341492, 0.01787719503045082, 0.027594367042183876, 0.0709126889705658, 0.09409899264574051], [0.26070404052734375, 0.8011303544044495, 0.17980173230171204, 0.0725909024477005, 0.12434736639261246, 0.28980228304862976, 0.3281027674674988, 0.7843722701072693, 0.12677432596683502, 0.054726697504520416, 0.13370326161384583, 0.19018130004405975, 0.1707623451948166, 0.14939220249652863, 0.07447532564401627]], [[0.10194799304008484, 0.042179130017757416, 0.27587375044822693, 0.8387316465377808, 0.3051532208919525, 0.225641667842865, 0.10655678808689117, 0.4426303505897522, 0.21958006918430328, 0.4376780688762665, 0.7421585917472839, 0.6036965250968933, 0.4420715570449829, 0.6119644045829773, 0.08460802584886551], [0.052479684352874756, 0.018692737445235252, 0.13130725920200348, 0.4463008642196655, 0.4007475674152374, 0.4465942680835724, 0.13863760232925415, 0.26287177205085754, 0.5015351176261902, 0.48749616742134094, 0.19089040160179138, 0.2783986032009125, 0.20843097567558289, 0.11412637680768967, 0.11901978403329849], [0.09998084604740143, 0.05760321766138077, 0.06884635984897614, 0.1367950737476349, 0.03696327656507492, 0.02052011340856552, 0.23966658115386963, 0.6639524102210999, 0.08913422375917435, 0.1896458864212036, 0.14239966869354248, 0.18587030470371246, 0.2512775659561157, 0.1800404042005539, 0.13985422253608704], [0.17776982486248016, 0.2164098620414734, 0.03016561083495617, 0.006355184596031904, 0.04318562150001526, 0.004709928296506405, 0.02340516820549965, 0.07859960943460464, 0.3921053409576416, 0.27134451270103455, 0.2182498425245285, 0.1118401437997818, 0.13378913700580597, 0.4978374242782593, 0.18931511044502258], [0.16739480197429657, 0.20097726583480835, 0.038037389516830444, 0.05488090589642525, 0.020769814029335976, 0.044557277113199234, 0.32692524790763855, 0.5529306530952454, 0.06495681405067444, 0.061963245272636414, 0.3602059483528137, 0.040287844836711884, 0.11072657257318497, 0.3166219890117645, 0.19249440729618073], [0.07948607206344604, 0.4389178156852722, 0.019072405993938446, 0.11389600485563278, 0.015004596672952175, 0.0008035529754124582, 0.00560334138572216, 0.007579134311527014, 0.12602436542510986, 0.4041804373264313, 0.8435949087142944, 0.7255359292030334, 0.3334953784942627, 0.21919409930706024, 0.13174442946910858], [0.11827840656042099, 0.43549492955207825, 0.035650141537189484, 0.3500109016895294, 0.10479609668254852, 0.0029047641437500715, 0.016262628138065338, 0.008920608088374138, 0.1923075020313263, 0.6588289737701416, 0.7271849513053894, 0.8207041025161743, 0.5342087149620056, 0.29674431681632996, 0.16698533296585083], [0.19771254062652588, 0.43774574995040894, 0.057631127536296844, 0.15638697147369385, 0.05497771501541138, 0.0015852008946239948, 0.004800108727067709, 0.0038221883587539196, 0.11230877041816711, 0.6780416369438171, 0.6535694003105164, 0.33372464776039124, 0.2617355287075043, 0.4378974735736847, 0.15096917748451233], [0.2510830760002136, 0.455088347196579, 0.2769528925418854, 0.28598156571388245, 0.08308438956737518, 0.495423823595047, 0.2878262400627136, 0.017540372908115387, 0.036487918347120285, 0.07030303031206131, 0.04537871107459068, 0.017587929964065552, 0.15749330818653107, 0.15622387826442719, 0.134229376912117], [0.2108728438615799, 0.12734071910381317, 0.6047671437263489, 0.5566261410713196, 0.4727993309497833, 0.6295000314712524, 0.20963285863399506, 0.3828260004520416, 0.01981351152062416, 0.02910005673766136, 0.17932364344596863, 0.029557999223470688, 0.02868420071899891, 0.05513756722211838, 0.1339428722858429], [0.2013130933046341, 0.35711804032325745, 0.18803814053535461, 0.31239861249923706, 0.6328845024108887, 0.6068195104598999, 0.09879770874977112, 0.295420378446579, 0.033300116658210754, 0.04495004564523697, 0.027333615347743034, 0.034196678549051285, 0.011724627576768398, 0.023517103865742683, 0.3543241322040558], [0.27807915210723877, 0.07025524973869324, 0.15421687066555023, 0.23079168796539307, 0.0323871448636055, 0.4182601273059845, 0.43312954902648926, 0.3330070972442627, 0.027521615847945213, 0.03977188467979431, 0.03152378648519516, 0.00340716983191669, 0.005408053286373615, 0.0057552107609808445, 0.23170912265777588], [0.15765754878520966, 0.07761365175247192, 0.1382310688495636, 0.33822664618492126, 0.15857987105846405, 0.11602839827537537, 0.3749851584434509, 0.3412497341632843, 0.06253337115049362, 0.09931040555238724, 0.010201470926404, 0.0010190334869548678, 0.0007929145358502865, 0.0016151106683537364, 0.1723894327878952], [0.39988550543785095, 0.09145350754261017, 0.3013111352920532, 0.5813722610473633, 0.4042908251285553, 0.2935561537742615, 0.4903331696987152, 0.4357178807258606, 0.04456466808915138, 0.10430204123258591, 0.10590728372335434, 0.007762597873806953, 0.0026525144930928946, 0.0052152471616864204, 0.24974997341632843], [0.03366217389702797, 0.03653215244412422, 0.027766529470682144, 0.007369572762399912, 0.014929202385246754, 0.04527684673666954, 0.00940654892474413, 0.023517949506640434, 0.010960820131003857, 0.0019369145156815648, 0.01981637440621853, 0.00444602407515049, 0.014915830455720425, 0.007271313574165106, 0.15384840965270996]], [[0.011476250365376472, 0.7629169225692749, 0.02116730809211731, 0.010803135111927986, 0.005132503807544708, 0.009303245693445206, 0.0005040443502366543, 0.022131631150841713, 0.001470191520638764, 0.0017710012616589665, 0.0004086543631274253, 0.0022351557854562998, 0.000896299781743437, 0.0005698543391190469, 0.019197434186935425], [0.0024000771809369326, 0.158247172832489, 0.01897430047392845, 0.019486481323838234, 0.0029122373089194298, 0.015832845121622086, 0.0017470666207373142, 0.00117065932136029, 0.01016113068908453, 0.007651789113879204, 0.0020597530528903008, 0.015201352536678314, 0.016943661496043205, 0.009769451804459095, 0.16634535789489746], [0.00410552928224206, 0.0015743908006697893, 0.01049637421965599, 0.006504607852548361, 0.035339318215847015, 0.9065937995910645, 0.2998698651790619, 0.12215600907802582, 0.013029203750193119, 0.000650988076813519, 0.002043183660134673, 0.006920983083546162, 0.09688588231801987, 0.057574767619371414, 0.009054930880665779], [0.007287806831300259, 0.01375514268875122, 0.001530585577711463, 0.007056740578263998, 0.01978658139705658, 0.9208202958106995, 0.2214416116476059, 0.30606138706207275, 0.052588097751140594, 0.004079628270119429, 0.0024339878000319004, 0.0028739250265061855, 0.04695972800254822, 0.045893676578998566, 0.0110039496794343], [0.006429406348615885, 0.016907041892409325, 0.0023819799534976482, 0.0003115522558800876, 0.006808500271290541, 0.9102355241775513, 0.15379303693771362, 0.07056371122598648, 0.06324119120836258, 0.0030630400869995356, 0.007665702607482672, 0.002797773340716958, 0.13533660769462585, 0.03197972849011421, 0.006115978583693504], [0.014356410130858421, 0.0526699461042881, 0.0007501932559534907, 0.008851941674947739, 0.0005067299935035408, 0.035332534462213516, 0.09051518887281418, 0.049224019050598145, 0.014900125563144684, 0.01856788620352745, 0.0012414768571034074, 0.002389064058661461, 0.0018446464091539383, 0.000877396494615823, 0.22725383937358856], [0.0025407460052520037, 0.32041609287261963, 0.0036992463283240795, 0.02451898716390133, 0.007920290343463421, 0.015527674928307533, 0.03544912114739418, 0.29718661308288574, 0.02347515895962715, 0.026838794350624084, 0.01756858080625534, 0.010445725172758102, 0.005995406303554773, 0.0005847325082868338, 0.2055930197238922], [0.009255345910787582, 0.034783441573381424, 0.010831266641616821, 0.02782595343887806, 0.001477425335906446, 0.006871670484542847, 0.006518858019262552, 0.0072874827310442924, 0.012387615628540516, 0.05288432911038399, 0.04645476117730141, 0.02255677618086338, 0.014156763441860676, 0.00417641457170248, 0.22105874121189117], [0.0017225841293111444, 0.0049251834861934185, 0.007573804818093777, 0.014873476698994637, 0.00903867557644844, 0.0076865823939442635, 0.0017025101697072387, 0.00023153165238909423, 0.024773191660642624, 0.1742238849401474, 0.6002998948097229, 0.6145275831222534, 0.25023365020751953, 0.35489538311958313, 0.039457567036151886], [0.0034636815544217825, 0.39023807644844055, 0.0018667654367163777, 0.0006454490358009934, 0.00025732445647008717, 0.026610050350427628, 0.0026998629327863455, 0.014584111049771309, 0.00032847325201146305, 0.0012709795264527202, 0.07417861372232437, 0.43676891922950745, 0.25757044553756714, 0.32731080055236816, 0.12109360098838806], [0.0014396773185580969, 0.07700426131486893, 0.0003769460890907794, 0.0015669490676373243, 0.0010665652807801962, 0.05166712775826454, 0.003733921330422163, 0.00829349085688591, 9.729996236274019e-05, 0.0004270579374860972, 0.0022819112055003643, 0.3744491934776306, 0.2681969404220581, 0.4920969009399414, 0.028773367404937744], [0.19549021124839783, 0.5118218064308167, 0.053603943437337875, 0.004430307075381279, 0.0015711480518803, 0.024018822237849236, 0.0441354438662529, 0.04134393110871315, 0.0014472270850092173, 0.024767767637968063, 0.029112013056874275, 0.08014442026615143, 0.4702226519584656, 0.40423843264579773, 0.14477935433387756], [0.034691162407398224, 0.09692039340734482, 0.003936667460948229, 0.0164506658911705, 0.0005446859868243337, 0.0016573348548263311, 0.02795562334358692, 0.12881094217300415, 0.0004645287699531764, 0.0021237744949758053, 0.0010291342623531818, 0.001068241661414504, 0.00471450574696064, 0.019945403560996056, 0.19273433089256287], [0.04783029109239578, 0.11157537996768951, 0.02325829118490219, 0.12799327075481415, 0.0216610599309206, 0.41526544094085693, 0.129922553896904, 0.14850500226020813, 0.0009580283658578992, 0.008097043260931969, 0.01107556838542223, 0.019478609785437584, 0.2748490571975708, 0.11550750583410263, 0.15876543521881104], [0.015012643299996853, 0.00804762914776802, 0.00366173661313951, 0.0018753333715721965, 0.0065993256866931915, 0.00479541253298521, 0.005337378475815058, 0.012457020580768585, 0.0033909485209733248, 0.0032401280477643013, 0.00048777347547002137, 0.012255984358489513, 0.0006230318685993552, 0.001543535152450204, 0.1572250872850418]]], [[[0.016101790592074394, 0.0050575402565300465, 0.008322462439537048, 0.006855499465018511, 0.003766664071008563, 0.0032708626240491867, 0.008669405244290829, 0.016983401030302048, 0.023632090538740158, 0.0007983215618878603, 0.006762287113815546, 0.019076332449913025, 0.0018054646207019687, 0.011848386377096176, 0.23875673115253448], [0.03118298575282097, 0.022700916975736618, 0.01820814236998558, 0.011041272431612015, 0.013735579326748848, 0.003388292621821165, 0.014374880120158195, 0.0029534229543060064, 0.06276529282331467, 0.0010488847037777305, 0.005698299501091242, 0.018068330362439156, 0.009247002191841602, 0.010645000264048576, 0.2274351567029953], [0.10749327391386032, 0.01361121516674757, 0.01930609717965126, 0.025707745924592018, 0.010174103081226349, 0.0019352196250110865, 0.006933925207704306, 0.026056114584207535, 0.003662128932774067, 0.006897854618728161, 0.0015213300939649343, 0.006132383830845356, 0.0028239174280315638, 0.013304864056408405, 0.22739072144031525], [0.25010421872138977, 0.005582309328019619, 0.006115755997598171, 0.08664196729660034, 0.005224197171628475, 0.005311913322657347, 0.03281412273645401, 0.024678068235516548, 0.018595430999994278, 0.0819764956831932, 0.005479714833199978, 0.008821909315884113, 0.02042486146092415, 0.03525637462735176, 0.19444485008716583], [0.1781134456396103, 0.021083489060401917, 0.038613177835941315, 0.16417931020259857, 0.0029645320028066635, 0.00899361353367567, 0.009076704271137714, 0.01357053779065609, 0.01101364754140377, 0.04086701199412346, 0.014270029030740261, 0.011464214883744717, 0.011689195409417152, 0.0706799253821373, 0.3730076551437378], [0.3090042769908905, 0.031162124127149582, 0.033009856939315796, 0.14512063562870026, 0.00411824369803071, 0.07382509857416153, 0.02702517993748188, 0.07667822390794754, 0.021658627316355705, 0.01615101285278797, 0.0066233747638762, 0.008623828180134296, 0.0008525048615410924, 0.011195158585906029, 0.2578849792480469], [0.3291372060775757, 0.0561586357653141, 0.4192807674407959, 0.4571635127067566, 0.057550910860300064, 0.04359428584575653, 0.005270917434245348, 0.03804505616426468, 0.03733760863542557, 0.20409555733203888, 0.04554562643170357, 0.024629684165120125, 0.018161950632929802, 0.04353561997413635, 0.145583838224411], [0.3828665316104889, 0.019200418144464493, 0.34599530696868896, 0.4376910328865051, 0.07537391781806946, 0.036528222262859344, 0.04610925167798996, 0.04538694769144058, 0.1663823127746582, 0.04690397158265114, 0.05553056299686432, 0.021811597049236298, 0.012554574757814407, 0.03599526360630989, 0.1534716635942459], [0.08861738443374634, 0.06363938748836517, 0.7135313749313354, 0.146565243601799, 0.3346884250640869, 0.3544132113456726, 0.12204702943563461, 0.028818881139159203, 0.04564356431365013, 0.03288809210062027, 0.06753166019916534, 0.12387087196111679, 0.029650555923581123, 0.014753012917935848, 0.04379607364535332], [0.03655187785625458, 0.006058508530259132, 0.04018249735236168, 0.08900216966867447, 0.027111714705824852, 0.006408872082829475, 0.03783104568719864, 0.010064247064292431, 0.2550305724143982, 0.008420061320066452, 0.012097015976905823, 0.017737949267029762, 0.0012783813290297985, 0.0026436946354806423, 0.172612726688385], [0.1163061186671257, 0.04424217715859413, 0.014033653773367405, 0.03590161353349686, 0.06527962535619736, 0.00195779325440526, 0.027195196598768234, 0.1581626534461975, 0.30849722027778625, 0.1652299016714096, 0.04234298691153526, 0.05585171654820442, 0.016547594219446182, 0.04909297078847885, 0.08752257376909256], [0.1013311892747879, 0.06866802275180817, 0.06425411254167557, 0.4572087228298187, 0.04987834766507149, 0.005650981329381466, 0.053177352994680405, 0.04739876464009285, 0.2551265060901642, 0.06654207408428192, 0.20209699869155884, 0.04737241193652153, 0.042119286954402924, 0.22778292000293732, 0.10508881509304047], [0.24632138013839722, 0.045121580362319946, 0.12561434507369995, 0.43826135993003845, 0.07532560080289841, 0.002372375223785639, 0.0398109070956707, 0.026653334498405457, 0.5938559174537659, 0.12655052542686462, 0.04707850515842438, 0.018195422366261482, 0.010826833546161652, 0.023274976760149002, 0.14916135370731354], [0.12666325271129608, 0.047387395054101944, 0.04497509077191353, 0.23918962478637695, 0.016611548140645027, 0.009305250830948353, 0.02713325433433056, 0.030590379610657692, 0.4573454260826111, 0.17728003859519958, 0.08635216951370239, 0.05938294902443886, 0.008936652913689613, 0.028742672875523567, 0.15077541768550873], [0.03701020032167435, 0.037774376571178436, 0.1161394715309143, 0.09335700422525406, 0.015312368050217628, 0.026739761233329773, 0.013009096495807171, 0.005902147851884365, 0.07189750671386719, 0.00625182269141078, 0.056744903326034546, 0.06423129141330719, 0.06661844998598099, 0.02100159414112568, 0.2252311259508133]], [[0.0034671342000365257, 0.05013812705874443, 0.16192083060741425, 0.3595426082611084, 0.20735634863376617, 0.08139260113239288, 0.009979248046875, 0.05037669837474823, 0.0023427342530339956, 6.08037480560597e-05, 0.003484810469672084, 0.023961462080478668, 0.38460296392440796, 0.24992075562477112, 0.13989195227622986], [0.6699675917625427, 0.09382463991641998, 0.2939082980155945, 0.17940783500671387, 0.06414232403039932, 0.05161670595407486, 0.09315118193626404, 0.0025183490943163633, 0.0024716362822800875, 0.00784118939191103, 0.06077995523810387, 0.010742363519966602, 0.027031319215893745, 0.033606547862291336, 0.020909229293465614], [0.2646949589252472, 0.029353437945246696, 0.21451972424983978, 0.10881441831588745, 0.06597915291786194, 0.0030848400201648474, 0.011694483458995819, 0.021679535508155823, 0.002872215351089835, 0.013158812187612057, 0.002100167330354452, 6.679360376438126e-05, 0.004520595073699951, 0.019191764295101166, 0.15631338953971863], [0.040224652737379074, 0.02035309188067913, 0.3179875612258911, 0.11730892956256866, 0.5032125115394592, 0.4173433780670166, 0.2045394331216812, 0.3468436896800995, 0.0142394183203578, 0.034110911190509796, 0.0166803989559412, 0.0005183254834264517, 0.014372344128787518, 0.013749183155596256, 0.07609989494085312], [0.0153636634349823, 0.002009550342336297, 0.5970484614372253, 0.5668097734451294, 0.03708057850599289, 0.030387206003069878, 0.003990367520600557, 0.00021067907800897956, 0.0006718098884448409, 0.004241611808538437, 0.01157804112881422, 0.0002699779870454222, 0.0015558624872937799, 0.0029094237834215164, 0.04601351544260979], [0.03574535250663757, 0.009626551531255245, 0.4402237832546234, 0.2294078767299652, 0.26443710923194885, 0.01504121907055378, 0.016090886667370796, 0.007329131942242384, 0.002309221774339676, 0.0030864060390740633, 0.0026519321836531162, 0.0004272839578334242, 0.0011082548880949616, 0.01614256016910076, 0.03275791555643082], [6.553631828865036e-05, 0.000357702374458313, 0.08750326931476593, 0.01436514500528574, 0.006815748754888773, 0.6623476147651672, 0.0034670215100049973, 0.0015547194052487612, 0.00029766204534098506, 1.8653441657079384e-05, 0.0003687080170493573, 0.00015007570618763566, 0.0009929342195391655, 0.00030579339363612235, 0.0016504023224115372], [0.0004548979632090777, 7.145033305278048e-05, 0.025678247213363647, 0.00989772193133831, 0.007979623042047024, 0.6904858946800232, 0.04177143797278404, 0.0005172804230824113, 0.00045151059748604894, 9.678980859462172e-05, 0.0003766386944334954, 0.00020437331113498658, 0.0009936039568856359, 0.0004823105991818011, 0.001104293274693191], [0.02770741656422615, 0.15481999516487122, 0.0164713803678751, 0.029219333082437515, 0.01727348566055298, 0.0033895254600793123, 0.08395758271217346, 0.08886045962572098, 0.06561290472745895, 0.23454923927783966, 0.01131775975227356, 0.00014876923523843288, 0.021633606404066086, 0.032435301691293716, 0.2441566288471222], [0.0002423129917588085, 0.0011915951035916805, 0.0022339578717947006, 0.006169029977172613, 0.0026169228367507458, 0.006970150861889124, 0.0023872333113104105, 0.020186979323625565, 0.5034035444259644, 0.061859097331762314, 0.01802009530365467, 0.08541904389858246, 0.11395227909088135, 0.12879255414009094, 0.06123032420873642], [0.0016445622313767672, 0.0006882954621687531, 0.0003155411686748266, 0.0014561355346813798, 0.0007120753289200366, 0.00010650769399944693, 0.0005508221802301705, 0.004306118004024029, 0.4519909620285034, 0.2298276424407959, 0.04858560487627983, 0.008956322446465492, 0.005770590156316757, 0.011063157580792904, 0.0306133683770895], [0.0032223593443632126, 0.0006265831179916859, 0.002176017500460148, 0.010606854222714901, 0.0010762742022052407, 6.259929068619385e-05, 0.0013370343949645758, 0.0014808439882472157, 0.030783534049987793, 0.7491747736930847, 0.34058046340942383, 0.00350938574410975, 0.02303031086921692, 0.0742756798863411, 0.006112673785537481], [0.010601752437651157, 0.009935700334608555, 0.0694134384393692, 0.14514312148094177, 0.01701076701283455, 0.0001025431411108002, 0.003628269536420703, 0.007610301487147808, 0.1447119563817978, 0.2691461443901062, 0.7685887217521667, 0.06739932298660278, 0.05600086599588394, 0.567065417766571, 0.01997430995106697], [0.0020818221382796764, 0.006225256249308586, 0.007747206371277571, 0.02054281160235405, 0.00644321832805872, 0.00019787036580964923, 0.0007576930802315474, 0.0013290452770888805, 0.1748982071876526, 0.20870953798294067, 0.6057864427566528, 0.2165842056274414, 0.10265108197927475, 0.12960675358772278, 0.026959752663969994], [0.0929064005613327, 0.3412420153617859, 0.13197122514247894, 0.20421825349330902, 0.6308890581130981, 0.08085004985332489, 0.35388287901878357, 0.3416491150856018, 0.024628864601254463, 0.013967287726700306, 0.0762757882475853, 0.26007020473480225, 0.3328040838241577, 0.09019435197114944, 0.014360385946929455]], [[0.014275058172643185, 0.006687531713396311, 0.3026585280895233, 0.06917963922023773, 0.2396276444196701, 0.6229325532913208, 0.15904799103736877, 0.13992713391780853, 0.10272591561079025, 0.6685669422149658, 0.22624024748802185, 0.09492585808038712, 0.40837499499320984, 0.2735627591609955, 0.011893448419868946], [0.021194536238908768, 0.020265106111764908, 0.1736137419939041, 0.08712188154459, 0.3174395263195038, 0.3545694649219513, 0.3640749752521515, 0.11553992331027985, 0.3069344758987427, 0.7487083673477173, 0.45964598655700684, 0.41950592398643494, 0.6157799363136292, 0.47228363156318665, 0.04039919748902321], [0.008898869156837463, 0.002019912237301469, 0.021509699523448944, 0.0182319525629282, 0.07474909722805023, 0.02385670319199562, 0.013716273009777069, 0.008799813687801361, 0.3437807857990265, 0.008914400823414326, 0.012629772536456585, 0.10342472046613693, 0.0370708666741848, 0.023541903123259544, 0.18654775619506836], [0.01223641075193882, 0.003142833709716797, 0.006001354195177555, 0.003996475599706173, 0.0579916350543499, 0.01896491087973118, 0.01948327198624611, 0.013184066861867905, 0.30560916662216187, 0.015957718715071678, 0.016950437799096107, 0.06207568570971489, 0.044481322169303894, 0.01894378289580345, 0.19150091707706451], [0.003971019294112921, 0.0012432326329872012, 0.005908531602472067, 0.0021760377567261457, 0.002044213702902198, 0.01004379615187645, 0.01574278064072132, 0.026324355974793434, 0.4105670154094696, 0.05117517337203026, 0.02775881439447403, 0.023424910381436348, 0.009920927695930004, 0.011210974305868149, 0.16597995162010193], [0.007421860471367836, 0.006305157672613859, 0.011464249342679977, 0.020268600434064865, 0.025753991678357124, 0.031131377443671227, 0.03418951481580734, 0.0052986773662269115, 0.5788748264312744, 0.46168622374534607, 0.07252157479524612, 0.06022901460528374, 0.017210712656378746, 0.04054110497236252, 0.15131165087223053], [0.001541785546578467, 0.0008907613810151815, 0.004846525378525257, 0.001811343478038907, 0.0069520194083452225, 0.008084121160209179, 0.021458715200424194, 0.02802192233502865, 0.3832707405090332, 0.25552085041999817, 0.014592574909329414, 0.01065820176154375, 0.012523604556918144, 0.010731800459325314, 0.22416816651821136], [0.004116748925298452, 0.0016883857315406203, 0.014749680645763874, 0.00869818776845932, 0.01003838051110506, 0.007631313521414995, 0.02068890631198883, 0.027104953303933144, 0.13497500121593475, 0.6378710865974426, 0.10288828611373901, 0.0942029282450676, 0.028772620484232903, 0.05935161933302879, 0.21764545142650604], [0.06222981959581375, 0.01881357654929161, 0.00486758491024375, 0.015509632416069508, 0.0009378677350468934, 0.004574655555188656, 0.005093523766845465, 0.0076056248508393764, 0.02507362887263298, 0.02107030339539051, 0.007815904915332794, 0.010442771948873997, 0.011698074638843536, 0.006942160427570343, 0.31572407484054565], [0.01727244071662426, 0.009210732765495777, 0.005953751504421234, 0.0013454181607812643, 0.005081892944872379, 0.04435739293694496, 0.006434922106564045, 0.0007962443050928414, 0.0007702711154706776, 0.16453301906585693, 0.5625144839286804, 0.34227296710014343, 0.6355522871017456, 0.6161591410636902, 0.02771596610546112], [0.12786830961704254, 0.008172453381121159, 0.0017843057867139578, 0.004017683211714029, 0.007877650670707226, 0.0018398476531729102, 0.01566770300269127, 0.0026914728805422783, 0.0035052604507654905, 0.0037441153544932604, 0.011492998339235783, 0.10472051054239273, 0.01954079605638981, 0.025050928816199303, 0.24727097153663635], [0.1465907245874405, 0.037033673375844955, 0.013877127319574356, 0.00413108617067337, 0.00966043584048748, 0.02326187677681446, 0.04576379433274269, 0.010370912030339241, 0.05009477958083153, 0.002161832293495536, 0.012562266550958157, 0.08835282921791077, 0.018735390156507492, 0.07781965285539627, 0.21298982203006744], [0.018177246674895287, 0.009594686329364777, 0.010616189800202847, 0.003939185757189989, 0.020018288865685463, 0.006944165099412203, 0.014553648419678211, 0.014575640670955181, 0.031773608177900314, 0.0201406329870224, 0.008282337337732315, 0.02822018228471279, 0.008926213718950748, 0.030271533876657486, 0.18345791101455688], [0.029857823625206947, 0.018949948251247406, 0.0061294399201869965, 0.002908851485699415, 0.00919707678258419, 0.00952958408743143, 0.01205661240965128, 0.00758303003385663, 0.05086279660463333, 0.007759919855743647, 0.006360263098031282, 0.02717713639140129, 0.006157578434795141, 0.027468249201774597, 0.21562480926513672], [0.035946138203144073, 0.021175134927034378, 0.025809520855545998, 0.0228139478713274, 0.02454732172191143, 0.008901212364435196, 0.01817207969725132, 0.024075007066130638, 0.042662542313337326, 0.10151555389165878, 0.03429628908634186, 0.025050567463040352, 0.015684176236391068, 0.028640326112508774, 0.23519039154052734]], [[0.29903000593185425, 0.5539957880973816, 0.06723504513502121, 0.06922264397144318, 0.12363186478614807, 0.04431891441345215, 0.10694187879562378, 0.08094406872987747, 0.15170463919639587, 0.05897890776395798, 0.026665056124329567, 0.04277891665697098, 0.011532573029398918, 0.016366619616746902, 0.08233406394720078], [0.030788322910666466, 0.06814564764499664, 0.1441766321659088, 0.42568475008010864, 0.23481200635433197, 0.09723259508609772, 0.20801249146461487, 0.2833361029624939, 0.12989479303359985, 0.09075285494327545, 0.02217184565961361, 0.10632100701332092, 0.07123817503452301, 0.18399499356746674, 0.11842577904462814], [0.21215111017227173, 0.2570435404777527, 0.03298918902873993, 0.11753708124160767, 0.2531988024711609, 0.2834656238555908, 0.13087181746959686, 0.14389817416667938, 0.06408312171697617, 0.023736948147416115, 0.043677639216184616, 0.007582403719425201, 0.08098249137401581, 0.042930904775857925, 0.09848955273628235], [0.24232596158981323, 0.4370230436325073, 0.27921250462532043, 0.32216426730155945, 0.14763100445270538, 0.1446210741996765, 0.041608523577451706, 0.05782362446188927, 0.03667302429676056, 0.015881532803177834, 0.09886573255062103, 0.0007486737449653447, 0.022804880514740944, 0.01436265092343092, 0.04328664019703865], [0.0417991504073143, 0.06808368116617203, 0.22980956733226776, 0.06044253334403038, 0.09120408445596695, 0.3664403557777405, 0.01738058589398861, 0.026107804849743843, 0.16878005862236023, 0.007388730999082327, 0.6907519698143005, 0.00283504044637084, 0.004864559043198824, 0.017621232196688652, 0.04920867085456848], [0.07025078684091568, 0.08007846027612686, 0.18737106025218964, 0.08649075031280518, 0.14398247003555298, 0.03926409035921097, 0.10999412834644318, 0.10028164088726044, 0.2733333110809326, 0.07497494667768478, 0.6277027726173401, 0.03760387748479843, 0.07242996245622635, 0.04469411447644234, 0.0635850802063942], [0.18292218446731567, 0.29889917373657227, 0.16216641664505005, 0.041324593126773834, 0.08738134056329727, 0.03374062106013298, 0.10780933499336243, 0.1685270518064499, 0.3661736249923706, 0.13795819878578186, 0.7607439160346985, 0.022037923336029053, 0.11896573007106781, 0.017960727214813232, 0.09792909026145935], [0.29104405641555786, 0.7119240164756775, 0.16990531980991364, 0.02345188707113266, 0.15646961331367493, 0.008449066430330276, 0.06418811529874802, 0.018176060169935226, 0.3091927766799927, 0.08911041170358658, 0.3005200922489166, 0.04236089810729027, 0.2996547222137451, 0.08733220398426056, 0.07523740082979202], [0.046947941184043884, 0.14375551044940948, 0.004344047512859106, 0.0067795743234455585, 0.02948000282049179, 0.08397668600082397, 0.06400846689939499, 0.18865461647510529, 0.023663662374019623, 0.08527978509664536, 0.02815503440797329, 0.04117048531770706, 0.5833349823951721, 0.0677085593342781, 0.23153413832187653], [0.08349642902612686, 0.4532567262649536, 0.004409583285450935, 0.009004302322864532, 0.007938031107187271, 0.13749390840530396, 0.1858609914779663, 0.31525370478630066, 0.018453413620591164, 0.12712040543556213, 0.04680929332971573, 0.12408707290887833, 0.13737666606903076, 0.12311573326587677, 0.142713725566864], [0.05042501911520958, 0.07026762515306473, 0.0020696106366813183, 0.010109566152095795, 0.07710029184818268, 0.05610239878296852, 0.05948542803525925, 0.19247274100780487, 0.001940111513249576, 0.05155838653445244, 0.04620450362563133, 0.20989066362380981, 0.485702246427536, 0.4166657328605652, 0.18102103471755981], [0.09080760926008224, 0.09187275916337967, 0.012195594608783722, 0.021634280681610107, 0.019499676302075386, 0.09054076671600342, 0.11008334904909134, 0.23214302957057953, 0.0423310361802578, 0.034868963062763214, 0.06751228123903275, 0.049237679690122604, 0.03915484994649887, 0.08995199203491211, 0.1941523253917694], [0.0706457570195198, 0.10473088920116425, 0.039385173469781876, 0.02697153575718403, 0.04372800514101982, 0.06655491143465042, 0.23491710424423218, 0.19935868680477142, 0.036273516714572906, 0.06345809996128082, 0.020782677456736565, 0.12393849343061447, 0.05726756155490875, 0.041495081037282944, 0.15982753038406372], [0.039186086505651474, 0.11076691001653671, 0.03891725465655327, 0.009549588896334171, 0.01825849525630474, 0.051163915544748306, 0.1146436408162117, 0.1649821698665619, 0.03586947172880173, 0.06679365783929825, 0.09092967957258224, 0.14827685058116913, 0.10948126018047333, 0.10746686905622482, 0.1515202671289444], [0.14541134238243103, 0.05313154682517052, 0.01991144008934498, 0.08764121681451797, 0.014597749337553978, 0.03937898576259613, 0.04872390255331993, 0.04689335823059082, 0.04558950290083885, 0.051970891654491425, 0.02520112879574299, 0.022838978096842766, 0.00921469647437334, 0.00801294855773449, 0.21471147239208221]], [[0.009874092414975166, 0.0475393682718277, 0.0700187012553215, 0.05995699018239975, 0.023110831156373024, 0.04304451867938042, 0.02397323027253151, 0.09104450792074203, 0.13320927321910858, 0.0718994140625, 0.16378211975097656, 0.06306017935276031, 0.03516274318099022, 0.06407153606414795, 0.1927335411310196], [0.007679122034460306, 0.008519956842064857, 0.023641018196940422, 0.036320336163043976, 0.005810021422803402, 0.002834178740158677, 0.01027101743966341, 0.005131446290761232, 0.05288401618599892, 0.022729018703103065, 0.02885960415005684, 0.007142365910112858, 0.005423326510936022, 0.00592823838815093, 0.23125353455543518], [0.17363575100898743, 0.08529574424028397, 0.018747013062238693, 0.09323837608098984, 0.07366655766963959, 0.2784116566181183, 0.6226999759674072, 0.6422466039657593, 0.18433590233325958, 0.44911590218544006, 0.07703087478876114, 0.23628254234790802, 0.37835898995399475, 0.3362680971622467, 0.10061702132225037], [0.039354946464300156, 0.028671007603406906, 0.0009692042949609458, 0.010166235268115997, 0.003592043649405241, 0.024686597287654877, 0.0576656274497509, 0.10543617606163025, 0.069565050303936, 0.23999209702014923, 0.0370241142809391, 0.07099387794733047, 0.08031197637319565, 0.0629396140575409, 0.19831009209156036], [0.07821620255708694, 0.07413192838430405, 0.008470119908452034, 0.005837618373334408, 0.016890503466129303, 0.34118980169296265, 0.6424257159233093, 0.5736639499664307, 0.18751046061515808, 0.08286380022764206, 0.013973995111882687, 0.16452431678771973, 0.6265572905540466, 0.24633896350860596, 0.03771306574344635], [0.08601168543100357, 0.11519530415534973, 0.00501672737300396, 0.0384475477039814, 0.0009856059914454818, 0.020220156759023666, 0.4602939486503601, 0.41334664821624756, 0.011432202532887459, 0.039776530116796494, 0.004202698357403278, 0.012451107613742352, 0.012797003611922264, 0.0109980758279562, 0.22371669113636017], [0.05821564793586731, 0.2493630200624466, 0.017187682911753654, 0.007334073074162006, 0.002277297666296363, 0.012770043686032295, 0.014771709218621254, 0.06810285151004791, 0.008148171938955784, 0.093966543674469, 0.03078475221991539, 0.016961626708507538, 0.009818210266530514, 0.005369590129703283, 0.2805846929550171], [0.0315314382314682, 0.006441309116780758, 0.005187691655009985, 0.0023020647931843996, 0.001103160553611815, 0.0010285694152116776, 0.0036586276255548, 0.0034369472414255142, 0.02540425956249237, 0.018933216109871864, 0.011261656880378723, 0.014689027331769466, 0.0047272746451199055, 0.003173592034727335, 0.27608010172843933], [0.052501752972602844, 0.03902341425418854, 0.022159013897180557, 0.15980832278728485, 0.04565480723977089, 0.04961955174803734, 0.10487794876098633, 0.03556728735566139, 0.011893571354448795, 0.350600004196167, 0.8153157234191895, 0.696418821811676, 0.19642634689807892, 0.7945331335067749, 0.025074943900108337], [0.008775658905506134, 0.0231929961591959, 0.001974506536498666, 0.02221933752298355, 0.002016729209572077, 0.03464629501104355, 0.020560195669531822, 0.015741808339953423, 0.024821357801556587, 0.03194829449057579, 0.062133170664310455, 0.009445058181881905, 0.008440939709544182, 0.031038939952850342, 0.24359388649463654], [0.15448324382305145, 0.15535393357276917, 0.0009195139864459634, 0.02347545325756073, 0.010745828039944172, 0.05933469906449318, 0.0886014774441719, 0.09891750663518906, 0.008176282048225403, 0.17814745008945465, 0.04613054543733597, 0.10348650068044662, 0.06132601201534271, 0.10257216542959213, 0.2144334316253662], [0.1637454628944397, 0.3587695062160492, 0.013175190426409245, 0.027070751413702965, 0.009701711125671864, 0.027045298367738724, 0.06057014688849449, 0.08674251288175583, 0.018084047362208366, 0.012978773564100266, 0.04984384402632713, 0.0746963769197464, 0.21545591950416565, 0.18275731801986694, 0.18403297662734985], [0.04016833007335663, 0.03071952983736992, 0.0073937661945819855, 0.044594794511795044, 0.005693770945072174, 0.007929249666631222, 0.19023852050304413, 0.12198647856712341, 0.00967123731970787, 0.05747445672750473, 0.006795276887714863, 0.006636326666921377, 0.014849998988211155, 0.02297961339354515, 0.1823122203350067], [0.08359953761100769, 0.14515268802642822, 0.009139984846115112, 0.10055579245090485, 0.007817201316356659, 0.06191832944750786, 0.24591712653636932, 0.26670339703559875, 0.008127851411700249, 0.05132465437054634, 0.011226493865251541, 0.020721180364489555, 0.025672290474176407, 0.06137499585747719, 0.19538666307926178], [0.004038439132273197, 0.01158715970814228, 0.012492671608924866, 0.008604439906775951, 0.0044732466340065, 0.001471644383855164, 0.003622728632763028, 0.005392232909798622, 0.024040954187512398, 0.002572751836851239, 0.011896335519850254, 0.00655994052067399, 0.004419950768351555, 0.0023605322930961847, 0.2578853368759155]], [[0.020951254293322563, 0.19576001167297363, 0.05422525107860565, 0.000516751199029386, 0.0576050765812397, 0.039616964757442474, 0.0011584623716771603, 0.06260760873556137, 0.05524995177984238, 5.760174462920986e-05, 0.0005486492882482708, 0.01856253668665886, 0.008022493682801723, 0.0032547120936214924, 0.1980074942111969], [0.15878187119960785, 0.5755441188812256, 0.073322594165802, 0.006848999299108982, 0.04221894592046738, 0.057610929012298584, 0.01498481910675764, 0.15564584732055664, 0.02557745948433876, 0.010493909008800983, 0.04444737732410431, 0.10564734041690826, 0.04703369736671448, 0.007807346060872078, 0.10371111333370209], [0.0667557343840599, 0.5756934881210327, 0.02783285267651081, 0.001271323417313397, 0.13096383213996887, 0.007863562554121017, 0.0004880728665739298, 0.00786207988858223, 0.030193913727998734, 0.0004458925104700029, 0.0008183285826817155, 0.003005507169291377, 0.008833326399326324, 0.014566708356142044, 0.09050195664167404], [0.006902126595377922, 0.22582471370697021, 0.027240794152021408, 0.000252248632023111, 0.08146748691797256, 0.008376134559512138, 0.0017193618696182966, 0.010283069685101509, 0.09191752970218658, 1.873078872449696e-05, 0.0001427968527423218, 0.0006295929779298604, 0.016630304977297783, 0.005029548890888691, 0.17517179250717163], [0.46813952922821045, 0.7474208474159241, 0.04419572278857231, 0.039987821131944656, 0.07900705188512802, 0.010286353528499603, 0.008277984336018562, 0.21022778749465942, 0.018339863047003746, 0.003122991183772683, 0.0047759185545146465, 0.0031952662393450737, 0.0037801233120262623, 0.005526377819478512, 0.11187370121479034], [0.08057912439107895, 0.09254536032676697, 0.26037144660949707, 0.04459136351943016, 0.19053104519844055, 0.18187369406223297, 0.04494835063815117, 0.08866222947835922, 0.05515718460083008, 0.011219717562198639, 0.041749756783246994, 0.13417255878448486, 0.43527963757514954, 0.4240920841693878, 0.05903848633170128], [0.005677447654306889, 0.1104632169008255, 0.17886187136173248, 0.06816153228282928, 0.31320425868034363, 0.08580746501684189, 0.044242095202207565, 0.4031389355659485, 0.13310441374778748, 8.991359209176153e-05, 0.00051962147699669, 0.017516016960144043, 0.02517649158835411, 0.02827705629169941, 0.13873830437660217], [0.009441166184842587, 0.04568161070346832, 0.08503290265798569, 0.055850934237241745, 0.15800173580646515, 0.09921947866678238, 0.2719998359680176, 0.7131122350692749, 0.12690743803977966, 0.0015569856623187661, 0.019959524273872375, 0.06398878246545792, 0.1124982088804245, 0.07506788522005081, 0.06075114384293556], [0.1778930425643921, 0.41812169551849365, 0.05459700897336006, 0.015388981439173222, 0.296997606754303, 0.041353121399879456, 0.1696915328502655, 0.1226804181933403, 0.3453136682510376, 0.006036087870597839, 0.008416525088250637, 0.004891113843768835, 0.003974124789237976, 0.0023401544895023108, 0.04184575751423836], [0.0018550200620666146, 0.2628808617591858, 0.0018376001389697194, 9.925621998263523e-05, 0.008250601589679718, 0.11965687572956085, 0.011913565918803215, 0.3649533987045288, 0.12527383863925934, 0.0011617891723290086, 0.002173396060243249, 0.011088940314948559, 0.02579125389456749, 0.004398738034069538, 0.18079015612602234], [0.0033212341368198395, 0.4786561131477356, 0.00019389556837268174, 4.100392834516242e-05, 0.03255903348326683, 0.004482456482946873, 0.0018638258334249258, 0.04032744839787483, 0.151435986161232, 0.0011174781247973442, 0.0008650964009575546, 0.049343932420015335, 0.013284855522215366, 0.009702197276055813, 0.17111515998840332], [0.015286837704479694, 0.17760051786899567, 0.012107143178582191, 0.004069492220878601, 0.40114596486091614, 0.005856915842741728, 0.025313973426818848, 0.23595470190048218, 0.5599475502967834, 0.019674712792038918, 0.01789786107838154, 0.0449712835252285, 0.024323459714651108, 0.008310162462294102, 0.10516723990440369], [0.013816175982356071, 0.10832668840885162, 0.014126134105026722, 0.0044770012609660625, 0.18972823023796082, 0.04144473373889923, 0.013167506083846092, 0.0398833267390728, 0.08117146790027618, 0.03379456326365471, 0.04336484149098396, 0.6766878366470337, 0.6025072932243347, 0.24042664468288422, 0.05677386373281479], [0.010657100938260555, 0.1729527860879898, 0.006031150463968515, 0.006062258500605822, 0.10042858123779297, 0.007653414737433195, 0.0031583579257130623, 0.014785557985305786, 0.13275322318077087, 0.05689838156104088, 0.04302775487303734, 0.36964303255081177, 0.3870774507522583, 0.31299954652786255, 0.07590257376432419], [0.014769526198506355, 0.05199434980750084, 0.11582475155591965, 0.14804258942604065, 0.05702318996191025, 0.3275434374809265, 0.3759170472621918, 0.3329218327999115, 0.027774346992373466, 0.12548163533210754, 0.13219930231571198, 0.029332099482417107, 0.2028164267539978, 0.518939197063446, 4.3280975660309196e-05]], [[0.5917359590530396, 0.12410512566566467, 0.24872945249080658, 0.20040015876293182, 0.21720361709594727, 0.11561702191829681, 0.58521568775177, 0.41413450241088867, 0.22558750212192535, 0.117314413189888, 0.3378458619117737, 0.10710897296667099, 0.0625920221209526, 0.24034489691257477, 0.0060951621271669865], [0.03933318331837654, 0.17479471862316132, 0.1999012678861618, 0.1507989913225174, 0.2344110906124115, 0.41628938913345337, 0.19733835756778717, 0.42009472846984863, 0.32125937938690186, 0.09302358329296112, 0.29758843779563904, 0.2500022351741791, 0.15192696452140808, 0.19621950387954712, 0.06078135594725609], [0.03998054191470146, 0.02165106125175953, 0.5779209733009338, 0.4094802737236023, 0.3219829499721527, 0.23359909653663635, 0.15223096311092377, 0.0776560828089714, 0.11850404739379883, 0.1752316802740097, 0.7765606641769409, 0.15624035894870758, 0.19448350369930267, 0.3389243483543396, 0.015656093135476112], [0.2606712579727173, 0.23122362792491913, 0.33188652992248535, 0.327752023935318, 0.0930425301194191, 0.13157396018505096, 0.5079332590103149, 0.15524731576442719, 0.2039693295955658, 0.336448073387146, 0.7406277656555176, 0.11173539608716965, 0.03980698063969612, 0.2757716476917267, 0.009055807255208492], [0.03992704302072525, 0.03562299162149429, 0.05761631205677986, 0.04593607783317566, 0.747100830078125, 0.13848423957824707, 0.25807130336761475, 0.11098858714103699, 0.025020861998200417, 0.027831630781292915, 0.07712040096521378, 0.5344594120979309, 0.28488224744796753, 0.37143638730049133, 0.060307834297418594], [0.146702840924263, 0.5779150128364563, 0.04704871401190758, 0.12512727081775665, 0.05839477851986885, 0.5817644596099854, 0.2541782557964325, 0.167904794216156, 0.020014837384223938, 0.0557471327483654, 0.1778557300567627, 0.29983726143836975, 0.34978994727134705, 0.3759990334510803, 0.07532685250043869], [0.14372284710407257, 0.20398879051208496, 0.060162752866744995, 0.022449441254138947, 0.15882903337478638, 0.12907396256923676, 0.7781419157981873, 0.20689332485198975, 0.023098474368453026, 0.02567201852798462, 0.04225016012787819, 0.05647281929850578, 0.5644452571868896, 0.8062969446182251, 0.0037398021668195724], [0.09274263679981232, 0.19406189024448395, 0.18035270273685455, 0.18292436003684998, 0.2674761116504669, 0.1057504341006279, 0.5214765071868896, 0.1765710562467575, 0.15375129878520966, 0.08563723415136337, 0.35003283619880676, 0.12250327318906784, 0.4574505388736725, 0.6043637990951538, 0.046846963465213776], [0.3136129081249237, 0.10648278146982193, 0.02492944709956646, 0.07937752455472946, 0.16382691264152527, 0.40212482213974, 0.2148500233888626, 0.5046796798706055, 0.25625455379486084, 0.10382789373397827, 0.027611082419753075, 0.07138189673423767, 0.1265101283788681, 0.05298655480146408, 0.01642199046909809], [0.7252353429794312, 0.23862500488758087, 0.17466871440410614, 0.2584758698940277, 0.15821219980716705, 0.41019105911254883, 0.4795793294906616, 0.2558479905128479, 0.061036378145217896, 0.5831483006477356, 0.23237691819667816, 0.36767491698265076, 0.07294586300849915, 0.0734395682811737, 0.006080146878957748], [0.18402060866355896, 0.2199273407459259, 0.10670217871665955, 0.36498934030532837, 0.37264159321784973, 0.5975290536880493, 0.641157865524292, 0.4798426032066345, 0.07047704607248306, 0.30389490723609924, 0.6835307478904724, 0.29959914088249207, 0.32009243965148926, 0.2076108753681183, 0.015385132282972336], [0.18547095358371735, 0.1046445369720459, 0.17664410173892975, 0.031107882037758827, 0.4872691333293915, 0.6876094937324524, 0.29805243015289307, 0.2697339355945587, 0.03289056569337845, 0.04577193781733513, 0.2390383929014206, 0.650258481502533, 0.6253164410591125, 0.2719551920890808, 0.042574722319841385], [0.06026101112365723, 0.4596063494682312, 0.11362233757972717, 0.050736263394355774, 0.47900232672691345, 0.8146356344223022, 0.23428170382976532, 0.5258204936981201, 0.07407079637050629, 0.24087238311767578, 0.04631686583161354, 0.04097185283899307, 0.24002470076084137, 0.051092784851789474, 0.10185284167528152], [0.05915316566824913, 0.3385859429836273, 0.23845957219600677, 0.13520635664463043, 0.49372056126594543, 0.8321547508239746, 0.47351959347724915, 0.4942004382610321, 0.11661165207624435, 0.273796945810318, 0.09639480710029602, 0.07113680988550186, 0.3545372784137726, 0.3069557547569275, 0.026768943294882774], [0.6326229572296143, 0.28129494190216064, 0.2424720972776413, 0.23961131274700165, 0.1532977670431137, 0.03248026221990585, 0.07237446308135986, 0.03991716355085373, 0.058106135576963425, 0.6791825294494629, 0.4868316352367401, 0.4841252863407135, 0.1838759332895279, 0.16229771077632904, 0.03779346123337746]], [[0.04456469416618347, 0.016716457903385162, 0.08688971400260925, 0.23432573676109314, 0.12769784033298492, 0.0498066172003746, 0.10501405596733093, 0.14398211240768433, 0.3055479824542999, 0.0823235884308815, 0.23467087745666504, 0.6305257678031921, 0.08790664374828339, 0.14063040912151337, 0.13028757274150848], [0.04107241332530975, 0.03620494529604912, 0.07322828471660614, 0.1027759537100792, 0.08743055909872055, 0.016458408907055855, 0.09779228270053864, 0.014780157245695591, 0.09821301698684692, 0.025402111932635307, 0.0808086097240448, 0.08257035166025162, 0.07231960445642471, 0.0895148441195488, 0.19708459079265594], [0.1263897716999054, 0.01533158216625452, 0.08717449009418488, 0.22571881115436554, 0.06928549706935883, 0.16778334975242615, 0.06136450543999672, 0.07180161774158478, 0.2525678873062134, 0.32249853014945984, 0.08566119521856308, 0.48726531863212585, 0.2929263114929199, 0.21127133071422577, 0.12448348850011826], [0.1481804996728897, 0.04817945510149002, 0.03058626689016819, 0.13171793520450592, 0.10783855617046356, 0.24912205338478088, 0.1342363804578781, 0.28650397062301636, 0.25943103432655334, 0.2756144404411316, 0.08422903716564178, 0.7444766163825989, 0.7611673474311829, 0.5739472508430481, 0.11213001608848572], [0.1744699776172638, 0.050404343754053116, 0.018338145688176155, 0.11463086307048798, 0.02370826154947281, 0.09417468309402466, 0.04503462836146355, 0.0389062762260437, 0.1780962496995926, 0.7825090885162354, 0.15977078676223755, 0.2598268687725067, 0.05674973130226135, 0.2742767333984375, 0.15589554607868195], [0.26428407430648804, 0.0871720165014267, 0.015494171530008316, 0.31054598093032837, 0.31179672479629517, 0.05687993764877319, 0.05327969416975975, 0.14049863815307617, 0.03721972927451134, 0.33735793828964233, 0.06669215857982635, 0.44665512442588806, 0.1105320155620575, 0.07633788883686066, 0.13637836277484894], [0.27871736884117126, 0.07987862080335617, 0.06999076902866364, 0.3873903453350067, 0.3669894337654114, 0.0245819091796875, 0.02483827993273735, 0.08571609854698181, 0.04856930300593376, 0.2826782464981079, 0.10519464313983917, 0.8515737056732178, 0.24991582334041595, 0.08752243965864182, 0.1076057106256485], [0.18780259788036346, 0.02093103528022766, 0.1730981320142746, 0.27918383479118347, 0.32355740666389465, 0.05090703070163727, 0.030107326805591583, 0.015694553032517433, 0.08293543756008148, 0.11989035457372665, 0.1594303995370865, 0.6402391195297241, 0.08334839344024658, 0.13423335552215576, 0.16886292397975922], [0.23048973083496094, 0.05534357205033302, 0.15910016000270844, 0.5473513603210449, 0.11114095151424408, 0.060548413544893265, 0.23547381162643433, 0.0231330469250679, 0.22654443979263306, 0.16574865579605103, 0.03383632004261017, 0.05167527496814728, 0.026772163808345795, 0.028301218524575233, 0.08144620060920715], [0.126570925116539, 0.0055835917592048645, 0.7687394022941589, 0.6136845350265503, 0.7887718677520752, 0.24027548730373383, 0.25543272495269775, 0.017155619338154793, 0.01121050026267767, 0.02180907502770424, 0.06387564539909363, 0.04227403923869133, 0.004662328865379095, 0.0204116590321064, 0.16526305675506592], [0.3619309663772583, 0.022692076861858368, 0.8739812970161438, 0.5600091814994812, 0.4330839216709137, 0.27864721417427063, 0.1654776781797409, 0.02327956072986126, 0.003977042157202959, 0.0664801374077797, 0.12084753066301346, 0.16815124452114105, 0.07773539423942566, 0.17824198305606842, 0.05263833701610565], [0.29354482889175415, 0.16078433394432068, 0.705570638179779, 0.44417092204093933, 0.02176845259964466, 0.15997210144996643, 0.4057019054889679, 0.11617531627416611, 0.010741903446614742, 0.06882698833942413, 0.07046788930892944, 0.041601523756980896, 0.011864392086863518, 0.06714706867933273, 0.14988133311271667], [0.5400083065032959, 0.2319646179676056, 0.6198285818099976, 0.2858767509460449, 0.1694929450750351, 0.06001640111207962, 0.26940232515335083, 0.06411167979240417, 0.02847147174179554, 0.18856319785118103, 0.05879069119691849, 0.03795049339532852, 0.009596540592610836, 0.023393897339701653, 0.14663995802402496], [0.6488012075424194, 0.15997910499572754, 0.6486002802848816, 0.4859846830368042, 0.34752336144447327, 0.028076842427253723, 0.12281371653079987, 0.019826101139187813, 0.023531395941972733, 0.15743687748908997, 0.059922393411397934, 0.08707788586616516, 0.005486410576850176, 0.025385212153196335, 0.15706156194210052], [0.037294961512088776, 0.2018004208803177, 0.33537882566452026, 0.19571122527122498, 0.0998593419790268, 0.48263466358184814, 0.11429780721664429, 0.20324908196926117, 0.7053001523017883, 0.01905757561326027, 0.1765546351671219, 0.10779165476560593, 0.18456625938415527, 0.16855330765247345, 0.014784654602408409]]]], \"bot_text\": [\"The_\", \"animal_\", \"didn_\", \"'_\", \"t_\", \"cross_\", \"the_\", \"street_\", \"because_\", \"it_\", \"was_\", \"too_\", \"tire\", \"d_\"]}}" + ], + "text/plain": [ + "<IPython.core.display.Javascript object>" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "/**\n", + " * @fileoverview Transformer Visualization D3 javascript code.\n", + " */\n", + "\n", + "requirejs(['jquery', 'd3'],\n", + "function($, d3) {\n", + "\n", + "var attention = window.attention;\n", + "\n", + "const TEXT_SIZE = 15;\n", + "const BOXWIDTH = TEXT_SIZE * 8;\n", + "const BOXHEIGHT = TEXT_SIZE * 1.5;\n", + "const WIDTH = 2000;\n", + "const HEIGHT = attention.all.bot_text.length * BOXHEIGHT * 2 + 100;\n", + "const MATRIX_WIDTH = 150;\n", + "const head_colours = d3.scale.category10();\n", + "const CHECKBOX_SIZE = 20;\n", + "\n", + "function lighten(colour) {\n", + " var c = d3.hsl(colour);\n", + " var increment = (1 - c.l) * 0.6;\n", + " c.l += increment;\n", + " c.s -= increment;\n", + " return c;\n", + "}\n", + "\n", + "function transpose(mat) {\n", + " return mat[0].map(function(col, i) {\n", + " return mat.map(function(row) {\n", + " return row[i];\n", + " });\n", + " });\n", + "}\n", + "\n", + "function zip(a, b) {\n", + " return a.map(function (e, i) {\n", + " return [e, b[i]];\n", + " });\n", + "}\n", + "\n", + "\n", + "function renderVis(id, top_text, bot_text, attention_heads, config) {\n", + " $(id).empty();\n", + " var svg = d3.select(id)\n", + " .append('svg')\n", + " .attr(\"width\", WIDTH)\n", + " .attr(\"height\", HEIGHT);\n", + "\n", + " var att_data = [];\n", + " for (var i=0; i < attention_heads.length; i++) {\n", + " var att_trans = transpose(attention_heads[i]);\n", + " att_data.push(zip(attention_heads[i], att_trans));\n", + " }\n", + "\n", + " renderText(svg, top_text, true, att_data, 0);\n", + " renderText(svg, bot_text, false, att_data, MATRIX_WIDTH + BOXWIDTH);\n", + "\n", + " renderAttentionHighlights(svg, att_data);\n", + "\n", + " svg.append(\"g\").classed(\"attention_heads\", true);\n", + "\n", + " renderAttention(svg, attention_heads);\n", + "\n", + " draw_checkboxes(config, 0, svg, attention_heads);\n", + "}\n", + "\n", + "\n", + "function renderText(svg, text, is_top, att_data, left_pos) {\n", + " var id = is_top ? \"top\" : \"bottom\";\n", + " var textContainer = svg.append(\"svg:g\")\n", + " .attr(\"id\", id);\n", + "\n", + " textContainer.append(\"g\").classed(\"attention_boxes\", true)\n", + " .selectAll(\"g\")\n", + " .data(att_data)\n", + " .enter()\n", + " .append(\"g\")\n", + " .selectAll(\"rect\")\n", + " .data(function(d) {return d;})\n", + " .enter()\n", + " .append(\"rect\")\n", + " .attr(\"x\", function(d, i, j) {\n", + " return left_pos + box_offset(j);\n", + " })\n", + " .attr(\"y\", function(d, i) {\n", + " return (+1) * BOXHEIGHT;\n", + " })\n", + " .attr(\"width\", BOXWIDTH/active_heads())\n", + " .attr(\"height\", function() { return BOXHEIGHT; })\n", + " .attr(\"fill\", function(d, i, j) {\n", + " return head_colours(j);\n", + " })\n", + " .style(\"opacity\", 0.0);\n", + "\n", + "\n", + " var tokenContainer = textContainer.append(\"g\").selectAll(\"g\")\n", + " .data(text)\n", + " .enter()\n", + " .append(\"g\");\n", + "\n", + " tokenContainer.append(\"rect\")\n", + " .classed(\"background\", true)\n", + " .style(\"opacity\", 0.0)\n", + " .attr(\"fill\", \"lightgray\")\n", + " .attr(\"x\", left_pos)\n", + " .attr(\"y\", function(d, i) {\n", + " return (i+1) * BOXHEIGHT;\n", + " })\n", + " .attr(\"width\", BOXWIDTH)\n", + " .attr(\"height\", BOXHEIGHT);\n", + "\n", + " var theText = tokenContainer.append(\"text\")\n", + " .text(function(d) { return d; })\n", + " .attr(\"font-size\", TEXT_SIZE + \"px\")\n", + " .style(\"cursor\", \"default\")\n", + " .style(\"-webkit-user-select\", \"none\")\n", + " .attr(\"x\", left_pos)\n", + " .attr(\"y\", function(d, i) {\n", + " return (i+1) * BOXHEIGHT;\n", + " });\n", + "\n", + " if (is_top) {\n", + " theText.style(\"text-anchor\", \"end\")\n", + " .attr(\"dx\", BOXWIDTH - TEXT_SIZE)\n", + " .attr(\"dy\", TEXT_SIZE);\n", + " } else {\n", + " theText.style(\"text-anchor\", \"start\")\n", + " .attr(\"dx\", + TEXT_SIZE)\n", + " .attr(\"dy\", TEXT_SIZE);\n", + " }\n", + "\n", + " tokenContainer.on(\"mouseover\", function(d, index) {\n", + " textContainer.selectAll(\".background\")\n", + " .style(\"opacity\", function(d, i) {\n", + " return i == index ? 1.0 : 0.0;\n", + " });\n", + "\n", + " svg.selectAll(\".attention_heads\").style(\"display\", \"none\");\n", + "\n", + " svg.selectAll(\".line_heads\") // To get the nesting to work.\n", + " .selectAll(\".att_lines\")\n", + " .attr(\"stroke-opacity\", function(d) {\n", + " return 1.0;\n", + " })\n", + " .attr(\"y1\", function(d, i) {\n", + " if (is_top) {\n", + " return (index+1) * BOXHEIGHT + (BOXHEIGHT/2);\n", + " } else {\n", + " return (i+1) * BOXHEIGHT + (BOXHEIGHT/2);\n", + " }\n", + " })\n", + " .attr(\"x1\", BOXWIDTH)\n", + " .attr(\"y2\", function(d, i) {\n", + " if (is_top) {\n", + " return (i+1) * BOXHEIGHT + (BOXHEIGHT/2);\n", + " } else {\n", + " return (index+1) * BOXHEIGHT + (BOXHEIGHT/2);\n", + " }\n", + " })\n", + " .attr(\"x2\", BOXWIDTH + MATRIX_WIDTH)\n", + " .attr(\"stroke-width\", 2)\n", + " .attr(\"stroke\", function(d, i, j) {\n", + " return head_colours(j);\n", + " })\n", + " .attr(\"stroke-opacity\", function(d, i, j) {\n", + " if (is_top) {d = d[0];} else {d = d[1];}\n", + " if (config.head_vis[j]) {\n", + " if (d) {\n", + " return d[index];\n", + " } else {\n", + " return 0.0;\n", + " }\n", + " } else {\n", + " return 0.0;\n", + " }\n", + " });\n", + "\n", + "\n", + " function updateAttentionBoxes() {\n", + " var id = is_top ? \"bottom\" : \"top\";\n", + " var the_left_pos = is_top ? MATRIX_WIDTH + BOXWIDTH : 0;\n", + " svg.select(\"#\" + id)\n", + " .selectAll(\".attention_boxes\")\n", + " .selectAll(\"g\")\n", + " .selectAll(\"rect\")\n", + " .attr(\"x\", function(d, i, j) { return the_left_pos + box_offset(j); })\n", + " .attr(\"y\", function(d, i) { return (i+1) * BOXHEIGHT; })\n", + " .attr(\"width\", BOXWIDTH/active_heads())\n", + " .attr(\"height\", function() { return BOXHEIGHT; })\n", + " .style(\"opacity\", function(d, i, j) {\n", + " if (is_top) {d = d[0];} else {d = d[1];}\n", + " if (config.head_vis[j])\n", + " if (d) {\n", + " return d[index];\n", + " } else {\n", + " return 0.0;\n", + " }\n", + " else\n", + " return 0.0;\n", + "\n", + " });\n", + " }\n", + "\n", + " updateAttentionBoxes();\n", + " });\n", + "\n", + " textContainer.on(\"mouseleave\", function() {\n", + " d3.select(this).selectAll(\".background\")\n", + " .style(\"opacity\", 0.0);\n", + "\n", + " svg.selectAll(\".att_lines\").attr(\"stroke-opacity\", 0.0);\n", + " svg.selectAll(\".attention_heads\").style(\"display\", \"inline\");\n", + " svg.selectAll(\".attention_boxes\")\n", + " .selectAll(\"g\")\n", + " .selectAll(\"rect\")\n", + " .style(\"opacity\", 0.0);\n", + " });\n", + "}\n", + "\n", + "function renderAttentionHighlights(svg, attention) {\n", + " var line_container = svg.append(\"g\");\n", + " line_container.selectAll(\"g\")\n", + " .data(attention)\n", + " .enter()\n", + " .append(\"g\")\n", + " .classed(\"line_heads\", true)\n", + " .selectAll(\"line\")\n", + " .data(function(d){return d;})\n", + " .enter()\n", + " .append(\"line\").classed(\"att_lines\", true);\n", + "}\n", + "\n", + "function renderAttention(svg, attention_heads) {\n", + " var line_container = svg.selectAll(\".attention_heads\");\n", + " line_container.html(null);\n", + " for(var h=0; h<attention_heads.length; h++) {\n", + " for(var a=0; a<attention_heads[h].length; a++) {\n", + " for(var s=0; s<attention_heads[h][a].length; s++) {\n", + " line_container.append(\"line\")\n", + " .attr(\"y1\", (s+1) * BOXHEIGHT + (BOXHEIGHT/2))\n", + " .attr(\"x1\", BOXWIDTH)\n", + " .attr(\"y2\", (a+1) * BOXHEIGHT + (BOXHEIGHT/2))\n", + " .attr(\"x2\", BOXWIDTH + MATRIX_WIDTH)\n", + " .attr(\"stroke-width\", 2)\n", + " .attr(\"stroke\", head_colours(h))\n", + " .attr(\"stroke-opacity\", function() {\n", + " if (config.head_vis[h]) {\n", + " return attention_heads[h][a][s]/active_heads();\n", + " } else {\n", + " return 0.0;\n", + " }\n", + " }());\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "// Checkboxes\n", + "function box_offset(i) {\n", + " var num_head_above = config.head_vis.reduce(\n", + " function(acc, val, cur) {return val && cur < i ? acc + 1: acc;}, 0);\n", + " return num_head_above*(BOXWIDTH / active_heads());\n", + "}\n", + "\n", + "function active_heads() {\n", + " return config.head_vis.reduce(function(acc, val) {\n", + " return val ? acc + 1: acc;\n", + " }, 0);\n", + "}\n", + "\n", + "function draw_checkboxes(config, top, svg, attention_heads) {\n", + " var checkboxContainer = svg.append(\"g\");\n", + " var checkbox = checkboxContainer.selectAll(\"rect\")\n", + " .data(config.head_vis)\n", + " .enter()\n", + " .append(\"rect\")\n", + " .attr(\"fill\", function(d, i) {\n", + " return head_colours(i);\n", + " })\n", + " .attr(\"x\", function(d, i) {\n", + " return (i+1) * CHECKBOX_SIZE;\n", + " })\n", + " .attr(\"y\", top)\n", + " .attr(\"width\", CHECKBOX_SIZE)\n", + " .attr(\"height\", CHECKBOX_SIZE);\n", + "\n", + " function update_checkboxes() {\n", + " checkboxContainer.selectAll(\"rect\")\n", + " .data(config.head_vis)\n", + " .attr(\"fill\", function(d, i) {\n", + " var head_colour = head_colours(i);\n", + " var colour = d ? head_colour : lighten(head_colour);\n", + " return colour;\n", + " });\n", + " }\n", + "\n", + " update_checkboxes();\n", + "\n", + " checkbox.on(\"click\", function(d, i) {\n", + " if (config.head_vis[i] && active_heads() == 1) return;\n", + " config.head_vis[i] = !config.head_vis[i];\n", + " update_checkboxes();\n", + " renderAttention(svg, attention_heads);\n", + " });\n", + "\n", + " checkbox.on(\"dblclick\", function(d, i) {\n", + " // If we double click on the only active head then reset\n", + " if (config.head_vis[i] && active_heads() == 1) {\n", + " config.head_vis = new Array(config.num_heads).fill(true);\n", + " } else {\n", + " config.head_vis = new Array(config.num_heads).fill(false);\n", + " config.head_vis[i] = true;\n", + " }\n", + " update_checkboxes();\n", + " renderAttention(svg, attention_heads);\n", + " });\n", + "}\n", + "\n", + "var config = {\n", + " layer: 0,\n", + " att_type: 'all',\n", + "};\n", + "\n", + "function visualize() {\n", + " var num_heads = attention['all']['att'][0].length;\n", + " config.head_vis = new Array(num_heads).fill(true);\n", + " config.num_heads = num_heads;\n", + " config.attention = attention;\n", + "\n", + " render();\n", + "}\n", + "\n", + "function render() {\n", + " var conf = config.attention[config.att_type];\n", + "\n", + " var top_text = conf.top_text;\n", + " var bot_text = conf.bot_text;\n", + " var attention = conf.att[config.layer];\n", + "\n", + " $(\"#vis svg\").empty();\n", + " renderVis(\"#vis\", top_text, bot_text, attention, config);\n", + "}\n", + "\n", + "$(\"#layer\").empty();\n", + "for(var i=0; i<6; i++) {\n", + " $(\"#layer\").append($(\"<option />\").val(i).text(i));\n", + "}\n", + "\n", + "$(\"#layer\").on('change', function(e) {\n", + " config.layer = +e.currentTarget.value;\n", + " render();\n", + "});\n", + "\n", + "$(\"#att_type\").on('change', function(e) {\n", + " config.att_type = e.currentTarget.value;\n", + " render();\n", + "});\n", + "\n", + "$(\"button\").on('click', visualize);\n", + "\n", + "visualize();\n", + "\n", + "});\n" + ], + "text/plain": [ + "<IPython.core.display.Javascript object>" + ] + }, + "metadata": { + "tags": [] + } } ] }, @@ -770,16 +1434,16 @@ "base_uri": "https://localhost:8080/", "height": 34 }, - "outputId": "9535b122-d663-470b-fb03-15541769a8d6", + "outputId": "a574a1a3-ce56-4715-9ad3-8289c61ade3b", "executionInfo": { "status": "ok", - "timestamp": 1512174027233, + "timestamp": 1512369563515, "user_tz": 480, - "elapsed": 372, + "elapsed": 625, "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" + "displayName": "Niki Parmar", + "photoUrl": "//lh3.googleusercontent.com/-ReuwZvCmGE8/AAAAAAAAAAI/AAAAAAAAAIc/fcvytJVpitE/s50-c-k-no/photo.jpg", + "userId": "115864460963462186442" } } }, @@ -801,7 +1465,7 @@ "optimizer = tf.train.AdamOptimizer()" ], "cell_type": "code", - "execution_count": 14, + "execution_count": 42, "outputs": [ { "output_type": "stream", @@ -827,24 +1491,23 @@ } ], "base_uri": "https://localhost:8080/", - "height": 340 + "height": 204 }, - "outputId": "adfe2262-ca2a-4d74-ef6f-4caaf5531824", + "outputId": "504a7876-8bbb-4e5f-f303-f951c2e071b2", "executionInfo": { "status": "ok", - "timestamp": 1512174129153, + "timestamp": 1512369756046, "user_tz": 480, - "elapsed": 101898, + "elapsed": 103766, "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" + "displayName": "Niki Parmar", + "photoUrl": "//lh3.googleusercontent.com/-ReuwZvCmGE8/AAAAAAAAAAI/AAAAAAAAAIc/fcvytJVpitE/s50-c-k-no/photo.jpg", + "userId": "115864460963462186442" } } }, "source": [ "# Train\n", - "\n", "NUM_STEPS = 500\n", "\n", "for count, example in enumerate(tfe.Iterator(mnist_train_dataset)):\n", @@ -858,30 +1521,22 @@ " break" ], "cell_type": "code", - "execution_count": 15, + "execution_count": 46, "outputs": [ { "output_type": "stream", "text": [ - "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensor2tensor/layers/common_layers.py:1671: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "\n", - "Future major versions of TensorFlow will allow gradients to flow\n", - "into the labels input on backprop by default.\n", - "\n", - "See tf.nn.softmax_cross_entropy_with_logits_v2.\n", - "\n", - "Step: 0, Loss: 5.357\n", - "Step: 50, Loss: 0.746\n", - "Step: 100, Loss: 0.618\n", - "Step: 150, Loss: 0.502\n", - "Step: 200, Loss: 0.395\n", - "Step: 250, Loss: 0.345\n", - "Step: 300, Loss: 0.338\n", - "Step: 350, Loss: 0.175\n", - "Step: 400, Loss: 0.345\n", - "Step: 450, Loss: 0.373\n", - "Step: 500, Loss: 0.292\n" + "Step: 0, Loss: 0.513\n", + "Step: 50, Loss: 0.342\n", + "Step: 100, Loss: 0.315\n", + "Step: 150, Loss: 0.372\n", + "Step: 200, Loss: 0.324\n", + "Step: 250, Loss: 0.271\n", + "Step: 300, Loss: 0.281\n", + "Step: 350, Loss: 0.285\n", + "Step: 400, Loss: 0.250\n", + "Step: 450, Loss: 0.247\n", + "Step: 500, Loss: 0.338\n" ], "name": "stdout" } @@ -953,16 +1608,16 @@ "base_uri": "https://localhost:8080/", "height": 68 }, - "outputId": "95ec4064-d884-4ea8-acdf-ffe83dc0c230", + "outputId": "ef33057a-1a22-4ab8-ab7b-3c90d9f6a850", "executionInfo": { "status": "ok", - "timestamp": 1512174132643, + "timestamp": 1512369759917, "user_tz": 480, - "elapsed": 3097, + "elapsed": 3833, "user": { - "displayName": "Ryan Sepassi", - "photoUrl": "//lh4.googleusercontent.com/-dcHmhQy1Y2A/AAAAAAAAAAI/AAAAAAAABEw/if_k14yF4KI/s50-c-k-no/photo.jpg", - "userId": "107877449274830904926" + "displayName": "Niki Parmar", + "photoUrl": "//lh3.googleusercontent.com/-ReuwZvCmGE8/AAAAAAAAAAI/AAAAAAAAAIc/fcvytJVpitE/s50-c-k-no/photo.jpg", + "userId": "115864460963462186442" } } }, @@ -994,14 +1649,14 @@ " print(\"%s: %.2f\" % (name, val))" ], "cell_type": "code", - "execution_count": 17, + "execution_count": 47, "outputs": [ { "output_type": "stream", "text": [ "INFO:tensorflow:Reading data files from /content/t2t/data/image_mnist-dev*\n", "accuracy_top5: 1.00\n", - "accuracy: 0.98\n" + "accuracy: 0.99\n" ], "name": "stdout" } From 75564bb42d804ba46a73365aeb5bfa70e0e2d029 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 5 Dec 2017 08:44:05 -0800 Subject: [PATCH 0652/4095] Set eval_batch_size to None for the "train" job on TPU. PiperOrigin-RevId: 177960290 --- tensor2tensor/bin/t2t-tpu-trainer | 3 ++- tensor2tensor/tpu/tpu_trainer.py | 3 ++- tensor2tensor/tpu/tpu_trainer_lib.py | 18 +++++++++++++++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/bin/t2t-tpu-trainer b/tensor2tensor/bin/t2t-tpu-trainer index 3e8dedd13..ca4af0107 100644 --- a/tensor2tensor/bin/t2t-tpu-trainer +++ b/tensor2tensor/bin/t2t-tpu-trainer @@ -77,6 +77,7 @@ def create_experiment_fn(): FLAGS.train_steps, FLAGS.eval_steps, FLAGS.local_eval_frequency, + FLAGS.schedule, use_tpu=FLAGS.use_tpu) @@ -88,7 +89,7 @@ def create_run_config(): num_shards=FLAGS.tpu_num_shards, log_device_placement=FLAGS.log_device_placement, save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency), + FLAGS.local_eval_frequency) - 1, num_gpus=FLAGS.worker_gpu, gpu_order=FLAGS.gpu_order, shard_to_cpu=FLAGS.locally_shard_to_cpu, diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 2c4015469..a0961778a 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -76,6 +76,7 @@ def create_experiment_fn(): FLAGS.train_steps, FLAGS.eval_steps, FLAGS.local_eval_frequency, + FLAGS.schedule, use_tpu=FLAGS.use_tpu) @@ -87,7 +88,7 @@ def create_run_config(): num_shards=FLAGS.tpu_num_shards, log_device_placement=FLAGS.log_device_placement, save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency), + FLAGS.local_eval_frequency) - 1, num_gpus=FLAGS.worker_gpu, gpu_order=FLAGS.gpu_order, shard_to_cpu=FLAGS.locally_shard_to_cpu, diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 08c352d80..c1efc38e8 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -72,19 +72,29 @@ def create_run_config(master="", return config -def create_estimator(model_name, hparams, run_config, use_tpu=True): +def create_estimator(model_name, + hparams, + run_config, + schedule="train_and_evaluate", + use_tpu=True): model_fn = t2t_model.T2TModel.make_estimator_model_fn( model_name, hparams, use_tpu=use_tpu) if use_tpu: batch_size = hparams.tpu_batch_size_per_shard batch_size *= run_config.tpu_config.num_shards + eval_batch_size = batch_size * 2 + if schedule == "train": + # Estimator takes the presence of eval_batch_size as an indication that + # an eval is being performed, and complains about num_shards being too + # big. So we have to eval_batch_size to None. + eval_batch_size = None return tf.contrib.tpu.TPUEstimator( model_fn=model_fn, model_dir=run_config.model_dir, config=run_config, train_batch_size=batch_size, - eval_batch_size=batch_size * 2) + eval_batch_size=eval_batch_size) else: return tf.estimator.Estimator( model_fn=model_fn, model_dir=run_config.model_dir, config=run_config) @@ -98,6 +108,7 @@ def create_experiment(run_config, train_steps, eval_steps, min_eval_frequency, + schedule="train_and_evaluate", use_tpu=True): """Create Experiment.""" # HParams @@ -105,7 +116,8 @@ def create_experiment(run_config, trainer_utils.add_problem_hparams(hparams, problem_name) # Estimator - estimator = create_estimator(model_name, hparams, run_config, use_tpu=use_tpu) + estimator = create_estimator( + model_name, hparams, run_config, schedule, use_tpu=use_tpu) # Input fns from Problem problem = hparams.problem_instances[0] From 8e823b98c5c1c91c7e1c19b8f894708746593609 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Tue, 5 Dec 2017 10:13:14 -0800 Subject: [PATCH 0653/4095] remove accidentally-inserted code. PiperOrigin-RevId: 177971717 --- tensor2tensor/bin/t2t-tpu-trainer | 2 +- tensor2tensor/tpu/tpu_trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensor2tensor/bin/t2t-tpu-trainer b/tensor2tensor/bin/t2t-tpu-trainer index ca4af0107..805ba1078 100644 --- a/tensor2tensor/bin/t2t-tpu-trainer +++ b/tensor2tensor/bin/t2t-tpu-trainer @@ -89,7 +89,7 @@ def create_run_config(): num_shards=FLAGS.tpu_num_shards, log_device_placement=FLAGS.log_device_placement, save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency) - 1, + FLAGS.local_eval_frequency), num_gpus=FLAGS.worker_gpu, gpu_order=FLAGS.gpu_order, shard_to_cpu=FLAGS.locally_shard_to_cpu, diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index a0961778a..193ecc3f2 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -88,7 +88,7 @@ def create_run_config(): num_shards=FLAGS.tpu_num_shards, log_device_placement=FLAGS.log_device_placement, save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency) - 1, + FLAGS.local_eval_frequency), num_gpus=FLAGS.worker_gpu, gpu_order=FLAGS.gpu_order, shard_to_cpu=FLAGS.locally_shard_to_cpu, From fa131306ee233bd90d56153e79a2cde76e798594 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Wed, 6 Dec 2017 13:14:23 -0800 Subject: [PATCH 0654/4095] Expose subword tokens in the SubwordTextEncoder class. PiperOrigin-RevId: 178141871 --- tensor2tensor/data_generators/text_encoder.py | 4 +++ .../data_generators/text_encoder_test.py | 26 +++++++++---------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py index 7b7b2287e..6930b205e 100644 --- a/tensor2tensor/data_generators/text_encoder.py +++ b/tensor2tensor/data_generators/text_encoder.py @@ -710,6 +710,10 @@ def build_from_token_counts(self, self._init_subtokens_from_list(new_subtoken_strings) tf.logging.info("vocab_size = %d" % self.vocab_size) + @property + def all_subtoken_strings(self): + return tuple(self._all_subtoken_strings) + def dump(self): """Debugging dump of the current subtoken vocabulary.""" subtoken_strings = [(i, s) diff --git a/tensor2tensor/data_generators/text_encoder_test.py b/tensor2tensor/data_generators/text_encoder_test.py index b02653ebc..8364afafd 100644 --- a/tensor2tensor/data_generators/text_encoder_test.py +++ b/tensor2tensor/data_generators/text_encoder_test.py @@ -136,17 +136,17 @@ def test_encode_decode(self): # The substrings coded and coder are frequent enough in the corpus that # they should appear in the vocabulary even though they are substrings # of other included strings. - subtoken_strings = {encoder._all_subtoken_strings[i] for i in encoded} + subtoken_strings = {encoder.all_subtoken_strings[i] for i in encoded} self.assertIn("encoded_", subtoken_strings) self.assertIn("coded_", subtoken_strings) - self.assertIn("TextEncoder", encoder._all_subtoken_strings) - self.assertIn("coder", encoder._all_subtoken_strings) + self.assertIn("TextEncoder", encoder.all_subtoken_strings) + self.assertIn("coder", encoder.all_subtoken_strings) # Every character in the corpus should be in the encoders alphabet and # its subtoken vocabulary. self.assertTrue(alphabet.issubset(encoder._alphabet)) for a in alphabet: - self.assertIn(a, encoder._all_subtoken_strings) + self.assertIn(a, encoder.all_subtoken_strings) def test_unicode(self): corpus = "Cat emoticons. \U0001F638 \U0001F639 \U0001F63A \U0001F63B" @@ -156,7 +156,7 @@ def test_unicode(self): 100, token_counts, 2, 10) self.assertIn("\U0001F638", encoder._alphabet) - self.assertIn("\U0001F63B", encoder._all_subtoken_strings) + self.assertIn("\U0001F63B", encoder.all_subtoken_strings) def test_small_vocab(self): corpus = "The quick brown fox jumps over the lazy dog" @@ -171,7 +171,7 @@ def test_small_vocab(self): # are encodable. self.assertTrue(alphabet.issubset(encoder._alphabet)) for a in alphabet: - self.assertIn(a, encoder._all_subtoken_strings) + self.assertIn(a, encoder.all_subtoken_strings) def test_encodable_when_not_in_alphabet(self): corpus = "the quick brown fox jumps over the lazy dog" @@ -187,7 +187,7 @@ def test_encodable_when_not_in_alphabet(self): decoded = encoder.decode(encoded) self.assertEqual(original, decoded) - encoded_str = "".join(encoder._all_subtoken_strings[i] for i in encoded) + encoded_str = "".join(encoder.all_subtoken_strings[i] for i in encoded) self.assertIn("\\84;", encoded_str) @mock.patch.object(text_encoder, "_ESCAPE_CHARS", new=set("\\_;13579")) @@ -213,7 +213,7 @@ def test_load_from_file(self): "and\n" "of\n") encoder._load_from_file_object(vocab) - self.assertEqual(encoder._all_subtoken_strings, correct_vocab) + self.assertAllEqual(encoder.all_subtoken_strings, correct_vocab) # Test a vocab file with words wrapped in single quotes encoder = text_encoder.SubwordTextEncoder() @@ -221,7 +221,7 @@ def test_load_from_file(self): "\"and\"\n" "\"of\"\n") encoder._load_from_file_object(vocab) - self.assertEqual(encoder._all_subtoken_strings, correct_vocab) + self.assertAllEqual(encoder.all_subtoken_strings, correct_vocab) def test_reserved_token_chars_not_in_alphabet(self): corpus = "dog" @@ -254,8 +254,8 @@ def test_save_and_reload(self): new_encoder = text_encoder.SubwordTextEncoder(filename) self.assertEqual(encoder._alphabet, new_encoder._alphabet) - self.assertEqual(encoder._all_subtoken_strings, - new_encoder._all_subtoken_strings) + self.assertEqual(encoder.all_subtoken_strings, + new_encoder.all_subtoken_strings) self.assertEqual(encoder._subtoken_string_to_id, new_encoder._subtoken_string_to_id) self.assertEqual(encoder._max_subtoken_len, new_encoder._max_subtoken_len) @@ -274,8 +274,8 @@ def test_save_and_reload_no_single_quotes(self): new_encoder = text_encoder.SubwordTextEncoder(filename) self.assertEqual(encoder._alphabet, new_encoder._alphabet) - self.assertEqual(encoder._all_subtoken_strings, - new_encoder._all_subtoken_strings) + self.assertEqual(encoder.all_subtoken_strings, + new_encoder.all_subtoken_strings) self.assertEqual(encoder._subtoken_string_to_id, new_encoder._subtoken_string_to_id) self.assertEqual(encoder._max_subtoken_len, new_encoder._max_subtoken_len) From b8cb36574aef15b19560a5ca596527bcfdbd94ab Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 7 Dec 2017 16:00:34 -0800 Subject: [PATCH 0655/4095] Skip a random fraction of the first shard in each data reader. Solves PiperOrigin-RevId: 178309856 --- tensor2tensor/data_generators/problem.py | 24 +++++++++++++++++++++++- tensor2tensor/utils/data_reader_test.py | 20 ++++++++++++-------- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 6a1a7208e..92af00342 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -120,6 +120,21 @@ def preprocess_example_common(example, hparams, mode): return example +def _file_num_records_cached(filename): + """Return the number of TFRecords in a file.""" + # Cache the result, as this is expensive to compute + if filename in _file_num_records_cache: + return _file_num_records_cache[filename] + ret = 0 + for _ in tf.python_io.tf_record_iterator(filename): + ret += 1 + _file_num_records_cache[filename] = ret + return ret + + +_file_num_records_cache = {} + + class Problem(object): """Problem base class. Specifies a T2T problem. @@ -381,8 +396,15 @@ def dataset(self, data_files = tf.contrib.slim.parallel_reader.get_data_files( data_filepattern) if shuffle_files or shuffle_files is None and is_training: + # In addition to shuffling the list of file names, we skip a random + # fraction of the first file. The skip is essential for synchronous + # highly-parallel training. Otherwise, we have multiple replicas + # reading the same shard in lock-step. + num_skip = random.randint(0, _file_num_records_cached(data_files[0])) random.shuffle(data_files) - dataset = tf.data.TFRecordDataset(data_files) + dataset = tf.data.TFRecordDataset(data_files).skip(num_skip) + else: + dataset = tf.data.TFRecordDataset(data_files) def decode_record(record): """Serialized Example to dict of <feature name, Tensor>.""" diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index bf2aa872e..c104c4bb7 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -90,8 +90,9 @@ def tearDownClass(cls): os.remove(f) def testBasicExampleReading(self): - dataset = self.problem.dataset( - tf.estimator.ModeKeys.TRAIN, data_dir=self.data_dir) + dataset = self.problem.dataset(tf.estimator.ModeKeys.TRAIN, + data_dir=self.data_dir, + shuffle_files=False) examples = dataset.make_one_shot_iterator().get_next() with tf.train.MonitoredSession() as sess: # Check that there are multiple examples that have the right fields of the @@ -107,8 +108,9 @@ def testBasicExampleReading(self): self.assertGreater(len(field), 0) def testPreprocess(self): - dataset = self.problem.dataset( - tf.estimator.ModeKeys.TRAIN, data_dir=self.data_dir) + dataset = self.problem.dataset(tf.estimator.ModeKeys.TRAIN, + data_dir=self.data_dir, + shuffle_files=False) examples = dataset.make_one_shot_iterator().get_next() with tf.train.MonitoredSession() as sess: ex_val = sess.run(examples) @@ -117,8 +119,9 @@ def testPreprocess(self): def testLengthFilter(self): max_len = 15 - dataset = self.problem.dataset( - tf.estimator.ModeKeys.TRAIN, data_dir=self.data_dir) + dataset = self.problem.dataset(tf.estimator.ModeKeys.TRAIN, + data_dir=self.data_dir, + shuffle_files=False) dataset = dataset.filter( lambda ex: data_reader.example_valid_size(ex, 0, max_len)) examples = dataset.make_one_shot_iterator().get_next() @@ -211,8 +214,9 @@ def example_len(ex): batch_sizes = [10, 8, 4, 2] window_size = 40 - dataset = self.problem.dataset( - tf.estimator.ModeKeys.TRAIN, data_dir=self.data_dir) + dataset = self.problem.dataset(tf.estimator.ModeKeys.TRAIN, + data_dir=self.data_dir, + shuffle_files=False) dataset = data_reader.bucket_by_sequence_length( dataset, example_len, boundaries, batch_sizes, window_size) batch = dataset.make_one_shot_iterator().get_next() From 69851433d70594d647d64d93d661ec07217cd149 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Sun, 10 Dec 2017 23:15:06 -0800 Subject: [PATCH 0656/4095] Add image_fashion_mnist dataset. PiperOrigin-RevId: 178576769 --- tensor2tensor/data_generators/image.py | 97 ++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 6 deletions(-) diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py index 70bca2d60..794d6615a 100644 --- a/tensor2tensor/data_generators/image.py +++ b/tensor2tensor/data_generators/image.py @@ -565,23 +565,23 @@ def _extract_mnist_labels(filename, num_labels): return labels -def mnist_generator(tmp_dir, training, how_many, start_from=0): +def mnist_common_generator(tmp_dir, training, how_many, data_filename, + label_filename, start_from=0): """Image generator for MNIST. Args: tmp_dir: path to temporary storage directory. training: a Boolean; if true, we use the train set, otherwise the test set. how_many: how many images and labels to generate. + data_filename: file that contains features data. + label_filename: file that contains labels. start_from: from which image to start. Returns: An instance of image_generator that produces MNIST images. """ - _get_mnist(tmp_dir) - d = _MNIST_TRAIN_DATA_FILENAME if training else _MNIST_TEST_DATA_FILENAME - l = _MNIST_TRAIN_LABELS_FILENAME if training else _MNIST_TEST_LABELS_FILENAME - data_path = os.path.join(tmp_dir, d) - labels_path = os.path.join(tmp_dir, l) + data_path = os.path.join(tmp_dir, data_filename) + labels_path = os.path.join(tmp_dir, label_filename) images = _extract_mnist_images(data_path, 60000 if training else 10000) labels = _extract_mnist_labels(labels_path, 60000 if training else 10000) # Shuffle the data to make sure classes are well distributed. @@ -592,6 +592,24 @@ def mnist_generator(tmp_dir, training, how_many, start_from=0): labels[start_from:start_from + how_many]) +def mnist_generator(tmp_dir, training, how_many, start_from=0): + """Image generator for MNIST. + + Args: + tmp_dir: path to temporary storage directory. + training: a Boolean; if true, we use the train set, otherwise the test set. + how_many: how many images and labels to generate. + start_from: from which image to start. + + Returns: + An instance of image_generator that produces MNIST images. + """ + _get_mnist(tmp_dir) + d = _MNIST_TRAIN_DATA_FILENAME if training else _MNIST_TEST_DATA_FILENAME + l = _MNIST_TRAIN_LABELS_FILENAME if training else _MNIST_TEST_LABELS_FILENAME + return mnist_common_generator(tmp_dir, training, how_many, d, l, start_from) + + @registry.register_problem class ImageMnistTune(Image2ClassProblem): """MNIST, tuning data.""" @@ -628,6 +646,73 @@ def generator(self, data_dir, tmp_dir, is_training): else: return mnist_generator(tmp_dir, False, 10000) +# URLs and filenames for MNIST data. +_FASHION_MNIST_URL = ("http://fashion-mnist.s3-website.eu-central-1" + ".amazonaws.com/") +_FASHION_MNIST_LOCAL_FILE_PREFIX = "fashion-" +_FASHION_MNIST_IMAGE_SIZE = 28 + + +def _get_fashion_mnist(directory): + """Download all FashionMNIST files to directory unless they are there.""" + # Fashion mnist files have the same names as MNIST. + # We must choose a separate name (by adding 'fashion-' prefix) in the tmp_dir. + for filename in [ + _MNIST_TRAIN_DATA_FILENAME, _MNIST_TRAIN_LABELS_FILENAME, + _MNIST_TEST_DATA_FILENAME, _MNIST_TEST_LABELS_FILENAME + ]: + generator_utils.maybe_download(directory, + _FASHION_MNIST_LOCAL_FILE_PREFIX + filename, + _FASHION_MNIST_URL + filename) + + +def fashion_mnist_generator(tmp_dir, training, how_many, start_from=0): + """Image generator for FashionMNIST. + + Args: + tmp_dir: path to temporary storage directory. + training: a Boolean; if true, we use the train set, otherwise the test set. + how_many: how many images and labels to generate. + start_from: from which image to start. + + Returns: + An instance of image_generator that produces MNIST images. + """ + _get_fashion_mnist(tmp_dir) + d = _FASHION_MNIST_LOCAL_FILE_PREFIX + ( + _MNIST_TRAIN_DATA_FILENAME if training else _MNIST_TEST_DATA_FILENAME) + l = _FASHION_MNIST_LOCAL_FILE_PREFIX + ( + _MNIST_TRAIN_LABELS_FILENAME if training else + _MNIST_TEST_LABELS_FILENAME) + return mnist_common_generator(tmp_dir, training, how_many, d, l, start_from) + + +@registry.register_problem +class ImageFashionMnist(Image2ClassProblem): + """Fashion MNIST.""" + + @property + def is_small(self): + return True + + @property + def num_classes(self): + return 10 + + @property + def class_labels(self): + return [str(c) for c in range(self.num_classes)] + + @property + def train_shards(self): + return 10 + + def generator(self, data_dir, tmp_dir, is_training): + if is_training: + return fashion_mnist_generator(tmp_dir, True, 60000) + else: + return fashion_mnist_generator(tmp_dir, False, 10000) + # URLs and filenames for CIFAR data. _CIFAR10_URL = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" From 54b5806c433fd5adc35412e256572c9e83c4bc2e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 13 Dec 2017 16:43:10 -0800 Subject: [PATCH 0657/4095] Add features from non-TPU codepath to TPU codepath (except multi-machine data_parallelism) PiperOrigin-RevId: 178977571 --- tensor2tensor/bin/t2t-tpu-trainer | 1 + tensor2tensor/data_generators/problem.py | 87 +++++++++++++----- tensor2tensor/layers/common_hparams.py | 3 + tensor2tensor/tpu/tpu_trainer.py | 1 + tensor2tensor/tpu/tpu_trainer_lib.py | 7 +- tensor2tensor/utils/data_reader.py | 23 +++-- tensor2tensor/utils/data_reader_test.py | 3 +- tensor2tensor/utils/input_fn_builder.py | 19 +--- tensor2tensor/utils/model_builder.py | 71 +------------- tensor2tensor/utils/optimize.py | 112 ++++++++++++++++++++++- tensor2tensor/utils/t2t_model.py | 44 ++++++--- 11 files changed, 237 insertions(+), 134 deletions(-) diff --git a/tensor2tensor/bin/t2t-tpu-trainer b/tensor2tensor/bin/t2t-tpu-trainer index 805ba1078..65891da7b 100644 --- a/tensor2tensor/bin/t2t-tpu-trainer +++ b/tensor2tensor/bin/t2t-tpu-trainer @@ -93,6 +93,7 @@ def create_run_config(): num_gpus=FLAGS.worker_gpu, gpu_order=FLAGS.gpu_order, shard_to_cpu=FLAGS.locally_shard_to_cpu, + num_async_replicas=FLAGS.worker_replicas, use_tpu=FLAGS.use_tpu) diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 92af00342..6b12329ec 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -480,13 +480,14 @@ def feature_info(self): return features def make_estimator_input_fn(self, mode, hparams): + """Return input_fn wrapped for Estimator.""" def estimator_input_fn(params, config): - return self.input_pipeline(mode, hparams, params=params, config=config) + return self.input_fn(mode, hparams, params=params, config=config) return estimator_input_fn - def input_pipeline(self, mode, hparams, params=None, config=None): + def input_fn(self, mode, hparams, params=None, config=None): """Builds input pipeline for problem. Args: @@ -498,17 +499,24 @@ def input_pipeline(self, mode, hparams, params=None, config=None): Returns: (features_dict<str name, Tensor feature>, Tensor targets) """ - tf.logging.warning("Problem.input_pipeline implements a subset of " + tf.logging.warning("Problem.input_fn implements a subset of " "input_fn_builder.build_input_fn and is currently only " "used in tpu_trainer.") is_training = mode == tf.estimator.ModeKeys.TRAIN num_threads = 4 if is_training else 1 batch_size = _get_batch_size(params, hparams, config) - def valid_size(example): + def tpu_valid_size(example): return data_reader.example_valid_size(example, hparams.min_length, hparams.max_length) + def gpu_valid_size(example): + drop_long_sequences = is_training or hparams.eval_drop_long_sequences + return data_reader.example_valid_size( + example, + hparams.min_length, + hparams.max_length if drop_long_sequences else 10**9) + def define_shapes(example): """Set the right shapes for the features.""" inputs = example["inputs"] @@ -523,13 +531,14 @@ def define_shapes(example): example["inputs"] = inputs example["targets"] = targets - # Ensure batch size is set on all features - for _, t in six.iteritems(example): - shape = t.get_shape().as_list() - shape[0] = batch_size - t.set_shape(t.get_shape().merge_with(shape)) - # Assert shapes are fully known - t.get_shape().assert_is_fully_defined() + if config.use_tpu: + # Ensure batch size is set on all features + for _, t in six.iteritems(example): + shape = t.get_shape().as_list() + shape[0] = batch_size + t.set_shape(t.get_shape().merge_with(shape)) + # Assert shapes are fully known + t.get_shape().assert_is_fully_defined() return example @@ -542,24 +551,47 @@ def define_shapes(example): if is_training: dataset = dataset.repeat(None) - # Batch (and pad) - # TODO(rsepassi): Add support for bucketing by length + # Batching if _are_shapes_fully_defined(dataset.output_shapes): dataset = dataset.apply( tf.contrib.data.batch_and_drop_remainder(batch_size)) else: - # If shapes are not fully defined, filter out long ones and pad to - # hparams.max_length - dataset = dataset.filter(valid_size) - padded_shapes = _fill_shape_nones( - dataset.output_shapes, none_filler=hparams.max_length) - dataset = dataset.apply( - tf.contrib.data.padded_batch_and_drop_remainder(batch_size, - padded_shapes)) + # Variable length features + if config.use_tpu: + # On TPU, pad to hparams.max_length + dataset = dataset.filter(tpu_valid_size) + padded_shapes = _fill_shape_nones( + dataset.output_shapes, none_filler=hparams.max_length) + dataset = dataset.apply( + tf.contrib.data.padded_batch_and_drop_remainder(batch_size, + padded_shapes)) + else: + # On GPU, bucket by length + dataset = dataset.filter(gpu_valid_size) + batching_scheme = data_reader.hparams_to_batching_scheme( + hparams, + shard_multiplier=config.t2t_device_info["num_shards"], + length_multiplier=self.get_hparams().batch_size_multiplier) + dataset = data_reader.bucket_by_sequence_length( + dataset, + data_reader.example_length, + batching_scheme["boundaries"], + batching_scheme["batch_sizes"]) dataset = dataset.map(define_shapes, num_parallel_calls=num_threads) dataset = dataset.prefetch(1) features = dataset.make_one_shot_iterator().get_next() + if not config.use_tpu: + _summarize_features(features, config.t2t_device_info["num_shards"]) + + if mode == tf.estimator.ModeKeys.PREDICT: + features["infer_targets"] = features["targets"] + features["targets"] = None + # This is because of a bug in the Estimator that short-circuits prediction + # if it doesn't see a QueueRunner. DummyQueueRunner implements the + # minimal expected interface but does nothing. + tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, + data_reader.DummyQueueRunner()) return features, features["targets"] @@ -859,3 +891,16 @@ def _fill_shape_nones(shapes_dict, none_filler=None): (dim if dim is not None else none_filler) for dim in shape.as_list() ] return padded_shapes + + +def _summarize_features(features, num_shards=1): + with tf.name_scope("input_stats"): + for (k, v) in six.iteritems(features): + if isinstance(v, tf.Tensor) and v.get_shape().ndims > 1: + tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // num_shards) + tf.summary.scalar("%s_length" % k, tf.shape(v)[1]) + nonpadding = tf.to_float(tf.not_equal(v, 0)) + nonpadding_tokens = tf.reduce_sum(nonpadding) + tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens) + tf.summary.scalar("%s_nonpadding_fraction" % k, + tf.reduce_mean(nonpadding)) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 673ea1c83..4a38d98c3 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -116,6 +116,9 @@ def basic_params1(): # If set to True, drop sequences longer than max_length during eval. # This affects the validity of the evaluation metrics. eval_drop_long_sequences=False, + # If True, run the model autoregressively instead of teacher-forcing + # during eval + eval_run_autoregressive=False, # TODO(lukaszkaiser): these parameters should probably be set elsewhere. # in SymbolModality, share the output embeddings and the softmax # variables. diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 193ecc3f2..203ddc9e3 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -92,6 +92,7 @@ def create_run_config(): num_gpus=FLAGS.worker_gpu, gpu_order=FLAGS.gpu_order, shard_to_cpu=FLAGS.locally_shard_to_cpu, + num_async_replicas=FLAGS.worker_replicas, use_tpu=FLAGS.use_tpu) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index c1efc38e8..fa9947297 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -36,6 +36,7 @@ def create_run_config(master="", num_gpus=1, gpu_order="", shard_to_cpu=False, + num_async_replicas=1, use_tpu=True): """Create TPUConfig and tpu.RunConfig.""" session_config = tf.ConfigProto( @@ -61,12 +62,14 @@ def create_run_config(master="", config = run_config_cls(**run_config_args) # If not using TPU, add device info for data_parallelism + config.use_tpu = use_tpu if not use_tpu: config.t2t_device_info = { "num_gpus": num_gpus, "gpu_order": gpu_order, "shard_to_cpu": shard_to_cpu, - "num_shards": max(1, num_gpus + int(shard_to_cpu)) + "num_shards": max(1, num_gpus + int(shard_to_cpu)), + "num_async_replicas": num_async_replicas, } return config @@ -87,7 +90,7 @@ def create_estimator(model_name, if schedule == "train": # Estimator takes the presence of eval_batch_size as an indication that # an eval is being performed, and complains about num_shards being too - # big. So we have to eval_batch_size to None. + # big. So we have to set eval_batch_size to None. eval_batch_size = None return tf.contrib.tpu.TPUEstimator( model_fn=model_fn, diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 2736a0c45..58a9f18a6 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -117,7 +117,7 @@ def input_pipeline(problem, dataset = dataset.shuffle(capacity) dataset = dataset.repeat(None) - bucket_id_fn = _example_length + bucket_id_fn = example_length if len(batching_scheme["boundaries"]) == 1: bucket_id_fn = lambda _: tf.constant(0) @@ -129,14 +129,13 @@ def input_pipeline(problem, bucket_id_fn, batching_scheme["boundaries"], batching_scheme["batch_sizes"], - batching_scheme["window_size"], padded_shapes=batching_scheme["padded_shapes"]) batched_examples = dataset.make_one_shot_iterator().get_next() return batched_examples -def _example_length(example): +def example_length(example): length = 0 # Length of the example is the maximum length of the feature lengths for v in example.values(): @@ -148,7 +147,7 @@ def _example_length(example): def example_valid_size(example, min_length, max_length): - length = _example_length(example) + length = example_length(example) return tf.logical_and( length >= min_length, length <= max_length, @@ -159,7 +158,6 @@ def bucket_by_sequence_length(dataset, example_length_fn, bucket_boundaries, bucket_batch_sizes, - window_size, padded_shapes=None): """Bucket entries in dataset by length. @@ -169,14 +167,12 @@ def bucket_by_sequence_length(dataset, the example, which will determine the bucket it goes into. bucket_boundaries: list<int>, boundaries of the buckets. bucket_batch_sizes: list<int>, batch size per bucket. - window_size: an integer divisible by all elements of bucket_batch_sizes padded_shapes: dict<feature name, list<int>>, optional, shapes of the features with None where feature should be padded to max in that dim. Returns: Dataset of padded and batched examples. """ - del window_size with tf.name_scope("bucket_by_seq_length"): def example_to_bucket_id(example): @@ -311,9 +307,7 @@ def _batching_scheme(batch_size, "min_length": min_length, "max_length": (max_length if drop_long_sequences else 10**9), "shuffle_queue_size": shuffle_queue_size, - "window_size": window_size, } - tf.logging.info("batching_scheme = %s" % ret) return ret @@ -386,3 +380,14 @@ def serving_input_fn(problem, hparams): return tf.estimator.export.ServingInputReceiver( features=features, receiver_tensors=example) + + +class DummyQueueRunner(object): + """Can stand-in for a QueueRunner but does nothing.""" + + def __init__(self): + pass + + def create_threads(self, sess, coord=None, daemon=False, start=False): + del sess, coord, daemon, start + return [] diff --git a/tensor2tensor/utils/data_reader_test.py b/tensor2tensor/utils/data_reader_test.py index c104c4bb7..3893386af 100644 --- a/tensor2tensor/utils/data_reader_test.py +++ b/tensor2tensor/utils/data_reader_test.py @@ -212,13 +212,12 @@ def example_len(ex): boundaries = [10, 20, 30] batch_sizes = [10, 8, 4, 2] - window_size = 40 dataset = self.problem.dataset(tf.estimator.ModeKeys.TRAIN, data_dir=self.data_dir, shuffle_files=False) dataset = data_reader.bucket_by_sequence_length( - dataset, example_len, boundaries, batch_sizes, window_size) + dataset, example_len, boundaries, batch_sizes) batch = dataset.make_one_shot_iterator().get_next() input_vals = [] diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index fc4a72405..f416b9d2b 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -146,9 +146,10 @@ def input_fn(): feature_map["targets"]._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access # This is because of a bug in the Estimator that short-circuits prediction - # if it doesn't see a QueueRunner. DummyQueueRunner implements the + # if it doesn't see a QueueRunner. DummyQueueRunner implements the # minimal expected interface but does nothing. - tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, DummyQueueRunner()) + tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, + data_reader.DummyQueueRunner()) return feature_map, None return feature_map, feature_map["targets"] @@ -188,17 +189,6 @@ def cond_on_index(fn, index_tensor, max_idx, cur_idx=0): ) -class DummyQueueRunner(object): - """Can stand-in for a QueueRunner but does nothing.""" - - def __init__(self): - pass - - def create_threads(self, sess, coord=None, daemon=False, start=False): - del sess, coord, daemon, start - return [] - - def features_for_problem(problem_instance, p_hparams, hparams, @@ -223,8 +213,7 @@ def features_for_problem(problem_instance, # If batch_size is fixed, use a single input bucket batching_scheme["batch_sizes"] = [batch_size] batching_scheme["boundaries"] = [] - # Log new batching scheme if updated - tf.logging.info("Updated batching_scheme = %s", batching_scheme) + tf.logging.info("batching_scheme = %s" % batching_scheme) feature_map = data_reader.input_pipeline( problem_instance, data_dir, diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 9a05dd16d..61ea55ca9 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -24,7 +24,6 @@ # Dependency imports -import numpy as np import six # pylint: disable=redefined-builtin from six.moves import xrange @@ -38,7 +37,6 @@ from tensor2tensor.utils import registry import tensorflow as tf -from tensorflow.python.framework import dtypes def model_fn(model, @@ -80,7 +78,8 @@ def model_fn(model, # TODO(rsepassi): This still depends on FLAGS. Rm eventually. dp = devices.data_parallelism(hparams) - tf.get_variable_scope().set_initializer(_get_variable_initializer(hparams)) + tf.get_variable_scope().set_initializer( + optimize.get_variable_initializer(hparams)) is_training = mode == tf.estimator.ModeKeys.TRAIN # Add input statistics for incoming features. @@ -243,30 +242,6 @@ def nth_model(n): tf.to_float(nth_steps) / (tf.to_float(global_step) + 1.0)) - # Add weight decay and noise. - total_size, weight_decay_loss = 0, 0.0 - all_weights = {v.name: v for v in tf.trainable_variables()} - for v_name in sorted(list(all_weights)): - v = all_weights[v_name] - v_size = int(np.prod(np.array(v.shape.as_list()))) - total_size += v_size - if hparams.weight_decay > 0.0 and len(v.shape.as_list()) > 1: - # Add weight regularization if set and the weight is not a bias (dim>1). - with tf.device(v._ref().device): # pylint: disable=protected-access - v_loss = tf.nn.l2_loss(v) / v_size - weight_decay_loss += v_loss - is_body = len(v_name) > 5 and v_name[:5] == "body/" - if hparams.weight_noise > 0.0 and is_body: - # Add weight noise if set in hparams. - with tf.device(v._ref().device): # pylint: disable=protected-access - scale = learning_rate * 0.001 - noise = tf.truncated_normal(v.shape) * hparams.weight_noise * scale - noise_op = v.assign_add(noise) - with tf.control_dependencies([noise_op]): - total_loss = tf.identity(total_loss) - if hparams.weight_decay > 0.0: - total_loss += weight_decay_loss * hparams.weight_decay - # The new data reader occasionally emits very small batches, which # cause the examples in those batches to be grossly overweighted. # We decrease the loss proportionally to the ratio of the size of this @@ -284,13 +259,6 @@ def nth_model(n): tf.summary.scalar("small_batch_multiplier", small_batch_multiplier) total_loss *= small_batch_multiplier - # Log variable sizes - _log_variable_sizes(tf.trainable_variables(), "Trainable Variables") - diet_vars = [ - v for v in tf.global_variables() if v.dtype == dtypes.float16_ref - ] - _log_variable_sizes(diet_vars, "Diet Variables") - # Optimize train_op = optimize.optimize(total_loss, learning_rate, hparams) @@ -336,41 +304,6 @@ def wrapping_model_fn(features, labels, mode, params): return wrapping_model_fn -def _log_variable_sizes(var_list, tag): - """Log the sizes and shapes of variables, and the total size. - - Args: - var_list: a list of varaibles - tag: a string - """ - name_to_var = {v.name: v for v in var_list} - total_size = 0 - for v_name in sorted(list(name_to_var)): - v = name_to_var[v_name] - v_size = int(np.prod(np.array(v.shape.as_list()))) - tf.logging.info("Weight %s\tshape %s\tsize %d", - v.name[:-2].ljust(80), - str(v.shape).ljust(20), v_size) - total_size += v_size - tf.logging.info("%s Total size: %d", tag, total_size) - - -def _get_variable_initializer(hparams): - if hparams.initializer == "orthogonal": - return tf.orthogonal_initializer(gain=hparams.initializer_gain) - elif hparams.initializer == "uniform": - max_val = 0.1 * hparams.initializer_gain - return tf.random_uniform_initializer(-max_val, max_val) - elif hparams.initializer == "normal_unit_scaling": - return tf.variance_scaling_initializer( - hparams.initializer_gain, mode="fan_avg", distribution="normal") - elif hparams.initializer == "uniform_unit_scaling": - return tf.variance_scaling_initializer( - hparams.initializer_gain, mode="fan_avg", distribution="uniform") - else: - raise ValueError("Unrecognized initializer: %s" % hparams.initializer) - - def _del_dict_nones(d): for k in list(d.keys()): if d[k] is None: diff --git a/tensor2tensor/utils/optimize.py b/tensor2tensor/utils/optimize.py index aaaeb0015..856b4e005 100644 --- a/tensor2tensor/utils/optimize.py +++ b/tensor2tensor/utils/optimize.py @@ -26,17 +26,24 @@ import tensorflow as tf +from tensorflow.python.framework import dtypes def optimize(loss, learning_rate, hparams, use_tpu=False): """Minimize loss.""" + loss = weight_decay_and_noise(loss, hparams, learning_rate) loss = tf.identity(loss, name="total_loss") + log_variable_sizes() + diet_vars = [ + v for v in tf.global_variables() if v.dtype == dtypes.float16_ref + ] + log_variable_sizes(diet_vars, "Diet Variables") opt = ConditionalOptimizer(hparams.optimizer, learning_rate, hparams) if use_tpu: opt = tf.contrib.tpu.CrossShardOptimizer(opt) - opt_summaries = ["learning_rate", "loss"] + opt_summaries = ["learning_rate", "loss", "gradient_norm"] if hparams.summarize_grads: - opt_summaries.extend(["gradients", "gradient_norm"]) + opt_summaries.extend(["gradients"]) train_op = tf.contrib.layers.optimize_loss( name="training", loss=loss, @@ -141,3 +148,104 @@ def learning_rate_decay(hparams, num_worker_replicas=1, num_train_steps=1): raise ValueError("Unrecognized learning rate decay scheme: %s" % hparams.learning_rate_decay_scheme) return tf.where(step < warmup_steps, inv_decay, decay) + + +def weight_decay_and_noise(loss, hparams, learning_rate, var_list=None): + """Apply weight decay and weight noise.""" + if var_list is None: + var_list = tf.trainable_variables() + + decay_vars = [v for v in var_list if len(v.shape.as_list()) > 1] + noise_vars = [v for v in var_list if "/body/" in v.name] + + weight_decay_loss = weight_decay(hparams.weight_decay, decay_vars) + tf.summary.scalar("weight_decay_loss", weight_decay_loss) + weight_noise_ops = weight_noise(hparams.weight_noise, learning_rate, + noise_vars) + + with tf.control_dependencies(weight_noise_ops): + loss = tf.identity(loss) + + loss += weight_decay_loss + return loss + + +def weight_noise(noise_rate, learning_rate, var_list): + """Apply weight noise to vars in var_list.""" + if not noise_rate: + return [tf.no_op()] + + noise_ops = [] + + for v in var_list: + with tf.device(v._ref().device): # pylint: disable=protected-access + scale = noise_rate * learning_rate * 0.001 + tf.summary.scalar("weight_noise_scale", scale) + noise = tf.truncated_normal(v.shape) * scale + noise_op = v.assign_add(noise) + noise_ops.append(noise_op) + + return noise_ops + + +def weight_decay(decay_rate, var_list): + """Apply weight decay to vars in var_list.""" + if not decay_rate: + return 0. + + weight_decays = [] + for v in var_list: + v_size = int(np.prod(np.array(v.shape.as_list()))) + + # Weight decay + is_bias = len(v.shape.as_list()) <= 1 + if not is_bias: + with tf.device(v._ref().device): # pylint: disable=protected-access + v_loss = tf.nn.l2_loss(v) / v_size + weight_decays.append(v_loss) + + return tf.add_n(weight_decays) * decay_rate + + +def log_variable_sizes(var_list=None, tag=None): + """Log the sizes and shapes of variables, and the total size. + + Args: + var_list: a list of varaibles; defaults to trainable_variables + tag: a string; defaults to "Trainable Variables" + """ + if var_list is None: + var_list = tf.trainable_variables() + if tag is None: + tag = "Trainable Variables" + + if not var_list: + return + + name_to_var = {v.name: v for v in var_list} + total_size = 0 + for v_name in sorted(list(name_to_var)): + v = name_to_var[v_name] + v_size = int(np.prod(np.array(v.shape.as_list()))) + tf.logging.info("Weight %s\tshape %s\tsize %d", + v.name[:-2].ljust(80), + str(v.shape).ljust(20), v_size) + total_size += v_size + tf.logging.info("%s Total size: %d", tag, total_size) + + +def get_variable_initializer(hparams): + """Get variable initializer from hparams.""" + if hparams.initializer == "orthogonal": + return tf.orthogonal_initializer(gain=hparams.initializer_gain) + elif hparams.initializer == "uniform": + max_val = 0.1 * hparams.initializer_gain + return tf.random_uniform_initializer(-max_val, max_val) + elif hparams.initializer == "normal_unit_scaling": + return tf.variance_scaling_initializer( + hparams.initializer_gain, mode="fan_avg", distribution="normal") + elif hparams.initializer == "uniform_unit_scaling": + return tf.variance_scaling_initializer( + hparams.initializer_gain, mode="fan_avg", distribution="uniform") + else: + raise ValueError("Unrecognized initializer: %s" % hparams.initializer) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 3fdbc6281..3d85a1e6a 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -20,6 +20,7 @@ import contextlib import copy +import math import time # Dependency imports @@ -691,6 +692,8 @@ def sampled_results(): return sharded_logits, losses def call(self, inputs_dict, skip=False, force_full_predict=False): + tf.get_variable_scope().set_initializer( + optimize.get_variable_initializer(self.hparams)) with self._var_store.as_default(): self._fill_problem_hparams_features(inputs_dict) sharded_logits, losses = self._model_fn( @@ -813,9 +816,8 @@ def estimator_model_fn(cls, problem = hparams.problem_instances[0] # Instantiate model - data_parallelism = ( - eu.Parallelism([""]) - if use_tpu else _create_data_parallelism(**config.t2t_device_info)) + data_parallelism = _create_data_parallelism( + use_tpu=use_tpu, **config.t2t_device_info) model = cls(hparams, mode, data_parallelism=data_parallelism) # PREDICT mode @@ -825,16 +827,19 @@ def estimator_model_fn(cls, return model.estimator_spec_predict(features, decode_hparams) # TRAIN and EVAL modes - logits, losses_dict = model(features) # pylint: disable=not-callable + if hparams.eval_run_autoregressive and mode == tf.estimator.ModeKeys.EVAL: + logits, losses_dict = model.eval_autoregressive(features) + else: + logits, losses_dict = model(features) # pylint: disable=not-callable # Set known shapes - # TODO(rsepassi): Add support for variable lengths and batch sizes - shape = logits.get_shape().as_list() - if shape[0] is None: - shape[0] = _get_batch_size(params, hparams, config) - if shape[1] is None: - shape[1] = hparams.max_length - logits.set_shape(shape) + if use_tpu: + shape = logits.get_shape().as_list() + if shape[0] is None: + shape[0] = _get_batch_size(params, hparams, config) + if shape[1] is None: + shape[1] = hparams.max_length + logits.set_shape(shape) # Accumulate losses assert "training" in losses_dict @@ -847,11 +852,15 @@ def estimator_model_fn(cls, # TRAIN mode assert mode == tf.estimator.ModeKeys.TRAIN - return model.estimator_spec_train(loss, use_tpu=use_tpu) + num_async_replicas = ( + 1 if use_tpu else config.t2t_device_info["num_async_replicas"]) + return model.estimator_spec_train( + loss, num_async_replicas=num_async_replicas, use_tpu=use_tpu) - def estimator_spec_train(self, loss, use_tpu=False): + def estimator_spec_train(self, loss, num_async_replicas=1, use_tpu=False): """Construct EstimatorSpec for TRAIN mode.""" lr = self.hparams.learning_rate * optimize.learning_rate_decay(self.hparams) + lr /= math.sqrt(float(num_async_replicas)) train_op = optimize.optimize(loss, lr, self.hparams, use_tpu=use_tpu) if use_tpu: @@ -981,8 +990,15 @@ def _get_batch_size(params, hparams, config): def _create_data_parallelism(num_gpus=1, gpu_order="", shard_to_cpu=False, - num_shards=1): + num_shards=1, + use_tpu=False, + **kwargs): """Create Parallelism object.""" + del kwargs + + if use_tpu: + return eu.Parallelism([""]) + gpus = list(range(num_gpus)) if gpu_order: gpus = [int(s) for s in gpu_order.split(" ")] From d6bdd00c71a5a14b6624e4dece55465e9e14957e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Wed, 13 Dec 2017 19:23:11 -0800 Subject: [PATCH 0658/4095] Add empty t2t_device_info to config in TPU path PiperOrigin-RevId: 178992005 --- tensor2tensor/tpu/tpu_trainer_lib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index fa9947297..01753488e 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -63,6 +63,7 @@ def create_run_config(master="", # If not using TPU, add device info for data_parallelism config.use_tpu = use_tpu + config.t2t_device_info = {} if not use_tpu: config.t2t_device_info = { "num_gpus": num_gpus, From b7ee0c7aa2a859710548bc5c55220f13b3d264d9 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Wed, 13 Dec 2017 19:32:57 -0800 Subject: [PATCH 0659/4095] Experiment more on VAE Transformer and latent model training. PiperOrigin-RevId: 178992456 --- tensor2tensor/models/transformer_vae.py | 75 ++++++++++++++++--------- 1 file changed, 49 insertions(+), 26 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index be21fca1a..408b17941 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -55,7 +55,7 @@ def residual_conv(x, repeat, k, hparams, name, reuse=None): def decompress_step(source, hparams, first_relu, is_2d, name): """Decompression function.""" with tf.variable_scope(name): - shape = tf.shape(source) + shape = common_layers.shape_list(source) multiplier = 4 if is_2d else 2 kernel = (1, 1) if is_2d else (1, 1) thicker = common_layers.conv_block( @@ -77,7 +77,7 @@ def top_k_softmax(x, k): def top_k_experts(x, k, hparams): - x_shape = tf.shape(x) + x_shape = common_layers.shape_list(x) x_flat = tf.reshape(x, [-1, x.get_shape().as_list()[-1]]) is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN gates, load = expert_utils.noisy_top_k_gating( @@ -102,7 +102,7 @@ def dae(x, hparams, name): return m, m, 1.0 - tf.reduce_mean(kl) logsm = tf.nn.log_softmax(m) # Gumbel-softmax sample. - gumbel_samples = gumbel_sample(tf.shape(m)) + gumbel_samples = gumbel_sample(common_layers.shape_list(m)) steps = hparams.kl_warmup_steps gumbel_samples *= common_layers.inverse_exp_decay(steps // 5) * 0.5 temperature = 1.2 - common_layers.inverse_lin_decay(steps) @@ -125,7 +125,7 @@ def dae(x, hparams, name): d_dev = - tf.reduce_mean(d_variance) ret = s if hparams.mode != tf.contrib.learn.ModeKeys.TRAIN: - ret = tf.reshape(maxvhot, tf.shape(s)) # Just hot on eval/infer. + ret = tf.reshape(maxvhot, common_layers.shape_list(s)) # Just hot @eval. return m, ret, d_dev * 5.0 + tf.reduce_mean(kl) * 0.002 @@ -133,12 +133,12 @@ def vae(x, z_size, name): with tf.variable_scope(name): mu = tf.layers.dense(x, z_size, name="mu") log_sigma = tf.layers.dense(x, z_size, name="log_sigma") - shape = tf.shape(x) + shape = common_layers.shape_list(x) epsilon = tf.random_normal([shape[0], shape[1], 1, z_size]) z = mu + tf.exp(log_sigma / 2) * epsilon kl = 0.5 * tf.reduce_mean( tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1) - free_bits = z_size // 2 + free_bits = z_size // 4 kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0)) return z, kl_loss, mu, log_sigma @@ -178,7 +178,7 @@ def bit_to_int(x_bit, nbits): for i in range(nbits): x_labels.append(x_l[:, i] * 2**i) res = sum(x_labels) - return tf.to_int32(tf.reshape(res, tf.shape(x_bit)[:-1])) + return tf.to_int32(tf.reshape(res, common_layers.shape_list(x_bit)[:-1])) def int_to_bit(x_int, nbits): @@ -228,7 +228,8 @@ def embed(x): tf.summary.histogram("y_clean", tf.reshape(y_clean, [-1])) if hparams.noise_dev > 0 and hparams.mode == tf.estimator.ModeKeys.TRAIN: dev = hparams.noise_dev - noise = tf.truncated_normal(tf.shape(c), mean=0.0, stddev=dev) + noise = tf.truncated_normal(common_layers.shape_list(c), + mean=0.0, stddev=dev) y = common_layers.saturating_sigmoid(c + noise) else: y = y_clean @@ -237,8 +238,8 @@ def embed(x): pd = common_layers.inverse_exp_decay(hparams.startup_steps * 2) pd *= hparams.d_mix pd = pd if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 - c = tf.cond(tf.less(tf.random_uniform([]), pd), - lambda: y_discrete, lambda: y) + c = tf.where(tf.less(tf.random_uniform( + [common_layers.shape_list(y)[0]]), pd), y_discrete, y) h1a = tf.layers.dense(c, filter_size, name="vch1a") h1b = tf.layers.dense(1.0 - c, filter_size, name="vch1b") h1 = h1a + h1b @@ -313,7 +314,7 @@ def decode_transformer(encoder_output, def multinomial_sample(x, vocab_size, temperature): """Multinomial sampling from a n-dimensional tensor.""" samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1) - reshaped_samples = tf.reshape(samples, tf.shape(x)[:-1]) + reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1]) return tf.to_int32(reshaped_samples) @@ -348,7 +349,7 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, # Prepare. orig_targets = targets - batch_size = tf.shape(orig_targets)[0] + batch_size = common_layers.shape_list(orig_targets)[0] targets = tf.reshape(targets, [batch_size, -1, 1, hparams.hidden_size]) # Encoder. @@ -373,9 +374,10 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, tf.summary.histogram("bit0", tf.reshape(t_bit[:, 0, :], [-1])) pc = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.95 pc = pc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 - cond = tf.less(tf.random_uniform([]), pc) - t_c = tf.cond(cond, lambda: t_c, lambda: targets_c) - losses["extra"] = vc_loss * tf.to_float(cond) + cond = tf.less(tf.random_uniform([batch_size]), pc) + t_c = tf.where(cond, t_c, targets_c) + # TODO(lukaszkaiser): return extra losses batchwise, multiply before mean. + losses["extra"] = vc_loss * tf.reduce_mean(tf.to_float(cond)) # Extra loss predicting latent code from input. Discrete only. if hparams.bottleneck_kind not in ["dense", "vae"]: t_pred = decode_transformer( @@ -384,13 +386,27 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, losses["latent_pred"] = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=t_bit, logits=t_pred) losses["latent_pred"] = tf.reduce_mean( - losses["latent_pred"]) * 0.5 * tf.to_float(cond) + losses["latent_pred"] * 0.5 * tf.to_float(cond)) + else: + inputs_c = decode_transformer(inputs, ed, targets_c, hparams, "dec_c") + losses["latent_pred"] = tf.reduce_mean((inputs_c - targets_c)**2) * 20 + def bn_inputs(): + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + bn, _, _, _ = bottleneck(inputs_c, hparams, 2*2048, "vc") + return bn + pbn = 0.8 if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 + inputs_c = tf.cond(tf.less(tf.random_uniform([]), pbn), + bn_inputs, lambda: inputs_c) + ptc = 1.0 - common_layers.inverse_lin_decay(200000) * 0.5 + ptc = ptc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 + t_c = tf.where(tf.less(tf.random_uniform([batch_size]), ptc), + t_c, inputs_c) else: if hparams.bottleneck_kind in ["dense", "vae"]: - targets_rand = tf.random_uniform(tf.shape(targets_c)) - t_c, _, _, _ = bottleneck(targets_rand, hparams, 2*2048, "vc") + inputs_c = decode_transformer(inputs, ed, targets_c, hparams, "dec_c") + t_c, _, _, _ = bottleneck(inputs_c, hparams, 2*2048, "vc") else: - latent_len = tf.shape(targets_c)[1] + latent_len = common_layers.shape_list(targets_c)[1] _, _, _, embed = bottleneck(targets_c, hparams, 2*2048, "vc") t_c = tf.zeros_like(targets_c[:, :latent_len, :, :]) if cache is None: @@ -402,7 +418,7 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, # Postprocess. d = t_c pos = tf.get_variable("pos", [1, 1000, 1, hparams.hidden_size]) - pos = pos[:, :tf.shape(t_c)[1] + 1, :, :] + pos = pos[:, :common_layers.shape_list(t_c)[1] + 1, :, :] t_c = tf.pad(t_c, [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos # Masking. @@ -414,7 +430,8 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, masking = tf.minimum(tf.maximum(masking, 0.0), 1.0) if hparams.mode == tf.estimator.ModeKeys.PREDICT: masking = predict_mask - mask = tf.less(masking, tf.random_uniform(tf.shape(targets)[:-1])) + mask = tf.less(masking, tf.random_uniform( + common_layers.shape_list(targets)[:-1])) mask = tf.expand_dims(tf.to_float(mask), 3) for i in xrange(hparams.num_compress_steps): j = hparams.num_compress_steps - i - 1 @@ -425,12 +442,18 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, res = decode_transformer(inputs, ed, targets, hparams, "decoder") if hparams.do_ae: - res = res[:, tf.shape(t_c)[1]:, :, :] + res = res[:, common_layers.shape_list(t_c)[1]:, :, :] if hparams.do_mask and hparams.do_refine: def refine_res(): return residual_conv(res, 1, (5, 1), hparams, "refine") all_masked = tf.less(tf.reduce_sum(mask), 0.1) res = tf.cond(all_masked, refine_res, lambda: res) + latent_time = tf.less(200000, tf.to_int32(tf.train.get_global_step())) + losses["latent_pred"] *= tf.to_float(latent_time) + losses["extra"] *= 1.0 - tf.to_float(latent_time) + res = tf.cond(latent_time, + lambda: tf.stop_gradient(0.7 * res) + 0.3 * res, + lambda: res) return res, losses, cache @@ -491,8 +514,8 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, if "partial_targets" in features: initial_output = tf.convert_to_tensor(features["partial_targets"]) else: - batch_size = tf.shape(features["inputs"])[0] - length = tf.shape(features["inputs"])[1] + batch_size = common_layers.shape_list(features["inputs"])[0] + length = common_layers.shape_list(features["inputs"])[1] target_length = tf.to_int32(2.0 * tf.to_float(length)) initial_output = tf.zeros((batch_size, target_length, 1, 1), dtype=tf.int64) @@ -517,13 +540,13 @@ def transformer_ae_small(): hparams.filter_size = 2048 hparams.label_smoothing = 0.0 hparams.add_hparam("z_size", 16) - hparams.add_hparam("noise_dev", 1.0) + hparams.add_hparam("noise_dev", 0.0) hparams.add_hparam("d_mix", 0.5) # Bottleneck kinds supported: dense, vae, semhash, gumbel-softmax, vq-vae. hparams.add_hparam("bottleneck_kind", "semhash") hparams.add_hparam("do_ae", True) hparams.add_hparam("do_mask", True) - hparams.add_hparam("do_refine", True) + hparams.add_hparam("do_refine", False) hparams.add_hparam("drop_inputs", False) hparams.add_hparam("v_size", 1024*64) hparams.add_hparam("max_context_length", 64) From 93a06269e647e4ada84f93c6f2e1bce4fe2d6d57 Mon Sep 17 00:00:00 2001 From: Noam Shazeer <noam@google.com> Date: Thu, 14 Dec 2017 09:35:32 -0800 Subject: [PATCH 0660/4095] first version of super_lm - supercomputer version of attention language model. PiperOrigin-RevId: 179054487 --- tensor2tensor/layers/common_attention.py | 2 + tensor2tensor/layers/common_hparams.py | 2 + tensor2tensor/layers/common_layers.py | 141 +++++++++++ tensor2tensor/layers/modalities.py | 15 +- tensor2tensor/layers/modalities_test.py | 39 +-- tensor2tensor/models/__init__.py | 1 + tensor2tensor/models/super_lm.py | 301 +++++++++++++++++++++++ tensor2tensor/utils/devices.py | 3 +- tensor2tensor/utils/t2t_model.py | 25 +- 9 files changed, 488 insertions(+), 41 deletions(-) create mode 100644 tensor2tensor/models/super_lm.py diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py index 304cb49be..8f17bf734 100644 --- a/tensor2tensor/layers/common_attention.py +++ b/tensor2tensor/layers/common_attention.py @@ -3449,6 +3449,8 @@ def scaled_dot_product_attention_simple(q, k, v, bias, name=None): if bias is not None: logits += bias weights = tf.nn.softmax(logits, name="attention_weights") + tf.summary.image( + "attention", tf.expand_dims(tf.pow(weights, 0.2), 3), max_outputs=1) return tf.matmul(weights, v) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 4a38d98c3..9f4a34bed 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -190,6 +190,8 @@ def basic_params1(): # Set by tpu_trainer to let the model know whether we are on TPU. # Switching on/off tpu should not invalidate checkpoints. use_tpu=False, + # Set this for pure model parallelism. There is only one data shard. + no_data_parallelism=False, ) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index a4f573d03..640730864 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -240,6 +240,15 @@ def shift_right_3d(x, pad_value=None): return shifted_targets +def shift_right_2d(x, pad_value=None): + """Shift the second dimension of x right by one.""" + if pad_value is None: + shifted_targets = tf.pad(x, [[0, 0], [1, 0]])[:, :-1] + else: + shifted_targets = tf.concat([pad_value, x], axis=1)[:, :-1] + return shifted_targets + + def conv_stride2_multistep(x, nbr_steps, output_filters, name=None, reuse=None): """Use a strided convolution to downsample x by 2, `nbr_steps` times. @@ -2343,3 +2352,135 @@ def ones_matrix_band_part(rows, cols, num_lower, num_upper, out_shape=None): band = tf.reshape(band, out_shape) return band + + +def reshape_like_all_dims(a, b): + """Reshapes a to match the shape of b.""" + ret = tf.reshape(a, tf.shape(b)) + if not tfe_context.in_eager_mode(): + ret.set_shape(b.get_shape()) + return ret + + +def reduce_by_device(parallelism, data, reduce_fn): + """Reduces data per device. + + This can be useful, for example, if we want to all-reduce n tensors on k<n + devices (like during eval when we have only one device). We call + reduce_by_device() to first sum the tensors per device, then call our usual + all-reduce operation to create one sum per device, followed by + expand_by_device, to create the appropriate number of pointers to these + results. See all_reduce_ring() below for an example of how this is used. + + Args: + parallelism: a expert_utils.Parallelism object + data: a list of Tensors with length parallelism.n + reduce_fn: a function taking a list of Tensors. e.g. tf.add_n + + Returns: + device_parallelism: a Parallelism object with each device listed only once. + reduced_data: A list of Tensors, one per device. + """ + unique_devices = [] + device_to_data = {} + for dev, datum in zip(parallelism.devices, data): + if dev not in device_to_data: + unique_devices.append(dev) + device_to_data[dev] = [datum] + else: + device_to_data[dev].append(datum) + device_parallelism = eu.Parallelism(unique_devices) + grouped_data = [device_to_data[dev] for dev in unique_devices] + return device_parallelism, device_parallelism(reduce_fn, grouped_data) + + +def expand_by_device(original_parallelism, device_parallelism, data): + """Opposite of reduce_by_device(). + + Args: + original_parallelism: a expert_utils.Parallelism object. + device_parallelism: a expert_utils.Parallelism object. + data: a list of tensors with length device_parallelism.n + + Returns: + a list of Tensors with length original_parallelism.n + """ + device_to_datum = { + device_parallelism.devices[i]: data[i] + for i in xrange(device_parallelism.n)} + return [device_to_datum[d] for d in original_parallelism.devices] + + +def all_reduce_ring(x, parallelism, maybe_reduce=True, use_bfloat16=True): + """Compute the sum of all Tensors and put the result everywhere. + + Assumes that the devices are connected in a ring. + + Args: + x: a list of Tensors with length parallelism.n + parallelism: a expert_utils.Parallelism object. + maybe_reduce: a boolean - first reduce per device. + use_bfloat16: a boolean - saves bandwidth but loses precision + + Returns: + a list of Tensors with length parallelism.n + """ + if parallelism.n == 1: + return x + + if maybe_reduce: + original_parallelism = parallelism + parallelism, x = reduce_by_device(parallelism, x, tf.add_n) + + if parallelism.n == 1: + y = x + else: + # first shard the input: + x_flat = parallelism(tf.reshape, x, [[-1]] * parallelism.n) + # [device, shard] + x_split = parallelism(approximate_split, x_flat, parallelism.n, 0) + def _step(source_replica, target_replica, x_split, op="plus_eq"): + """Helper function - one step of summing or copying. + + If op == "plus_eq", then adds source_replica into target_replica + If op == "copy", then copies source_replica onto target_replica + + These operations happen for all shards. The replica numbers are offset + by the shard numbers to keep all physical links busy. + + Args: + source_replica: an integer + target_replica: an integer + x_split: a list of lists of tensors + op: a string + """ + for shard in xrange(parallelism.n): + source_device = (shard + source_replica) % parallelism.n + target_device = (shard + target_replica) % parallelism.n + source = x_split[source_device][shard] + if use_bfloat16: + with tf.device(parallelism.devices[source_device]): + source = tf.to_bfloat16(source) + with tf.device(parallelism.devices[target_device]): + source = tf.to_float(source) + if op == "plus_eq": + x_split[target_device][shard] += source + else: + assert op == "copy" + x_split[target_device][shard] = tf.identity(source) + center = parallelism.n // 2 + # accumulate everything towards the center. + for i in range(center, parallelism.n - 1)[::-1]: + _step(i + 1, i, x_split, op="plus_eq") + for i in xrange(center): + _step(i, i + 1, x_split, op="plus_eq") + # copy everything away from the center. + for i in xrange(center, parallelism.n - 1): + _step(i, i + 1, x_split, op="copy") + for i in range(center)[::-1]: + _step(i + 1, i, x_split, op="copy") + x_concat = parallelism(tf.concat, x_split, 0) + y = parallelism(reshape_like_all_dims, x_concat, x) + if maybe_reduce: + y = expand_by_device(original_parallelism, parallelism, y) + return y diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index ddef5e67f..9d00c5116 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -433,7 +433,6 @@ def top(self, body_output, _): @registry.register_generic_modality("default") @registry.register_audio_modality("identity") @registry.register_image_modality("identity") -@registry.register_symbol_modality("identity") @registry.register_class_label_modality("identity") @registry.register_real_modality("identity") class IdentityModality(modality.Modality): @@ -502,3 +501,17 @@ class IdentityZeroLossModality(IdentityModality): def loss(self, top_out, targets): return tf.constant(0., tf.float32), tf.constant(0., tf.float32) + + +@registry.register_symbol_modality("identity") +class IdentitySymbolModality(SymbolModality): + """Symbol modality with identity top and bottom transformations. + + Uses the weights_fn from SymbolModality so that loss/metrics ignore padding. + """ + + def bottom(self, x): + return tf.to_float(x) + + def top(self, body_output, _): + return body_output diff --git a/tensor2tensor/layers/modalities_test.py b/tensor2tensor/layers/modalities_test.py index f1bcd87c3..87fd2b7e5 100644 --- a/tensor2tensor/layers/modalities_test.py +++ b/tensor2tensor/layers/modalities_test.py @@ -22,6 +22,7 @@ import numpy as np +from tensor2tensor.layers import common_hparams from tensor2tensor.layers import modalities from tensor2tensor.utils import expert_utils @@ -36,14 +37,9 @@ def testSymbolModalityInputs(self): length = 5 vocab_size = 5000 hidden_size = 9 - model_hparams = tf.contrib.training.HParams( - symbol_modality_num_shards=4, - hidden_size=hidden_size, - multiply_embedding_mode="sqrt_depth", - symbol_modality_skip_top=0, - shared_embedding_and_softmax_weights=0, - prepend_mode="none", - use_tpu=False) + model_hparams = common_hparams.basic_params1() + model_hparams.hidden_size = hidden_size + model_hparams.mode = tf.estimator.ModeKeys.TRAIN x = -1 + np.random.random_integers( vocab_size, size=(batch_size, length, 1, 1)) m = modalities.SymbolModality(model_hparams, vocab_size) @@ -64,16 +60,9 @@ def testSymbolModalityTargets(self): height = 7 hidden_size = 9 vocab_size = 11 - model_hparams = tf.contrib.training.HParams( - symbol_modality_num_shards=4, - hidden_size=hidden_size, - label_smoothing=0.2, - symbol_modality_skip_top=0, - shared_embedding_and_softmax_weights=0, - factored_logits=0, - mode=tf.estimator.ModeKeys.TRAIN, - prepend_mode="none", - use_tpu=False) + model_hparams = common_hparams.basic_params1() + model_hparams.hidden_size = hidden_size + model_hparams.mode = tf.estimator.ModeKeys.TRAIN body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( @@ -101,16 +90,10 @@ def testSymbolModalityTargetsFactored(self): height = 7 hidden_size = 9 vocab_size = 11 - model_hparams = tf.contrib.training.HParams( - symbol_modality_num_shards=4, - hidden_size=hidden_size, - label_smoothing=0.2, - symbol_modality_skip_top=0, - shared_embedding_and_softmax_weights=0, - factored_logits=1, - mode=tf.estimator.ModeKeys.TRAIN, - prepend_mode="none", - use_tpu=False) + model_hparams = common_hparams.basic_params1() + model_hparams.factored_logits = True + model_hparams.hidden_size = hidden_size + model_hparams.mode = tf.estimator.ModeKeys.TRAIN body_output = -1 + np.random.random_integers( 100, size=(batch_size, length, height, hidden_size)) targets = -1 + np.random.random_integers( diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index c067711be..19a8d9735 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -36,6 +36,7 @@ from tensor2tensor.models import resnet from tensor2tensor.models import shake_shake from tensor2tensor.models import slicenet +from tensor2tensor.models import super_lm from tensor2tensor.models import transformer from tensor2tensor.models import transformer_moe from tensor2tensor.models import transformer_revnet diff --git a/tensor2tensor/models/super_lm.py b/tensor2tensor/models/super_lm.py new file mode 100644 index 000000000..ab9287b35 --- /dev/null +++ b/tensor2tensor/models/super_lm.py @@ -0,0 +1,301 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Supercomputer-based language model. + +Uses model-parallelism. + +Each shard (device) has a similar structure with different weights. +Occasional cross-replica-sum across shards. + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports + +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensor2tensor.layers import common_attention +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import common_layers +from tensor2tensor.utils import expert_utils +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + +ModeKeys = tf.estimator.ModeKeys # pylint: disable=invalid-name + + +def _embedding(inputs, vocab_size, dense_size): + embedding_var = tf.get_variable("embedding", [vocab_size, dense_size]) + emb_x = tf.gather(embedding_var, tf.to_int32(inputs)) + emb_x *= dense_size ** 0.5 + return emb_x + + +@registry.register_model +class SuperLM(t2t_model.T2TModel): + """Attention net. See file docstring.""" + + def model_fn_body(self, features): + # Remove dropout if not training + hparams = self._hparams + ps_devices = self._ps_devices + assert hparams.num_model_shards % len(ps_devices) == 0 + shards_per_device = hparams.num_model_shards // len(ps_devices) + model_devices = [ps_devices[i // shards_per_device] + for i in xrange(hparams.num_model_shards)] + print("model_devices = %s" % model_devices) + mp = expert_utils.Parallelism(model_devices, reuse=False) + vocab_size = self._problem_hparams.vocabulary["targets"].vocab_size + # squeeze out channels, heights + targets = features["targets_raw"] + targets = tf.squeeze(targets, 3) + targets = tf.squeeze(targets, 2) + shifted_targets = common_layers.shift_right_2d(targets) + # Bypass the symbol modality and use a different embedding on each shard. + decoder_input = mp( + _embedding, shifted_targets, vocab_size, hparams.hidden_size) + decoder_self_attention_bias = mp( + common_attention.attention_bias_lower_triangle, + tf.shape(targets)[1]) + if "targets_segmentation" in features: + # "Packed" dataset - keep the examples from seeing each other. + targets_segmentation = features["targets_segmentation"] + targets_position = features["targets_position"] + decoder_self_attention_bias = mp( + tf.add, decoder_self_attention_bias, + mp(common_attention.attention_bias_same_segment, + targets_segmentation, targets_segmentation)) + else: + targets_position = None + + if hparams.pos == "timing": + if targets_position is None: + decoder_input = mp(common_attention.add_timing_signal_1d, decoder_input) + else: + decoder_input = mp( + common_attention.add_timing_signal_1d_given_position, + decoder_input, targets_position) + + decoder_input = mp( + tf.nn.dropout, decoder_input, + 1.0 - hparams.layer_prepostprocess_dropout) + decoder_output = _super_stack( + decoder_input, decoder_self_attention_bias, hparams, mp) + # Bypass the symbol modality and compute logits directly. + # We compute a different set of logits on each shard, and sum them. + logits = mp(tf.layers.dense, decoder_output, vocab_size, name="logits") + logits = common_layers.all_reduce_ring(logits, mp) + logits = mp(tf.multiply, logits, mp.n ** -0.5) + # We now have identical logits on all shards. + # Shard 0 gets returned to the estimator. + logits_shard_0 = logits[0] + logits_shard_0 = tf.expand_dims(logits_shard_0, 2) + logits_shard_0 = tf.expand_dims(logits_shard_0, 3) + # On each device, we compute the loss for a part of the batch. + # This is faster than computing the whole loss on one shard. + mp, logits = common_layers.reduce_by_device(mp, logits, lambda(l): l[0]) + def _loss_for_shard(logits, targets, shard): + if mp.n > 1: + logits = common_layers.approximate_split(logits, mp.n, 0)[shard] + targets = common_layers.approximate_split(targets, mp.n, 0)[shard] + return common_layers.padded_cross_entropy( + logits, targets, hparams.label_smoothing) + num, denom = mp(_loss_for_shard, logits, targets, range(mp.n)) + # override training loss so that it is not computed externally. + losses = {"training": tf.add_n(num) / tf.add_n(denom)} + return logits_shard_0, losses + + +def _super_stack(inputs, + attention_bias, + hparams, + mp, + padding="LEFT"): + """A stack of super_lm layers. + + Args: + inputs: a list of Tensors + attention_bias: list of bias Tensor for self-attention + (see common_attention.attention_bias()) + hparams: hyperparameters for model + mp: a Parallelism object + padding: a string + + Returns: + y: a Tensors + """ + layers = hparams.layers.strip(",").split(",") + ffn_hidden_sizes = [int(s) for s in hparams.ffn_hidden_sizes.split(",")] + # scaled_dot_product_attention_with_projections uses a 3d attention bias + # (no heads), where multihead_attention uses 4d attention bias. + mix_size = int(hparams.mix_fraction * hparams.hidden_size) + attention_bias_3d = mp(tf.squeeze, attention_bias, 1) + accumulator = inputs + x = inputs + for layer_num, layer_type in enumerate(layers): + with tf.variable_scope("%s_%d" % (layer_type, layer_num)): + tf.logging.info("%s_%d" % (layer_type, layer_num)) + if layer_type == "a": + # accumulate + accumulator = mp(tf.add, x, accumulator) + x = accumulator + elif layer_type == "n": + # normalize + x = mp(common_layers.apply_norm, + x, hparams.norm_type, hparams.hidden_size, hparams.norm_epsilon) + elif layer_type == "d": + # dropout + x = mp(tf.nn.dropout, x, 1.0 - hparams.layer_prepostprocess_dropout) + elif layer_type == "m": + # mix across shards + def _split(t): + return tuple(tf.split( + t, [mix_size, hparams.hidden_size - mix_size], 2)) + to_mix, to_keep = mp(_split, x) + mixed = common_layers.all_reduce_ring(to_mix, mp) + mixed = mp(tf.multiply, mixed, mp.n ** -0.5) + x = mp(lambda a, b: tf.concat([a, b], 2), mixed, to_keep) + elif layer_type == "att": + # single-head attention + q = mp(tf.layers.dense, x, hparams.hidden_size, use_bias=False, + name="q_transform") + x = mp( + common_attention.scaled_dot_product_attention_simple, + q, x, x, attention_bias_3d) + x = mp(tf.layers.dense, x, hparams.hidden_size, use_bias=False, + name="o_transform") + elif layer_type == "multihead-att": + # multi-head attention + x = mp( + common_attention.multihead_attention, + x, + None, + attention_bias, # bias + hparams.attention_key_channels or hparams.hidden_size, + hparams.attention_value_channels or hparams.hidden_size, + hparams.hidden_size, + hparams.num_heads, + hparams.attention_dropout) + elif layer_type == "ffn": + y = mp( + expert_utils.ffn_expert_fn( + hparams.hidden_size, ffn_hidden_sizes, hparams.hidden_size), + mp(expert_utils.flatten_all_but_last, x)) + x = mp(expert_utils.reshape_like, y, x) + elif layer_type == "conv": + # convolution + x = mp( + common_layers.conv1d, + x, + hparams.hidden_size, + hparams.kernel_height, + activation=tf.nn.relu, + padding=padding, + ) + else: + assert False, "unknown sublayer %s" % layer_type + return x + + +@registry.register_hparams +def super_lm_base(): + """Set of hyperparameters.""" + hparams = common_hparams.basic_params1() + hparams.hidden_size = 512 + hparams.batch_size = 16384 + hparams.max_length = 256 + hparams.layer_prepostprocess_dropout = 0.0 + hparams.label_smoothing = 0.0 + hparams.clip_grad_norm = 0. # i.e. no gradient clipping + hparams.optimizer_adam_epsilon = 1e-9 + hparams.learning_rate_decay_scheme = "noam" + hparams.learning_rate = 0.1 + hparams.learning_rate_warmup_steps = 8000 + hparams.initializer_gain = 1.0 + hparams.initializer = "uniform_unit_scaling" + hparams.weight_decay = 0.0 + hparams.optimizer_adam_beta1 = 0.9 + hparams.optimizer_adam_beta2 = 0.98 + hparams.shared_embedding_and_softmax_weights = False + hparams.layer_preprocess_sequence = "n" + hparams.layer_postprocess_sequence = "da" + # we only want one data shard. + hparams.no_data_parallelism = True + # bypass the symbol modality so that we can use model parallelism. + hparams.target_modality = "symbol:identity" + hparams.add_hparam("ffn_hidden_sizes", "512") # Add new ones like this. + hparams.add_hparam("mix_fraction", 0.5) + # attention-related flags + hparams.add_hparam("num_heads", 8) + hparams.add_hparam("attention_key_channels", 0) + hparams.add_hparam("attention_value_channels", 0) + # All hyperparameters ending in "dropout" are automatically set to 0.0 + # when not in training mode. + hparams.add_hparam("attention_dropout", 0.0) + hparams.add_hparam("pos", "timing") # timing, none + hparams.add_hparam( + "layers", ("n,att,m,d,a," "n,ffn,m,d,a,") * 4 + "n,ffn,d") + # Number of model shards - each one has separate parameters. + # Changing this number invalidates checkpoints. + hparams.add_hparam("num_model_shards", 8) + return hparams + + +@registry.register_hparams +def super_lm_conv(): + """Add some convolutions.""" + hparams = super_lm_base() + hparams.layers = ( + ("n,conv,m,d,a," "n,att,m,d,a," "n,ffn,m,d,a,") * 4 + "n,ffn,d") + return hparams + + +@registry.register_hparams +def super_lm_big(): + """Big model.""" + hparams = super_lm_base() + hparams.hidden_size = 1024 + hparams.ffn_hidden_sizes = "2048" + return hparams + + +@registry.register_hparams +def super_lm_low_mix(): + """Less mixuing.""" + hparams = super_lm_base() + hparams.mix_fraction = 0.125 + return hparams + + +@registry.register_hparams +def super_lm_high_mix(): + """More mixing.""" + hparams = super_lm_base() + hparams.mix_fraction = 0.875 + return hparams + + +@registry.register_hparams +def super_lm_b8k(): + """Smaller batch.""" + hparams = super_lm_base() + hparams.batch_size = 8192 + return hparams diff --git a/tensor2tensor/utils/devices.py b/tensor2tensor/utils/devices.py index 490366cab..78d6503e9 100644 --- a/tensor2tensor/utils/devices.py +++ b/tensor2tensor/utils/devices.py @@ -101,7 +101,8 @@ def data_parallelism(hparams, all_workers=False): Returns: a expert_utils.Parallelism. """ - + if hparams.no_data_parallelism: + return eu.Parallelism([""]) def _replica_device_setter(worker_device): if FLAGS.ps_replicas == 0: return worker_device diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 3d85a1e6a..8ba49c630 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -615,10 +615,11 @@ def _model_fn(self, features, skip=False, force_full_predict=False): if not last_only: sharded_logits = target_modality.top_sharded( body_outputs, sharded_features["targets"], dp) - training_loss = target_modality.loss_sharded( - sharded_logits, sharded_features["targets"], dp) - - training_loss *= self._problem_hparams.loss_multiplier + if "training" not in losses: + losses["training"] = ( + target_modality.loss_sharded( + sharded_logits, sharded_features["targets"], dp) + * self._problem_hparams.loss_multiplier) else: # Take body outputs for the last position only, and targets too. last_position_body_outputs = [ @@ -632,8 +633,7 @@ def _model_fn(self, features, skip=False, force_full_predict=False): sharded_logits = target_modality.top_sharded(last_position_body_outputs, last_position_targets, self._data_parallelism) - training_loss = None - losses["training"] = training_loss + losses["training"] = None # Scheduled sampling. do_scheduled_sampling = ( # Only do it if training and set for it. @@ -672,10 +672,11 @@ def sampled_results(): with tf.variable_scope(target_modality.name): new_sharded_logits = target_modality.top_sharded( body_outputs, sharded_features["targets"], dp) - training_loss = target_modality.loss_sharded( - sharded_logits, sharded_features["targets"], dp) - training_loss *= self._problem_hparams.loss_multiplier - losses["training"] = training_loss + if "training" not in losses: + losses["training"] = ( + target_modality.loss_sharded( + sharded_logits, sharded_features["targets"], dp) + * self._problem_hparams.loss_multiplier) return new_sharded_logits, losses # Run the above conditionally. @@ -752,7 +753,9 @@ def model_fn_body(self, features): Returns: output: tensor of logits with shape [batch_size, O, P, body_output_size. losses: either single loss as a scalar, a list, a tensor (to be averaged) - or a dictionary of losses. + or a dictionary of losses. If the dictionary contains the key + "training", this is interpreted as an override of the modality's + loss computation. """ raise NotImplementedError("Abstract Method") From 1abca843a3ab412740e434abb3b7d7d96fa2d10e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 14 Dec 2017 13:48:20 -0800 Subject: [PATCH 0661/4095] T2TModel.{bottom, body, top, loss}, simplifying overall model construction PiperOrigin-RevId: 179090517 --- docs/example_life.md | 4 +- tensor2tensor/bin/t2t-tpu-trainer | 3 +- tensor2tensor/bin/t2t-trainer | 2 +- tensor2tensor/bin/t2t_trainer.py | 2 +- tensor2tensor/data_generators/problem.py | 36 +- tensor2tensor/layers/common_hparams.py | 12 +- tensor2tensor/models/aligned.py | 6 +- tensor2tensor/models/attention_lm.py | 2 +- tensor2tensor/models/attention_lm_moe.py | 6 +- tensor2tensor/models/bluenet.py | 2 +- tensor2tensor/models/bytenet.py | 2 +- tensor2tensor/models/cycle_gan.py | 2 +- tensor2tensor/models/gene_expression.py | 2 +- tensor2tensor/models/lstm.py | 4 +- tensor2tensor/models/multimodel.py | 6 +- tensor2tensor/models/neural_gpu.py | 4 +- tensor2tensor/models/resnet.py | 2 +- tensor2tensor/models/shake_shake.py | 2 +- tensor2tensor/models/slicenet.py | 2 +- tensor2tensor/models/super_lm.py | 2 +- tensor2tensor/models/transformer.py | 4 +- tensor2tensor/models/transformer_moe.py | 6 +- tensor2tensor/models/transformer_revnet.py | 2 +- tensor2tensor/models/transformer_vae.py | 5 +- tensor2tensor/models/vanilla_gan.py | 4 +- tensor2tensor/models/xception.py | 2 +- tensor2tensor/notebooks/hello_t2t.ipynb | 2 +- tensor2tensor/tpu/tpu_trainer.py | 3 +- tensor2tensor/tpu/tpu_trainer_lib.py | 18 +- tensor2tensor/utils/flags.py | 112 +++++ tensor2tensor/utils/model_builder.py | 4 +- tensor2tensor/utils/registry.py | 6 +- tensor2tensor/utils/t2t_model.py | 502 +++++++++++---------- tensor2tensor/utils/trainer_utils.py | 101 +---- 34 files changed, 478 insertions(+), 396 deletions(-) create mode 100644 tensor2tensor/utils/flags.py diff --git a/docs/example_life.md b/docs/example_life.md index ce6948b05..850f4d500 100644 --- a/docs/example_life.md +++ b/docs/example_life.md @@ -161,13 +161,13 @@ transformed_features["inputs"] = input_modality.bottom( transformed_features["targets"] = target_modality.targets_bottom( features["targets"]) # for autoregressive models -body_outputs = model.model_fn_body(transformed_features) +body_outputs = model.body(transformed_features) predictions = target_modality.top(body_outputs, features["targets"]) loss = target_modality.loss(predictions, features["targets"]) ``` -Most `T2TModel`s only override `model_fn_body`. +Most `T2TModel`s only override `body`. ## Training, Eval, Inference modes diff --git a/tensor2tensor/bin/t2t-tpu-trainer b/tensor2tensor/bin/t2t-tpu-trainer index 65891da7b..41465b030 100644 --- a/tensor2tensor/bin/t2t-tpu-trainer +++ b/tensor2tensor/bin/t2t-tpu-trainer @@ -24,6 +24,7 @@ from __future__ import print_function from tensor2tensor import models # pylint: disable=unused-import from tensor2tensor import problems as problems_lib # pylint: disable=unused-import from tensor2tensor.tpu import tpu_trainer_lib as lib +from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import from tensor2tensor.utils import registry from tensor2tensor.utils import usr_dir @@ -32,7 +33,7 @@ import tensorflow as tf flags = tf.flags FLAGS = flags.FLAGS -# See trainer_utils.py for additional command-line flags. +# See flags.py for additional command-line flags. flags.DEFINE_string("t2t_usr_dir", "", "Path to a Python module that will be imported. The " "__init__.py file should include the necessary imports. " diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 97ab3106f..1f05cd893 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -43,7 +43,7 @@ import tensorflow as tf flags = tf.flags FLAGS = flags.FLAGS -# See trainer_utils.py for additional command-line flags. +# See flags.py for additional command-line flags. flags.DEFINE_string("t2t_usr_dir", "", "Path to a Python module that will be imported. The " "__init__.py file should include the necessary imports. " diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 5de5c8d9e..977337b02 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -42,7 +42,7 @@ flags = tf.flags FLAGS = flags.FLAGS -# See trainer_utils.py for additional command-line flags. +# See flags.py for additional command-line flags. flags.DEFINE_string("t2t_usr_dir", "", "Path to a Python module that will be imported. The " "__init__.py file should include the necessary imports. " diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 6b12329ec..73414ee40 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -504,7 +504,6 @@ def input_fn(self, mode, hparams, params=None, config=None): "used in tpu_trainer.") is_training = mode == tf.estimator.ModeKeys.TRAIN num_threads = 4 if is_training else 1 - batch_size = _get_batch_size(params, hparams, config) def tpu_valid_size(example): return data_reader.example_valid_size(example, hparams.min_length, @@ -535,7 +534,7 @@ def define_shapes(example): # Ensure batch size is set on all features for _, t in six.iteritems(example): shape = t.get_shape().as_list() - shape[0] = batch_size + shape[0] = params["batch_size"] t.set_shape(t.get_shape().merge_with(shape)) # Assert shapes are fully known t.get_shape().assert_is_fully_defined() @@ -553,8 +552,14 @@ def define_shapes(example): # Batching if _are_shapes_fully_defined(dataset.output_shapes): - dataset = dataset.apply( - tf.contrib.data.batch_and_drop_remainder(batch_size)) + # Static shape features (e.g. images) + if config.use_tpu: + tpu_batch_size = params["batch_size"] + dataset = dataset.apply( + tf.contrib.data.batch_and_drop_remainder(tpu_batch_size)) + else: + num_shards = config.t2t_device_info["num_shards"] + dataset = dataset.batch(hparams.batch_size * num_shards) else: # Variable length features if config.use_tpu: @@ -563,8 +568,8 @@ def define_shapes(example): padded_shapes = _fill_shape_nones( dataset.output_shapes, none_filler=hparams.max_length) dataset = dataset.apply( - tf.contrib.data.padded_batch_and_drop_remainder(batch_size, - padded_shapes)) + tf.contrib.data.padded_batch_and_drop_remainder( + params["batch_size"], padded_shapes)) else: # On GPU, bucket by length dataset = dataset.filter(gpu_valid_size) @@ -572,6 +577,9 @@ def define_shapes(example): hparams, shard_multiplier=config.t2t_device_info["num_shards"], length_multiplier=self.get_hparams().batch_size_multiplier) + if hparams.use_fixed_batch_size: + batching_scheme["batch_sizes"] = [hparams.batch_size] + batching_scheme["boundaries"] = [] dataset = data_reader.bucket_by_sequence_length( dataset, data_reader.example_length, @@ -868,22 +876,6 @@ def _are_shapes_fully_defined(shapes_dict): return True -def _get_batch_size(params, hparams, config): - """Batch size determined by params dict, HParams, and RunConfig.""" - # If params specifies batch size, use that. TPUEstimator passes batch size in - # params. - batch_size = params and params.get("batch_size") - - # If not set, then we're running on CPU/GPU, so use the batch size from the - # hparams, and multiply by the number of data shards. - if not batch_size: - batch_size = hparams.tpu_batch_size_per_shard - if config: - batch_size *= config.t2t_device_info["num_shards"] - - return batch_size - - def _fill_shape_nones(shapes_dict, none_filler=None): padded_shapes = {} for key, shape in six.iteritems(shapes_dict): diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 9f4a34bed..5b4e39058 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -32,9 +32,12 @@ def basic_params1(): """A set of basic hyperparameters.""" return tf.contrib.training.HParams( - batch_size=4096, # in tokens per batch per gpu - # Fixed batch size turns off bucketing during training mode - # and uses batch_size as minibatch size (use small batch_size<=32) + # If the features are variable length, this is in tokens per batch per + # GPU. If the features are of known shape (e.g. image problems), this is + # the actual batch size. + batch_size=4096, + # If True, then if the features are of variable length, the batch_size is + # used as the actual batch size (and not tokens per batch). use_fixed_batch_size=False, num_hidden_layers=4, kernel_height=3, @@ -190,6 +193,9 @@ def basic_params1(): # Set by tpu_trainer to let the model know whether we are on TPU. # Switching on/off tpu should not invalidate checkpoints. use_tpu=False, + # If True in PREDICT mode, then last-position-only optimizations are not + # used. + force_full_predict=False, # Set this for pure model parallelism. There is only one data shard. no_data_parallelism=False, ) diff --git a/tensor2tensor/models/aligned.py b/tensor2tensor/models/aligned.py index a6eca3bab..0cfa4be86 100644 --- a/tensor2tensor/models/aligned.py +++ b/tensor2tensor/models/aligned.py @@ -54,7 +54,11 @@ def _should_postprocess(layer_type): class Aligned(t2t_model.T2TModel): """Attention net. See file docstring.""" - def model_fn_body_sharded(self, sharded_features): + @property + def use_body_sharded(self): + return True + + def body_sharded(self, sharded_features): # Remove dropout if not training hparams = self._hparams dp = self._data_parallelism diff --git a/tensor2tensor/models/attention_lm.py b/tensor2tensor/models/attention_lm.py index 6ee1505b9..d92400fc8 100644 --- a/tensor2tensor/models/attention_lm.py +++ b/tensor2tensor/models/attention_lm.py @@ -42,7 +42,7 @@ class AttentionLM(t2t_model.T2TModel): """Attention net. See file docstring.""" - def model_fn_body(self, features): + def body(self, features): # Remove dropout if not training hparams = self._hparams targets = features["targets"] diff --git a/tensor2tensor/models/attention_lm_moe.py b/tensor2tensor/models/attention_lm_moe.py index a4ffae1b9..fcf04f981 100644 --- a/tensor2tensor/models/attention_lm_moe.py +++ b/tensor2tensor/models/attention_lm_moe.py @@ -84,7 +84,11 @@ def get_choices(): class AttentionLmMoe(t2t_model.T2TModel): """Attention net. See file docstring.""" - def model_fn_body_sharded(self, sharded_features): + @property + def use_body_sharded(self): + return True + + def body_sharded(self, sharded_features): # Remove dropout if not training hparams = self._hparams dp = self._data_parallelism diff --git a/tensor2tensor/models/bluenet.py b/tensor2tensor/models/bluenet.py index 96cb60615..86625a834 100644 --- a/tensor2tensor/models/bluenet.py +++ b/tensor2tensor/models/bluenet.py @@ -451,7 +451,7 @@ def batch_deviation(x): @registry.register_model class BlueNet(t2t_model.T2TModel): - def model_fn_body(self, features): + def body(self, features): hparams = self._hparams # TODO(rshin): Give identity_module lower weight by default. multi_conv = multi_conv_module( diff --git a/tensor2tensor/models/bytenet.py b/tensor2tensor/models/bytenet.py index 5af0c4435..ceefd54b5 100644 --- a/tensor2tensor/models/bytenet.py +++ b/tensor2tensor/models/bytenet.py @@ -80,7 +80,7 @@ def bytenet_internal(inputs, targets, hparams): @registry.register_model class ByteNet(t2t_model.T2TModel): - def model_fn_body(self, features): + def body(self, features): return bytenet_internal(features["inputs"], features["targets"], self._hparams) diff --git a/tensor2tensor/models/cycle_gan.py b/tensor2tensor/models/cycle_gan.py index 4cf1a5871..d2fc67e22 100644 --- a/tensor2tensor/models/cycle_gan.py +++ b/tensor2tensor/models/cycle_gan.py @@ -118,7 +118,7 @@ def tgt2inp(x, reuse=False): @registry.register_model class CycleGAN(t2t_model.T2TModel): - def model_fn_body(self, features): + def body(self, features): return cycle_gan_internal( features["inputs"], features["targets"], features["target_space_id"], self._hparams) diff --git a/tensor2tensor/models/gene_expression.py b/tensor2tensor/models/gene_expression.py index 9d676632e..5e5eb35c5 100644 --- a/tensor2tensor/models/gene_expression.py +++ b/tensor2tensor/models/gene_expression.py @@ -48,7 +48,7 @@ class GeneExpressionConv(t2t_model.T2TModel): (hparams.pooling_windows) at each conv layer (hparams.num_conv_layers). """ - def model_fn_body(self, features): + def body(self, features): inputs = features["inputs"] inputs.get_shape().assert_has_rank(4) diff --git a/tensor2tensor/models/lstm.py b/tensor2tensor/models/lstm.py index e3a5bf9ab..8a0b5a41f 100644 --- a/tensor2tensor/models/lstm.py +++ b/tensor2tensor/models/lstm.py @@ -134,7 +134,7 @@ def lstm_seq2seq_internal_attention(inputs, targets, hparams, train): @registry.register_model class LSTMSeq2seq(t2t_model.T2TModel): - def model_fn_body(self, features): + def body(self, features): # TODO(lukaszkaiser): investigate this issue and repair. if self._hparams.initializer == "orthogonal": raise ValueError("LSTM models fail with orthogonal initializer.") @@ -146,7 +146,7 @@ def model_fn_body(self, features): @registry.register_model class LSTMSeq2seqAttention(t2t_model.T2TModel): - def model_fn_body(self, features): + def body(self, features): # TODO(lukaszkaiser): investigate this issue and repair. if self._hparams.initializer == "orthogonal": raise ValueError("LSTM models fail with orthogonal initializer.") diff --git a/tensor2tensor/models/multimodel.py b/tensor2tensor/models/multimodel.py index 8a837aa63..370647544 100644 --- a/tensor2tensor/models/multimodel.py +++ b/tensor2tensor/models/multimodel.py @@ -108,7 +108,11 @@ def prepare_decoder(targets, target_space_emb): @registry.register_model class MultiModel(t2t_model.T2TModel): - def model_fn_body_sharded(self, sharded_features): + @property + def use_body_sharded(self): + return True + + def body_sharded(self, sharded_features): train = self._hparams.mode == tf.estimator.ModeKeys.TRAIN dp = self._data_parallelism hparams = self._hparams diff --git a/tensor2tensor/models/neural_gpu.py b/tensor2tensor/models/neural_gpu.py index ae692968d..681423190 100644 --- a/tensor2tensor/models/neural_gpu.py +++ b/tensor2tensor/models/neural_gpu.py @@ -58,7 +58,7 @@ def step(state, inp): # pylint: disable=missing-docstring @registry.register_model class NeuralGPU(t2t_model.T2TModel): - def model_fn_body(self, features): + def body(self, features): return neural_gpu_body(features["inputs"], self._hparams) @@ -93,7 +93,7 @@ def step(state_tup, inp): @registry.register_model class DiagonalNeuralGPU(t2t_model.T2TModel): - def model_fn_body(self, features): + def body(self, features): return diagonal_neural_gpu(features["inputs"], self._hparams) diff --git a/tensor2tensor/models/resnet.py b/tensor2tensor/models/resnet.py index ca3c6ee49..f3df54b10 100644 --- a/tensor2tensor/models/resnet.py +++ b/tensor2tensor/models/resnet.py @@ -233,7 +233,7 @@ def resnet50(inputs, hparams): @registry.register_model class Resnet50(t2t_model.T2TModel): - def model_fn_body(self, features): + def body(self, features): return resnet50(features["inputs"], self.hparams) diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py index bad951a32..b4d4a62ea 100644 --- a/tensor2tensor/models/shake_shake.py +++ b/tensor2tensor/models/shake_shake.py @@ -98,7 +98,7 @@ class ShakeShake(t2t_model.T2TModel): "Shake-Shake-Batch" in Table 1. """ - def model_fn_body(self, features): + def body(self, features): hparams = self._hparams inputs = features["inputs"] assert (hparams.num_hidden_layers - 2) % 6 == 0 diff --git a/tensor2tensor/models/slicenet.py b/tensor2tensor/models/slicenet.py index 8807f073b..9f0718dd7 100644 --- a/tensor2tensor/models/slicenet.py +++ b/tensor2tensor/models/slicenet.py @@ -279,7 +279,7 @@ def slicenet_internal(inputs, targets, target_space, hparams, run_decoder=True): @registry.register_model class SliceNet(t2t_model.T2TModel): - def model_fn_body(self, features): + def body(self, features): target_modality_name = ( self._hparams.problems[self._problem_idx].target_modality.name) # If we're just predicing a class, there is no use for a decoder. diff --git a/tensor2tensor/models/super_lm.py b/tensor2tensor/models/super_lm.py index ab9287b35..f6bc4ff85 100644 --- a/tensor2tensor/models/super_lm.py +++ b/tensor2tensor/models/super_lm.py @@ -53,7 +53,7 @@ def _embedding(inputs, vocab_size, dense_size): class SuperLM(t2t_model.T2TModel): """Attention net. See file docstring.""" - def model_fn_body(self, features): + def body(self, features): # Remove dropout if not training hparams = self._hparams ps_devices = self._ps_devices diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index 8fd3edd21..e9c272d7c 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -130,7 +130,7 @@ def decode(self, # Expand since t2t expects 4d tensors. return tf.expand_dims(decoder_output, axis=2) - def model_fn_body(self, features): + def body(self, features): """Transformer main model_fn. Args: @@ -391,7 +391,7 @@ def inner_loop(i, next_id, decoded_ids, cache): class TransformerEncoder(t2t_model.T2TModel): """Transformer, encoder only.""" - def model_fn_body(self, features): + def body(self, features): hparams = self._hparams inputs = features["inputs"] target_space = features["target_space_id"] diff --git a/tensor2tensor/models/transformer_moe.py b/tensor2tensor/models/transformer_moe.py index 3b966a285..202c4c9f3 100644 --- a/tensor2tensor/models/transformer_moe.py +++ b/tensor2tensor/models/transformer_moe.py @@ -59,9 +59,11 @@ class TransformerMoe(t2t_model.T2TModel): """Attention net. See file docstring.""" - @expert_utils.add_var_scope("transformer_moe") - def model_fn_body_sharded(self, sharded_features): + @property + def use_body_sharded(self): + return True + def body_sharded(self, sharded_features): # ========= Prepare the input and target ========= hparams = self._hparams diff --git a/tensor2tensor/models/transformer_revnet.py b/tensor2tensor/models/transformer_revnet.py index 7275c370a..bd4492151 100644 --- a/tensor2tensor/models/transformer_revnet.py +++ b/tensor2tensor/models/transformer_revnet.py @@ -43,7 +43,7 @@ class TransformerRevnet(transformer.Transformer): g: Feed-forward """ - def model_fn_body(self, features): + def body(self, features): hparams = self._hparams targets = features["targets"] inputs = features["inputs"] diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 408b17941..989e362d1 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -469,7 +469,7 @@ def __init__(self, *args, **kwargs): def has_input(self): return self._problem_hparams.input_modality - def model_fn_body(self, features): + def body(self, features): inputs = features["inputs"] if "inputs" in features else None if self._hparams.drop_inputs: inputs = None @@ -521,7 +521,7 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, dtype=tf.int64) features["targets"] = initial_output - logits, _ = self(features, skip=False, force_full_predict=True) # pylint: disable=not-callable + logits, _ = self(features) # pylint: disable=not-callable samples = tf.argmax(logits, axis=-1) if inputs_old is not None: # Restore to not confuse Estimator. @@ -563,6 +563,7 @@ def transformer_ae_small(): hparams.add_hparam("bit_vae", True) hparams.add_hparam("beta", 0.25) hparams.kl_warmup_steps = 150000 + hparams.force_full_predict = True return hparams diff --git a/tensor2tensor/models/vanilla_gan.py b/tensor2tensor/models/vanilla_gan.py index c9ce8ff3f..36acfc4a2 100644 --- a/tensor2tensor/models/vanilla_gan.py +++ b/tensor2tensor/models/vanilla_gan.py @@ -109,7 +109,7 @@ class VanillaGan(t2t_model.T2TModel): """Simple GAN. """ - def model_fn_body(self, features): + def body(self, features): """Computes the generator and discriminator loss. Args: @@ -165,5 +165,3 @@ def vanilla_gan(): hparams.learning_rate = 0.2 hparams.learning_rate_decay_scheme = "none" return hparams - - diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index f328c5c06..1c0678584 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -138,7 +138,7 @@ def xception_exit(inputs): @registry.register_model class Xception(t2t_model.T2TModel): - def model_fn_body(self, features): + def body(self, features): return xception_internal(features["inputs"], self._hparams) diff --git a/tensor2tensor/notebooks/hello_t2t.ipynb b/tensor2tensor/notebooks/hello_t2t.ipynb index fd8547e97..5a976a5b3 100644 --- a/tensor2tensor/notebooks/hello_t2t.ipynb +++ b/tensor2tensor/notebooks/hello_t2t.ipynb @@ -1398,7 +1398,7 @@ "\n", "class MySimpleModel(t2t_model.T2TModel):\n", "\n", - " def model_fn_body(self, features):\n", + " def body(self, features):\n", " inputs = features[\"inputs\"]\n", " filters = self.hparams.hidden_size\n", " h1 = tf.layers.conv2d(inputs, filters,\n", diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 203ddc9e3..9f45bbe75 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -23,6 +23,7 @@ from tensor2tensor import models # pylint: disable=unused-import from tensor2tensor import problems as problems_lib # pylint: disable=unused-import from tensor2tensor.tpu import tpu_trainer_lib as lib +from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import from tensor2tensor.utils import registry from tensor2tensor.utils import usr_dir @@ -31,7 +32,7 @@ flags = tf.flags FLAGS = flags.FLAGS -# See trainer_utils.py for additional command-line flags. +# See flags.py for additional command-line flags. flags.DEFINE_string("t2t_usr_dir", "", "Path to a Python module that will be imported. The " "__init__.py file should include the necessary imports. " diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 01753488e..5793345af 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -21,8 +21,8 @@ # Dependency imports +from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model -from tensor2tensor.utils import trainer_utils import tensorflow as tf @@ -88,7 +88,7 @@ def create_estimator(model_name, batch_size = hparams.tpu_batch_size_per_shard batch_size *= run_config.tpu_config.num_shards eval_batch_size = batch_size * 2 - if schedule == "train": + if "eval" not in schedule: # Estimator takes the presence of eval_batch_size as an indication that # an eval is being performed, and complains about num_shards being too # big. So we have to set eval_batch_size to None. @@ -117,7 +117,7 @@ def create_experiment(run_config, """Create Experiment.""" # HParams hparams.add_hparam("data_dir", data_dir) - trainer_utils.add_problem_hparams(hparams, problem_name) + add_problem_hparams(hparams, problem_name) # Estimator estimator = create_estimator( @@ -148,3 +148,15 @@ def experiment_fn(run_config, hparams): return create_experiment(run_config, hparams, *args, **kwargs) return experiment_fn + + +def add_problem_hparams(hparams, problems): + """Add problem hparams for the problems.""" + hparams.problems = [] + hparams.problem_instances = [] + for problem_name in problems.split("-"): + problem = registry.problem(problem_name) + p_hparams = problem.get_hparams(hparams) + + hparams.problem_instances.append(problem) + hparams.problems.append(p_hparams) diff --git a/tensor2tensor/utils/flags.py b/tensor2tensor/utils/flags.py new file mode 100644 index 000000000..f4e93a68f --- /dev/null +++ b/tensor2tensor/utils/flags.py @@ -0,0 +1,112 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Common command-line flags.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# Dependency imports +import tensorflow as tf + +flags = tf.flags +FLAGS = flags.FLAGS + +flags.DEFINE_bool("registry_help", False, + "If True, logs the contents of the registry and exits.") +flags.DEFINE_bool("tfdbg", False, + "If True, use the TF debugger CLI on train/eval.") +flags.DEFINE_bool("export_saved_model", False, + "Whether to export a SavedModel for serving.") +flags.DEFINE_bool("dbgprofile", False, + "If True, record the timeline for chrome://tracing/.") +flags.DEFINE_string("model", "", "Which model to use.") +flags.DEFINE_string("hparams_set", "", "Which parameters to use.") +flags.DEFINE_string("hparams_range", "", "Parameters range.") +flags.DEFINE_string( + "hparams", "", + """A comma-separated list of `name=value` hyperparameter values. This flag + is used to override hyperparameter settings either when manually selecting + hyperparameters or when using Vizier. If a hyperparameter setting is + specified by this flag then it must be a valid hyperparameter name for the + model.""") +flags.DEFINE_string("problems", "", "Dash separated list of problems to " + "solve.") + +# data_dir is a common flag name - catch conflicts and define it once. +try: + flags.DEFINE_string("data_dir", None, "Directory with training data.") +except: # pylint: disable=bare-except + pass + +flags.DEFINE_integer("train_steps", 250000, + "The number of steps to run training for.") +flags.DEFINE_string("eval_early_stopping_metric", "loss", + "If --schedule=train_and_evaluate and " + "--eval_early_stopping_steps is not None, then stop when " + "--eval_early_stopping_metric has not decreased for " + "--eval_early_stopping_steps") +flags.DEFINE_integer("eval_early_stopping_steps", None, + "If --schedule=train_and_evaluate and " + "--eval_early_stopping_steps is not None, then stop when " + "--eval_early_stopping_metric has not decreased for " + "--eval_early_stopping_steps") +flags.DEFINE_bool("eval_early_stopping_metric_minimize", True, + "Whether to check for the early stopping metric going down " + "or up.") +flags.DEFINE_bool("eval_run_autoregressive", False, + "Run eval autoregressively where we condition on previous" + "generated output instead of the actual target.") +flags.DEFINE_bool("eval_use_test_set", False, + "Whether to use the '-test' data for EVAL (and PREDICT).") +flags.DEFINE_integer("keep_checkpoint_max", 20, + "How many recent checkpoints to keep.") +flags.DEFINE_bool("experimental_optimize_placement", False, + "Optimize ops placement with experimental session options.") +flags.DEFINE_integer("keep_checkpoint_every_n_hours", 10000, + "Number of hours between each checkpoint to be saved. " + "The default value 10,000 hours effectively disables it.") +flags.DEFINE_integer("save_checkpoints_secs", 0, + "Save checkpoints every this many seconds. " + "Default=0 means let tensorflow.contrib.learn.python.learn" + " decide, which is currently set to 600 = 10 minutes.") +flags.DEFINE_bool("log_device_placement", False, + "Whether to log device placement.") + +# Distributed training flags +flags.DEFINE_integer("local_eval_frequency", 2000, + "Run evaluation every this steps during local training.") +flags.DEFINE_bool("locally_shard_to_cpu", False, + "Use CPU as a sharding device running locally. This allows " + "to test sharded model construction on a machine with 1 GPU.") +flags.DEFINE_bool("sync", False, "Sync compute on PS.") +flags.DEFINE_string("worker_job", "/job:localhost", "name of worker job") +flags.DEFINE_integer("worker_gpu", 1, "How many GPUs to use.") +flags.DEFINE_integer("worker_replicas", 1, "How many workers to use.") +flags.DEFINE_integer("worker_id", 0, "Which worker task are we.") +flags.DEFINE_float("worker_gpu_memory_fraction", 0.95, + "Fraction of GPU memory to allocate.") +flags.DEFINE_integer("ps_gpu", 0, "How many GPUs to use per ps.") +flags.DEFINE_string("gpu_order", "", "Optional order for daisy-chaining gpus." + " e.g. \"1 3 2 4\"") +flags.DEFINE_string("ps_job", "/job:ps", "name of ps job") +flags.DEFINE_integer("ps_replicas", 0, "How many ps replicas.") + +# Decoding flags +flags.DEFINE_string( + "decode_hparams", "", + "Comma-separated list of name=value pairs to control decode behavior. " + "See decoding.decode_hparams for defaults.") diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index 61ea55ca9..fe6bea221 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -118,6 +118,7 @@ def nth_model(n): decode_length=decode_hp.extra_length) # In distributed mode, we build graph for problem=0 and problem=worker_id. skipping_is_on = hparams.problem_choice == "distributed" and is_training + del skipping_is_on problem_worker_id = worker_id % len(hparams.problems) skip_this_one = n != 0 and n % worker_replicas != problem_worker_id # On worker 0 also build graph for problems <= 1. @@ -126,8 +127,7 @@ def nth_model(n): if eval_run_autoregressive and mode == tf.estimator.ModeKeys.EVAL: logits, losses_dict = model_class.eval_autoregressive(features) else: - logits, losses_dict = model_class( - features, skip=(skipping_is_on and skip_this_one)) + logits, losses_dict = model_class(features) with tf.variable_scope("losses_avg"): total_loss, ops = 0.0, [] for loss_key, loss_value in six.iteritems(losses_dict): diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 69edcb473..1125a6ed3 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -260,7 +260,11 @@ def parse_problem_name(problem_name): base_name, was_reversed, was_copy = parse_problem_name(name) if base_name not in _PROBLEMS: - raise LookupError("Problem %s never registered." % name) + all_problem_names = sorted(list_problems()) + error_lines = ["%s not in the set of supported problems:" % base_name + ] + all_problem_names + error_msg = "\n * ".join(error_lines) + raise LookupError(error_msg) return _PROBLEMS[base_name](was_reversed, was_copy) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 8ba49c630..b06565532 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function +import collections import contextlib import copy import math @@ -26,7 +27,6 @@ # Dependency imports import six -from six.moves import xrange # pylint: disable=redefined-builtin from tensor2tensor.data_generators import text_encoder from tensor2tensor.layers import common_layers @@ -115,6 +115,161 @@ def hparams(self): def has_input(self): return self._problem_hparams.input_modality + def call(self, features): + tf.get_variable_scope().set_initializer( + optimize.get_variable_initializer(self.hparams)) + with self._var_store.as_default(): + self._fill_problem_hparams_features(features) + sharded_features = self._shard_features(features) + sharded_logits, losses = self.model_fn_sharded(sharded_features) + return tf.concat(sharded_logits, 0), losses + + @property + def use_body_sharded(self): + return False + + def body_sharded(self, sharded_features): + raise NotImplementedError("Models that wish to manually control sharding, " + "e.g. MoE models, should override body_sharded " + "and set use_body_sharded to True.") + + def model_fn_sharded(self, sharded_features): + dp = self._data_parallelism + datashard_to_features = self._to_features_per_datashard(sharded_features) + + if self.use_body_sharded: + # MoE models override body_sharded + transformed_features = dp(self.bottom, datashard_to_features) + body_out = self.body_sharded( + self._to_single_features_dict(transformed_features)) + body_out, losses = self._normalize_body_output(body_out) + sharded_logits = dp(self.top, body_out, datashard_to_features) + if "training" not in losses: + sharded_losses = dp(self.loss, sharded_logits, datashard_to_features) + training_loss_dict = average_sharded_losses([{ + "training": loss + } for loss in sharded_losses]) + losses.update(training_loss_dict) + else: + sharded_logits, sharded_losses = dp(self.model_fn, datashard_to_features) + losses = average_sharded_losses(sharded_losses) + + # TODO(rsepassi): Reenable scheduled sampling + # Disabled because of model_fn_sharded refactor + # + # do_scheduled_sampling = ( # Only do it if training and set for it. + # self.hparams.scheduled_sampling_prob > 0.0 and + # self.hparams.mode == tf.estimator.ModeKeys.TRAIN) + # if do_scheduled_sampling: + # sharded_logits, losses = scheduled_sampling( + # self.hparams, self._problem_hparams, dp, + # sharded_logits, losses, sharded_features, + # self._transformed_features, self) + + return sharded_logits, losses + + def model_fn(self, features): + transformed_features = self.bottom(features) + self._transformed_features = transformed_features + + with tf.variable_scope("body"): + body_out = self.body(transformed_features) + output, losses = self._normalize_body_output(body_out) + + logits = self.top(output, features) + if "training" not in losses: + losses["training"] = self.loss(logits, features) + return logits, losses + + def bottom(self, features): + """Transform features to feed into body.""" + transformed_features = {} + all_previous_modalities = [] + + # Transform the input features + for key, input_modality in six.iteritems( + self._problem_hparams.input_modality): + previous_modalities = [ + self.hparams.problems[i].input_modality[key].name + for i in range(self._problem_idx) + ] + all_previous_modalities.extend(previous_modalities) + do_reuse = input_modality.name in all_previous_modalities + with tf.variable_scope(input_modality.name, reuse=do_reuse): + transformed_features[key] = input_modality.bottom(features[key]) + all_previous_modalities.append(input_modality.name) + + # Transform the targets (for autoregressive models) + previous_tgt_modalities = [ + self.hparams.problems[i].target_modality.name + for i in range(self._problem_idx) + ] + all_previous_modalities.extend(previous_tgt_modalities) + + target_modality = self._problem_hparams.target_modality + target_reuse = target_modality.name in previous_tgt_modalities + with tf.variable_scope(target_modality.name, reuse=target_reuse): + transformed_features["targets"] = target_modality.targets_bottom( + features["targets"]) + + for key in features: + if key not in transformed_features: + # For features without a modality, we pass them along as is + transformed_features[key] = features[key] + else: + # Other features get passed along with the "raw" suffix + transformed_features[key + "_raw"] = features[key] + + return transformed_features + + def body(self, features): + """Most models will override this function. + + Compute label logits for one shard as a function of the transformed + features. + + Args: + features: A dictionary of key to Tensor. Each Tensor has shape + [batch_size, ?, ?, hidden_size]. + + Returns: + output: tensor of logits with shape [batch_size, O, P, body_output_size. + losses: either single loss as a scalar, a list, a tensor (to be averaged) + or a dictionary of losses. + """ + raise NotImplementedError("Abstract Method") + + def top(self, body_output, features): + target_modality = self._problem_hparams.target_modality + with tf.variable_scope(target_modality.name): + last_only = ( + target_modality.top_is_pointwise and + self.hparams.mode == tf.estimator.ModeKeys.PREDICT and + not self.hparams.force_full_predict) + if not last_only: + logits = target_modality.top(body_output, features["targets"]) + else: + # Take body outputs for the last position only, and targets too. + last_position_body_output = tf.expand_dims( + body_output[:, -1, :, :], axis=[1]) + last_position_targets = tf.expand_dims( + features["targets"][:, -1:, :, :], axis=[1]) + logits = target_modality.top(last_position_body_output, + last_position_targets) + return logits + + def loss(self, logits, features): + target_modality = self._problem_hparams.target_modality + loss_num, loss_den = target_modality.loss(logits, features["targets"]) + loss_num *= self._problem_hparams.loss_multiplier + return loss_num, loss_den + + def optimize(self, loss, use_tpu=False): + """Return a training op minimizing loss.""" + lr = self.hparams.learning_rate * optimize.learning_rate_decay(self.hparams) + train_op = optimize.optimize(loss, lr, self.hparams, use_tpu=use_tpu) + return train_op + def set_mode(self, mode): """Set hparams with the given mode.""" hparams = copy.copy(self._original_hparams) @@ -527,243 +682,32 @@ def _shard_features(self, features): # pylint: disable=missing-docstring sharded_features = dict() for k, v in six.iteritems(features): v = tf.convert_to_tensor(v) - if not v.shape.as_list(): + v_shape = common_layers.shape_list(v) + if not v_shape: v = tf.expand_dims(v, axis=-1) + v_shape = [1] + if v_shape == [1]: v = tf.tile(v, [self._num_datashards]) sharded_features[k] = self._data_parallelism( tf.identity, tf.split(v, self._num_datashards, 0)) return sharded_features - def _model_fn(self, features, skip=False, force_full_predict=False): - """Computes the entire model and produces sharded logits and losses. - - Args: - features: A dictionary of feature name to tensor. - skip: a Boolean, if we're just dummy-calling and actually skip this model - (but we need to create variables to not confuse distributed training). - force_full_predict: a Boolean, if set, then last-position-only - optimizations are not used even when allowed and in PREDICT mode. - - Returns: - logits: `Tensor` - losses: a dictionary: {loss-name (string): floating point `Scalar`}. - """ - start_time = time.time() - dp = self._data_parallelism - - sharded_features = self._shard_features(features) - - # Construct the model bottom for inputs. - transformed_features = {} - all_previous_modalities = [] - - for key, input_modality in six.iteritems( - self._problem_hparams.input_modality): - previous_modalities = [ - self.hparams.problems[i].input_modality[key].name - for i in xrange(self._problem_idx) - ] - all_previous_modalities.extend(previous_modalities) - do_reuse = input_modality.name in all_previous_modalities - transformed_features[key + "_raw"] = sharded_features[key] - with tf.variable_scope(input_modality.name, reuse=do_reuse): - transformed_features[key] = input_modality.bottom_sharded( - sharded_features[key], dp) - all_previous_modalities.append(input_modality.name) - - # Target space id just gets copied to every shard. - if "target_space_id" in features: - transformed_features["target_space_id"] = [features["target_space_id"] - ] * self._num_datashards - - # For features without a modality ending in "_raw", we pass them raw. - for key, feature in sharded_features.items(): - if key not in transformed_features and key.endswith("_raw"): - transformed_features[key] = feature - - # Targets are transformed by the autoregressive part of the modality - previous_tgt_modalities = [ - self.hparams.problems[i].target_modality.name - for i in xrange(self._problem_idx) - ] - all_previous_modalities.extend(previous_tgt_modalities) - - target_modality = self._problem_hparams.target_modality - target_reuse = target_modality.name in previous_tgt_modalities - with tf.variable_scope(target_modality.name, reuse=target_reuse): - transformed_features["targets"] = target_modality.targets_bottom_sharded( - sharded_features["targets"], dp) - - # Allows later access to pre-embedding raw targets. - transformed_features["targets_raw"] = sharded_features["targets"] - - # Construct the model body. - with tf.variable_scope("body", reuse=self._problem_idx > 0): - if skip: - body_outputs = transformed_features["targets"] - losses = {"extra": 0.0} - else: - body_outputs, losses = self.model_fn_body_sharded(transformed_features) - if not isinstance(losses, dict): # If it's a single extra loss. - losses = {"extra": losses} - - with tf.variable_scope(target_modality.name, reuse=target_reuse): - last_only = (target_modality.top_is_pointwise and - self.hparams.mode == tf.estimator.ModeKeys.PREDICT and - not force_full_predict) - if not last_only: - sharded_logits = target_modality.top_sharded( - body_outputs, sharded_features["targets"], dp) - if "training" not in losses: - losses["training"] = ( - target_modality.loss_sharded( - sharded_logits, sharded_features["targets"], dp) - * self._problem_hparams.loss_multiplier) - else: - # Take body outputs for the last position only, and targets too. - last_position_body_outputs = [ - tf.expand_dims(body_shard[:, -1, :, :], axis=[1]) - for body_shard in body_outputs - ] - last_position_targets = [ - tf.expand_dims(target_shard[:, -1:, :, :], axis=[1]) - for target_shard in sharded_features["targets"] - ] - sharded_logits = target_modality.top_sharded(last_position_body_outputs, - last_position_targets, - self._data_parallelism) - losses["training"] = None - - # Scheduled sampling. - do_scheduled_sampling = ( # Only do it if training and set for it. - self.hparams.scheduled_sampling_prob > 0.0 and - self.hparams.mode == tf.estimator.ModeKeys.TRAIN and not skip) - if do_scheduled_sampling: - - def sample(x): - """Multinomial sampling from a n-dimensional tensor.""" - vocab_size = target_modality.top_dimensionality - samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]), 1) - reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1]) - return tf.to_int32(reshaped_samples) - - def mix_gold_sampled(gold_targets, sampled_targets): - return tf.where( - tf.less( - tf.random_uniform(common_layers.shape_list(sampled_targets)), - self.hparams.scheduled_sampling_gold_mixin_prob), gold_targets, - sampled_targets) - - def sampled_results(): - """Generate scheduled sampling results.""" - sampled_targets = dp(sample, sharded_logits) - new_targets = dp(mix_gold_sampled, sharded_features["targets"], - sampled_targets) - new_features = transformed_features - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - with tf.variable_scope(target_modality.name): - new_features["targets"] = target_modality.targets_bottom_sharded( - new_targets, dp) - with tf.variable_scope("body"): - body_outputs, losses = self.model_fn_body_sharded(new_features) - if not isinstance(losses, dict): # If it's a single extra loss. - losses = {"extra": losses} - with tf.variable_scope(target_modality.name): - new_sharded_logits = target_modality.top_sharded( - body_outputs, sharded_features["targets"], dp) - if "training" not in losses: - losses["training"] = ( - target_modality.loss_sharded( - sharded_logits, sharded_features["targets"], dp) - * self._problem_hparams.loss_multiplier) - return new_sharded_logits, losses - - # Run the above conditionally. - prob = self.hparams.scheduled_sampling_prob - prob *= common_layers.inverse_exp_decay( - self.hparams.scheduled_sampling_warmup_steps, min_value=0.001) - sharded_logits, losses = tf.cond( - tf.less(tf.random_uniform([]), prob), sampled_results, - lambda: (sharded_logits, losses)) - - if not context.in_eager_mode(): - tf.logging.info("This model_fn took %.3f sec." % - (time.time() - start_time)) - return sharded_logits, losses - - def call(self, inputs_dict, skip=False, force_full_predict=False): - tf.get_variable_scope().set_initializer( - optimize.get_variable_initializer(self.hparams)) - with self._var_store.as_default(): - self._fill_problem_hparams_features(inputs_dict) - sharded_logits, losses = self._model_fn( - inputs_dict, skip=skip, force_full_predict=force_full_predict) - return tf.concat(sharded_logits, 0), losses - - def model_fn_body_sharded(self, sharded_features): - """Mixture-of-experts models will override this function. - - Compute model body on all datashards. - - Args: - sharded_features: map from string to list of Tensors each with shape - [batch, ?, ?, body_input_size] - - Returns: - sharded_body_output: - a list of Tensors, each with shape [batch, O, P, body_output_size] - extra_loss: a Scalar. - """ - with tf.name_scope("model"): - datashard_to_features = [{ - k: v[d] - for k, v in six.iteritems(sharded_features) - } - for d in xrange(self._num_datashards)] - output = self._data_parallelism( - _with_timing( - self.model_fn_body, - "model_fn_body", - silent=context.in_eager_mode()), datashard_to_features) - if isinstance(output, tuple): - losses_sharded = output[1] - if isinstance(losses_sharded[0], dict): - loss = {} - for k in losses_sharded[0].keys(): - k_loss_sharded = [losses[k] for losses in losses_sharded] - loss[k] = tf.reduce_mean(k_loss_sharded) - else: - loss = {"extra": tf.reduce_mean(losses_sharded)} - output = output[0] - else: - loss = {"extra": 0.0} - return output, loss - - def model_fn_body(self, features): - """Most models will override this function. - - Compute label logits for one shard as a function of the transformed - features. - - Args: - features: A dictionary of key to Tensor. Each Tensor has shape - [batch_size, ?, ?, hidden_size]. - - Returns: - output: tensor of logits with shape [batch_size, O, P, body_output_size. - losses: either single loss as a scalar, a list, a tensor (to be averaged) - or a dictionary of losses. If the dictionary contains the key - "training", this is interpreted as an override of the modality's - loss computation. - """ - raise NotImplementedError("Abstract Method") - - def optimize(self, loss, use_tpu=False): - """Return a training op minimizing loss.""" - lr = self.hparams.learning_rate * optimize.learning_rate_decay(self.hparams) - train_op = optimize.optimize(loss, lr, self.hparams, use_tpu=use_tpu) - return train_op + def _to_features_per_datashard(self, features): + datashard_features = [] + assert len(features[features.keys()[0]]) == self._num_datashards + for d in range(self._num_datashards): + f = {k: v[d] for k, v in six.iteritems(features)} + datashard_features.append(f) + return datashard_features + + def _to_single_features_dict(self, datashard_features): + assert len(datashard_features) == self._num_datashards + features = collections.defaultdict(list) + for feats in datashard_features: + for k, v in six.iteritems(feats): + features[k].append(v) + return features @staticmethod def make_estimator_model_fn(model_name, @@ -819,8 +763,9 @@ def estimator_model_fn(cls, problem = hparams.problem_instances[0] # Instantiate model - data_parallelism = _create_data_parallelism( - use_tpu=use_tpu, **config.t2t_device_info) + data_parallelism = ( + None if hparams.no_data_parallelism else _create_data_parallelism( + use_tpu=use_tpu, **config.t2t_device_info)) model = cls(hparams, mode, data_parallelism=data_parallelism) # PREDICT mode @@ -940,6 +885,17 @@ def estimator_spec_predict(self, features, decode_hparams): "output": tf.estimator.export.PredictOutput(export_out) }) + def _normalize_body_output(self, body_out): + if isinstance(body_out, tuple): + output, losses = body_out + if not isinstance(losses, dict): + losses = {"extra": tf.reduce_mean(losses)} + else: + output = body_out + losses = {"extra": 0.0} + + return output, losses + def _warn_changed_modality_type(new_name, old_name, feature_name): new_type, new_name = registry.parse_modality_name(new_name) @@ -995,11 +951,12 @@ def _create_data_parallelism(num_gpus=1, shard_to_cpu=False, num_shards=1, use_tpu=False, + no_dp=False, **kwargs): """Create Parallelism object.""" del kwargs - if use_tpu: + if use_tpu or no_dp: return eu.Parallelism([""]) gpus = list(range(num_gpus)) @@ -1091,3 +1048,80 @@ def create_eager_var_store(): return variable_scope.EagerVariableStore() else: return DummyVariableStore() + + +def scheduled_sampling(hparams, problem_hparams, dp, sharded_logits, losses, + sharded_features, transformed_features, model): + """Scheduled sampling.""" + target_modality = problem_hparams.target_modality + + def sample(x): + """Multinomial sampling from a n-dimensional tensor.""" + vocab_size = target_modality.top_dimensionality + samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]), 1) + reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1]) + return tf.to_int32(reshaped_samples) + + def mix_gold_sampled(gold_targets, sampled_targets): + return tf.where( + tf.less( + tf.random_uniform(common_layers.shape_list(sampled_targets)), + hparams.scheduled_sampling_gold_mixin_prob), gold_targets, + sampled_targets) + + def sampled_results(): + """Generate scheduled sampling results.""" + sampled_targets = dp(sample, sharded_logits) + new_targets = dp(mix_gold_sampled, sharded_features["targets"], + sampled_targets) + new_features = transformed_features + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + with tf.variable_scope(target_modality.name): + new_features["targets"] = target_modality.targets_bottom_sharded( + new_targets, dp) + with tf.variable_scope("body"): + body_outputs, losses = model.model_fn_sharded(new_features) + if not isinstance(losses, dict): # If it's a single extra loss. + losses = {"extra": losses} + with tf.variable_scope(target_modality.name): + new_sharded_logits = target_modality.top_sharded( + body_outputs, sharded_features["targets"], dp) + if "training" not in losses: + training_loss = target_modality.loss_sharded( + sharded_logits, sharded_features["targets"], dp) + training_loss *= problem_hparams.loss_multiplier + losses["training"] = training_loss + return new_sharded_logits, losses + + # Run the above conditionally. + prob = hparams.scheduled_sampling_prob + prob *= common_layers.inverse_exp_decay( + hparams.scheduled_sampling_warmup_steps, min_value=0.001) + sharded_logits, losses = tf.cond( + tf.less(tf.random_uniform([]), prob), sampled_results, + lambda: (sharded_logits, losses)) + return sharded_logits, losses + + +def average_sharded_losses(sharded_losses): + """Average losses across datashards. + + Args: + sharded_losses: list<dict<str loss_name, Tensor loss>>. The loss + can be a single Tensor or a 2-tuple (numerator and denominator). + + Returns: + losses: dict<str loss_name, Tensor avg_loss> + """ + losses = {} + for loss_name in sharded_losses[0]: + all_shards = [shard_losses[loss_name] for shard_losses in sharded_losses] + if isinstance(all_shards[0], tuple): + sharded_num, sharded_den = zip(*all_shards) + mean_loss = ( + tf.add_n(sharded_num) / tf.maximum(1.0, tf.add_n(sharded_den))) + else: + mean_loss = tf.reduce_mean(all_shards) + + losses[loss_name] = mean_loss + return losses diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index b875f7ca8..ace2f0b4e 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -29,6 +29,7 @@ from tensor2tensor.utils import data_reader from tensor2tensor.utils import decoding from tensor2tensor.utils import devices +from tensor2tensor.utils import flags # pylint: disable=unused-import from tensor2tensor.utils import input_fn_builder from tensor2tensor.utils import model_builder from tensor2tensor.utils import registry @@ -38,95 +39,7 @@ from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python import debug -flags = tf.flags -FLAGS = flags.FLAGS - -flags.DEFINE_bool("registry_help", False, - "If True, logs the contents of the registry and exits.") -flags.DEFINE_bool("tfdbg", False, - "If True, use the TF debugger CLI on train/eval.") -flags.DEFINE_bool("export_saved_model", False, - "Whether to export a SavedModel for serving.") -flags.DEFINE_bool("dbgprofile", False, - "If True, record the timeline for chrome://tracing/.") -flags.DEFINE_string("model", "", "Which model to use.") -flags.DEFINE_string("hparams_set", "", "Which parameters to use.") -flags.DEFINE_string("hparams_range", "", "Parameters range.") -flags.DEFINE_string( - "hparams", "", - """A comma-separated list of `name=value` hyperparameter values. This flag - is used to override hyperparameter settings either when manually selecting - hyperparameters or when using Vizier. If a hyperparameter setting is - specified by this flag then it must be a valid hyperparameter name for the - model.""") -flags.DEFINE_string("problems", "", "Dash separated list of problems to " - "solve.") - - -# data_dir is a common flag name - catch conflicts and define it once. -try: - flags.DEFINE_string("data_dir", None, "Directory with training data.") -except: # pylint: disable=bare-except - pass - -flags.DEFINE_integer("train_steps", 250000, - "The number of steps to run training for.") -flags.DEFINE_string("eval_early_stopping_metric", "loss", - "If --schedule=train_and_evaluate and " - "--eval_early_stopping_steps is not None, then stop when " - "--eval_early_stopping_metric has not decreased for " - "--eval_early_stopping_steps") -flags.DEFINE_integer("eval_early_stopping_steps", None, - "If --schedule=train_and_evaluate and " - "--eval_early_stopping_steps is not None, then stop when " - "--eval_early_stopping_metric has not decreased for " - "--eval_early_stopping_steps") -flags.DEFINE_bool("eval_early_stopping_metric_minimize", True, - "Whether to check for the early stopping metric going down " - "or up.") -flags.DEFINE_bool("eval_run_autoregressive", False, - "Run eval autoregressively where we condition on previous" - "generated output instead of the actual target.") -flags.DEFINE_bool("eval_use_test_set", False, - "Whether to use the '-test' data for EVAL (and PREDICT).") -flags.DEFINE_integer("keep_checkpoint_max", 20, - "How many recent checkpoints to keep.") -flags.DEFINE_bool("experimental_optimize_placement", False, - "Optimize ops placement with experimental session options.") -flags.DEFINE_integer("keep_checkpoint_every_n_hours", 10000, - "Number of hours between each checkpoint to be saved. " - "The default value 10,000 hours effectively disables it.") -flags.DEFINE_integer("save_checkpoints_secs", 0, - "Save checkpoints every this many seconds. " - "Default=0 means let tensorflow.contrib.learn.python.learn" - " decide, which is currently set to 600 = 10 minutes.") -flags.DEFINE_bool("log_device_placement", False, - "Whether to log device placement.") - -# Distributed training flags -flags.DEFINE_integer("local_eval_frequency", 2000, - "Run evaluation every this steps during local training.") -flags.DEFINE_bool("locally_shard_to_cpu", False, - "Use CPU as a sharding device running locally. This allows " - "to test sharded model construction on a machine with 1 GPU.") -flags.DEFINE_bool("sync", False, "Sync compute on PS.") -flags.DEFINE_string("worker_job", "/job:localhost", "name of worker job") -flags.DEFINE_integer("worker_gpu", 1, "How many GPUs to use.") -flags.DEFINE_integer("worker_replicas", 1, "How many workers to use.") -flags.DEFINE_integer("worker_id", 0, "Which worker task are we.") -flags.DEFINE_float("worker_gpu_memory_fraction", 0.95, - "Fraction of GPU memory to allocate.") -flags.DEFINE_integer("ps_gpu", 0, "How many GPUs to use per ps.") -flags.DEFINE_string("gpu_order", "", "Optional order for daisy-chaining gpus." - " e.g. \"1 3 2 4\"") -flags.DEFINE_string("ps_job", "/job:ps", "name of ps job") -flags.DEFINE_integer("ps_replicas", 0, "How many ps replicas.") - -# Decoding flags -flags.DEFINE_string( - "decode_hparams", "", - "Comma-separated list of name=value pairs to control decode behavior. " - "See decoding.decode_hparams for defaults.") +FLAGS = tf.flags.FLAGS def make_experiment_fn(data_dir, model_name, train_steps, eval_steps): @@ -263,19 +176,13 @@ def log_registry(): sys.exit(0) +# TODO(rsepassi): Rm after trainer merge - duplicated in tpu_trainer_lib def add_problem_hparams(hparams, problems): """Add problem hparams for the problems.""" hparams.problems = [] hparams.problem_instances = [] for problem_name in problems.split("-"): - try: - problem = registry.problem(problem_name) - except LookupError: - all_problem_names = sorted(registry.list_problems()) - error_lines = ["%s not in the set of supported problems:" % problem_name - ] + all_problem_names - error_msg = "\n * ".join(error_lines) - raise LookupError(error_msg) + problem = registry.problem(problem_name) p_hparams = problem.get_hparams(hparams) hparams.problem_instances.append(problem) From d6951c1d21973e92b17f49548e25f736fbfc6506 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Thu, 14 Dec 2017 16:11:11 -0800 Subject: [PATCH 0662/4095] Update t2t_decoder to depend on TPU (eventually only) codepath PiperOrigin-RevId: 179113523 --- tensor2tensor/bin/t2t-decoder | 67 +++++++++++++----------- tensor2tensor/bin/t2t-tpu-trainer | 6 ++- tensor2tensor/bin/t2t_decoder.py | 67 +++++++++++++----------- tensor2tensor/data_generators/problem.py | 21 ++++++-- tensor2tensor/tpu/tpu_trainer.py | 6 ++- tensor2tensor/tpu/tpu_trainer_lib.py | 3 +- tensor2tensor/utils/decoding.py | 36 +++++++------ tensor2tensor/utils/t2t_model.py | 3 ++ 8 files changed, 122 insertions(+), 87 deletions(-) diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index 712cb45ce..de8bc7d50 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -37,8 +37,9 @@ import os # Dependency imports +from tensor2tensor.tpu import tpu_trainer +from tensor2tensor.tpu import tpu_trainer_lib from tensor2tensor.utils import decoding -from tensor2tensor.utils import trainer_utils from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -46,7 +47,7 @@ import tensorflow as tf flags = tf.flags FLAGS = flags.FLAGS -flags.DEFINE_string("output_dir", "", "Training directory to load from.") +# Additional flags in tpu/tpu_trainer.py and utils/flags.py flags.DEFINE_string("decode_from_file", None, "Path to the source file for decoding") flags.DEFINE_string("decode_to_file", None, @@ -54,51 +55,55 @@ flags.DEFINE_string("decode_to_file", None, flags.DEFINE_bool("decode_interactive", False, "Interactive local inference mode.") flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_model calls, that will then be " - "available to the t2t-decoder.") -flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_string("schedule", "train_and_evaluate", - "Must be train_and_evaluate for decoding.") -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - trainer_utils.log_registry() - trainer_utils.validate_flags() - assert FLAGS.schedule == "train_and_evaluate" - data_dir = os.path.expanduser(FLAGS.data_dir) - output_dir = os.path.expanduser(FLAGS.output_dir) - - hparams = trainer_utils.create_hparams( - FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams) - trainer_utils.add_problem_hparams(hparams, FLAGS.problems) - estimator, _ = trainer_utils.create_experiment_components( - data_dir=data_dir, - model_name=FLAGS.model, - hparams=hparams, - run_config=trainer_utils.create_run_config(output_dir)) +def create_hparams(): + hparams = tpu_trainer.create_hparams() + hparams.add_hparam("data_dir", os.path.expanduser(FLAGS.data_dir)) + tpu_trainer_lib.add_problem_hparams(hparams, FLAGS.problems) + return hparams + +def create_decode_hparams(): decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) decode_hp.add_hparam("shards", FLAGS.decode_shards) decode_hp.add_hparam("shard_id", FLAGS.worker_id) + return decode_hp + + +def decode(estimator, hparams, decode_hp): if FLAGS.decode_interactive: - decoding.decode_interactively(estimator, decode_hp) + decoding.decode_interactively(estimator, hparams, decode_hp) elif FLAGS.decode_from_file: - decoding.decode_from_file(estimator, FLAGS.decode_from_file, decode_hp, - FLAGS.decode_to_file) + decoding.decode_from_file(estimator, FLAGS.decode_from_file, hparams, + decode_hp, FLAGS.decode_to_file) else: decoding.decode_from_dataset( estimator, FLAGS.problems.split("-"), + hparams, decode_hp, decode_to_file=FLAGS.decode_to_file, dataset_split="test" if FLAGS.eval_use_test_set else None) +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + FLAGS.use_tpu = False + + hp = create_hparams() + decode_hp = create_decode_hparams() + + estimator = tpu_trainer_lib.create_estimator( + FLAGS.model, + hp, + tpu_trainer.create_run_config(), + decode_hparams=decode_hp, + use_tpu=False) + + decode(estimator, hp, decode_hp) + + if __name__ == "__main__": tf.app.run() diff --git a/tensor2tensor/bin/t2t-tpu-trainer b/tensor2tensor/bin/t2t-tpu-trainer index 41465b030..d09022710 100644 --- a/tensor2tensor/bin/t2t-tpu-trainer +++ b/tensor2tensor/bin/t2t-tpu-trainer @@ -19,6 +19,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + # Dependency imports from tensor2tensor import models # pylint: disable=unused-import @@ -74,7 +76,7 @@ def create_experiment_fn(): return lib.create_experiment_fn( FLAGS.model, get_problem_name(), - FLAGS.data_dir, + os.path.expanduser(FLAGS.data_dir), FLAGS.train_steps, FLAGS.eval_steps, FLAGS.local_eval_frequency, @@ -84,7 +86,7 @@ def create_experiment_fn(): def create_run_config(): return lib.create_run_config( - model_dir=FLAGS.output_dir, + model_dir=os.path.expanduser(FLAGS.output_dir), master=FLAGS.master, iterations_per_loop=FLAGS.iterations_per_loop, num_shards=FLAGS.tpu_num_shards, diff --git a/tensor2tensor/bin/t2t_decoder.py b/tensor2tensor/bin/t2t_decoder.py index 16da8567d..b98797610 100644 --- a/tensor2tensor/bin/t2t_decoder.py +++ b/tensor2tensor/bin/t2t_decoder.py @@ -36,8 +36,9 @@ # Dependency imports +from tensor2tensor.tpu import tpu_trainer +from tensor2tensor.tpu import tpu_trainer_lib from tensor2tensor.utils import decoding -from tensor2tensor.utils import trainer_utils from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -45,7 +46,7 @@ flags = tf.flags FLAGS = flags.FLAGS -flags.DEFINE_string("output_dir", "", "Training directory to load from.") +# Additional flags in tpu/tpu_trainer.py and utils/flags.py flags.DEFINE_string("decode_from_file", None, "Path to the source file for decoding") flags.DEFINE_string("decode_to_file", None, @@ -53,51 +54,55 @@ flags.DEFINE_bool("decode_interactive", False, "Interactive local inference mode.") flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_model calls, that will then be " - "available to the t2t-decoder.") -flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_string("schedule", "train_and_evaluate", - "Must be train_and_evaluate for decoding.") -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - trainer_utils.log_registry() - trainer_utils.validate_flags() - assert FLAGS.schedule == "train_and_evaluate" - data_dir = os.path.expanduser(FLAGS.data_dir) - output_dir = os.path.expanduser(FLAGS.output_dir) - - hparams = trainer_utils.create_hparams( - FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams) - trainer_utils.add_problem_hparams(hparams, FLAGS.problems) - estimator, _ = trainer_utils.create_experiment_components( - data_dir=data_dir, - model_name=FLAGS.model, - hparams=hparams, - run_config=trainer_utils.create_run_config(output_dir)) +def create_hparams(): + hparams = tpu_trainer.create_hparams() + hparams.add_hparam("data_dir", os.path.expanduser(FLAGS.data_dir)) + tpu_trainer_lib.add_problem_hparams(hparams, FLAGS.problems) + return hparams + +def create_decode_hparams(): decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) decode_hp.add_hparam("shards", FLAGS.decode_shards) decode_hp.add_hparam("shard_id", FLAGS.worker_id) + return decode_hp + + +def decode(estimator, hparams, decode_hp): if FLAGS.decode_interactive: - decoding.decode_interactively(estimator, decode_hp) + decoding.decode_interactively(estimator, hparams, decode_hp) elif FLAGS.decode_from_file: - decoding.decode_from_file(estimator, FLAGS.decode_from_file, decode_hp, - FLAGS.decode_to_file) + decoding.decode_from_file(estimator, FLAGS.decode_from_file, hparams, + decode_hp, FLAGS.decode_to_file) else: decoding.decode_from_dataset( estimator, FLAGS.problems.split("-"), + hparams, decode_hp, decode_to_file=FLAGS.decode_to_file, dataset_split="test" if FLAGS.eval_use_test_set else None) +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + FLAGS.use_tpu = False + + hp = create_hparams() + decode_hp = create_decode_hparams() + + estimator = tpu_trainer_lib.create_estimator( + FLAGS.model, + hp, + tpu_trainer.create_run_config(), + decode_hparams=decode_hp, + use_tpu=False) + + decode(estimator, hp, decode_hp) + + if __name__ == "__main__": tf.app.run() diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 73414ee40..b4021e9c7 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -479,15 +479,17 @@ def feature_info(self): self._feature_info = features return features - def make_estimator_input_fn(self, mode, hparams): + def make_estimator_input_fn(self, mode, hparams, dataset_kwargs=None): """Return input_fn wrapped for Estimator.""" def estimator_input_fn(params, config): - return self.input_fn(mode, hparams, params=params, config=config) + return self.input_fn(mode, hparams, params=params, config=config, + dataset_kwargs=dataset_kwargs) return estimator_input_fn - def input_fn(self, mode, hparams, params=None, config=None): + def input_fn(self, mode, hparams, params=None, config=None, + dataset_kwargs=None): """Builds input pipeline for problem. Args: @@ -495,6 +497,8 @@ def input_fn(self, mode, hparams, params=None, config=None): hparams: HParams, model hparams params: dict, may include "batch_size" config: RunConfig; if passed, should include t2t_device_info dict + dataset_kwargs: dict, if passed, will pass as kwargs to self.dataset + method when called Returns: (features_dict<str name, Tensor feature>, Tensor targets) @@ -543,8 +547,15 @@ def define_shapes(example): # Read and preprocess data_dir = hparams.data_dir - dataset = self.dataset( - mode=mode, data_dir=data_dir, num_threads=num_threads, hparams=hparams) + + dataset_kwargs = dataset_kwargs or {} + dataset_kwargs.update({ + "mode": mode, + "data_dir": data_dir, + "num_threads": num_threads, + "hparams": hparams}) + + dataset = self.dataset(**dataset_kwargs) dataset = dataset.map( data_reader.cast_int64_to_int32, num_parallel_calls=num_threads) if is_training: diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 9f45bbe75..5eafd4590 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -18,6 +18,8 @@ from __future__ import division from __future__ import print_function +import os + # Dependency imports from tensor2tensor import models # pylint: disable=unused-import @@ -73,7 +75,7 @@ def create_experiment_fn(): return lib.create_experiment_fn( FLAGS.model, get_problem_name(), - FLAGS.data_dir, + os.path.expanduser(FLAGS.data_dir), FLAGS.train_steps, FLAGS.eval_steps, FLAGS.local_eval_frequency, @@ -83,7 +85,7 @@ def create_experiment_fn(): def create_run_config(): return lib.create_run_config( - model_dir=FLAGS.output_dir, + model_dir=os.path.expanduser(FLAGS.output_dir), master=FLAGS.master, iterations_per_loop=FLAGS.iterations_per_loop, num_shards=FLAGS.tpu_num_shards, diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 5793345af..ff433dba7 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -79,10 +79,11 @@ def create_run_config(master="", def create_estimator(model_name, hparams, run_config, + decode_hparams=None, schedule="train_and_evaluate", use_tpu=True): model_fn = t2t_model.T2TModel.make_estimator_model_fn( - model_name, hparams, use_tpu=use_tpu) + model_name, hparams, decode_hparams=decode_hparams, use_tpu=use_tpu) if use_tpu: batch_size = hparams.tpu_batch_size_per_shard diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index d0913e0e1..2e71abe40 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -29,7 +29,6 @@ from six.moves import input # pylint: disable=redefined-builtin from tensor2tensor.data_generators import text_encoder -from tensor2tensor.utils import devices from tensor2tensor.utils import input_fn_builder import tensorflow as tf @@ -99,26 +98,31 @@ def log_decode_results(inputs, def decode_from_dataset(estimator, problem_names, + hparams, decode_hp, decode_to_file=None, dataset_split=None): + """Perform decoding from dataset.""" tf.logging.info("Performing local inference from dataset for %s.", str(problem_names)) - hparams = estimator.params # We assume that worker_id corresponds to shard number. shard = decode_hp.shard_id if decode_hp.shards > 1 else None + # If decode_hp.batch_size is specified, use a fixed batch size + if decode_hp.batch_size: + hparams.batch_size = decode_hp.batch_size + hparams.use_fixed_batch_size = True + + dataset_kwargs = { + "shard": shard, + "dataset_split": dataset_split, + } + for problem_idx, problem_name in enumerate(problem_names): # Build the inference input function - infer_input_fn = input_fn_builder.build_input_fn( - mode=tf.estimator.ModeKeys.PREDICT, - hparams=hparams, - data_dir=hparams.data_dir, - num_datashards=devices.data_parallelism(hparams).n, - fixed_problem=problem_idx, - batch_size=decode_hp.batch_size, - dataset_split=dataset_split, - shard=shard) + problem = hparams.problem_instances[problem_idx] + infer_input_fn = problem.make_estimator_input_fn( + tf.estimator.ModeKeys.PREDICT, hparams, dataset_kwargs=dataset_kwargs) # Get the predictions as an iterable predictions = estimator.predict(infer_input_fn) @@ -200,14 +204,17 @@ def decode_from_dataset(estimator, tf.logging.info("Completed inference on %d samples." % num_predictions) # pylint: disable=undefined-loop-variable -def decode_from_file(estimator, filename, decode_hp, decode_to_file=None): +def decode_from_file(estimator, + filename, + hparams, + decode_hp, + decode_to_file=None): """Compute predictions on entries in filename and write them out.""" if not decode_hp.batch_size: decode_hp.batch_size = 32 tf.logging.info( "decode_hp.batch_size not specified; default=%d" % decode_hp.batch_size) - hparams = estimator.params problem_id = decode_hp.problem_idx # Inputs vocabulary is set to targets if there are no inputs in the problem, # e.g., for language models where the inputs are just a prefix of targets. @@ -300,9 +307,8 @@ def input_fn(): return input_fn -def decode_interactively(estimator, decode_hp): +def decode_interactively(estimator, hparams, decode_hp): """Interactive decoding.""" - hparams = estimator.params def input_fn(): gen_fn = make_input_fn_from_generator(_interactive_input_fn(hparams)) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index b06565532..f189fb413 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -921,6 +921,9 @@ def fn_with_timing(*args, **kwargs): def _create_dummy_vars(): """Dummy vars for restore to work when not using TPU codepath.""" + var_names = set([v.name for v in tf.global_variables()]) + if "losses_avg/problem_0/total_loss:0" in var_names: + return with tf.variable_scope("losses_avg"): with tf.variable_scope("problem_0"): for var_name in ["total", "extra", "training"]: From 76b0f51b08f56028ff5392f2e7e6067bb5656494 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser <lukaszkaiser@google.com> Date: Fri, 15 Dec 2017 12:00:12 -0800 Subject: [PATCH 0663/4095] Add straight-through pass in vq-vae, other small Transformer VAE improvements. PiperOrigin-RevId: 179222668 --- tensor2tensor/models/transformer_vae.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 989e362d1..22cede293 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -253,7 +253,7 @@ def embed(x): means = tf.get_variable(name="means", shape=[hparams.v_size, hparams.hidden_size]) x_means_hot, x_means, l = kmeans(x, means, hparams, name="vq-vae-kmeans") - h1 = x_means + h1 = tf.stop_gradient(x_means) + x - tf.stop_gradient(x) c = tf.argmax(x_means_hot, axis=-1) h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") res = tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") @@ -313,7 +313,10 @@ def decode_transformer(encoder_output, def multinomial_sample(x, vocab_size, temperature): """Multinomial sampling from a n-dimensional tensor.""" - samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1) + if temperature > 0: + samples = tf.multinomial(tf.reshape(x, [-1, vocab_size]) / temperature, 1) + else: + samples = tf.argmax(x, axis=-1) reshaped_samples = tf.reshape(samples, common_layers.shape_list(x)[:-1]) return tf.to_int32(reshaped_samples) @@ -446,8 +449,9 @@ def bn_inputs(): if hparams.do_mask and hparams.do_refine: def refine_res(): return residual_conv(res, 1, (5, 1), hparams, "refine") - all_masked = tf.less(tf.reduce_sum(mask), 0.1) - res = tf.cond(all_masked, refine_res, lambda: res) + masked_batches = tf.reduce_sum(mask, axis=[1, 2, 3]) + all_masked = tf.less(masked_batches, 0.1) + res = tf.where(all_masked, refine_res(), res) latent_time = tf.less(200000, tf.to_int32(tf.train.get_global_step())) losses["latent_pred"] *= tf.to_float(latent_time) losses["extra"] *= 1.0 - tf.to_float(latent_time) @@ -575,7 +579,7 @@ def transformer_ae_cifar(): hparams.filter_size = 512 hparams.batch_size = 1024 * 4 hparams.num_compress_steps = 2 - hparams.v_size = 1024 * 16 + hparams.v_size = 1024 * 64 hparams.kl_warmup_steps = 150000 hparams.startup_steps = 20000 hparams.kmeans_lr_factor = 0.0 From 48a3ca72a38b41f82e6a1277d014c6bffc2fb2cb Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 18 Dec 2017 11:00:00 -0800 Subject: [PATCH 0664/4095] Fix saving T2T flags, now that they live in a separate module. PiperOrigin-RevId: 179444338 --- tensor2tensor/utils/trainer_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index ace2f0b4e..a62a66321 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -197,7 +197,7 @@ def save_metadata(output_dir, hparams): t2t_flags_str = "\n".join([ "--%s=%s" % (f.name, f.value) for f in FLAGS.flags_by_module_dict()[ - "tensor2tensor.utils.trainer_utils"] + "tensor2tensor.utils.flags"] ]) else: flags_dict = FLAGS.__dict__["__flags"] From 69e4b36379b69410b7c5daa7ad979c3003f49b01 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi <rsepassi@google.com> Date: Mon, 18 Dec 2017 11:18:29 -0800 Subject: [PATCH 0665/4095] Move decode_hparams into ctor PiperOrigin-RevId: 179447106 --- tensor2tensor/utils/t2t_model.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index f189fb413..e473a6e3b 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -766,13 +766,14 @@ def estimator_model_fn(cls, data_parallelism = ( None if hparams.no_data_parallelism else _create_data_parallelism( use_tpu=use_tpu, **config.t2t_device_info)) - model = cls(hparams, mode, data_parallelism=data_parallelism) + model = cls(hparams, mode, data_parallelism=data_parallelism, + decode_hparams=decode_hparams) # PREDICT mode if mode == tf.estimator.ModeKeys.PREDICT: assert not use_tpu assert decode_hparams is not None - return model.estimator_spec_predict(features, decode_hparams) + return model.estimator_spec_predict(features) # TRAIN and EVAL modes if hparams.eval_run_autoregressive and mode == tf.estimator.ModeKeys.EVAL: @@ -846,8 +847,9 @@ def estimator_spec_eval(self, eval_metric_ops=eval_metrics, loss=loss) - def estimator_spec_predict(self, features, decode_hparams): + def estimator_spec_predict(self, features): """Construct EstimatorSpec for PREDICT mode.""" + decode_hparams = self._decode_hparams infer_out = self.infer( features, beam_size=decode_hparams.beam_size, From ae62ed639267d033dcbea3cf345677bd82a7c5d1 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 18 Dec 2017 17:07:06 -0800 Subject: [PATCH 0666/4095] Remove the extra kl loss term from the VQ-VAE loss. PiperOrigin-RevId: 179490021 --- tensor2tensor/models/transformer_vae.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 22cede293..5b540190a 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -163,11 +163,9 @@ def kmeans(x, means, hparams, name): with tf.variable_scope(name): x_means_hot = nearest(x, means, hparams) x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) - x_flat = tf.reshape(x, [-1, hparams.hidden_size]) - kl = tf.reduce_mean(tf.reduce_sum(tf.square(x_flat - x_means), axis=-1)) reg_loss1 = tf.nn.l2_loss((tf.stop_gradient(x) - x_means)) reg_loss2 = hparams.beta * tf.nn.l2_loss((x - tf.stop_gradient(x_means))) - l = kl + reg_loss1 + reg_loss2 + l = reg_loss1 + reg_loss2 return x_means_hot, x_means, l @@ -208,6 +206,8 @@ def embed(x): means = tf.get_variable(name="means", shape=[hparams.v_size, hparams.hidden_size]) h1 = tf.gather(means, x) + elif hparams.bottleneck_kind == "rounding": + h1 = tf.round(x) h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") return tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") @@ -255,6 +255,9 @@ def embed(x): x_means_hot, x_means, l = kmeans(x, means, hparams, name="vq-vae-kmeans") h1 = tf.stop_gradient(x_means) + x - tf.stop_gradient(x) c = tf.argmax(x_means_hot, axis=-1) + if hparams.bottleneck_kind == "round": + c = tf.round(x) + h1 = x + tf.stop_gradient(tf.round(x) - x) h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") res = tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") return res, c, l, embed From 254bdfce1b3b6269e2ffd16aca6340c7a6207639 Mon Sep 17 00:00:00 2001 From: Niki Parmar <nikip@google.com> Date: Mon, 18 Dec 2017 19:14:31 -0800 Subject: [PATCH 0667/4095] Move attention util functions PiperOrigin-RevId: 179499133 --- tensor2tensor/visualization/attention.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/visualization/attention.py b/tensor2tensor/visualization/attention.py index 6109f9cc6..0de7d7165 100644 --- a/tensor2tensor/visualization/attention.py +++ b/tensor2tensor/visualization/attention.py @@ -45,8 +45,10 @@ def show(inp_text, out_text, enc_atts, dec_atts, encdec_atts): + enc_att, dec_att, encdec_att = (resize(enc_atts), + resize(dec_atts), resize(encdec_atts)) attention = _get_attention( - inp_text, out_text, enc_atts, dec_atts, encdec_atts) + inp_text, out_text, enc_att, dec_att, encdec_att) att_json = json.dumps(attention) _show_attention(att_json) @@ -57,6 +59,23 @@ def _show_attention(att_json): display.display(display.Javascript(vis_js)) +def resize(att_mat, max_length=30): + """Normalize attention matrices and reshape as necessary.""" + layer_mats = [] + for att in att_mat: + # Sum across different heads. + att = att[ :, :max_length, :max_length] + row_sums = np.sum(att, axis=0) + # Normalize + layer_mat = att / row_sums[np.newaxis, :] + lsh = layer_mat.shape + # Add extra batch dim for viz code to work. + if len(np.shape(lsh)) == 3: + layer_mat = np.reshape(layer_mat, (1, lsh[0], lsh[1], lsh[2])) + layer_mats.append(layer_mat) + return layer_mats + + def _get_attention(inp_text, out_text, enc_atts, dec_atts, encdec_atts): """Compute representation of the attention ready for the d3 visualization. From 474545a392334cf5a8213970e178066d74c27e11 Mon Sep 17 00:00:00 2001 From: T2T Team <no-reply@google.com> Date: Mon, 18 Dec 2017 20:36:06 -0800 Subject: [PATCH 0668/4095] Add a minimal webserver and client side code to interactively query a tensor2tensor model PiperOrigin-RevId: 179505632 --- tensor2tensor/insights/__init__.py | 15 + tensor2tensor/insights/graph.py | 155 ++++ tensor2tensor/insights/index.html | 73 ++ .../attention-visualization.html | 130 +++ .../attention-visualization.js | 312 +++++++ .../insights/polymer/common-types.js | 163 ++++ .../polymer/explore_view/explore-view.html | 154 ++++ .../polymer/explore_view/explore-view.js | 205 +++++ .../graph-visualization.html | 186 ++++ .../graph-visualization.js | 822 ++++++++++++++++++ tensor2tensor/insights/polymer/index.html | 17 + .../polymer/insights_app/insights-app.html | 142 +++ .../polymer/insights_app/insights-app.js | 72 ++ .../language-selector-content.html | 62 ++ .../language-selector-content.js | 237 +++++ .../language_selector/language-selector.html | 42 + .../language_selector/language-selector.js | 82 ++ .../processing-visualization.html | 85 ++ .../processing-visualization.js | 49 ++ .../polymer/query_card/query-card.html | 93 ++ .../insights/polymer/query_card/query-card.js | 330 +++++++ .../translation-result.html | 90 ++ .../translation_result/translation-result.js | 111 +++ tensor2tensor/insights/query_processor.py | 43 + tensor2tensor/insights/server.py | 180 ++++ tensor2tensor/insights/transformer_model.py | 300 +++++++ 26 files changed, 4150 insertions(+) create mode 100644 tensor2tensor/insights/__init__.py create mode 100644 tensor2tensor/insights/graph.py create mode 100644 tensor2tensor/insights/index.html create mode 100644 tensor2tensor/insights/polymer/attention_visualization/attention-visualization.html create mode 100644 tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js create mode 100644 tensor2tensor/insights/polymer/common-types.js create mode 100644 tensor2tensor/insights/polymer/explore_view/explore-view.html create mode 100644 tensor2tensor/insights/polymer/explore_view/explore-view.js create mode 100644 tensor2tensor/insights/polymer/graph_visualization/graph-visualization.html create mode 100644 tensor2tensor/insights/polymer/graph_visualization/graph-visualization.js create mode 100644 tensor2tensor/insights/polymer/index.html create mode 100644 tensor2tensor/insights/polymer/insights_app/insights-app.html create mode 100644 tensor2tensor/insights/polymer/insights_app/insights-app.js create mode 100644 tensor2tensor/insights/polymer/language_selector/language-selector-content.html create mode 100644 tensor2tensor/insights/polymer/language_selector/language-selector-content.js create mode 100644 tensor2tensor/insights/polymer/language_selector/language-selector.html create mode 100644 tensor2tensor/insights/polymer/language_selector/language-selector.js create mode 100644 tensor2tensor/insights/polymer/processing_visualization/processing-visualization.html create mode 100644 tensor2tensor/insights/polymer/processing_visualization/processing-visualization.js create mode 100644 tensor2tensor/insights/polymer/query_card/query-card.html create mode 100644 tensor2tensor/insights/polymer/query_card/query-card.js create mode 100644 tensor2tensor/insights/polymer/translation_result/translation-result.html create mode 100644 tensor2tensor/insights/polymer/translation_result/translation-result.js create mode 100644 tensor2tensor/insights/query_processor.py create mode 100644 tensor2tensor/insights/server.py create mode 100644 tensor2tensor/insights/transformer_model.py diff --git a/tensor2tensor/insights/__init__.py b/tensor2tensor/insights/__init__.py new file mode 100644 index 000000000..3f714ce1f --- /dev/null +++ b/tensor2tensor/insights/__init__.py @@ -0,0 +1,15 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/tensor2tensor/insights/graph.py b/tensor2tensor/insights/graph.py new file mode 100644 index 000000000..a733998b8 --- /dev/null +++ b/tensor2tensor/insights/graph.py @@ -0,0 +1,155 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Graph representation for building decoding graph visualizations.""" + + +class Vertex(object): + """Vertex stores in and out edge connections to other Vertex instances. + + The Vertex class supports serialization to a JSON data format expected by the + client side representation. When serializing, it generates the following + fields: + in_edge_index: The list of directed edge indices into the Vertex. + out_edge_index: The list of directed edge indices from the Vertex. + """ + + def __init__(self, idx): + """Initialize the Vertex. + + Args: + idx: The index of the vertex. + """ + self.idx = idx + self.in_edges = [] + self.out_edges = [] + + def to_dict(self): + """Returns a simplified dictionary representing the Vertex. + + Returns: + A dictionary that can easily be serialized to JSON. + """ + return { + "in_edge_index": self.in_edges, + "out_edge_index": self.out_edges, + } + + +class Edge(object): + """Edge stores edge details connecting two Vertex instances. + + The Edge class supports serialization to a JSON data format expected by the + client side representation. When serializing, it generates the following + fields: + source_index: The source Vertex index for this Edge. + target_index: The target Vertex index for this Edge. + data: Arbitrary data for this Edge. + """ + + def __init__(self, idx): + """Initialize the Edge. + + Args: + idx: The index of the Edge. + """ + self.idx = idx + self.source = -1 + self.target = -1 + self.data = {} + + def to_dict(self): + """Returns a simplified dictionary representing the Vertex. + + Returns: + A dictionary that can easily be serialized to JSON. + """ + return { + "source_index": self.source, + "target_index": self.target, + "data": self.data, + } + + def __str__(self): + return str(self.to_dict()) + + +class Graph(object): + """A directed graph that can easily be JSON serialized for visualization. + + When serializing, it generates the following fields: + edge: The list of all serialized Edge instances. + node: The list of all serialized Vertex instances. + """ + + def __init__(self): + self.vertices = [] + self.edges = [] + self.vertex_map = {} + + def new_vertex(self): + """Creates and returns a new vertex. + + Returns: + A new Vertex instance with a unique index. + """ + vertex = Vertex(len(self.vertices)) + self.vertices.append(vertex) + return vertex + + def get_vertex(self, key): + """Returns or Creates a Vertex mapped by key. + + Args: + key: A string reference for a vertex. May refer to a new Vertex in which + case it will be created. + + Returns: + A the Vertex mapped to by key. + """ + if key in self.vertex_map: + return self.vertex_map[key] + vertex = self.new_vertex() + self.vertex_map[key] = vertex + return vertex + + def add_edge(self, source, target): + """Returns a new edge connecting source and target vertices. + + Args: + source: The source Vertex. + target: The target Vertex. + + Returns: + A new Edge linking source to target. + """ + edge = Edge(len(self.edges)) + self.edges.append(edge) + source.out_edges.append(edge.idx) + target.in_edges.append(edge.idx) + edge.source = source.idx + edge.target = target.idx + return edge + + def to_dict(self): + """Returns a simplified dictionary representing the Graph. + + Returns: + A dictionary that can easily be serialized to JSON. + """ + return { + "node": [v.to_dict() for v in self.vertices], + "edge": [e.to_dict() for e in self.edges] + } diff --git a/tensor2tensor/insights/index.html b/tensor2tensor/insights/index.html new file mode 100644 index 000000000..fe3f8a0b7 --- /dev/null +++ b/tensor2tensor/insights/index.html @@ -0,0 +1,73 @@ +<!doctype html> +<!-- +@license +Copyright 2017 The Tensor2Tensor Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> +<html> +<head> + <meta name="viewport" content="width=device-width, minimum-scale=1.0, initial-scale=1, user-scalable=no"> + <meta name="mobile-web-app-capable" content="yes"> + <meta name="apple-mobile-web-app-capable" content="yes"> + <meta name="apple-touch-fullscreen" content="yes"> + <meta name="apple-mobile-web-app-status-bar-style" + content="black-translucent" > + <meta name="format-detection" content="telephone=no"> + + <title>NMT Research Frontend + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.html b/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.html new file mode 100644 index 000000000..4ec11ace8 --- /dev/null +++ b/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.html @@ -0,0 +1,130 @@ + + + + + + + + + + + + + + + diff --git a/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js b/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js new file mode 100644 index 000000000..b58d90905 --- /dev/null +++ b/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js @@ -0,0 +1,312 @@ +/** + * @license + * Copyright 2017 The Tensor2Tensor Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +goog.module('t2t.AttentionVisualization'); + +/** + * `` presents a heatmap of input-output associations. + * + * The heat map association shows source to target word association strengths + * according to some method. + * + * ### Usage + * + * + */ +class AttentionVisualization extends Polymer.Element { + constructor() { + super(); + + /** + * D3.js DOM element. + * @private + */ + this.container_ = undefined; + /** + * @private + */ + this.margin_ = { + top: 150, + bottom: 50, + right: 10, + left: 100 + }; + /** + * D3.js DOM element. + * @private + */ + this.svg_ = undefined; + /** + * D3.js DOM element. + * @private + */ + this.vis_ = undefined; + /** + * D3.js DOM element. + * @private + */ + this.zoom_ = undefined; + } + + static get is() { + return 'attention-visualization'; + } + + static get properties() { + return { + /** + * @type {AttentionData} + */ + data: { + type: Object, + observer: 'dataUpdated_', + }, + /** + * @type {number} + */ + zoomDepth_: { + type: Number, + }, + }; + } + + static get observers() { + return [ + 'zoomDepthChanged_(zoomDepth_)', + ]; + } + + /** + * Sets the default zoom depth. + * @override + */ + ready() { + super.ready(); + this.set('zoomDepth_', 20); + } + + /** + * Sets the zoom state based on the updated depth. + * @param {number} zoomDepth the zoom depth. + * @private + */ + zoomDepthChanged_(zoomDepth) { + if (!this.container_) { return; } + + if (zoomDepth == 0) { + zoomDepth = 0.000001; + } + let transform = d3.zoomTransform(this.vis_.node()).scale(zoomDepth / 20.0); + this.container_.attr("transform", transform); + } + + /** + * Updates the heatmap. + * @param {!AttentionData} newData the new alignment data. + * @private + */ + dataUpdated_(newData) { + // Create the bounding areas and margins for the heatmap. + let cellDimension = 40; + let sourceTokens = newData.source_tokens; + let targetTokens = newData.target_tokens; + + // Convert the attention weights to cell objects which also give access to + // the row and column indices. + let mapCells = newData.weights.map(function(d, i) { + return { + value: d, + row: Math.floor(i / targetTokens.length), + col: i % targetTokens.length + }; + }); + + // Create the color scale. + let colorScale = d3.scaleQuantile().domain([0.0, 1.0]).range([ + '#cccccc', '#b2b2b2', '#999999', '#7f7f7f', + '#666666', '#4c4c4c', '#333333', '#191919' + ]); + + this.zoom_ = d3.zoom().scaleExtent([1, 10]).on('zoom', zoomed.bind(this)); + + d3.select(this.$.chart).selectAll("*").remove(); + + // Create the bounding div and svgs which will contain all details. + this.svg_ = d3.select(this.$.chart) + .append('div') + .classed('svg-container', true) + .append('svg') + .attr('width', '100%') + .attr('height', '100%') + .classed('svg-content-responsive', true); + + this.vis_ = this.svg_.append('g') + .attr('transform', + 'translate(' + this.margin_.left + ',' + this.margin_.top + ')') + .call(this.zoom_) + .on('dblclick.zoom', null) + .on('wheel.zoom', null); + + // Create a bounding rectangle upon which zooming and panning will take + // place. + this.vis_.append('rect') + .attr('width', '100%') + .attr('height', '100%') + .style('fill', 'none') + .style('pointer-events', 'all'); + + this.container_ = this.vis_.append('g'); + + // Initiate the panning and/or zooming. + function zoomed() { + this.container_.attr("transform", + d3.event.transform.scale(this.zoomDepth_ / 20.0)); + } + + // Place the source tokens along the vertical axis. Each token has an id + // based on it's index. + var sourceLabels = this.container_.append('g'); + + sourceLabels.selectAll('.source-label') + .data(sourceTokens) + .enter() + .append('text') + .text(function(d) { + return d; + }) + .style('text-anchor', 'end') + .attr( + 'id', + function(d, i) { + return 'row-' + i; + }) + .attr('class', 'source-label mono') + .attr('transform', 'translate(-6,' + cellDimension / 1.5 + ')') + .attr('x', 0) + .attr('y', function(d, i) { + return i * cellDimension; + }); + + var targetLabels = this.container_.append('g'); + + // Place the target tokens along the horizontal axis. Each token has an id + // based on it's index. + targetLabels.selectAll('.target-label') + .data(targetTokens) + .enter() + .append('text') + .text(function(d) { + return d; + }) + .style('text-anchor', 'left') + .attr( + 'id', + function(d, i) { + return 'col-' + i; + }) + .attr('class', 'target-label mono') + .attr( + 'transform', 'translate(' + cellDimension / 2 + ',-6) rotate(-90)') + .attr( + 'y', + function(d, i) { + return i * cellDimension; + }) + .attr('x', 0); + + // Create the heat map and populate with cells. Each cell will + // highlight when hovered over. Additionally, the column and row tokens + // will highlight to make clear which tokens are being observed. Lastly, + // each cell will trigger a popup showing details of the alignment state. + var heatMap = this.container_.append('g'); + + // Group the rectangle and text elements and capture the mouse events from + // both so that the rectangle can be highlighted when it's in focus. + let cellGroup = heatMap.selectAll('.cell') + .data(mapCells) + .enter() + .append('g') + .attr('class', 'cell-group') + .on('mouseover', function(d, i) { + // Highlight the newly hovered over cell and it's row/column + // tokens. + d3.select(this).classed('cell-hover', true); + sourceLabels.select('#row-' + d.row) + .classed('text-highlight', true); + targetLabels.select('#col-' + d.col) + .classed('text-highlight', true); + }) + .on('mouseout', function(d) { + // Clear all highlighting. + d3.select(this).classed('cell-hover', false); + + sourceLabels.select('#row-' + d.row) + .classed('text-highlight', false); + targetLabels.select('#col-' + d.col) + .classed('text-highlight', false); + }); + + // Add the rectangles for each cell. + cellGroup + .append('rect') + .attr( + 'id', + function(d, i) { + return 'cell-' + i; + }) + .attr('class', 'cell cell-border') + .attr( + 'x', + function(d) { + return d.col * cellDimension; + }) + .attr( + 'y', + function(d) { + return d.row * cellDimension; + }) + .attr('width', cellDimension) + .attr('height', cellDimension) + .style( + 'fill', + function(d) { + return colorScale(d.value); + }); + + // Add the text for each cell. + cellGroup + .append('text') + .text(function(d) { return d.value.toFixed(2); }) + .attr('class', 'weight weight-label') + .attr('x', function(d) { return 5 + (d.col * cellDimension); }) + .attr('y', function(d) { return 25 + (d.row * cellDimension); }); + } + + /** + * Resets the pan and zoom state. + * @private + */ + reset_() { + if (!this.svg_) { return; } + this.vis_.call(this.zoom_.transform, d3.zoomIdentity); + this.set('zoomDepth_', 20); + } +} + +customElements.define(AttentionVisualization.is, AttentionVisualization); + +exports = {AttentionVisualization}; diff --git a/tensor2tensor/insights/polymer/common-types.js b/tensor2tensor/insights/polymer/common-types.js new file mode 100644 index 000000000..13ecf2428 --- /dev/null +++ b/tensor2tensor/insights/polymer/common-types.js @@ -0,0 +1,163 @@ +/** + * @license + * Copyright 2017 The Tensor2Tensor Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @fileoverview A set of shared types that will be replaced by js proto types. + */ + +/** + * A typedef for a nlp.nmt.mt_debug_fe.LanguageConfiguration message. + * This can't be converted to javascript yet because it transitively depends on + * tensorflow protos that can't be converted to javascript. + * TODO(kstevens): Remove this typedef when we remove the dependency on + * non-convertible tensorflow protos. + * @typedef {{ + * code: string, + * name: string, + * hidden: ?boolean, + * }} + */ +let Language; + +/** + * A typedef for a nlp.nmt.mt_debug_fe.SerializedConfiguration message. + * This can't be converted to javascript yet because it transitively depends on + * tensorflow protos that can't be converted to javascript. + * TODO(kstevens): Remove this typedef when we remove the dependency on + * non-convertible tensorflow protos. + * @typedef {{ + * id: string, + * target: string, + * source_language: Language, + * target_language: Language, + * }} + */ +let Model; + +/** + * @typedef {{ + * name: string, + * localProbability: number, + * cumalitiveProbability: number, + * attention: Array, + * children: Array, + * }} + */ +let TreeNode; + +/** + * @typedef {{ + * source_tokens: Array, + * target_tokens: Array, + * weights: !Array + * }} + */ +let AttentionData; + +/** + * @typedef {{ + * label: string, + * label_id: number, + * log_probability: number, + * total_log_probability: number, + * score: number, + * parent_id: number, + * }} + */ +let Candidate; + +/** + * @typedef {{ + * id: number, + * stepIndex: number, + * candidate: !Candidate, + * children: !Array, + * }} + */ +let InteractiveNode; + +/** + * @typedef {{ + * step_name: string, + * segment: !Array + * }} + */ +let QueryProcessingRewriteStep; + +/** + * @typedef {{ + * source_processing: !Array, + * target_processing: !Array, + * }} + */ +let QueryProcessingVisualization; + +/** + * @typedef {{ + * in_edge_index: !Array, + * out_edge_index: !Array, + * }} + */ +let BeamSearchNode; + +/** + * @typedef {{ + * label_id: number, + * label: string, + * log_probability: number, + * total_log_probability: number, + * score: number, + * completed: boolean, + * }} + */ +let BeamSearchCandidate; + +/** + * @typedef {{ + * source_index: number, + * target_index: number, + * data: !BeamSearchCandidate, + * }} + */ +let BeamSearchEdge; + +/** +/** + * @typedef {{ + * node: !Array, + * edge: !Array, + * }} + */ +let SearchGraphVisualization; + +/** + * @typedef {{ + * candidate_list: !Array<{ + * candidate: !Array, + * }>, + * }} + */ +let GenerateCandidateResponse; + +/** + * @typedef {{ + * session_id: number, + * }} + */ +let StartTranslationResponse; diff --git a/tensor2tensor/insights/polymer/explore_view/explore-view.html b/tensor2tensor/insights/polymer/explore_view/explore-view.html new file mode 100644 index 000000000..d0456211f --- /dev/null +++ b/tensor2tensor/insights/polymer/explore_view/explore-view.html @@ -0,0 +1,154 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensor2tensor/insights/polymer/explore_view/explore-view.js b/tensor2tensor/insights/polymer/explore_view/explore-view.js new file mode 100644 index 000000000..b9cb329bb --- /dev/null +++ b/tensor2tensor/insights/polymer/explore_view/explore-view.js @@ -0,0 +1,205 @@ +/** + * @license + * Copyright 2017 The Tensor2Tensor Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +goog.module('t2t.ExploreView'); + +/** + * `` Presents a view for debuging translations. + * + * This provides an interactive interface for querying a backend service to + * fetch detailed analysis of a translation process. Each result will be + * provided as a stack. + * + * ### Usage + * + * + */ +class ExploreView extends Polymer.Element { + static get is() { + return 'explore-view'; + } + + static get properties() { + return { + route: { + type: Object, + }, + /** + * @type {!Array} + */ + rules_: { + type: Array, + }, + /** + * @type {?Model} + */ + model_: { + type: Object + }, + /** + * @type {string} + */ + query_: { + type: Object, + } + }; + } + + static get observers() { + return [ + 'modelChanged_(queryData, model_)', + ]; + } + + /** + * @override + */ + ready() { + super.ready(); + this.set('rules_', []); + this.set('fetchingResult', false); + } + + /** + * Noop + * @public + */ + refresh() { + // Noop + } + + /** + * Resets the results when a model changes and triggers a query automatically + * if one exists. + * @param {?{query: string}} queryData The current route data. + * @param {?Model} model Unused, but needed for triggering. + * @private + */ + modelChanged_(queryData, model) { + if (queryData && queryData.query) { + // Compose the query from the querydata field and the path in the rest of + // the route. If the link includes an escaped "/" app-route splits the + // query and remaining path on that escaped "/". So query appears to not + // include the rest of the intended query. + let query = unescape(queryData.query) + this.get('tailRoute').path; + this.set('query_', query); + this.translate_(); + } + this.set('results', []); + this.set('rules_', []); + } + + /** + * Sends a translation request to the server. + * @private + */ + translate_() { + if (!this.model_ || !this.model_.id) { + return; + } + + var params = { + 'source': this.query_, + 'id': this.model_.id, + 'sl': this.model_.source_language.code, + 'tl': this.model_.target_language.code, + }; + var paramList = this.createBodyValue_(params); + this.set('url', '/debug?' + paramList); + this.set('fetchingResult', true); + this.$.translateAjax.generateRequest(); + } + + /** + * Returns a string with all the query parameters composed together. This + * also serializes the rapid response rules provided. + * @param {!Object} params The params to combine. + * @returns {string} The params collapsed together. + * @private + */ + createBodyValue_(params) { + // Add the key value body parts. + var bodyParts = []; + for (var param in params) { + var value = window.encodeURIComponent(params[param]); + bodyParts.push(param + "=" + value); + } + + // Add the rapid response rules. + for (var i = 0; i < this.rules_.length; ++i) { + var rule = this.rules_[i]; + var value = + 'src_lang: "' + this.model_.source_language.code + '" ' + + 'trg_lang: "' + this.model_.target_language.code + '" ' + + 'source: "' + rule['source'] + '" ' + + 'bad_translations: "' + rule.bad_translations + '" ' + + 'good_translations: "' + rule.good_translations + '" ' + + 'attention_threshold: ' + rule.attention_threshold; + bodyParts.push('rule=' + window.encodeURIComponent(value)); + } + + // Combine everything together. + return bodyParts.join('&'); + } + + /** + * Adds the translation response to the list of results. + * @param {!Event} event The event object from the `response` event. This is + * required to access the current response, as there are timing issues when + * accessing the latest response with iron-ajax's `last-response` attribute. + * @private + */ + handleTranslationResponse_(event) { + this.set('fetchingResult', false); + this.push('results', { + response: event.detail.response, + query: this.query_, + model: this.model_, + }); + } + + /** + * Adds a new rapid response rule to be filled out. + * @private + */ + addRule_() { + this.push('rules_', { + source: '', + bad_translations: '', + good_translations: '', + attention_threshold: 0.9, + }); + } + + /** + * Deletes a rapid response rule. + * @param {Event} e The event in the dom repeat template element. + * @private + */ + deleteRule_(e) { + let model = e.model; + this.splice('rules_', model.index, 1); + } +} + +customElements.define(ExploreView.is, ExploreView); + +exports = {ExploreView}; diff --git a/tensor2tensor/insights/polymer/graph_visualization/graph-visualization.html b/tensor2tensor/insights/polymer/graph_visualization/graph-visualization.html new file mode 100644 index 000000000..930536632 --- /dev/null +++ b/tensor2tensor/insights/polymer/graph_visualization/graph-visualization.html @@ -0,0 +1,186 @@ + + + + + + + + + + + + + + + + + + + diff --git a/tensor2tensor/insights/polymer/graph_visualization/graph-visualization.js b/tensor2tensor/insights/polymer/graph_visualization/graph-visualization.js new file mode 100644 index 000000000..e69ef3713 --- /dev/null +++ b/tensor2tensor/insights/polymer/graph_visualization/graph-visualization.js @@ -0,0 +1,822 @@ +/** + * @license + * Copyright 2017 The Tensor2Tensor Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +goog.module('t2t.GraphVisualization'); + +/** + * `` Presents a beam search decoding graph. + * + * The Beam Search decoding graph visualizes the entire search space of a + * sequence generation model. Each layer in the graph displays a decoding step + * with nodes in that layer representing generated candidates. If supported by + * the backend server, the graph can enter interactive mode where candidates can + * be selected for each generation step. + * + * + * ### Usage + * + * + */ +class GraphVisualization extends Polymer.Element { + constructor() { + super(); + + /** + * @private + */ + this.svg_ = undefined; + /** + * @private + */ + this.vis_ = undefined; + + /** + * @type {!TreeNode} + * @private + */ + this.rootTree_ = { + name: '', + localProbability: 0, + cumalitiveProbability: 0, + score: 0, + attention: [], + children: [], + }; + /** + * @type {!InteractiveNode} + * @private + */ + this.interactiveRoot_ = { + id: this.nodeId_, + stepIndex: 0, + candidate: { + label: '', + label_id: 1, + log_probability: 0, + total_log_probability: 0, + score: 0, + parent_id: 0 + }, + children: [], + }; + /** + * @type {Array} + * @private + */ + this.selectedNodes_ = []; + /** + * @private + */ + this.stepNodes_ = []; + + /** + * Metadata for navigating nodes. + * @private + */ + this.nodeId_ = 0; + + /** + * D3.js helper object. + * @private + */ + this.partition_ = undefined; + /** + * D3.js helper object. + * @private + */ + this.zoom_ = undefined; + + /** + * D3.js DOM element. + * @private + */ + this.container_ = undefined; + } + + static get is() { + return 'graph-visualization'; + } + + static get properties() { + return { + /** + * @type {!SearchGraphVisualization} + */ + data: { + type: Object, + observer: 'dataUpdated_', + }, + /** + * @type {!Model} + */ + model: { + type: Object, + }, + /** + * @type {string} + */ + query: { + type: String, + }, + /** + * @type {number} + */ + zoomDepth_: { + type: Number, + value: 20, + }, + /** + * @type {!StartTranslationResponse} + */ + startResponse_: { + type: Object, + }, + /** + * @type {!GenerateCandidateResponse} + */ + generateResponse_: { + type: Object, + }, + }; + } + + static get observers() { + return [ + 'zoomDepthChanged_(zoomDepth_)', + ]; + } + + /** + * Sets the default zoom depth. + * @override + */ + ready() { + super.ready(); + + this.set('zoomDepth_', 20); + this.set('stepMode', 'view'); + } + + /** + * Sets the zoom state based on the updated depth. + * @param {number} zoomDepth the zoom depth. + * @private + */ + zoomDepthChanged_(zoomDepth) { + if (!this.svg_) { + return; + } + + if (zoomDepth == 0) { + zoomDepth = 0.000001; + } + let transform = d3.zoomTransform(this.svg_.node()).scale(zoomDepth / 20.0); + this.vis_.attr("transform", transform); + } + + /** + * Converts the NMT Graph JSON format to a nested tree heirachy and plots the + * tree as a collapsible tree visualization. + * @private + */ + dataUpdated_() { + // We need to determine two key nodes in the graph: + // Root: This is the node with no in links and some out links. + // Term: This is the terminal node with no out links and some in links. + // + // Our plot will associate token with actual nodes. For all nodes except + // the Term node, this will work fine since in the tree, each node is + // referenced only once as the head of an edge. + // + // The Term node however needs to be duplicated for each edge ending at it + // so that each instance can have a unique token associated with it. + + // Step 1) Find Root and Term node indices so they can be refered to later. + + var rootIndex = -1; + var nodes = this.data.node; + for (var i = 0; i < nodes.length && rootIndex == -1; ++i) { + var node = nodes[i]; + if (node.in_edge_index.length == 0 && node.out_edge_index.length != 0) { + rootIndex = i; + } + } + + // Step 2) Create the root node in the tree. The tree structure will have + // the following components: + // name: The display name of the node. This will be some token. + // localProbability: The per time step probability of this node. + // cumulativeProbability: The total probability of this path in the beam + // search. + // score: A final score for this path in the beam search. This is + // typically the cumulativeProbability with zero or more penalties. + // attention: The attention vector associated with this node transition. + // children: The list of children in the tree, which are themselves trees. + this.rootTree_ = { + name: '', + localProbability: 0, + cumalitiveProbability: 0, + score: 0, + attention: [], + children: [], + }; + + // Step3) Add each child and it's children recursively starting from the + // root node. + var rootNode = nodes[rootIndex]; + var edges = this.data.edge; + for (var i = 0; i < rootNode.out_edge_index.length; ++i) { + // Get the edge. + var outEdge = edges[rootNode.out_edge_index[i]]; + this.addChildToTree_(this.rootTree_, outEdge, nodes, edges); + } + this.propagateLabel_(this.rootTree_); + + this.createSVG_(); + this.plotTree_(this.rootTree_); + } + + /** + * Forwards path labels from a node's child to the current node. + * @param {!TreeNode} node The node to annotate. + * @private + */ + propagateLabel_(node) { + var hasNBest = false; + var hasBeam = false; + var hasAlternative = false; + for (var i = 0; i < node.children.length; ++i) { + hasNBest = hasNBest || node.children[i].pathType == 'nbest'; + hasBeam = hasBeam || node.children[i].pathType == 'beam'; + hasAlternative = hasAlternative || + node.children[i].pathType == 'alternative'; + } + + if (hasNBest) { + node.pathType = 'nbest'; + } else if (hasBeam) { + node.pathType = 'beam'; + } else if (hasAlternative) { + node.pathType = 'beam'; + } else { + node.pathType = 'unknown'; + } + } + + /** + * Iterates through all the children in tree and adds them as children to the + * top level tree. + * @param {!TreeNode} tree The current node in the tree to update with + * children. + * @param {!BeamSearchEdge} currentEdge The edge going into tree. + * @param {!Array} nodes The list of all node objects. + * @param {!Array} edges The list of all edges between nodes. + * @private + */ + addChildToTree_(tree, currentEdge, nodes, edges) { + // The real edge information is nested in wonderfully named proto + // extensions. Extract the extension information appropriately. + var candidate = currentEdge.data; + + // When the label for the new child is empty, we're at a terminal sink. So + // we ignore that node and instead label the parent. + if (candidate.label == '') { + tree.pathType = 'alternative'; + return; + } + + var node = nodes[currentEdge.target_index]; + /** + * @type {TreeNode} + */ + var childTree = { + name: candidate.label, + attention: [], + localProbability: Math.pow(Math.E, candidate.log_probability), + cumalitiveProbability: Math.pow(Math.E, candidate.total_log_probability), + score: Math.pow(Math.E, candidate.score), + finished: currentEdge.completed || false, + children: [], + node: node, + edge: currentEdge, + pathType: 'unknown', + }; + tree.children.push(childTree); + + if (node.out_edge_index.length == 0) { + if (childTree.name == '') { + childTree.pathType = 'nbest'; + } else if (childTree.name == '' || candidate.finished) { + childTree.pathType = 'alternative'; + } else { + childTree.pathType = 'beam'; + } + } else { + for (var i = 0; i < node.out_edge_index.length; ++i) { + // Get the edge. + var outEdge = edges[node.out_edge_index[i]]; + this.addChildToTree_(childTree, outEdge, nodes, edges); + this.propagateLabel_(childTree); + } + } + } + + /** + * Creates the initial SVG canvas and associated structures. This will remove + * all previous svg elements. + * @private + */ + createSVG_() { + // Create the margins, width, and height. + var maxWidth = 1600; + var maxHeight = 1600; + var margins = [20, 120, 20, 20]; + var width = maxWidth - margins[1] - margins[3]; + var height = maxHeight - margins[0] - margins[2]; + + // Use a d3 partition which will place each node based it's number of + // descendents with the highest ranked path along the top. + this.partition_ = d3.partition().size([height, width]).padding(1); + + // Set the initial position of the root of the tree to be a half the height + // and on the left.. + this.rootTree_.x0 = height / 2; + this.rootTree_.y0 = 0; + + this.zoom_ = d3.zoom() + .scaleExtent([1, 10]) + .on("zoom", zoomed.bind(this)); + + d3.select(this.$.chart).selectAll('.svg-container').remove(); + + // Embed the SVG to host the tree and rotate it so that horizontal matches + // the height of the canvas. + this.svg_ = d3.select(this.$.chart) + .append("div") + .classed("svg-container", true) + .append("svg") + .attr("height", "100%") + .attr("width", "100%") + .classed("svg-content-responsive", true) + .call(this.zoom_) + .on('dblclick.zoom', null) + .on('wheel.zoom', null); + + /** + * Note: For reasons not understood, the javascript compiler can't figure + * out the type of _zoomDepth at this line, so we need to coerce it into + * being a number. + * @type {number} + */ + let zoomDepth = parseInt(this.zoomDepth_, 10); + let transform = d3.zoomTransform(this.svg_.node()).scale(zoomDepth / 20.0); + this.vis_ = this.svg_.append('g') + .attr("transform", transform); + + // Ensure that the entire svg element can be used for panning. + this.vis_.append("rect") + .attr("width", maxWidth) + .attr("height", maxWidth) + .style("fill", "none") + .style("pointer-events", "all"); + + this.container_ = this.vis_.append("g"); + + // Apply the zoom transformation. + function zoomed() { + this.vis_.attr("transform", + d3.event.transform.scale(this.zoomDepth_ / 20.0)); + } + } + + /** + * Examines and plots all reachable nodes in the rootTree with respect to the + * given current root. + * @param {!TreeNode} root The current root node to focus on. + * @private + */ + plotTree_(root) { + // Create the hierarchy. We accumulate node values by just counting the + // number of elements, rather than placing a weight on each node.. + var treeHierachy = d3.hierarchy(this.rootTree_) + .sum(function(d) { + return 1; + }) + .sort(function(a, b) { + return a.data.score - b.data.score; + }); + + this.partition_(treeHierachy); + + // Create an enter object where we can add both nodes and links. + var enter = this.container_.selectAll(".node") + .data(treeHierachy.descendants()) + .enter(); + + // Add the nodes in four steps: + // 1) A general group element to hold all node portions. + // 2) A rectangle with no visible elements. + // 3) A circle for the node. + // 4) a text label. + var node = enter.append("g") + .attr("class", function(d) { + return "node" + (d.children ? " node--internal" : " node--leaf"); + }) + .attr("transform", function(d) { + return "translate(" + d.y0 + "," + d.x0 + ")"; + }) + .attr('id', function(d, i) { return "g-" + i; }); + + node.append("rect") + .attr("width", function(d) { return d.y1 - d.y0; }) + .attr("height", 24); + + node.append("circle") + .attr("r", 10) + .attr("transform", "translate(10, 10)"); + + node.append("text") + .attr("x", 24) + .attr("y", 13) + .text(function(d) { return d.data.name; }); + + // Add out links from each node to it's parent. We link two nodes using the + // bottom center of the circle so that the text label can be placed at + // approximately the vertical center of the circle. This gives a decent + // layout while also not hiding any text. + enter.append("path") + .attr("class", "link") + .attr("d", function(d) { + if (!d.parent) { return ""; } + // Pad the placement of the links just below the center. We have to + // use x0 and y0 for location due to partition, which doesn't create + // standard x/y fields. + var nodeX = d.x0 + 16; + var nodeY = d.y0 + 10; + var parentX = d.parent.x0 + 16; + var parentY = d.parent.y0 + 10; + return "M" + + nodeY + "," + nodeX + + "C" + (nodeY + parentY) / 2 + "," + nodeX + " " + + (nodeY + parentY) / 2 + "," + parentX + " " + + parentY + "," + parentX; + }) + .style('stroke', function(d) { + // Associate a different path color depend on the path type for the + // node. + if (d.data.pathType == 'unknown') + return '#222'; + if (d.data.pathType == 'nbest') + return '#66ff33'; + if (d.data.pathType == 'beam') + return '#ccc'; + if (d.data.pathType == 'alternative') + return '#ff3300'; + }); + + // Setup hover events on each node to place focus and highligh on the node + // being hovered over. We do this by adding opacity to all other nodes. + var nodes = this.container_.selectAll(".node"); + node.on('mouseover', function(d, i) { + nodes.classed('fade', function(d, j) { + return i != j; + }); + d3.select(this).classed('hover', true); + this.set('currentName', d.data.name); + this.set( + 'currentProbability', this.displayNumber(d.data.localProbability)); + this.set( + 'currentTotalProbability', + this.displayNumber(d.data.cumalitiveProbability)); + this.set('score', this.displayNumber(d.data.score)); + }.bind(this)) + .on('mouseout', function(d, i) { + nodes.classed("fade", false); + d3.select(this).classed("hover", false); + }); + } + + /** + * Resets the pan and zoom state. + * @private + */ + reset_() { + if (!this.svg_) { + return; + } + this.svg_.call(this.zoom_.transform, d3.zoomIdentity); + this.set('zoomDepth_', 20); + } + + /** + * Returns the number value with only 2 significant digits. + * @param {number} value The value to present. + * @return {string} value with just two significant digits. + */ + displayNumber(value) { + return value.toFixed(2); + } + + /** + * Enters step by step decoding mode. + * @private + */ + startStepMode_() { + this.set('stepMode', 'edit'); + this.startTranslation_(); + } + + /** + * Exits step by step decoding mode. + * @private + */ + exitStepMode_() { + this.set('stepMode', 'view'); + this.dataUpdated_(); + } + + /** + * Begins step by step decoding with the current model and query. + * @private + */ + startTranslation_() { + this.set('startBody', JSON.stringify({ + model_id: { + language_pair: { + source_language: this.model.source_language.code, + target_language: this.model.target_language.code, + }, + name: this.model.id, + }, + input: this.query, + })); + this.$.startAjax.generateRequest(); + } + + /** + * Handles a start error. + * @private + */ + handleStartError_() { + console.log("failed"); + } + + /** + * Initializes the step by step decoding graph with the root note and makes + * the first generation step. + * @private + */ + handleStartResponse_() { + // Reset the node state and create the root of the tree. Later candidates + // that are returned from the generation call will be added. + this.nodeId_ = 0; + this.interactiveRoot_ = { + id: this.nodeId_, + stepIndex: 0, + candidate: { + label: '', + label_id: 1, + log_probability: 0, + total_log_probability: 0, + score: 0, + parent_id: 0 + }, + children: [], + }; + this.nodeId_++; + + // Track which nodes are active and available as inputs to the next + // generation step. These will be updated with the candidates they + // generate. + this.selectedNodes_ = [this.interactiveRoot_]; + + // Redraw the entire plot with an interactive version. + this.createSVG_(); + this.drawInteractiveTree_(this.interactiveRoot_); + + // Make the first generation request. + this.step_(true); + } + + /** + * Handles a generate ajax error. + * @private + */ + handleGenerateError_() { + console.log("generate failed"); + } + + /** + * Processes the returned candidates and adds them to the graph. + * @private + */ + handleGenerateResponse_() { + // Add the candidates returned and tag them with unique identifiers so we + // can ensure later generation steps don't try to include candidates that + // can't be proccesed any more (we can only use candidates from the most + // recent generation step as input due to limitations in the remote + // decoder). + let stepIndex = 0; + let newlySelectedNodes = []; + this.stepNodes_ = []; + for (var i = 0; i < this.generateResponse_.candidate_list.length; ++i) { + let selectedNode = this.selectedNodes_[i]; + let candidateList = this.generateResponse_.candidate_list[i]; + for (var j = 0; j < candidateList.candidate.length && j < 5; ++j) { + let candidate = candidateList.candidate[j]; + // Tag the parent id so that the next generate call knows what network + // states to maintain. + candidate.parent_id = i; + let newNode = { + id: this.nodeId_, + stepIndex: stepIndex, + candidate: candidate, + children: [], + }; + this.nodeId_++; + stepIndex++; + this.stepNodes_.push(newNode); + selectedNode.children.push(newNode); + + // Select the first candidate. + if (j === 0) { + newNode.selected = true; + newlySelectedNodes.push(newNode); + } + } + } + this.selectedNodes_ = newlySelectedNodes; + + // Reset the graph. + this.createSVG_(); + this.drawInteractiveTree_(this.interactiveRoot_); + } + + /** + * Draws the interactive tree. + * @param {InteractiveNode} rootNode The root node to draw out. + * @private + */ + drawInteractiveTree_(rootNode) { + let treeHierachy = d3.hierarchy(rootNode) + .sum(function(d) { + return 1; + }) + .sort(function(a, b) { + return b.data.candidate.total_log_probability - + a.data.candidate.total_log_probability; + }); + + this.partition_(treeHierachy); + + // Create an enter object where we can add both nodes and links. + var enter = this.container_.selectAll(".node") + .data(treeHierachy.descendants()) + .enter(); + + // Add the nodes in four steps: + // 1) A general group element to hold all node portions. + // 2) A rectangle with no visible elements. + // 3) A circle for the node. + // 4) a text label. + var node = enter.append("g") + .attr("class", function(d) { + return "node" + + (d.children ? " node--internal" : " node--leaf") + + (d.data.selected ? " selected" : ""); + }) + .attr("transform", function(d) { + return "translate(" + d.y0 + "," + d.x0 + ")"; + }) + .attr('id', function(d, i) { return "g-" + i; }); + + node.append("rect") + .attr("width", function(d) { return d.y1 - d.y0; }) + .attr("height", 24); + + node.append("circle") + .attr("r", 10) + .attr("transform", "translate(10, 10)"); + + node.append("text") + .attr("x", 24) + .attr("y", 13) + .text(function(d) { return d.data.candidate.label; }); + + // Add out links from each node to it's parent. We link two nodes using the + // bottom center of the circle so that the text label can be placed at + // approximately the vertical center of the circle. This gives a decent + // layout while also not hiding any text. + enter.append("path") + .attr("class", "link") + .attr("d", function(d) { + if (!d.parent) { return ""; } + // Pad the placement of the links just below the center. We have to + // use x0 and y0 for location due to partition, which doesn't create + // standard x/y fields. + var nodeX = d.x0 + 16; + var nodeY = d.y0 + 10; + var parentX = d.parent.x0 + 16; + var parentY = d.parent.y0 + 10; + return "M" + + nodeY + "," + nodeX + + "C" + (nodeY + parentY) / 2 + "," + nodeX + " " + + (nodeY + parentY) / 2 + "," + parentX + " " + + parentY + "," + parentX; + }) + .style('stroke', '#ccc'); + + node.on('mouseover', function(d, i) { + this.set('currentName', d.data.candidate.label); + this.set( + 'currentProbability', + this.displayNumber(Math.exp(d.data.candidate.log_probability))); + this.set( + 'currentTotalProbability', + this.displayNumber(Math.exp(d.data.candidate.total_log_probability))); + this.set('score', this.displayNumber(Math.exp(d.data.candidate.score))); + }.bind(this)); + + // Store a local pointer to stepNodes and selectedNodes so that the click + // handler can access them without having to replace the 'this' pointer. + // The click handler needs the default 'this' handler to update the state of + // the clicked upon node. + let stepNodes = this.stepNodes_; + let selectedNodes = this.selectedNodes_; + + node.on('click', function(d, i) { + // Ignore nodes that fall out of bounds. + let stepIndex = d.data.stepIndex; + if (stepIndex >= stepNodes.length) { + return; + } + + // Ignore nodes that are from different steps. + let node = stepNodes[stepIndex]; + if (node.id != d.data.id) { + return; + } + + // Update the selected state of the node and either add it to the selected + // list or remove it. + if (!node.selected) { + node.selected = true; + selectedNodes.push(node); + } else { + node.selected = false; + selectedNodes.splice(selectedNodes.indexOf(node), 1); + } + d3.select(this).classed('selected', node.selected); + }); + } + + /** + * Make one generation step with the candidates in the current selectedNodes + * list. If no nodes are selected, this silently does nothing. + * @param {boolean=} opt_skipNext If true, skips the next step during + * generation. + * @private + */ + step_(opt_skipNext) { + // Running generate without any nodes can put the decoder into a bad state + // and make the session unusable, so for now, silently skip this case. + if (this.selectedNodes_.length == 0) { + console.log("Skipping empty step."); + return; + } + + this.set('generateParams', { + skip_next: opt_skipNext || false, + }); + this.set('generateBody', JSON.stringify({ + model_id: { + language_pair: { + source_language: this.model.source_language.code, + target_language: this.model.target_language.code, + }, + name: this.model.id, + }, + session_id: this.startResponse_.session_id, + candidate: this.selectedNodes_.map(function(node) { + return node.candidate; + }), + })); + this.$.generateAjax.generateRequest(); + } + +} + +customElements.define(GraphVisualization.is, GraphVisualization); + +exports = {GraphVisualization}; diff --git a/tensor2tensor/insights/polymer/index.html b/tensor2tensor/insights/polymer/index.html new file mode 100644 index 000000000..fb3fa0db7 --- /dev/null +++ b/tensor2tensor/insights/polymer/index.html @@ -0,0 +1,17 @@ + + diff --git a/tensor2tensor/insights/polymer/insights_app/insights-app.html b/tensor2tensor/insights/polymer/insights_app/insights-app.html new file mode 100644 index 000000000..b2c495433 --- /dev/null +++ b/tensor2tensor/insights/polymer/insights_app/insights-app.html @@ -0,0 +1,142 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensor2tensor/insights/polymer/insights_app/insights-app.js b/tensor2tensor/insights/polymer/insights_app/insights-app.js new file mode 100644 index 000000000..5942d7549 --- /dev/null +++ b/tensor2tensor/insights/polymer/insights_app/insights-app.js @@ -0,0 +1,72 @@ +/** + * @license + * Copyright 2017 The Tensor2Tensor Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +goog.module('t2t.InsightsApp'); + +/** + * `` Manages the views of the NMT Insights App. + * + * ### Usage + * + * + * + */ +class InsightsApp extends Polymer.Element { + static get is() { + return 'insights-app'; + } + + static get properties() { + return { + /** + * @type {string} + */ + page: { + type: String, + reflectToAttribute: true, + }, + }; + } + + static get observers() { + return [ + 'routePageChanged_(routeData.page)', + ]; + } + + /** + * Updates the page field if page exists or uses a default value. + * @param {?string} page The current page name being viewed. + * @private + */ + routePageChanged_(page) { + if (page == this.page) { + return; + } + this.page = page || 'explore'; + this.set('routeData.page', this.page); + + // Refresh the now selected page in case it needs new data on a new view. + let currentPage = this.get('currentPage'); + if (currentPage) { + currentPage.refresh(); + } + } +} + +customElements.define(InsightsApp.is, InsightsApp); + +exports = {InsightsApp}; diff --git a/tensor2tensor/insights/polymer/language_selector/language-selector-content.html b/tensor2tensor/insights/polymer/language_selector/language-selector-content.html new file mode 100644 index 000000000..3abaf7fa4 --- /dev/null +++ b/tensor2tensor/insights/polymer/language_selector/language-selector-content.html @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + diff --git a/tensor2tensor/insights/polymer/language_selector/language-selector-content.js b/tensor2tensor/insights/polymer/language_selector/language-selector-content.js new file mode 100644 index 000000000..b00c5aeec --- /dev/null +++ b/tensor2tensor/insights/polymer/language_selector/language-selector-content.js @@ -0,0 +1,237 @@ +/** + * @license + * Copyright 2017 The Tensor2Tensor Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +goog.module('t2t.LanguageSelectorContent'); + +/** + * `` provides menu content for language selection. + * + * The content provides a search bar that will filter available languages by any + * language name or code that has the query text as a substring. + * + * By default, this will auto select a provided language with language code + * 'en'. + * + * ### Usage + * + * + * + */ +class LanguageSelectorContent extends Polymer.Element { + static get is() { + return 'language-selector-content'; + } + + static get properties() { + return { + /** + * @type {?Array} + */ + languages: { + type: Array, + observer: 'languagesUpdated_', + }, + /** + * @type {!Language} + */ + value: { + type: Object, + notify: true, + }, + /** + * @type {string} + */ + defaultCode: { + type: String, + value: 'en', + } + }; + } + + static get observers() { + return [ + 'selectDefault_(languages, renderedItemCount)', + 'filterUpdated_(filter)', + ]; + } + + /** + * Selects the language in the drop down. + * @param {Language} language The language to pre-select. + * @public + */ + forceSelection(language) { + this.set('filter', ''); + for (var i = 0; i < this.languages.length; ++i) { + if (this.languages[i].code == language.code) { + this.set('value', this.languages[i]); + this.updateSelected_(Polymer.dom(this.$.items).children[i]); + return; + } + } + } + + /** + * Updates the internal languages and resets selection. + * @param {?Array} newLanguages The new language list. + * @private + */ + languagesUpdated_(newLanguages) { + if (newLanguages) { + for (var i = 0; i < newLanguages.length; ++i) { + newLanguages[i].hidden = false; + } + } + + this.set('filter', ''); + this.set('selected', undefined); + } + + /** + * Selects the default language if one can be found after all languages have + * been rendered in the menu. + * @param {?Array} languages The languages + * @param {number} renderedItemCount The number of languages rendered. + * @private + */ + selectDefault_(languages, renderedItemCount) { + if (this.get('selected') || !languages || + languages.length != renderedItemCount) { + return; + } + + this.$.languageList.render(); + if (this.value) { + for (var i = 0; i < languages.length; ++i) { + if (languages[i].code == this.value.code) { + this.updateSelected_(Polymer.dom(this.$.items).children[i]); + return; + } + } + } + + let defaultCode = this.get('defaultCode'); + for (var i = 0; i < languages.length; ++i) { + if (languages[i].code == defaultCode || languages.length == 1) { + this.set('value', languages[i]); + this.updateSelected_(Polymer.dom(this.$.items).children[i]); + return; + } + } + } + + /** + * Selects the rendered language if only one is visible given the current + * search filter. + * @private + */ + enterPressed_() { + let visibleLanguagesIndices = []; + for (var i = 0; i < this.languages.length; ++i) { + if (!this.languages[i].hidden) { + visibleLanguagesIndices.push(i); + } + } + if (visibleLanguagesIndices.length == 1) { + this.set('value', this.languages[visibleLanguagesIndices[0]]); + this.updateSelected_(Polymer.dom(this.$.items).children[0]); + } + } + + /** + * Sets the hidden state of languages given the current filter. + * @param {string} newFilter The new filter to match languages against. + * @private + */ + filterUpdated_(newFilter) { + if (!this.get('languages')) { + return; + } + + let filter = newFilter.toLowerCase(); + for (var i = 0; i < this.languages.length; ++i) { + let hidden = !this.languageMatchesQuery_(this.languages[i], filter); + this.set('languages.' + i + '.hidden', hidden); + } + } + + /** + * Returns true if the language is visible. + * @param {!Language} language The language being evaluated. + * @return {boolean} True if visible. + * @private + */ + isShown_(language) { + return !language.hidden; + } + + /** + * Returns true if the language matches the filter. + * @param {!Language} language The language being evaluated. + * @param {string} filter The filter to compare against. + * @return {boolean} True if language matches filter. + * @private + */ + languageMatchesQuery_(language, filter) { + let languageName = language.name.toLowerCase(); + return filter == '' || languageName.indexOf(filter) >= 0 || + language.code.indexOf(filter) >= 0; + } + + /** + * Selects the tapped element and updates the value with the corresponding + * language value. + * @param {!EventTarget} e The tap event. + * @private + */ + select_(e) { + let language = this.$.languageList.itemForElement(e.target); + this.set('value', language); + this.updateSelected_(e.target); + } + + /** + * Updates the selection with the given element. + * @param {!Element} ele The selected dom element. + * @private + */ + updateSelected_(ele) { + let oldSelection = this.get('selected'); + if (oldSelection) { + this.dispatchEvent(new CustomEvent('iron-deselect', { + bubbles: true, + composed: true, + detail: { + item: oldSelection, + }, + })); + } + this.set('selected', ele); + this.dispatchEvent(new CustomEvent('iron-select', { + bubbles: true, + composed: true, + detail: { + item: ele, + }, + })); + } +} + +customElements.define(LanguageSelectorContent.is, LanguageSelectorContent); + +exports = {LanguageSelectorContent}; diff --git a/tensor2tensor/insights/polymer/language_selector/language-selector.html b/tensor2tensor/insights/polymer/language_selector/language-selector.html new file mode 100644 index 000000000..963484de9 --- /dev/null +++ b/tensor2tensor/insights/polymer/language_selector/language-selector.html @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + diff --git a/tensor2tensor/insights/polymer/language_selector/language-selector.js b/tensor2tensor/insights/polymer/language_selector/language-selector.js new file mode 100644 index 000000000..ff59f675d --- /dev/null +++ b/tensor2tensor/insights/polymer/language_selector/language-selector.js @@ -0,0 +1,82 @@ +/** + * @license + * Copyright 2017 The Tensor2Tensor Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +goog.module('nmt_insights.LanguageSelector'); + +/** + * `` provides a searchable dropdown of languages. + * + * The dropdown will present the selected language's Name. When opened, the + * search bar will filter available languages by any language name or code that + * has the query text as a substring. + * + * By default, this will auto select a provided language with language code + * 'en'. + * + * ### Usage + * + * + * + */ +class LanguageSelector extends Polymer.Element { + static get is() { + return 'language-selector'; + } + + static get properties() { + return { + /** + * @type {string} + */ + label: { + type: String, + }, + /** + * @type {?Array} + */ + languages: { + type: Array, + }, + /** + * @type {!Language} + */ + value: { + type: Object, + notify: true, + }, + /** + * @type {string} + */ + defaultCode: { + type: String, + value: 'en', + }, + }; + } + + /** + * Selects the language in the drop down. + * @param {Language} language The language to pre-select. + * @public + */ + forceSelection(language) { + this.$.selector.forceSelection(language); + } +} + +customElements.define(LanguageSelector.is, LanguageSelector); + +exports = {LanguageSelector}; diff --git a/tensor2tensor/insights/polymer/processing_visualization/processing-visualization.html b/tensor2tensor/insights/polymer/processing_visualization/processing-visualization.html new file mode 100644 index 000000000..56c75b581 --- /dev/null +++ b/tensor2tensor/insights/polymer/processing_visualization/processing-visualization.html @@ -0,0 +1,85 @@ + + + + + + + + + diff --git a/tensor2tensor/insights/polymer/processing_visualization/processing-visualization.js b/tensor2tensor/insights/polymer/processing_visualization/processing-visualization.js new file mode 100644 index 000000000..99f2d08f9 --- /dev/null +++ b/tensor2tensor/insights/polymer/processing_visualization/processing-visualization.js @@ -0,0 +1,49 @@ +/** + * @license + * Copyright 2017 The Tensor2Tensor Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +goog.module('t2t.ProcessingVisualization'); + +/** + * `` summarises pre/post processing steps. + * + * This element presents the pre-processing segmentation steps and + * post-processing de-segmentation and rewrite steps that are applied to a + * translation query. + * + * ### Usage + * + * + */ +class ProcessingVisualization extends Polymer.Element { + static get is() { + return 'processing-visualization'; + } + + static get properties() { + return { + /** + * @type {!QueryProcessingVisualization} + */ + data: { + type: Object, + }, + }; + } +} + +customElements.define(ProcessingVisualization.is, ProcessingVisualization); + +exports = {ProcessingVisualization}; diff --git a/tensor2tensor/insights/polymer/query_card/query-card.html b/tensor2tensor/insights/polymer/query_card/query-card.html new file mode 100644 index 000000000..740735c0f --- /dev/null +++ b/tensor2tensor/insights/polymer/query_card/query-card.html @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + diff --git a/tensor2tensor/insights/polymer/query_card/query-card.js b/tensor2tensor/insights/polymer/query_card/query-card.js new file mode 100644 index 000000000..3141a9545 --- /dev/null +++ b/tensor2tensor/insights/polymer/query_card/query-card.js @@ -0,0 +1,330 @@ +/** + * @license + * Copyright 2017 The Tensor2Tensor Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +goog.module('t2t.QueryCard'); + +/** + * `` presents a material card for selecting a supported mdoel. + * + * This will fetch a set of supported models for debugging and provide three + * selectors: + * - Source Language + * - Target Language + * - Model + * Once all three have been populated, it will emit a `Model` object through + * `model`. + * + * ### Usage + * + * + * Custom InputField + * + */ +class QueryCard extends Polymer.Element { + constructor() { + super(); + + /** + * A general mapping from language code to the language objects. + * @type {!Object} + * @private + */ + this.languageToNameMap_ = {}; + + /** + * A nested mapping of languages to a list of models. + * @type {!Object>>>} + * @private + */ + this.languagePairToModelMap_ = {}; + } + + static get is() { + return 'query-card'; + } + + static get properties() { + return { + /** + * @type {!Object} + */ + route: { + type: String, + }, + /** + * @type {!Object} + */ + subRoute: { + type: String, + notify: true, + }, + /** + * @type {?Model} + */ + model: { + type: Object, + notify: true, + }, + /** + * @type {string} + */ + url: { + type: String, + }, + /** + * @type {?Language} + */ + sourceLanguage_: { + type: Object, + }, + /** + * @type {?Language} + */ + targetLanguage_: { + type: Object, + }, + /** + * @type {string} + */ + defaultModelId: { + type: String, + value: 'prod', + } + }; + } + + static get observers() { + return [ + 'routeActiveUpdated_(routeActive)', + + 'modelsUpdated_(modelConfigurations)', + 'sourceLanguagesUpdated_(sourceLanguages, routeData)', + 'targetLanguagesUpdated_(targetLanguages, routeData)', + + 'sourceLanguageUpdated_(sourceLanguage_)', + 'targetLanguageUpdated_(targetLanguage_)', + 'modelListUpdated_(modelList, routeData)', + 'modelUpdated_(model)', + ]; + } + + /** + * Resets the route data if the route is inactive. + * @param {boolean} routeActive The active state of the route. + * @private + */ + routeActiveUpdated_(routeActive) { + if (!routeActive) { + this.set('routeData', {}); + } + } + + /** + * Sets the sourceLanguage if a new source language matches the route + * path or marks it as undefined. + * @param {Array} sourceLanguages A list of source languages. + * @param {{sourceLanguage: string}} routeData The current route paths. + * @private + */ + sourceLanguagesUpdated_(sourceLanguages, routeData) { + if (this.routeActive && sourceLanguages) { + for (var i = 0; i < sourceLanguages.length; ++i) { + if (routeData.sourceLanguage == sourceLanguages[i].code) { + this.$.sourceSelector.forceSelection(sourceLanguages[i]); + return; + } + } + } + } + + /** + * Selects the available target language list based on the new selected source + * language. + * @param {Language} sourceLanguage The selected source language index. + * @private + */ + sourceLanguageUpdated_(sourceLanguage) { + if (sourceLanguage == undefined) { + this.set('targetLanguages', []); + return; + } + + this.set('routeData.sourceLanguage', sourceLanguage.code); + + var targetLanguages = []; + for (var key in this.languagePairToModelMap_[sourceLanguage.code]) { + targetLanguages.push(this.languageToNameMap_[key]); + } + targetLanguages.sort(sort_); + this.set('targetLanguage', undefined); + this.set('targetLanguages', targetLanguages); + } + + /** + * Sets the targetLanguage if a new target language matches the route + * path or marks it as undefined. + * @param {Array} targetLanguages A list of target languages. + * @param {{targetLanguage: string}} routeData The current route paths. + * @private + */ + targetLanguagesUpdated_(targetLanguages, routeData) { + if (this.routeActive && targetLanguages) { + for (var i = 0; i < targetLanguages.length; ++i) { + if (routeData.targetLanguage == targetLanguages[i].code) { + this.$.targetSelector.forceSelection(targetLanguages[i]); + return; + } + } + } + } + + /** + * Selects the available model list based on the new selected target + * language. + * @param {Language} targetLanguage The selected target language index. + * @private + */ + targetLanguageUpdated_(targetLanguage) { + this.set('model', undefined); + if (targetLanguage == undefined) { + this.set('modelList', []); + return; + } + + let sourceLanguage = this.sourceLanguage_; + this.set('routeData.targetLanguage', targetLanguage.code); + var models = []; + var targetLanguageMap = this.languagePairToModelMap_[sourceLanguage.code]; + for (var key in targetLanguageMap[targetLanguage.code]) { + models.push(targetLanguageMap[targetLanguage.code][key]); + } + this.set('modelList', models); + } + + /** + * Sets the modelIndex if a new model matches the route path or marks it as + * undefined. + * @param {?Array} modelList A list of models. + * @param {{modelId: string}} routeData The current route paths. + * @private + */ + modelListUpdated_(modelList, routeData) { + if (this.routeActive && modelList) { + for (var i = 0; i < modelList.length; ++i) { + if (routeData.modelId == modelList[i].id) { + this.set('model', modelList[i]); + return; + } + } + } + + if (modelList && modelList.length >= 1) { + // Chose the default model if it exists, otherwise choose the first entry. + // This ensures that the ordering of models does't impact the default + // selection. + for (var i = 0; i < modelList.length; ++i) { + if (this.defaultModelId == modelList[i].id) { + this.set('model', modelList[i]); + return; + } + } + this.set('model', modelList[0]); + } + } + + /** + * Updates the selected model with the current model index. + * @param {?Model} model The current selected model index. + * @private + */ + modelUpdated_(model) { + if (!model) { + return; + } + + this.set('routeData.modelId', this.model.id); + } + + /** + * Updates the set of available language sets and models. + * @param {{configuration: !Array}} modelConfigurations A list of + * models. + * @private + */ + modelsUpdated_(modelConfigurations) { + var models = modelConfigurations.configuration; + + this.languageToNameMap_ = {}; + this.languagePairToModelMap_ = {}; + + for (var i = 0; i < models.length; ++i) { + let model = models[i]; + // Extract the language codes and store the code to language mappings. + var source_language = model.source_language.code; + this.languageToNameMap_[source_language] = model.source_language; + var target_language = model.target_language.code; + this.languageToNameMap_[target_language] = model.target_language; + + // Create the first level nested map, from source languages to target + // language maps. + var targetLanguageMap; + if (source_language in this.languagePairToModelMap_) { + targetLanguageMap = this.languagePairToModelMap_[source_language]; + } else { + targetLanguageMap = {}; + this.languagePairToModelMap_[source_language] = targetLanguageMap; + } + + // Create the second level nested map, from target languages to model + // maps. + var model_map; + if (target_language in targetLanguageMap) { + model_map = targetLanguageMap[target_language]; + } else { + model_map = {}; + targetLanguageMap[target_language] = model_map; + } + + // Store the mapping from a model id to a model. + model_map[model.id] = model; + } + + // Prepare the initial set of available source languages. + var sourceLanguageList = []; + for (var key in this.languagePairToModelMap_) { + sourceLanguageList.push(this.languageToNameMap_[key]); + } + sourceLanguageList.sort(sort_); + this.set('sourceLanguages', sourceLanguageList); + } +} + +customElements.define(QueryCard.is, QueryCard); + +exports = {QueryCard}; + +/** + * Returns the ordering of two language's based on their name. + * @param {!Language} a The first language to compare. + * @param {!Language} b The second language to compare. + * @return {number} Negative if a comes before b. + */ +function sort_(a, b) { + if (a.name != b.name) { + return a.name < b.name ? -1 : 1; + } + return 0; +} diff --git a/tensor2tensor/insights/polymer/translation_result/translation-result.html b/tensor2tensor/insights/polymer/translation_result/translation-result.html new file mode 100644 index 000000000..11615ed74 --- /dev/null +++ b/tensor2tensor/insights/polymer/translation_result/translation-result.html @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensor2tensor/insights/polymer/translation_result/translation-result.js b/tensor2tensor/insights/polymer/translation_result/translation-result.js new file mode 100644 index 000000000..c2ef46eeb --- /dev/null +++ b/tensor2tensor/insights/polymer/translation_result/translation-result.js @@ -0,0 +1,111 @@ +/** + * @license + * Copyright 2017 The Tensor2Tensor Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +goog.module('t2t.TranslationResult'); + +/** + * `` Presents zero or more visualization of a translation. + * + * This inspects the set of visualization fields provided and triggers the + * corresponding visualization component in the set of available views in tabbed + * layout. + * + * ### Usage + * + * + * + */ +class TranslationResult extends Polymer.Element { + static get is() { + return 'translation-result'; + } + + static get properties() { + return { + /** + * @type {{ + * response: { + * visualization_name: string, + * title: string, + * name: string, + * query_processing: ?Object, + * search_graph: ?Object, + * word_heat_map: ?Object, + * }, + * model: !Model, + * query: string + * }} + */ + result: { + type: Object, + observer: 'resultUpdated_', + }, + /** + * @type {string} + */ + view: { + type: String, + value: 'processing', + }, + }; + } + + /** + * Sets internal data structures given the updated result. + * @private + */ + resultUpdated_() { + var response = this.result.response; + if (!response || !response.result || response.result.length == 0) { + return; + } + + for (var i = 0; i < response.result.length; ++i) { + let visualizationResult = response.result[i]; + + // Dynamically create the visualization element based on the name field. + // This will enable multiple versions of the same visualization to be + // created later on when the data mapping is generalized. + let analysisEle = document.createElement( + visualizationResult.visualization_name + '-visualization'); + + // Set the generic attributes. + analysisEle.name = visualizationResult.name; + analysisEle.model = this.result.model; + analysisEle.query = this.result.query; + + // Set the visualization specific data attribute. + // TODO(kstevens): Cleanup by setting visualization_name the same as the + // protobuffer field names so we don't need this mapping. + if (visualizationResult.visualization_name == 'processing') { + analysisEle.data = visualizationResult.query_processing; + } else if (visualizationResult.visualization_name == 'attention') { + analysisEle.data = visualizationResult.word_heat_map; + } else if (visualizationResult.visualization_name == 'graph') { + analysisEle.data = visualizationResult.search_graph; + } + + Polymer.dom(this.$.view).appendChild(analysisEle); + } + // Don't make assumptions about which visualizations are available. Instead + // preselect the initial view based on data. + this.set('view', response.result[0].name); + } +} + +customElements.define(TranslationResult.is, TranslationResult); + +exports = {TranslationResult}; diff --git a/tensor2tensor/insights/query_processor.py b/tensor2tensor/insights/query_processor.py new file mode 100644 index 000000000..0aed3a313 --- /dev/null +++ b/tensor2tensor/insights/query_processor.py @@ -0,0 +1,43 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A base class for all query processing classes.""" + + +class QueryProcessor(object): + """Base class for any class that wants to process sequence queries. + + QueryProcessor classes are expected to convert a string query to a series of + visualization structures. + + TODO(kstevens): Define how the visualization structures should look once the + protos are in better shape. + """ + + def __init__(self): + pass + + def process(self, query): + """Returns the generated visualizations for query. + + Args: + query: The string input + + Returns: + A dictionary with one key: 'result' that maps to a list of visualization + objects. + """ + del query + return {"result": []} diff --git a/tensor2tensor/insights/server.py b/tensor2tensor/insights/server.py new file mode 100644 index 000000000..b82f988d4 --- /dev/null +++ b/tensor2tensor/insights/server.py @@ -0,0 +1,180 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A GUnicorn + Flask Debug Frontend for Transformer models.""" + +from flask import Flask +from flask import jsonify +from flask import request +from flask import send_from_directory +from gunicorn.app.base import BaseApplication +from gunicorn.six import iteritems +from tensor2tensor.insights import transformer_model + +import tensorflow as tf + +flags = tf.flags +FLAGS = flags.FLAGS + +flags.DEFINE_string("t2t_model_dir", "", "") +flags.DEFINE_string("t2t_data_dir", "", "") +flags.DEFINE_string("static_path", "", + "Path to static javascript and html files to serve.") + + +class DebugFrontendApplication(BaseApplication): + """A local custom application for GUnicorns. + + This custom application enables us to run with a custom main that parses + tensorflow ops and does some internal setup prior to processing queries. The + underlying app registered instances of this class will be forked. + """ + + def __init__(self, app, options=None): + """Creates the GUnicorn application. + + Args: + app: A Flask application that will process requests. + options: A dict of GUnicorn options. + """ + self.options = options or {} + self.application = app + super(DebugFrontendApplication, self).__init__() + + def load_config(self): + """Loads the configuration.""" + config = dict([(key, value) for key, value in iteritems(self.options) + if key in self.cfg.settings and value is not None]) + for key, value in iteritems(config): + self.cfg.set(key.lower(), value) + + def load(self): + """Loads the application. + + Returns: + The Flask application. + """ + return self.application + + +def main(_): + # Create the models we support: + processors = {} + transformer_key = ("en", "de", "transformers_wmt32k") + # TODO(kstevens): Turn this into a text proto configuration that's read in on + # startup. + processors[transformer_key] = transformer_model.TransformerModel( + FLAGS.t2t_data_dir, FLAGS.t2t_model_dir) + + # Create flask to serve all paths starting with '/static' from the static + # path. + app = Flask( + __name__.split(".")[0], + static_url_path="/static", + static_folder=FLAGS.static_path) + + # Disable static file caching. + app.config["SEND_FILE_MAX_AGE_DEFAULT"] = 0 + + @app.route("/api/language_list/") + def language_list(): # pylint: disable=unused-variable + """Responds to /api/language_list with the supported languages. + + Returns: + JSON for the languages. + """ + # TODO(kstevens): Figure this out automatically by processing the + # configuration. + result = { + "language": [ + {"code": "en", "name": "English"}, + {"code": "de", "name": "German"}, + ], + } + return jsonify(result) + + @app.route("/api/list_models/") + def list_models(): # pylint: disable=unused-variable + """Responds to /api/list_models with the supported modes. + + + Returns: + JSON for the supported models. + """ + # TODO(kstevens): Turn this into a configuration text proto that's read in + # on startup. + result = { + "configuration": [ + { + "id": "transformers_wmt32k", + "source_language": { + "code": "en", + "name": "English", + }, + "target_language": { + "code": "de", + "name": "German", + }, + }, + ], + } + return jsonify(result) + + @app.route("/debug", methods=["GET"]) + def query(): # pylint: disable=unused-variable + """Responds to /debug with processing results. + + Returns: + JSON for the query's result. + """ + query = request.args.get("source") + source_language = request.args.get("sl") + target_language = request.args.get("tl") + model_name = request.args.get("id") + processor = processors[(source_language, target_language, model_name)] + return jsonify(processor.process(query)) + + # Catchall for all other paths. Any other path should get the basic index + # page, the polymer side will determine what view to show and what REST calls + # to make for data. + @app.route("/", defaults={"path": ""}) + @app.route("/") + def root(path): # pylint: disable=unused-variable + """Responds to all other non-static paths with index.html. + + Args: + path: Unused path. + + Returns: + The landing page html text. + """ + del path + return send_from_directory(FLAGS.static_path, "index.html") + + # Run the server. + tf.logging.info("############# READY ##################") + options = { + "bind": ":8010", + "timeout": 600, + "workers": 4, + "reload": True, + "spew": True, + "worker_class": "gevent", + } + DebugFrontendApplication(app, options).run() + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/insights/transformer_model.py b/tensor2tensor/insights/transformer_model.py new file mode 100644 index 000000000..570dc0174 --- /dev/null +++ b/tensor2tensor/insights/transformer_model.py @@ -0,0 +1,300 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A QueryProcessor using the Transformer framework.""" + +from collections import deque + +import glob +import os +import shutil +import time + +import numpy as np + +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.insights import graph +from tensor2tensor.insights import query_processor +from tensor2tensor.tpu import tpu_trainer +from tensor2tensor.tpu import tpu_trainer_lib +from tensor2tensor.utils import decoding +from tensor2tensor.utils import usr_dir + +import tensorflow as tf +from tensorflow.python import debug as tfdbg + +flags = tf.flags +FLAGS = flags.FLAGS + + +def topk_watch_fn(feeds, fetches): + """TFDBG watch function for transformer beam search nodes. + + Args: + feeds: Unused. Required by tfdbg. + fetches: Unused. Required by tfdbg. + + Returns: + a WatchOptions instance that will capture all beam search ops. + """ + del fetches, feeds + return tfdbg.WatchOptions( + node_name_regex_whitelist= + ".*grow_(finished|alive)_(topk_scores|topk_seq).*", + debug_ops=["DebugIdentity"]) + + +def seq_filter(datum, tensor): + """TFDBG data directory filter for capturing topk_seq operation dumps. + + Args: + datum: A datum to filter by node_name. + tensor: Unused. Required by tfdbg + + Returns: + a true when datum should be returned. + """ + del tensor + return "topk_seq" in datum.node_name + + +def scores_filter(datum, tensor): + """TFDBG data directory filter for capturing topk_scores operation dumps. + + Args: + datum: A datum to filter by node_name. + tensor: Unused. Required by tfdbg + + Returns: + a true when datum should be returned. + """ + del tensor + return "topk_scores" in datum.node_name + + +def sequence_key(sequence): + """Returns a key for mapping sequence paths to graph vertices.""" + return ":".join([str(s) for s in sequence]) + + +class TransformerModel(query_processor.QueryProcessor): + """A QueryProcessor using a trained Transformer model. + + This processor supports the following visualizations: + - processing: Basic source and target text processing + - graph: A graph of the beam search process. + """ + + def __init__(self, data_dir, model_dir): + """Creates the Transformer estimator. + + Args: + data_dir: The training data directory. + model_dir: The trained model directory. + """ + # Do the pre-setup tensor2tensor requires for flags and configurations. + FLAGS.output_dir = model_dir + FLAGS.data_dir = data_dir + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + data_dir = os.path.expanduser(data_dir) + + # Create the basic hyper parameters. + self.hparams = tpu_trainer.create_hparams() + self.hparams.add_hparam("data_dir", os.path.expanduser(data_dir)) + tpu_trainer_lib.add_problem_hparams(self.hparams, FLAGS.problems) + + decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) + decode_hp.add_hparam("shards", 1) + decode_hp.add_hparam("shard_id", 0) + + # Create the estimator and final hyper parameters. + self.estimator = tpu_trainer_lib.create_estimator( + FLAGS.model, + self.hparams, + tpu_trainer.create_run_config(), + decode_hp, use_tpu=False) + + # Fetch the vocabulary and other helpful variables for decoding. + self.source_vocab = self.hparams.problems[0].vocabulary["inputs"] + self.targets_vocab = self.hparams.problems[0].vocabulary["targets"] + self.const_array_size = 10000 + + # Prepare the Transformer's debug data directory. + run_dirs = sorted(glob.glob(os.path.join("/tmp/t2t_server_dump", "run_*"))) + for run_dir in run_dirs: + shutil.rmtree(run_dir) + + def process(self, query): + """Returns the visualizations for query. + + Args: + query: The query to process. + + Returns: + A dictionary of results with processing and graph visualizations. + """ + tf.logging.info("Processing new query [%s]" %query) + + # Create the new TFDBG hook directory. + hook_dir = "/tmp/t2t_server_dump/request_%d" %int(time.time()) + os.makedirs(hook_dir) + hooks = [tfdbg.DumpingDebugHook(hook_dir, watch_fn=topk_watch_fn)] + + # TODO(kstevens): This is extremely hacky and slow for responding to + # queries. Figure out a reasonable way to pre-load the model weights before + # forking and run queries through the estimator quickly. + def server_input_fn(): + """Generator that returns just the current query.""" + for _ in range(1): + input_ids = self.source_vocab.encode(query) + input_ids.append(text_encoder.EOS_ID) + x = [1, 100, len(input_ids)] + input_ids + x += [0] * (self.const_array_size - len(x)) + d = { + "inputs": np.array(x).astype(np.int32), + "problem_choice": np.array(0).astype(np.int32) + } + yield d + + def input_fn(): + """Generator that returns just the current query.""" + gen_fn = decoding.make_input_fn_from_generator(server_input_fn()) + example = gen_fn() + # TODO(kstevens): Make this method public + # pylint: disable=protected-access + return decoding._interactive_input_tensor_to_features_dict( + example, self.hparams) + + # Make the prediction for the current query. + result_iter = self.estimator.predict(input_fn, hooks=hooks) + result = None + for result in result_iter: + break + + # Extract the beam search information by reading the dumped TFDBG event + # tensors. We first read and record the per step beam sequences then record + # the beam scores. Afterwards we align the two sets of values to create the + # full graph vertices and edges. + decoding_graph = graph.Graph() + run_dirs = sorted(glob.glob(os.path.join(hook_dir, "run_*"))) + for run_dir in run_dirs: + # Record the different completed and active beam sequence ids. + alive_sequences = deque() + finished_sequences = deque() + + # Make the root vertex since it always needs to exist. + decoding_graph.get_vertex(sequence_key([0])) + + # Create the initial vertices and edges for the active and finished + # sequences. We uniquely define each vertex using it's full sequence path + # as a string to ensure there's no collisions when the same step has two + # instances of an output id. + dump_dir = tfdbg.DebugDumpDir(run_dir, validate=False) + seq_datums = dump_dir.find(predicate=seq_filter) + for seq_datum in seq_datums: + sequences = np.array(seq_datum.get_tensor()).astype(int)[0] + if "alive" in seq_datum.node_name: + alive_sequences.append(sequences) + if "finished" in seq_datum.node_name: + finished_sequences.append(sequences) + + for sequence in sequences: + pieces = self.targets_vocab.decode_list(sequence) + index = sequence[-1] + if index == 0: + continue + + parent = decoding_graph.get_vertex(sequence_key(sequence[:-1])) + current = decoding_graph.get_vertex(sequence_key(sequence)) + + edge = decoding_graph.add_edge(parent, current) + edge.data["label"] = pieces[-1] + edge.data["label_id"] = index + # Coerce the type to be a python bool. Numpy bools can't be easily + # converted to JSON. + edge.data["completed"] = bool(index == 1) + + # Examine the score results and store the scores with the associated edges + # in the graph. We fetch the vertices (and relevant edges) by looking + # into the saved beam sequences stored above. + score_datums = dump_dir.find(predicate=scores_filter) + for score_datum in score_datums: + if "alive" in score_datum.node_name: + sequences = alive_sequences.popleft() + + if "finished" in score_datum.node_name: + sequences = finished_sequences.popleft() + + scores = np.array(score_datum.get_tensor()).astype(float)[0] + for i, score in enumerate(scores): + sequence = sequences[i] + if sequence[-1] == 0: + continue + + vertex = decoding_graph.get_vertex(sequence_key(sequence)) + edge = decoding_graph.edges[vertex.in_edges[0]] + edge.data["score"] = score + edge.data["log_probability"] = score + edge.data["total_log_probability"] = score + + # Delete the hook dir to save disk space + shutil.rmtree(hook_dir) + + # Create the graph visualization data structure. + graph_vis = { + "visualization_name": "graph", + "title": "Graph", + "name": "graph", + "search_graph": decoding_graph.to_dict(), + } + + # Create the processing visualization data structure. + # TODO(kstevens): Make this method public + # pylint: disable=protected-access + output_ids = decoding._save_until_eos(result["outputs"].flatten(), False) + output_pieces = self.targets_vocab.decode_list(output_ids) + output_token = [{"text": piece} for piece in output_pieces] + output = self.targets_vocab.decode(output_ids) + + source_steps = [{ + "step_name": "Initial", + "segment": [{ + "text": query + }], + }] + + target_steps = [{ + "step_name": "Initial", + "segment": output_token, + }, { + "step_name": "Final", + "segment": [{ + "text": output + }], + }] + + processing_vis = { + "visualization_name": "processing", + "title": "Processing", + "name": "processing", + "query_processing": { + "source_processing": source_steps, + "target_processing": target_steps, + }, + } + + return { + "result": [processing_vis, graph_vis], + } From 2be0cbb2510bddb3d4c66e0e145b148299fb0ef0 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Mon, 18 Dec 2017 21:18:54 -0800 Subject: [PATCH 0669/4095] Work on VAE Transformer PiperOrigin-RevId: 179508117 --- tensor2tensor/models/transformer_vae.py | 114 +++++++++++++----------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 5b540190a..d779b093f 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -324,29 +324,32 @@ def multinomial_sample(x, vocab_size, temperature): return tf.to_int32(reshaped_samples) -def ae_latent_sample(t_c, inputs, ed, embed, iters, hparams): +def ae_latent_sample(latents_dense, inputs, ed, embed, iters, hparams): """Sample from the latent space in the autoencoder.""" - t_pred = decode_transformer(inputs, ed, t_c, hparams, "extra") - t_pred = tf.layers.dense(t_pred, 2**16, name="extra_logits") - t_bit = multinomial_sample(t_pred, 2**16, hparams.sampling_temp) + latents_pred = decode_transformer(inputs, ed, latents_dense, hparams, "extra") + latents_pred = tf.layers.dense(latents_pred, 2**16, name="extra_logits") + latents_discrete = multinomial_sample( + latents_pred, 2**16, hparams.sampling_temp) - def next_bit(t_bit, i): - t_bit_prev = t_bit + def next_bit(latents_discrete, i): + latents_discrete_prev = latents_discrete with tf.variable_scope(tf.get_variable_scope(), reuse=True): - t_c = embed(t_bit) - t_pred = decode_transformer(inputs, ed, t_c, hparams, "extra") - t_pred = tf.layers.dense(t_pred, 2**16, name="extra_logits") - t_bit = multinomial_sample(t_pred, 2**16, hparams.sampling_temp) - return tf.concat([t_bit_prev[:, :(i+1), :], - t_bit[:, (i+1):, :]], axis=1) + latents_dense = embed(latents_discrete) + latents_pred = decode_transformer( + inputs, ed, latents_dense, hparams, "extra") + latents_pred = tf.layers.dense(latents_pred, 2**16, name="extra_logits") + latents_discrete = multinomial_sample( + latents_pred, 2**16, hparams.sampling_temp) + return tf.concat([latents_discrete_prev[:, :(i+1), :], + latents_discrete[:, (i+1):, :]], axis=1) for i in xrange(iters): - t_bit = next_bit(t_bit, i) - return t_bit + latents_discrete = next_bit(latents_discrete, i) + return latents_discrete def ae_transformer_internal(inputs, targets, target_space, hparams, - beam_size, cache=None, predict_mask=1.0): + cache=None, predict_mask=1.0): """AE Transformer, main step used for training.""" # Summaries break with the do_refine cond, turn them off in that case. global _DO_SUMMARIES @@ -354,8 +357,7 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, _DO_SUMMARIES = False # Prepare. - orig_targets = targets - batch_size = common_layers.shape_list(orig_targets)[0] + batch_size = common_layers.shape_list(inputs)[0] targets = tf.reshape(targets, [batch_size, -1, 1, hparams.hidden_size]) # Encoder. @@ -375,22 +377,24 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, targets_c = compress(targets, False, hparams, "compress") if hparams.mode != tf.estimator.ModeKeys.PREDICT: # Compress and bottleneck. - t_c, t_bit, vc_loss, _ = bottleneck(targets_c, hparams, 2*2048, "vc") + latents_dense, latents_discrete, extra_loss, _ = bottleneck( + targets_c, hparams, 2*2048, "vc") if _DO_SUMMARIES: - tf.summary.histogram("bit0", tf.reshape(t_bit[:, 0, :], [-1])) + tf.summary.histogram("b0", tf.reshape(latents_discrete[:, 0, :], [-1])) pc = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.95 pc = pc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 cond = tf.less(tf.random_uniform([batch_size]), pc) - t_c = tf.where(cond, t_c, targets_c) + latents_dense = tf.where(cond, latents_dense, targets_c) # TODO(lukaszkaiser): return extra losses batchwise, multiply before mean. - losses["extra"] = vc_loss * tf.reduce_mean(tf.to_float(cond)) + losses["extra"] = extra_loss * tf.reduce_mean(tf.to_float(cond)) # Extra loss predicting latent code from input. Discrete only. if hparams.bottleneck_kind not in ["dense", "vae"]: - t_pred = decode_transformer( - inputs, ed, tf.stop_gradient(t_c), hparams, "extra") - t_pred = tf.layers.dense(t_pred, 2**16, name="extra_logits") + latents_pred = decode_transformer( + tf.stop_gradient(inputs), tf.stop_gradient(ed), + tf.stop_gradient(latents_dense), hparams, "extra") + latents_pred = tf.layers.dense(latents_pred, 2**16, name="extra_logits") losses["latent_pred"] = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=t_bit, logits=t_pred) + labels=latents_discrete, logits=latents_pred) losses["latent_pred"] = tf.reduce_mean( losses["latent_pred"] * 0.5 * tf.to_float(cond)) else: @@ -405,27 +409,25 @@ def bn_inputs(): bn_inputs, lambda: inputs_c) ptc = 1.0 - common_layers.inverse_lin_decay(200000) * 0.5 ptc = ptc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 - t_c = tf.where(tf.less(tf.random_uniform([batch_size]), ptc), - t_c, inputs_c) + latents_dense = tf.where(tf.less(tf.random_uniform([batch_size]), ptc), + latents_dense, inputs_c) else: if hparams.bottleneck_kind in ["dense", "vae"]: inputs_c = decode_transformer(inputs, ed, targets_c, hparams, "dec_c") - t_c, _, _, _ = bottleneck(inputs_c, hparams, 2*2048, "vc") + latents_dense, _, _, _ = bottleneck(inputs_c, hparams, 2*2048, "vc") else: latent_len = common_layers.shape_list(targets_c)[1] _, _, _, embed = bottleneck(targets_c, hparams, 2*2048, "vc") - t_c = tf.zeros_like(targets_c[:, :latent_len, :, :]) + latents_dense = tf.zeros_like(targets_c[:, :latent_len, :, :]) if cache is None: - cache = ae_latent_sample(t_c, inputs, ed, embed, 8, hparams) - cache = cache[0, :, :] - cache = tf.reshape(cache, [1, latent_len, 1]) - cache = tf.tile(cache, [beam_size, 1, 1]) - t_c = embed(cache) + cache = ae_latent_sample(latents_dense, inputs, ed, embed, 8, hparams) + latents_dense = embed(cache) # Postprocess. - d = t_c + d = latents_dense pos = tf.get_variable("pos", [1, 1000, 1, hparams.hidden_size]) - pos = pos[:, :common_layers.shape_list(t_c)[1] + 1, :, :] - t_c = tf.pad(t_c, [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos + pos = pos[:, :common_layers.shape_list(latents_dense)[1] + 1, :, :] + latents_dense = tf.pad(latents_dense, + [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos # Masking. if hparams.do_mask: @@ -444,23 +446,26 @@ def bn_inputs(): d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j) d = decompress_step(d, hparams, i > 0, False, "decompress_%d" % j) targets = mask * targets + (1.0 - mask) * d - targets = tf.concat([tf.reverse(t_c, [1]), targets], axis=1) + targets = tf.concat([tf.reverse(latents_dense, [1]), targets], axis=1) res = decode_transformer(inputs, ed, targets, hparams, "decoder") if hparams.do_ae: - res = res[:, common_layers.shape_list(t_c)[1]:, :, :] + res = res[:, common_layers.shape_list(latents_dense)[1]:, :, :] if hparams.do_mask and hparams.do_refine: def refine_res(): return residual_conv(res, 1, (5, 1), hparams, "refine") masked_batches = tf.reduce_sum(mask, axis=[1, 2, 3]) all_masked = tf.less(masked_batches, 0.1) res = tf.where(all_masked, refine_res(), res) - latent_time = tf.less(200000, tf.to_int32(tf.train.get_global_step())) + # We'll start training only the extra model of latents after 400K steps. + # Before we train only this, we decrease lr for other weights. + latent_time = tf.less(300000, tf.to_int32(tf.train.get_global_step())) + decreased_lr = common_layers.inverse_lin_decay(400000) losses["latent_pred"] *= tf.to_float(latent_time) losses["extra"] *= 1.0 - tf.to_float(latent_time) - res = tf.cond(latent_time, - lambda: tf.stop_gradient(0.7 * res) + 0.3 * res, - lambda: res) + decreased_lr_res = tf.stop_gradient(decreased_lr * res) + decreased_lr_res += (1.0 - decreased_lr) * res + res = tf.cond(latent_time, lambda: decreased_lr_res, lambda: res) return res, losses, cache @@ -481,27 +486,26 @@ def body(self, features): if self._hparams.drop_inputs: inputs = None reuse = "cache_raw" in features - beam_size = self._decode_hparams.beam_size with tf.variable_scope(tf.get_variable_scope(), reuse=reuse): res, loss, _ = ae_transformer_internal( inputs, features["targets"], features["target_space_id"], - self._hparams, beam_size, features.get("cache_raw", None), + self._hparams, features.get("cache_raw", None), predict_mask=self.predict_mask) return res, loss def prepare_features_for_infer(self, features): if not self._hparams.do_ae: return features - beam_size = self._decode_hparams.beam_size - inputs = tf.zeros([beam_size, 1, 1, self._hparams.hidden_size]) + beam_batch_size = self._decode_hparams.beam_size + beam_batch_size *= self._decode_hparams.batch_size + inputs = tf.zeros([beam_batch_size, 1, 1, self._hparams.hidden_size]) inputs = inputs if "inputs" in features else None if self._hparams.drop_inputs or not self.has_input: inputs = None - targets = tf.zeros([beam_size, 1, 1, self._hparams.hidden_size]) + targets = tf.zeros([beam_batch_size, 1, 1, self._hparams.hidden_size]) with tf.variable_scope("body"): _, _, cache = ae_transformer_internal( - inputs, targets, features["target_space_id"], - self._hparams, beam_size) + inputs, targets, features["target_space_id"], self._hparams) features["cache_raw"] = cache def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, @@ -531,6 +535,16 @@ def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, logits, _ = self(features) # pylint: disable=not-callable samples = tf.argmax(logits, axis=-1) + # More steps. + self.predict_mask = 0.0 # Use the provided targets this time. + how_many_more_steps = 0 # Set to 1 or more for Gibbs-like sampling. + for _ in xrange(how_many_more_steps): + with tf.variable_scope(tf.get_variable_scope(), reuse=True): + features["targets"] = samples + logits, _ = self(features) # pylint: disable=not-callable + samples = tf.argmax(logits, axis=-1) + + self.predict_mask = 1.0 if inputs_old is not None: # Restore to not confuse Estimator. features["inputs"] = inputs_old return samples From a2f1ee97361e97a42b535e6f953c021e274574f0 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Tue, 19 Dec 2017 14:11:45 -0800 Subject: [PATCH 0670/4095] Add features (export, SessionConfig, Parallelism, hooks) to TPU codepath PiperOrigin-RevId: 179602110 --- setup.py | 1 + tensor2tensor/bin/__init__.py | 15 ++ tensor2tensor/bin/t2t-decoder | 11 +- tensor2tensor/bin/t2t-tpu-trainer | 56 ++++-- tensor2tensor/bin/t2t-trainer | 1 + tensor2tensor/bin/t2t_decoder.py | 11 +- tensor2tensor/bin/t2t_trainer.py | 1 + tensor2tensor/data_generators/problem.py | 112 ++++++----- .../data_generators/translate_enzh.py | 2 +- tensor2tensor/insights/transformer_model.py | 8 +- tensor2tensor/layers/modalities.py | 2 +- tensor2tensor/models/resnet.py | 1 + tensor2tensor/models/shake_shake.py | 3 +- tensor2tensor/models/vanilla_gan.py | 2 +- tensor2tensor/models/xception.py | 4 +- tensor2tensor/tpu/tpu_trainer.py | 56 ++++-- tensor2tensor/tpu/tpu_trainer_lib.py | 170 +++++++++++++++-- tensor2tensor/tpu/tpu_trainer_lib_test.py | 6 +- tensor2tensor/utils/data_reader.py | 2 + tensor2tensor/utils/devices.py | 176 ++++++++++-------- tensor2tensor/utils/expert_utils.py | 9 +- tensor2tensor/utils/input_fn_builder.py | 1 + tensor2tensor/utils/model_builder.py | 4 +- tensor2tensor/utils/registry.py | 6 +- tensor2tensor/utils/t2t_model.py | 48 +---- tensor2tensor/utils/trainer_utils.py | 3 +- .../TransformerVisualization.ipynb | 2 +- 27 files changed, 488 insertions(+), 225 deletions(-) create mode 100644 tensor2tensor/bin/__init__.py diff --git a/setup.py b/setup.py index 8870809ae..1d7c28305 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ 'tensor2tensor/bin/t2t-datagen', 'tensor2tensor/bin/t2t-decoder', 'tensor2tensor/bin/t2t-make-tf-configs', + 'tensor2tensor/bin/t2t-tpu-trainer', ], install_requires=[ 'bz2file', diff --git a/tensor2tensor/bin/__init__.py b/tensor2tensor/bin/__init__.py new file mode 100644 index 000000000..3f714ce1f --- /dev/null +++ b/tensor2tensor/bin/__init__.py @@ -0,0 +1,15 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index de8bc7d50..c9ad7f9c7 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -58,10 +58,11 @@ flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") def create_hparams(): - hparams = tpu_trainer.create_hparams() - hparams.add_hparam("data_dir", os.path.expanduser(FLAGS.data_dir)) - tpu_trainer_lib.add_problem_hparams(hparams, FLAGS.problems) - return hparams + return tpu_trainer_lib.create_hparams( + FLAGS.hparams_set, + FLAGS.hparams, + data_dir=os.path.expanduser(FLAGS.data_dir), + problem_name=FLAGS.problems) def create_decode_hparams(): @@ -90,7 +91,7 @@ def decode(estimator, hparams, decode_hp): def main(_): tf.logging.set_verbosity(tf.logging.INFO) usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - FLAGS.use_tpu = False + FLAGS.use_tpu = False # decoding not supported on TPU hp = create_hparams() decode_hp = create_decode_hparams() diff --git a/tensor2tensor/bin/t2t-tpu-trainer b/tensor2tensor/bin/t2t-tpu-trainer index d09022710..19468a59c 100644 --- a/tensor2tensor/bin/t2t-tpu-trainer +++ b/tensor2tensor/bin/t2t-tpu-trainer @@ -20,12 +20,14 @@ from __future__ import division from __future__ import print_function import os +import sys # Dependency imports from tensor2tensor import models # pylint: disable=unused-import from tensor2tensor import problems as problems_lib # pylint: disable=unused-import -from tensor2tensor.tpu import tpu_trainer_lib as lib +from tensor2tensor.tpu import tpu_trainer_lib +from tensor2tensor.utils import decoding from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import from tensor2tensor.utils import registry from tensor2tensor.utils import usr_dir @@ -45,7 +47,7 @@ flags.DEFINE_string("t2t_usr_dir", "", flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") flags.DEFINE_integer("iterations_per_loop", 1000, "Number of iterations in a TPU training loop.") -flags.DEFINE_bool("use_tpu", True, "Whether to use TPU.") +flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") # To maintain compatibility with some internal libs, we guard against these flag # definitions possibly erroring. Apologies for the ugliness. @@ -66,14 +68,14 @@ def get_problem_name(): def create_hparams(): - hparams = registry.hparams(FLAGS.hparams_set)() - if FLAGS.hparams: - hparams = hparams.parse(FLAGS.hparams) - return hparams + return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) def create_experiment_fn(): - return lib.create_experiment_fn( + use_validation_monitor = (FLAGS.schedule in + ["train_and_evaluate", "continuous_train_and_eval"] + and FLAGS.local_eval_frequency) + return tpu_trainer_lib.create_experiment_fn( FLAGS.model, get_problem_name(), os.path.expanduser(FLAGS.data_dir), @@ -81,11 +83,20 @@ def create_experiment_fn(): FLAGS.eval_steps, FLAGS.local_eval_frequency, FLAGS.schedule, + export=FLAGS.export_saved_model, + decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), + use_tfdbg=FLAGS.tfdbg, + use_dbgprofile=FLAGS.dbgprofile, + use_validation_monitor=use_validation_monitor, + eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, + eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, + eval_early_stopping_metric_minimize=FLAGS. + eval_early_stopping_metric_minimize, use_tpu=FLAGS.use_tpu) -def create_run_config(): - return lib.create_run_config( +def create_run_config(hp): + return tpu_trainer_lib.create_run_config( model_dir=os.path.expanduser(FLAGS.output_dir), master=FLAGS.master, iterations_per_loop=FLAGS.iterations_per_loop, @@ -93,11 +104,30 @@ def create_run_config(): log_device_placement=FLAGS.log_device_placement, save_checkpoints_steps=max(FLAGS.iterations_per_loop, FLAGS.local_eval_frequency), + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, num_gpus=FLAGS.worker_gpu, gpu_order=FLAGS.gpu_order, shard_to_cpu=FLAGS.locally_shard_to_cpu, num_async_replicas=FLAGS.worker_replicas, - use_tpu=FLAGS.use_tpu) + gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, + enable_graph_rewriter=FLAGS.experimental_optimize_placement, + use_tpu=FLAGS.use_tpu, + schedule=FLAGS.schedule, + no_data_parallelism=hp.no_data_parallelism, + daisy_chain_variables=hp.daisy_chain_variables, + ps_replicas=FLAGS.ps_replicas, + ps_job=FLAGS.ps_job, + ps_gpu=FLAGS.ps_gpu, + sync=FLAGS.sync, + worker_id=FLAGS.worker_id, + worker_job=FLAGS.worker_job) + + +def log_registry(): + if FLAGS.registry_help: + tf.logging.info(registry.help_string()) + sys.exit(0) def execute_schedule(exp): @@ -111,9 +141,13 @@ def main(_): tf.logging.set_verbosity(tf.logging.INFO) tf.set_random_seed(123) usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + log_registry() + + hparams = create_hparams() + run_config = create_run_config(hparams) exp_fn = create_experiment_fn() - exp = exp_fn(create_run_config(), create_hparams()) + exp = exp_fn(run_config, hparams) execute_schedule(exp) diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 1f05cd893..710fa1902 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -26,6 +26,7 @@ To train your model, for example: --model=transformer --hparams_set=transformer_base """ +# DEPRECATED from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensor2tensor/bin/t2t_decoder.py b/tensor2tensor/bin/t2t_decoder.py index b98797610..47e9badb5 100644 --- a/tensor2tensor/bin/t2t_decoder.py +++ b/tensor2tensor/bin/t2t_decoder.py @@ -57,10 +57,11 @@ def create_hparams(): - hparams = tpu_trainer.create_hparams() - hparams.add_hparam("data_dir", os.path.expanduser(FLAGS.data_dir)) - tpu_trainer_lib.add_problem_hparams(hparams, FLAGS.problems) - return hparams + return tpu_trainer_lib.create_hparams( + FLAGS.hparams_set, + FLAGS.hparams, + data_dir=os.path.expanduser(FLAGS.data_dir), + problem_name=FLAGS.problems) def create_decode_hparams(): @@ -89,7 +90,7 @@ def decode(estimator, hparams, decode_hp): def main(_): tf.logging.set_verbosity(tf.logging.INFO) usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - FLAGS.use_tpu = False + FLAGS.use_tpu = False # decoding not supported on TPU hp = create_hparams() decode_hp = create_decode_hparams() diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 977337b02..68119e8ad 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -25,6 +25,7 @@ --model=transformer --hparams_set=transformer_base """ +# DEPRECATED from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index b4021e9c7..0cb86b6ad 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -383,13 +383,6 @@ def dataset(self, # Construct the Problem's hparams so that items within it are accessible _ = self.get_hparams(hparams) - data_fields, data_items_to_decoders = self.example_reading_spec() - if data_items_to_decoders is None: - data_items_to_decoders = { - field: tf.contrib.slim.tfexample_decoder.Tensor(field) - for field in data_fields - } - is_training = mode == tf.estimator.ModeKeys.TRAIN data_filepattern = self.filepattern(data_dir, dataset_split, shard=shard) tf.logging.info("Reading data files from %s", data_filepattern) @@ -406,22 +399,13 @@ def dataset(self, else: dataset = tf.data.TFRecordDataset(data_files) - def decode_record(record): - """Serialized Example to dict of .""" - decoder = tf.contrib.slim.tfexample_decoder.TFExampleDecoder( - data_fields, data_items_to_decoders) - - decode_items = list(data_items_to_decoders) - decoded = decoder.decode(record, items=decode_items) - return dict(zip(decode_items, decoded)) - def _preprocess(example): example = self.preprocess_example(example, mode, hparams) self.maybe_reverse_features(example) self.maybe_copy_features(example) return example - dataset = dataset.map(decode_record, num_parallel_calls=num_threads) + dataset = dataset.map(self.decode_example, num_parallel_calls=num_threads) if preprocess: dataset = dataset.map(_preprocess, num_parallel_calls=num_threads) @@ -430,6 +414,22 @@ def _preprocess(example): return dataset + def decode_example(self, serialized_example): + """Return a dict of Tensors from a serialized tensorflow.Example.""" + data_fields, data_items_to_decoders = self.example_reading_spec() + if data_items_to_decoders is None: + data_items_to_decoders = { + field: tf.contrib.slim.tfexample_decoder.Tensor(field) + for field in data_fields + } + + decoder = tf.contrib.slim.tfexample_decoder.TFExampleDecoder( + data_fields, data_items_to_decoders) + + decode_items = list(data_items_to_decoders) + decoded = decoder.decode(serialized_example, items=decode_items) + return dict(zip(decode_items, decoded)) + @property def has_inputs(self): return "inputs" in self.get_feature_encoders() @@ -496,7 +496,8 @@ def input_fn(self, mode, hparams, params=None, config=None, mode: tf.estimator.ModeKeys hparams: HParams, model hparams params: dict, may include "batch_size" - config: RunConfig; if passed, should include t2t_device_info dict + config: RunConfig; should have the data_parallelism attribute if not using + TPU dataset_kwargs: dict, if passed, will pass as kwargs to self.dataset method when called @@ -521,29 +522,8 @@ def gpu_valid_size(example): hparams.max_length if drop_long_sequences else 10**9) def define_shapes(example): - """Set the right shapes for the features.""" - inputs = example["inputs"] - targets = example["targets"] - - # Ensure inputs and targets are proper rank. - while len(inputs.get_shape()) < 4: - inputs = tf.expand_dims(inputs, axis=-1) - while len(targets.get_shape()) < 4: - targets = tf.expand_dims(targets, axis=-1) - - example["inputs"] = inputs - example["targets"] = targets - - if config.use_tpu: - # Ensure batch size is set on all features - for _, t in six.iteritems(example): - shape = t.get_shape().as_list() - shape[0] = params["batch_size"] - t.set_shape(t.get_shape().merge_with(shape)) - # Assert shapes are fully known - t.get_shape().assert_is_fully_defined() - - return example + return _standardize_shapes( + example, batch_size=(config.use_tpu and params["batch_size"])) # Read and preprocess data_dir = hparams.data_dir @@ -569,7 +549,7 @@ def define_shapes(example): dataset = dataset.apply( tf.contrib.data.batch_and_drop_remainder(tpu_batch_size)) else: - num_shards = config.t2t_device_info["num_shards"] + num_shards = config.data_parallelism.n dataset = dataset.batch(hparams.batch_size * num_shards) else: # Variable length features @@ -586,7 +566,7 @@ def define_shapes(example): dataset = dataset.filter(gpu_valid_size) batching_scheme = data_reader.hparams_to_batching_scheme( hparams, - shard_multiplier=config.t2t_device_info["num_shards"], + shard_multiplier=config.data_parallelism.n, length_multiplier=self.get_hparams().batch_size_multiplier) if hparams.use_fixed_batch_size: batching_scheme["batch_sizes"] = [hparams.batch_size] @@ -601,7 +581,7 @@ def define_shapes(example): dataset = dataset.prefetch(1) features = dataset.make_one_shot_iterator().get_next() if not config.use_tpu: - _summarize_features(features, config.t2t_device_info["num_shards"]) + _summarize_features(features, config.data_parallelism.n) if mode == tf.estimator.ModeKeys.PREDICT: features["infer_targets"] = features["targets"] @@ -614,6 +594,25 @@ def define_shapes(example): return features, features["targets"] + def serving_input_fn(self, hparams): + """Input fn for serving export, starting from serialized example.""" + mode = tf.estimator.ModeKeys.PREDICT + serialized_example = tf.placeholder( + dtype=tf.string, shape=[None], name="serialized_example") + dataset = tf.data.Dataset.from_tensor_slices(serialized_example) + dataset = dataset.map(self.decode_example) + dataset = dataset.map(lambda ex: self.preprocess_example(ex, mode, hparams)) + dataset = dataset.map(data_reader.cast_int64_to_int32) + dataset = dataset.padded_batch(1000, dataset.output_shapes) + dataset = dataset.map(_standardize_shapes) + features = tf.contrib.data.get_single_element(dataset) + + if self.has_inputs: + features.pop("targets", None) + + return tf.estimator.export.ServingInputReceiver( + features=features, receiver_tensors=serialized_example) + class FeatureInfo(object): @@ -907,3 +906,28 @@ def _summarize_features(features, num_shards=1): tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens) tf.summary.scalar("%s_nonpadding_fraction" % k, tf.reduce_mean(nonpadding)) + + +def _standardize_shapes(features, batch_size=None): + """Set the right shapes for the features.""" + + for fname in ["inputs", "targets"]: + if fname not in features: + continue + + f = features[fname] + while len(f.get_shape()) < 4: + f = tf.expand_dims(f, axis=-1) + + features[fname] = f + + if batch_size: + # Ensure batch size is set on all features + for _, t in six.iteritems(features): + shape = t.get_shape().as_list() + shape[0] = batch_size + t.set_shape(t.get_shape().merge_with(shape)) + # Assert shapes are fully known + t.get_shape().assert_is_fully_defined() + + return features diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index 0ee3bfd08..52b364137 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -49,7 +49,7 @@ _ENZH_TEST_DATASETS = [[ "http://data.statmt.org/wmt17/translation-task/dev.tgz", - ("dev/newsdev2017-zhen-src.en.sgm", "dev/newsdev2017-zhen-ref.zh.sgm") + ("dev/newsdev2017-enzh-src.en.sgm", "dev/newsdev2017-enzh-ref.zh.sgm") ]] diff --git a/tensor2tensor/insights/transformer_model.py b/tensor2tensor/insights/transformer_model.py index 570dc0174..94bc7c0e1 100644 --- a/tensor2tensor/insights/transformer_model.py +++ b/tensor2tensor/insights/transformer_model.py @@ -111,9 +111,11 @@ def __init__(self, data_dir, model_dir): data_dir = os.path.expanduser(data_dir) # Create the basic hyper parameters. - self.hparams = tpu_trainer.create_hparams() - self.hparams.add_hparam("data_dir", os.path.expanduser(data_dir)) - tpu_trainer_lib.add_problem_hparams(self.hparams, FLAGS.problems) + self.hparams = tpu_trainer_lib.create_hparams( + FLAGS.hparams_set, + FLAGS.hparams, + data_dir=data_dir, + problem_name=FLAGS.problems) decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) decode_hp.add_hparam("shards", 1) diff --git a/tensor2tensor/layers/modalities.py b/tensor2tensor/layers/modalities.py index 9d00c5116..0e41dd086 100644 --- a/tensor2tensor/layers/modalities.py +++ b/tensor2tensor/layers/modalities.py @@ -76,7 +76,7 @@ def _get_weights(self, hidden_dim=None): """Create or get concatenated embedding or softmax variable. Args: - hidden_dim: dim of the variable. Defaults fo self._body_input_depth + hidden_dim: dim of the variable. Defaults to self._body_input_depth Returns: a list of self._num_shards Tensors. diff --git a/tensor2tensor/models/resnet.py b/tensor2tensor/models/resnet.py index f3df54b10..5858c364b 100644 --- a/tensor2tensor/models/resnet.py +++ b/tensor2tensor/models/resnet.py @@ -247,5 +247,6 @@ def resnet_base(): hparams.add_hparam("strides", [1, 2, 2, 2]) # Can run with a batch size of 128 with Problem ImageImagenet224 + hparams.batch_size = 128 hparams.tpu_batch_size_per_shard = 128 return hparams diff --git a/tensor2tensor/models/shake_shake.py b/tensor2tensor/models/shake_shake.py index b4d4a62ea..d1745bff8 100644 --- a/tensor2tensor/models/shake_shake.py +++ b/tensor2tensor/models/shake_shake.py @@ -135,8 +135,7 @@ def shakeshake_cifar10(): tf.logging.warning("shakeshake_cifar10 hparams have not been verified to " "achieve good performance.") hparams = common_hparams.basic_params1() - # This leads to effective batch size 128 when number of GPUs is 1 - hparams.batch_size = 4096 * 8 + hparams.batch_size = 128 hparams.hidden_size = 16 hparams.dropout = 0 hparams.label_smoothing = 0.0 diff --git a/tensor2tensor/models/vanilla_gan.py b/tensor2tensor/models/vanilla_gan.py index 36acfc4a2..a6196c491 100644 --- a/tensor2tensor/models/vanilla_gan.py +++ b/tensor2tensor/models/vanilla_gan.py @@ -149,7 +149,7 @@ def vanilla_gan(): hparams.input_modalities = "inputs:image:zero_loss" hparams.target_modality = "image:zero_loss" - hparams.batch_size = 2048 # 3136 + hparams.batch_size = 32 hparams.label_smoothing = 0.0 hparams.add_hparam("startup_steps", 10000) diff --git a/tensor2tensor/models/xception.py b/tensor2tensor/models/xception.py index 1c0678584..9e2174161 100644 --- a/tensor2tensor/models/xception.py +++ b/tensor2tensor/models/xception.py @@ -146,7 +146,7 @@ def body(self, features): def xception_base(): """Set of hyperparameters.""" hparams = common_hparams.basic_params1() - hparams.batch_size = 4096 + hparams.batch_size = 128 hparams.hidden_size = 768 hparams.dropout = 0.2 hparams.symbol_dropout = 0.2 @@ -171,7 +171,7 @@ def xception_base(): @registry.register_hparams def xception_tiny(): hparams = xception_base() - hparams.batch_size = 1024 + hparams.batch_size = 2 hparams.hidden_size = 64 hparams.num_hidden_layers = 2 hparams.learning_rate_decay_scheme = "none" diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 5eafd4590..d3e4130f6 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -19,12 +19,14 @@ from __future__ import print_function import os +import sys # Dependency imports from tensor2tensor import models # pylint: disable=unused-import from tensor2tensor import problems as problems_lib # pylint: disable=unused-import -from tensor2tensor.tpu import tpu_trainer_lib as lib +from tensor2tensor.tpu import tpu_trainer_lib +from tensor2tensor.utils import decoding from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import from tensor2tensor.utils import registry from tensor2tensor.utils import usr_dir @@ -44,7 +46,7 @@ flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") flags.DEFINE_integer("iterations_per_loop", 1000, "Number of iterations in a TPU training loop.") -flags.DEFINE_bool("use_tpu", True, "Whether to use TPU.") +flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") # To maintain compatibility with some internal libs, we guard against these flag # definitions possibly erroring. Apologies for the ugliness. @@ -65,14 +67,14 @@ def get_problem_name(): def create_hparams(): - hparams = registry.hparams(FLAGS.hparams_set)() - if FLAGS.hparams: - hparams = hparams.parse(FLAGS.hparams) - return hparams + return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) def create_experiment_fn(): - return lib.create_experiment_fn( + use_validation_monitor = (FLAGS.schedule in + ["train_and_evaluate", "continuous_train_and_eval"] + and FLAGS.local_eval_frequency) + return tpu_trainer_lib.create_experiment_fn( FLAGS.model, get_problem_name(), os.path.expanduser(FLAGS.data_dir), @@ -80,11 +82,20 @@ def create_experiment_fn(): FLAGS.eval_steps, FLAGS.local_eval_frequency, FLAGS.schedule, + export=FLAGS.export_saved_model, + decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), + use_tfdbg=FLAGS.tfdbg, + use_dbgprofile=FLAGS.dbgprofile, + use_validation_monitor=use_validation_monitor, + eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, + eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, + eval_early_stopping_metric_minimize=FLAGS. + eval_early_stopping_metric_minimize, use_tpu=FLAGS.use_tpu) -def create_run_config(): - return lib.create_run_config( +def create_run_config(hp): + return tpu_trainer_lib.create_run_config( model_dir=os.path.expanduser(FLAGS.output_dir), master=FLAGS.master, iterations_per_loop=FLAGS.iterations_per_loop, @@ -92,11 +103,30 @@ def create_run_config(): log_device_placement=FLAGS.log_device_placement, save_checkpoints_steps=max(FLAGS.iterations_per_loop, FLAGS.local_eval_frequency), + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, num_gpus=FLAGS.worker_gpu, gpu_order=FLAGS.gpu_order, shard_to_cpu=FLAGS.locally_shard_to_cpu, num_async_replicas=FLAGS.worker_replicas, - use_tpu=FLAGS.use_tpu) + gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, + enable_graph_rewriter=FLAGS.experimental_optimize_placement, + use_tpu=FLAGS.use_tpu, + schedule=FLAGS.schedule, + no_data_parallelism=hp.no_data_parallelism, + daisy_chain_variables=hp.daisy_chain_variables, + ps_replicas=FLAGS.ps_replicas, + ps_job=FLAGS.ps_job, + ps_gpu=FLAGS.ps_gpu, + sync=FLAGS.sync, + worker_id=FLAGS.worker_id, + worker_job=FLAGS.worker_job) + + +def log_registry(): + if FLAGS.registry_help: + tf.logging.info(registry.help_string()) + sys.exit(0) def execute_schedule(exp): @@ -110,9 +140,13 @@ def main(_): tf.logging.set_verbosity(tf.logging.INFO) tf.set_random_seed(123) usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + log_registry() + + hparams = create_hparams() + run_config = create_run_config(hparams) exp_fn = create_experiment_fn() - exp = exp_fn(create_run_config(), create_hparams()) + exp = exp_fn(run_config, hparams) execute_schedule(exp) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index ff433dba7..bc18fe298 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -21,11 +21,60 @@ # Dependency imports +from tensor2tensor.utils import devices +from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model import tensorflow as tf +from tensorflow.core.protobuf import rewriter_config_pb2 +from tensorflow.python import debug + + +def create_session_config(log_device_placement=False, + enable_graph_rewriter=False, + gpu_mem_fraction=0.95, + use_tpu=True): + """The TensorFlow Session config to use.""" + if use_tpu: + graph_options = tf.GraphOptions() + else: + if enable_graph_rewriter: + rewrite_options = rewriter_config_pb2.RewriterConfig() + rewrite_options.optimizers.append("pruning") + rewrite_options.optimizers.append("constfold") + rewrite_options.optimizers.append("arithmetic") + rewrite_options.optimizers.append("layout") + graph_options = tf.GraphOptions(rewrite_options=rewrite_options) + else: + graph_options = tf.GraphOptions( + optimizer_options=tf.OptimizerOptions( + opt_level=tf.OptimizerOptions.L1, do_function_inlining=False)) + + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_mem_fraction) + + config = tf.ConfigProto( + allow_soft_placement=True, + graph_options=graph_options, + gpu_options=gpu_options, + log_device_placement=log_device_placement) + return config + + +def create_hparams(hparams_set, + hparams_overrides_str="", + data_dir=None, + problem_name=None): + hparams = registry.hparams(hparams_set)() + if hparams_overrides_str: + hparams = hparams.parse(hparams_overrides_str) + if data_dir: + hparams.add_hparam("data_dir", data_dir) + if problem_name: + add_problem_hparams(hparams, problem_name) + return hparams + def create_run_config(master="", model_dir=None, @@ -33,21 +82,42 @@ def create_run_config(master="", num_shards=8, log_device_placement=False, save_checkpoints_steps=1000, + keep_checkpoint_max=20, + keep_checkpoint_every_n_hours=10000, num_gpus=1, gpu_order="", shard_to_cpu=False, num_async_replicas=1, + enable_graph_rewriter=False, + gpu_mem_fraction=0.95, + no_data_parallelism=False, + daisy_chain_variables=True, + schedule="continuous_train_and_eval", + worker_job="/job:localhost", + worker_id=0, + ps_replicas=0, + ps_job="/job:ps", + ps_gpu=0, + sync=False, use_tpu=True): - """Create TPUConfig and tpu.RunConfig.""" + """Create RunConfig, TPUConfig, and Parallelism object.""" + session_config = create_session_config( + log_device_placement=log_device_placement, + enable_graph_rewriter=enable_graph_rewriter, + gpu_mem_fraction=gpu_mem_fraction, + use_tpu=use_tpu) session_config = tf.ConfigProto( allow_soft_placement=True, log_device_placement=log_device_placement) run_config_args = { + "master": master, "model_dir": model_dir, "session_config": session_config, "save_summary_steps": 0, "save_checkpoints_steps": save_checkpoints_steps, + "keep_checkpoint_max": keep_checkpoint_max, + "keep_checkpoint_every_n_hours": keep_checkpoint_every_n_hours, } - run_config_cls = tf.estimator.RunConfig + run_config_cls = tf.contrib.learn.RunConfig # If using TPU, use TPU RunConfig, add TPUConfig, and add additional args if use_tpu: @@ -56,22 +126,32 @@ def create_run_config(master="", iterations_per_loop=iterations_per_loop, num_shards=num_shards, per_host_input_for_training=(num_shards <= 8)) - run_config_args["master"] = master run_config_args["tpu_config"] = tpu_config config = run_config_cls(**run_config_args) # If not using TPU, add device info for data_parallelism config.use_tpu = use_tpu - config.t2t_device_info = {} if not use_tpu: config.t2t_device_info = { - "num_gpus": num_gpus, - "gpu_order": gpu_order, - "shard_to_cpu": shard_to_cpu, - "num_shards": max(1, num_gpus + int(shard_to_cpu)), "num_async_replicas": num_async_replicas, } + if no_data_parallelism: + config.data_parallelism = expert_utils.Parallelism([""]) + else: + config.data_parallelism = devices.data_parallelism( + daisy_chain_variables=daisy_chain_variables, + ps_replicas=ps_replicas, + ps_job=ps_job, + ps_gpu=ps_gpu, + schedule=schedule, + sync=sync, + worker_gpu=num_gpus, + worker_replicas=num_async_replicas, + worker_id=worker_id, + gpu_order=gpu_order, + locally_shard_to_cpu=shard_to_cpu, + worker_job=worker_job) return config @@ -79,8 +159,8 @@ def create_run_config(master="", def create_estimator(model_name, hparams, run_config, - decode_hparams=None, schedule="train_and_evaluate", + decode_hparams=None, use_tpu=True): model_fn = t2t_model.T2TModel.make_estimator_model_fn( model_name, hparams, decode_hparams=decode_hparams, use_tpu=use_tpu) @@ -105,6 +185,32 @@ def create_estimator(model_name, model_fn=model_fn, model_dir=run_config.model_dir, config=run_config) +def create_hooks(use_tfdbg=False, use_dbgprofile=False, dbgprofile_kwargs=None, + use_validation_monitor=False, validation_monitor_kwargs=None): + """Create train and eval hooks for Experiment.""" + train_monitors = [] + eval_hooks = [] + + if use_tfdbg: + hook = debug.LocalCLIDebugHook() + train_monitors.append(hook) + eval_hooks.append(hook) + + if use_dbgprofile: + # Recorded traces can be visualized with chrome://tracing/ + # The memory/tensor lifetime is also profiled + defaults = dict(save_steps=10, show_dataflow=True, show_memory=True) + defaults.update(dbgprofile_kwargs) + train_monitors.append(tf.contrib.hooks.ProfilerHook(**defaults)) + + if use_validation_monitor: + train_monitors.append( + tf.contrib.learn.monitors.ValidationMonitor( + hooks=eval_hooks, **validation_monitor_kwargs)) + + return train_monitors, eval_hooks + + def create_experiment(run_config, hparams, model_name, @@ -114,6 +220,14 @@ def create_experiment(run_config, eval_steps, min_eval_frequency, schedule="train_and_evaluate", + export=False, + decode_hparams=None, + use_tfdbg=False, + use_dbgprofile=False, + use_validation_monitor=False, + eval_early_stopping_steps=None, + eval_early_stopping_metric=None, + eval_early_stopping_metric_minimize=True, use_tpu=True): """Create Experiment.""" # HParams @@ -122,7 +236,12 @@ def create_experiment(run_config, # Estimator estimator = create_estimator( - model_name, hparams, run_config, schedule, use_tpu=use_tpu) + model_name, + hparams, + run_config, + schedule=schedule, + decode_hparams=decode_hparams, + use_tpu=use_tpu) # Input fns from Problem problem = hparams.problem_instances[0] @@ -131,6 +250,28 @@ def create_experiment(run_config, eval_input_fn = problem.make_estimator_input_fn( tf.estimator.ModeKeys.EVAL, hparams) + # Export + export_strategies = export and [create_export_strategy(problem, hparams)] + + # Hooks + hooks_kwargs = {} + if not use_tpu: + dbgprofile_kwargs = {"output_dir": run_config.model_dir} + validation_monitor_kwargs = dict( + input_fn=eval_input_fn, + eval_steps=eval_steps, + every_n_steps=min_eval_frequency, + early_stopping_rounds=eval_early_stopping_steps, + early_stopping_metric=eval_early_stopping_metric, + early_stopping_metric_minimize=eval_early_stopping_metric_minimize) + train_monitors, eval_hooks = create_hooks( + use_tfdbg=use_tfdbg, + use_dbgprofile=use_dbgprofile, + dbgprofile_kwargs=dbgprofile_kwargs, + use_validation_monitor=use_validation_monitor, + validation_monitor_kwargs=validation_monitor_kwargs) + hooks_kwargs = {"train_monitors": train_monitors, "eval_hooks": eval_hooks} + # Experiment return tf.contrib.learn.Experiment( estimator=estimator, @@ -139,7 +280,9 @@ def create_experiment(run_config, train_steps=train_steps, eval_steps=eval_steps, min_eval_frequency=min_eval_frequency, - train_steps_per_iteration=min_eval_frequency) + train_steps_per_iteration=min_eval_frequency, + export_strategies=export_strategies, + **hooks_kwargs) def create_experiment_fn(*args, **kwargs): @@ -151,6 +294,11 @@ def experiment_fn(run_config, hparams): return experiment_fn +def create_export_strategy(problem, hparams): + return tf.contrib.learn.make_export_strategy( + lambda: problem.serving_input_fn(hparams), as_text=True) + + def add_problem_hparams(hparams, problems): """Add problem hparams for the problems.""" hparams.problems = [] diff --git a/tensor2tensor/tpu/tpu_trainer_lib_test.py b/tensor2tensor/tpu/tpu_trainer_lib_test.py index 1308c0990..4d8f2aad9 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib_test.py +++ b/tensor2tensor/tpu/tpu_trainer_lib_test.py @@ -21,7 +21,7 @@ # Dependency imports -from tensor2tensor.tpu import tpu_trainer_lib as lib +from tensor2tensor.tpu import tpu_trainer_lib from tensor2tensor.utils import registry from tensor2tensor.utils import trainer_utils_test @@ -35,7 +35,7 @@ def setUpClass(cls): trainer_utils_test.TrainerUtilsTest.setUpClass() def testExperiment(self): - exp_fn = lib.create_experiment_fn( + exp_fn = tpu_trainer_lib.create_experiment_fn( "transformer", "tiny_algo", trainer_utils_test.TrainerUtilsTest.data_dir, @@ -43,7 +43,7 @@ def testExperiment(self): eval_steps=1, min_eval_frequency=1, use_tpu=False) - run_config = lib.create_run_config(num_gpus=0, use_tpu=False) + run_config = tpu_trainer_lib.create_run_config(num_gpus=0, use_tpu=False) hparams = registry.hparams("transformer_tiny_tpu")() exp = exp_fn(run_config, hparams) exp.test() diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 58a9f18a6..4721bc5d0 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -65,6 +65,7 @@ def feature_placeholders(data_fields, data_items_to_decoders): return decoded_example +# DEPRECATED def input_pipeline(problem, data_dir, capacity, @@ -348,6 +349,7 @@ def constant_batching_scheme(constant_batch_size_in_sequences): } +# DEPRECATED def serving_input_fn(problem, hparams): """Input fn for serving, starting from Placeholders.""" data_fields, data_items_to_decoders = problem.example_reading_spec() diff --git a/tensor2tensor/utils/devices.py b/tensor2tensor/utils/devices.py index 78d6503e9..06a7938c5 100644 --- a/tensor2tensor/utils/devices.py +++ b/tensor2tensor/utils/devices.py @@ -18,70 +18,15 @@ from __future__ import division from __future__ import print_function -# Dependency imports +import inspect -# pylint: disable=redefined-builtin -from six.moves import xrange -# pylint: enable=redefined-builtin +# Dependency imports from tensor2tensor.utils import expert_utils as eu import tensorflow as tf -# TODO(rsepassi): Rm dep on FLAGS here -FLAGS = tf.flags.FLAGS - - -def _ps_replicas(all_workers=False): - if all_workers: - return list(range(FLAGS.ps_replicas)) - # Worker K will be using replicas {0,...n-1} + K*n if we have n replicas. - num_replicas = FLAGS.ps_replicas // FLAGS.worker_replicas - return [d + FLAGS.worker_id * num_replicas for d in xrange(num_replicas)] - - -def _gpu_order(num_gpus): - if FLAGS.gpu_order: - ret = [int(s) for s in FLAGS.gpu_order.split(" ")] - if len(ret) == num_gpus: - return ret - return list(range(num_gpus)) - - -def _ps_gpus(all_workers=False): - ps_gpus = [] - for d in _ps_replicas(all_workers=all_workers): - ps_gpus.extend([(d, gpu) for gpu in _gpu_order(FLAGS.ps_gpu)]) - return ps_gpus - - -def ps_devices(all_workers=False): - """List of ps devices (where to put the experts). - - Args: - all_workers: whether the list is for all async workers or just this one. - Returns: - a list of device names - """ - if FLAGS.ps_replicas > 0: - if FLAGS.ps_gpu > 0: - return [ - FLAGS.ps_job + "/task:%d/GPU:%d" % (d, gpu) - for (d, gpu) in _ps_gpus(all_workers=all_workers) - ] - else: - return [ - FLAGS.ps_job + "/task:%d" % d - for d in _ps_replicas(all_workers=all_workers) - ] - else: - if FLAGS.worker_gpu > 0: - return ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] - else: - return [""] - - -def data_parallelism(hparams, all_workers=False): +def data_parallelism_from_flags(daisy_chain_variables=True, all_workers=False): """Over which devices do we split each training batch. In old-fashioned async mode, we split the batch over all GPUs on the @@ -95,39 +40,113 @@ def data_parallelism(hparams, all_workers=False): between datashards. Args: - hparams: model hyperparameters (an HParams object). + daisy_chain_variables: whether to copy variables in a daisy chain on GPUs. all_workers: whether the devices are all async workers or just this one. Returns: a expert_utils.Parallelism. """ - if hparams.no_data_parallelism: - return eu.Parallelism([""]) + dp_arg_names = inspect.getargspec(data_parallelism).args + + blacklist = ["daisy_chain_variables", "all_workers"] + + kwargs = {} + for arg in dp_arg_names: + if arg in blacklist: + continue + kwargs[arg] = getattr(tf.flags.FLAGS, arg) + + return data_parallelism( + daisy_chain_variables=daisy_chain_variables, + all_workers=all_workers, + **kwargs) + + +def data_parallelism(daisy_chain_variables=True, + all_workers=False, + ps_replicas=0, + ps_job="/job:ps", + ps_gpu=0, + schedule="continuous_train_and_eval", + sync=False, + worker_gpu=1, + worker_replicas=1, + worker_id=0, + gpu_order="", + locally_shard_to_cpu=False, + worker_job="/job:localhost"): + """See data_parallelism_from_flags.""" + def _ps_replicas(all_workers=False): + if all_workers: + return list(range(ps_replicas)) + # Worker K will be using replicas {0,...n-1} + K*n if we have n replicas. + num_replicas = ps_replicas // worker_replicas + return [d + worker_id * num_replicas for d in range(num_replicas)] + + def _gpu_order(num_gpus): + if gpu_order: + ret = [int(s) for s in gpu_order.split(" ")] + if len(ret) == num_gpus: + return ret + return list(range(num_gpus)) + + def _ps_gpus(all_workers=False): + ps_gpus = [] + for d in _ps_replicas(all_workers=all_workers): + ps_gpus.extend([(d, gpu) for gpu in _gpu_order(ps_gpu)]) + return ps_gpus + + def ps_devices(all_workers=False): + """List of ps devices (where to put the experts). + + Args: + all_workers: whether the list is for all async workers or just this one. + + Returns: + a list of device names + """ + if ps_replicas > 0: + if ps_gpu > 0: + return [ + ps_job + "/task:%d/GPU:%d" % (d, gpu) + for (d, gpu) in _ps_gpus(all_workers=all_workers) + ] + else: + return [ + ps_job + "/task:%d" % d + for d in _ps_replicas(all_workers=all_workers) + ] + else: + if worker_gpu > 0: + return ["gpu:%d" % d for d in _gpu_order(worker_gpu)] + else: + return [""] + def _replica_device_setter(worker_device): - if FLAGS.ps_replicas == 0: + if ps_replicas == 0: return worker_device return tf.train.replica_device_setter( worker_device=worker_device, - ps_tasks=FLAGS.ps_replicas, - ps_device=FLAGS.ps_job + "/GPU:0" if FLAGS.ps_gpu > 0 else FLAGS.ps_job) + ps_tasks=ps_replicas, + ps_device=ps_job + "/GPU:0" if ps_gpu > 0 else ps_job) - if FLAGS.schedule in ["train_and_evaluate", "continuous_train_and_eval"]: - assert not FLAGS.sync + if schedule in ["train_and_evaluate", "continuous_train_and_eval"]: + assert not sync tf.logging.warn( "Schedule=%s. Assuming that training is running on a single machine.", - FLAGS.schedule) - datashard_devices = ["gpu:%d" % d for d in _gpu_order(FLAGS.worker_gpu)] - if FLAGS.locally_shard_to_cpu or FLAGS.worker_gpu < 1: + schedule) + datashard_devices = ["gpu:%d" % d for d in _gpu_order(worker_gpu)] + if locally_shard_to_cpu or worker_gpu < 1: datashard_devices += ["cpu:0"] caching_devices = None - elif FLAGS.sync and FLAGS.ps_replicas > 0: + elif sync and ps_replicas > 0: # compute on ps datashard_devices = [ _replica_device_setter(d) for d in ps_devices(all_workers=all_workers) ] - if FLAGS.ps_gpu > 0 and FLAGS.ps_replicas > 1: + if ps_gpu > 0 and ps_replicas > 1: caching_devices = [ - FLAGS.ps_job + "/task:%d/cpu:0" % d + ps_job + "/task:%d/cpu:0" % d for (d, _) in _ps_gpus(all_workers=all_workers) ] else: @@ -135,18 +154,19 @@ def _replica_device_setter(worker_device): else: # compute on worker - this is either a single-worker setup or asynchronous # with parameter servers. - if FLAGS.worker_gpu > 1: + if worker_gpu > 1: datashard_devices = [ - _replica_device_setter(FLAGS.worker_job + "/GPU:%d" % d) - for d in _gpu_order(FLAGS.worker_gpu) + _replica_device_setter(worker_job + "/GPU:%d" % d) + for d in _gpu_order(worker_gpu) ] - caching_devices = [FLAGS.worker_job + "/GPU:0"] * FLAGS.worker_gpu + caching_devices = [worker_job + "/GPU:0"] * worker_gpu else: - datashard_devices = [_replica_device_setter(FLAGS.worker_job)] + datashard_devices = [_replica_device_setter(worker_job)] caching_devices = None tf.logging.info("datashard_devices: %s", datashard_devices) tf.logging.info("caching_devices: %s", caching_devices) return eu.Parallelism( datashard_devices, caching_devices=caching_devices, - daisy_chain_variables=hparams.daisy_chain_variables) + daisy_chain_variables=daisy_chain_variables, + ps_devices=ps_devices(all_workers=all_workers)) diff --git a/tensor2tensor/utils/expert_utils.py b/tensor2tensor/utils/expert_utils.py index fed1af719..c947c6dba 100644 --- a/tensor2tensor/utils/expert_utils.py +++ b/tensor2tensor/utils/expert_utils.py @@ -132,7 +132,8 @@ def __init__(self, device_names_or_functions, reuse=True, caching_devices=None, - daisy_chain_variables=False): + daisy_chain_variables=False, + ps_devices=None): """Create a Parallelism. Args: @@ -144,6 +145,7 @@ def __init__(self, names. daisy_chain_variables: a boolean - if true, then copies variables in a daisy chain between devices. + ps_devices: list, list of devices for experts. Returns: a Parallelism. @@ -154,6 +156,7 @@ def __init__(self, self._reuse = reuse self._caching_devices = self._maybe_repeat(caching_devices) self._daisy_chain_variables = daisy_chain_variables + self._ps_devices = ps_devices or [""] def __call__(self, fn, *args, **kwargs): """A parallel set of function calls (using the specified devices). @@ -264,6 +267,10 @@ def n(self): def devices(self): return self._devices + @property + def ps_devices(self): + return self._ps_devices + def _maybe_repeat(self, x): """Utility function for processing arguments that are singletons or lists. diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py index f416b9d2b..18ca992cf 100644 --- a/tensor2tensor/utils/input_fn_builder.py +++ b/tensor2tensor/utils/input_fn_builder.py @@ -14,6 +14,7 @@ # limitations under the License. """Input function building.""" +# DEPRECATED from __future__ import absolute_import from __future__ import division diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py index fe6bea221..b4a0008e3 100644 --- a/tensor2tensor/utils/model_builder.py +++ b/tensor2tensor/utils/model_builder.py @@ -14,6 +14,7 @@ # limitations under the License. """Model building.""" +# DEPRECATED from __future__ import absolute_import from __future__ import division @@ -76,7 +77,7 @@ def model_fn(model, decode_hp = decode_hparams # TODO(rsepassi): This still depends on FLAGS. Rm eventually. - dp = devices.data_parallelism(hparams) + dp = devices.data_parallelism_from_flags(hparams) tf.get_variable_scope().set_initializer( optimize.get_variable_initializer(hparams)) @@ -107,7 +108,6 @@ def nth_model(n): hparams.problems[n], n, dp, - devices.ps_devices(all_workers=True), decode_hparams=decode_hparams) if mode == tf.estimator.ModeKeys.PREDICT: return model_class.infer( diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 1125a6ed3..fe2790194 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -24,7 +24,7 @@ class MyModel(T2TModel): ``` Access by snake-cased name: `registry.model("my_model")`. If you're using -`t2t_trainer.py`, you can pass on the command-line: `--model=my_model`. +`tpu_trainer.py`, you can pass on the command-line: `--model=my_model`. See all the models registered: `registry.list_models()`. @@ -32,13 +32,13 @@ class MyModel(T2TModel): * Register: `registry.register_hparams` * List: `registry.list_hparams` * Retrieve by name: `registry.hparams` - * Command-line flag in `t2t_trainer.py`: `--hparams_set=name` + * Command-line flag in `tpu_trainer.py`: `--hparams_set=name` For hyperparameter ranges: * Register: `registry.register_ranged_hparams` * List: `registry.list_ranged_hparams` * Retrieve by name: `registry.ranged_hparams` - * Command-line flag in `t2t_trainer.py`: `--hparams_range=name` + * Command-line flag in `tpu_trainer.py`: `--hparams_range=name` """ from __future__ import absolute_import from __future__ import division diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index e473a6e3b..b895c0ed3 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -56,7 +56,6 @@ def __init__(self, problem_hparams=None, problem_idx=0, data_parallelism=None, - ps_devices=None, decode_hparams=None): """Create a T2TModel. @@ -67,7 +66,6 @@ def __init__(self, problem_idx: an integer. data_parallelism: a expert_utils.parallelism (specifies devices for data parallelism). - ps_devices: a list of devices to be used for experts decode_hparams: a hyperparameter object with decoding parameters. Returns: @@ -80,8 +78,6 @@ def __init__(self, trainable=mode == tf.estimator.ModeKeys.TRAIN, name=name) if data_parallelism is None: data_parallelism = eu.Parallelism([""]) - if ps_devices is None: - ps_devices = [""] if problem_hparams is None: problem_hparams = hparams.problems[0] @@ -101,7 +97,7 @@ def __init__(self, self._decode_hparams = copy.copy(decode_hparams) self._data_parallelism = data_parallelism self._num_datashards = data_parallelism.n - self._ps_devices = ps_devices + self._ps_devices = data_parallelism.ps_devices self._problem_hparams = problem_hparams self._problem_idx = problem_idx self._create_modalities(problem_hparams, self._hparams) @@ -264,9 +260,10 @@ def loss(self, logits, features): loss_num *= self._problem_hparams.loss_multiplier return loss_num, loss_den - def optimize(self, loss, use_tpu=False): + def optimize(self, loss, num_async_replicas=1, use_tpu=False): """Return a training op minimizing loss.""" lr = self.hparams.learning_rate * optimize.learning_rate_decay(self.hparams) + lr /= math.sqrt(float(num_async_replicas)) train_op = optimize.optimize(loss, lr, self.hparams, use_tpu=use_tpu) return train_op @@ -746,7 +743,7 @@ def estimator_model_fn(cls, features: dict labels: Tensor mode: tf.estimator.ModeKeys - config: RunConfig; if passed, should have t2t_device_info dict + config: RunConfig, possibly with data_parallelism attribute params: dict, may include batch_size decode_hparams: HParams, used when mode == PREDICT. use_tpu: bool, whether using TPU @@ -763,9 +760,8 @@ def estimator_model_fn(cls, problem = hparams.problem_instances[0] # Instantiate model - data_parallelism = ( - None if hparams.no_data_parallelism else _create_data_parallelism( - use_tpu=use_tpu, **config.t2t_device_info)) + data_parallelism = (None if (hparams.no_data_parallelism or use_tpu) + else config.data_parallelism) model = cls(hparams, mode, data_parallelism=data_parallelism, decode_hparams=decode_hparams) @@ -808,9 +804,8 @@ def estimator_model_fn(cls, def estimator_spec_train(self, loss, num_async_replicas=1, use_tpu=False): """Construct EstimatorSpec for TRAIN mode.""" - lr = self.hparams.learning_rate * optimize.learning_rate_decay(self.hparams) - lr /= math.sqrt(float(num_async_replicas)) - train_op = optimize.optimize(loss, lr, self.hparams, use_tpu=use_tpu) + train_op = self.optimize(loss, num_async_replicas=num_async_replicas, + use_tpu=use_tpu) if use_tpu: _remove_summaries() # summaries not currently working on TPU @@ -946,36 +941,11 @@ def _get_batch_size(params, hparams, config): if not batch_size: batch_size = hparams.tpu_batch_size_per_shard if config: - batch_size *= config.t2t_device_info["num_shards"] + batch_size *= config.data_parallelism.n return batch_size -def _create_data_parallelism(num_gpus=1, - gpu_order="", - shard_to_cpu=False, - num_shards=1, - use_tpu=False, - no_dp=False, - **kwargs): - """Create Parallelism object.""" - del kwargs - - if use_tpu or no_dp: - return eu.Parallelism([""]) - - gpus = list(range(num_gpus)) - if gpu_order: - gpus = [int(s) for s in gpu_order.split(" ")] - assert len(gpus) == num_gpus - data_shard_devices = ["gpu:%d" % i for i in gpus] - if shard_to_cpu or num_gpus < 1: - data_shard_devices += ["cpu:0"] - assert len(data_shard_devices) == num_shards - tf.logging.info("Data parallel devices: %s", data_shard_devices) - return eu.Parallelism(data_shard_devices) - - # These metrics are implemented with py_funcs and therefore do no work with TPU TPU_METRIC_BLACKLIST = set([ metrics.Metrics.APPROX_BLEU, diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py index a62a66321..a32dd446e 100644 --- a/tensor2tensor/utils/trainer_utils.py +++ b/tensor2tensor/utils/trainer_utils.py @@ -14,6 +14,7 @@ # limitations under the License. """Utilities for trainer binary.""" +# DEPRECATED from __future__ import absolute_import from __future__ import division @@ -130,7 +131,7 @@ def create_experiment_components(data_dir, model_name, hparams, run_config): # hparams batch_size is used as minibatch size instead of tokens in batch batch_size = (hparams.use_fixed_batch_size and hparams.batch_size) or None - num_datashards = devices.data_parallelism(hparams).n + num_datashards = devices.data_parallelism_from_flags(hparams).n train_input_fn = input_fn_builder.build_input_fn( mode=tf.estimator.ModeKeys.TRAIN, hparams=hparams, diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index ce70bde89..e8f114d08 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -138,7 +138,7 @@ "\n", "utils.add_problem_hparams(hparams, PROBLEM)\n", "\n", - "num_datashards = utils.devices.data_parallelism().n\n", + "num_datashards = utils.devices.data_parallelism_from_flags(hparams).n\n", "\n", "mode = tf.estimator.ModeKeys.EVAL\n", "\n", From a66cfaf430d7f4ceb90c49c312c39c3c542eaf8a Mon Sep 17 00:00:00 2001 From: Harini Kannan Date: Tue, 19 Dec 2017 15:31:20 -0800 Subject: [PATCH 0671/4095] Adding RevNet-104 to the Tensor2Tensor library. PiperOrigin-RevId: 179612703 --- tensor2tensor/models/__init__.py | 1 + tensor2tensor/models/revnet.py | 296 ++++++++++++++++++++++++++++ tensor2tensor/models/revnet_test.py | 115 +++++++++++ 3 files changed, 412 insertions(+) create mode 100644 tensor2tensor/models/revnet.py create mode 100644 tensor2tensor/models/revnet_test.py diff --git a/tensor2tensor/models/__init__.py b/tensor2tensor/models/__init__.py index 19a8d9735..ef92ccaff 100644 --- a/tensor2tensor/models/__init__.py +++ b/tensor2tensor/models/__init__.py @@ -34,6 +34,7 @@ from tensor2tensor.models import multimodel from tensor2tensor.models import neural_gpu from tensor2tensor.models import resnet +from tensor2tensor.models import revnet from tensor2tensor.models import shake_shake from tensor2tensor.models import slicenet from tensor2tensor.models import super_lm diff --git a/tensor2tensor/models/revnet.py b/tensor2tensor/models/revnet.py new file mode 100644 index 000000000..9d07e918f --- /dev/null +++ b/tensor2tensor/models/revnet.py @@ -0,0 +1,296 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Creates a RevNet with the bottleneck residual function. + +Implements the following equations described in the RevNet paper: +y1 = x1 + f(x2) +y2 = x2 + g(y1) + +However, in practice, the authors use the following equations to downsample +tensors inside a RevNet block: + +y1 = h(x1) + f(x2) +y2 = h(x2) + g(y1) + +In this case, h is the downsampling function used to change number of channels. + +These modified equations are evident in the authors' code online: +https://github.com/renmengye/revnet-public + +For reference, the original paper can be found here: +https://arxiv.org/pdf/1707.04585.pdf +""" + +# Dependency imports + +from tensor2tensor.layers import common_hparams +from tensor2tensor.layers import rev_block +from tensor2tensor.utils import registry +from tensor2tensor.utils import t2t_model + +import tensorflow as tf + +CONFIG = {'2d': {'conv': tf.layers.conv2d, + 'max_pool': tf.layers.max_pooling2d, + 'avg_pool': tf.layers.average_pooling2d, + 'split_axis': 3, + 'reduction_dimensions': [1, 2] + }, + '3d': {'conv': tf.layers.conv3d, + 'max_pool': tf.layers.max_pooling3d, + 'avg_pool': tf.layers.average_pooling2d, + 'split_axis': 4, + 'reduction_dimensions': [1, 2, 3] + } + } + + +def f(x, depth1, depth2, dim='2d', first_batch_norm=True, layer_stride=1, + training=True, padding='SAME'): + """Applies bottleneck residual function for 104-layer RevNet. + + Args: + x: input tensor + depth1: Number of output channels for the first and second conv layers. + depth2: Number of output channels for the third conv layer. + dim: '2d' if 2-dimensional, '3d' if 3-dimensional. + first_batch_norm: Whether to keep the first batch norm layer or not. + Typically used in the first RevNet block. + layer_stride: Stride for the first conv filter. Note that this particular + 104-layer RevNet architecture only varies the stride for the first conv + filter. The stride for the second conv filter is always set to 1. + training: True for train phase, False for eval phase. + padding: Padding for each conv layer. + + Returns: + Output tensor after applying residual function for 104-layer RevNet. + """ + conv = CONFIG[dim]['conv'] + with tf.variable_scope('f'): + if first_batch_norm: + net = tf.layers.batch_normalization(x, training=training) + net = tf.nn.relu(net) + else: + net = x + net = conv(net, depth1, 1, strides=layer_stride, + padding=padding, activation=None) + + net = tf.layers.batch_normalization(net, training=training) + net = tf.nn.relu(net) + net = conv(net, depth1, 3, strides=1, + padding=padding, activation=None) + + net = tf.layers.batch_normalization(net, training=training) + net = tf.nn.relu(net) + net = conv(net, depth2, 1, strides=1, + padding=padding, activation=None) + return net + + +def h(x, output_channels, dim='2d', layer_stride=1, scope='h'): + """Downsamples 'x' using a 1x1 convolution filter and a chosen stride. + + Args: + x: input tensor of size [N, H, W, C] + output_channels: Desired number of output channels. + dim: '2d' if 2-dimensional, '3d' if 3-dimensional. + layer_stride: What stride to use. Usually 1 or 2. + scope: Optional variable scope for the h function. + + This function uses a 1x1 convolution filter and a chosen stride to downsample + the input tensor x. + + Returns: + A downsampled tensor of size [N, H/2, W/2, output_channels] if layer_stride + is 2, else returns a tensor of size [N, H, W, output_channels] if + layer_stride is 1. + """ + conv = CONFIG[dim]['conv'] + with tf.variable_scope(scope): + x = conv(x, output_channels, 1, strides=layer_stride, padding='SAME', + activation=None) + return x + + +def init(images, num_channels, dim='2d', training=True, scope='init'): + """Standard ResNet initial block used as first RevNet block. + + Args: + images: [N, H, W, 3] tensor of input images to the model. + num_channels: Output depth of convolutional layer in initial block. + dim: '2d' if 2-dimensional, '3d' if 3-dimensional. + training: True for train phase, False for eval phase. + scope: Optional scope for the init block. + + Returns: + Two [N, H, W, C] output activations from input images. + """ + conv = CONFIG[dim]['conv'] + pool = CONFIG[dim]['max_pool'] + with tf.variable_scope(scope): + net = conv(images, num_channels, 7, strides=2, + padding='SAME', activation=None) + net = tf.layers.batch_normalization(net, training=training) + net = tf.nn.relu(net) + net = pool(net, pool_size=3, strides=2) + x1, x2 = tf.split(net, 2, axis=CONFIG[dim]['split_axis']) + return x1, x2 + + +def unit(x1, x2, block_num, depth1, depth2, num_layers, dim='2d', + first_batch_norm=True, stride=1, training=True): + """Implements bottleneck RevNet unit from authors' RevNet-104 architecture. + + Args: + x1: [N, H, W, C] tensor of network activations. + x2: [N, H, W, C] tensor of network activations. + block_num: integer ID of block + depth1: First depth in bottleneck residual unit. + depth2: Second depth in bottleneck residual unit. + num_layers: Number of layers in the RevNet block. + dim: '2d' if 2-dimensional, '3d' if 3-dimensional. + first_batch_norm: Whether to keep the first batch norm layer or not. + Typically used in the first RevNet block. + stride: Stride for the residual function. + training: True for train phase, False for eval phase. + + Returns: + Two [N, H, W, C] output activation tensors. + """ + scope_name = 'unit_%d' % block_num + with tf.variable_scope(scope_name): + # Manual implementation of downsampling + with tf.variable_scope('downsampling'): + with tf.variable_scope('x1'): + hx1 = h(x1, depth2, dim=dim, layer_stride=stride) + fx2 = f(x2, depth1, depth2, dim=dim, layer_stride=stride, + first_batch_norm=first_batch_norm, training=training) + x1 = hx1 + fx2 + with tf.variable_scope('x2'): + hx2 = h(x2, depth2, dim=dim, layer_stride=stride) + fx1 = f(x1, depth1, depth2, dim=dim, training=training) + x2 = hx2 + fx1 + + # Full block using memory-efficient rev_block implementation. + with tf.variable_scope('full_block'): + residual_func = lambda x: f(x, depth1, depth2, dim=dim, training=training) + x1, x2 = rev_block.rev_block(x1, x2, + residual_func, + residual_func, + num_layers=num_layers) + return x1, x2 + + +def final_block(x1, x2, dim='2d', training=True, scope='final_block'): + """Converts activations from last RevNet block to pre-logits. + + Args: + x1: [NxHxWxC] tensor of network activations. + x2: [NxHxWxC] tensor of network activations. + dim: '2d' if 2-dimensional, '3d' if 3-dimensional. + training: True for train phase, False for eval phase. + scope: Optional variable scope for the final block. + + Returns: + [N, hidden_dim] pre-logits tensor from activations x1 and x2. + """ + + # Final batch norm and relu + with tf.variable_scope(scope): + y = tf.concat([x1, x2], axis=CONFIG[dim]['split_axis']) + y = tf.layers.batch_normalization(y, training=training) + y = tf.nn.relu(y) + + # Global average pooling + net = tf.reduce_mean(y, CONFIG[dim]['reduction_dimensions'], + name='final_pool', keep_dims=True) + + return net + + +def revnet104(inputs, hparams, reuse=None): + """Uses Tensor2Tensor memory optimized RevNet block to build a RevNet. + + Args: + inputs: [NxHxWx3] tensor of input images to the model. + hparams: HParams object that contains the following parameters, + in addition to the parameters contained in the basic_params1() object in + the common_hparams module: + num_channels_first - A Python list where each element represents the + depth of the first and third convolutional layers in the bottleneck + residual unit for a given block. + num_channels_second - A Python list where each element represents the + depth of the second convolutional layer in the bottleneck residual + unit for a given block. + num_layers_per_block - A Python list containing the number of RevNet + layers for each block. + first_batch_norm - A Python list containing booleans representing the + presence of a batch norm layer at the beginning of a given block. + strides - A Python list containing integers representing the stride of + the residual function for each block. + num_channels_init_block - An integer representing the number of channels + for the convolutional layer in the initial block. + dimension - A string (either "2d" or "3d") that decides if the RevNet is + 2-dimensional or 3-dimensional. + reuse: Whether to reuse the default variable scope. + + Returns: + [batch_size, hidden_dim] pre-logits tensor from the bottleneck RevNet. + """ + training = hparams.mode == tf.estimator.ModeKeys.TRAIN + with tf.variable_scope('RevNet104', reuse=reuse): + x1, x2 = init(inputs, + num_channels=hparams.num_channels_init_block, + dim=hparams.dim, + training=training) + for block_num in range(1, len(hparams.num_layers_per_block)): + block = {'depth1': hparams.num_channels_first[block_num], + 'depth2': hparams.num_channels_second[block_num], + 'num_layers': hparams.num_layers_per_block[block_num], + 'first_batch_norm': hparams.first_batch_norm[block_num], + 'stride': hparams.strides[block_num]} + x1, x2 = unit(x1, x2, block_num, dim=hparams.dim, training=training, + **block) + pre_logits = final_block(x1, x2, dim=hparams.dim, training=training) + return pre_logits + + +@registry.register_model +class Revnet104(t2t_model.T2TModel): + + def body(self, features): + return revnet104(features['inputs'], self.hparams) + + +@registry.register_hparams +def revnet_base(): + """Set of hyperparameters.""" + hparams = common_hparams.basic_params1() + hparams.add_hparam('num_channels_first', [64, 128, 256, 416]) + hparams.add_hparam('num_channels_second', [256, 512, 1024, 1664]) + hparams.add_hparam('num_layers_per_block', [1, 1, 10, 1]) + hparams.add_hparam('first_batch_norm', [False, True, True, True]) + hparams.add_hparam('strides', [1, 2, 2, 2]) + hparams.add_hparam('num_channels_init_block', 32) + hparams.add_hparam('dim', '2d') + + hparams.optimizer = 'Momentum' + hparams.learning_rate = 0.01 + hparams.weight_decay = 1e-4 + # Can run with a batch size of 128 with Problem ImageImagenet224 + hparams.tpu_batch_size_per_shard = 128 + return hparams diff --git a/tensor2tensor/models/revnet_test.py b/tensor2tensor/models/revnet_test.py new file mode 100644 index 000000000..2c9abc0a9 --- /dev/null +++ b/tensor2tensor/models/revnet_test.py @@ -0,0 +1,115 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Revnet.""" + +from tensor2tensor.models import revnet +import tensorflow as tf + + +class RevnetTest(tf.test.TestCase): + + def testH(self): + rev_block_input = tf.random_uniform([1, 299, 299, 3]) + rev_block_output = revnet.h(rev_block_input, 256) + self.assertEquals(rev_block_output.get_shape(), [1, 299, 299, 256]) + + def testHStride(self): + rev_block_input = tf.random_uniform([2, 299, 299, 256]) + rev_block_output = revnet.h(rev_block_input, 512, layer_stride=2, + scope='HStride') + self.assertEquals(rev_block_output.get_shape(), [2, 150, 150, 512]) + + def testInit(self): + images = tf.random_uniform([1, 299, 299, 3]) + x1, x2 = revnet.init(images, 32) + self.assertEquals(x1.get_shape(), [1, 74, 74, 16]) + self.assertEquals(x2.get_shape(), [1, 74, 74, 16]) + + def testInit3D(self): + images = tf.random_uniform([1, 299, 299, 299, 3]) + x1, x2 = revnet.init(images, 32, dim='3d', scope='init3d') + self.assertEquals(x1.get_shape(), [1, 74, 74, 74, 16]) + self.assertEquals(x2.get_shape(), [1, 74, 74, 74, 16]) + + def testUnit1(self): + x1 = tf.random_uniform([4, 74, 74, 256]) + x2 = tf.random_uniform([4, 74, 74, 256]) + x1, x2 = revnet.unit(x1, x2, block_num=1, depth1=64, depth2=256, + first_batch_norm=True, num_layers=1) + self.assertEquals(x1.get_shape(), [4, 74, 74, 256]) + self.assertEquals(x2.get_shape(), [4, 74, 74, 256]) + + def testUnit2(self): + x1 = tf.random_uniform([4, 74, 74, 256]) + x2 = tf.random_uniform([4, 74, 74, 256]) + x1, x2 = revnet.unit(x1, x2, block_num=2, depth1=128, depth2=512, + num_layers=1, stride=2) + self.assertEquals(x1.get_shape(), [4, 37, 37, 512]) + self.assertEquals(x2.get_shape(), [4, 37, 37, 512]) + + def testUnit3(self): + x1 = tf.random_uniform([1, 37, 37, 512]) + x2 = tf.random_uniform([1, 37, 37, 512]) + x1, x2 = revnet.unit(x1, x2, block_num=3, depth1=256, + depth2=1024, num_layers=10, stride=2) + self.assertEquals(x1.get_shape(), [1, 19, 19, 1024]) + self.assertEquals(x2.get_shape(), [1, 19, 19, 1024]) + + def testUnit4(self): + x1 = tf.random_uniform([1, 19, 19, 1024]) + x2 = tf.random_uniform([1, 19, 19, 1024]) + x1, x2 = revnet.unit(x1, x2, block_num=4, depth1=416, + depth2=1664, num_layers=1, stride=2) + self.assertEquals(x1.get_shape(), [1, 10, 10, 1664]) + self.assertEquals(x2.get_shape(), [1, 10, 10, 1664]) + + def testUnit3D(self): + x1 = tf.random_uniform([4, 74, 74, 74, 256]) + x2 = tf.random_uniform([4, 74, 74, 74, 256]) + x1, x2 = revnet.unit(x1, x2, block_num=5, depth1=128, depth2=512, + num_layers=1, dim='3d', stride=2) + self.assertEquals(x1.get_shape(), [4, 37, 37, 37, 512]) + self.assertEquals(x2.get_shape(), [4, 37, 37, 37, 512]) + + def testFinalBlock(self): + x1 = tf.random_uniform([5, 10, 10, 1024]) + x2 = tf.random_uniform([5, 10, 10, 1024]) + logits = revnet.final_block(x1, x2) + self.assertEquals(logits.shape, [5, 1, 1, 2048]) + + def testFinalBlock3D(self): + x1 = tf.random_uniform([5, 10, 10, 10, 1024]) + x2 = tf.random_uniform([5, 10, 10, 10, 1024]) + logits = revnet.final_block(x1, x2, dim='3d', scope='FinalBlock3D') + self.assertEquals(logits.shape, [5, 1, 1, 1, 2048]) + + def testEndToEnd(self): + images = tf.random_uniform([1, 299, 299, 3]) + hparams = revnet.revnet_base() + hparams.mode = tf.estimator.ModeKeys.TRAIN + logits = revnet.revnet104(images, hparams) + self.assertEquals(logits.shape, [1, 1, 1, 3328]) + + def testEndToEnd3D(self): + images = tf.random_uniform([1, 299, 299, 299, 3]) + hparams = revnet.revnet_base() + hparams.dim = '3d' + hparams.mode = tf.estimator.ModeKeys.TRAIN + logits = revnet.revnet104(images, hparams) + self.assertEquals(logits.shape, [1, 1, 1, 1, 3328]) + +if __name__ == '__main__': + tf.test.main() From 5388318279dadd1530bbda6511d42918afb26e76 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Wed, 20 Dec 2017 09:22:10 -0800 Subject: [PATCH 0672/4095] Adding a first Gym problem for generative RL models. PiperOrigin-RevId: 179694851 --- setup.py | 1 + tensor2tensor/data_generators/all_problems.py | 1 + tensor2tensor/data_generators/gym.py | 138 ++++++++++++++++++ 3 files changed, 140 insertions(+) create mode 100644 tensor2tensor/data_generators/gym.py diff --git a/setup.py b/setup.py index 1d7c28305..5bcacbd85 100644 --- a/setup.py +++ b/setup.py @@ -28,6 +28,7 @@ install_requires=[ 'bz2file', 'future', + 'gym', 'numpy', 'requests', 'sympy', diff --git a/tensor2tensor/data_generators/all_problems.py b/tensor2tensor/data_generators/all_problems.py index 2aca3d377..ba91965af 100644 --- a/tensor2tensor/data_generators/all_problems.py +++ b/tensor2tensor/data_generators/all_problems.py @@ -25,6 +25,7 @@ from tensor2tensor.data_generators import cipher from tensor2tensor.data_generators import cnn_dailymail from tensor2tensor.data_generators import desc2code +from tensor2tensor.data_generators import gym from tensor2tensor.data_generators import ice_parsing from tensor2tensor.data_generators import image from tensor2tensor.data_generators import imdb diff --git a/tensor2tensor/data_generators/gym.py b/tensor2tensor/data_generators/gym.py new file mode 100644 index 000000000..631c2b281 --- /dev/null +++ b/tensor2tensor/data_generators/gym.py @@ -0,0 +1,138 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data generators for Gym environments.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +# Dependency imports + +import gym + +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem +from tensor2tensor.utils import registry + +import tensorflow as tf + + + +class GymDiscreteProblem(problem.Problem): + """Gym environment with discrete actions and rewards.""" + + def __init__(self, *args, **kwargs): + super(GymDiscreteProblem, self).__init__(*args, **kwargs) + self._env = None + + @property + def env_name(self): + # This is the name of the Gym environment for this problem. + raise NotImplementedError() + + @property + def env(self): + if self._env is None: + self._env = gym.make(self.env_name) + return self._env + + @property + def num_actions(self): + raise NotImplementedError() + + @property + def num_rewards(self): + raise NotImplementedError() + + @property + def num_steps(self): + raise NotImplementedError() + + @property + def num_shards(self): + return 10 + + @property + def num_dev_shards(self): + return 1 + + def get_action(self, observation=None): + return self.env.action_space.sample() + + def hparams(self, defaults, unused_model_hparams): + p = defaults + p.input_modality = {"inputs": ("image:identity", 256), + "inputs_prev": ("image:identity", 256), + "reward": ("symbol:identity", self.num_rewards), + "action": ("symbol:identity", self.num_actions)} + p.target_modality = ("image:identity", 256) + p.input_space_id = problem.SpaceID.IMAGE + p.target_space_id = problem.SpaceID.IMAGE + + def generator(self, data_dir, tmp_dir): + self.env.reset() + action = self.get_action() + prev_observation, observation = None, None + for _ in range(self.num_steps): + prev_prev_observation = prev_observation + prev_observation = observation + observation, reward, done, _ = self.env.step(action) + action = self.get_action(observation) + if done: + self.env.reset() + def flatten(nparray): + flat1 = [x for sublist in nparray.tolist() for x in sublist] + return [x for sublist in flat1 for x in sublist] + if prev_prev_observation is not None: + yield {"inputs_prev": flatten(prev_prev_observation), + "inputs": flatten(prev_observation), + "action": [action], + "done": [done], + "reward": [reward], + "targets": flatten(observation)} + + def generate_data(self, data_dir, tmp_dir, task_id=-1): + train_paths = self.training_filepaths( + data_dir, self.num_shards, shuffled=False) + dev_paths = self.dev_filepaths( + data_dir, self.num_dev_shards, shuffled=False) + all_paths = train_paths + dev_paths + generator_utils.generate_files( + self.generator(data_dir, tmp_dir), all_paths) + generator_utils.shuffle_dataset(all_paths) + + +@registry.register_problem +class GymPongRandom5k(GymDiscreteProblem): + """Pong game, random actions.""" + + @property + def env_name(self): + return "Pong-v0" + + @property + def num_actions(self): + return 4 + + @property + def num_rewards(self): + return 2 + + @property + def num_steps(self): + return 5000 From fd77a8bcac45c6118f16943bcd335954fc1a8ba1 Mon Sep 17 00:00:00 2001 From: T2T Team Date: Wed, 20 Dec 2017 23:44:57 -0800 Subject: [PATCH 0673/4095] Fix the rounding bottleneck. At present the input is squashed into 1-d and is rounded in the interval [0, v_size]. PiperOrigin-RevId: 179778221 --- tensor2tensor/models/transformer_vae.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index d779b093f..c43342afd 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -18,17 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - # Dependency imports - -from six.moves import xrange # pylint: disable=redefined-builtin - from tensor2tensor.layers import common_layers from tensor2tensor.models import transformer from tensor2tensor.utils import expert_utils from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model - import tensorflow as tf @@ -207,7 +202,7 @@ def embed(x): shape=[hparams.v_size, hparams.hidden_size]) h1 = tf.gather(means, x) elif hparams.bottleneck_kind == "rounding": - h1 = tf.round(x) + h1 = x h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") return tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") @@ -255,9 +250,19 @@ def embed(x): x_means_hot, x_means, l = kmeans(x, means, hparams, name="vq-vae-kmeans") h1 = tf.stop_gradient(x_means) + x - tf.stop_gradient(x) c = tf.argmax(x_means_hot, axis=-1) - if hparams.bottleneck_kind == "round": - c = tf.round(x) - h1 = x + tf.stop_gradient(tf.round(x) - x) + if hparams.bottleneck_kind == "rounding": + h = tf.layers.dense(x, 1, name="vcc") + + # Make h between 0 and 1 + h = tf.sigmoid(h) + + # Multiply by z_size to get it between [0, z_size] + h *= hparams.v_size + + # Use the rounding bottleneck + h1 = h + tf.stop_gradient(tf.round(h) - h) + c = tf.squeeze(tf.round(h), axis=-1) + c = tf.to_int32(c) h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") res = tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") return res, c, l, embed From 4354f3ba1dcb1687eb8cccb5fd6ac3d25b12866b Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Thu, 21 Dec 2017 09:14:54 -0800 Subject: [PATCH 0674/4095] update README (first shot). PiperOrigin-RevId: 179821388 --- README.md | 62 +++++++++++++++++++++++++++++++++++++++------ docs/walkthrough.md | 62 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 108 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 9525e9bcb..de2951c53 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# T2T: Tensor2Tensor Transformers +# Tensor2Tensor [![PyPI version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) @@ -10,11 +10,18 @@ welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CO [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) [![Travis](https://img.shields.io/travis/tensorflow/tensor2tensor.svg)](https://travis-ci.org/tensorflow/tensor2tensor) -[T2T](https://github.com/tensorflow/tensor2tensor) is a modular and extensible -library and binaries for supervised learning with TensorFlow and with support -for sequence tasks. It is actively used and maintained by researchers and -engineers within the Google Brain team. You can read more about Tensor2Tensor in -the recent [Google Research Blog post introducing +[Tensor2Tensor](https://github.com/tensorflow/tensor2tensor), or +[T2T](https://github.com/tensorflow/tensor2tensor) for short, is a library +of deep learning models and datasets. It has binaries to train the models and +to download and prepare the data for you. T2T is modular and extensible and can +be used in [notebooks](https://goo.gl/wkHexj) for prototyping your own models +or running existing ones on your data. It is actively used and maintained by +researchers and engineers within +the [Google Brain team](https://research.google.com/teams/brain/) and was used +to develop state-of-the-art models for translation (see +[Attention Is All You Need](https://arxiv.org/abs/1706.03762)), summarization, +image generation and other tasks. You can read +more about T2T in the [Google Research Blog post introducing it](https://research.googleblog.com/2017/06/accelerating-deep-learning-research.html). We're eager to collaborate with you on extending T2T, so please feel @@ -29,8 +36,14 @@ You can chat with us and other users on [Google Group](https://groups.google.com/forum/#!forum/tensor2tensor) to keep up with T2T announcements. -Here is a one-command version that installs tensor2tensor, downloads the data, +### Quick Start + +[This iPython notebook](https://goo.gl/wkHexj) explains T2T and runs in your +browser using a free VM from Google, no installation needed. + +Alternatively, here is a one-command version that installs T2T, downloads data, trains an English-German translation model, and evaluates it: + ``` pip install tensor2tensor && t2t-trainer \ --generate_data \ @@ -53,11 +66,17 @@ t2t-decoder \ --decode_interactive ``` -See the [Walkthrough](#walkthrough) below for more details on each step. +See the [Walkthrough](#walkthrough) below for more details on each step +and [Suggested Models](#suggested-models) for well performing models +on common tasks. ### Contents * [Walkthrough](#walkthrough) +* [Suggested Models](#suggested-models) + * [Translation](#translation) + * [Summarization](#summarization) + * [Image Classification](#image-classification) * [Installation](#installation) * [Features](#features) * [T2T Overview](#t2t-overview) @@ -132,6 +151,33 @@ cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes --- +## Suggested Models + +Here are some combinations of models, hparams and problems that we found +work well, so we suggest to use them if you're interested in that problem. + +### Translation + +For translation, esp. English-German and English-French, we suggest to use +the Transformer model in base or big configurations, i.e. +for `--problems=translate_ende_wmt32k` use `--model=transformer` and +`--hparams_set=transformer_base`. When trained on 8 GPUs for 300K steps +this should reach a BLEU score of about 28. + +### Summarization + +For summarization suggest to use the Transformer model in prepend mode, i.e. +for `--problems=summarize_cnn_dailymail32k` use `--model=transformer` and +`--hparams_set=transformer_prepend`. + +### Image Classification + +For image classification suggest to use the ResNet or Xception, i.e. +for `--problems=image_imagenet` use `--model=resnet50` and +`--hparams_set=resnet_base` or `--model=xception` and +`--hparams_set=xception_base`. + + ## Installation ``` diff --git a/docs/walkthrough.md b/docs/walkthrough.md index 9525e9bcb..de2951c53 100644 --- a/docs/walkthrough.md +++ b/docs/walkthrough.md @@ -1,4 +1,4 @@ -# T2T: Tensor2Tensor Transformers +# Tensor2Tensor [![PyPI version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) @@ -10,11 +10,18 @@ welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CO [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) [![Travis](https://img.shields.io/travis/tensorflow/tensor2tensor.svg)](https://travis-ci.org/tensorflow/tensor2tensor) -[T2T](https://github.com/tensorflow/tensor2tensor) is a modular and extensible -library and binaries for supervised learning with TensorFlow and with support -for sequence tasks. It is actively used and maintained by researchers and -engineers within the Google Brain team. You can read more about Tensor2Tensor in -the recent [Google Research Blog post introducing +[Tensor2Tensor](https://github.com/tensorflow/tensor2tensor), or +[T2T](https://github.com/tensorflow/tensor2tensor) for short, is a library +of deep learning models and datasets. It has binaries to train the models and +to download and prepare the data for you. T2T is modular and extensible and can +be used in [notebooks](https://goo.gl/wkHexj) for prototyping your own models +or running existing ones on your data. It is actively used and maintained by +researchers and engineers within +the [Google Brain team](https://research.google.com/teams/brain/) and was used +to develop state-of-the-art models for translation (see +[Attention Is All You Need](https://arxiv.org/abs/1706.03762)), summarization, +image generation and other tasks. You can read +more about T2T in the [Google Research Blog post introducing it](https://research.googleblog.com/2017/06/accelerating-deep-learning-research.html). We're eager to collaborate with you on extending T2T, so please feel @@ -29,8 +36,14 @@ You can chat with us and other users on [Google Group](https://groups.google.com/forum/#!forum/tensor2tensor) to keep up with T2T announcements. -Here is a one-command version that installs tensor2tensor, downloads the data, +### Quick Start + +[This iPython notebook](https://goo.gl/wkHexj) explains T2T and runs in your +browser using a free VM from Google, no installation needed. + +Alternatively, here is a one-command version that installs T2T, downloads data, trains an English-German translation model, and evaluates it: + ``` pip install tensor2tensor && t2t-trainer \ --generate_data \ @@ -53,11 +66,17 @@ t2t-decoder \ --decode_interactive ``` -See the [Walkthrough](#walkthrough) below for more details on each step. +See the [Walkthrough](#walkthrough) below for more details on each step +and [Suggested Models](#suggested-models) for well performing models +on common tasks. ### Contents * [Walkthrough](#walkthrough) +* [Suggested Models](#suggested-models) + * [Translation](#translation) + * [Summarization](#summarization) + * [Image Classification](#image-classification) * [Installation](#installation) * [Features](#features) * [T2T Overview](#t2t-overview) @@ -132,6 +151,33 @@ cat $DECODE_FILE.$MODEL.$HPARAMS.beam$BEAM_SIZE.alpha$ALPHA.decodes --- +## Suggested Models + +Here are some combinations of models, hparams and problems that we found +work well, so we suggest to use them if you're interested in that problem. + +### Translation + +For translation, esp. English-German and English-French, we suggest to use +the Transformer model in base or big configurations, i.e. +for `--problems=translate_ende_wmt32k` use `--model=transformer` and +`--hparams_set=transformer_base`. When trained on 8 GPUs for 300K steps +this should reach a BLEU score of about 28. + +### Summarization + +For summarization suggest to use the Transformer model in prepend mode, i.e. +for `--problems=summarize_cnn_dailymail32k` use `--model=transformer` and +`--hparams_set=transformer_prepend`. + +### Image Classification + +For image classification suggest to use the ResNet or Xception, i.e. +for `--problems=image_imagenet` use `--model=resnet50` and +`--hparams_set=resnet_base` or `--model=xception` and +`--hparams_set=xception_base`. + + ## Installation ``` From bac13211ac162457ad94b082124cca7ace90f77d Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Thu, 21 Dec 2017 09:28:53 -0800 Subject: [PATCH 0675/4095] v1.4, rm unused code and codepaths PiperOrigin-RevId: 179822701 --- .travis.yml | 4 +- docs/cloud_tpu.md | 48 ++- docs/example_life.md | 197 ---------- docs/index.md | 2 +- docs/overview.md | 175 +++++++++ setup.py | 3 +- tensor2tensor/bin/t2t-decoder | 2 +- tensor2tensor/bin/t2t-tpu-trainer | 155 -------- tensor2tensor/bin/t2t-trainer | 186 +++++++--- tensor2tensor/bin/t2t_decoder.py | 2 +- tensor2tensor/bin/t2t_trainer.py | 186 +++++++--- tensor2tensor/data_generators/problem.py | 35 +- tensor2tensor/models/super_lm.py | 2 +- tensor2tensor/notebooks/hello_t2t.ipynb | 10 +- tensor2tensor/tpu/tpu_trainer.py | 50 ++- tensor2tensor/tpu/tpu_trainer_lib.py | 12 +- tensor2tensor/tpu/tpu_trainer_lib_test.py | 59 ++- tensor2tensor/utils/data_reader.py | 155 -------- tensor2tensor/utils/decoding.py | 49 ++- tensor2tensor/utils/input_fn_builder.py | 238 ------------ tensor2tensor/utils/input_fn_builder_test.py | 61 ---- tensor2tensor/utils/metrics.py | 58 ++- tensor2tensor/utils/model_builder.py | 310 ---------------- tensor2tensor/utils/t2t_model.py | 30 +- tensor2tensor/utils/trainer_utils.py | 341 ------------------ tensor2tensor/utils/trainer_utils_test.py | 208 ----------- .../TransformerVisualization.ipynb | 43 +-- 27 files changed, 723 insertions(+), 1898 deletions(-) delete mode 100644 docs/example_life.md create mode 100644 docs/overview.md delete mode 100644 tensor2tensor/bin/t2t-tpu-trainer delete mode 100644 tensor2tensor/utils/input_fn_builder.py delete mode 100644 tensor2tensor/utils/input_fn_builder_test.py delete mode 100644 tensor2tensor/utils/model_builder.py delete mode 100644 tensor2tensor/utils/trainer_utils.py delete mode 100644 tensor2tensor/utils/trainer_utils_test.py diff --git a/.travis.yml b/.travis.yml index 370682401..b67c74b1d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,9 +14,9 @@ env: - T2T_DATA_DIR=/tmp/t2t-data - T2T_TRAIN_DIR=/tmp/t2t-train script: - - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/utils/trainer_utils_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/tpu/tpu_trainer_lib_test.py + - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/tpu/tpu_trainer_lib_test.py - pytest tensor2tensor/utils/registry_test.py - - pytest tensor2tensor/utils/trainer_utils_test.py + - pytest tensor2tensor/tpu/tpu_trainer_lib_test.py - t2t-datagen 2>&1 | grep translate && echo passed - python -c "from tensor2tensor.models import transformer; print(transformer.Transformer.__name__)" - t2t-trainer --registry_help diff --git a/docs/cloud_tpu.md b/docs/cloud_tpu.md index 3dc3986cf..56bad4093 100644 --- a/docs/cloud_tpu.md +++ b/docs/cloud_tpu.md @@ -3,8 +3,10 @@ Tensor2Tensor supports running on Google Cloud Platforms TPUs, chips specialized for ML training. -Not all models are supported but we've tested so far with Transformer (sequence -model) as well as Xception (image model). +Models and hparams that are known to work on TPU: +* `transformer` with `transformer_tpu` +* `xception` with `xception_base` +* `resnet50` with `resnet_base` To run on TPUs, you need to be part of the alpha program; if you're not, these commands won't work for you currently, but access will expand soon, so get @@ -12,6 +14,8 @@ excited for your future ML supercomputers in the cloud. ## Tutorial: Transformer En-De translation on TPU +Update `gcloud`: `gcloud components update` + Set your default zone to a TPU-enabled zone. TPU machines are only available in certain zones for now. ``` @@ -40,29 +44,32 @@ gcloud alpha compute tpus create \ To see all TPU instances running: `gcloud alpha compute tpus list`. The `TPU_IP` should be unique amongst the list and follow the format `10.240.i.2`. -Generate data to GCS -If you already have the data locally, use `gsutil cp` to cp to GCS. +SSH in with port forwarding for TensorBoard ``` -DATA_DIR=gs://my-bucket/t2t/data/ -t2t-datagen --problem=translate_ende_wmt8k --data_dir=$DATA_DIR +gcloud compute ssh $USER-vm -- -L 6006:localhost:6006 ``` -SSH in with port forwarding for TensorBoard +Now that you're on the cloud instance, install T2T: ``` -gcloud compute ssh $USER-vm -L 6006:localhost:6006 +pip install tensor2tensor --user +# If your python bin dir isn't already in your path +export PATH=$HOME/.local/bin:$PATH ``` -Now that you're on the cloud instance, install T2T: +Generate data to GCS +If you already have the data, use `gsutil cp` to copy to GCS. ``` -pip install tensor2tensor +GCS_BUCKET=gs://my-bucket +DATA_DIR=$GCS_BUCKET/t2t/data/ +t2t-datagen --problem=translate_ende_wmt8k --data_dir=$DATA_DIR ``` Setup some vars used below. `TPU_IP` and `DATA_DIR` should be the same as what was used above. Note that the `DATA_DIR` and `OUT_DIR` must be GCS buckets. ``` TPU_IP= -DATA_DIR=gs://my-bucket/t2t/data/ -OUT_DIR=gs://my-bucket/t2t/training/ +DATA_DIR=$GCS_BUCKET/t2t/data/ +OUT_DIR=$GCS_BUCKET/t2t/training/ TPU_MASTER=grpc://$TPU_IP:8470 ``` @@ -73,17 +80,18 @@ tensorboard --logdir=$OUT_DIR > /tmp/tensorboard_logs.txt 2>&1 & Train and evaluate. ``` -t2t-tpu-trainer \ - --master=$TPU_MASTER \ - --data_dir=$DATA_DIR \ - --output_dir=$OUT_DIR \ - --problems=translate_ende_wmt8k \ +t2t-trainer \ --model=transformer \ - --hparams_set=transformer_tiny_tpu \ + --hparams_set=transformer_tpu \ + --problems=translate_ende_wmt8k \ --train_steps=10 \ --eval_steps=10 \ --local_eval_frequency=10 \ - --iterations_per_loop=10 + --iterations_per_loop=10 \ + --master=$TPU_MASTER \ + --use_tpu=True \ + --data_dir=$DATA_DIR \ + --output_dir=$OUT_DIR ``` The above command will train for 10 steps, then evaluate for 10 steps. You can @@ -91,7 +99,7 @@ The above command will train for 10 steps, then evaluate for 10 steps. You can `--train_steps` flag. Evaluation will happen every `--local_eval_frequency` steps, each time for `--eval_steps`. When you increase then number of training steps, also increase `--iterations_per_loop`, which controls how frequently the -TPU machine returns control to the Python code (1000 seems like a fine number). +TPU machine returns control to the host machine (1000 seems like a fine number). Back on your local machine, open your browser and navigate to `localhost:6006` for TensorBoard. diff --git a/docs/example_life.md b/docs/example_life.md deleted file mode 100644 index 850f4d500..000000000 --- a/docs/example_life.md +++ /dev/null @@ -1,197 +0,0 @@ -# T2T: Life of an Example - -[![PyPI -version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) -[![GitHub -Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](https://github.com/tensorflow/tensor2tensor/issues) -[![Contributions -welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) -[![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) -[![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) - -This doc explains how a training example flows through T2T, from data generation -to training, evaluation, and decoding. It points out the various hooks available -in the `Problem` and `T2TModel` classes and gives an overview of the T2T code -(key functions, files, hyperparameters, etc.). - -Some key files and their functions: - -* [`trainer_utils.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/trainer_utils.py): - Constructs and runs all the main components of the system (the `Problem`, - the `HParams`, the `Estimator`, the `Experiment`, the `input_fn`s and - `model_fn`). -* [`common_hparams.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/layers/common_hparams.py): - `basic_params1` serves as the base for all model hyperparameters. Registered - model hparams functions always start with this default set of - hyperparameters. -* [`problem.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py): - Every dataset in T2T subclasses `Problem`. -* [`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py): - Every model in T2T subclasses `T2TModel`. - -## Data Generation - -The `t2t-datagen` binary is the entrypoint for data generation. It simply looks -up the `Problem` specified by `--problem` and calls -`Problem.generate_data(data_dir, tmp_dir)`. - -All `Problem`s are expected to generate 2 sharded `TFRecords` files - 1 for -training and 1 for evaluation - with `tensorflow.Example` protocol buffers. The -expected names of the files are given by `Problem.{training, dev}_filepaths`. -Typically, the features in the `Example` will be `"inputs"` and `"targets"`; -however, some tasks have a different on-disk representation that is converted to -`"inputs"` and `"targets"` online in the input pipeline (e.g. image features are -typically stored with features `"image/encoded"` and `"image/format"` and the -decoding happens in the input pipeline). - -For tasks that require a vocabulary, this is also the point at which the -vocabulary is generated and all examples are encoded. - -There are several utility functions in -[`generator_utils`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/generator_utils.py) -that are commonly used by `Problem`s to generate data. Several are highlighted -below: - -* `generate_dataset_and_shuffle`: given 2 generators, 1 for training and 1 for - eval, yielding dictionaries of `>`, will produce sharded and shuffled `TFRecords` files with - `tensorflow.Example` protos. -* `maybe_download`: downloads a file at a URL to the given directory and - filename (see `maybe_download_from_drive` if the URL points to Google - Drive). -* `get_or_generate_vocab_inner`: given a target vocabulary size and a - generator that yields lines or tokens from the dataset, will build a - `SubwordTextEncoder` along with a backing vocabulary file that can be used - to map input strings to lists of ids. - [`SubwordTextEncoder`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/text_encoder.py) - uses word pieces and its encoding is fully invertible. - -## Data Input Pipeline - -Once the data is produced on disk, training, evaluation, and inference (if -decoding from the dataset) consume it by way of T2T input pipeline. This section -will give an overview of that pipeline with specific attention to the various -hooks in the `Problem` class and the model's `HParams` object (typically -registered in the model's file and specified by the `--hparams_set` flag). - -The entire input pipeline is implemented with the new `tf.data.Dataset` API -(previously `tf.data.Dataset`). - -The key function in the codebase for the input pipeline is -[`data_reader.input_pipeline`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/data_reader.py). -The full input function is built in -[`input_fn_builder.build_input_fn`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/input_fn_builder.py) -(which calls `data_reader.input_pipeline`). - -### Reading and decoding data - -`Problem.dataset_filename` specifies the prefix of the files on disk (they will -be suffixed with `-train` or `-dev` as well as their sharding). - -The features read from the files and their decoding is specified by -`Problem.example_reading_spec`, which returns 2 items: - -1. Dict mapping from on-disk feature name to on-disk types (`VarLenFeature` or - `FixedLenFeature`. -2. Dict mapping output feature name to decoder. This return value is optional - and is only needed for tasks whose features may require additional decoding - (e.g. images). You can find the available decoders in - `tf.contrib.slim.tfexample_decoder`. - -At this point in the input pipeline, the example is a `dict`. - -### Preprocessing - -The read `Example` now runs through `Problem.preprocess_example`, which by -default runs -[`problem.preprocess_example_common`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py), -which may truncate the inputs/targets or prepend to targets, governed by some -hyperparameters. - -### Batching - -Examples are bucketed by sequence length and then batched out of those buckets. -This significantly improves performance over a naive batching scheme for -variable length sequences because each example in a batch must be padded to -match the example with the maximum length in the batch. - -There are several hyperparameters that affect how examples are batched together: - -* `hp.batch_size`: this is the approximate total number of tokens in the batch - (i.e. for a sequence problem, long sequences will have smaller actual batch - size and short sequences will have a larger actual batch size in order to - generally have an equal number of tokens in the batch). -* `hp.max_length`: sequences with length longer than this will be dropped - during training (and also during eval if `hp.eval_drop_long_sequences` is - `True`). If not set, the maximum length of examples is set to - `hp.batch_size`. -* `hp.batch_size_multiplier`: multiplier for the maximum length -* `hp.min_length_bucket`: example length for the smallest bucket (i.e. the - smallest bucket will bucket examples up to this length). -* `hp.length_bucket_step`: controls how spaced out the length buckets are. - -## Building the Model - -At this point, the input features typically have `"inputs"` and `"targets"`, -each of which is a batched 4-D Tensor (e.g. of shape `[batch_size, -sequence_length, 1, 1]` for text input or `[batch_size, height, width, 3]` for -image input). - -A `T2TModel` is composed of transforms of the input features by `Modality`s, -then the body of the model, then transforms of the model output to predictions -by a `Modality`, and then a loss (during training). - -The `Modality` types for the various input features and for the target are -specified in `Problem.hparams`. A `Modality` is a feature adapter that enables -models to be agnostic to input/output spaces. You can see the various -`Modality`s in -[`modalities.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/layers/modalities.py). - -The sketch structure of a T2T model is as follows: - -```python -features = {...} # output from the input pipeline -input_modaly = ... # specified in Problem.hparams -target_modality = ... # specified in Problem.hparams - -transformed_features = {} -transformed_features["inputs"] = input_modality.bottom( - features["inputs"]) -transformed_features["targets"] = target_modality.targets_bottom( - features["targets"]) # for autoregressive models - -body_outputs = model.body(transformed_features) - -predictions = target_modality.top(body_outputs, features["targets"]) -loss = target_modality.loss(predictions, features["targets"]) -``` - -Most `T2TModel`s only override `body`. - -## Training, Eval, Inference modes - -Both the input function and model functions take a mode in the form of a -`tf.estimator.ModeKeys`, which allows the functions to behave differently in -different modes. - -In training, the model function constructs an optimizer and minimizes the loss. - -In evaluation, the model function constructs the evaluation metrics specified by -`Problem.eval_metrics`. - -In inference, the model function outputs predictions. - -## `Estimator` and `Experiment` - -With the input function and model functions constructed, the actual training -loop and related services (checkpointing, summaries, continuous evaluation, -etc.) are all handled by `Estimator` and `Experiment` objects, constructed in -[`trainer_utils.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/trainer_utils.py). - -## Decoding - -* [`decoding.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/decoding.py) - -TODO(rsepassi): Explain decoding (interactive, from file, and from dataset) and -`Problem.feature_encoders`. diff --git a/docs/index.md b/docs/index.md index 3eb7f1c61..da2446803 100644 --- a/docs/index.md +++ b/docs/index.md @@ -24,6 +24,6 @@ documentation, from basic tutorials to full code documentation. ## Deep Dive -* [Life of an Example](example_life.md): how all parts of T2T are connected and +* [System Overview](overview.md): how all parts of T2T are connected and work together * [Distributed Training](distributed_training.md) diff --git a/docs/overview.md b/docs/overview.md new file mode 100644 index 000000000..fcc0aba5a --- /dev/null +++ b/docs/overview.md @@ -0,0 +1,175 @@ +# T2T: Life of an Example + +[![PyPI +version](https://badge.fury.io/py/tensor2tensor.svg)](https://badge.fury.io/py/tensor2tensor) +[![GitHub +Issues](https://img.shields.io/github/issues/tensorflow/tensor2tensor.svg)](https://github.com/tensorflow/tensor2tensor/issues) +[![Contributions +welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) +[![Gitter](https://img.shields.io/gitter/room/nwjs/nw.js.svg)](https://gitter.im/tensor2tensor/Lobby) +[![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) + +This doc explains how a training example flows through T2T, from data generation +to training, evaluation, and decoding. + +Some key files and their functions: + +* [`tpu_trainer.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/tpu/tpu_trainer.py) and [`tpu_trainer_lib.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/tpu/tpu_trainer_lib.py): + Main entrypoint for training and evaluation. Constructs and runs all the + main components of the system (the `Problem`, the `HParams`, the + `Estimator`, the `Experiment`, the `input_fn`s and `model_fn`). +* [`common_hparams.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/layers/common_hparams.py): + `basic_params1` serves as the base for all model hyperparameters. Registered + model hparams functions always start with this default set of + hyperparameters. +* [`problem.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/problem.py): + Every dataset in T2T subclasses `Problem`. `Problem.input_fn` is the + Estimator input function. +* [`t2t_model.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/t2t_model.py): + Every model in T2T subclasses `T2TModel`. `T2TModel.estimator_model_fn` is + the Estimator model function. + +## Data Generation + +The `t2t-datagen` binary is the entrypoint for data generation. It simply looks +up the `Problem` specified by `--problem` and calls +`Problem.generate_data(data_dir, tmp_dir)`. + +All `Problem`s are expected to generate 2 sharded `TFRecords` files - 1 for +training and 1 for evaluation - with `tensorflow.Example` protocol buffers. The +expected names of the files are given by `Problem.{training, dev}_filepaths`. +Typically, the features in the `Example` will be `"inputs"` and `"targets"`; +however, some tasks have a different on-disk representation that is converted to +`"inputs"` and `"targets"` online in the input pipeline (e.g. image features are +typically stored with features `"image/encoded"` and `"image/format"` and the +decoding happens in the input pipeline). + +For tasks that require a vocabulary, this is also the point at which the +vocabulary is generated and all examples are encoded. + +There are several utility functions in +[`generator_utils`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/generator_utils.py) +that are commonly used by `Problem`s to generate data. Several are highlighted +below: + +* `generate_dataset_and_shuffle`: given 2 generators, 1 for training and 1 for + eval, yielding dictionaries of `>`, will produce sharded and shuffled `TFRecords` files with + `tensorflow.Example` protos. +* `maybe_download`: downloads a file at a URL to the given directory and + filename (see `maybe_download_from_drive` if the URL points to Google + Drive). +* `get_or_generate_vocab_inner`: given a target vocabulary size and a + generator that yields lines or tokens from the dataset, will build a + `SubwordTextEncoder` along with a backing vocabulary file that can be used + to map input strings to lists of ids. + [`SubwordTextEncoder`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/data_generators/text_encoder.py) + uses word pieces and its encoding is fully invertible. + +## Data Input Pipeline + +Once the data is produced on disk, training, evaluation, and inference (if +decoding from the dataset) consume it by way of the T2T input pipeline, defined +by `Problem.input_fn`. + +The entire input pipeline is implemented with the new `tf.data.Dataset` API. + +The input function has 2 main parts: first, reading and processing individual +examples, which is done is `Problem.dataset`, and second, batching, which is +done in `Problem.input_fn` after the call to `Problem.dataset`. + +`Problem` subclasses may override the entire `input_fn` or portions of it (e.g. +`example_reading_spec` to indicate the names, types, and shapes of features on +disk). Typically they only override portions. + +### Batching + +Problems that have fixed size features (e.g. image problems) can use +`hp.batch_size` to set the batch size. + +Variable length Problems are bucketed by sequence length and then batched out of +those buckets. This significantly improves performance over a naive batching +scheme for variable length sequences because each example in a batch must be +padded to match the example with the maximum length in the batch. + +Controlling hparams: + +* `hp.batch_size`: the approximate total number of tokens in + the batch (i.e. long sequences will have smaller actual batch size and short + sequences will have a larger actual batch size in order to generally have an + equal number of tokens in the batch). +* `hp.max_length`: For variable length features, sequences with length longer + than this will be dropped during training (and also during eval if + `hp.eval_drop_long_sequences` is `True`). If not set, the maximum length of + examples is set to `hp.batch_size`. +* `hp.batch_size_multiplier`: multiplier for the maximum length +* `hp.min_length_bucket`: example length for the smallest bucket (i.e. the + smallest bucket will bucket examples up to this length). +* `hp.length_bucket_step`: controls how spaced out the length buckets are. + +## Building the Model + +At this point, the input features typically have `"inputs"` and `"targets"`, +each of which is a batched 4-D Tensor (e.g. of shape `[batch_size, +sequence_length, 1, 1]` for text input or `[batch_size, height, width, 3]` for +image input). + +The Estimator model function is created by `T2TModel.estimator_model_fn`, which +may be overridden in its entirety by subclasses if desired. Typically, +subclasses only override `T2TModel.body`. + +The model function constructs a `T2TModel`, calls it, and then calls +`T2TModel.{estimator_spec_train, estimator_spec_eval, estimator_spec_predict}` +depending on the mode. + +A call of a `T2TModel` internally calls `bottom`, `body`, `top`, and `loss`, all +of which can be overridden by subclasses (typically only `body` is). + +The default implementations of `bottom`, `top`, and `loss` depend on the +`Modality` specified for the input and target features (e.g. +`SymbolModality.bottom` embeds integer tokens and `SymbolModality.loss` is +`softmax_cross_entropy`). + +## `Estimator` and `Experiment` + +The actual training loop and related services (checkpointing, summaries, +continuous evaluation, etc.) are all handled by `Estimator` and `Experiment` +objects. `tpu_trainer.py` is the main entrypoint and uses `tpu_trainer_lib.py` +to construct the various components. + +## Decoding + +* [`t2t_decoder.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t-decoder) +* [`decoding.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/decoding.py) + +## System Overview for Train/Eval + +See `tpu_trainer.py`. + +* Create HParams +* Create `RunConfig`, including `Parallelism` object (i.e. `data_parallelism`) +* Create `Experiment`, including hooks +* Create `Estimator` + * `T2TModel.estimator_model_fn` + * `model(features)` + * `model.model_fn` + * `model.bottom` + * `model.body` + * `model.top` + * `model.loss` + * [TRAIN] `model.estimator_spec_train` + * `train_op = model.optimize` + * [EVAL] `model.estimator_spec_eval` + * Create metrics +* Create input functions + * `Problem.input_fn` + * `Problem.dataset` + * Batching +* Create hooks +* Run Experiment --schedule (e.g. `exp.continuous_train_and_eval()`) + * `estimator.train` + * `train_op = model_fn(input_fn(mode=TRAIN))` + * Run train op + * `estimator.evaluate` + * `metrics = model_fn(input_fn(mode=EVAL))` + * Accumulate metrics diff --git a/setup.py b/setup.py index 5bcacbd85..01ef5e550 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.3.2', + version='1.4.0', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', @@ -23,7 +23,6 @@ 'tensor2tensor/bin/t2t-datagen', 'tensor2tensor/bin/t2t-decoder', 'tensor2tensor/bin/t2t-make-tf-configs', - 'tensor2tensor/bin/t2t-tpu-trainer', ], install_requires=[ 'bz2file', diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder index c9ad7f9c7..f453b01fd 100644 --- a/tensor2tensor/bin/t2t-decoder +++ b/tensor2tensor/bin/t2t-decoder @@ -99,7 +99,7 @@ def main(_): estimator = tpu_trainer_lib.create_estimator( FLAGS.model, hp, - tpu_trainer.create_run_config(), + tpu_trainer.create_run_config(hp), decode_hparams=decode_hp, use_tpu=False) diff --git a/tensor2tensor/bin/t2t-tpu-trainer b/tensor2tensor/bin/t2t-tpu-trainer deleted file mode 100644 index 19468a59c..000000000 --- a/tensor2tensor/bin/t2t-tpu-trainer +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Train on TPU.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys - -# Dependency imports - -from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor import problems as problems_lib # pylint: disable=unused-import -from tensor2tensor.tpu import tpu_trainer_lib -from tensor2tensor.utils import decoding -from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import -from tensor2tensor.utils import registry -from tensor2tensor.utils import usr_dir - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -# See flags.py for additional command-line flags. -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_model calls, that will then be " - "available to the t2t-trainer.") -flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") -flags.DEFINE_integer("iterations_per_loop", 1000, - "Number of iterations in a TPU training loop.") -flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") - -# To maintain compatibility with some internal libs, we guard against these flag -# definitions possibly erroring. Apologies for the ugliness. -try: - flags.DEFINE_string("master", "", "Address of TensorFlow master.") - flags.DEFINE_string("output_dir", "", "Base output directory for run.") - flags.DEFINE_string("schedule", "continuous_train_and_eval", - "Method of Experiment to run.") - flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") -except: # pylint: disable=bare-except - pass - - -def get_problem_name(): - problems = FLAGS.problems.split("-") - assert len(problems) == 1 - return problems[0] - - -def create_hparams(): - return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) - - -def create_experiment_fn(): - use_validation_monitor = (FLAGS.schedule in - ["train_and_evaluate", "continuous_train_and_eval"] - and FLAGS.local_eval_frequency) - return tpu_trainer_lib.create_experiment_fn( - FLAGS.model, - get_problem_name(), - os.path.expanduser(FLAGS.data_dir), - FLAGS.train_steps, - FLAGS.eval_steps, - FLAGS.local_eval_frequency, - FLAGS.schedule, - export=FLAGS.export_saved_model, - decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), - use_tfdbg=FLAGS.tfdbg, - use_dbgprofile=FLAGS.dbgprofile, - use_validation_monitor=use_validation_monitor, - eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, - eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, - eval_early_stopping_metric_minimize=FLAGS. - eval_early_stopping_metric_minimize, - use_tpu=FLAGS.use_tpu) - - -def create_run_config(hp): - return tpu_trainer_lib.create_run_config( - model_dir=os.path.expanduser(FLAGS.output_dir), - master=FLAGS.master, - iterations_per_loop=FLAGS.iterations_per_loop, - num_shards=FLAGS.tpu_num_shards, - log_device_placement=FLAGS.log_device_placement, - save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency), - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, - num_gpus=FLAGS.worker_gpu, - gpu_order=FLAGS.gpu_order, - shard_to_cpu=FLAGS.locally_shard_to_cpu, - num_async_replicas=FLAGS.worker_replicas, - gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, - enable_graph_rewriter=FLAGS.experimental_optimize_placement, - use_tpu=FLAGS.use_tpu, - schedule=FLAGS.schedule, - no_data_parallelism=hp.no_data_parallelism, - daisy_chain_variables=hp.daisy_chain_variables, - ps_replicas=FLAGS.ps_replicas, - ps_job=FLAGS.ps_job, - ps_gpu=FLAGS.ps_gpu, - sync=FLAGS.sync, - worker_id=FLAGS.worker_id, - worker_job=FLAGS.worker_job) - - -def log_registry(): - if FLAGS.registry_help: - tf.logging.info(registry.help_string()) - sys.exit(0) - - -def execute_schedule(exp): - if not hasattr(exp, FLAGS.schedule): - raise ValueError( - "Experiment has no method %s, from --schedule" % FLAGS.schedule) - getattr(exp, FLAGS.schedule)() - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - tf.set_random_seed(123) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - log_registry() - - hparams = create_hparams() - run_config = create_run_config(hparams) - - exp_fn = create_experiment_fn() - exp = exp_fn(run_config, hparams) - execute_schedule(exp) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 710fa1902..7992e9ba9 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -14,29 +14,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -r"""Trainer for T2T models. - -This binary perform training, evaluation, and inference using -the Estimator API with tf.learn Experiment objects. - -To train your model, for example: - t2t-trainer \ - --data_dir ~/data \ - --problems=algorithmic_identity_binary40 \ - --model=transformer - --hparams_set=transformer_base -""" -# DEPRECATED +"""Train on TPU.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function +import contextlib import os +import sys # Dependency imports +from tensor2tensor import models # pylint: disable=unused-import +from tensor2tensor import problems as problems_lib # pylint: disable=unused-import +from tensor2tensor.tpu import tpu_trainer_lib +from tensor2tensor.utils import decoding +from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import from tensor2tensor.utils import registry -from tensor2tensor.utils import trainer_utils from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -51,58 +45,144 @@ flags.DEFINE_string("t2t_usr_dir", "", "The imported files should contain registrations, " "e.g. @registry.register_model calls, that will then be " "available to the t2t-trainer.") -flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", - "Temporary storage directory.") +flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") +flags.DEFINE_integer("iterations_per_loop", 1000, + "Number of iterations in a TPU training loop.") +flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") flags.DEFINE_bool("generate_data", False, "Generate data before training?") - -flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") -flags.DEFINE_string("output_dir", "", "Base output directory for run.") -flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_string("schedule", "train_and_evaluate", - "Method of tf.contrib.learn.Experiment to run.") +flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", + "Temporary storage directory, used if --generate_data.") flags.DEFINE_bool("profile", False, "Profile performance?") - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - trainer_utils.log_registry() - trainer_utils.validate_flags() - output_dir = os.path.expanduser(FLAGS.output_dir) - tmp_dir = os.path.expanduser(FLAGS.tmp_dir) - if not FLAGS.data_dir: - raise ValueError("You must specify a --data_dir") +# To maintain compatibility with some internal libs, we guard against these flag +# definitions possibly erroring. Apologies for the ugliness. +try: + flags.DEFINE_string("master", "", "Address of TensorFlow master.") + flags.DEFINE_string("output_dir", "", "Base output directory for run.") + flags.DEFINE_string("schedule", "continuous_train_and_eval", + "Method of Experiment to run.") + flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") +except: # pylint: disable=bare-except + pass + + +def get_problem_name(): + problems = FLAGS.problems.split("-") + assert len(problems) == 1 + return problems[0] + + +def create_hparams(): + return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) + + +def create_experiment_fn(): + use_validation_monitor = (FLAGS.schedule in + ["train_and_evaluate", "continuous_train_and_eval"] + and FLAGS.local_eval_frequency) + return tpu_trainer_lib.create_experiment_fn( + model_name=FLAGS.model, + problem_name=get_problem_name(), + data_dir=os.path.expanduser(FLAGS.data_dir), + train_steps=FLAGS.train_steps, + eval_steps=FLAGS.eval_steps, + min_eval_frequency=FLAGS.local_eval_frequency, + schedule=FLAGS.schedule, + export=FLAGS.export_saved_model, + decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), + use_tfdbg=FLAGS.tfdbg, + use_dbgprofile=FLAGS.dbgprofile, + use_validation_monitor=use_validation_monitor, + eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, + eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, + eval_early_stopping_metric_minimize=FLAGS. + eval_early_stopping_metric_minimize, + use_tpu=FLAGS.use_tpu) + + +def create_run_config(hp): + return tpu_trainer_lib.create_run_config( + model_dir=os.path.expanduser(FLAGS.output_dir), + master=FLAGS.master, + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.tpu_num_shards, + log_device_placement=FLAGS.log_device_placement, + save_checkpoints_steps=max(FLAGS.iterations_per_loop, + FLAGS.local_eval_frequency), + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, + num_gpus=FLAGS.worker_gpu, + gpu_order=FLAGS.gpu_order, + shard_to_cpu=FLAGS.locally_shard_to_cpu, + num_async_replicas=FLAGS.worker_replicas, + gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, + enable_graph_rewriter=FLAGS.experimental_optimize_placement, + use_tpu=FLAGS.use_tpu, + schedule=FLAGS.schedule, + no_data_parallelism=hp.no_data_parallelism, + daisy_chain_variables=hp.daisy_chain_variables, + ps_replicas=FLAGS.ps_replicas, + ps_job=FLAGS.ps_job, + ps_gpu=FLAGS.ps_gpu, + sync=FLAGS.sync, + worker_id=FLAGS.worker_id, + worker_job=FLAGS.worker_job) + + +def generate_data(): + # Generate data if requested. data_dir = os.path.expanduser(FLAGS.data_dir) - tf.gfile.MakeDirs(output_dir) + tmp_dir = os.path.expanduser(FLAGS.tmp_dir) + tf.gfile.MakeDirs(data_dir) + tf.gfile.MakeDirs(tmp_dir) + + problem_name = get_problem_name() + tf.logging.info("Generating data for %s" % problem_name) + registry.problem(problem_name).generate_data(data_dir, tmp_dir) - # Generate data if requested. - if FLAGS.generate_data: - tf.gfile.MakeDirs(data_dir) - tf.gfile.MakeDirs(tmp_dir) - for problem_name in FLAGS.problems.split("-"): - tf.logging.info("Generating data for %s" % problem_name) - problem = registry.problem(problem_name) - problem.generate_data(data_dir, tmp_dir) - - # Run the trainer. - def run_experiment(): - trainer_utils.run( - data_dir=data_dir, - model=FLAGS.model, - output_dir=output_dir, - train_steps=FLAGS.train_steps, - eval_steps=FLAGS.eval_steps, - schedule=FLAGS.schedule) +@contextlib.contextmanager +def profile_context(): if FLAGS.profile: with tf.contrib.tfprof.ProfileContext("t2tprof", trace_steps=range(100), dump_steps=range(100)) as pctx: opts = tf.profiler.ProfileOptionBuilder.time_and_memory() pctx.add_auto_profiling("op", opts, range(100)) - run_experiment() + yield else: - run_experiment() + yield + + +def log_registry(): + if FLAGS.registry_help: + tf.logging.info(registry.help_string()) + sys.exit(0) + + +def execute_schedule(exp): + if not hasattr(exp, FLAGS.schedule): + raise ValueError( + "Experiment has no method %s, from --schedule" % FLAGS.schedule) + with profile_context(): + getattr(exp, FLAGS.schedule)() + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + tf.set_random_seed(123) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + log_registry() + + if FLAGS.generate_data: + generate_data() + + hparams = create_hparams() + run_config = create_run_config(hparams) + + exp_fn = create_experiment_fn() + exp = exp_fn(run_config, hparams) + execute_schedule(exp) if __name__ == "__main__": diff --git a/tensor2tensor/bin/t2t_decoder.py b/tensor2tensor/bin/t2t_decoder.py index 47e9badb5..25358739a 100644 --- a/tensor2tensor/bin/t2t_decoder.py +++ b/tensor2tensor/bin/t2t_decoder.py @@ -98,7 +98,7 @@ def main(_): estimator = tpu_trainer_lib.create_estimator( FLAGS.model, hp, - tpu_trainer.create_run_config(), + tpu_trainer.create_run_config(hp), decode_hparams=decode_hp, use_tpu=False) diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 68119e8ad..d17ff85ea 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -13,29 +13,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -r"""Trainer for T2T models. - -This binary perform training, evaluation, and inference using -the Estimator API with tf.learn Experiment objects. - -To train your model, for example: - t2t-trainer \ - --data_dir ~/data \ - --problems=algorithmic_identity_binary40 \ - --model=transformer - --hparams_set=transformer_base -""" -# DEPRECATED +"""Train on TPU.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function +import contextlib import os +import sys # Dependency imports +from tensor2tensor import models # pylint: disable=unused-import +from tensor2tensor import problems as problems_lib # pylint: disable=unused-import +from tensor2tensor.tpu import tpu_trainer_lib +from tensor2tensor.utils import decoding +from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import from tensor2tensor.utils import registry -from tensor2tensor.utils import trainer_utils from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -50,58 +44,144 @@ "The imported files should contain registrations, " "e.g. @registry.register_model calls, that will then be " "available to the t2t-trainer.") -flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", - "Temporary storage directory.") +flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") +flags.DEFINE_integer("iterations_per_loop", 1000, + "Number of iterations in a TPU training loop.") +flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") flags.DEFINE_bool("generate_data", False, "Generate data before training?") - -flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") -flags.DEFINE_string("output_dir", "", "Base output directory for run.") -flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_string("schedule", "train_and_evaluate", - "Method of tf.contrib.learn.Experiment to run.") +flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", + "Temporary storage directory, used if --generate_data.") flags.DEFINE_bool("profile", False, "Profile performance?") - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - trainer_utils.log_registry() - trainer_utils.validate_flags() - output_dir = os.path.expanduser(FLAGS.output_dir) - tmp_dir = os.path.expanduser(FLAGS.tmp_dir) - if not FLAGS.data_dir: - raise ValueError("You must specify a --data_dir") +# To maintain compatibility with some internal libs, we guard against these flag +# definitions possibly erroring. Apologies for the ugliness. +try: + flags.DEFINE_string("master", "", "Address of TensorFlow master.") + flags.DEFINE_string("output_dir", "", "Base output directory for run.") + flags.DEFINE_string("schedule", "continuous_train_and_eval", + "Method of Experiment to run.") + flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") +except: # pylint: disable=bare-except + pass + + +def get_problem_name(): + problems = FLAGS.problems.split("-") + assert len(problems) == 1 + return problems[0] + + +def create_hparams(): + return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) + + +def create_experiment_fn(): + use_validation_monitor = (FLAGS.schedule in + ["train_and_evaluate", "continuous_train_and_eval"] + and FLAGS.local_eval_frequency) + return tpu_trainer_lib.create_experiment_fn( + model_name=FLAGS.model, + problem_name=get_problem_name(), + data_dir=os.path.expanduser(FLAGS.data_dir), + train_steps=FLAGS.train_steps, + eval_steps=FLAGS.eval_steps, + min_eval_frequency=FLAGS.local_eval_frequency, + schedule=FLAGS.schedule, + export=FLAGS.export_saved_model, + decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), + use_tfdbg=FLAGS.tfdbg, + use_dbgprofile=FLAGS.dbgprofile, + use_validation_monitor=use_validation_monitor, + eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, + eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, + eval_early_stopping_metric_minimize=FLAGS. + eval_early_stopping_metric_minimize, + use_tpu=FLAGS.use_tpu) + + +def create_run_config(hp): + return tpu_trainer_lib.create_run_config( + model_dir=os.path.expanduser(FLAGS.output_dir), + master=FLAGS.master, + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.tpu_num_shards, + log_device_placement=FLAGS.log_device_placement, + save_checkpoints_steps=max(FLAGS.iterations_per_loop, + FLAGS.local_eval_frequency), + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, + num_gpus=FLAGS.worker_gpu, + gpu_order=FLAGS.gpu_order, + shard_to_cpu=FLAGS.locally_shard_to_cpu, + num_async_replicas=FLAGS.worker_replicas, + gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, + enable_graph_rewriter=FLAGS.experimental_optimize_placement, + use_tpu=FLAGS.use_tpu, + schedule=FLAGS.schedule, + no_data_parallelism=hp.no_data_parallelism, + daisy_chain_variables=hp.daisy_chain_variables, + ps_replicas=FLAGS.ps_replicas, + ps_job=FLAGS.ps_job, + ps_gpu=FLAGS.ps_gpu, + sync=FLAGS.sync, + worker_id=FLAGS.worker_id, + worker_job=FLAGS.worker_job) + + +def generate_data(): + # Generate data if requested. data_dir = os.path.expanduser(FLAGS.data_dir) - tf.gfile.MakeDirs(output_dir) + tmp_dir = os.path.expanduser(FLAGS.tmp_dir) + tf.gfile.MakeDirs(data_dir) + tf.gfile.MakeDirs(tmp_dir) + + problem_name = get_problem_name() + tf.logging.info("Generating data for %s" % problem_name) + registry.problem(problem_name).generate_data(data_dir, tmp_dir) - # Generate data if requested. - if FLAGS.generate_data: - tf.gfile.MakeDirs(data_dir) - tf.gfile.MakeDirs(tmp_dir) - for problem_name in FLAGS.problems.split("-"): - tf.logging.info("Generating data for %s" % problem_name) - problem = registry.problem(problem_name) - problem.generate_data(data_dir, tmp_dir) - - # Run the trainer. - def run_experiment(): - trainer_utils.run( - data_dir=data_dir, - model=FLAGS.model, - output_dir=output_dir, - train_steps=FLAGS.train_steps, - eval_steps=FLAGS.eval_steps, - schedule=FLAGS.schedule) +@contextlib.contextmanager +def profile_context(): if FLAGS.profile: with tf.contrib.tfprof.ProfileContext("t2tprof", trace_steps=range(100), dump_steps=range(100)) as pctx: opts = tf.profiler.ProfileOptionBuilder.time_and_memory() pctx.add_auto_profiling("op", opts, range(100)) - run_experiment() + yield else: - run_experiment() + yield + + +def log_registry(): + if FLAGS.registry_help: + tf.logging.info(registry.help_string()) + sys.exit(0) + + +def execute_schedule(exp): + if not hasattr(exp, FLAGS.schedule): + raise ValueError( + "Experiment has no method %s, from --schedule" % FLAGS.schedule) + with profile_context(): + getattr(exp, FLAGS.schedule)() + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + tf.set_random_seed(123) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + log_registry() + + if FLAGS.generate_data: + generate_data() + + hparams = create_hparams() + run_config = create_run_config(hparams) + + exp_fn = create_experiment_fn() + exp = exp_fn(run_config, hparams) + execute_schedule(exp) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index 0cb86b6ad..e944f15ab 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -479,22 +479,24 @@ def feature_info(self): self._feature_info = features return features - def make_estimator_input_fn(self, mode, hparams, dataset_kwargs=None): + def make_estimator_input_fn(self, mode, hparams, data_dir=None, + dataset_kwargs=None): """Return input_fn wrapped for Estimator.""" def estimator_input_fn(params, config): - return self.input_fn(mode, hparams, params=params, config=config, - dataset_kwargs=dataset_kwargs) + return self.input_fn(mode, hparams, data_dir=data_dir, params=params, + config=config, dataset_kwargs=dataset_kwargs) return estimator_input_fn - def input_fn(self, mode, hparams, params=None, config=None, + def input_fn(self, mode, hparams, data_dir=None, params=None, config=None, dataset_kwargs=None): """Builds input pipeline for problem. Args: mode: tf.estimator.ModeKeys hparams: HParams, model hparams + data_dir: str, data directory; if None, will use hparams.data_dir params: dict, may include "batch_size" config: RunConfig; should have the data_parallelism attribute if not using TPU @@ -504,9 +506,6 @@ def input_fn(self, mode, hparams, params=None, config=None, Returns: (features_dict, Tensor targets) """ - tf.logging.warning("Problem.input_fn implements a subset of " - "input_fn_builder.build_input_fn and is currently only " - "used in tpu_trainer.") is_training = mode == tf.estimator.ModeKeys.TRAIN num_threads = 4 if is_training else 1 @@ -522,11 +521,11 @@ def gpu_valid_size(example): hparams.max_length if drop_long_sequences else 10**9) def define_shapes(example): - return _standardize_shapes( - example, batch_size=(config.use_tpu and params["batch_size"])) + batch_size = config and config.use_tpu and params["batch_size"] + return standardize_shapes(example, batch_size=batch_size) # Read and preprocess - data_dir = hparams.data_dir + data_dir = data_dir or hparams.data_dir dataset_kwargs = dataset_kwargs or {} dataset_kwargs.update({ @@ -544,16 +543,16 @@ def define_shapes(example): # Batching if _are_shapes_fully_defined(dataset.output_shapes): # Static shape features (e.g. images) - if config.use_tpu: + if config and config.use_tpu: tpu_batch_size = params["batch_size"] dataset = dataset.apply( tf.contrib.data.batch_and_drop_remainder(tpu_batch_size)) else: - num_shards = config.data_parallelism.n + num_shards = (config and config.data_parallelism.n) or 1 dataset = dataset.batch(hparams.batch_size * num_shards) else: # Variable length features - if config.use_tpu: + if config and config.use_tpu: # On TPU, pad to hparams.max_length dataset = dataset.filter(tpu_valid_size) padded_shapes = _fill_shape_nones( @@ -566,7 +565,7 @@ def define_shapes(example): dataset = dataset.filter(gpu_valid_size) batching_scheme = data_reader.hparams_to_batching_scheme( hparams, - shard_multiplier=config.data_parallelism.n, + shard_multiplier=(config and config.data_parallelism.n) or 1, length_multiplier=self.get_hparams().batch_size_multiplier) if hparams.use_fixed_batch_size: batching_scheme["batch_sizes"] = [hparams.batch_size] @@ -580,8 +579,8 @@ def define_shapes(example): dataset = dataset.map(define_shapes, num_parallel_calls=num_threads) dataset = dataset.prefetch(1) features = dataset.make_one_shot_iterator().get_next() - if not config.use_tpu: - _summarize_features(features, config.data_parallelism.n) + if not config or not config.use_tpu: + _summarize_features(features, (config and config.data_parallelism.n) or 1) if mode == tf.estimator.ModeKeys.PREDICT: features["infer_targets"] = features["targets"] @@ -604,7 +603,7 @@ def serving_input_fn(self, hparams): dataset = dataset.map(lambda ex: self.preprocess_example(ex, mode, hparams)) dataset = dataset.map(data_reader.cast_int64_to_int32) dataset = dataset.padded_batch(1000, dataset.output_shapes) - dataset = dataset.map(_standardize_shapes) + dataset = dataset.map(standardize_shapes) features = tf.contrib.data.get_single_element(dataset) if self.has_inputs: @@ -908,7 +907,7 @@ def _summarize_features(features, num_shards=1): tf.reduce_mean(nonpadding)) -def _standardize_shapes(features, batch_size=None): +def standardize_shapes(features, batch_size=None): """Set the right shapes for the features.""" for fname in ["inputs", "targets"]: diff --git a/tensor2tensor/models/super_lm.py b/tensor2tensor/models/super_lm.py index f6bc4ff85..f671e1c19 100644 --- a/tensor2tensor/models/super_lm.py +++ b/tensor2tensor/models/super_lm.py @@ -111,7 +111,7 @@ def body(self, features): logits_shard_0 = tf.expand_dims(logits_shard_0, 3) # On each device, we compute the loss for a part of the batch. # This is faster than computing the whole loss on one shard. - mp, logits = common_layers.reduce_by_device(mp, logits, lambda(l): l[0]) + mp, logits = common_layers.reduce_by_device(mp, logits, lambda l: l[0]) def _loss_for_shard(logits, targets, shard): if mp.n > 1: logits = common_layers.approximate_split(logits, mp.n, 0)[shard] diff --git a/tensor2tensor/notebooks/hello_t2t.ipynb b/tensor2tensor/notebooks/hello_t2t.ipynb index 5a976a5b3..1ff6b1d2b 100644 --- a/tensor2tensor/notebooks/hello_t2t.ipynb +++ b/tensor2tensor/notebooks/hello_t2t.ipynb @@ -87,8 +87,8 @@ "\n", "from tensor2tensor import problems\n", "from tensor2tensor.layers import common_layers\n", + "from tensor2tensor.tpu import tpu_trainer_lib\n", "from tensor2tensor.utils import t2t_model\n", - "from tensor2tensor.utils import trainer_utils\n", "from tensor2tensor.utils import registry\n", "from tensor2tensor.utils import metrics\n", "\n", @@ -597,8 +597,7 @@ "model_name = \"transformer\"\n", "hparams_set = \"transformer_base\"\n", "\n", - "hparams = trainer_utils.create_hparams(hparams_set, data_dir)\n", - "trainer_utils.add_problem_hparams(hparams, \"translate_ende_wmt32k\")\n", + "hparams = tpu_trainer_lib.create_hparams(hparams_set, data_dir=data_dir, problem_name=\"translate_ende_wmt32k\")\n", "\n", "# NOTE: Only create the model once when restoring from a checkpoint; it's a\n", "# Layer and so subsequent instantiations will have different variable scopes\n", @@ -1408,9 +1407,8 @@ " return tf.layers.conv2d(tf.nn.relu(h2), filters,\n", " kernel_size=(3, 3))\n", "\n", - "hparams = trainer_utils.create_hparams(\"basic_1\", data_dir)\n", + "hparams = tpu_trainer_lib.create_hparams(\"basic_1\", data_dir=data_dir, problem_name=\"image_mnist\")\n", "hparams.hidden_size = 64\n", - "trainer_utils.add_problem_hparams(hparams, \"image_mnist\")\n", "model = MySimpleModel(hparams, Modes.TRAIN)" ], "cell_type": "code", @@ -1663,4 +1661,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index d3e4130f6..d17ff85ea 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function +import contextlib import os import sys @@ -47,6 +48,10 @@ flags.DEFINE_integer("iterations_per_loop", 1000, "Number of iterations in a TPU training loop.") flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") +flags.DEFINE_bool("generate_data", False, "Generate data before training?") +flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", + "Temporary storage directory, used if --generate_data.") +flags.DEFINE_bool("profile", False, "Profile performance?") # To maintain compatibility with some internal libs, we guard against these flag # definitions possibly erroring. Apologies for the ugliness. @@ -75,13 +80,13 @@ def create_experiment_fn(): ["train_and_evaluate", "continuous_train_and_eval"] and FLAGS.local_eval_frequency) return tpu_trainer_lib.create_experiment_fn( - FLAGS.model, - get_problem_name(), - os.path.expanduser(FLAGS.data_dir), - FLAGS.train_steps, - FLAGS.eval_steps, - FLAGS.local_eval_frequency, - FLAGS.schedule, + model_name=FLAGS.model, + problem_name=get_problem_name(), + data_dir=os.path.expanduser(FLAGS.data_dir), + train_steps=FLAGS.train_steps, + eval_steps=FLAGS.eval_steps, + min_eval_frequency=FLAGS.local_eval_frequency, + schedule=FLAGS.schedule, export=FLAGS.export_saved_model, decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), use_tfdbg=FLAGS.tfdbg, @@ -123,6 +128,31 @@ def create_run_config(hp): worker_job=FLAGS.worker_job) +def generate_data(): + # Generate data if requested. + data_dir = os.path.expanduser(FLAGS.data_dir) + tmp_dir = os.path.expanduser(FLAGS.tmp_dir) + tf.gfile.MakeDirs(data_dir) + tf.gfile.MakeDirs(tmp_dir) + + problem_name = get_problem_name() + tf.logging.info("Generating data for %s" % problem_name) + registry.problem(problem_name).generate_data(data_dir, tmp_dir) + + +@contextlib.contextmanager +def profile_context(): + if FLAGS.profile: + with tf.contrib.tfprof.ProfileContext("t2tprof", + trace_steps=range(100), + dump_steps=range(100)) as pctx: + opts = tf.profiler.ProfileOptionBuilder.time_and_memory() + pctx.add_auto_profiling("op", opts, range(100)) + yield + else: + yield + + def log_registry(): if FLAGS.registry_help: tf.logging.info(registry.help_string()) @@ -133,7 +163,8 @@ def execute_schedule(exp): if not hasattr(exp, FLAGS.schedule): raise ValueError( "Experiment has no method %s, from --schedule" % FLAGS.schedule) - getattr(exp, FLAGS.schedule)() + with profile_context(): + getattr(exp, FLAGS.schedule)() def main(_): @@ -142,6 +173,9 @@ def main(_): usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) log_registry() + if FLAGS.generate_data: + generate_data() + hparams = create_hparams() run_config = create_run_config(hparams) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index bc18fe298..475d0f1be 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -35,7 +35,7 @@ def create_session_config(log_device_placement=False, enable_graph_rewriter=False, gpu_mem_fraction=0.95, - use_tpu=True): + use_tpu=False): """The TensorFlow Session config to use.""" if use_tpu: graph_options = tf.GraphOptions() @@ -99,7 +99,7 @@ def create_run_config(master="", ps_job="/job:ps", ps_gpu=0, sync=False, - use_tpu=True): + use_tpu=False): """Create RunConfig, TPUConfig, and Parallelism object.""" session_config = create_session_config( log_device_placement=log_device_placement, @@ -161,7 +161,7 @@ def create_estimator(model_name, run_config, schedule="train_and_evaluate", decode_hparams=None, - use_tpu=True): + use_tpu=False): model_fn = t2t_model.T2TModel.make_estimator_model_fn( model_name, hparams, decode_hparams=decode_hparams, use_tpu=use_tpu) @@ -218,7 +218,7 @@ def create_experiment(run_config, data_dir, train_steps, eval_steps, - min_eval_frequency, + min_eval_frequency=2000, schedule="train_and_evaluate", export=False, decode_hparams=None, @@ -228,7 +228,7 @@ def create_experiment(run_config, eval_early_stopping_steps=None, eval_early_stopping_metric=None, eval_early_stopping_metric_minimize=True, - use_tpu=True): + use_tpu=False): """Create Experiment.""" # HParams hparams.add_hparam("data_dir", data_dir) @@ -280,7 +280,7 @@ def create_experiment(run_config, train_steps=train_steps, eval_steps=eval_steps, min_eval_frequency=min_eval_frequency, - train_steps_per_iteration=min_eval_frequency, + train_steps_per_iteration=min(min_eval_frequency, train_steps), export_strategies=export_strategies, **hooks_kwargs) diff --git a/tensor2tensor/tpu/tpu_trainer_lib_test.py b/tensor2tensor/tpu/tpu_trainer_lib_test.py index 4d8f2aad9..e8c1689c7 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib_test.py +++ b/tensor2tensor/tpu/tpu_trainer_lib_test.py @@ -19,26 +19,51 @@ from __future__ import division from __future__ import print_function +import os +import shutil + # Dependency imports +from tensor2tensor import models # pylint: disable=unused-import +from tensor2tensor.data_generators import algorithmic +from tensor2tensor.data_generators import generator_utils +from tensor2tensor.data_generators import problem as problem_lib from tensor2tensor.tpu import tpu_trainer_lib from tensor2tensor.utils import registry -from tensor2tensor.utils import trainer_utils_test import tensorflow as tf +@registry.register_problem +class TinyAlgo(algorithmic.AlgorithmicIdentityBinary40): + + def generate_data(self, data_dir, _): + identity_problem = algorithmic.AlgorithmicIdentityBinary40() + generator_utils.generate_files( + identity_problem.generator(self.num_symbols, 40, 100000), + self.training_filepaths(data_dir, 1, shuffled=True), 100) + generator_utils.generate_files( + identity_problem.generator(self.num_symbols, 400, 10000), + self.dev_filepaths(data_dir, 1, shuffled=True), 100) + + class TpuTrainerTest(tf.test.TestCase): @classmethod def setUpClass(cls): - trainer_utils_test.TrainerUtilsTest.setUpClass() + tmp_dir = tf.test.get_temp_dir() + shutil.rmtree(tmp_dir) + os.mkdir(tmp_dir) + cls.data_dir = tmp_dir + + # Generate a small test dataset + registry.problem("tiny_algo").generate_data(cls.data_dir, None) def testExperiment(self): exp_fn = tpu_trainer_lib.create_experiment_fn( "transformer", "tiny_algo", - trainer_utils_test.TrainerUtilsTest.data_dir, + self.data_dir, train_steps=1, eval_steps=1, min_eval_frequency=1, @@ -48,6 +73,34 @@ def testExperiment(self): exp = exp_fn(run_config, hparams) exp.test() + def testModel(self): + # HParams + hparams = tpu_trainer_lib.create_hparams("transformer_tiny", + data_dir=self.data_dir, + problem_name="tiny_algo") + + # Dataset + problem = hparams.problem_instances[0] + dataset = problem.dataset(tf.estimator.ModeKeys.TRAIN, self.data_dir) + dataset = dataset.repeat(None).padded_batch(10, dataset.output_shapes) + features = dataset.make_one_shot_iterator().get_next() + features = problem_lib.standardize_shapes(features) + + # Model + model = registry.model("transformer")(hparams, tf.estimator.ModeKeys.TRAIN) + logits, losses = model(features) + + self.assertTrue("training" in losses) + loss = losses["training"] + + with self.test_session() as sess: + sess.run(tf.global_variables_initializer()) + logits_val, loss_val = sess.run([logits, loss]) + logits_shape = list(logits_val.shape) + logits_shape[1] = None + self.assertAllEqual(logits_shape, [10, None, 1, 1, 4]) + self.assertEqual(loss_val.shape, tuple()) + if __name__ == "__main__": tf.test.main() diff --git a/tensor2tensor/utils/data_reader.py b/tensor2tensor/utils/data_reader.py index 4721bc5d0..14de558b0 100644 --- a/tensor2tensor/utils/data_reader.py +++ b/tensor2tensor/utils/data_reader.py @@ -18,8 +18,6 @@ from __future__ import division from __future__ import print_function -import functools - # Dependency imports import numpy as np @@ -39,103 +37,6 @@ def cast_int64_to_int32(features): return f -def feature_placeholders(data_fields, data_items_to_decoders): - """Construct Placeholders and run decoders.""" - example = {} - for field, config in data_fields.items(): - if isinstance(config, tf.VarLenFeature): - shape = [None, None] - else: - shape = config.shape - - example[field] = tf.placeholder(dtype=config.dtype, shape=shape, name=field) - - # Decode - if data_items_to_decoders is None: - data_items_to_decoders = { - field: tf.contrib.slim.tfexample_decoder.Tensor(field) - for field in data_fields - } - - decoded_example = {} - for field, decoder in data_items_to_decoders.items(): - keys_to_tensors = {key: example[key] for key in decoder.keys} - decoded_example[field] = decoder.tensors_to_item(keys_to_tensors) - - return decoded_example - - -# DEPRECATED -def input_pipeline(problem, - data_dir, - capacity, - mode, - hparams, - batching_scheme, - dataset_split=None, - shard=None): - """Input pipeline, returns a dictionary of batched and padded tensors. - - Args: - problem: Problem instance for which to build the input pipeline. - data_dir: directory with input data. - capacity: int, data pipeline buffer capacity. - mode: tf.estimator.ModeKeys entry. - hparams: an HParams object. - batching_scheme: a dictionary containing - "boundaries": a list of integers for the boundaries that will be - used for bucketing; see bucket_by_sequence_length for more details. - "batch_sizes": a list of batch sizes corresponding to the buckets - "min_length": an integer. We drop sequences which are shorter. - "max_length": an integer. We drop sequences which are longer. - dataset_split: tf.estimator.ModeKeys + ["test"], which split of the dataset - to use. Defaults to mode. - shard: int, if provided, will only read data from the specified shard. - - Returns: - dict - """ - is_training = mode == tf.estimator.ModeKeys.TRAIN - num_threads = 4 if is_training else 1 - - with tf.name_scope("input_pipeline"): - dataset = problem.dataset( - mode, - data_dir=data_dir, - num_threads=num_threads, - output_buffer_size=capacity, - hparams=hparams, - dataset_split=dataset_split, - shard=shard) - dataset = dataset.map(cast_int64_to_int32, num_parallel_calls=num_threads) - dataset = dataset.filter( - functools.partial( - example_valid_size, - min_length=batching_scheme["min_length"], - max_length=batching_scheme["max_length"], - )) - if is_training: - dataset = dataset.shuffle(capacity) - dataset = dataset.repeat(None) - - bucket_id_fn = example_length - if len(batching_scheme["boundaries"]) == 1: - bucket_id_fn = lambda _: tf.constant(0) - - if "padded_shapes" not in batching_scheme: - batching_scheme["padded_shapes"] = None - - dataset = bucket_by_sequence_length( - dataset, - bucket_id_fn, - batching_scheme["boundaries"], - batching_scheme["batch_sizes"], - padded_shapes=batching_scheme["padded_shapes"]) - - batched_examples = dataset.make_one_shot_iterator().get_next() - return batched_examples - - def example_length(example): length = 0 # Length of the example is the maximum length of the feature lengths @@ -328,62 +229,6 @@ def hparams_to_batching_scheme(hparams, length_multiplier=length_multiplier) -def constant_batching_scheme(constant_batch_size_in_sequences): - """A batching scheme with constant batch size. - - Args: - constant_batch_size_in_sequences: an integer - - Returns: - a dictionary - """ - boundaries = _bucket_boundaries(1024) - batch_sizes = [constant_batch_size_in_sequences] * (1 + len(boundaries)) - return { - "boundaries": boundaries, - "batch_sizes": batch_sizes, - "min_length": 0, - "max_length": 10**9, - "shuffle_queue_size": None, - "window_size": constant_batch_size_in_sequences, - } - - -# DEPRECATED -def serving_input_fn(problem, hparams): - """Input fn for serving, starting from Placeholders.""" - data_fields, data_items_to_decoders = problem.example_reading_spec() - - # Feature placeholders that mimic what's on disk - example = feature_placeholders(data_fields, data_items_to_decoders) - - # Preprocess - example = problem.preprocess_example(example, tf.estimator.ModeKeys.PREDICT, - hparams) - example = cast_int64_to_int32(example) - - # 4-D inputs and space ids - constants = {} - constants["target_space_id"] = tf.constant( - problem.get_hparams().target_space_id) - constants["problem_choice"] = tf.constant(0) - if problem.has_inputs: - while len(example["inputs"].get_shape()) != 4: - example["inputs"] = tf.expand_dims(example["inputs"], axis=-1) - constants["input_space_id"] = tf.constant( - problem.get_hparams().input_space_id) - example.pop("targets") - else: - while len(example["targets"].get_shape()) != 4: - example["targets"] = tf.expand_dims(example["targets"], axis=-1) - - features = constants - features.update(example) - - return tf.estimator.export.ServingInputReceiver( - features=features, receiver_tensors=example) - - class DummyQueueRunner(object): """Can stand-in for a QueueRunner but does nothing.""" diff --git a/tensor2tensor/utils/decoding.py b/tensor2tensor/utils/decoding.py index 2e71abe40..d072ecce9 100644 --- a/tensor2tensor/utils/decoding.py +++ b/tensor2tensor/utils/decoding.py @@ -29,7 +29,6 @@ from six.moves import input # pylint: disable=redefined-builtin from tensor2tensor.data_generators import text_encoder -from tensor2tensor.utils import input_fn_builder import tensorflow as tf FLAGS = tf.flags.FLAGS @@ -48,6 +47,7 @@ def decode_hparams(overrides=""): beam_size=4, alpha=0.6, return_beams=False, + write_beam_scores=False, max_input_size=-1, identity_output=False, num_samples=-1, @@ -157,10 +157,15 @@ def decode_from_dataset(estimator, # Log predictions decoded_outputs = [] + decoded_scores = [] if decode_hp.return_beams: output_beams = np.split(outputs, decode_hp.beam_size, axis=0) + scores = None + if "scores" in prediction: + scores = np.split(prediction["scores"], decode_hp.beam_size, axis=0) for i, beam in enumerate(output_beams): tf.logging.info("BEAM %d:" % i) + score = scores and scores[i] decoded = log_decode_results( inputs, beam, @@ -173,6 +178,8 @@ def decode_from_dataset(estimator, identity_output=decode_hp.identity_output, targets=targets) decoded_outputs.append(decoded) + if decode_hp.write_beam_scores: + decoded_scores.append(score) else: decoded = log_decode_results( inputs, @@ -189,8 +196,12 @@ def decode_from_dataset(estimator, # Write out predictions if decode_to_file passed if decode_to_file: - for decoded_output, decoded_target in decoded_outputs: - output_file.write(str(decoded_output) + decode_hp.delimiter) + for i, (decoded_output, decoded_target) in enumerate(decoded_outputs): + beam_score_str = "" + if decode_hp.write_beam_scores: + beam_score_str = "\t%.2f" % decoded_scores[i] + output_file.write( + str(decoded_output) + beam_score_str + decode_hp.delimiter) target_file.write(str(decoded_target) + decode_hp.delimiter) if (decode_hp.num_samples >= 0 and @@ -241,14 +252,26 @@ def input_fn(): for result in result_iter: if decode_hp.return_beams: beam_decodes = [] + beam_scores = [] output_beams = np.split(result["outputs"], decode_hp.beam_size, axis=0) + scores = None + if "scores" in result: + scores = np.split(result["scores"], decode_hp.beam_size, axis=0) for k, beam in enumerate(output_beams): tf.logging.info("BEAM %d:" % k) + score = scores and scores[k] decoded_outputs, _ = log_decode_results(result["inputs"], beam, problem_name, None, inputs_vocab, targets_vocab) beam_decodes.append(decoded_outputs) - decodes.append("\t".join(beam_decodes)) + if decode_hp.write_beam_scores: + beam_scores.append(score) + if decode_hp.write_beam_scores: + decodes.append("\t".join( + ["\t".join([d, "%.2f" % s]) for d, s + in zip(beam_decodes, beam_scores)])) + else: + decodes.append("\t".join(beam_decodes)) else: decoded_outputs, _ = log_decode_results(result["inputs"], result["outputs"], problem_name, @@ -575,7 +598,7 @@ def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring return (tf.constant(p_hparams.input_space_id), tf.constant( p_hparams.target_space_id), x) - input_space_id, target_space_id, x = input_fn_builder.cond_on_index( + input_space_id, target_space_id, x = cond_on_index( input_fn, feature_map["problem_choice"], len(hparams.problems) - 1) features = {} @@ -605,13 +628,13 @@ def _decode_input_tensor_to_features_dict(feature_map, hparams): def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring p_hparams = hparams.problems[problem_choice] - # Add a third empty dimension dimension + # Add a third empty dimension x = tf.expand_dims(x, axis=[2]) x = tf.to_int32(x) return (tf.constant(p_hparams.input_space_id), tf.constant( p_hparams.target_space_id), x) - input_space_id, target_space_id, x = input_fn_builder.cond_on_index( + input_space_id, target_space_id, x = cond_on_index( input_fn, feature_map["problem_choice"], len(hparams.problems) - 1) features = {} @@ -622,3 +645,15 @@ def input_fn(problem_choice, x=inputs): # pylint: disable=missing-docstring IMAGE_DECODE_LENGTH if input_is_image else tf.shape(x)[1] + 50) features["inputs"] = x return features + + +def cond_on_index(fn, index_tensor, max_idx, cur_idx=0): + """Call fn(index_tensor) using tf.cond in [cur_id, max_idx].""" + if cur_idx == max_idx: + return fn(cur_idx) + + return tf.cond( + tf.equal(index_tensor, cur_idx), + lambda: fn(cur_idx), + lambda: cond_on_index(fn, index_tensor, max_idx, cur_idx + 1) + ) diff --git a/tensor2tensor/utils/input_fn_builder.py b/tensor2tensor/utils/input_fn_builder.py deleted file mode 100644 index 18ca992cf..000000000 --- a/tensor2tensor/utils/input_fn_builder.py +++ /dev/null @@ -1,238 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Input function building.""" -# DEPRECATED - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -from six.moves import xrange # pylint: disable=redefined-builtin - -from tensor2tensor.utils import data_reader - -import tensorflow as tf - - -def build_input_fn(mode, - hparams, - data_dir=None, - num_datashards=None, - fixed_problem=None, - worker_replicas=None, - worker_id=None, - batch_size=None, - dataset_split=None, - shard=None): - """Provides input to the graph, either from disk or via a placeholder. - - This function produces an input function that will feed data into - the network. There are two modes of operation: - - 1. If data_file_pattern and all subsequent arguments are None, then - it creates a placeholder for a serialized tf.Example proto. - 2. If data_file_pattern is defined, it will read the data from the - files at the given location. Use this mode for training, - evaluation, and testing prediction. - - Args: - mode: The execution mode, as defined in tf.estimator.ModeKeys. - hparams: HParams object. - data_dir: directory with input data. - num_datashards: An integer. - fixed_problem: An integer indicating the problem to fetch data for, or None - if the input is to be randomly selected. - worker_replicas: int, number of worker replicas. Used in multiproblem - setting with hparams.problem_choice == distributed. - worker_id: int, id of this worker replica. Used in multiproblem setting with - hparams.problem_choice == distributed. - batch_size: int, if provided, will use a fixed batch size. - dataset_split: tf.estimator.ModeKeys + ["test"], which split of the dataset - to use. Defaults to mode. - shard: int, if provided, will only read data from the specified shard. - - Returns: - A function that returns a dictionary of features and the target labels. - """ - - def input_fn(): - """Supplies input to our model. - - This function supplies input to our model, where this input is a - function of the mode. For example, we supply different data if - we're performing training versus evaluation. - - Returns: - A tuple consisting of 1) a dictionary of tensors whose keys are - the feature names, and 2) a tensor of target labels if the mode - is not INFER (and None, otherwise). - - Raises: - ValueError: if one of the parameters has an unsupported value. - """ - problem_count = len(hparams.problems) - problem_batches = [] - with tf.name_scope("input_fn"): - for problem_idx in xrange(problem_count): - if fixed_problem is not None and problem_idx != fixed_problem: - continue - problem_instance = hparams.problem_instances[problem_idx] - p_hparams = hparams.problems[problem_idx] - feature_map = features_for_problem( - problem_instance, - p_hparams, - hparams, - data_dir, - num_datashards, - mode, - batch_size=batch_size, - dataset_split=dataset_split, - shard=shard, - name="problem_%d" % problem_idx) - problem_batches.append(feature_map) - - # We choose which problem to process. - loss_moving_avgs = [] # Need loss moving averages for that. - for problem_idx in xrange(problem_count): - with tf.variable_scope("losses_avg"): - loss_moving_avgs.append( - tf.get_variable( - "problem_%d/total_loss" % problem_idx, - initializer=100.0, - trainable=False)) - if fixed_problem is None: - problem_choice = _problem_choice(hparams.problem_choice, mode, - problem_count, loss_moving_avgs, - worker_replicas, worker_id) - - # Problem conditional on problem_choice. - feature_map = cond_on_index( - lambda problem_idx: problem_batches[problem_idx], problem_choice, - problem_count - 1) - else: - problem_choice = tf.constant(fixed_problem) - # Take the only constructed batch, which is the fixed_problem. - feature_map = problem_batches[0] - - feature_map["problem_choice"] = problem_choice - - # Set shapes so the ranks are clear. - if problem_instance.has_inputs: - feature_map["inputs"].set_shape([None, None, None, None]) - feature_map["input_space_id"].set_shape([]) - feature_map["targets"].set_shape([None, None, None, None]) - feature_map["problem_choice"].set_shape([]) - feature_map["target_space_id"].set_shape([]) - - if mode == tf.estimator.ModeKeys.PREDICT: - feature_map["infer_targets"] = feature_map["targets"] - # Forced shape obfuscation is necessary for inference. - if problem_instance.has_inputs: - feature_map["inputs"]._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access - feature_map["targets"]._shape = tf.TensorShape([None, None, None, None]) # pylint: disable=protected-access - - # This is because of a bug in the Estimator that short-circuits prediction - # if it doesn't see a QueueRunner. DummyQueueRunner implements the - # minimal expected interface but does nothing. - tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, - data_reader.DummyQueueRunner()) - return feature_map, None - - return feature_map, feature_map["targets"] - - return input_fn - - -def _problem_choice(choice_mode, mode, problem_count, loss_moving_avgs, - worker_replicas, worker_id): - """Return idx of problem based on choice_mode and mode.""" - if choice_mode == "uniform" or mode != tf.estimator.ModeKeys.TRAIN: - problem_choice = tf.random_uniform([], maxval=problem_count, dtype=tf.int32) - elif choice_mode == "adaptive": - loss_moving_avgs = tf.stack(loss_moving_avgs) - problem_choice = tf.multinomial(tf.reshape(loss_moving_avgs, [1, -1]), 1) - problem_choice = tf.to_int32(tf.squeeze(problem_choice)) - elif choice_mode == "distributed": - assert worker_replicas >= problem_count - assert worker_replicas % problem_count == 0 - problem_choice = tf.to_int32(worker_id % problem_count) - else: - raise ValueError("Value of hparams.problem_choice is %s and must be " - "one of [uniform, adaptive, distributed]" % choice_mode) - - return problem_choice - - -def cond_on_index(fn, index_tensor, max_idx, cur_idx=0): - """Call fn(index_tensor) using tf.cond in [cur_id, max_idx].""" - if cur_idx == max_idx: - return fn(cur_idx) - - return tf.cond( - tf.equal(index_tensor, cur_idx), - lambda: fn(cur_idx), - lambda: cond_on_index(fn, index_tensor, max_idx, cur_idx + 1) - ) - - -def features_for_problem(problem_instance, - p_hparams, - hparams, - data_dir, - num_datashards, - mode, - batch_size=None, - dataset_split=None, - shard=None, - name="problem_inputs"): - """Feature map for Problem.""" - with tf.name_scope(name): - with tf.device("/cpu:0"): # Input reading on CPU - capacity = (p_hparams.max_expected_batch_size_per_shard * num_datashards) - batching_scheme = data_reader.hparams_to_batching_scheme( - hparams, - shard_multiplier=num_datashards, - drop_long_sequences=(mode == tf.estimator.ModeKeys.TRAIN or - hparams.eval_drop_long_sequences), - length_multiplier=(p_hparams.batch_size_multiplier)) - if batch_size: - # If batch_size is fixed, use a single input bucket - batching_scheme["batch_sizes"] = [batch_size] - batching_scheme["boundaries"] = [] - tf.logging.info("batching_scheme = %s" % batching_scheme) - feature_map = data_reader.input_pipeline( - problem_instance, - data_dir, - capacity, - mode, - hparams, - batching_scheme, - dataset_split=dataset_split, - shard=shard) - - # Ensure inputs and targets are proper rank. - if problem_instance.has_inputs: - while len(feature_map["inputs"].get_shape()) != 4: - feature_map["inputs"] = tf.expand_dims(feature_map["inputs"], axis=-1) - while len(feature_map["targets"].get_shape()) != 4: - feature_map["targets"] = tf.expand_dims(feature_map["targets"], axis=-1) - - if problem_instance.has_inputs: - feature_map["input_space_id"] = tf.constant(p_hparams.input_space_id) - feature_map["target_space_id"] = tf.constant(p_hparams.target_space_id) - return feature_map diff --git a/tensor2tensor/utils/input_fn_builder_test.py b/tensor2tensor/utils/input_fn_builder_test.py deleted file mode 100644 index ec2e6147e..000000000 --- a/tensor2tensor/utils/input_fn_builder_test.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for tensor2tensor.utils.input_fn_builder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -from tensor2tensor.utils import input_fn_builder -import tensorflow as tf - - -class InputFnBuilderTest(tf.test.TestCase): - - def testCondOnIndex(self): - """Smoke tests of cond_on_index().""" - - z = tf.constant(1., dtype=tf.float32) - def f(n): - return { - "a": z * n, - "b": z * n * n - } - - index = tf.placeholder(shape=[], dtype=tf.int32) - out = input_fn_builder.cond_on_index(f, index, 3, 0) - - with self.test_session() as sess: - # Check dispatching to the correct branch - result = sess.run(out, feed_dict={ - index: 2 - }) - - self.assertAllClose(result["a"], 2.) - self.assertAllClose(result["b"], 4.) - - result = sess.run(out, feed_dict={ - index: 3 - }) - - self.assertAllClose(result["a"], 3.) - self.assertAllClose(result["b"], 9.) - - -if __name__ == "__main__": - tf.test.main() diff --git a/tensor2tensor/utils/metrics.py b/tensor2tensor/utils/metrics.py index 817582809..724dbb740 100644 --- a/tensor2tensor/utils/metrics.py +++ b/tensor2tensor/utils/metrics.py @@ -22,6 +22,8 @@ # Dependency imports +import numpy as np + from tensor2tensor.layers import common_layers from tensor2tensor.utils import bleu_hook from tensor2tensor.utils import registry @@ -29,6 +31,8 @@ import tensorflow as tf +from tensorflow.contrib.eager.python import tfe + class Metrics(object): """Available evaluation metrics.""" @@ -305,10 +309,11 @@ def wrapped_metric_fn(): problem_name = problem_instance.name metrics = problem_instance.eval_metrics() if not all([m in METRICS_FNS for m in metrics]): - raise ValueError("Unrecognized metric. Problem %s specified metrics " - "%s. Recognized metrics are %s." % (problem_name, - metrics, - METRICS_FNS.keys())) + error_str = ("Unrecognized metric. Problem %s specified metrics " + "%s. Recognized metrics are %s.") + raise ValueError(error_str % (problem_name, + metrics, + list(METRICS_FNS.keys()))) def image_wrapped_metric_fn(predictions, labels, @@ -334,6 +339,51 @@ def image_wrapped_metric_fn(predictions, return eval_metrics +def create_eager_metrics_for_problem(problem, model_hparams=None): + """See create_eager_metrics.""" + metric_names = problem.eval_metrics() + tm = problem.get_hparams().target_modality + if isinstance(tm, tuple): + assert model_hparams is not None + tm = registry.create_modality(tm, model_hparams) + return create_eager_metrics(metric_names, weights_fn=tm.targets_weights_fn) + + +def create_eager_metrics(metric_names, weights_fn=common_layers.weights_all): + """Create metrics accumulators and averager for Eager mode. + + Args: + metric_names: list from Metrics enum + weights_fn: function that takes labels and returns a weights mask. Defaults + to weights of all 1, i.e. common_layers.weights_all. Use + common_layers.weights_nonzero if labels have 0-padding. + + Returns: + (accum_fn(predictions, targets) => None, + result_fn() => dict + """ + metric_fns = dict( + [(name, METRICS_FNS[name]) for name in metric_names]) + tfe_metrics = dict() + + for name in metric_names: + tfe_metrics[name] = tfe.metrics.Mean(name=name) + + def metric_accum(predictions, targets): + for name, metric_fn in metric_fns.items(): + val, weight = metric_fn(predictions, targets, + weights_fn=weights_fn) + tfe_metrics[name](np.squeeze(val), np.squeeze(weight)) + + def metric_means(): + avgs = {} + for name in metric_names: + avgs[name] = tfe_metrics[name].result().numpy() + return avgs + + return metric_accum, metric_means + + # Metrics are functions that take predictions and labels and return # a tensor of metrics and a tensor of weights. # If the function has "features" as an argument, it will receive the whole diff --git a/tensor2tensor/utils/model_builder.py b/tensor2tensor/utils/model_builder.py deleted file mode 100644 index b4a0008e3..000000000 --- a/tensor2tensor/utils/model_builder.py +++ /dev/null @@ -1,310 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Model building.""" -# DEPRECATED - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -import math - -# Dependency imports - -import six -# pylint: disable=redefined-builtin -from six.moves import xrange -# pylint: enable=redefined-builtin - -from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor.utils import devices -from tensor2tensor.utils import input_fn_builder -from tensor2tensor.utils import metrics -from tensor2tensor.utils import optimize -from tensor2tensor.utils import registry - -import tensorflow as tf - - -def model_fn(model, - features, - mode, - hparams, - problem_names, - train_steps=100000, - worker_id=0, - worker_replicas=1, - eval_run_autoregressive=False, - decode_hparams=None): - """Builds the model for all modes. - - * TRAIN: Constructs loss and train_op - * EVAL: Constructs the loss and eval metrics - * PREDICT: Constructs the predictions - - Args: - model: str, name of model. - features: dict. Expected to have keys - {inputs, targets, problem_choice}. - mode: tf.estimator.ModeKeys. - hparams: model HParams. - problem_names: list of str, names of the problems. - train_steps: int, total number of training steps. Used to compute learning - rate decay. - worker_id: int, id of this worker. - worker_replicas: int, number of workers. - eval_run_autoregressive: bool, whether to run evaluation autoregressively. - decode_hparams: HParams for decode settings. Used when mode == PREDICT. - - Returns: - tf.estimator.EstimatorSpec - """ - assert len(problem_names) == len(hparams.problem_instances) - decode_hp = decode_hparams - - # TODO(rsepassi): This still depends on FLAGS. Rm eventually. - dp = devices.data_parallelism_from_flags(hparams) - - tf.get_variable_scope().set_initializer( - optimize.get_variable_initializer(hparams)) - is_training = mode == tf.estimator.ModeKeys.TRAIN - - # Add input statistics for incoming features. - with tf.name_scope("input_stats"): - for (k, v) in six.iteritems(features): - if isinstance(v, tf.Tensor) and v.get_shape().ndims > 1: - tf.summary.scalar("%s_batch" % k, tf.shape(v)[0] // dp.n) - tf.summary.scalar("%s_length" % k, tf.shape(v)[1]) - nonpadding = tf.to_float(tf.not_equal(v, 0)) - nonpadding_tokens = tf.reduce_sum(nonpadding) - if k == "targets": - targets_nonpadding_tokens = nonpadding_tokens - tf.summary.scalar("%s_nonpadding_tokens" % k, nonpadding_tokens) - tf.summary.scalar("%s_nonpadding_fraction" % k, - tf.reduce_mean(nonpadding)) - - # Get multi-problem logits and loss based on features["problem_choice"]. - loss_variable_names = [] - - def nth_model(n): - """Build the model for the n-th problem, plus some added variables.""" - model_class = registry.model(model)( - hparams, - mode, - hparams.problems[n], - n, - dp, - decode_hparams=decode_hparams) - if mode == tf.estimator.ModeKeys.PREDICT: - return model_class.infer( - features, - beam_size=decode_hp.beam_size, - top_beams=(decode_hp.beam_size if decode_hp.return_beams else 1), - alpha=decode_hp.alpha, - decode_length=decode_hp.extra_length) - # In distributed mode, we build graph for problem=0 and problem=worker_id. - skipping_is_on = hparams.problem_choice == "distributed" and is_training - del skipping_is_on - problem_worker_id = worker_id % len(hparams.problems) - skip_this_one = n != 0 and n % worker_replicas != problem_worker_id - # On worker 0 also build graph for problems <= 1. - # TODO(lukaszkaiser): why is this hack needed for variables init? Repair. - skip_this_one = skip_this_one and (worker_id != 0 or n > 1) - if eval_run_autoregressive and mode == tf.estimator.ModeKeys.EVAL: - logits, losses_dict = model_class.eval_autoregressive(features) - else: - logits, losses_dict = model_class(features) - with tf.variable_scope("losses_avg"): - total_loss, ops = 0.0, [] - for loss_key, loss_value in six.iteritems(losses_dict): - loss_name = "problem_%d/%s_loss" % (n, loss_key) - loss_moving_avg = tf.get_variable( - loss_name, initializer=100.0, trainable=False) - loss_variable_names.append(loss_name) - ops.append( - loss_moving_avg.assign(loss_moving_avg * 0.9 + loss_value * 0.1)) - total_loss += loss_value - try: # Total loss avg might be reused or not, we try both. - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - # Total loss was already constructed on input. - loss_moving_avg = tf.get_variable("problem_%d/total_loss" % n) - except ValueError: - loss_moving_avg = tf.get_variable( - "problem_%d/total_loss" % n, initializer=100.0, trainable=False) - ops.append( - loss_moving_avg.assign(loss_moving_avg * 0.9 + total_loss * 0.1)) - with tf.variable_scope("train_stats"): # Count steps for this problem. - problem_steps = tf.get_variable( - "problem_%d_steps" % n, initializer=0, trainable=False) - ops.append(problem_steps.assign_add(1)) - with tf.control_dependencies(ops): # Make sure the ops run. - # Ensure the loss is a scalar here. - total_loss = tf.reshape(total_loss, [], name="total_loss_control_id") - return [total_loss, logits] - - model_output = input_fn_builder.cond_on_index( - nth_model, - index_tensor=features["problem_choice"], - max_idx=len(hparams.problems) - 1) - - if mode == tf.estimator.ModeKeys.PREDICT: - # If beam searching, model_output will be a dict with keys "outputs" and - # "scores". - if isinstance(model_output, dict): - outputs = model_output["outputs"] - scores = model_output["scores"] - else: - outputs = model_output - scores = None - - batched_problem_choice = ( - features["problem_choice"] * tf.ones( - (tf.shape(features["inputs"])[0],), dtype=tf.int32)) - predictions = { - "outputs": outputs, - "scores": scores, - "inputs": features.get("inputs", None), - "targets": features.get("infer_targets", None), - "problem_choice": batched_problem_choice, - } - _del_dict_nones(predictions) - - export_out = {"outputs": predictions["outputs"]} - if "scores" in predictions: - export_out["scores"] = predictions["scores"] - - return tf.estimator.EstimatorSpec( - mode, - predictions=predictions, - export_outputs={ - "output": tf.estimator.export.PredictOutput(export_out) - }) - - total_loss, logits = model_output - - if mode == tf.estimator.ModeKeys.EVAL: - eval_metrics_fns = metrics.create_evaluation_metrics( - hparams.problem_instances, hparams) - - eval_metrics = {} - for metric_name, metric_fn in six.iteritems(eval_metrics_fns): - eval_metrics[metric_name] = metric_fn(logits, features) - - return tf.estimator.EstimatorSpec( - mode, - predictions={"predictions": logits}, - eval_metric_ops=eval_metrics, - loss=total_loss) - - assert mode == tf.estimator.ModeKeys.TRAIN - - # Set learning rate - learning_rate = hparams.learning_rate * optimize.learning_rate_decay( - hparams, num_worker_replicas=worker_replicas, num_train_steps=train_steps) - learning_rate /= math.sqrt(float(worker_replicas)) - - # Get global step - global_step = tf.train.get_or_create_global_step() - - # Some training statistics. - with tf.name_scope("training_stats"): - tf.summary.scalar("learning_rate", learning_rate) - for n in xrange(len(hparams.problems)): - names_and_vars = [] - with tf.variable_scope("losses_avg", reuse=True): - total_loss_var = tf.get_variable("problem_%d/total_loss" % n) - names_and_vars.append(("total_loss", total_loss_var)) - with tf.variable_scope("losses_avg", reuse=True): - for loss_name in loss_variable_names: - if loss_name.startswith("problem_%d/" % n): - loss_var = tf.get_variable(loss_name) - loss_suffix = loss_name[loss_name.index("/") + 1:] - names_and_vars.append((loss_suffix, loss_var)) - for (loss_name, loss_var) in names_and_vars: - tf.summary.scalar("loss_avg_%d/%s" % (n, loss_name), loss_var) - with tf.variable_scope("train_stats", reuse=True): - nth_steps = tf.get_variable("problem_%d_steps" % n, dtype=tf.int32) - tf.summary.scalar("problem_%d_frequency" % n, - tf.to_float(nth_steps) / - (tf.to_float(global_step) + 1.0)) - - # The new data reader occasionally emits very small batches, which - # cause the examples in those batches to be grossly overweighted. - # We decrease the loss proportionally to the ratio of the size of this - # batch to the size of the largest training batch ever. - # TODO(noam): to be more sophisticated, we could keep separate - # maxima based on problem choice. - max_nonpadding_var = tf.get_variable( - "max_nonpadding", - shape=[], - initializer=tf.ones_initializer(), - trainable=False) - max_nonpadding = tf.maximum(max_nonpadding_var, targets_nonpadding_tokens) - with tf.control_dependencies([tf.assign(max_nonpadding_var, max_nonpadding)]): - small_batch_multiplier = targets_nonpadding_tokens / max_nonpadding - tf.summary.scalar("small_batch_multiplier", small_batch_multiplier) - total_loss *= small_batch_multiplier - - # Optimize - train_op = optimize.optimize(total_loss, learning_rate, hparams) - - # Remove summaries that will fail to run because they are in conditionals. - # TODO(cwhipkey): Test with this code removed, later in 2017. - summaries = tf.get_collection_ref(tf.GraphKeys.SUMMARIES) - for i in reversed(range(len(summaries))): - if summaries[i].name.startswith("cond_"): - del summaries[i] - - tf.logging.info("Global model_fn finished.") - return tf.estimator.EstimatorSpec( - mode, - predictions={"problem_choice": features["problem_choice"]}, - loss=total_loss, - train_op=train_op) - - -def build_model_fn(model, **kwargs): - """Returns a function to build the model. See model_fn.""" - - # Model function as expected by Estimator - def wrapping_model_fn(features, labels, mode, params): - # Deep-copy the model hparams between modes to eliminate - # side-effects caused by abuse of the linked problem_hparams - # objects which are used to share modality objects between - # problems. We do not want to share the modality objects between - # modes, since the modality objects may decide to do something - # mode-specific. A better fix would be to stop abusing the - # hparams in this way and instead use a separate dictionary to - # share the modality objects between problems. This dictionary - # could be created once per mode and passed to the constructor of - # t2t_model. - hparams = copy.deepcopy(params) - del params - - if labels is not None: - features["targets"] = labels - del labels - - return model_fn(model, features, mode, hparams, **kwargs) - - return wrapping_model_fn - - -def _del_dict_nones(d): - for k in list(d.keys()): - if d[k] is None: - del d[k] diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index b895c0ed3..26854de13 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -692,7 +692,7 @@ def _shard_features(self, features): # pylint: disable=missing-docstring def _to_features_per_datashard(self, features): datashard_features = [] - assert len(features[features.keys()[0]]) == self._num_datashards + assert len(features[list(features.keys())[0]]) == self._num_datashards for d in range(self._num_datashards): f = {k: v[d] for k, v in six.iteritems(features)} datashard_features.append(f) @@ -713,7 +713,7 @@ def make_estimator_model_fn(model_name, use_tpu=False): model_cls = registry.model(model_name) - def wrapping_model_fn(features, labels, mode, params, config): + def wrapping_model_fn(features, labels, mode, params=None, config=None): return model_cls.estimator_model_fn( hparams, features, @@ -760,8 +760,9 @@ def estimator_model_fn(cls, problem = hparams.problem_instances[0] # Instantiate model - data_parallelism = (None if (hparams.no_data_parallelism or use_tpu) - else config.data_parallelism) + data_parallelism = None + if not use_tpu and not hparams.no_data_parallelism and config: + data_parallelism = config.data_parallelism model = cls(hparams, mode, data_parallelism=data_parallelism, decode_hparams=decode_hparams) @@ -781,7 +782,7 @@ def estimator_model_fn(cls, if use_tpu: shape = logits.get_shape().as_list() if shape[0] is None: - shape[0] = _get_batch_size(params, hparams, config) + shape[0] = params["batch_size"] if shape[1] is None: shape[1] = hparams.max_length logits.set_shape(shape) @@ -798,7 +799,8 @@ def estimator_model_fn(cls, # TRAIN mode assert mode == tf.estimator.ModeKeys.TRAIN num_async_replicas = ( - 1 if use_tpu else config.t2t_device_info["num_async_replicas"]) + 1 if (use_tpu or not config) + else config.t2t_device_info["num_async_replicas"]) return model.estimator_spec_train( loss, num_async_replicas=num_async_replicas, use_tpu=use_tpu) @@ -930,22 +932,6 @@ def _create_dummy_vars(): tf.get_variable("problem_0_steps", initializer=0, trainable=False) -def _get_batch_size(params, hparams, config): - """Batch size determined by params dict, HParams, and RunConfig.""" - # If params specifies batch size, use that. TPUEstimator passes batch size in - # params. - batch_size = params and params.get("batch_size") - - # If not set, then we're running on CPU/GPU, so use the batch size from the - # hparams, and multiply by the number of data shards. - if not batch_size: - batch_size = hparams.tpu_batch_size_per_shard - if config: - batch_size *= config.data_parallelism.n - - return batch_size - - # These metrics are implemented with py_funcs and therefore do no work with TPU TPU_METRIC_BLACKLIST = set([ metrics.Metrics.APPROX_BLEU, diff --git a/tensor2tensor/utils/trainer_utils.py b/tensor2tensor/utils/trainer_utils.py deleted file mode 100644 index a32dd446e..000000000 --- a/tensor2tensor/utils/trainer_utils.py +++ /dev/null @@ -1,341 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Utilities for trainer binary.""" -# DEPRECATED - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys - -# Dependency imports - -from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import -from tensor2tensor.utils import data_reader -from tensor2tensor.utils import decoding -from tensor2tensor.utils import devices -from tensor2tensor.utils import flags # pylint: disable=unused-import -from tensor2tensor.utils import input_fn_builder -from tensor2tensor.utils import model_builder -from tensor2tensor.utils import registry - -import tensorflow as tf -from tensorflow.contrib.learn.python.learn import learn_runner -from tensorflow.core.protobuf import rewriter_config_pb2 -from tensorflow.python import debug - -FLAGS = tf.flags.FLAGS - - -def make_experiment_fn(data_dir, model_name, train_steps, eval_steps): - """Returns experiment_fn for learn_runner. Wraps create_experiment.""" - - def experiment_fn(run_config, hparams): - return create_experiment( - data_dir, - model_name=model_name, - train_steps=train_steps, - eval_steps=eval_steps, - hparams=hparams, - run_config=run_config) - - return experiment_fn - - -def create_experiment(data_dir, model_name, train_steps, eval_steps, hparams, - run_config): - """Create Experiment.""" - estimator, input_fns = create_experiment_components( - data_dir=data_dir, - model_name=model_name, - hparams=hparams, - run_config=run_config) - - train_monitors = [] - eval_hooks = [] - if FLAGS.tfdbg: - hook = debug.LocalCLIDebugHook() - train_monitors.append(hook) - eval_hooks.append(hook) - if FLAGS.dbgprofile: - # Recorded traces can be visualized with chrome://tracing/ - # The memory/tensor lifetime is also profiled - train_monitors.append( - tf.contrib.hooks.ProfilerHook( - save_steps=10, - output_dir=run_config.model_dir, - show_dataflow=True, - show_memory=True, - )) - if FLAGS.schedule == "train_and_evaluate": - if FLAGS.local_eval_frequency: - train_monitors.append( - tf.contrib.learn.monitors.ValidationMonitor( - input_fn=input_fns[tf.estimator.ModeKeys.EVAL], - eval_steps=eval_steps, - every_n_steps=FLAGS.local_eval_frequency, - hooks=eval_hooks, - early_stopping_rounds=FLAGS.eval_early_stopping_steps, - early_stopping_metric=FLAGS.eval_early_stopping_metric, - early_stopping_metric_minimize=FLAGS. - eval_early_stopping_metric_minimize)) - - optional_kwargs = {} - if FLAGS.export_saved_model: - assert len(hparams.problem_instances) == 1 - problem = hparams.problem_instances[0] - optional_kwargs["export_strategies"] = [ - make_export_strategy(problem, hparams) - ] - - return tf.contrib.learn.Experiment( - estimator=estimator, - train_input_fn=input_fns[tf.estimator.ModeKeys.TRAIN], - eval_input_fn=input_fns[tf.estimator.ModeKeys.EVAL], - train_steps=train_steps, - eval_steps=eval_steps, - train_monitors=train_monitors, - eval_hooks=eval_hooks, - train_steps_per_iteration=FLAGS.local_eval_frequency, - eval_delay_secs=0, - **optional_kwargs) - - -def make_export_strategy(problem, hparams): - return tf.contrib.learn.make_export_strategy( - lambda: data_reader.serving_input_fn(problem, hparams), as_text=True) - - -def create_experiment_components(data_dir, model_name, hparams, run_config): - """Constructs and returns Estimator and train/eval input functions.""" - tf.logging.info("Creating experiment, storing model files in %s", - run_config.model_dir) - - add_problem_hparams(hparams, FLAGS.problems) - - # hparams batch_size is used as minibatch size instead of tokens in batch - batch_size = (hparams.use_fixed_batch_size and hparams.batch_size) or None - num_datashards = devices.data_parallelism_from_flags(hparams).n - train_input_fn = input_fn_builder.build_input_fn( - mode=tf.estimator.ModeKeys.TRAIN, - hparams=hparams, - data_dir=data_dir, - num_datashards=num_datashards, - worker_replicas=FLAGS.worker_replicas, - worker_id=FLAGS.worker_id, - batch_size=batch_size) - - eval_input_fn = input_fn_builder.build_input_fn( - mode=tf.estimator.ModeKeys.EVAL, - hparams=hparams, - data_dir=data_dir, - num_datashards=num_datashards, - worker_replicas=FLAGS.worker_replicas, - worker_id=FLAGS.worker_id, - dataset_split="test" if FLAGS.eval_use_test_set else None) - - model_fn = model_builder.build_model_fn( - model_name, - problem_names=FLAGS.problems.split("-"), - train_steps=FLAGS.train_steps, - worker_id=FLAGS.worker_id, - worker_replicas=FLAGS.worker_replicas, - eval_run_autoregressive=FLAGS.eval_run_autoregressive, - decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams)) - - estimator = tf.estimator.Estimator( - model_fn=model_fn, - model_dir=run_config.model_dir, - params=hparams, - config=run_config) - - return estimator, { - tf.estimator.ModeKeys.TRAIN: train_input_fn, - tf.estimator.ModeKeys.EVAL: eval_input_fn - } - - -def log_registry(): - if FLAGS.registry_help: - tf.logging.info(registry.help_string()) - sys.exit(0) - - -# TODO(rsepassi): Rm after trainer merge - duplicated in tpu_trainer_lib -def add_problem_hparams(hparams, problems): - """Add problem hparams for the problems.""" - hparams.problems = [] - hparams.problem_instances = [] - for problem_name in problems.split("-"): - problem = registry.problem(problem_name) - p_hparams = problem.get_hparams(hparams) - - hparams.problem_instances.append(problem) - hparams.problems.append(p_hparams) - - -def save_metadata(output_dir, hparams): - """Saves FLAGS and hparams to output_dir.""" - # Save FLAGS in txt file - if hasattr(FLAGS, "flags_into_string"): - flags_str = FLAGS.flags_into_string() - t2t_flags_str = "\n".join([ - "--%s=%s" % (f.name, f.value) - for f in FLAGS.flags_by_module_dict()[ - "tensor2tensor.utils.flags"] - ]) - else: - flags_dict = FLAGS.__dict__["__flags"] - flags_str = "\n".join( - ["--%s=%s" % (name, str(f)) for (name, f) in flags_dict.items()]) - t2t_flags_str = None - - flags_txt = os.path.join(output_dir, "flags.txt") - with tf.gfile.Open(flags_txt, "w") as f: - f.write(flags_str) - - if t2t_flags_str: - t2t_flags_txt = os.path.join(output_dir, "flags_t2t.txt") - with tf.gfile.Open(t2t_flags_txt, "w") as f: - f.write(t2t_flags_str) - - # Save hparams as hparams.json - hparams_fname = os.path.join(output_dir, "hparams.json") - with tf.gfile.Open(hparams_fname, "w") as f: - f.write(hparams.to_json()) - - -def create_hparams(params_id, data_dir, passed_hparams=None): - """Returns hyperparameters, including any flag value overrides. - - If the hparams FLAG is set, then it will use any values specified in - hparams to override any individually-set hyperparameter. This logic - allows tuners to override hyperparameter settings to find optimal values. - - Args: - params_id: which set of parameters to choose (must be in _PARAMS above). - data_dir: the directory containing the training data. - passed_hparams: command-line overrides for some hparams. - - Returns: - The hyperparameters as a tf.contrib.training.HParams object. - """ - hparams = registry.hparams(params_id)() - hparams.add_hparam("data_dir", data_dir) - # Command line flags override any of the preceding hyperparameter values. - if passed_hparams: - hparams = hparams.parse(passed_hparams) - - return hparams - - -def create_run_config(output_dir): - """Create a RunConfig object.""" - - run_config = tf.contrib.learn.RunConfig( - model_dir=output_dir, - master=FLAGS.master, - gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction, - session_config=session_config(), - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, - save_checkpoints_secs=FLAGS.save_checkpoints_secs) - - return run_config - - -def run(data_dir, model, output_dir, train_steps, eval_steps, schedule): - """Runs an Estimator locally or distributed. - - Args: - data_dir: The directory the data can be found in. - model: The name of the model to use. - output_dir: The directory to store outputs in. - train_steps: The number of steps to run training for. - eval_steps: The number of steps to run evaluation for. - schedule: (str) The schedule to run. The value here must - be the name of one of Experiment's methods. - """ - exp_fn = make_experiment_fn( - data_dir=data_dir, - model_name=model, - train_steps=train_steps, - eval_steps=eval_steps) - - # Create hparams and run_config - run_config = create_run_config(output_dir) - hparams = create_hparams( - FLAGS.hparams_set, data_dir, passed_hparams=FLAGS.hparams) - - if is_chief(): - save_metadata(output_dir, hparams) - - learn_runner.run( - experiment_fn=exp_fn, - schedule=schedule, - run_config=run_config, - hparams=hparams) - - -def validate_flags(): - """Validate command line flags.""" - if not FLAGS.model: - raise ValueError("Must specify a model with --model.") - if not FLAGS.problems: - raise ValueError("Must specify a set of problems with --problems.") - if not (FLAGS.hparams_set or FLAGS.hparams_range): - raise ValueError("Must specify either --hparams_set or --hparams_range.") - if not FLAGS.schedule: - raise ValueError("Must specify --schedule.") - if not FLAGS.output_dir: - FLAGS.output_dir = "/tmp/tensor2tensor" - tf.logging.warning("It is strongly recommended to specify --output_dir. " - "Using default output_dir=%s.", FLAGS.output_dir) - if not FLAGS.data_dir: - raise ValueError("Must specify --data_dir.") - - -def is_chief(): - schedules = ["train", "train_and_evaluate"] - return FLAGS.worker_id == 0 and FLAGS.schedule in schedules - - -def session_config(): - """The TensorFlow Session config to use.""" - graph_options = tf.GraphOptions( - optimizer_options=tf.OptimizerOptions( - opt_level=tf.OptimizerOptions.L1, do_function_inlining=False)) - - if FLAGS.experimental_optimize_placement: - rewrite_options = rewriter_config_pb2.RewriterConfig() - rewrite_options.optimizers.append("pruning") - rewrite_options.optimizers.append("constfold") - rewrite_options.optimizers.append("arithmetic") - rewrite_options.optimizers.append("layout") - graph_options = tf.GraphOptions(rewrite_options=rewrite_options) - - gpu_options = tf.GPUOptions( - per_process_gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction) - - config = tf.ConfigProto( - allow_soft_placement=True, - graph_options=graph_options, - gpu_options=gpu_options, - log_device_placement=FLAGS.log_device_placement) - return config diff --git a/tensor2tensor/utils/trainer_utils_test.py b/tensor2tensor/utils/trainer_utils_test.py deleted file mode 100644 index bd7367766..000000000 --- a/tensor2tensor/utils/trainer_utils_test.py +++ /dev/null @@ -1,208 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for trainer_utils.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import shutil - -# Dependency imports - -from tensor2tensor.data_generators import algorithmic -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.models import transformer -from tensor2tensor.utils import model_builder -from tensor2tensor.utils import registry -from tensor2tensor.utils import trainer_utils - -import tensorflow as tf - -flags = tf.flags -FLAGS = tf.flags.FLAGS - -flags.DEFINE_string("schedule", "train_and_evaluate", "") -flags.DEFINE_integer("eval_steps", 10, "Number of steps in evaluation.") -flags.DEFINE_string("master", "", "Address of TensorFlow master.") -flags.DEFINE_string("output_dir", "", "Base output directory for run.") - - -@registry.register_problem -class TinyAlgo(algorithmic.AlgorithmicIdentityBinary40): - - def generate_data(self, data_dir, _): - identity_problem = algorithmic.AlgorithmicIdentityBinary40() - generator_utils.generate_files( - identity_problem.generator(self.num_symbols, 40, 100000), - self.training_filepaths(data_dir, 1, shuffled=True), 100) - generator_utils.generate_files( - identity_problem.generator(self.num_symbols, 400, 10000), - self.dev_filepaths(data_dir, 1, shuffled=True), 100) - - -@registry.register_hparams -def transformer_test(): - hparams = transformer.transformer_base() - hparams.batch_size = 10 - hparams.hidden_size = 10 - hparams.num_hidden_layers = 1 - hparams.num_heads = 2 - hparams.max_length = 16 - return hparams - - -class TrainerUtilsTest(tf.test.TestCase): - - @classmethod - def setUpClass(cls): - tmp_dir = tf.test.get_temp_dir() - shutil.rmtree(tmp_dir) - os.mkdir(tmp_dir) - - # Generate a small test dataset - FLAGS.problems = "tiny_algo" - TrainerUtilsTest.data_dir = tmp_dir - registry.problem(FLAGS.problems).generate_data(TrainerUtilsTest.data_dir, - None) - - def testModelsImported(self): - models = registry.list_models() - self.assertTrue("lstm_seq2seq" in models) - - def testHParamsImported(self): - hparams = registry.list_hparams() - self.assertTrue("transformer_base" in hparams) - - def testSingleStep(self): - model_name = "transformer" - data_dir = TrainerUtilsTest.data_dir - hparams = trainer_utils.create_hparams("transformer_test", data_dir) - trainer_utils.add_problem_hparams(hparams, FLAGS.problems) - exp = trainer_utils.create_experiment( - data_dir=data_dir, - model_name=model_name, - train_steps=1, - eval_steps=1, - hparams=hparams, - run_config=trainer_utils.create_run_config( - output_dir=tf.test.get_temp_dir())) - exp.test() - - def testSingleEvalStepRawSession(self): - """Illustrate how to run a T2T model in a raw session.""" - - # Set model name, hparams, problems as would be set on command line. - model_name = "transformer" - FLAGS.hparams_set = "transformer_test" - FLAGS.problems = "tiny_algo" - data_dir = "/tmp" # Used only when a vocab file or such like is needed. - - # Create the problem object, hparams, placeholders, features dict. - encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir) - hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir) - trainer_utils.add_problem_hparams(hparams, FLAGS.problems) - inputs_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. - batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1]) # Make it 4D. - # In INFER mode targets can be None. - targets_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. - batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1]) # Make it 4D. - features = { - "inputs": batch_inputs, - "targets": batch_targets, - "problem_choice": tf.constant(0), # We run on the first problem here. - "input_space_id": tf.constant(hparams.problems[0].input_space_id), - "target_space_id": tf.constant(hparams.problems[0].target_space_id) - } - - # Now set a mode and create the graph by invoking model_fn. - mode = tf.estimator.ModeKeys.EVAL - estimator_spec = model_builder.model_fn( - model_name, features, mode, hparams, problem_names=[FLAGS.problems]) - predictions_dict = estimator_spec.predictions - predictions = tf.squeeze( # These are not images, axis=2,3 are not needed. - predictions_dict["predictions"], - axis=[2, 3]) - - # Having the graph, let's run it on some data. - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - inputs = "0 1 0" - targets = "0 1 0" - # Encode from raw string to numpy input array using problem encoders. - inputs_numpy = encoders["inputs"].encode(inputs) - targets_numpy = encoders["targets"].encode(targets) - # Feed the encoded inputs and targets and run session. - feed = {inputs_ph: inputs_numpy, targets_ph: targets_numpy} - np_predictions = sess.run(predictions, feed) - # Check that the result has the correct shape: batch x length x vocab_size - # where, for us, batch = 1, length = 3, vocab_size = 4. - self.assertEqual(np_predictions.shape, (1, 3, 4)) - - def testSingleTrainStepCall(self): - """Illustrate how to run a T2T model in a raw session.""" - - # Set model name, hparams, problems as would be set on command line. - model_name = "transformer" - FLAGS.hparams_set = "transformer_test" - FLAGS.problems = "tiny_algo" - data_dir = "/tmp" # Used only when a vocab file or such like is needed. - - # Create the problem object, hparams, placeholders, features dict. - encoders = registry.problem(FLAGS.problems).feature_encoders(data_dir) - hparams = trainer_utils.create_hparams(FLAGS.hparams_set, data_dir) - trainer_utils.add_problem_hparams(hparams, FLAGS.problems) - - # Now set a mode and create the model. - mode = tf.estimator.ModeKeys.TRAIN - model = registry.model(model_name)(hparams, mode) - - # Create placeholder for features and make them batch-sized. - inputs_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. - batch_inputs = tf.reshape(inputs_ph, [1, -1, 1, 1]) # Make it 4D. - targets_ph = tf.placeholder(dtype=tf.int32) # Just length dimension. - batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1]) # Make it 4D. - features = { - "inputs": batch_inputs, - "targets": batch_targets, - "target_space_id": tf.constant(hparams.problems[0].target_space_id) - } - - # Call the model. - predictions, _ = model(features) - nvars = len(tf.trainable_variables()) - model(features) # Call again and check that reuse works. - self.assertEqual(nvars, len(tf.trainable_variables())) - - # Having the graph, let's run it on some data. - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - inputs = "0 1 0" - targets = "0 1 0" - # Encode from raw string to numpy input array using problem encoders. - inputs_numpy = encoders["inputs"].encode(inputs) - targets_numpy = encoders["targets"].encode(targets) - # Feed the encoded inputs and targets and run session. - feed = {inputs_ph: inputs_numpy, targets_ph: targets_numpy} - np_predictions = sess.run(predictions, feed) - # Check that the result has the correct shape: batch x length x vocab_size - # where, for us, batch = 1, length = 3, vocab_size = 4. - self.assertEqual(np_predictions.shape, (1, 3, 1, 1, 4)) - - -if __name__ == "__main__": - tf.test.main() diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index e8f114d08..f2c4f1559 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -29,9 +29,11 @@ "import tensorflow as tf\n", "import numpy as np\n", "\n", - "from tensor2tensor.utils import trainer_utils as utils\n", - "from tensor2tensor.visualization import attention\n", - "from tensor2tensor.utils import decoding" + "from tensor2tensor.tpu import tpu_trainer_lib\n", + "from tensor2tensor.utils import t2t_model\n", + "from tensor2tensor.utils import decoding\n", + "from tensor2tensor.utils import devices\n", + "from tensor2tensor.visualization import attention\n" ] }, { @@ -131,27 +133,21 @@ } ], "source": [ - "hparams = utils.create_hparams(FLAGS.hparams_set, FLAGS.data_dir)\n", + "hparams = tpu_trainer_lib.create_hparams(FLAGS.hparams_set, data_dir=FLAGS.data_dir, problem_name=PROBLEM)\n", + "hparams.use_fixed_batch_size = True\n", + "hparams.batch_size = 1\n", "\n", "# SET EXTRA HYPER PARAMS HERE!\n", "#hparams.null_slot = True\n", "\n", - "utils.add_problem_hparams(hparams, PROBLEM)\n", - "\n", - "num_datashards = utils.devices.data_parallelism_from_flags(hparams).n\n", - "\n", "mode = tf.estimator.ModeKeys.EVAL\n", "\n", - "input_fn = utils.input_fn_builder.build_input_fn(\n", + "problem = hparams.problem_instances[0]\n", + "inputs, target = problem.input_fn(\n", " mode=mode,\n", " hparams=hparams,\n", - " data_dir=DATA_DIR,\n", - " num_datashards=num_datashards,\n", - " worker_replicas=FLAGS.worker_replicas,\n", - " worker_id=FLAGS.worker_id,\n", - " batch_size=1)\n", + " data_dir=DATA_DIR)\n", "\n", - "inputs, target = input_fn()\n", "features = inputs\n", "features['targets'] = target" ] @@ -211,15 +207,12 @@ } ], "source": [ - "model_fn=utils.model_builder.build_model_fn(\n", + "decode_hparams = decoding.decode_hparams(FLAGS.decode_hparams)\n", + "model_fn = t2t_model.T2TModel.make_estimator_model_fn(\n", " MODEL,\n", - " problem_names=[PROBLEM],\n", - " train_steps=FLAGS.train_steps,\n", - " worker_id=FLAGS.worker_id,\n", - " worker_replicas=FLAGS.worker_replicas,\n", - " eval_run_autoregressive=FLAGS.eval_run_autoregressive,\n", - " decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams))\n", - "est_spec = model_fn(features, target, mode, hparams)" + " hparams,\n", + " decode_hparams=decode_hparams)\n", + "est_spec = model_fn(features, target, mode)" ] }, { @@ -243,7 +236,7 @@ ], "source": [ "with tf.variable_scope(tf.get_variable_scope(), reuse=True):\n", - " beam_out = model_fn(features, target, tf.contrib.learn.ModeKeys.INFER, hparams)" + " beam_out = model_fn(features, target, tf.contrib.learn.ModeKeys.INFER)" ] }, { @@ -509,4 +502,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} From 87bfac5c9773a119390a7971025e699674bb6df9 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Thu, 21 Dec 2017 14:40:33 -0800 Subject: [PATCH 0676/4095] Add EarlyStoppingHook, PlateauOpHook, and MetricsBasedHook base class PiperOrigin-RevId: 179860572 --- tensor2tensor/bin/t2t-trainer | 5 +- tensor2tensor/bin/t2t_trainer.py | 5 +- tensor2tensor/tpu/tpu_trainer.py | 5 +- tensor2tensor/tpu/tpu_trainer_lib.py | 33 ++- tensor2tensor/tpu/tpu_trainer_lib_test.py | 3 +- tensor2tensor/utils/flags.py | 12 +- tensor2tensor/utils/metrics_hook.py | 291 ++++++++++++++++++++++ tensor2tensor/utils/metrics_hook_test.py | 198 +++++++++++++++ 8 files changed, 530 insertions(+), 22 deletions(-) create mode 100644 tensor2tensor/utils/metrics_hook.py create mode 100644 tensor2tensor/utils/metrics_hook_test.py diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 7992e9ba9..ed89949ab 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -77,9 +77,6 @@ def create_hparams(): def create_experiment_fn(): - use_validation_monitor = (FLAGS.schedule in - ["train_and_evaluate", "continuous_train_and_eval"] - and FLAGS.local_eval_frequency) return tpu_trainer_lib.create_experiment_fn( model_name=FLAGS.model, problem_name=get_problem_name(), @@ -92,9 +89,9 @@ def create_experiment_fn(): decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), use_tfdbg=FLAGS.tfdbg, use_dbgprofile=FLAGS.dbgprofile, - use_validation_monitor=use_validation_monitor, eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, + eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, eval_early_stopping_metric_minimize=FLAGS. eval_early_stopping_metric_minimize, use_tpu=FLAGS.use_tpu) diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index d17ff85ea..990035ed0 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -76,9 +76,6 @@ def create_hparams(): def create_experiment_fn(): - use_validation_monitor = (FLAGS.schedule in - ["train_and_evaluate", "continuous_train_and_eval"] - and FLAGS.local_eval_frequency) return tpu_trainer_lib.create_experiment_fn( model_name=FLAGS.model, problem_name=get_problem_name(), @@ -91,9 +88,9 @@ def create_experiment_fn(): decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), use_tfdbg=FLAGS.tfdbg, use_dbgprofile=FLAGS.dbgprofile, - use_validation_monitor=use_validation_monitor, eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, + eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, eval_early_stopping_metric_minimize=FLAGS. eval_early_stopping_metric_minimize, use_tpu=FLAGS.use_tpu) diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index d17ff85ea..990035ed0 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -76,9 +76,6 @@ def create_hparams(): def create_experiment_fn(): - use_validation_monitor = (FLAGS.schedule in - ["train_and_evaluate", "continuous_train_and_eval"] - and FLAGS.local_eval_frequency) return tpu_trainer_lib.create_experiment_fn( model_name=FLAGS.model, problem_name=get_problem_name(), @@ -91,9 +88,9 @@ def create_experiment_fn(): decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), use_tfdbg=FLAGS.tfdbg, use_dbgprofile=FLAGS.dbgprofile, - use_validation_monitor=use_validation_monitor, eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, + eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, eval_early_stopping_metric_minimize=FLAGS. eval_early_stopping_metric_minimize, use_tpu=FLAGS.use_tpu) diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index 475d0f1be..be7f00351 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -19,10 +19,13 @@ from __future__ import division from __future__ import print_function +import os + # Dependency imports from tensor2tensor.utils import devices from tensor2tensor.utils import expert_utils +from tensor2tensor.utils import metrics_hook from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model @@ -186,7 +189,8 @@ def create_estimator(model_name, def create_hooks(use_tfdbg=False, use_dbgprofile=False, dbgprofile_kwargs=None, - use_validation_monitor=False, validation_monitor_kwargs=None): + use_validation_monitor=False, validation_monitor_kwargs=None, + use_early_stopping=False, early_stopping_kwargs=None): """Create train and eval hooks for Experiment.""" train_monitors = [] eval_hooks = [] @@ -208,6 +212,12 @@ def create_hooks(use_tfdbg=False, use_dbgprofile=False, dbgprofile_kwargs=None, tf.contrib.learn.monitors.ValidationMonitor( hooks=eval_hooks, **validation_monitor_kwargs)) + if use_early_stopping: + hook = metrics_hook.EarlyStoppingHook(**early_stopping_kwargs) + # Adding to both training and eval so that eval aborts as well + train_monitors.append(hook) + eval_hooks.append(hook) + return train_monitors, eval_hooks @@ -224,9 +234,9 @@ def create_experiment(run_config, decode_hparams=None, use_tfdbg=False, use_dbgprofile=False, - use_validation_monitor=False, eval_early_stopping_steps=None, eval_early_stopping_metric=None, + eval_early_stopping_metric_delta=None, eval_early_stopping_metric_minimize=True, use_tpu=False): """Create Experiment.""" @@ -264,12 +274,29 @@ def create_experiment(run_config, early_stopping_rounds=eval_early_stopping_steps, early_stopping_metric=eval_early_stopping_metric, early_stopping_metric_minimize=eval_early_stopping_metric_minimize) + early_stopping_kwargs = dict( + events_dir=os.path.join(run_config.model_dir, "eval_continuous"), + tag=eval_early_stopping_metric, + num_plateau_steps=eval_early_stopping_steps, + plateau_decrease=eval_early_stopping_metric_minimize, + plateau_delta=eval_early_stopping_metric_delta, + every_n_steps=min_eval_frequency) + + # In-process eval (and possible early stopping) + local_schedules = ["train_and_evaluate", "continuous_train_and_eval"] + use_validation_monitor = ( + schedule in local_schedules and min_eval_frequency) + # Distributed early stopping + use_early_stopping = ( + schedule not in local_schedules and eval_early_stopping_steps) train_monitors, eval_hooks = create_hooks( use_tfdbg=use_tfdbg, use_dbgprofile=use_dbgprofile, dbgprofile_kwargs=dbgprofile_kwargs, use_validation_monitor=use_validation_monitor, - validation_monitor_kwargs=validation_monitor_kwargs) + use_early_stopping=use_early_stopping, + validation_monitor_kwargs=validation_monitor_kwargs, + early_stopping_kwargs=early_stopping_kwargs) hooks_kwargs = {"train_monitors": train_monitors, "eval_hooks": eval_hooks} # Experiment diff --git a/tensor2tensor/tpu/tpu_trainer_lib_test.py b/tensor2tensor/tpu/tpu_trainer_lib_test.py index e8c1689c7..2a2148afd 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib_test.py +++ b/tensor2tensor/tpu/tpu_trainer_lib_test.py @@ -68,7 +68,8 @@ def testExperiment(self): eval_steps=1, min_eval_frequency=1, use_tpu=False) - run_config = tpu_trainer_lib.create_run_config(num_gpus=0, use_tpu=False) + run_config = tpu_trainer_lib.create_run_config( + model_dir=self.data_dir, num_gpus=0, use_tpu=False) hparams = registry.hparams("transformer_tiny_tpu")() exp = exp_fn(run_config, hparams) exp.test() diff --git a/tensor2tensor/utils/flags.py b/tensor2tensor/utils/flags.py index f4e93a68f..410dccfe1 100644 --- a/tensor2tensor/utils/flags.py +++ b/tensor2tensor/utils/flags.py @@ -55,14 +55,14 @@ flags.DEFINE_integer("train_steps", 250000, "The number of steps to run training for.") flags.DEFINE_string("eval_early_stopping_metric", "loss", - "If --schedule=train_and_evaluate and " - "--eval_early_stopping_steps is not None, then stop when " - "--eval_early_stopping_metric has not decreased for " + "If --eval_early_stopping_steps is not None, then stop " + "when --eval_early_stopping_metric has not decreased for " "--eval_early_stopping_steps") +flags.DEFINE_float("eval_early_stopping_metric_delta", 0.1, + "Delta determining whether metric has plateaued.") flags.DEFINE_integer("eval_early_stopping_steps", None, - "If --schedule=train_and_evaluate and " - "--eval_early_stopping_steps is not None, then stop when " - "--eval_early_stopping_metric has not decreased for " + "If --eval_early_stopping_steps is not None, then stop " + "when --eval_early_stopping_metric has not decreased for " "--eval_early_stopping_steps") flags.DEFINE_bool("eval_early_stopping_metric_minimize", True, "Whether to check for the early stopping metric going down " diff --git a/tensor2tensor/utils/metrics_hook.py b/tensor2tensor/utils/metrics_hook.py new file mode 100644 index 000000000..e5cde12cc --- /dev/null +++ b/tensor2tensor/utils/metrics_hook.py @@ -0,0 +1,291 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Summary-based SessionRunHooks.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +# Dependency imports + +import tensorflow as tf + +from tensorboard.backend.event_processing import event_accumulator +from tensorboard.backend.event_processing import event_multiplexer + + +class MetricsBasedHook(tf.train.SessionRunHook): + """Base class for hooks based on summary metrics. + + Subclasses should override _process_metrics. + + If _process_metrics returns True, calls run_context.request_stop(). + + This can be used to something like "Stop after the loss has stopped decreasing + for 5000 steps. + """ + _RUN_NAME = "run%d" + + def __init__(self, events_dir, subdirs=None, tags=None, every_n_steps=1000): + """Construct MetricsBasedHook. + + Args: + events_dir: str, top-level directory containing events files. + subdirs: list, subdirectories of events_dir that also contain + events files. Use "" to specify the top-level directory. Defaults to + [""]. + tags: list, names of metrics to collect. Default will collect all + metrics. + every_n_steps: int, collect metrics every n steps. + """ + self._events_dir = events_dir + self._subdirs = subdirs or [""] + self._tags = tags + self._every_n_steps = every_n_steps + self._start_step = None + self._event_multiplexer = self._init_multiplexer() + + def _init_multiplexer(self): + dirs = [os.path.join(self._events_dir, subdir) for subdir in self._subdirs] + run_path_map = dict([(self._RUN_NAME % i, d) for i, d in enumerate(dirs)]) + return event_multiplexer.EventMultiplexer(run_path_map) + + def begin(self): + self._global_step_tensor = tf.train.get_global_step() + if self._global_step_tensor is None: + raise RuntimeError("Global step must be created to use MetricsBasedHook.") + + def after_create_session(self, session, coord): + del coord + if self._start_step is None: + self._start_step = session.run(self._global_step_tensor) + + def before_run(self, run_context): + del run_context + return tf.train.SessionRunArgs([self._global_step_tensor]) + + def after_run(self, run_context, run_values): + global_step = run_values.results[0] + if (global_step - self._start_step) % self._every_n_steps != 0: + return + metrics = self._collect_metrics() + self._after_run(run_context, run_values, global_step, metrics) + + def _after_run(self, run_context, run_values, global_step, metrics): + if self._process_metrics(global_step, metrics): + run_context.request_stop() + + def _collect_metrics(self): + self._event_multiplexer.Reload() + subdir_data = {} + for i, subdir in enumerate(self._subdirs): + subdir_metrics = {} + + accum = self._event_multiplexer.GetAccumulator(self._RUN_NAME % i) + for tag in accum.Tags()[event_accumulator.SCALARS]: + steps, vals = zip(*[ + (event.step, event.value) for event in accum.Scalars(tag)]) + subdir_metrics[tag] = (steps, vals) + + subdir_data[subdir] = subdir_metrics + return subdir_data + + def _process_metrics(self, global_step, metrics): + """Process the collected metrics. + + Args: + global_step: int, the current global step value. + metrics: dict. The collected + metrics. subdir_metrics is a dict from tag name to tuple of lists. The + lists are a list of global steps and a list of values. + i.e. subdir_metrics: + `dict global steps, list values>>>` + + Returns: + should_stop: bool. If True, will request that the session stops. + """ + return False + + +class EarlyStoppingHook(MetricsBasedHook): + """EarlyStoppingHook will stop training when a given metric has plateaued.""" + + def __init__(self, + events_dir, + tag, + num_plateau_steps=1000, + plateau_delta=0.1, + plateau_decrease=True, + every_n_steps=1000): + """Create an EarlyStoppingHook. + + This hook will stop training when the metric identified by tag has + plateaued. Plateaued is defined by the metric having stopped + increasing/decreasing (based on plateau_decrease) by plateau_delta for + num_plateau_steps. + + Args: + events_dir: Directory with events files. + tag: Name of metric in TensorBoard. + num_plateau_steps: Number of steps over which to check the plateau. + plateau_delta: delta to define a "plateau". + plateau_decrease: whether to check decrease or increase in the metric. + every_n_steps: how often to run this hook. + + Returns: + An instance of EarlyStoppingHook. + """ + super(EarlyStoppingHook, self).__init__( + events_dir=events_dir, tags=[tag], every_n_steps=every_n_steps) + self._num_plateau_steps = num_plateau_steps + self._plateau_delta = plateau_delta + self._plateau_decrease = plateau_decrease + + def _process_metrics(self, global_step, metrics): + if not metrics: + return + + if not metrics.values()[0]: + return + + # Metrics should have just a single subdir and a single tag + steps, vals = metrics.values()[0][self._tags[0]] + return has_metric_plateaued( + steps, + vals, + num_steps=self._num_plateau_steps, + delta=self._plateau_delta, + decrease=self._plateau_decrease) + + +class PlateauOpHook(MetricsBasedHook): + """Runs an op when a metric has plateaued.""" + + def __init__(self, + events_dir, + tag, + plateau_op, + num_plateau_steps=1000, + plateau_delta=0.1, + plateau_decrease=True, + every_n_steps=1000, + only_once=False): + """See EarlyStoppingHook for args. Runs plateau_op if plateaued.""" + super(PlateauOpHook, self).__init__( + events_dir=events_dir, tags=[tag], every_n_steps=every_n_steps) + self._num_plateau_steps = num_plateau_steps + self._plateau_delta = plateau_delta + self._plateau_decrease = plateau_decrease + self._plateau_op = plateau_op + self._only_once = only_once + self._should_run_op = False + self._ever_ran = False + self._last_metric_step_seen = 0 + + @property + def keep_alive(self): + if self._only_once and self._ever_ran: + return False + return True + + def before_run(self, run_context): + del run_context + + fetches = [self._global_step_tensor] + if self._should_run_op and self.keep_alive: + fetches.append(self._plateau_op) + self._should_run_op = False + self._ever_ran = True + + return tf.train.SessionRunArgs(fetches) + + def _after_run(self, run_context, run_values, global_step, metrics): + del run_context + del run_values + del global_step + + if not self.keep_alive: + return + + if not metrics: + return + + if not metrics.values()[0]: + return + + # There should be only a single subdir and a single tag + steps, vals = metrics.values()[0][self._tags[0]] + + if not steps: + return + + last_step = steps[-1] + if last_step == self._last_metric_step_seen: + return + self._last_metric_step_seen = last_step + + if has_metric_plateaued( + steps, + vals, + num_steps=self._num_plateau_steps, + delta=self._plateau_delta, + decrease=self._plateau_decrease): + self._should_run_op = True + + +def has_metric_plateaued(steps, values, num_steps=100, delta=0.1, + decrease=True): + """Check if metric has plateaued. + + A metric has plateaued if the value has not increased/decreased (depending on + `decrease`) by `delta` for at least `num_steps`. + + Args: + steps: list list of global steps for values. + values: list list of metric values. + num_steps: int, number of steps the metric has to have been plateaued for. + delta: float, how much the metric should have changed by over num_steps. + decrease: bool, whether to check if the metric has decreased by delta or + increased by delta. + + Returns: + bool, whether the metric has plateaued. + """ + assert num_steps > 0 + if len(steps) < 2: + return False + + steps_at_least_num_steps_ago = [ + s for s in steps if s <= (steps[-1] - num_steps) + ] + if not steps_at_least_num_steps_ago: + # Not enough steps yet + return False + delta_step_idx = len(steps_at_least_num_steps_ago) - 1 + + start_val = values[delta_step_idx] + values_to_check = values[delta_step_idx:] + observed_deltas = [] + for val in values_to_check: + if decrease: + observed_delta = start_val - val + else: + observed_delta = val - start_val + observed_deltas.append(observed_delta) + + within_range = [obs < delta for obs in observed_deltas] + return all(within_range) diff --git a/tensor2tensor/utils/metrics_hook_test.py b/tensor2tensor/utils/metrics_hook_test.py new file mode 100644 index 000000000..dc4468cc4 --- /dev/null +++ b/tensor2tensor/utils/metrics_hook_test.py @@ -0,0 +1,198 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for metrics_hook.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import contextlib +import os +import shutil + +# Dependency imports + +from tensor2tensor.utils import metrics_hook + +import tensorflow as tf + + +class DummyHook(metrics_hook.MetricsBasedHook): + + def _process_metrics(self, global_step, metrics): + if metrics: + assert "" in metrics + assert isinstance(metrics[""], dict) + if metrics[""]: + assert "global_step_1" in metrics[""] + self.test_metrics = metrics + if global_step >= 40: + return True + + +class MetricsHookTest(tf.test.TestCase): + + @classmethod + def setUpClass(cls): + cls.base_checkpoint_dir = tf.test.get_temp_dir() + shutil.rmtree(cls.base_checkpoint_dir, ignore_errors=True) + + def ckpt_dir(self, name): + return os.path.join(self.base_checkpoint_dir, name) + + @contextlib.contextmanager + def sess(self, hook, ckpt_dir): + with tf.train.MonitoredTrainingSession( + checkpoint_dir=ckpt_dir, + save_checkpoint_secs=0, + save_summaries_steps=10, + hooks=[hook]) as sess: + self._sess = sess + yield sess + + def flush(self): + self._sess._hooks[1]._summary_writer.flush() + + def testStop(self): + global_step = tf.train.create_global_step() + tf.summary.scalar("global_step", global_step) + incr_global_step = tf.assign_add(global_step, 1) + + ckpt_dir = self.ckpt_dir("stop") + dummy = DummyHook(ckpt_dir, every_n_steps=10) + with self.sess(dummy, ckpt_dir) as sess: + for _ in xrange(20): + sess.run(incr_global_step) + + # Summary files should now have 2 global step values in them + self.flush() + + # Run for 10 more so that the hook gets triggered again + for _ in xrange(10): + sess.run(incr_global_step) + + # Check that the metrics have actually been collected. + self.assertTrue("" in dummy.test_metrics) + metrics = dummy.test_metrics[""] + self.assertTrue("global_step_1" in metrics) + steps, vals = metrics["global_step_1"] + self.assertTrue(len(steps) == len(vals)) + self.assertTrue(len(steps) >= 2) + + # Run for 10 more so that the hook triggers stoppage + for _ in xrange(10): + sess.run(incr_global_step) + + with self.assertRaisesRegexp(RuntimeError, "after should_stop requested"): + sess.run(incr_global_step) + + def testEarlyStoppingHook(self): + global_step = tf.train.create_global_step() + counter = tf.get_variable("count", initializer=0, dtype=tf.int32) + tf.summary.scalar("count", counter) + incr_global_step = tf.assign_add(global_step, 1) + incr_counter = tf.assign_add(counter, 1) + + # Stop if the global step has not gone up by more than 1 in 20 steps. + + ckpt_dir = self.ckpt_dir("early") + stop_hook = metrics_hook.EarlyStoppingHook( + ckpt_dir, + "count_1", + num_plateau_steps=20, + plateau_delta=1., + plateau_decrease=False, + every_n_steps=10) + with self.sess(stop_hook, ckpt_dir) as sess: + for _ in xrange(20): + sess.run((incr_global_step, incr_counter)) + + # Summary files should now have 2 values in them + self.flush() + + # Run for more steps so that the hook gets triggered and we verify that we + # don't stop. + for _ in xrange(30): + sess.run((incr_global_step, incr_counter)) + + self.flush() + + # Run without incrementing the counter + for _ in xrange(40): + sess.run(incr_global_step) + + # Metrics should be written such that now the counter has gone >20 steps + # without being incremented. + self.flush() + + # Check that we ask for stop + with self.assertRaisesRegexp(RuntimeError, "after should_stop requested"): + for _ in xrange(30): + sess.run(incr_global_step) + + def testPlateauOpHook(self): + global_step = tf.train.create_global_step() + counter = tf.get_variable("count", initializer=0, dtype=tf.int32) + indicator = tf.get_variable("indicator", initializer=0, dtype=tf.int32) + tf.summary.scalar("count", counter) + incr_global_step = tf.assign_add(global_step, 1) + incr_counter = tf.assign_add(counter, 1) + incr_indicator = tf.assign_add(indicator, 1) + + # Stop if the global step has not gone up by more than 1 in 20 steps. + + ckpt_dir = self.ckpt_dir("plateauop") + stop_hook = metrics_hook.PlateauOpHook( + ckpt_dir, + "count_1", + incr_indicator, + num_plateau_steps=20, + plateau_delta=1., + plateau_decrease=False, + every_n_steps=10) + with self.sess(stop_hook, ckpt_dir) as sess: + for _ in xrange(20): + sess.run((incr_global_step, incr_counter)) + + # Summary files should now have 2 values in them + self.flush() + + # Run for more steps so that the hook gets triggered and we verify that we + # don't stop. + for _ in xrange(30): + sess.run((incr_global_step, incr_counter)) + + self.flush() + + # Run without incrementing the counter + for _ in xrange(30): + sess.run(incr_global_step) + self.flush() + + self.assertTrue(sess.run(indicator) < 1) + + # Metrics should be written such that now the counter has gone >20 steps + # without being incremented. + # Check that we run the incr_indicator op several times + for _ in xrange(3): + for _ in xrange(10): + sess.run(incr_global_step) + self.flush() + + self.assertTrue(sess.run(indicator) > 1) + +if __name__ == "__main__": + tf.test.main() From 45a4b88bdab90574929d25ef0a8bd0dda3481eb2 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Thu, 21 Dec 2017 16:12:54 -0800 Subject: [PATCH 0677/4095] Fix colab notebook PiperOrigin-RevId: 179871302 --- tensor2tensor/notebooks/hello_t2t.ipynb | 52 +------------------------ 1 file changed, 2 insertions(+), 50 deletions(-) diff --git a/tensor2tensor/notebooks/hello_t2t.ipynb b/tensor2tensor/notebooks/hello_t2t.ipynb index 1ff6b1d2b..5b58b042b 100644 --- a/tensor2tensor/notebooks/hello_t2t.ipynb +++ b/tensor2tensor/notebooks/hello_t2t.ipynb @@ -85,6 +85,7 @@ "import os\n", "import collections\n", "\n", + "from tensor2tensor import models\n", "from tensor2tensor import problems\n", "from tensor2tensor.layers import common_layers\n", "from tensor2tensor.tpu import tpu_trainer_lib\n", @@ -1540,55 +1541,6 @@ } ] }, - { - "metadata": { - "id": "a2cL8UwLaSYG", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "source": [ - "# This will eventually be available at\n", - "# tensor2tensor.metrics.create_eager_metrics\n", - "def create_eager_metrics(metric_names):\n", - " \"\"\"Create metrics accumulators and averager for Eager mode.\n", - "\n", - " Args:\n", - " metric_names: list from tensor2tensor.metrics.Metrics\n", - "\n", - " Returns:\n", - " (accum_fn(predictions, targets) => None,\n", - " result_fn() => dict\n", - " \"\"\"\n", - " metric_fns = dict(\n", - " [(name, metrics.METRICS_FNS[name]) for name in metric_names])\n", - " tfe_metrics = dict()\n", - "\n", - " for name in metric_names:\n", - " tfe_metrics[name] = tfe.metrics.Mean(name=name)\n", - "\n", - " def metric_accum(predictions, targets):\n", - " for name, metric_fn in metric_fns.items():\n", - " val, weight = metric_fn(predictions, targets,\n", - " weights_fn=common_layers.weights_all)\n", - " tfe_metrics[name](np.squeeze(val), np.squeeze(weight))\n", - "\n", - " def metric_means():\n", - " avgs = {}\n", - " for name in metric_names:\n", - " avgs[name] = tfe_metrics[name].result().numpy()\n", - " return avgs\n", - "\n", - " return metric_accum, metric_means" - ], - "cell_type": "code", - "execution_count": 0, - "outputs": [] - }, { "metadata": { "id": "CIFlkiVOd8jO", @@ -1625,7 +1577,7 @@ "\n", "# Create eval metric accumulators for accuracy (ACC) and accuracy in\n", "# top 5 (ACC_TOP5)\n", - "metrics_accum, metrics_result = create_eager_metrics(\n", + "metrics_accum, metrics_result = metrics.create_eager_metrics(\n", " [metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5])\n", "\n", "for count, example in enumerate(tfe.Iterator(mnist_eval_dataset)):\n", From b10286edfd366e68b12dac8eaf1a7e26305a683e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Thu, 21 Dec 2017 18:13:56 -0800 Subject: [PATCH 0678/4095] Pad eval batch to enable multi-device eval; skip T2TModel.top if T2TModel.body returns training loss PiperOrigin-RevId: 179882031 --- setup.py | 2 +- tensor2tensor/bin/t2t-trainer | 6 ++++- tensor2tensor/bin/t2t_trainer.py | 6 ++++- tensor2tensor/data_generators/problem.py | 33 ++++++++++++++++++++++++ tensor2tensor/tpu/tpu_trainer.py | 6 ++++- tensor2tensor/utils/t2t_model.py | 8 ++++-- 6 files changed, 55 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 01ef5e550..0ae11d780 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='tensor2tensor', - version='1.4.0', + version='1.4.1', description='Tensor2Tensor', author='Google Inc.', author_email='no-reply@google.com', diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index ed89949ab..9e2ca39b9 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -61,7 +61,11 @@ try: flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("schedule", "continuous_train_and_eval", "Method of Experiment to run.") - flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") + flags.DEFINE_integer("eval_steps", 10000, + "Number of steps in evaluation. By default, eval will " + "stop after eval_steps or when it runs through the eval " + "dataset once in full, whichever comes first, so this " + "can be a very large number.") except: # pylint: disable=bare-except pass diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 990035ed0..792403062 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -60,7 +60,11 @@ flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("schedule", "continuous_train_and_eval", "Method of Experiment to run.") - flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") + flags.DEFINE_integer("eval_steps", 10000, + "Number of steps in evaluation. By default, eval will " + "stop after eval_steps or when it runs through the eval " + "dataset once in full, whichever comes first, so this " + "can be a very large number.") except: # pylint: disable=bare-except pass diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index e944f15ab..aa1c894db 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -576,6 +576,19 @@ def define_shapes(example): batching_scheme["boundaries"], batching_scheme["batch_sizes"]) + if not is_training: + def _pad_batch(features): + if not config or config.data_parallelism.n <= 1: + return features + tf.logging.warn( + "Padding the batch to ensure that remainder eval batches have " + "a batch size divisible by the number of data shards. This may " + "lead to incorrect metrics for non-zero-padded features, e.g. " + "images. Use a single datashard (i.e. 1 GPU) in that case.") + return pad_batch(features, config.data_parallelism.n) + + dataset = dataset.map(_pad_batch, num_parallel_calls=num_threads) + dataset = dataset.map(define_shapes, num_parallel_calls=num_threads) dataset = dataset.prefetch(1) features = dataset.make_one_shot_iterator().get_next() @@ -930,3 +943,23 @@ def standardize_shapes(features, batch_size=None): t.get_shape().assert_is_fully_defined() return features + + +def pad_batch(features, batch_multiple): + """Pad batch dim of features to nearest multiple of batch_multiple.""" + feature = features.items()[0][1] + batch_size = tf.shape(feature)[0] + mod = batch_size % batch_multiple + has_mod = tf.cast(tf.cast(mod, tf.bool), tf.int32) + batch_padding = batch_multiple * has_mod - mod + + padded_features = {} + for k, feature in features.items(): + rank = len(feature.shape) + paddings = [] + for _ in range(rank): + paddings.append([0, 0]) + paddings[0][1] = batch_padding + padded_feature = tf.pad(feature, paddings) + padded_features[k] = padded_feature + return padded_features diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 990035ed0..792403062 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -60,7 +60,11 @@ flags.DEFINE_string("output_dir", "", "Base output directory for run.") flags.DEFINE_string("schedule", "continuous_train_and_eval", "Method of Experiment to run.") - flags.DEFINE_integer("eval_steps", 200, "Number of steps in evaluation.") + flags.DEFINE_integer("eval_steps", 10000, + "Number of steps in evaluation. By default, eval will " + "stop after eval_steps or when it runs through the eval " + "dataset once in full, whichever comes first, so this " + "can be a very large number.") except: # pylint: disable=bare-except pass diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 26854de13..630011541 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -139,13 +139,15 @@ def model_fn_sharded(self, sharded_features): body_out = self.body_sharded( self._to_single_features_dict(transformed_features)) body_out, losses = self._normalize_body_output(body_out) - sharded_logits = dp(self.top, body_out, datashard_to_features) if "training" not in losses: + sharded_logits = dp(self.top, body_out, datashard_to_features) sharded_losses = dp(self.loss, sharded_logits, datashard_to_features) training_loss_dict = average_sharded_losses([{ "training": loss } for loss in sharded_losses]) losses.update(training_loss_dict) + else: + sharded_logits = body_out else: sharded_logits, sharded_losses = dp(self.model_fn, datashard_to_features) losses = average_sharded_losses(sharded_losses) @@ -172,9 +174,11 @@ def model_fn(self, features): body_out = self.body(transformed_features) output, losses = self._normalize_body_output(body_out) - logits = self.top(output, features) if "training" not in losses: + logits = self.top(output, features) losses["training"] = self.loss(logits, features) + else: + logits = output return logits, losses def bottom(self, features): From 83e5949a6c9502623a9ab35c4cb62ad681e23e7f Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Thu, 21 Dec 2017 18:28:29 -0800 Subject: [PATCH 0679/4095] Rm xrange usage from metrics_hook_test PiperOrigin-RevId: 179882966 --- tensor2tensor/utils/metrics_hook_test.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tensor2tensor/utils/metrics_hook_test.py b/tensor2tensor/utils/metrics_hook_test.py index dc4468cc4..67c78eb2d 100644 --- a/tensor2tensor/utils/metrics_hook_test.py +++ b/tensor2tensor/utils/metrics_hook_test.py @@ -74,14 +74,14 @@ def testStop(self): ckpt_dir = self.ckpt_dir("stop") dummy = DummyHook(ckpt_dir, every_n_steps=10) with self.sess(dummy, ckpt_dir) as sess: - for _ in xrange(20): + for _ in range(20): sess.run(incr_global_step) # Summary files should now have 2 global step values in them self.flush() # Run for 10 more so that the hook gets triggered again - for _ in xrange(10): + for _ in range(10): sess.run(incr_global_step) # Check that the metrics have actually been collected. @@ -93,7 +93,7 @@ def testStop(self): self.assertTrue(len(steps) >= 2) # Run for 10 more so that the hook triggers stoppage - for _ in xrange(10): + for _ in range(10): sess.run(incr_global_step) with self.assertRaisesRegexp(RuntimeError, "after should_stop requested"): @@ -117,7 +117,7 @@ def testEarlyStoppingHook(self): plateau_decrease=False, every_n_steps=10) with self.sess(stop_hook, ckpt_dir) as sess: - for _ in xrange(20): + for _ in range(20): sess.run((incr_global_step, incr_counter)) # Summary files should now have 2 values in them @@ -125,13 +125,13 @@ def testEarlyStoppingHook(self): # Run for more steps so that the hook gets triggered and we verify that we # don't stop. - for _ in xrange(30): + for _ in range(30): sess.run((incr_global_step, incr_counter)) self.flush() # Run without incrementing the counter - for _ in xrange(40): + for _ in range(40): sess.run(incr_global_step) # Metrics should be written such that now the counter has gone >20 steps @@ -140,7 +140,7 @@ def testEarlyStoppingHook(self): # Check that we ask for stop with self.assertRaisesRegexp(RuntimeError, "after should_stop requested"): - for _ in xrange(30): + for _ in range(30): sess.run(incr_global_step) def testPlateauOpHook(self): @@ -164,7 +164,7 @@ def testPlateauOpHook(self): plateau_decrease=False, every_n_steps=10) with self.sess(stop_hook, ckpt_dir) as sess: - for _ in xrange(20): + for _ in range(20): sess.run((incr_global_step, incr_counter)) # Summary files should now have 2 values in them @@ -172,13 +172,13 @@ def testPlateauOpHook(self): # Run for more steps so that the hook gets triggered and we verify that we # don't stop. - for _ in xrange(30): + for _ in range(30): sess.run((incr_global_step, incr_counter)) self.flush() # Run without incrementing the counter - for _ in xrange(30): + for _ in range(30): sess.run(incr_global_step) self.flush() @@ -187,8 +187,8 @@ def testPlateauOpHook(self): # Metrics should be written such that now the counter has gone >20 steps # without being incremented. # Check that we run the incr_indicator op several times - for _ in xrange(3): - for _ in xrange(10): + for _ in range(3): + for _ in range(10): sess.run(incr_global_step) self.flush() From f2b620f7bd3266e911b75690e504c4146b2d2fdf Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Thu, 21 Dec 2017 19:38:07 -0800 Subject: [PATCH 0680/4095] python3 fix to metrics_hook_test PiperOrigin-RevId: 179886783 --- tensor2tensor/utils/metrics_hook.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/utils/metrics_hook.py b/tensor2tensor/utils/metrics_hook.py index e5cde12cc..964139a42 100644 --- a/tensor2tensor/utils/metrics_hook.py +++ b/tensor2tensor/utils/metrics_hook.py @@ -159,11 +159,11 @@ def _process_metrics(self, global_step, metrics): if not metrics: return - if not metrics.values()[0]: + if not list(metrics.values())[0]: return # Metrics should have just a single subdir and a single tag - steps, vals = metrics.values()[0][self._tags[0]] + steps, vals = list(metrics.values())[0][self._tags[0]] return has_metric_plateaued( steps, vals, @@ -224,11 +224,11 @@ def _after_run(self, run_context, run_values, global_step, metrics): if not metrics: return - if not metrics.values()[0]: + if not list(metrics.values())[0]: return # There should be only a single subdir and a single tag - steps, vals = metrics.values()[0][self._tags[0]] + steps, vals = list(metrics.values())[0][self._tags[0]] if not steps: return From ee947c95b45ac7048b9fd802ffbcf9a7a65cf165 Mon Sep 17 00:00:00 2001 From: T2T Team Date: Fri, 22 Dec 2017 08:20:04 -0800 Subject: [PATCH 0681/4095] Fix transformer's encode docstring. PiperOrigin-RevId: 179928442 --- tensor2tensor/models/transformer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index e9c272d7c..de812b64b 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -53,7 +53,8 @@ def encode(self, inputs, target_space, hparams, features=None): """Encode transformer inputs. Args: - inputs: Transformer inputs [batch_size, input_length, hidden_dim] + inputs: Transformer inputs [batch_size, input_length, input_height, + hidden_dim] which will be flattened along the two spatial dimensions. target_space: scalar, target space ID. hparams: hyperparmeters for model. features: optionally pass the entire features dictionary as well. From 2a07e8f2e79316b3f10a1b9b8a2e487af2cbeec9 Mon Sep 17 00:00:00 2001 From: T2T Team Date: Fri, 22 Dec 2017 09:01:56 -0800 Subject: [PATCH 0682/4095] Factor out common audio feature extraction and apply it to Librispeech dataset. PiperOrigin-RevId: 179931584 --- setup.py | 1 + tensor2tensor/data_generators/librispeech.py | 203 ++--------- .../data_generators/speech_recognition.py | 332 ++++++++++++++++++ 3 files changed, 367 insertions(+), 169 deletions(-) create mode 100644 tensor2tensor/data_generators/speech_recognition.py diff --git a/setup.py b/setup.py index 0ae11d780..fb2b6492d 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ 'gym', 'numpy', 'requests', + 'scipy', 'sympy', 'six', ], diff --git a/tensor2tensor/data_generators/librispeech.py b/tensor2tensor/data_generators/librispeech.py index d6a07a391..ad8e931d8 100644 --- a/tensor2tensor/data_generators/librispeech.py +++ b/tensor2tensor/data_generators/librispeech.py @@ -16,23 +16,14 @@ """Librispeech dataset.""" import os -from subprocess import call import tarfile -import wave # Dependency imports -import numpy as np - from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import problem -from tensor2tensor.data_generators import text_encoder -from tensor2tensor.layers import common_layers -from tensor2tensor.utils import modality +from tensor2tensor.data_generators import speech_recognition from tensor2tensor.utils import registry -import tensorflow as tf - _LIBRISPEECH_TRAIN_DATASETS = [ [ @@ -86,130 +77,13 @@ def _collect_data(directory, input_ext, transcription_ext): return data_files -def _get_audio_data(filepath): - # Construct a true .wav file. - out_filepath = filepath.strip(".flac") + ".wav" - # Assumes sox is installed on system. Sox converts from FLAC to WAV. - call(["sox", filepath, out_filepath]) - wav_file = wave.open(open(out_filepath)) - frame_count = wav_file.getnframes() - byte_array = wav_file.readframes(frame_count) - - data = np.fromstring(byte_array, np.uint8).tolist() - return data, frame_count, wav_file.getsampwidth(), wav_file.getnchannels() - - -class LibrispeechTextEncoder(text_encoder.TextEncoder): - - def encode(self, s): - return [self._num_reserved_ids + ord(c) for c in s] - - def decode(self, ids): - """Transform a sequence of int ids into a human-readable string. - - EOS is not expected in ids. - - Args: - ids: list of integers to be converted. - Returns: - s: human-readable string. - """ - decoded_ids = [] - for id_ in ids: - if 0 <= id_ < self._num_reserved_ids: - decoded_ids.append(text_encoder.RESERVED_TOKENS[int(id_)]) - else: - decoded_ids.append(id_ - self._num_reserved_ids) - return "".join([chr(d) for d in decoded_ids]) - - -@registry.register_audio_modality -class LibrispeechModality(modality.Modality): - """Performs strided conv compressions for audio spectral data.""" - - def bottom(self, inputs): - """Transform input from data space to model space. - - Args: - inputs: A Tensor with shape [batch, ...] - Returns: - body_input: A Tensor with shape [batch, ?, ?, body_input_depth]. - """ - with tf.variable_scope(self.name): - # TODO(aidangomez): Will need to sort out a better audio pipeline - def xnet_resblock(x, filters, res_relu, name): - with tf.variable_scope(name): - # We only stride along the length dimension to preserve the spectral - # bins (which are tiny in dimensionality relative to length) - y = common_layers.separable_conv_block( - x, - filters, [((1, 1), (3, 3)), ((1, 1), (3, 3))], - first_relu=True, - padding="SAME", - force2d=True, - name="sep_conv_block") - y = common_layers.pool(y, (3, 3), "MAX", "SAME", strides=(2, 1)) - return y + common_layers.conv_block( - x, - filters, [((1, 1), (1, 1))], - padding="SAME", - strides=(2, 1), - first_relu=res_relu, - force2d=True, - name="res_conv0") - - # Rescale from UINT8 to floats in [-1,-1] - signals = (tf.to_float(inputs)-127)/128. - signals = tf.squeeze(signals, [2, 3]) - - # `stfts` is a complex64 Tensor representing the short-time Fourier - # Transform of each signal in `signals`. Its shape is - # [batch_size, ?, fft_unique_bins] - # where fft_unique_bins = fft_length // 2 + 1 = 513. - stfts = tf.contrib.signal.stft(signals, frame_length=1024, frame_step=512, - fft_length=1024) - - # An energy spectrogram is the magnitude of the complex-valued STFT. - # A float32 Tensor of shape [batch_size, ?, 513]. - magnitude_spectrograms = tf.abs(stfts) - - # Warp the linear-scale, magnitude spectrograms into the mel-scale. - num_spectrogram_bins = magnitude_spectrograms.shape[-1].value - lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 64 - sample_rate = 16000 - linear_to_mel_weight_matrix = ( - tf.contrib.signal.linear_to_mel_weight_matrix( - num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz, - upper_edge_hertz)) - mel_spectrograms = tf.tensordot( - magnitude_spectrograms, linear_to_mel_weight_matrix, 1) - # Note: Shape inference for tensordot does not currently handle this case. - mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate( - linear_to_mel_weight_matrix.shape[-1:])) - - x = tf.expand_dims(mel_spectrograms, 2) - x.set_shape([None, None, None, num_mel_bins]) - for i in xrange(self._model_hparams.audio_compression): - x = xnet_resblock(x, 2**(i + 1), True, "compress_block_%d" % i) - return xnet_resblock(x, self._body_input_depth, False, - "compress_block_final") - - @registry.register_problem() -class Librispeech(problem.Problem): - """Problem spec for English word to dictionary definition.""" +class Librispeech(speech_recognition.SpeechRecognitionProblem): + """Problem spec for Librispeech using clean and noisy data.""" - @property - def is_character_level(self): - return True - - @property - def input_space_id(self): - return problem.SpaceID.AUDIO_SPECTRAL - - @property - def target_space_id(self): - return problem.SpaceID.EN_CHR + # Select only the clean data + TRAIN_DATASETS = _LIBRISPEECH_TRAIN_DATASETS + DEV_DATASETS = _LIBRISPEECH_TEST_DATASETS @property def num_shards(self): @@ -228,26 +102,8 @@ def use_train_shards_for_dev(self): """If true, we only generate training data and hold out shards for dev.""" return False - def feature_encoders(self, _): - return { - "inputs": text_encoder.TextEncoder(), - "targets": LibrispeechTextEncoder(), - } - - def example_reading_spec(self): - data_fields = { - "inputs": tf.VarLenFeature(tf.int64), - "targets": tf.VarLenFeature(tf.int64), - } - data_items_to_decoders = None - return (data_fields, data_items_to_decoders) - - def generator(self, data_dir, tmp_dir, training, + def generator(self, data_dir, tmp_dir, datasets, eos_list=None, start_from=0, how_many=0): - eos_list = [1] if eos_list is None else eos_list - datasets = (_LIBRISPEECH_TRAIN_DATASETS if training - else _LIBRISPEECH_TEST_DATASETS) - num_reserved_ids = self.feature_encoders(None)["targets"].num_reserved_ids i = 0 for url, subdir in datasets: filename = os.path.basename(url) @@ -267,19 +123,18 @@ def generator(self, data_dir, tmp_dir, training, data_dir = os.path.join(tmp_dir, "LibriSpeech", subdir) data_files = _collect_data(data_dir, "flac", "txt") data_pairs = data_files.values() + + encoders = self.feature_encoders(None) + audio_encoder = encoders["waveforms"] + text_encoder = encoders["targets"] + for media_file, text_data in sorted(data_pairs)[start_from:]: if how_many > 0 and i == how_many: return i += 1 - audio_data, sample_count, sample_width, num_channels = _get_audio_data( - media_file) - label = [num_reserved_ids + ord(c) for c in text_data] + eos_list yield { - "inputs": audio_data, - "audio/channel_count": [num_channels], - "audio/sample_count": [sample_count], - "audio/sample_width": [sample_width], - "targets": label + "waveforms": audio_encoder.encode(media_file), + "targets": text_encoder.encode(text_data) } def generate_data(self, data_dir, tmp_dir, task_id=-1): @@ -287,24 +142,34 @@ def generate_data(self, data_dir, tmp_dir, task_id=-1): data_dir, self.num_shards, shuffled=False) dev_paths = self.dev_filepaths( data_dir, self.num_dev_shards, shuffled=False) + if self.use_train_shards_for_dev: all_paths = train_paths + dev_paths generator_utils.generate_files( - self.generator(data_dir, tmp_dir, True), all_paths) + self.generator(data_dir, tmp_dir, self.TRAIN_DATASETS), all_paths) generator_utils.shuffle_dataset(all_paths) else: generator_utils.generate_dataset_and_shuffle( - self.generator(data_dir, tmp_dir, True), train_paths, - self.generator(data_dir, tmp_dir, False), dev_paths) + self.generator(data_dir, tmp_dir, self.TRAIN_DATASETS), train_paths, + self.generator(data_dir, tmp_dir, self.DEV_DATASETS), dev_paths) - def hparams(self, defaults, unused_model_hparams): - p = defaults - p.stop_at_eos = int(False) - p.input_modality = {"inputs": ("audio:librispeech_modality", None)} - p.target_modality = (registry.Modalities.SYMBOL, 256) - def preprocess_example(self, example, mode, hparams): - return example +@registry.register_problem() +class LibrispeechCleanSmall(Librispeech): + """Problem spec for Librispeech using 100h clean train data.""" + + # Select only the clean data + TRAIN_DATASETS = _LIBRISPEECH_TRAIN_DATASETS[:1] + DEV_DATASETS = _LIBRISPEECH_TEST_DATASETS[:1] + + +@registry.register_problem() +class LibrispeechClean(Librispeech): + """Problem spec for Librispeech using 460h clean train data.""" + + # Select only the clean data + TRAIN_DATASETS = _LIBRISPEECH_TRAIN_DATASETS[:2] + DEV_DATASETS = _LIBRISPEECH_TEST_DATASETS[:1] # TODO(lukaszkaiser): clean up hparams or remove from here. diff --git a/tensor2tensor/data_generators/speech_recognition.py b/tensor2tensor/data_generators/speech_recognition.py new file mode 100644 index 000000000..c54878045 --- /dev/null +++ b/tensor2tensor/data_generators/speech_recognition.py @@ -0,0 +1,332 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Common classes for automatic speech recogntion (ASR) datasets. + +The audio import uses sox to generate normalized waveforms, please install +it as appropriate (e.g. using apt-get or yum). +""" + +import functools +import os +from subprocess import call +import tempfile + +# Dependency imports + +import numpy as np +from scipy.io import wavfile +import scipy.signal + +from tensor2tensor.data_generators import problem +from tensor2tensor.data_generators import text_encoder +from tensor2tensor.layers import common_layers +from tensor2tensor.utils import modality +from tensor2tensor.utils import registry + +import tensorflow as tf + + +# +# ASR Feature pipeline in TF. +# +def add_delta_deltas(filterbanks, name=None): + """Compute time first and second-order derivative channels. + + Args: + filterbanks: float32 tensor with shape [batch_size, len, num_bins, 1] + name: scope name + + Returns: + float32 tensor with shape [batch_size, len, num_bins, 3] + """ + delta_filter = np.array([2, 1, 0, -1, -2]) + delta_delta_filter = scipy.signal.convolve(delta_filter, delta_filter, "full") + + delta_filter_stack = np.array( + [[0] * 4 + [1] + [0] * 4, [0] * 2 + list(delta_filter) + [0] * 2, + list(delta_delta_filter)], + dtype=np.float32).T[:, None, None, :] + + delta_filter_stack /= np.sqrt( + np.sum(delta_filter_stack**2, axis=0, keepdims=True)) + + filterbanks = tf.nn.conv2d( + filterbanks, delta_filter_stack, [1, 1, 1, 1], "SAME", data_format="NHWC", + name=name) + return filterbanks + + +def compute_mel_filterbank_features( + waveforms, + sample_rate=16000, dither=1.0 / np.iinfo(np.int16).max, preemphasis=0.97, + frame_length=25, frame_step=10, fft_length=None, + window_fn=functools.partial(tf.contrib.signal.hann_window, periodic=True), + lower_edge_hertz=80.0, upper_edge_hertz=7600.0, num_mel_bins=80, + log_noise_floor=1e-3): + """Implement mel-filterbank extraction using tf ops. + + Args: + waveforms: float32 tensor with shape [batch_size, max_len] + sample_rate: sampling rate of the waveform + dither: stddev of Gaussian noise added to waveform to prevent quantization + artefacts + preemphasis: waveform high-pass filtering costant + frame_length: frame length in ms + frame_step: frame_Step in ms + fft_length: number of fft bins + window_fn: windowing function + lower_edge_hertz: lowest frequency of the filterbank + upper_edge_hertz: highest frequency of the filterbank + num_mel_bins: filterbank size + log_noise_floor: clip small values to prevent numeric overflow in log + Returns: + tuple of (filterbanks, filterbank_lens) where: + filterbanks are float32 tensor with shape [batch_size, len, num_bins, 1] + filterbank_lens are int64 tensor with shape [batch_size] + """ + # `stfts` is a complex64 Tensor representing the short-time Fourier + # Transform of each signal in `signals`. Its shape is + # [batch_size, ?, fft_unique_bins] + # where fft_unique_bins = fft_length // 2 + 1 + if dither > 0: + waveforms += tf.random_normal(tf.shape(waveforms), stddev=dither) + if preemphasis > 0: + waveforms = waveforms[:, 1:] - preemphasis * waveforms[:, :-1] + frame_length = int(frame_length * sample_rate / 1e3) + frame_step = int(frame_step * sample_rate / 1e3) + if fft_length is None: + fft_length = int(2**(np.ceil(np.log2(frame_length)))) + stfts = tf.contrib.signal.stft( + waveforms, + frame_length=frame_length, + frame_step=frame_step, + fft_length=fft_length, + window_fn=window_fn, + pad_end=True) + + # An energy spectrogram is the magnitude of the complex-valued STFT. + # A float32 Tensor of shape [batch_size, ?, 257]. + magnitude_spectrograms = tf.abs(stfts) + + # Warp the linear-scale, magnitude spectrograms into the mel-scale. + num_spectrogram_bins = magnitude_spectrograms.shape[-1].value + linear_to_mel_weight_matrix = ( + tf.contrib.signal.linear_to_mel_weight_matrix( + num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz, + upper_edge_hertz)) + mel_spectrograms = tf.tensordot( + magnitude_spectrograms, linear_to_mel_weight_matrix, 1) + # Note: Shape inference for tensordot does not currently handle this case. + mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate( + linear_to_mel_weight_matrix.shape[-1:])) + + log_mel_sgram = tf.log(tf.maximum(log_noise_floor, mel_spectrograms)) + + return tf.expand_dims(log_mel_sgram, -1) + + +# +# Audio problem definition +# +class AudioEncoder(object): + """Encoder class for saving and loading waveforms.""" + + def __init__(self, num_reserved_ids=0, sample_rate=16000): + assert num_reserved_ids == 0 + self._sample_rate = sample_rate + + @property + def num_reserved_ids(self): + return 0 + + def encode(self, s): + """Transform a string with a filename into a list of float32. + + Args: + s: path to the file with a waveform. + + Returns: + samples: list of int16s + """ + # Make sure that the data is a single channel, 16bit, 16kHz wave. + # TODO(chorowski): the directory may not be writable, this should fallback + # to a temp path, and provide instructions for instaling sox. + if not s.endswith(".wav"): + out_filepath = s + ".wav" + if not os.path.exists(out_filepath): + call(["sox", "-r", "16k", "-b", "16", "-c", "1", s, out_filepath]) + s = out_filepath + rate, data = wavfile.read(s) + assert rate == self._sample_rate + assert len(data.shape) == 1 + if data.dtype not in [np.float32, np.float64]: + data = data.astype(np.float32) / np.iinfo(data.dtype).max + return data.tolist() + + def decode(self, ids): + """Transform a sequence of float32 into a waveform. + + Args: + ids: list of integers to be converted. + + Returns: + Path to the temporary file where the waveform was saved. + + Raises: + ValueError: if the ids are not of the appropriate size. + """ + _, tmp_file_path = tempfile.mkstemp() + wavfile.write(tmp_file_path, self._sample_rate, np.asarray(ids)) + return tmp_file_path + + def decode_list(self, ids): + """Transform a sequence of int ids into an image file. + + Args: + ids: list of integers to be converted. + + Returns: + Singleton list: path to the temporary file where the wavfile was saved. + """ + return [self.decode(ids)] + + @property + def vocab_size(self): + return 256 + + +class SpeechRecognitionProblem(problem.Problem): + """Base class for speech recognition problems.""" + + def hparams(self, defaults, model_hparams): + p = model_hparams + # Filterbank extraction + p.add_hparam("audio_sample_rate", 16000) + p.add_hparam("audio_preemphasis", 0.97) + p.add_hparam("audio_dither", 1.0 / np.iinfo(np.int16).max) + p.add_hparam("audio_frame_length", 25.0) + p.add_hparam("audio_frame_step", 10.0) + p.add_hparam("audio_lower_edge_hertz", 20.0) + p.add_hparam("audio_upper_edge_hertz", 8000.0) + p.add_hparam("audio_num_mel_bins", 80) + p.add_hparam("audio_add_delta_deltas", True) + + p = defaults + # p.stop_at_eos = int(False) + p.input_modality = {"inputs": ("audio:speech_recognition_modality", None)} + p.target_modality = (registry.Modalities.SYMBOL, 256) + + @property + def is_character_level(self): + return True + + @property + def input_space_id(self): + return problem.SpaceID.AUDIO_SPECTRAL + + @property + def target_space_id(self): + return problem.SpaceID.EN_CHR + + def feature_encoders(self, _): + return { + "waveforms": AudioEncoder(), + "targets": text_encoder.ByteTextEncoder(), + } + + def example_reading_spec(self): + data_fields = { + "waveforms": tf.VarLenFeature(tf.float32), + "targets": tf.VarLenFeature(tf.int64), + } + + data_items_to_decoders = None + + return data_fields, data_items_to_decoders + + def preprocess_example(self, example, mode, hparams): + p = hparams + waveforms = tf.expand_dims(example["waveforms"], 0) + mel_fbanks = compute_mel_filterbank_features( + waveforms, + sample_rate=p.audio_sample_rate, + dither=p.audio_dither, + preemphasis=p.audio_preemphasis, + frame_length=p.audio_frame_length, + frame_step=p.audio_frame_step, + lower_edge_hertz=p.audio_lower_edge_hertz, + upper_edge_hertz=p.audio_upper_edge_hertz, + num_mel_bins=p.audio_num_mel_bins) + if p.audio_add_delta_deltas: + mel_fbanks = add_delta_deltas(mel_fbanks) + fbank_size = common_layers.shape_list(mel_fbanks) + assert fbank_size[0] == 1 + # Later models like to flatten the two spatial dims. Instead, we add a unit + # spatial dim and flatten the frequencies and channels. + example["inputs"] = tf.reshape( + mel_fbanks, [fbank_size[1], 1, fbank_size[2] * fbank_size[3]]) + return super(SpeechRecognitionProblem, self + ).preprocess_example(example, mode, hparams) + + +@registry.register_audio_modality +class SpeechRecognitionModality(modality.Modality): + """Common ASR filterbank processing.""" + + def bottom(self, inputs): + """Use batchnorm instead of CMVN and shorten the stft with strided convs. + + Args: + inputs: float32 tensor with shape [batch_size, len, 1, freqs * channels] + + Returns: + float32 tensor with shape [batch_size, shorter_len, 1, hidden_size] + """ + p = self._model_hparams + training = p.mode == tf.estimator.ModeKeys.TRAIN + + with tf.variable_scope(self.name): + x = inputs + num_mel_bins = p.audio_num_mel_bins + num_channels = 3 if p.audio_add_delta_deltas else 1 + # The convention is that the models are flattened along the spatial, + # dimensions, thus the speech preprocessor treats frequencies and channels + # as image colors (last axis) + x.set_shape([None, None, 1, num_mel_bins * num_channels]) + + # This replaces CMVN estimation on data + x = tf.layers.batch_normalization( + x, axis=3, center=False, scale=False, training=training) + + xshape = common_layers.shape_list(x) + # restore batch_size x time x frequency x channel layout + x = tf.reshape(x, [xshape[0], xshape[1], num_mel_bins, num_channels]) + + # TODO(chorowski): how to specify bottom's hparams and avoid hardcoding? + for _ in range(2): + x = tf.layers.conv2d( + x, 128, (3, 3), (2, 2), use_bias=False) + x = tf.layers.batch_normalization(x, axis=3, training=training) + x = tf.nn.relu(x) + + xshape = common_layers.shape_list(x) + # apply a conv that will remove all frequencies and at the same time + # project the output into desired hidden_size + x = tf.layers.conv2d(x, p.hidden_size, (3, xshape[2]), use_bias=False) + assert common_layers.shape_list(x)[2] == 1 + x = tf.layers.batch_normalization(x, axis=3, training=training) + x = tf.nn.relu(x) + return x From 02da1be9a40e62d1bdcebb85fed5da813433436b Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Fri, 22 Dec 2017 10:53:55 -0800 Subject: [PATCH 0683/4095] Add random seed, py3 fix, disable flaky test PiperOrigin-RevId: 179942374 --- .travis.yml | 2 +- tensor2tensor/bin/t2t-trainer | 3 ++- tensor2tensor/bin/t2t_trainer.py | 3 ++- tensor2tensor/data_generators/algorithmic_math_test.py | 1 + tensor2tensor/data_generators/problem.py | 2 +- tensor2tensor/tpu/tpu_trainer.py | 3 ++- tensor2tensor/tpu/tpu_trainer_lib.py | 9 +++++++++ 7 files changed, 18 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index b67c74b1d..7841b0b7e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ env: - T2T_DATA_DIR=/tmp/t2t-data - T2T_TRAIN_DIR=/tmp/t2t-train script: - - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/tpu/tpu_trainer_lib_test.py + - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/tpu/tpu_trainer_lib_test.py --ignore=tensor2tensor/data_generators/algorithmic_math_test.py - pytest tensor2tensor/utils/registry_test.py - pytest tensor2tensor/tpu/tpu_trainer_lib_test.py - t2t-datagen 2>&1 | grep translate && echo passed diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer index 9e2ca39b9..70435094a 100644 --- a/tensor2tensor/bin/t2t-trainer +++ b/tensor2tensor/bin/t2t-trainer @@ -45,6 +45,7 @@ flags.DEFINE_string("t2t_usr_dir", "", "The imported files should contain registrations, " "e.g. @registry.register_model calls, that will then be " "available to the t2t-trainer.") +flags.DEFINE_integer("random_seed", 1234, "Random seed.") flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") flags.DEFINE_integer("iterations_per_loop", 1000, "Number of iterations in a TPU training loop.") @@ -171,7 +172,7 @@ def execute_schedule(exp): def main(_): tf.logging.set_verbosity(tf.logging.INFO) - tf.set_random_seed(123) + tpu_trainer_lib.set_random_seed(FLAGS.random_seed) usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) log_registry() diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 792403062..571a21839 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -44,6 +44,7 @@ "The imported files should contain registrations, " "e.g. @registry.register_model calls, that will then be " "available to the t2t-trainer.") +flags.DEFINE_integer("random_seed", 1234, "Random seed.") flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") flags.DEFINE_integer("iterations_per_loop", 1000, "Number of iterations in a TPU training loop.") @@ -170,7 +171,7 @@ def execute_schedule(exp): def main(_): tf.logging.set_verbosity(tf.logging.INFO) - tf.set_random_seed(123) + tpu_trainer_lib.set_random_seed(FLAGS.random_seed) usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) log_registry() diff --git a/tensor2tensor/data_generators/algorithmic_math_test.py b/tensor2tensor/data_generators/algorithmic_math_test.py index 7cd67a83c..c7fdfa156 100644 --- a/tensor2tensor/data_generators/algorithmic_math_test.py +++ b/tensor2tensor/data_generators/algorithmic_math_test.py @@ -14,6 +14,7 @@ # limitations under the License. """Tests for tensor2tensor.data_generators.algorithmic_math.""" +# TODO(rsepassi): This test is flaky. Disable, remove, or update. from __future__ import absolute_import from __future__ import division diff --git a/tensor2tensor/data_generators/problem.py b/tensor2tensor/data_generators/problem.py index aa1c894db..52d7bdab2 100644 --- a/tensor2tensor/data_generators/problem.py +++ b/tensor2tensor/data_generators/problem.py @@ -947,7 +947,7 @@ def standardize_shapes(features, batch_size=None): def pad_batch(features, batch_multiple): """Pad batch dim of features to nearest multiple of batch_multiple.""" - feature = features.items()[0][1] + feature = list(features.items())[0][1] batch_size = tf.shape(feature)[0] mod = batch_size % batch_multiple has_mod = tf.cast(tf.cast(mod, tf.bool), tf.int32) diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 792403062..571a21839 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -44,6 +44,7 @@ "The imported files should contain registrations, " "e.g. @registry.register_model calls, that will then be " "available to the t2t-trainer.") +flags.DEFINE_integer("random_seed", 1234, "Random seed.") flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") flags.DEFINE_integer("iterations_per_loop", 1000, "Number of iterations in a TPU training loop.") @@ -170,7 +171,7 @@ def execute_schedule(exp): def main(_): tf.logging.set_verbosity(tf.logging.INFO) - tf.set_random_seed(123) + tpu_trainer_lib.set_random_seed(FLAGS.random_seed) usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) log_registry() diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index be7f00351..bde85e4db 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -20,9 +20,12 @@ from __future__ import print_function import os +import random # Dependency imports +import numpy as np + from tensor2tensor.utils import devices from tensor2tensor.utils import expert_utils from tensor2tensor.utils import metrics_hook @@ -336,3 +339,9 @@ def add_problem_hparams(hparams, problems): hparams.problem_instances.append(problem) hparams.problems.append(p_hparams) + + +def set_random_seed(seed): + tf.set_random_seed(seed) + random.seed(seed) + np.random.seed(seed) From 96f72d408e3f498f3b19ce7332a47cb1c12f8d74 Mon Sep 17 00:00:00 2001 From: iislucas Date: Mon, 8 Jan 2018 13:18:35 -0500 Subject: [PATCH 0684/4095] Updated instructions & bugfix in new Problem docs (#490) * Add helpful (I think) instructions & bugfix * Added some instructions for using pip to create editable install so you can locally test. * Also added commands to run the local test. * Fixed bug with accidental `self` argument in hparams definition. * code review comment fixes * tweak language * typos and more tweaks --- docs/new_problem.md | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/docs/new_problem.md b/docs/new_problem.md index 48976a61b..70bb79892 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -240,16 +240,40 @@ All hyperparamters inherit from `_default_hparams()` in `problem.py.` If you wou from tensor2tensor.models import transformer @registry.register_hparams -def word2def_hparams(self): +def word2def_hparams(): hparams = transformer.transformer_base_single_gpu() # Or whatever you'd like to build off. hparams.batch_size = 1024 return hparams ``` +# Test the data generation + +You can test data generation of your a problem in your own project with: + +```bash +PROBLEM=word2def +DATA_DIR=$HOME/t2t_data +TMP_DIR=/tmp/t2t_datagen +mkdir -p $DATA_DIR $TMP_DIR + +t2t-datagen \ + --t2t_usr_dir=$PATH_TO_YOUR_PROBLEM_DIR \ + --data_dir=$DATA_DIR \ + --tmp_dir=$TMP_DIR \ + --problem=$PROBLEM +``` + +Where: +* `PROBLEM` is the name of the class that was registered with `@registry.register_problem()`, but converted from `CamelCase` to `snake_case`. +* `PATH_TO_YOUR_PROBLEM_DIR` is a path to the directory of your python problem file. + +If you plan to contribute to the tensor2tensor repository, you can install the local cloned version in developer mode with `pip install -e .` from the tensor2tensor directory. You can also add your new problem file to [`all_problems.py`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/all_problems.py). + # Run the problem Now that we've gotten our problem set up, let's train a model and generate definitions. -We specify our problem name, the model, and hparams. +To train, specify the problem name, the model, and hparams: + ```bash PROBLEM=word2def MODEL=transformer @@ -258,7 +282,6 @@ HPARAMS=word2def_hparams The rest of the steps are as given in the [walkthrough](walkthrough.md). - What if we wanted to train a model to generate words given definitions? In T2T, we can change the problem name to be `PROBLEM=word2def_rev`. All done. Let us know what definitions your model generated. From 0fcdf8eef0f0c4f69f11b4d5d8d8b6c1404cb2ec Mon Sep 17 00:00:00 2001 From: iislucas Date: Mon, 8 Jan 2018 13:42:59 -0500 Subject: [PATCH 0685/4095] Doc fix: data_generators.wmt => data_generators.translate (#489) `data_generators.wmt` seems to have become `data_generators.translate`. --- docs/new_problem.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/new_problem.md b/docs/new_problem.md index 70bb79892..fd5f9d625 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -184,7 +184,7 @@ import os from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators.wmt import character_generator +from tensor2tensor.data_generators.translate import character_generator from tensor2tensor.utils import registry From 92267e85104b731e48b123373d30f701ca9b83d1 Mon Sep 17 00:00:00 2001 From: Jerry Liu Date: Tue, 9 Jan 2018 08:00:55 +0800 Subject: [PATCH 0686/4095] Enhance WMT17 En-Zh task with full dataset. (#461) * Enhance WMT17 En-Zh task with full dataset. Fix #446 Added `file_size_budget` as argument to `get_or_generate_vocab`. * Made requested Fixes: - Added TranslateEnzhWmt8k problem. - Renamed to TranslateEnzhWmt32k, to reflect target vocab in problem name - Added instructions for manually downloading full dataset. --- .../data_generators/generator_utils.py | 5 +- .../data_generators/translate_enzh.py | 189 ++++++++++++++++-- 2 files changed, 173 insertions(+), 21 deletions(-) diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index 236d43772..c657a503f 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -316,7 +316,8 @@ def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, def get_or_generate_vocab(data_dir, tmp_dir, vocab_filename, vocab_size, - sources): + sources, + _file_byte_budget=1e6): """Generate a vocabulary from the datasets in sources.""" def generate(): @@ -349,7 +350,7 @@ def generate(): # Use Tokenizer to count the word occurrences. with tf.gfile.GFile(filepath, mode="r") as source_file: - file_byte_budget = 1e6 + file_byte_budget = _file_byte_budget counter = 0 countermax = int(source_file.size() / file_byte_budget / 2) for line in source_file: diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index 52b364137..d3ddd8d98 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -42,28 +42,145 @@ # This is far from being the real WMT17 task - only toyset here # you need to register to get UN data and CWT data. Also, by convention, # this is EN to ZH - use translate_enzh_wmt8k_rev for ZH to EN task -_ENZH_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" - "training-parallel-nc-v12.tgz"), - ("training/news-commentary-v12.zh-en.en", - "training/news-commentary-v12.zh-en.zh")]] +# +# News Commentary, around 220k lines +# This dataset is only a small fraction of full WMT17 task +_NC_TRAIN_DATASETS = [[ + "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", + ["training/news-commentary-v12.zh-en.en", + "training/news-commentary-v12.zh-en.zh"]]] -_ENZH_TEST_DATASETS = [[ +# Test set from News Commentary. 2000 lines +_NC_TEST_DATASETS = [[ "http://data.statmt.org/wmt17/translation-task/dev.tgz", ("dev/newsdev2017-enzh-src.en.sgm", "dev/newsdev2017-enzh-ref.zh.sgm") ]] +# UN parallel corpus. 15,886,041 lines +# Visit source website to download manually: +# https://conferences.unite.un.org/UNCorpus +# +# NOTE: You need to register to download dataset from official source +# place into tmp directory e.g. /tmp/t2t_datagen/dataset.tgz +_UN_TRAIN_DATASETS = [[ + "https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/UNv1.0.en-zh.tar.gz", + ["en-zh/UNv1.0.en-zh.en", + "en-zh/UNv1.0.en-zh.zh"]]] + +# CWMT corpus +# Visit source website to download manually: +# http://nlp.nju.edu.cn/cwmt-wmt/ +# +# casia2015: 1,050,000 lines +# casict2015: 2,036,833 lines +# datum2015: 1,000,003 lines +# datum2017: 1,999,968 lines +# NEU2017: 2,000,000 lines +# +# NOTE: You need to register to download dataset from official source +# place into tmp directory e.g. /tmp/t2t_datagen/dataset.tgz + +_CWMT_TRAIN_DATASETS = [ + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/casia2015/casia2015_en.txt", + "cwmt/casia2015/casia2015_ch.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/casict2015/casict2015_en.txt", + "cwmt/casict2015/casict2015_ch.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/neu2017/NEU_en.txt", + "cwmt/neu2017/NEU_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2015/datum_en.txt", + "cwmt/datum2015/datum_ch.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book1_en.txt", + "cwmt/datum2017/Book1_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book2_en.txt", + "cwmt/datum2017/Book2_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book3_en.txt", + "cwmt/datum2017/Book3_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book4_en.txt", + "cwmt/datum2017/Book4_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book5_en.txt", + "cwmt/datum2017/Book5_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book6_en.txt", + "cwmt/datum2017/Book6_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book7_en.txt", + "cwmt/datum2017/Book7_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book8_en.txt", + "cwmt/datum2017/Book8_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book9_en.txt", + "cwmt/datum2017/Book9_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book10_en.txt", + "cwmt/datum2017/Book10_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book11_en.txt", + "cwmt/datum2017/Book11_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book12_en.txt", + "cwmt/datum2017/Book12_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book13_en.txt", + "cwmt/datum2017/Book13_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book14_en.txt", + "cwmt/datum2017/Book14_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book15_en.txt", + "cwmt/datum2017/Book15_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book16_en.txt", + "cwmt/datum2017/Book16_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book17_en.txt", + "cwmt/datum2017/Book17_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book18_en.txt", + "cwmt/datum2017/Book18_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book19_en.txt", + "cwmt/datum2017/Book19_cn.txt"]], + ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", + ["cwmt/datum2017/Book20_en.txt", + "cwmt/datum2017/Book20_cn.txt"]] +] + + +def get_filename(dataset): + return dataset[0][0].split('/')[-1] @registry.register_problem -class TranslateEnzhWmt8k(translate.TranslateProblem): - """Problem spec for WMT En-Zh translation.""" +class TranslateEnzhWmt32k(translate.TranslateProblem): + """Problem spec for WMT En-Zh translation. + Attempts to use full training dataset, which needs website + registration and downloaded manually from official sources: - @property - def targeted_vocab_size(self): - return 2**13 # 8192 + CWMT: + - http://nlp.nju.edu.cn/cwmt-wmt/ + - Website contrains instructions for FTP server access. + - You'll need to download CASIA, CASICT, DATUM2015, DATUM2017, + NEU datasets + + UN Parallel Corpus: + - https://conferences.unite.un.org/UNCorpus + - You'll need to register your to download the dataset. + + NOTE: place into tmp directory e.g. /tmp/t2t_datagen/dataset.tgz + """ @property - def num_shards(self): - return 10 # This is a small dataset. + def targeted_vocab_size(self): + return 2**15 # 32k @property def source_vocab_name(self): @@ -72,20 +189,35 @@ def source_vocab_name(self): @property def target_vocab_name(self): return "vocab.enzh-zh.%d" % self.targeted_vocab_size + + def get_training_dataset(self, tmp_dir): + """UN Parallel Corpus and CWMT Corpus need to be downloaded manually. + Append to training dataset if available + """ + full_dataset = _NC_TRAIN_DATASETS + for dataset in [_CWMT_TRAIN_DATASETS, _UN_TRAIN_DATASETS]: + filename = get_filename(dataset) + tmp_filepath = os.path.join(tmp_dir, filename) + if tf.gfile.Exists(tmp_filepath): + full_dataset = full_dataset + dataset + else: + tf.logging.info("[TranslateEzhWmt] dataset incomplete, you need to manually download %s" % filename) + return full_dataset def generator(self, data_dir, tmp_dir, train): - datasets = _ENZH_TRAIN_DATASETS if train else _ENZH_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in _ENZH_TRAIN_DATASETS] - target_datasets = [[item[0], [item[1][1]]] for item in _ENZH_TRAIN_DATASETS] + TRAIN_DATASET = self.get_training_dataset(tmp_dir) + datasets = TRAIN_DATASET if train else _NC_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in TRAIN_DATASET] + target_datasets = [[item[0], [item[1][1]]] for item in TRAIN_DATASET] source_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, - source_datasets) + source_datasets, _file_byte_budget=1e8) target_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.target_vocab_name, self.targeted_vocab_size, - target_datasets) + target_datasets, _file_byte_budget=1e8) tag = "train" if train else "dev" - data_path = translate.compile_data(tmp_dir, datasets, - "wmt_enzh_tok_%s" % tag) + filename_base = "wmt_enzh_%sk_tok_%s" % (self.targeted_vocab_size, tag) + data_path = translate.compile_data(tmp_dir, datasets, filename_base) return translate.bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", source_vocab, target_vocab, EOS) @@ -107,3 +239,22 @@ def feature_encoders(self, data_dir): "inputs": source_token, "targets": target_token, } + + +@registry.register_problem +class TranslateEnzhWmt8k(TranslateEnzhWmt32k): + """Problem spec for WMT En-Zh translation. + This is far from being the real WMT17 task - only toyset here + """ + + @property + def targeted_vocab_size(self): + return 2**13 # 8192 + + @property + def num_shards(self): + return 10 # This is a small dataset. + + def get_training_dataset(self, tmp_dir): + """Uses only News Commentary Dataset for training""" + return _NC_TRAIN_DATASETS From f55462a9928f3f8af0b1275a4fb40d13cae6cc79 Mon Sep 17 00:00:00 2001 From: Martin Popel Date: Tue, 9 Jan 2018 01:13:04 +0100 Subject: [PATCH 0687/4095] Scripts for proper BLEU evaluation, batch translation and averaging (#488) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `t2t-bleu` computes the "real" BLEU (giving the same result as [sacréBLEU](https://github.com/awslabs/sockeye/tree/master/contrib/sacrebleu) with `--tokenization intl` and as [mteval-v14.pl](https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v14.pl) with `--international-tokenization`). It can be used in two ways: * To evaluate an already translated file: `t2t-bleu --translation=my-wmt13.de --reference=wmt13_deen.de` * To evaluate all translations in a given directory. `t2t-translate-all` translates all checkpoints in a given directory. A custom command (e.g. SGE cluster wrapper) can be used instead of `t2t-decoder` for the translation. `t2t-avg-all` for each checkpoint in a given directory it averages it with the N preceding ones. All three scripts wait a given number of minutes for new checkpoints (produced by t2t-decoder, which can be run concurrently with these scripts). --- tensor2tensor/bin/t2t-avg-all | 106 +++++++++++++++++++++ tensor2tensor/bin/t2t-bleu | 137 ++++++++++++++++++++++++++++ tensor2tensor/bin/t2t-translate-all | 91 ++++++++++++++++++ tensor2tensor/utils/bleu_hook.py | 68 ++++++++++++++ 4 files changed, 402 insertions(+) create mode 100755 tensor2tensor/bin/t2t-avg-all create mode 100755 tensor2tensor/bin/t2t-bleu create mode 100755 tensor2tensor/bin/t2t-translate-all diff --git a/tensor2tensor/bin/t2t-avg-all b/tensor2tensor/bin/t2t-avg-all new file mode 100755 index 000000000..3b4d6211d --- /dev/null +++ b/tensor2tensor/bin/t2t-avg-all @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Script to continously average last N checkpoints in a given directory.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import logging + +# Dependency imports + +import numpy as np +import six +from six.moves import zip # pylint: disable=redefined-builtin +from collections import deque +import shutil +import tensorflow as tf +from tensor2tensor.utils import bleu_hook + +flags = tf.flags +FLAGS = flags.FLAGS + +flags.DEFINE_string("model_dir", "", "Directory to load model checkpoints from.") +flags.DEFINE_string("output_dir", "avg/", "Directory to output the averaged checkpoints to.") +flags.DEFINE_integer("n", 8, "How many checkpoints should be averaged?") +flags.DEFINE_integer("min_steps", 0, "Ignore checkpoints with less steps.") +flags.DEFINE_integer("wait_minutes", 0, "Wait upto N minutes for a new checkpoint.") + + +def main(_): + tf.logging._handler.setFormatter(logging.Formatter("%(asctime)s:" + logging.BASIC_FORMAT, None)) + tf.logging.set_verbosity(tf.logging.INFO) + + model_dir = os.path.expanduser(FLAGS.model_dir) + output_dir = os.path.expanduser(FLAGS.output_dir) + out_base_file = os.path.join(output_dir, 'model.ckpt') + + # Copy flags.txt with the original time, so t2t-bleu can report correct relative time. + os.makedirs(FLAGS.output_dir, exist_ok=True) + if not os.path.exists(os.path.join(output_dir, 'flags.txt')): + shutil.copy2(os.path.join(model_dir, 'flags.txt'), os.path.join(output_dir, 'flags.txt')) + + models_processed = 0 + queue = deque() + for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps): + if models_processed == 0: + var_list = tf.contrib.framework.list_variables(model.filename) + avg_values = {} + for (name, shape) in var_list: + if not name.startswith("global_step"): + avg_values[name] = np.zeros(shape) + models_processed += 1 + + tf.logging.info("Loading [%d]: %s" % (models_processed, model.filename)) + reader = tf.contrib.framework.load_checkpoint(model.filename) + for name in avg_values: + avg_values[name] += reader.get_tensor(name) / FLAGS.n + queue.append(model) + if len(queue) < FLAGS.n: + continue + + out_file = "%s-%d" % (out_base_file, model.steps) + tf_vars = [] + tf.logging.info("Averaging %s" % (out_file)) + for (name, value) in six.iteritems(avg_values): + tf_vars.append(tf.get_variable(name, shape=value.shape)) # TODO , dtype=var_dtypes[name] + placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars] + assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)] + + global_step = tf.Variable(model.steps, name="global_step", trainable=False, dtype=tf.int64) + saver = tf.train.Saver(tf.global_variables()) + + tf.logging.info("Running session for %s" % (out_file)) + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + for p, assign_op, (name, value) in zip(placeholders, assign_ops, six.iteritems(avg_values)): + sess.run(assign_op, {p: value}) + tf.logging.info("Storing to %s" % out_file) + saver.save(sess, out_base_file, global_step=global_step) + os.utime(out_file + '.index', (model.mtime, model.mtime)) + + tf.reset_default_graph() + first_model = queue.popleft() + + reader = tf.contrib.framework.load_checkpoint(first_model.filename) + for name in avg_values: + avg_values[name] -= reader.get_tensor(name) / FLAGS.n + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t-bleu b/tensor2tensor/bin/t2t-bleu new file mode 100755 index 000000000..cac2b9fc3 --- /dev/null +++ b/tensor2tensor/bin/t2t-bleu @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Evaluate BLEU score for all checkpoints/translations in a given directory. + +This script can be used in two ways. + +To evaluate one already translated file: +`t2t-bleu --translation=my-wmt13.de --reference=wmt13_deen.de` + +To evaluate all translations in a given directory (translated by t2t-translate-all): +`t2t-bleu + --translations_dir=my-translations + --reference=wmt13_deen.de + --event_dir=events` + +In addition to the above-mentioned compulsory parameters, +there are optional parameters: + + * bleu_variant: cased (case-sensitive), uncased, both (default). + * tag_suffix: Default="", so the tags will be BLEU_cased and BLEU_uncased. tag_suffix + can be used e.g. for different beam sizes if these should be plotted in different graphs. + * min_steps: Don't evaluate checkpoints with less steps. + Default=-1 means check the `last_evaluated_step.txt` file, which contains the number of steps + of the last successfully evaluated checkpoint. + * report_zero: Store BLEU=0 and guess its time based on the oldest file in the translations_dir. + Default=True. This is useful, so TensorBoard reports correct relative time for the remaining + checkpoints. This flag is set to False if min_steps is > 0. + * wait_minutes: Wait upto N minutes for a new translated file. Default=0. + This is useful for continuous evaluation of a running training, + in which case this should be equal to save_checkpoints_secs/60 plus time needed for translation + plus some reserve. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +from tensor2tensor.utils import bleu_hook +import tensorflow as tf + + +flags = tf.flags +FLAGS = flags.FLAGS + +flags.DEFINE_string("source", None, "Path to the source-language file to be translated") +flags.DEFINE_string("reference", None, "Path to the reference translation file") +flags.DEFINE_string("translation", None, "Path to the MT system translation file") +flags.DEFINE_string("translations_dir", None, "Directory with translated files to be evaulated.") +flags.DEFINE_string("event_dir", None, "Where to store the event file.") + +flags.DEFINE_string("bleu_variant", "both", + "Possible values: cased(case-sensitive), uncased, both(default).") +flags.DEFINE_string("tag_suffix", "", + "What to add to BLEU_cased and BLEU_uncased tags. Default=''.") +flags.DEFINE_integer("min_steps", -1, "Don't evaluate checkpoints with less steps.") +flags.DEFINE_integer("wait_minutes", 0, + "Wait upto N minutes for a new checkpoint, cf. save_checkpoints_secs.") +flags.DEFINE_bool("report_zero", None, "Store BLEU=0 and guess its time based on the oldest file.") + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + if FLAGS.translation: + if FLAGS.translations_dir: + raise ValueError('Cannot specify both --translation and --translations_dir.') + if FLAGS.bleu_variant in ('uncased', 'both'): + bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation, case_sensitive=False) + print("BLEU_uncased = %6.2f" % bleu) + if FLAGS.bleu_variant in ('cased', 'both'): + bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation, case_sensitive=True) + print("BLEU_cased = %6.2f" % bleu) + return + + if not FLAGS.translations_dir: + raise ValueError('Either --translation or --translations_dir must be specified.') + transl_dir = os.path.expanduser(FLAGS.translations_dir) + + last_step_file = os.path.join(FLAGS.event_dir, 'last_evaluated_step.txt') + if FLAGS.min_steps == -1: + try: + with open(last_step_file) as ls_file: + FLAGS.min_steps = int(ls_file.read()) + except FileNotFoundError: + FLAGS.min_steps = 0 + if FLAGS.report_zero is None: + FLAGS.report_zero = FLAGS.min_steps == 0 + + writer = tf.summary.FileWriter(FLAGS.event_dir) + for transl_file in bleu_hook.stepfiles_iterator(transl_dir, FLAGS.wait_minutes, + FLAGS.min_steps, path_suffix=''): + # report_zero handling must be inside the for-loop, + # so we are sure the transl_dir is already created. + if FLAGS.report_zero: + all_files = (os.path.join(transl_dir, f) for f in os.listdir(transl_dir)) + start_time = min(os.path.getmtime(f) for f in all_files if os.path.isfile(f)) + values = [] + if FLAGS.bleu_variant in ('uncased', 'both'): + values.append(tf.Summary.Value(tag='BLEU_uncased' + FLAGS.tag_suffix, simple_value=0)) + if FLAGS.bleu_variant in ('cased', 'both'): + values.append(tf.Summary.Value(tag='BLEU_cased' + FLAGS.tag_suffix, simple_value=0)) + writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), + wall_time=start_time, step=0)) + FLAGS.report_zero = False + + filename = transl_file.filename + tf.logging.info("Evaluating " + filename) + values = [] + if FLAGS.bleu_variant in ('uncased', 'both'): + bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename, case_sensitive=False) + values.append(tf.Summary.Value(tag='BLEU_uncased' + FLAGS.tag_suffix, simple_value=bleu)) + tf.logging.info("%s: BLEU_uncased = %6.2f" % (filename, bleu)) + if FLAGS.bleu_variant in ('cased', 'both'): + bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename, case_sensitive=True) + values.append(tf.Summary.Value(tag='BLEU_cased' + FLAGS.tag_suffix, simple_value=bleu)) + tf.logging.info("%s: BLEU_cased = %6.2f" % (transl_file.filename, bleu)) + writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), + wall_time=transl_file.mtime, step=transl_file.steps)) + writer.flush() + with open(last_step_file, 'w') as ls_file: + ls_file.write(str(transl_file.steps) + '\n') + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t-translate-all b/tensor2tensor/bin/t2t-translate-all new file mode 100755 index 000000000..1ee7e535f --- /dev/null +++ b/tensor2tensor/bin/t2t-translate-all @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Translate a file with all checkpoints in a given directory. + +t2t-decoder will be executed with these parameters: +--problems +--data_dir +--output_dir with the value of --model_dir +--decode_from_file with the value of --source +--decode_hparams with properly formated --beam_size and --alpha +--checkpoint_path automatically filled +--decode_to_file automatically filled +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +import shutil +import tensorflow as tf +from tensor2tensor.utils import bleu_hook + + +flags = tf.flags + +# t2t-translate-all specific options +flags.DEFINE_string("decoder_command", "t2t-decoder {params}", + "Which command to execute instead t2t-decoder." + "{params} is replaced by the parameters. Useful e.g. for qsub wrapper.") +flags.DEFINE_string("model_dir", "", "Directory to load model checkpoints from.") +flags.DEFINE_string("source", None, "Path to the source-language file to be translated") +flags.DEFINE_string("translations_dir", "translations", "Where to store the translated files.") +flags.DEFINE_integer("min_steps", 0, "Ignore checkpoints with less steps.") +flags.DEFINE_integer("wait_minutes", 0, "Wait upto N minutes for a new checkpoint") + +# options derived from t2t-decoder +flags.DEFINE_integer("beam_size", 4, "Beam-search width.") +flags.DEFINE_float("alpha", 0.6, "Beam-search alpha.") +flags.DEFINE_string("model", "transformer", "see t2t-decoder") +flags.DEFINE_string("t2t_usr_dir", None, "see t2t-decoder") +flags.DEFINE_string("data_dir", None, "see t2t-decoder") +flags.DEFINE_string("problems", None, "see t2t-decoder") +flags.DEFINE_string("hparams_set", "transformer_big_single_gpu", "see t2t-decoder") + + +def main(_): + FLAGS = flags.FLAGS + tf.logging.set_verbosity(tf.logging.INFO) + model_dir = os.path.expanduser(FLAGS.model_dir) + translations_dir = os.path.expanduser(FLAGS.translations_dir) + source = os.path.expanduser(FLAGS.source) + os.makedirs(translations_dir, exist_ok=True) + translated_base_file = os.path.join(translations_dir, FLAGS.problems) + + # Copy flags.txt with the original time, so t2t-bleu can report correct relative time. + flags_path = os.path.join(translations_dir, FLAGS.problems + '-flags.txt') + if not os.path.exists(flags_path): + shutil.copy2(os.path.join(model_dir, 'flags.txt'), flags_path) + + for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps): + tf.logging.info("Translating " + model.filename) + out_file = translated_base_file + '-' + str(model.steps) + if os.path.exists(out_file): + tf.logging.info(out_file + " already exists, so skipping it.") + else: + tf.logging.info("Translating " + out_file) + params = ("--t2t_usr_dir={FLAGS.t2t_usr_dir} --output_dir={model_dir} " + "--data_dir={FLAGS.data_dir} --problems={FLAGS.problems} " + "--decode_hparams=beam_size={FLAGS.beam_size},alpha={FLAGS.alpha} " + "--model={FLAGS.model} --hparams_set={FLAGS.hparams_set} " + "--checkpoint_path={model.filename} --decode_from_file={source} " + "--decode_to_file={out_file}".format(**locals())) + command = FLAGS.decoder_command.format(**locals()) + tf.logging.info("Running:\n" + command) + os.system(command) + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 49b31c1bb..3ca5070a8 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -20,9 +20,12 @@ import collections import math +import os import re import sys +import time import unicodedata +from collections import namedtuple # Dependency imports @@ -197,3 +200,68 @@ def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False): ref_tokens = [bleu_tokenize(x) for x in ref_lines] hyp_tokens = [bleu_tokenize(x) for x in hyp_lines] return compute_bleu(ref_tokens, hyp_tokens) + + +StepFile = namedtuple('StepFile', 'filename mtime ctime steps') + + +def _read_stepfiles_list(path_prefix, path_suffix='.index', min_steps=0): + stepfiles = [] + for filename in tf.gfile.Glob(path_prefix + '*-[0-9]*' + path_suffix): + basename = filename[:-len(path_suffix)] if len(path_suffix) else filename + try: + steps = int(basename.rsplit('-')[-1]) + except ValueError: # The -[0-9]* part is not an integer. + continue + if steps < min_steps: + continue + if not os.path.exists(filename): + tf.logging.info(filename + " was deleted, so skipping it") + continue + stepfiles.append(StepFile(basename, os.path.getmtime(filename), + os.path.getctime(filename), steps)) + return sorted(stepfiles, key=lambda x: -x.steps) + + +def stepfiles_iterator(path_prefix, wait_minutes=0, min_steps=0, + path_suffix='.index', sleep_sec=10): + """Continuously yield new files with steps in filename as they appear. + + This is useful for checkpoint files or other files whose names differ just in an interger + marking the number of steps and match the wildcard path_prefix + '*-[0-9]*' + path_suffix. + Unlike `tf.contrib.training.checkpoints_iterator`, this + implementation always starts from the oldest files + (and it cannot miss any file). Note that the oldest checkpoint + may be deleted anytime by Tensorflow (if set up so). It is up to the user + to check that the files returned by this generator actually exist. + Args: + path_prefix: The directory + possible common filename prefix to the files. + path_suffix: Common filename suffix (after steps), including possible extension dot. + wait_minutes: The maximum amount of minutes to wait between files. + min_steps: Skip files with lower global step. + sleep_sec: How often to check for new files. + Yields: + named tuples (filename, mtime, ctime, steps) of the files as they arrive. + """ + # Wildcard D*-[0-9]* does not match D/x-1, so if D is a directory let path_prefix='D/'. + if not path_prefix.endswith(os.sep) and os.path.isdir(path_prefix): + path_prefix += os.sep + stepfiles = _read_stepfiles_list(path_prefix, path_suffix, min_steps) + tf.logging.info("Found %d files with steps: %s" + % (len(stepfiles), ", ".join(str(x.steps) for x in reversed(stepfiles)))) + exit_time = time.time() + wait_minutes * 60 + while True: + if not stepfiles and wait_minutes: + tf.logging.info('Waiting till %s if a new file matching %s*-[0-9]*%s appears' + % (time.asctime(time.localtime(exit_time)), path_prefix, path_suffix)) + while True: + stepfiles = _read_stepfiles_list(path_prefix, path_suffix, min_steps) + if stepfiles or time.time() > exit_time: + break + time.sleep(sleep_sec) + if not stepfiles: + return + + stepfile = stepfiles.pop() + exit_time, min_steps = stepfile.ctime + wait_minutes * 60, stepfile.steps + 1 + yield stepfile From cc43389fabffc17f1cc35c9ad57d6bd23fccc563 Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Fri, 12 Jan 2018 22:56:01 +0100 Subject: [PATCH 0688/4095] fix for shakeshake2_py with equal=True (#510) --- tensor2tensor/layers/common_layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 640730864..0e305ef54 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -76,7 +76,7 @@ def shakeshake2_py(x, y, equal=False, individual=False): """The shake-shake sum of 2 tensors, python version.""" if equal: alpha = 0.5 - if individual: + elif individual: alpha = tf.random_uniform(tf.get_shape(x)[:1]) else: alpha = tf.random_uniform([]) From d9cba5ce295a35cbfed41a067234c5572cb76d2c Mon Sep 17 00:00:00 2001 From: Martin Popel Date: Sat, 13 Jan 2018 00:43:10 +0100 Subject: [PATCH 0689/4095] fix and test bleu_hook.bleu_tokenize (#514) * fix and test bleu_hook.bleu_tokenize * make the test work in Python2 --- tensor2tensor/utils/bleu_hook.py | 9 +++++---- tensor2tensor/utils/bleu_hook_test.py | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 3ca5070a8..50caf09bf 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -153,7 +153,7 @@ def __init__(self): def _property_chars(prefix): return ''.join(six.unichr(x) for x in range(sys.maxunicode) if unicodedata.category(six.unichr(x)).startswith(prefix)) - punctuation = self._property_chars('P') + punctuation = _property_chars('P') self.nondigit_punct_re = re.compile(r'([^\d])([' + punctuation + r'])') self.punct_nondigit_re = re.compile(r'([' + punctuation + r'])([^\d])') self.symbol_re = re.compile('([' + _property_chars('S') + '])') @@ -183,9 +183,10 @@ def bleu_tokenize(string): Returns: a list of tokens """ - string = UnicodeRegex.nondigit_punct_re.sub(r'\1 \2 ', string) - string = UnicodeRegex.punct_nondigit_re.sub(r' \1 \2', string) - string = UnicodeRegex.symbol_re.sub(r' \1 ', string) + uregex = UnicodeRegex() + string = uregex.nondigit_punct_re.sub(r'\1 \2 ', string) + string = uregex.punct_nondigit_re.sub(r' \1 \2', string) + string = uregex.symbol_re.sub(r' \1 ', string) return string.split() diff --git a/tensor2tensor/utils/bleu_hook_test.py b/tensor2tensor/utils/bleu_hook_test.py index e4f3a18a9..b616aaf7c 100644 --- a/tensor2tensor/utils/bleu_hook_test.py +++ b/tensor2tensor/utils/bleu_hook_test.py @@ -57,5 +57,9 @@ def testComputeMultipleNgrams(self): actual_bleu = 0.3436 self.assertAllClose(bleu, actual_bleu, atol=1e-03) + def testBleuTokenize(self): + self.assertEqual(bleu_hook.bleu_tokenize(u'hi, “there”'), [u'hi', u',', u'“', u'there', u'”']) + + if __name__ == '__main__': tf.test.main() From ad4ad2ca588a04d7729c812fb0f9848f5d25796b Mon Sep 17 00:00:00 2001 From: T2T Team Date: Mon, 25 Dec 2017 10:02:27 -0800 Subject: [PATCH 0690/4095] Fix some issues with the VQ-VAE discretization bottleneck. PiperOrigin-RevId: 180097448 --- docs/new_problem.md | 31 +-- tensor2tensor/bin/t2t-avg-all | 106 --------- tensor2tensor/bin/t2t-bleu | 137 ----------- tensor2tensor/bin/t2t-datagen | 212 ------------------ tensor2tensor/bin/t2t-decoder | 110 --------- tensor2tensor/bin/t2t-make-tf-configs | 87 ------- tensor2tensor/bin/t2t-trainer | 191 ---------------- tensor2tensor/bin/t2t-translate-all | 91 -------- tensor2tensor/bin/t2t_trainer.py | 165 +------------- .../data_generators/generator_utils.py | 5 +- .../data_generators/translate_enzh.py | 189 ++-------------- tensor2tensor/layers/common_layers.py | 2 +- tensor2tensor/models/transformer_vae.py | 26 ++- tensor2tensor/utils/bleu_hook.py | 77 +------ tensor2tensor/utils/bleu_hook_test.py | 4 - 15 files changed, 50 insertions(+), 1383 deletions(-) delete mode 100755 tensor2tensor/bin/t2t-avg-all delete mode 100755 tensor2tensor/bin/t2t-bleu delete mode 100644 tensor2tensor/bin/t2t-datagen delete mode 100644 tensor2tensor/bin/t2t-decoder delete mode 100644 tensor2tensor/bin/t2t-make-tf-configs delete mode 100644 tensor2tensor/bin/t2t-trainer delete mode 100755 tensor2tensor/bin/t2t-translate-all diff --git a/docs/new_problem.md b/docs/new_problem.md index fd5f9d625..48976a61b 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -184,7 +184,7 @@ import os from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators.translate import character_generator +from tensor2tensor.data_generators.wmt import character_generator from tensor2tensor.utils import registry @@ -240,40 +240,16 @@ All hyperparamters inherit from `_default_hparams()` in `problem.py.` If you wou from tensor2tensor.models import transformer @registry.register_hparams -def word2def_hparams(): +def word2def_hparams(self): hparams = transformer.transformer_base_single_gpu() # Or whatever you'd like to build off. hparams.batch_size = 1024 return hparams ``` -# Test the data generation - -You can test data generation of your a problem in your own project with: - -```bash -PROBLEM=word2def -DATA_DIR=$HOME/t2t_data -TMP_DIR=/tmp/t2t_datagen -mkdir -p $DATA_DIR $TMP_DIR - -t2t-datagen \ - --t2t_usr_dir=$PATH_TO_YOUR_PROBLEM_DIR \ - --data_dir=$DATA_DIR \ - --tmp_dir=$TMP_DIR \ - --problem=$PROBLEM -``` - -Where: -* `PROBLEM` is the name of the class that was registered with `@registry.register_problem()`, but converted from `CamelCase` to `snake_case`. -* `PATH_TO_YOUR_PROBLEM_DIR` is a path to the directory of your python problem file. - -If you plan to contribute to the tensor2tensor repository, you can install the local cloned version in developer mode with `pip install -e .` from the tensor2tensor directory. You can also add your new problem file to [`all_problems.py`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/all_problems.py). - # Run the problem Now that we've gotten our problem set up, let's train a model and generate definitions. -To train, specify the problem name, the model, and hparams: - +We specify our problem name, the model, and hparams. ```bash PROBLEM=word2def MODEL=transformer @@ -282,6 +258,7 @@ HPARAMS=word2def_hparams The rest of the steps are as given in the [walkthrough](walkthrough.md). + What if we wanted to train a model to generate words given definitions? In T2T, we can change the problem name to be `PROBLEM=word2def_rev`. All done. Let us know what definitions your model generated. diff --git a/tensor2tensor/bin/t2t-avg-all b/tensor2tensor/bin/t2t-avg-all deleted file mode 100755 index 3b4d6211d..000000000 --- a/tensor2tensor/bin/t2t-avg-all +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Script to continously average last N checkpoints in a given directory.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import logging - -# Dependency imports - -import numpy as np -import six -from six.moves import zip # pylint: disable=redefined-builtin -from collections import deque -import shutil -import tensorflow as tf -from tensor2tensor.utils import bleu_hook - -flags = tf.flags -FLAGS = flags.FLAGS - -flags.DEFINE_string("model_dir", "", "Directory to load model checkpoints from.") -flags.DEFINE_string("output_dir", "avg/", "Directory to output the averaged checkpoints to.") -flags.DEFINE_integer("n", 8, "How many checkpoints should be averaged?") -flags.DEFINE_integer("min_steps", 0, "Ignore checkpoints with less steps.") -flags.DEFINE_integer("wait_minutes", 0, "Wait upto N minutes for a new checkpoint.") - - -def main(_): - tf.logging._handler.setFormatter(logging.Formatter("%(asctime)s:" + logging.BASIC_FORMAT, None)) - tf.logging.set_verbosity(tf.logging.INFO) - - model_dir = os.path.expanduser(FLAGS.model_dir) - output_dir = os.path.expanduser(FLAGS.output_dir) - out_base_file = os.path.join(output_dir, 'model.ckpt') - - # Copy flags.txt with the original time, so t2t-bleu can report correct relative time. - os.makedirs(FLAGS.output_dir, exist_ok=True) - if not os.path.exists(os.path.join(output_dir, 'flags.txt')): - shutil.copy2(os.path.join(model_dir, 'flags.txt'), os.path.join(output_dir, 'flags.txt')) - - models_processed = 0 - queue = deque() - for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps): - if models_processed == 0: - var_list = tf.contrib.framework.list_variables(model.filename) - avg_values = {} - for (name, shape) in var_list: - if not name.startswith("global_step"): - avg_values[name] = np.zeros(shape) - models_processed += 1 - - tf.logging.info("Loading [%d]: %s" % (models_processed, model.filename)) - reader = tf.contrib.framework.load_checkpoint(model.filename) - for name in avg_values: - avg_values[name] += reader.get_tensor(name) / FLAGS.n - queue.append(model) - if len(queue) < FLAGS.n: - continue - - out_file = "%s-%d" % (out_base_file, model.steps) - tf_vars = [] - tf.logging.info("Averaging %s" % (out_file)) - for (name, value) in six.iteritems(avg_values): - tf_vars.append(tf.get_variable(name, shape=value.shape)) # TODO , dtype=var_dtypes[name] - placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars] - assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)] - - global_step = tf.Variable(model.steps, name="global_step", trainable=False, dtype=tf.int64) - saver = tf.train.Saver(tf.global_variables()) - - tf.logging.info("Running session for %s" % (out_file)) - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - for p, assign_op, (name, value) in zip(placeholders, assign_ops, six.iteritems(avg_values)): - sess.run(assign_op, {p: value}) - tf.logging.info("Storing to %s" % out_file) - saver.save(sess, out_base_file, global_step=global_step) - os.utime(out_file + '.index', (model.mtime, model.mtime)) - - tf.reset_default_graph() - first_model = queue.popleft() - - reader = tf.contrib.framework.load_checkpoint(first_model.filename) - for name in avg_values: - avg_values[name] -= reader.get_tensor(name) / FLAGS.n - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-bleu b/tensor2tensor/bin/t2t-bleu deleted file mode 100755 index cac2b9fc3..000000000 --- a/tensor2tensor/bin/t2t-bleu +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Evaluate BLEU score for all checkpoints/translations in a given directory. - -This script can be used in two ways. - -To evaluate one already translated file: -`t2t-bleu --translation=my-wmt13.de --reference=wmt13_deen.de` - -To evaluate all translations in a given directory (translated by t2t-translate-all): -`t2t-bleu - --translations_dir=my-translations - --reference=wmt13_deen.de - --event_dir=events` - -In addition to the above-mentioned compulsory parameters, -there are optional parameters: - - * bleu_variant: cased (case-sensitive), uncased, both (default). - * tag_suffix: Default="", so the tags will be BLEU_cased and BLEU_uncased. tag_suffix - can be used e.g. for different beam sizes if these should be plotted in different graphs. - * min_steps: Don't evaluate checkpoints with less steps. - Default=-1 means check the `last_evaluated_step.txt` file, which contains the number of steps - of the last successfully evaluated checkpoint. - * report_zero: Store BLEU=0 and guess its time based on the oldest file in the translations_dir. - Default=True. This is useful, so TensorBoard reports correct relative time for the remaining - checkpoints. This flag is set to False if min_steps is > 0. - * wait_minutes: Wait upto N minutes for a new translated file. Default=0. - This is useful for continuous evaluation of a running training, - in which case this should be equal to save_checkpoints_secs/60 plus time needed for translation - plus some reserve. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -from tensor2tensor.utils import bleu_hook -import tensorflow as tf - - -flags = tf.flags -FLAGS = flags.FLAGS - -flags.DEFINE_string("source", None, "Path to the source-language file to be translated") -flags.DEFINE_string("reference", None, "Path to the reference translation file") -flags.DEFINE_string("translation", None, "Path to the MT system translation file") -flags.DEFINE_string("translations_dir", None, "Directory with translated files to be evaulated.") -flags.DEFINE_string("event_dir", None, "Where to store the event file.") - -flags.DEFINE_string("bleu_variant", "both", - "Possible values: cased(case-sensitive), uncased, both(default).") -flags.DEFINE_string("tag_suffix", "", - "What to add to BLEU_cased and BLEU_uncased tags. Default=''.") -flags.DEFINE_integer("min_steps", -1, "Don't evaluate checkpoints with less steps.") -flags.DEFINE_integer("wait_minutes", 0, - "Wait upto N minutes for a new checkpoint, cf. save_checkpoints_secs.") -flags.DEFINE_bool("report_zero", None, "Store BLEU=0 and guess its time based on the oldest file.") - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - if FLAGS.translation: - if FLAGS.translations_dir: - raise ValueError('Cannot specify both --translation and --translations_dir.') - if FLAGS.bleu_variant in ('uncased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation, case_sensitive=False) - print("BLEU_uncased = %6.2f" % bleu) - if FLAGS.bleu_variant in ('cased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation, case_sensitive=True) - print("BLEU_cased = %6.2f" % bleu) - return - - if not FLAGS.translations_dir: - raise ValueError('Either --translation or --translations_dir must be specified.') - transl_dir = os.path.expanduser(FLAGS.translations_dir) - - last_step_file = os.path.join(FLAGS.event_dir, 'last_evaluated_step.txt') - if FLAGS.min_steps == -1: - try: - with open(last_step_file) as ls_file: - FLAGS.min_steps = int(ls_file.read()) - except FileNotFoundError: - FLAGS.min_steps = 0 - if FLAGS.report_zero is None: - FLAGS.report_zero = FLAGS.min_steps == 0 - - writer = tf.summary.FileWriter(FLAGS.event_dir) - for transl_file in bleu_hook.stepfiles_iterator(transl_dir, FLAGS.wait_minutes, - FLAGS.min_steps, path_suffix=''): - # report_zero handling must be inside the for-loop, - # so we are sure the transl_dir is already created. - if FLAGS.report_zero: - all_files = (os.path.join(transl_dir, f) for f in os.listdir(transl_dir)) - start_time = min(os.path.getmtime(f) for f in all_files if os.path.isfile(f)) - values = [] - if FLAGS.bleu_variant in ('uncased', 'both'): - values.append(tf.Summary.Value(tag='BLEU_uncased' + FLAGS.tag_suffix, simple_value=0)) - if FLAGS.bleu_variant in ('cased', 'both'): - values.append(tf.Summary.Value(tag='BLEU_cased' + FLAGS.tag_suffix, simple_value=0)) - writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), - wall_time=start_time, step=0)) - FLAGS.report_zero = False - - filename = transl_file.filename - tf.logging.info("Evaluating " + filename) - values = [] - if FLAGS.bleu_variant in ('uncased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename, case_sensitive=False) - values.append(tf.Summary.Value(tag='BLEU_uncased' + FLAGS.tag_suffix, simple_value=bleu)) - tf.logging.info("%s: BLEU_uncased = %6.2f" % (filename, bleu)) - if FLAGS.bleu_variant in ('cased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename, case_sensitive=True) - values.append(tf.Summary.Value(tag='BLEU_cased' + FLAGS.tag_suffix, simple_value=bleu)) - tf.logging.info("%s: BLEU_cased = %6.2f" % (transl_file.filename, bleu)) - writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), - wall_time=transl_file.mtime, step=transl_file.steps)) - writer.flush() - with open(last_step_file, 'w') as ls_file: - ls_file.write(str(transl_file.steps) + '\n') - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen deleted file mode 100644 index 2ac0f0db2..000000000 --- a/tensor2tensor/bin/t2t-datagen +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Produces the training and dev data for --problem into --data_dir. - -Produces sharded and shuffled TFRecord files of tensorflow.Example protocol -buffers for a variety of registered datasets. - -All Problems are registered with @registry.register_problem or are in -_SUPPORTED_PROBLEM_GENERATORS in this file. Each entry maps a string name -(selectable on the command-line with --problem) to a function that takes 2 -arguments - input_directory and mode (one of "train" or "dev") - and yields for -each training example a dictionary mapping string feature names to lists of -{string, int, float}. The generator will be run once for each mode. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import random -import tempfile - -# Dependency imports - -import numpy as np - -from tensor2tensor.data_generators import algorithmic_math -from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import -from tensor2tensor.data_generators import audio -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import snli -from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import registry -from tensor2tensor.utils import usr_dir - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -flags.DEFINE_string("data_dir", "", "Data directory.") -flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", - "Temporary storage directory.") -flags.DEFINE_string("problem", "", - "The name of the problem to generate data for.") -flags.DEFINE_string("exclude_problems", "", - "Comma-separates list of problems to exclude.") -flags.DEFINE_integer("num_shards", 0, "How many shards to use. Ignored for " - "registered Problems.") -flags.DEFINE_integer("max_cases", 0, - "Maximum number of cases to generate (unbounded if 0).") -flags.DEFINE_bool("only_list", False, - "If true, we only list the problems that will be generated.") -flags.DEFINE_integer("random_seed", 429459, "Random seed to use.") -flags.DEFINE_integer("task_id", -1, "For distributed data generation.") -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_problem calls, that will then be " - "available to t2t-datagen.") - -# Mapping from problems that we can generate data for to their generators. -# pylint: disable=g-long-lambda -_SUPPORTED_PROBLEM_GENERATORS = { - "algorithmic_algebra_inverse": ( - lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), - lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), - "parsing_english_ptb8k": ( - lambda: wsj_parsing.parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13, 2**9), - lambda: wsj_parsing.parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13, 2**9)), - "parsing_english_ptb16k": ( - lambda: wsj_parsing.parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9), - lambda: wsj_parsing.parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 2**14, 2**9)), - "inference_snli32k": ( - lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), - lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), - ), - "audio_timit_characters_test": ( - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 1718), - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 626)), - "audio_timit_tokens_8k_test": ( - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 1718, - vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13), - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 626, - vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13)), - "audio_timit_tokens_32k_test": ( - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 1718, - vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15), - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 626, - vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), -} - -# pylint: enable=g-long-lambda - - -def set_random_seed(): - """Set the random seed from flag everywhere.""" - tf.set_random_seed(FLAGS.random_seed) - random.seed(FLAGS.random_seed) - np.random.seed(FLAGS.random_seed) - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - - # Calculate the list of problems to generate. - problems = sorted( - list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems()) - for exclude in FLAGS.exclude_problems.split(","): - if exclude: - problems = [p for p in problems if exclude not in p] - if FLAGS.problem and FLAGS.problem[-1] == "*": - problems = [p for p in problems if p.startswith(FLAGS.problem[:-1])] - elif FLAGS.problem: - problems = [p for p in problems if p == FLAGS.problem] - else: - problems = [] - - # Remove TIMIT if paths are not given. - if not FLAGS.timit_paths: - problems = [p for p in problems if "timit" not in p] - # Remove parsing if paths are not given. - if not FLAGS.parsing_path: - problems = [p for p in problems if "parsing" not in p] - - if not problems: - problems_str = "\n * ".join( - sorted(list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())) - error_msg = ("You must specify one of the supported problems to " - "generate data for:\n * " + problems_str + "\n") - error_msg += ("TIMIT and parsing need data_sets specified with " - "--timit_paths and --parsing_path.") - raise ValueError(error_msg) - - if not FLAGS.data_dir: - FLAGS.data_dir = tempfile.gettempdir() - tf.logging.warning("It is strongly recommended to specify --data_dir. " - "Data will be written to default data_dir=%s.", - FLAGS.data_dir) - - tf.logging.info("Generating problems:\n%s" - % registry.display_list_by_prefix(problems, - starting_spaces=4)) - if FLAGS.only_list: - return - for problem in problems: - set_random_seed() - - if problem in _SUPPORTED_PROBLEM_GENERATORS: - generate_data_for_problem(problem) - else: - generate_data_for_registered_problem(problem) - - -def generate_data_for_problem(problem): - """Generate data for a problem in _SUPPORTED_PROBLEM_GENERATORS.""" - training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem] - - num_shards = FLAGS.num_shards or 10 - tf.logging.info("Generating training data for %s.", problem) - train_output_files = generator_utils.train_data_filenames( - problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, num_shards) - generator_utils.generate_files(training_gen(), train_output_files, - FLAGS.max_cases) - tf.logging.info("Generating development data for %s.", problem) - dev_output_files = generator_utils.dev_data_filenames( - problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, 1) - generator_utils.generate_files(dev_gen(), dev_output_files) - all_output_files = train_output_files + dev_output_files - generator_utils.shuffle_dataset(all_output_files) - - -def generate_data_for_registered_problem(problem_name): - tf.logging.info("Generating data for %s.", problem_name) - if FLAGS.num_shards: - raise ValueError("--num_shards should not be set for registered Problem.") - problem = registry.problem(problem_name) - task_id = None if FLAGS.task_id < 0 else FLAGS.task_id - problem.generate_data( - os.path.expanduser(FLAGS.data_dir), - os.path.expanduser(FLAGS.tmp_dir), - task_id=task_id) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder deleted file mode 100644 index f453b01fd..000000000 --- a/tensor2tensor/bin/t2t-decoder +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Decode from trained T2T models. - -This binary performs inference using the Estimator API. - -Example usage to decode from dataset: - - t2t-decoder \ - --data_dir ~/data \ - --problems=algorithmic_identity_binary40 \ - --model=transformer - --hparams_set=transformer_base - -Set FLAGS.decode_interactive or FLAGS.decode_from_file for alternative decode -sources. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -# Dependency imports - -from tensor2tensor.tpu import tpu_trainer -from tensor2tensor.tpu import tpu_trainer_lib -from tensor2tensor.utils import decoding -from tensor2tensor.utils import usr_dir - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -# Additional flags in tpu/tpu_trainer.py and utils/flags.py -flags.DEFINE_string("decode_from_file", None, - "Path to the source file for decoding") -flags.DEFINE_string("decode_to_file", None, - "Path to the decoded (output) file") -flags.DEFINE_bool("decode_interactive", False, - "Interactive local inference mode.") -flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") - - -def create_hparams(): - return tpu_trainer_lib.create_hparams( - FLAGS.hparams_set, - FLAGS.hparams, - data_dir=os.path.expanduser(FLAGS.data_dir), - problem_name=FLAGS.problems) - - -def create_decode_hparams(): - decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) - decode_hp.add_hparam("shards", FLAGS.decode_shards) - decode_hp.add_hparam("shard_id", FLAGS.worker_id) - return decode_hp - - -def decode(estimator, hparams, decode_hp): - if FLAGS.decode_interactive: - decoding.decode_interactively(estimator, hparams, decode_hp) - elif FLAGS.decode_from_file: - decoding.decode_from_file(estimator, FLAGS.decode_from_file, hparams, - decode_hp, FLAGS.decode_to_file) - else: - decoding.decode_from_dataset( - estimator, - FLAGS.problems.split("-"), - hparams, - decode_hp, - decode_to_file=FLAGS.decode_to_file, - dataset_split="test" if FLAGS.eval_use_test_set else None) - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - FLAGS.use_tpu = False # decoding not supported on TPU - - hp = create_hparams() - decode_hp = create_decode_hparams() - - estimator = tpu_trainer_lib.create_estimator( - FLAGS.model, - hp, - tpu_trainer.create_run_config(hp), - decode_hparams=decode_hp, - use_tpu=False) - - decode(estimator, hp, decode_hp) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs deleted file mode 100644 index 0b656aba6..000000000 --- a/tensor2tensor/bin/t2t-make-tf-configs +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Output command line arguments and json-encoded TF_CONFIGs. - -Usage: - -`t2t-make-tf-configs --masters="server1:1234" --ps="server3:2134,server4:2334"` - -Outputs 1 line per job to stdout, first the masters, then the parameter servers. -Each line has the TF_CONFIG, then a tab, then the command line flags for that -job. - -If there is a single master, it will have the `--sync` flag. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json - -# Dependency imports - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -flags.DEFINE_string("masters", "", "Comma-separated list of master addresses") -flags.DEFINE_string("ps", "", "Comma-separated list of ps addresses") - - -def main(_): - if not (FLAGS.masters and FLAGS.ps): - raise ValueError("Must provide --masters and --ps") - - masters = FLAGS.masters.split(",") - ps = FLAGS.ps.split(",") - - cluster = {"ps": ps, "master": masters} - - for task_type, jobs in (("master", masters), ("ps", ps)): - for idx, job in enumerate(jobs): - if task_type == "master": - cmd_line_flags = " ".join([ - "--master=grpc://%s" % job, - "--ps_replicas=%d" % len(ps), - "--worker_replicas=%d" % len(masters), - "--worker_gpu=1", - "--worker_id=%d" % idx, - "--worker_job='/job:master'", - "--ps_gpu=1", - "--schedule=train", - "--sync" if len(masters) == 1 else "", - ]) - else: - cmd_line_flags = " ".join([ - "--master=grpc://%s" % job, - "--schedule=run_std_server", - ]) - - tf_config = json.dumps({ - "cluster": cluster, - "task": { - "type": task_type, - "index": idx - }, - "environment": "cloud", - }) - print("'%s'\t%s" % (tf_config, cmd_line_flags)) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer deleted file mode 100644 index 70435094a..000000000 --- a/tensor2tensor/bin/t2t-trainer +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Train on TPU.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import os -import sys - -# Dependency imports - -from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor import problems as problems_lib # pylint: disable=unused-import -from tensor2tensor.tpu import tpu_trainer_lib -from tensor2tensor.utils import decoding -from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import -from tensor2tensor.utils import registry -from tensor2tensor.utils import usr_dir - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -# See flags.py for additional command-line flags. -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_model calls, that will then be " - "available to the t2t-trainer.") -flags.DEFINE_integer("random_seed", 1234, "Random seed.") -flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") -flags.DEFINE_integer("iterations_per_loop", 1000, - "Number of iterations in a TPU training loop.") -flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") -flags.DEFINE_bool("generate_data", False, "Generate data before training?") -flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", - "Temporary storage directory, used if --generate_data.") -flags.DEFINE_bool("profile", False, "Profile performance?") - -# To maintain compatibility with some internal libs, we guard against these flag -# definitions possibly erroring. Apologies for the ugliness. -try: - flags.DEFINE_string("master", "", "Address of TensorFlow master.") - flags.DEFINE_string("output_dir", "", "Base output directory for run.") - flags.DEFINE_string("schedule", "continuous_train_and_eval", - "Method of Experiment to run.") - flags.DEFINE_integer("eval_steps", 10000, - "Number of steps in evaluation. By default, eval will " - "stop after eval_steps or when it runs through the eval " - "dataset once in full, whichever comes first, so this " - "can be a very large number.") -except: # pylint: disable=bare-except - pass - - -def get_problem_name(): - problems = FLAGS.problems.split("-") - assert len(problems) == 1 - return problems[0] - - -def create_hparams(): - return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) - - -def create_experiment_fn(): - return tpu_trainer_lib.create_experiment_fn( - model_name=FLAGS.model, - problem_name=get_problem_name(), - data_dir=os.path.expanduser(FLAGS.data_dir), - train_steps=FLAGS.train_steps, - eval_steps=FLAGS.eval_steps, - min_eval_frequency=FLAGS.local_eval_frequency, - schedule=FLAGS.schedule, - export=FLAGS.export_saved_model, - decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), - use_tfdbg=FLAGS.tfdbg, - use_dbgprofile=FLAGS.dbgprofile, - eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, - eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, - eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, - eval_early_stopping_metric_minimize=FLAGS. - eval_early_stopping_metric_minimize, - use_tpu=FLAGS.use_tpu) - - -def create_run_config(hp): - return tpu_trainer_lib.create_run_config( - model_dir=os.path.expanduser(FLAGS.output_dir), - master=FLAGS.master, - iterations_per_loop=FLAGS.iterations_per_loop, - num_shards=FLAGS.tpu_num_shards, - log_device_placement=FLAGS.log_device_placement, - save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency), - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, - num_gpus=FLAGS.worker_gpu, - gpu_order=FLAGS.gpu_order, - shard_to_cpu=FLAGS.locally_shard_to_cpu, - num_async_replicas=FLAGS.worker_replicas, - gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, - enable_graph_rewriter=FLAGS.experimental_optimize_placement, - use_tpu=FLAGS.use_tpu, - schedule=FLAGS.schedule, - no_data_parallelism=hp.no_data_parallelism, - daisy_chain_variables=hp.daisy_chain_variables, - ps_replicas=FLAGS.ps_replicas, - ps_job=FLAGS.ps_job, - ps_gpu=FLAGS.ps_gpu, - sync=FLAGS.sync, - worker_id=FLAGS.worker_id, - worker_job=FLAGS.worker_job) - - -def generate_data(): - # Generate data if requested. - data_dir = os.path.expanduser(FLAGS.data_dir) - tmp_dir = os.path.expanduser(FLAGS.tmp_dir) - tf.gfile.MakeDirs(data_dir) - tf.gfile.MakeDirs(tmp_dir) - - problem_name = get_problem_name() - tf.logging.info("Generating data for %s" % problem_name) - registry.problem(problem_name).generate_data(data_dir, tmp_dir) - - -@contextlib.contextmanager -def profile_context(): - if FLAGS.profile: - with tf.contrib.tfprof.ProfileContext("t2tprof", - trace_steps=range(100), - dump_steps=range(100)) as pctx: - opts = tf.profiler.ProfileOptionBuilder.time_and_memory() - pctx.add_auto_profiling("op", opts, range(100)) - yield - else: - yield - - -def log_registry(): - if FLAGS.registry_help: - tf.logging.info(registry.help_string()) - sys.exit(0) - - -def execute_schedule(exp): - if not hasattr(exp, FLAGS.schedule): - raise ValueError( - "Experiment has no method %s, from --schedule" % FLAGS.schedule) - with profile_context(): - getattr(exp, FLAGS.schedule)() - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - tpu_trainer_lib.set_random_seed(FLAGS.random_seed) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - log_registry() - - if FLAGS.generate_data: - generate_data() - - hparams = create_hparams() - run_config = create_run_config(hparams) - - exp_fn = create_experiment_fn() - exp = exp_fn(run_config, hparams) - execute_schedule(exp) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-translate-all b/tensor2tensor/bin/t2t-translate-all deleted file mode 100755 index 1ee7e535f..000000000 --- a/tensor2tensor/bin/t2t-translate-all +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Translate a file with all checkpoints in a given directory. - -t2t-decoder will be executed with these parameters: ---problems ---data_dir ---output_dir with the value of --model_dir ---decode_from_file with the value of --source ---decode_hparams with properly formated --beam_size and --alpha ---checkpoint_path automatically filled ---decode_to_file automatically filled -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import shutil -import tensorflow as tf -from tensor2tensor.utils import bleu_hook - - -flags = tf.flags - -# t2t-translate-all specific options -flags.DEFINE_string("decoder_command", "t2t-decoder {params}", - "Which command to execute instead t2t-decoder." - "{params} is replaced by the parameters. Useful e.g. for qsub wrapper.") -flags.DEFINE_string("model_dir", "", "Directory to load model checkpoints from.") -flags.DEFINE_string("source", None, "Path to the source-language file to be translated") -flags.DEFINE_string("translations_dir", "translations", "Where to store the translated files.") -flags.DEFINE_integer("min_steps", 0, "Ignore checkpoints with less steps.") -flags.DEFINE_integer("wait_minutes", 0, "Wait upto N minutes for a new checkpoint") - -# options derived from t2t-decoder -flags.DEFINE_integer("beam_size", 4, "Beam-search width.") -flags.DEFINE_float("alpha", 0.6, "Beam-search alpha.") -flags.DEFINE_string("model", "transformer", "see t2t-decoder") -flags.DEFINE_string("t2t_usr_dir", None, "see t2t-decoder") -flags.DEFINE_string("data_dir", None, "see t2t-decoder") -flags.DEFINE_string("problems", None, "see t2t-decoder") -flags.DEFINE_string("hparams_set", "transformer_big_single_gpu", "see t2t-decoder") - - -def main(_): - FLAGS = flags.FLAGS - tf.logging.set_verbosity(tf.logging.INFO) - model_dir = os.path.expanduser(FLAGS.model_dir) - translations_dir = os.path.expanduser(FLAGS.translations_dir) - source = os.path.expanduser(FLAGS.source) - os.makedirs(translations_dir, exist_ok=True) - translated_base_file = os.path.join(translations_dir, FLAGS.problems) - - # Copy flags.txt with the original time, so t2t-bleu can report correct relative time. - flags_path = os.path.join(translations_dir, FLAGS.problems + '-flags.txt') - if not os.path.exists(flags_path): - shutil.copy2(os.path.join(model_dir, 'flags.txt'), flags_path) - - for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps): - tf.logging.info("Translating " + model.filename) - out_file = translated_base_file + '-' + str(model.steps) - if os.path.exists(out_file): - tf.logging.info(out_file + " already exists, so skipping it.") - else: - tf.logging.info("Translating " + out_file) - params = ("--t2t_usr_dir={FLAGS.t2t_usr_dir} --output_dir={model_dir} " - "--data_dir={FLAGS.data_dir} --problems={FLAGS.problems} " - "--decode_hparams=beam_size={FLAGS.beam_size},alpha={FLAGS.alpha} " - "--model={FLAGS.model} --hparams_set={FLAGS.hparams_set} " - "--checkpoint_path={model.filename} --decode_from_file={source} " - "--decode_to_file={out_file}".format(**locals())) - command = FLAGS.decoder_command.format(**locals()) - tf.logging.info("Running:\n" + command) - os.system(command) - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 571a21839..99ec99b20 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -13,177 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Train on TPU.""" +"""Trainer for T2T models. See tpu_trainer.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import contextlib -import os -import sys - # Dependency imports -from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor import problems as problems_lib # pylint: disable=unused-import -from tensor2tensor.tpu import tpu_trainer_lib -from tensor2tensor.utils import decoding -from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import -from tensor2tensor.utils import registry -from tensor2tensor.utils import usr_dir +from tensor2tensor.tpu import tpu_trainer import tensorflow as tf -flags = tf.flags -FLAGS = flags.FLAGS - -# See flags.py for additional command-line flags. -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_model calls, that will then be " - "available to the t2t-trainer.") -flags.DEFINE_integer("random_seed", 1234, "Random seed.") -flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") -flags.DEFINE_integer("iterations_per_loop", 1000, - "Number of iterations in a TPU training loop.") -flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") -flags.DEFINE_bool("generate_data", False, "Generate data before training?") -flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", - "Temporary storage directory, used if --generate_data.") -flags.DEFINE_bool("profile", False, "Profile performance?") - -# To maintain compatibility with some internal libs, we guard against these flag -# definitions possibly erroring. Apologies for the ugliness. -try: - flags.DEFINE_string("master", "", "Address of TensorFlow master.") - flags.DEFINE_string("output_dir", "", "Base output directory for run.") - flags.DEFINE_string("schedule", "continuous_train_and_eval", - "Method of Experiment to run.") - flags.DEFINE_integer("eval_steps", 10000, - "Number of steps in evaluation. By default, eval will " - "stop after eval_steps or when it runs through the eval " - "dataset once in full, whichever comes first, so this " - "can be a very large number.") -except: # pylint: disable=bare-except - pass - - -def get_problem_name(): - problems = FLAGS.problems.split("-") - assert len(problems) == 1 - return problems[0] - - -def create_hparams(): - return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) - - -def create_experiment_fn(): - return tpu_trainer_lib.create_experiment_fn( - model_name=FLAGS.model, - problem_name=get_problem_name(), - data_dir=os.path.expanduser(FLAGS.data_dir), - train_steps=FLAGS.train_steps, - eval_steps=FLAGS.eval_steps, - min_eval_frequency=FLAGS.local_eval_frequency, - schedule=FLAGS.schedule, - export=FLAGS.export_saved_model, - decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), - use_tfdbg=FLAGS.tfdbg, - use_dbgprofile=FLAGS.dbgprofile, - eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, - eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, - eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, - eval_early_stopping_metric_minimize=FLAGS. - eval_early_stopping_metric_minimize, - use_tpu=FLAGS.use_tpu) - - -def create_run_config(hp): - return tpu_trainer_lib.create_run_config( - model_dir=os.path.expanduser(FLAGS.output_dir), - master=FLAGS.master, - iterations_per_loop=FLAGS.iterations_per_loop, - num_shards=FLAGS.tpu_num_shards, - log_device_placement=FLAGS.log_device_placement, - save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency), - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, - num_gpus=FLAGS.worker_gpu, - gpu_order=FLAGS.gpu_order, - shard_to_cpu=FLAGS.locally_shard_to_cpu, - num_async_replicas=FLAGS.worker_replicas, - gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, - enable_graph_rewriter=FLAGS.experimental_optimize_placement, - use_tpu=FLAGS.use_tpu, - schedule=FLAGS.schedule, - no_data_parallelism=hp.no_data_parallelism, - daisy_chain_variables=hp.daisy_chain_variables, - ps_replicas=FLAGS.ps_replicas, - ps_job=FLAGS.ps_job, - ps_gpu=FLAGS.ps_gpu, - sync=FLAGS.sync, - worker_id=FLAGS.worker_id, - worker_job=FLAGS.worker_job) - - -def generate_data(): - # Generate data if requested. - data_dir = os.path.expanduser(FLAGS.data_dir) - tmp_dir = os.path.expanduser(FLAGS.tmp_dir) - tf.gfile.MakeDirs(data_dir) - tf.gfile.MakeDirs(tmp_dir) - - problem_name = get_problem_name() - tf.logging.info("Generating data for %s" % problem_name) - registry.problem(problem_name).generate_data(data_dir, tmp_dir) - - -@contextlib.contextmanager -def profile_context(): - if FLAGS.profile: - with tf.contrib.tfprof.ProfileContext("t2tprof", - trace_steps=range(100), - dump_steps=range(100)) as pctx: - opts = tf.profiler.ProfileOptionBuilder.time_and_memory() - pctx.add_auto_profiling("op", opts, range(100)) - yield - else: - yield - - -def log_registry(): - if FLAGS.registry_help: - tf.logging.info(registry.help_string()) - sys.exit(0) - - -def execute_schedule(exp): - if not hasattr(exp, FLAGS.schedule): - raise ValueError( - "Experiment has no method %s, from --schedule" % FLAGS.schedule) - with profile_context(): - getattr(exp, FLAGS.schedule)() - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - tpu_trainer_lib.set_random_seed(FLAGS.random_seed) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - log_registry() - - if FLAGS.generate_data: - generate_data() - - hparams = create_hparams() - run_config = create_run_config(hparams) - exp_fn = create_experiment_fn() - exp = exp_fn(run_config, hparams) - execute_schedule(exp) +def main(unused_argv): + tpu_trainer.main(unused_argv) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index c657a503f..236d43772 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -316,8 +316,7 @@ def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, def get_or_generate_vocab(data_dir, tmp_dir, vocab_filename, vocab_size, - sources, - _file_byte_budget=1e6): + sources): """Generate a vocabulary from the datasets in sources.""" def generate(): @@ -350,7 +349,7 @@ def generate(): # Use Tokenizer to count the word occurrences. with tf.gfile.GFile(filepath, mode="r") as source_file: - file_byte_budget = _file_byte_budget + file_byte_budget = 1e6 counter = 0 countermax = int(source_file.size() / file_byte_budget / 2) for line in source_file: diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index d3ddd8d98..52b364137 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -42,145 +42,28 @@ # This is far from being the real WMT17 task - only toyset here # you need to register to get UN data and CWT data. Also, by convention, # this is EN to ZH - use translate_enzh_wmt8k_rev for ZH to EN task -# -# News Commentary, around 220k lines -# This dataset is only a small fraction of full WMT17 task -_NC_TRAIN_DATASETS = [[ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", - ["training/news-commentary-v12.zh-en.en", - "training/news-commentary-v12.zh-en.zh"]]] +_ENZH_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" + "training-parallel-nc-v12.tgz"), + ("training/news-commentary-v12.zh-en.en", + "training/news-commentary-v12.zh-en.zh")]] -# Test set from News Commentary. 2000 lines -_NC_TEST_DATASETS = [[ +_ENZH_TEST_DATASETS = [[ "http://data.statmt.org/wmt17/translation-task/dev.tgz", ("dev/newsdev2017-enzh-src.en.sgm", "dev/newsdev2017-enzh-ref.zh.sgm") ]] -# UN parallel corpus. 15,886,041 lines -# Visit source website to download manually: -# https://conferences.unite.un.org/UNCorpus -# -# NOTE: You need to register to download dataset from official source -# place into tmp directory e.g. /tmp/t2t_datagen/dataset.tgz -_UN_TRAIN_DATASETS = [[ - "https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/UNv1.0.en-zh.tar.gz", - ["en-zh/UNv1.0.en-zh.en", - "en-zh/UNv1.0.en-zh.zh"]]] - -# CWMT corpus -# Visit source website to download manually: -# http://nlp.nju.edu.cn/cwmt-wmt/ -# -# casia2015: 1,050,000 lines -# casict2015: 2,036,833 lines -# datum2015: 1,000,003 lines -# datum2017: 1,999,968 lines -# NEU2017: 2,000,000 lines -# -# NOTE: You need to register to download dataset from official source -# place into tmp directory e.g. /tmp/t2t_datagen/dataset.tgz - -_CWMT_TRAIN_DATASETS = [ - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/casia2015/casia2015_en.txt", - "cwmt/casia2015/casia2015_ch.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/casict2015/casict2015_en.txt", - "cwmt/casict2015/casict2015_ch.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/neu2017/NEU_en.txt", - "cwmt/neu2017/NEU_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2015/datum_en.txt", - "cwmt/datum2015/datum_ch.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book1_en.txt", - "cwmt/datum2017/Book1_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book2_en.txt", - "cwmt/datum2017/Book2_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book3_en.txt", - "cwmt/datum2017/Book3_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book4_en.txt", - "cwmt/datum2017/Book4_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book5_en.txt", - "cwmt/datum2017/Book5_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book6_en.txt", - "cwmt/datum2017/Book6_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book7_en.txt", - "cwmt/datum2017/Book7_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book8_en.txt", - "cwmt/datum2017/Book8_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book9_en.txt", - "cwmt/datum2017/Book9_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book10_en.txt", - "cwmt/datum2017/Book10_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book11_en.txt", - "cwmt/datum2017/Book11_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book12_en.txt", - "cwmt/datum2017/Book12_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book13_en.txt", - "cwmt/datum2017/Book13_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book14_en.txt", - "cwmt/datum2017/Book14_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book15_en.txt", - "cwmt/datum2017/Book15_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book16_en.txt", - "cwmt/datum2017/Book16_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book17_en.txt", - "cwmt/datum2017/Book17_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book18_en.txt", - "cwmt/datum2017/Book18_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book19_en.txt", - "cwmt/datum2017/Book19_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book20_en.txt", - "cwmt/datum2017/Book20_cn.txt"]] -] - - -def get_filename(dataset): - return dataset[0][0].split('/')[-1] @registry.register_problem -class TranslateEnzhWmt32k(translate.TranslateProblem): - """Problem spec for WMT En-Zh translation. - Attempts to use full training dataset, which needs website - registration and downloaded manually from official sources: - - CWMT: - - http://nlp.nju.edu.cn/cwmt-wmt/ - - Website contrains instructions for FTP server access. - - You'll need to download CASIA, CASICT, DATUM2015, DATUM2017, - NEU datasets - - UN Parallel Corpus: - - https://conferences.unite.un.org/UNCorpus - - You'll need to register your to download the dataset. - - NOTE: place into tmp directory e.g. /tmp/t2t_datagen/dataset.tgz - """ +class TranslateEnzhWmt8k(translate.TranslateProblem): + """Problem spec for WMT En-Zh translation.""" @property def targeted_vocab_size(self): - return 2**15 # 32k + return 2**13 # 8192 + + @property + def num_shards(self): + return 10 # This is a small dataset. @property def source_vocab_name(self): @@ -189,35 +72,20 @@ def source_vocab_name(self): @property def target_vocab_name(self): return "vocab.enzh-zh.%d" % self.targeted_vocab_size - - def get_training_dataset(self, tmp_dir): - """UN Parallel Corpus and CWMT Corpus need to be downloaded manually. - Append to training dataset if available - """ - full_dataset = _NC_TRAIN_DATASETS - for dataset in [_CWMT_TRAIN_DATASETS, _UN_TRAIN_DATASETS]: - filename = get_filename(dataset) - tmp_filepath = os.path.join(tmp_dir, filename) - if tf.gfile.Exists(tmp_filepath): - full_dataset = full_dataset + dataset - else: - tf.logging.info("[TranslateEzhWmt] dataset incomplete, you need to manually download %s" % filename) - return full_dataset def generator(self, data_dir, tmp_dir, train): - TRAIN_DATASET = self.get_training_dataset(tmp_dir) - datasets = TRAIN_DATASET if train else _NC_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in TRAIN_DATASET] - target_datasets = [[item[0], [item[1][1]]] for item in TRAIN_DATASET] + datasets = _ENZH_TRAIN_DATASETS if train else _ENZH_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in _ENZH_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ENZH_TRAIN_DATASETS] source_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, - source_datasets, _file_byte_budget=1e8) + source_datasets) target_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.target_vocab_name, self.targeted_vocab_size, - target_datasets, _file_byte_budget=1e8) + target_datasets) tag = "train" if train else "dev" - filename_base = "wmt_enzh_%sk_tok_%s" % (self.targeted_vocab_size, tag) - data_path = translate.compile_data(tmp_dir, datasets, filename_base) + data_path = translate.compile_data(tmp_dir, datasets, + "wmt_enzh_tok_%s" % tag) return translate.bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", source_vocab, target_vocab, EOS) @@ -239,22 +107,3 @@ def feature_encoders(self, data_dir): "inputs": source_token, "targets": target_token, } - - -@registry.register_problem -class TranslateEnzhWmt8k(TranslateEnzhWmt32k): - """Problem spec for WMT En-Zh translation. - This is far from being the real WMT17 task - only toyset here - """ - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - @property - def num_shards(self): - return 10 # This is a small dataset. - - def get_training_dataset(self, tmp_dir): - """Uses only News Commentary Dataset for training""" - return _NC_TRAIN_DATASETS diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 0e305ef54..640730864 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -76,7 +76,7 @@ def shakeshake2_py(x, y, equal=False, individual=False): """The shake-shake sum of 2 tensors, python version.""" if equal: alpha = 0.5 - elif individual: + if individual: alpha = tf.random_uniform(tf.get_shape(x)[:1]) else: alpha = tf.random_uniform([]) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index c43342afd..22d842c73 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -142,10 +142,11 @@ def nearest(x, means, hparams): """Find the nearest means to elements in x.""" x, means = tf.stop_gradient(x), tf.stop_gradient(means) x_flat = tf.reshape(x, [-1, hparams.hidden_size]) - x_norm = tf.norm(x_flat, axis=-1, keep_dims=True) - means_norm = tf.norm(means, axis=-1, keep_dims=True) - dist = x_norm + tf.transpose(means_norm) - 2 * tf.matmul(x_flat, means, - transpose_b=True) + x_norm_sq = tf.reduce_sum(x_flat ** 2, axis=-1, keep_dims=True) + means_norm_sq = tf.reduce_sum(means ** 2, axis=-1, keep_dims=True) + dist = ( + x_norm_sq + tf.transpose(means_norm_sq) - + 2 * tf.matmul(x_flat, means, transpose_b=True)) _, nearest_idx = tf.nn.top_k(- dist, k=1) nearest_hot = tf.one_hot(tf.squeeze(nearest_idx, axis=1), hparams.v_size) shape = common_layers.shape_list(x) @@ -158,8 +159,9 @@ def kmeans(x, means, hparams, name): with tf.variable_scope(name): x_means_hot = nearest(x, means, hparams) x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) - reg_loss1 = tf.nn.l2_loss((tf.stop_gradient(x) - x_means)) - reg_loss2 = hparams.beta * tf.nn.l2_loss((x - tf.stop_gradient(x_means))) + reg_loss1 = tf.reduce_mean((tf.stop_gradient(x) - x_means)**2) + reg_loss2 = hparams.beta * tf.reduce_mean( + (x - tf.stop_gradient(x_means))**2) l = reg_loss1 + reg_loss2 return x_means_hot, x_means, l @@ -198,8 +200,10 @@ def embed(x): hot = tf.one_hot(x, hparams.v_size) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") elif hparams.bottleneck_kind == "vq-vae": - means = tf.get_variable(name="means", - shape=[hparams.v_size, hparams.hidden_size]) + means = tf.get_variable( + name="means", + shape=[hparams.v_size, hparams.hidden_size], + initializer=tf.random_normal_initializer()) h1 = tf.gather(means, x) elif hparams.bottleneck_kind == "rounding": h1 = x @@ -245,8 +249,10 @@ def embed(x): c = tf.argmax(hot, axis=-1) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") if hparams.bottleneck_kind == "vq-vae": - means = tf.get_variable(name="means", shape=[hparams.v_size, - hparams.hidden_size]) + means = tf.get_variable( + name="means", + shape=[hparams.v_size, hparams.hidden_size], + initializer=tf.random_normal_initializer()) x_means_hot, x_means, l = kmeans(x, means, hparams, name="vq-vae-kmeans") h1 = tf.stop_gradient(x_means) + x - tf.stop_gradient(x) c = tf.argmax(x_means_hot, axis=-1) diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 50caf09bf..49b31c1bb 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -20,12 +20,9 @@ import collections import math -import os import re import sys -import time import unicodedata -from collections import namedtuple # Dependency imports @@ -153,7 +150,7 @@ def __init__(self): def _property_chars(prefix): return ''.join(six.unichr(x) for x in range(sys.maxunicode) if unicodedata.category(six.unichr(x)).startswith(prefix)) - punctuation = _property_chars('P') + punctuation = self._property_chars('P') self.nondigit_punct_re = re.compile(r'([^\d])([' + punctuation + r'])') self.punct_nondigit_re = re.compile(r'([' + punctuation + r'])([^\d])') self.symbol_re = re.compile('([' + _property_chars('S') + '])') @@ -183,10 +180,9 @@ def bleu_tokenize(string): Returns: a list of tokens """ - uregex = UnicodeRegex() - string = uregex.nondigit_punct_re.sub(r'\1 \2 ', string) - string = uregex.punct_nondigit_re.sub(r' \1 \2', string) - string = uregex.symbol_re.sub(r' \1 ', string) + string = UnicodeRegex.nondigit_punct_re.sub(r'\1 \2 ', string) + string = UnicodeRegex.punct_nondigit_re.sub(r' \1 \2', string) + string = UnicodeRegex.symbol_re.sub(r' \1 ', string) return string.split() @@ -201,68 +197,3 @@ def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False): ref_tokens = [bleu_tokenize(x) for x in ref_lines] hyp_tokens = [bleu_tokenize(x) for x in hyp_lines] return compute_bleu(ref_tokens, hyp_tokens) - - -StepFile = namedtuple('StepFile', 'filename mtime ctime steps') - - -def _read_stepfiles_list(path_prefix, path_suffix='.index', min_steps=0): - stepfiles = [] - for filename in tf.gfile.Glob(path_prefix + '*-[0-9]*' + path_suffix): - basename = filename[:-len(path_suffix)] if len(path_suffix) else filename - try: - steps = int(basename.rsplit('-')[-1]) - except ValueError: # The -[0-9]* part is not an integer. - continue - if steps < min_steps: - continue - if not os.path.exists(filename): - tf.logging.info(filename + " was deleted, so skipping it") - continue - stepfiles.append(StepFile(basename, os.path.getmtime(filename), - os.path.getctime(filename), steps)) - return sorted(stepfiles, key=lambda x: -x.steps) - - -def stepfiles_iterator(path_prefix, wait_minutes=0, min_steps=0, - path_suffix='.index', sleep_sec=10): - """Continuously yield new files with steps in filename as they appear. - - This is useful for checkpoint files or other files whose names differ just in an interger - marking the number of steps and match the wildcard path_prefix + '*-[0-9]*' + path_suffix. - Unlike `tf.contrib.training.checkpoints_iterator`, this - implementation always starts from the oldest files - (and it cannot miss any file). Note that the oldest checkpoint - may be deleted anytime by Tensorflow (if set up so). It is up to the user - to check that the files returned by this generator actually exist. - Args: - path_prefix: The directory + possible common filename prefix to the files. - path_suffix: Common filename suffix (after steps), including possible extension dot. - wait_minutes: The maximum amount of minutes to wait between files. - min_steps: Skip files with lower global step. - sleep_sec: How often to check for new files. - Yields: - named tuples (filename, mtime, ctime, steps) of the files as they arrive. - """ - # Wildcard D*-[0-9]* does not match D/x-1, so if D is a directory let path_prefix='D/'. - if not path_prefix.endswith(os.sep) and os.path.isdir(path_prefix): - path_prefix += os.sep - stepfiles = _read_stepfiles_list(path_prefix, path_suffix, min_steps) - tf.logging.info("Found %d files with steps: %s" - % (len(stepfiles), ", ".join(str(x.steps) for x in reversed(stepfiles)))) - exit_time = time.time() + wait_minutes * 60 - while True: - if not stepfiles and wait_minutes: - tf.logging.info('Waiting till %s if a new file matching %s*-[0-9]*%s appears' - % (time.asctime(time.localtime(exit_time)), path_prefix, path_suffix)) - while True: - stepfiles = _read_stepfiles_list(path_prefix, path_suffix, min_steps) - if stepfiles or time.time() > exit_time: - break - time.sleep(sleep_sec) - if not stepfiles: - return - - stepfile = stepfiles.pop() - exit_time, min_steps = stepfile.ctime + wait_minutes * 60, stepfile.steps + 1 - yield stepfile diff --git a/tensor2tensor/utils/bleu_hook_test.py b/tensor2tensor/utils/bleu_hook_test.py index b616aaf7c..e4f3a18a9 100644 --- a/tensor2tensor/utils/bleu_hook_test.py +++ b/tensor2tensor/utils/bleu_hook_test.py @@ -57,9 +57,5 @@ def testComputeMultipleNgrams(self): actual_bleu = 0.3436 self.assertAllClose(bleu, actual_bleu, atol=1e-03) - def testBleuTokenize(self): - self.assertEqual(bleu_hook.bleu_tokenize(u'hi, “there”'), [u'hi', u',', u'“', u'there', u'”']) - - if __name__ == '__main__': tf.test.main() From 872ce75692eb09f41067d4a314f63e02b037ec9d Mon Sep 17 00:00:00 2001 From: T2T Team Date: Thu, 28 Dec 2017 16:13:10 -0800 Subject: [PATCH 0691/4095] Use exponential moving average for the VQ-VAE embeddings. PiperOrigin-RevId: 180302324 --- tensor2tensor/models/transformer_vae.py | 84 +++++++++++++++++++------ 1 file changed, 64 insertions(+), 20 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 22d842c73..f187e2d71 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -25,6 +25,7 @@ from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model import tensorflow as tf +from tensorflow.python.training import moving_averages _DO_SUMMARIES = True @@ -140,15 +141,14 @@ def vae(x, z_size, name): def nearest(x, means, hparams): """Find the nearest means to elements in x.""" - x, means = tf.stop_gradient(x), tf.stop_gradient(means) x_flat = tf.reshape(x, [-1, hparams.hidden_size]) x_norm_sq = tf.reduce_sum(x_flat ** 2, axis=-1, keep_dims=True) means_norm_sq = tf.reduce_sum(means ** 2, axis=-1, keep_dims=True) dist = ( x_norm_sq + tf.transpose(means_norm_sq) - 2 * tf.matmul(x_flat, means, transpose_b=True)) - _, nearest_idx = tf.nn.top_k(- dist, k=1) - nearest_hot = tf.one_hot(tf.squeeze(nearest_idx, axis=1), hparams.v_size) + nearest_idx = tf.argmax(-dist, axis=-1) + nearest_hot = tf.one_hot(nearest_idx, hparams.v_size) shape = common_layers.shape_list(x) shape[-1] = hparams.v_size nearest_hot = tf.reshape(nearest_hot, shape=shape) @@ -156,14 +156,12 @@ def nearest(x, means, hparams): def kmeans(x, means, hparams, name): - with tf.variable_scope(name): + with tf.variable_scope(name, reuse=tf.AUTO_REUSE): x_means_hot = nearest(x, means, hparams) x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) - reg_loss1 = tf.reduce_mean((tf.stop_gradient(x) - x_means)**2) - reg_loss2 = hparams.beta * tf.reduce_mean( - (x - tf.stop_gradient(x_means))**2) - l = reg_loss1 + reg_loss2 - return x_means_hot, x_means, l + q_loss = tf.reduce_mean((tf.stop_gradient(x) - x_means)**2) + e_loss = tf.reduce_mean((x - tf.stop_gradient(x_means))**2) + return x_means_hot, x_means, q_loss, e_loss def bit_to_int(x_bit, nbits): @@ -200,10 +198,17 @@ def embed(x): hot = tf.one_hot(x, hparams.v_size) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") elif hparams.bottleneck_kind == "vq-vae": - means = tf.get_variable( - name="means", - shape=[hparams.v_size, hparams.hidden_size], - initializer=tf.random_normal_initializer()) + if hparams.ema: + ema_means = tf.get_variable( + name="ema_means", + shape=[hparams.v_size, hparams.hidden_size], + initializer=tf.random_normal_initializer()) + means = ema_means + else: + means = tf.get_variable( + name="means", + shape=[hparams.v_size, hparams.hidden_size], + initializer=tf.random_normal_initializer()) h1 = tf.gather(means, x) elif hparams.bottleneck_kind == "rounding": h1 = x @@ -249,13 +254,49 @@ def embed(x): c = tf.argmax(hot, axis=-1) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") if hparams.bottleneck_kind == "vq-vae": - means = tf.get_variable( - name="means", - shape=[hparams.v_size, hparams.hidden_size], - initializer=tf.random_normal_initializer()) - x_means_hot, x_means, l = kmeans(x, means, hparams, name="vq-vae-kmeans") - h1 = tf.stop_gradient(x_means) + x - tf.stop_gradient(x) + means = tf.Variable( + tf.random_normal([hparams.v_size, hparams.hidden_size]), name="means") + + # Use EMA if ema flag is set + if hparams.ema: + ema_count = tf.get_variable( + "ema_count", [hparams.v_size], + initializer=tf.constant_initializer(0)) + with tf.colocate_with(means): + ema_means = tf.get_variable( + "ema_means", initializer=means.initialized_value()) + + x_means_hot, x_means, q_loss, e_loss = kmeans( + x, means, hparams, name="vq-vae-kmeans") c = tf.argmax(x_means_hot, axis=-1) + + # Update the ema variables + if hparams.ema: + tf.logging.info("Using EMA with beta = {}".format(hparams.beta)) + x_means_hot_flat = tf.reshape(x_means_hot, shape=[-1, hparams.v_size]) + updated_ema_count = moving_averages.assign_moving_average( + ema_count, + tf.reduce_sum(x_means_hot_flat, axis=0), + hparams.decay, + zero_debias=False) + x_flat = tf.reshape(x, [-1, hparams.hidden_size]) + dw = tf.matmul(x_means_hot_flat, x_flat, transpose_a=True) + updated_ema_means = moving_averages.assign_moving_average( + ema_means, dw, hparams.decay, zero_debias=False) + n = tf.reduce_sum(updated_ema_count) + updated_ema_count = ((updated_ema_count + hparams.epsilon) / + (n + hparams.v_size * hparams.epsilon) * n) + updated_ema_means /= tf.expand_dims(updated_ema_count, axis=-1) + + with tf.control_dependencies([e_loss]): + update_w = tf.assign(means, updated_ema_means) + with tf.control_dependencies([update_w]): + l = hparams.beta * e_loss + else: + l = q_loss + e_loss + + h1 = tf.stop_gradient(x_means) + x - tf.stop_gradient(x) + if hparams.bottleneck_kind == "rounding": h = tf.layers.dense(x, 1, name="vcc") @@ -594,6 +635,9 @@ def transformer_ae_small(): hparams.add_hparam("do_vae", True) hparams.add_hparam("bit_vae", True) hparams.add_hparam("beta", 0.25) + hparams.add_hparam("epsilon", 1e-5) + hparams.add_hparam("decay", 0.999) + hparams.add_hparam("ema", True) hparams.kl_warmup_steps = 150000 hparams.force_full_predict = True return hparams @@ -609,7 +653,7 @@ def transformer_ae_cifar(): hparams.num_compress_steps = 2 hparams.v_size = 1024 * 64 hparams.kl_warmup_steps = 150000 - hparams.startup_steps = 20000 + hparams.startup_steps = 10000 hparams.kmeans_lr_factor = 0.0 hparams.is_2d = 1 hparams.learning_rate_warmup_steps = 8000 From a84f42507a2d588891e355f8bac74276d0baed54 Mon Sep 17 00:00:00 2001 From: T2T Team Date: Mon, 31 Dec 2018 01:56:33 -0800 Subject: [PATCH 0692/4095] Introduces fixes to get VQ-VAE working. PiperOrigin-RevId: 180425931 --- tensor2tensor/models/transformer_vae.py | 118 +++++++++++++++--------- 1 file changed, 73 insertions(+), 45 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index f187e2d71..2d0e14990 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -155,13 +155,12 @@ def nearest(x, means, hparams): return tf.stop_gradient(nearest_hot) -def kmeans(x, means, hparams, name): - with tf.variable_scope(name, reuse=tf.AUTO_REUSE): - x_means_hot = nearest(x, means, hparams) - x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) - q_loss = tf.reduce_mean((tf.stop_gradient(x) - x_means)**2) - e_loss = tf.reduce_mean((x - tf.stop_gradient(x_means))**2) - return x_means_hot, x_means, q_loss, e_loss +def kmeans(x, means, hparams): + x_means_hot = nearest(x, means, hparams) + x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) + q_loss = tf.reduce_mean((tf.stop_gradient(x) - x_means)**2) + e_loss = tf.reduce_mean((x - tf.stop_gradient(x_means))**2) + return x_means_hot, x_means, q_loss, e_loss def bit_to_int(x_bit, nbits): @@ -184,11 +183,23 @@ def int_to_bit(x_int, nbits): return tf.to_float(res) -def bottleneck(x, hparams, filter_size, name): +def bottleneck(x, + hparams, + filter_size, + name, + means=None, + ema_count=None, + ema_means=None): """Bottleneck.""" + if hparams.bottleneck_kind == "vq-vae": + assert means is not None + if hparams.ema: + assert ema_count is not None + assert ema_means is not None + def embed(x): """Embedding function; must be compatible with the code later.""" - with tf.variable_scope(name, reuse=True): + with tf.variable_scope(name, reuse=tf.AUTO_REUSE): if hparams.bottleneck_kind == "semhash": c = int_to_bit(x, z_size) h1a = tf.layers.dense(c, filter_size, name="vch1a") @@ -199,16 +210,11 @@ def embed(x): h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") elif hparams.bottleneck_kind == "vq-vae": if hparams.ema: - ema_means = tf.get_variable( - name="ema_means", - shape=[hparams.v_size, hparams.hidden_size], - initializer=tf.random_normal_initializer()) + ema_means = tf.get_variable(name="ema_means") means = ema_means else: - means = tf.get_variable( - name="means", - shape=[hparams.v_size, hparams.hidden_size], - initializer=tf.random_normal_initializer()) + tf.logging.info("means = {}".format(means)) + h1 = tf.gather(means, x) elif hparams.bottleneck_kind == "rounding": h1 = x @@ -216,7 +222,7 @@ def embed(x): h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") return tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") - with tf.variable_scope(name): + with tf.variable_scope(name, reuse=tf.AUTO_REUSE): z_size = hparams.z_size l = tf.constant(0.0) if hparams.bottleneck_kind == "dense": @@ -254,20 +260,7 @@ def embed(x): c = tf.argmax(hot, axis=-1) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") if hparams.bottleneck_kind == "vq-vae": - means = tf.Variable( - tf.random_normal([hparams.v_size, hparams.hidden_size]), name="means") - - # Use EMA if ema flag is set - if hparams.ema: - ema_count = tf.get_variable( - "ema_count", [hparams.v_size], - initializer=tf.constant_initializer(0)) - with tf.colocate_with(means): - ema_means = tf.get_variable( - "ema_means", initializer=means.initialized_value()) - - x_means_hot, x_means, q_loss, e_loss = kmeans( - x, means, hparams, name="vq-vae-kmeans") + x_means_hot, x_means, q_loss, e_loss = kmeans(x, means, hparams) c = tf.argmax(x_means_hot, axis=-1) # Update the ema variables @@ -289,8 +282,8 @@ def embed(x): updated_ema_means /= tf.expand_dims(updated_ema_count, axis=-1) with tf.control_dependencies([e_loss]): - update_w = tf.assign(means, updated_ema_means) - with tf.control_dependencies([update_w]): + update_means = tf.assign(means, updated_ema_means) + with tf.control_dependencies([update_means]): l = hparams.beta * e_loss else: l = q_loss + e_loss @@ -400,8 +393,15 @@ def next_bit(latents_discrete, i): return latents_discrete -def ae_transformer_internal(inputs, targets, target_space, hparams, - cache=None, predict_mask=1.0): +def ae_transformer_internal(inputs, + targets, + target_space, + hparams, + cache=None, + predict_mask=1.0, + means=None, + ema_count=None, + ema_means=None): """AE Transformer, main step used for training.""" # Summaries break with the do_refine cond, turn them off in that case. global _DO_SUMMARIES @@ -430,7 +430,7 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, if hparams.mode != tf.estimator.ModeKeys.PREDICT: # Compress and bottleneck. latents_dense, latents_discrete, extra_loss, _ = bottleneck( - targets_c, hparams, 2*2048, "vc") + targets_c, hparams, 2 * 2048, "vc", means, ema_count, ema_means) if _DO_SUMMARIES: tf.summary.histogram("b0", tf.reshape(latents_discrete[:, 0, :], [-1])) pc = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.95 @@ -454,7 +454,8 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, losses["latent_pred"] = tf.reduce_mean((inputs_c - targets_c)**2) * 20 def bn_inputs(): with tf.variable_scope(tf.get_variable_scope(), reuse=True): - bn, _, _, _ = bottleneck(inputs_c, hparams, 2*2048, "vc") + bn, _, _, _ = bottleneck(inputs_c, hparams, 2 * 2048, "vc", means, + ema_count, ema_means) return bn pbn = 0.8 if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 inputs_c = tf.cond(tf.less(tf.random_uniform([]), pbn), @@ -466,10 +467,11 @@ def bn_inputs(): else: if hparams.bottleneck_kind in ["dense", "vae"]: inputs_c = decode_transformer(inputs, ed, targets_c, hparams, "dec_c") - latents_dense, _, _, _ = bottleneck(inputs_c, hparams, 2*2048, "vc") + latents_dense, _, _, _ = bottleneck(inputs_c, hparams, 2 * 2048, "vc", + means, ema_count, ema_means) else: latent_len = common_layers.shape_list(targets_c)[1] - _, _, _, embed = bottleneck(targets_c, hparams, 2*2048, "vc") + _, _, _, embed = bottleneck(targets_c, hparams, 2 * 2048, "vc", means) latents_dense = tf.zeros_like(targets_c[:, :latent_len, :, :]) if cache is None: cache = ae_latent_sample(latents_dense, inputs, ed, embed, 8, hparams) @@ -529,6 +531,25 @@ def __init__(self, *args, **kwargs): super(TransformerAE, self).__init__(*args, **kwargs) self.predict_mask = 1.0 + # Define the embeddings if we are using vq-vae + self.means = None + self.ema_count = None + self.ema_means = None + if self._hparams.bottleneck_kind == "vq-vae": + self.means = tf.get_variable( + name="means", + shape=[self._hparams.v_size, self._hparams.hidden_size], + initializer=tf.random_normal_initializer()) + + # Create the shadow variables if we are using EMA + if self._hparams.ema: + self.ema_count = tf.get_variable( + "ema_count", [self._hparams.v_size], + initializer=tf.constant_initializer(0)) + with tf.colocate_with(self.means): + self.ema_means = tf.get_variable( + "ema_means", initializer=self.means.initialized_value()) + @property def has_input(self): return self._problem_hparams.input_modality @@ -540,9 +561,15 @@ def body(self, features): reuse = "cache_raw" in features with tf.variable_scope(tf.get_variable_scope(), reuse=reuse): res, loss, _ = ae_transformer_internal( - inputs, features["targets"], features["target_space_id"], - self._hparams, features.get("cache_raw", None), - predict_mask=self.predict_mask) + inputs, + features["targets"], + features["target_space_id"], + self._hparams, + features.get("cache_raw", None), + predict_mask=self.predict_mask, + means=self.means, + ema_count=self.ema_count, + ema_means=self.ema_means) return res, loss def prepare_features_for_infer(self, features): @@ -557,7 +584,8 @@ def prepare_features_for_infer(self, features): targets = tf.zeros([beam_batch_size, 1, 1, self._hparams.hidden_size]) with tf.variable_scope("body"): _, _, cache = ae_transformer_internal( - inputs, targets, features["target_space_id"], self._hparams) + inputs, targets, features["target_space_id"], self._hparams, + self.means, self.ema_count, self.ema_means) features["cache_raw"] = cache def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, @@ -635,7 +663,7 @@ def transformer_ae_small(): hparams.add_hparam("do_vae", True) hparams.add_hparam("bit_vae", True) hparams.add_hparam("beta", 0.25) - hparams.add_hparam("epsilon", 1e-5) + hparams.add_hparam("epsilon", 1e-1) hparams.add_hparam("decay", 0.999) hparams.add_hparam("ema", True) hparams.kl_warmup_steps = 150000 From 84ee146fc3849f1e913187fcb7548d3f7895dbe5 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Tue, 2 Jan 2018 10:02:57 -0800 Subject: [PATCH 0693/4095] Add random seed to RunConfig PiperOrigin-RevId: 180558518 --- tensor2tensor/tpu/tpu_trainer.py | 3 ++- tensor2tensor/tpu/tpu_trainer_lib.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 571a21839..47e92da98 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -127,7 +127,8 @@ def create_run_config(hp): ps_gpu=FLAGS.ps_gpu, sync=FLAGS.sync, worker_id=FLAGS.worker_id, - worker_job=FLAGS.worker_job) + worker_job=FLAGS.worker_job, + random_seed=FLAGS.random_seed) def generate_data(): diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index bde85e4db..ff2045302 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -104,6 +104,7 @@ def create_run_config(master="", ps_replicas=0, ps_job="/job:ps", ps_gpu=0, + random_seed=None, sync=False, use_tpu=False): """Create RunConfig, TPUConfig, and Parallelism object.""" @@ -122,6 +123,7 @@ def create_run_config(master="", "save_checkpoints_steps": save_checkpoints_steps, "keep_checkpoint_max": keep_checkpoint_max, "keep_checkpoint_every_n_hours": keep_checkpoint_every_n_hours, + "tf_random_seed": random_seed, } run_config_cls = tf.contrib.learn.RunConfig From 6407b2d35301e19c14c70921d8f4fdfb4da4e09e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Tue, 2 Jan 2018 11:52:04 -0800 Subject: [PATCH 0694/4095] Code moves and renames PiperOrigin-RevId: 180573810 --- .travis.yml | 4 +- docs/overview.md | 6 +- tensor2tensor/bin/t2t_decoder.py | 12 +- tensor2tensor/bin/t2t_trainer.py | 166 ++++++++++++++- tensor2tensor/insights/transformer_model.py | 10 +- tensor2tensor/layers/common_hparams.py | 4 +- tensor2tensor/notebooks/hello_t2t.ipynb | 2 +- tensor2tensor/tpu/__init__.py | 15 -- tensor2tensor/tpu/tpu_trainer.py | 191 ------------------ tensor2tensor/utils/registry.py | 6 +- tensor2tensor/utils/t2t_model.py | 5 +- .../trainer_lib.py} | 2 +- .../trainer_lib_test.py} | 16 +- .../TransformerVisualization.ipynb | 4 +- 14 files changed, 196 insertions(+), 247 deletions(-) delete mode 100644 tensor2tensor/tpu/__init__.py delete mode 100644 tensor2tensor/tpu/tpu_trainer.py rename tensor2tensor/{tpu/tpu_trainer_lib.py => utils/trainer_lib.py} (99%) rename tensor2tensor/{tpu/tpu_trainer_lib_test.py => utils/trainer_lib_test.py} (88%) diff --git a/.travis.yml b/.travis.yml index 7841b0b7e..f424014b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,9 +14,9 @@ env: - T2T_DATA_DIR=/tmp/t2t-data - T2T_TRAIN_DIR=/tmp/t2t-train script: - - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/tpu/tpu_trainer_lib_test.py --ignore=tensor2tensor/data_generators/algorithmic_math_test.py + - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/utils/trainer_lib_test.py --ignore=tensor2tensor/data_generators/algorithmic_math_test.py - pytest tensor2tensor/utils/registry_test.py - - pytest tensor2tensor/tpu/tpu_trainer_lib_test.py + - pytest tensor2tensor/utils/trainer_lib_test.py - t2t-datagen 2>&1 | grep translate && echo passed - python -c "from tensor2tensor.models import transformer; print(transformer.Transformer.__name__)" - t2t-trainer --registry_help diff --git a/docs/overview.md b/docs/overview.md index fcc0aba5a..9ea87bc50 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -14,7 +14,7 @@ to training, evaluation, and decoding. Some key files and their functions: -* [`tpu_trainer.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/tpu/tpu_trainer.py) and [`tpu_trainer_lib.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/tpu/tpu_trainer_lib.py): +* [`t2t_trainer.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t_trainer.py) and [`trainer_lib.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/trainer_lib.py): Main entrypoint for training and evaluation. Constructs and runs all the main components of the system (the `Problem`, the `HParams`, the `Estimator`, the `Experiment`, the `input_fn`s and `model_fn`). @@ -134,7 +134,7 @@ The default implementations of `bottom`, `top`, and `loss` depend on the The actual training loop and related services (checkpointing, summaries, continuous evaluation, etc.) are all handled by `Estimator` and `Experiment` -objects. `tpu_trainer.py` is the main entrypoint and uses `tpu_trainer_lib.py` +objects. `t2t_trainer.py` is the main entrypoint and uses `trainer_lib.py` to construct the various components. ## Decoding @@ -144,7 +144,7 @@ to construct the various components. ## System Overview for Train/Eval -See `tpu_trainer.py`. +See `t2t_trainer.py` and `trainer_lib.py`. * Create HParams * Create `RunConfig`, including `Parallelism` object (i.e. `data_parallelism`) diff --git a/tensor2tensor/bin/t2t_decoder.py b/tensor2tensor/bin/t2t_decoder.py index 25358739a..132dac0e4 100644 --- a/tensor2tensor/bin/t2t_decoder.py +++ b/tensor2tensor/bin/t2t_decoder.py @@ -36,9 +36,9 @@ # Dependency imports -from tensor2tensor.tpu import tpu_trainer -from tensor2tensor.tpu import tpu_trainer_lib +from tensor2tensor.bin import t2t_trainer from tensor2tensor.utils import decoding +from tensor2tensor.utils import trainer_lib from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -46,7 +46,7 @@ flags = tf.flags FLAGS = flags.FLAGS -# Additional flags in tpu/tpu_trainer.py and utils/flags.py +# Additional flags in bin/t2t_trainer.py and utils/flags.py flags.DEFINE_string("decode_from_file", None, "Path to the source file for decoding") flags.DEFINE_string("decode_to_file", None, @@ -57,7 +57,7 @@ def create_hparams(): - return tpu_trainer_lib.create_hparams( + return trainer_lib.create_hparams( FLAGS.hparams_set, FLAGS.hparams, data_dir=os.path.expanduser(FLAGS.data_dir), @@ -95,10 +95,10 @@ def main(_): hp = create_hparams() decode_hp = create_decode_hparams() - estimator = tpu_trainer_lib.create_estimator( + estimator = trainer_lib.create_estimator( FLAGS.model, hp, - tpu_trainer.create_run_config(hp), + t2t_trainer.create_run_config(hp), decode_hparams=decode_hp, use_tpu=False) diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 99ec99b20..9e77de384 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -13,20 +13,178 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Trainer for T2T models. See tpu_trainer.py.""" +"""Train and evaluate.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function +import contextlib +import os +import sys + # Dependency imports -from tensor2tensor.tpu import tpu_trainer +from tensor2tensor import models # pylint: disable=unused-import +from tensor2tensor import problems as problems_lib # pylint: disable=unused-import +from tensor2tensor.utils import decoding +from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import +from tensor2tensor.utils import registry +from tensor2tensor.utils import trainer_lib +from tensor2tensor.utils import usr_dir import tensorflow as tf +flags = tf.flags +FLAGS = flags.FLAGS + +# See flags.py for additional command-line flags. +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_model calls, that will then be " + "available to the t2t-trainer.") +flags.DEFINE_integer("random_seed", 1234, "Random seed.") +flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") +flags.DEFINE_integer("iterations_per_loop", 1000, + "Number of iterations in a TPU training loop.") +flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") +flags.DEFINE_bool("generate_data", False, "Generate data before training?") +flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", + "Temporary storage directory, used if --generate_data.") +flags.DEFINE_bool("profile", False, "Profile performance?") + +# To maintain compatibility with some internal libs, we guard against these flag +# definitions possibly erroring. Apologies for the ugliness. +try: + flags.DEFINE_string("master", "", "Address of TensorFlow master.") + flags.DEFINE_string("output_dir", "", "Base output directory for run.") + flags.DEFINE_string("schedule", "continuous_train_and_eval", + "Method of Experiment to run.") + flags.DEFINE_integer("eval_steps", 10000, + "Number of steps in evaluation. By default, eval will " + "stop after eval_steps or when it runs through the eval " + "dataset once in full, whichever comes first, so this " + "can be a very large number.") +except: # pylint: disable=bare-except + pass + + +def get_problem_name(): + problems = FLAGS.problems.split("-") + assert len(problems) == 1 + return problems[0] + + +def create_hparams(): + return trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) + + +def create_experiment_fn(): + return trainer_lib.create_experiment_fn( + model_name=FLAGS.model, + problem_name=get_problem_name(), + data_dir=os.path.expanduser(FLAGS.data_dir), + train_steps=FLAGS.train_steps, + eval_steps=FLAGS.eval_steps, + min_eval_frequency=FLAGS.local_eval_frequency, + schedule=FLAGS.schedule, + export=FLAGS.export_saved_model, + decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), + use_tfdbg=FLAGS.tfdbg, + use_dbgprofile=FLAGS.dbgprofile, + eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, + eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, + eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, + eval_early_stopping_metric_minimize=FLAGS. + eval_early_stopping_metric_minimize, + use_tpu=FLAGS.use_tpu) + + +def create_run_config(hp): + return trainer_lib.create_run_config( + model_dir=os.path.expanduser(FLAGS.output_dir), + master=FLAGS.master, + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.tpu_num_shards, + log_device_placement=FLAGS.log_device_placement, + save_checkpoints_steps=max(FLAGS.iterations_per_loop, + FLAGS.local_eval_frequency), + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, + num_gpus=FLAGS.worker_gpu, + gpu_order=FLAGS.gpu_order, + shard_to_cpu=FLAGS.locally_shard_to_cpu, + num_async_replicas=FLAGS.worker_replicas, + gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, + enable_graph_rewriter=FLAGS.experimental_optimize_placement, + use_tpu=FLAGS.use_tpu, + schedule=FLAGS.schedule, + no_data_parallelism=hp.no_data_parallelism, + daisy_chain_variables=hp.daisy_chain_variables, + ps_replicas=FLAGS.ps_replicas, + ps_job=FLAGS.ps_job, + ps_gpu=FLAGS.ps_gpu, + sync=FLAGS.sync, + worker_id=FLAGS.worker_id, + worker_job=FLAGS.worker_job, + random_seed=FLAGS.random_seed) + + +def generate_data(): + # Generate data if requested. + data_dir = os.path.expanduser(FLAGS.data_dir) + tmp_dir = os.path.expanduser(FLAGS.tmp_dir) + tf.gfile.MakeDirs(data_dir) + tf.gfile.MakeDirs(tmp_dir) + + problem_name = get_problem_name() + tf.logging.info("Generating data for %s" % problem_name) + registry.problem(problem_name).generate_data(data_dir, tmp_dir) + + +@contextlib.contextmanager +def profile_context(): + if FLAGS.profile: + with tf.contrib.tfprof.ProfileContext("t2tprof", + trace_steps=range(100), + dump_steps=range(100)) as pctx: + opts = tf.profiler.ProfileOptionBuilder.time_and_memory() + pctx.add_auto_profiling("op", opts, range(100)) + yield + else: + yield + + +def log_registry(): + if FLAGS.registry_help: + tf.logging.info(registry.help_string()) + sys.exit(0) + + +def execute_schedule(exp): + if not hasattr(exp, FLAGS.schedule): + raise ValueError( + "Experiment has no method %s, from --schedule" % FLAGS.schedule) + with profile_context(): + getattr(exp, FLAGS.schedule)() + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + trainer_lib.set_random_seed(FLAGS.random_seed) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + log_registry() + + if FLAGS.generate_data: + generate_data() + + hparams = create_hparams() + run_config = create_run_config(hparams) -def main(unused_argv): - tpu_trainer.main(unused_argv) + exp_fn = create_experiment_fn() + exp = exp_fn(run_config, hparams) + execute_schedule(exp) if __name__ == "__main__": diff --git a/tensor2tensor/insights/transformer_model.py b/tensor2tensor/insights/transformer_model.py index 94bc7c0e1..0a2ff8c46 100644 --- a/tensor2tensor/insights/transformer_model.py +++ b/tensor2tensor/insights/transformer_model.py @@ -24,12 +24,12 @@ import numpy as np +from tensor2tensor.bin import t2t_trainer from tensor2tensor.data_generators import text_encoder from tensor2tensor.insights import graph from tensor2tensor.insights import query_processor -from tensor2tensor.tpu import tpu_trainer -from tensor2tensor.tpu import tpu_trainer_lib from tensor2tensor.utils import decoding +from tensor2tensor.utils import trainer_lib from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -111,7 +111,7 @@ def __init__(self, data_dir, model_dir): data_dir = os.path.expanduser(data_dir) # Create the basic hyper parameters. - self.hparams = tpu_trainer_lib.create_hparams( + self.hparams = trainer_lib.create_hparams( FLAGS.hparams_set, FLAGS.hparams, data_dir=data_dir, @@ -122,10 +122,10 @@ def __init__(self, data_dir, model_dir): decode_hp.add_hparam("shard_id", 0) # Create the estimator and final hyper parameters. - self.estimator = tpu_trainer_lib.create_estimator( + self.estimator = trainer_lib.create_estimator( FLAGS.model, self.hparams, - tpu_trainer.create_run_config(), + t2t_trainer.create_run_config(), decode_hp, use_tpu=False) # Fetch the vocabulary and other helpful variables for decoding. diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 5b4e39058..35bac33b0 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -190,8 +190,8 @@ def basic_params1(): # This is the actual batch size, *not* tokens per batch (i.e. for # language models this is the number of sentences in the batch) tpu_batch_size_per_shard=24, - # Set by tpu_trainer to let the model know whether we are on TPU. - # Switching on/off tpu should not invalidate checkpoints. + # Set by t2t_trainer if --use_tpu to let the model know whether we are on + # TPU. Switching on/off tpu should not invalidate checkpoints. use_tpu=False, # If True in PREDICT mode, then last-position-only optimizations are not # used. diff --git a/tensor2tensor/notebooks/hello_t2t.ipynb b/tensor2tensor/notebooks/hello_t2t.ipynb index 5b58b042b..bc39b7337 100644 --- a/tensor2tensor/notebooks/hello_t2t.ipynb +++ b/tensor2tensor/notebooks/hello_t2t.ipynb @@ -61,7 +61,7 @@ "source": [ "# Install deps\n", "# We're using some new features from tensorflow so we install tf-nightly\n", - "!pip install -q tensor2tensor tf-nightly" + "!pip install -q 'tensor2tensor==1.4.1' tf-nightly" ], "cell_type": "code", "execution_count": 0, diff --git a/tensor2tensor/tpu/__init__.py b/tensor2tensor/tpu/__init__.py deleted file mode 100644 index 3f714ce1f..000000000 --- a/tensor2tensor/tpu/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py deleted file mode 100644 index 47e92da98..000000000 --- a/tensor2tensor/tpu/tpu_trainer.py +++ /dev/null @@ -1,191 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Train on TPU.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import os -import sys - -# Dependency imports - -from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor import problems as problems_lib # pylint: disable=unused-import -from tensor2tensor.tpu import tpu_trainer_lib -from tensor2tensor.utils import decoding -from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import -from tensor2tensor.utils import registry -from tensor2tensor.utils import usr_dir - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -# See flags.py for additional command-line flags. -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_model calls, that will then be " - "available to the t2t-trainer.") -flags.DEFINE_integer("random_seed", 1234, "Random seed.") -flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") -flags.DEFINE_integer("iterations_per_loop", 1000, - "Number of iterations in a TPU training loop.") -flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") -flags.DEFINE_bool("generate_data", False, "Generate data before training?") -flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", - "Temporary storage directory, used if --generate_data.") -flags.DEFINE_bool("profile", False, "Profile performance?") - -# To maintain compatibility with some internal libs, we guard against these flag -# definitions possibly erroring. Apologies for the ugliness. -try: - flags.DEFINE_string("master", "", "Address of TensorFlow master.") - flags.DEFINE_string("output_dir", "", "Base output directory for run.") - flags.DEFINE_string("schedule", "continuous_train_and_eval", - "Method of Experiment to run.") - flags.DEFINE_integer("eval_steps", 10000, - "Number of steps in evaluation. By default, eval will " - "stop after eval_steps or when it runs through the eval " - "dataset once in full, whichever comes first, so this " - "can be a very large number.") -except: # pylint: disable=bare-except - pass - - -def get_problem_name(): - problems = FLAGS.problems.split("-") - assert len(problems) == 1 - return problems[0] - - -def create_hparams(): - return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) - - -def create_experiment_fn(): - return tpu_trainer_lib.create_experiment_fn( - model_name=FLAGS.model, - problem_name=get_problem_name(), - data_dir=os.path.expanduser(FLAGS.data_dir), - train_steps=FLAGS.train_steps, - eval_steps=FLAGS.eval_steps, - min_eval_frequency=FLAGS.local_eval_frequency, - schedule=FLAGS.schedule, - export=FLAGS.export_saved_model, - decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), - use_tfdbg=FLAGS.tfdbg, - use_dbgprofile=FLAGS.dbgprofile, - eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, - eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, - eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, - eval_early_stopping_metric_minimize=FLAGS. - eval_early_stopping_metric_minimize, - use_tpu=FLAGS.use_tpu) - - -def create_run_config(hp): - return tpu_trainer_lib.create_run_config( - model_dir=os.path.expanduser(FLAGS.output_dir), - master=FLAGS.master, - iterations_per_loop=FLAGS.iterations_per_loop, - num_shards=FLAGS.tpu_num_shards, - log_device_placement=FLAGS.log_device_placement, - save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency), - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, - num_gpus=FLAGS.worker_gpu, - gpu_order=FLAGS.gpu_order, - shard_to_cpu=FLAGS.locally_shard_to_cpu, - num_async_replicas=FLAGS.worker_replicas, - gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, - enable_graph_rewriter=FLAGS.experimental_optimize_placement, - use_tpu=FLAGS.use_tpu, - schedule=FLAGS.schedule, - no_data_parallelism=hp.no_data_parallelism, - daisy_chain_variables=hp.daisy_chain_variables, - ps_replicas=FLAGS.ps_replicas, - ps_job=FLAGS.ps_job, - ps_gpu=FLAGS.ps_gpu, - sync=FLAGS.sync, - worker_id=FLAGS.worker_id, - worker_job=FLAGS.worker_job, - random_seed=FLAGS.random_seed) - - -def generate_data(): - # Generate data if requested. - data_dir = os.path.expanduser(FLAGS.data_dir) - tmp_dir = os.path.expanduser(FLAGS.tmp_dir) - tf.gfile.MakeDirs(data_dir) - tf.gfile.MakeDirs(tmp_dir) - - problem_name = get_problem_name() - tf.logging.info("Generating data for %s" % problem_name) - registry.problem(problem_name).generate_data(data_dir, tmp_dir) - - -@contextlib.contextmanager -def profile_context(): - if FLAGS.profile: - with tf.contrib.tfprof.ProfileContext("t2tprof", - trace_steps=range(100), - dump_steps=range(100)) as pctx: - opts = tf.profiler.ProfileOptionBuilder.time_and_memory() - pctx.add_auto_profiling("op", opts, range(100)) - yield - else: - yield - - -def log_registry(): - if FLAGS.registry_help: - tf.logging.info(registry.help_string()) - sys.exit(0) - - -def execute_schedule(exp): - if not hasattr(exp, FLAGS.schedule): - raise ValueError( - "Experiment has no method %s, from --schedule" % FLAGS.schedule) - with profile_context(): - getattr(exp, FLAGS.schedule)() - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - tpu_trainer_lib.set_random_seed(FLAGS.random_seed) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - log_registry() - - if FLAGS.generate_data: - generate_data() - - hparams = create_hparams() - run_config = create_run_config(hparams) - - exp_fn = create_experiment_fn() - exp = exp_fn(run_config, hparams) - execute_schedule(exp) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index fe2790194..1125a6ed3 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -24,7 +24,7 @@ class MyModel(T2TModel): ``` Access by snake-cased name: `registry.model("my_model")`. If you're using -`tpu_trainer.py`, you can pass on the command-line: `--model=my_model`. +`t2t_trainer.py`, you can pass on the command-line: `--model=my_model`. See all the models registered: `registry.list_models()`. @@ -32,13 +32,13 @@ class MyModel(T2TModel): * Register: `registry.register_hparams` * List: `registry.list_hparams` * Retrieve by name: `registry.hparams` - * Command-line flag in `tpu_trainer.py`: `--hparams_set=name` + * Command-line flag in `t2t_trainer.py`: `--hparams_set=name` For hyperparameter ranges: * Register: `registry.register_ranged_hparams` * List: `registry.list_ranged_hparams` * Retrieve by name: `registry.ranged_hparams` - * Command-line flag in `tpu_trainer.py`: `--hparams_range=name` + * Command-line flag in `t2t_trainer.py`: `--hparams_range=name` """ from __future__ import absolute_import from __future__ import division diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 630011541..d2af84c0f 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -739,7 +739,7 @@ def estimator_model_fn(cls, config=None, params=None, decode_hparams=None, - use_tpu=True): + use_tpu=False): """Model fn for Estimator. Args: @@ -755,9 +755,6 @@ def estimator_model_fn(cls, Returns: TPUEstimatorSpec if use tpu else EstimatorSpec """ - tf.logging.warning("T2TModel.estimator_model_fn implements a subset of " - "model_builder.model_fn and is currently only used " - "in tpu_trainer.") _create_dummy_vars() hparams = copy.deepcopy(hparams) hparams.use_tpu = use_tpu diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/utils/trainer_lib.py similarity index 99% rename from tensor2tensor/tpu/tpu_trainer_lib.py rename to tensor2tensor/utils/trainer_lib.py index ff2045302..6442d9781 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/utils/trainer_lib.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Library for training on TPU. See tpu_trainer.py.""" +"""Library for training. See t2t_trainer.py.""" from __future__ import absolute_import from __future__ import division diff --git a/tensor2tensor/tpu/tpu_trainer_lib_test.py b/tensor2tensor/utils/trainer_lib_test.py similarity index 88% rename from tensor2tensor/tpu/tpu_trainer_lib_test.py rename to tensor2tensor/utils/trainer_lib_test.py index 2a2148afd..5df62d2cb 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib_test.py +++ b/tensor2tensor/utils/trainer_lib_test.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for tpu_trainer_lib.""" +"""Tests for trainer_lib.""" from __future__ import absolute_import from __future__ import division @@ -28,8 +28,8 @@ from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem as problem_lib -from tensor2tensor.tpu import tpu_trainer_lib from tensor2tensor.utils import registry +from tensor2tensor.utils import trainer_lib import tensorflow as tf @@ -47,7 +47,7 @@ def generate_data(self, data_dir, _): self.dev_filepaths(data_dir, 1, shuffled=True), 100) -class TpuTrainerTest(tf.test.TestCase): +class TrainerLibTest(tf.test.TestCase): @classmethod def setUpClass(cls): @@ -60,7 +60,7 @@ def setUpClass(cls): registry.problem("tiny_algo").generate_data(cls.data_dir, None) def testExperiment(self): - exp_fn = tpu_trainer_lib.create_experiment_fn( + exp_fn = trainer_lib.create_experiment_fn( "transformer", "tiny_algo", self.data_dir, @@ -68,7 +68,7 @@ def testExperiment(self): eval_steps=1, min_eval_frequency=1, use_tpu=False) - run_config = tpu_trainer_lib.create_run_config( + run_config = trainer_lib.create_run_config( model_dir=self.data_dir, num_gpus=0, use_tpu=False) hparams = registry.hparams("transformer_tiny_tpu")() exp = exp_fn(run_config, hparams) @@ -76,9 +76,9 @@ def testExperiment(self): def testModel(self): # HParams - hparams = tpu_trainer_lib.create_hparams("transformer_tiny", - data_dir=self.data_dir, - problem_name="tiny_algo") + hparams = trainer_lib.create_hparams("transformer_tiny", + data_dir=self.data_dir, + problem_name="tiny_algo") # Dataset problem = hparams.problem_instances[0] diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index f2c4f1559..bec758327 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -29,10 +29,10 @@ "import tensorflow as tf\n", "import numpy as np\n", "\n", - "from tensor2tensor.tpu import tpu_trainer_lib\n", "from tensor2tensor.utils import t2t_model\n", "from tensor2tensor.utils import decoding\n", "from tensor2tensor.utils import devices\n", + "from tensor2tensor.utils import trainer_lib\n", "from tensor2tensor.visualization import attention\n" ] }, @@ -133,7 +133,7 @@ } ], "source": [ - "hparams = tpu_trainer_lib.create_hparams(FLAGS.hparams_set, data_dir=FLAGS.data_dir, problem_name=PROBLEM)\n", + "hparams = trainer_lib.create_hparams(FLAGS.hparams_set, data_dir=FLAGS.data_dir, problem_name=PROBLEM)\n", "hparams.use_fixed_batch_size = True\n", "hparams.batch_size = 1\n", "\n", From b20795cbb6d32bda92e1d5e89305bb736634b692 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Tue, 2 Jan 2018 12:11:53 -0800 Subject: [PATCH 0695/4095] Make scripts thin and executable; add t2t_usr_dir example and test; log metadata; allow Eager-mode re-registration PiperOrigin-RevId: 180576491 --- .travis.yml | 1 + README.md | 32 +-------------- docs/walkthrough.md | 32 +-------------- setup.py | 4 +- tensor2tensor/bin/t2t-datagen | 15 +++++++ tensor2tensor/bin/t2t-decoder | 15 +++++++ tensor2tensor/bin/t2t-make-tf-configs | 15 +++++++ tensor2tensor/bin/t2t-trainer | 15 +++++++ tensor2tensor/bin/t2t_trainer.py | 40 +++++++++++++++++++ tensor2tensor/layers/common_hparams.py | 2 +- tensor2tensor/models/transformer.py | 1 + .../test_data/example_usr_dir/__init__.py | 17 ++++++++ .../test_data/example_usr_dir/my_submodule.py | 32 +++++++++++++++ tensor2tensor/utils/registry.py | 10 +++-- 14 files changed, 164 insertions(+), 67 deletions(-) create mode 100755 tensor2tensor/bin/t2t-datagen create mode 100755 tensor2tensor/bin/t2t-decoder create mode 100755 tensor2tensor/bin/t2t-make-tf-configs create mode 100755 tensor2tensor/bin/t2t-trainer create mode 100644 tensor2tensor/test_data/example_usr_dir/__init__.py create mode 100644 tensor2tensor/test_data/example_usr_dir/my_submodule.py diff --git a/.travis.yml b/.travis.yml index f424014b5..00fe35951 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,7 @@ script: - pytest tensor2tensor/utils/registry_test.py - pytest tensor2tensor/utils/trainer_lib_test.py - t2t-datagen 2>&1 | grep translate && echo passed + - t2t-trainer --registry_help --t2t_usr_dir=./tensor2tensor/test_data/example_usr_dir 2>&1 | grep my_very_own_hparams && echo passed - python -c "from tensor2tensor.models import transformer; print(transformer.Transformer.__name__)" - t2t-trainer --registry_help - mkdir $T2T_DATA_DIR diff --git a/README.md b/README.md index de2951c53..06a15d1c8 100644 --- a/README.md +++ b/README.md @@ -296,36 +296,8 @@ specifying the `--t2t_usr_dir` flag in `t2t-trainer`. You can do so for models, hyperparameter sets, modalities, and problems. Please do submit a pull request if your component might be useful to others. -Here's an example with a new hyperparameter set: - -```python -# In ~/usr/t2t_usr/my_registrations.py - -from tensor2tensor.models import transformer -from tensor2tensor.utils import registry - -@registry.register_hparams -def transformer_my_very_own_hparams_set(): - hparams = transformer.transformer_base() - hparams.hidden_size = 1024 - ... -``` - -```python -# In ~/usr/t2t_usr/__init__.py -from . import my_registrations -``` - -``` -t2t-trainer --t2t_usr_dir=~/usr/t2t_usr --registry_help -``` - -You'll see under the registered HParams your -`transformer_my_very_own_hparams_set`, which you can directly use on the command -line with the `--hparams_set` flag. - -`t2t-datagen` also supports the `--t2t_usr_dir` flag for `Problem` -registrations. +See the [`example_usr_dir`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/test_data/example_usr_dir) +for an example user directory. ## Adding a dataset diff --git a/docs/walkthrough.md b/docs/walkthrough.md index de2951c53..06a15d1c8 100644 --- a/docs/walkthrough.md +++ b/docs/walkthrough.md @@ -296,36 +296,8 @@ specifying the `--t2t_usr_dir` flag in `t2t-trainer`. You can do so for models, hyperparameter sets, modalities, and problems. Please do submit a pull request if your component might be useful to others. -Here's an example with a new hyperparameter set: - -```python -# In ~/usr/t2t_usr/my_registrations.py - -from tensor2tensor.models import transformer -from tensor2tensor.utils import registry - -@registry.register_hparams -def transformer_my_very_own_hparams_set(): - hparams = transformer.transformer_base() - hparams.hidden_size = 1024 - ... -``` - -```python -# In ~/usr/t2t_usr/__init__.py -from . import my_registrations -``` - -``` -t2t-trainer --t2t_usr_dir=~/usr/t2t_usr --registry_help -``` - -You'll see under the registered HParams your -`transformer_my_very_own_hparams_set`, which you can directly use on the command -line with the `--hparams_set` flag. - -`t2t-datagen` also supports the `--t2t_usr_dir` flag for `Problem` -registrations. +See the [`example_usr_dir`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/test_data/example_usr_dir) +for an example user directory. ## Adding a dataset diff --git a/setup.py b/setup.py index fb2b6492d..aae7f6288 100644 --- a/setup.py +++ b/setup.py @@ -35,8 +35,8 @@ 'six', ], extras_require={ - 'tensorflow': ['tensorflow>=1.4.0'], - 'tensorflow_gpu': ['tensorflow-gpu>=1.4.0'], + 'tensorflow': ['tensorflow>=1.4.1'], + 'tensorflow_gpu': ['tensorflow-gpu>=1.4.1'], 'tests': ['pytest', 'h5py', 'mock'], }, classifiers=[ diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen new file mode 100755 index 000000000..ef8933e90 --- /dev/null +++ b/tensor2tensor/bin/t2t-datagen @@ -0,0 +1,15 @@ +"""t2t-datagen.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensor2tensor.bin import t2t_datagen + +import tensorflow as tf + +def main(argv): + t2t_datagen.main(argv) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder new file mode 100755 index 000000000..a878c0e9b --- /dev/null +++ b/tensor2tensor/bin/t2t-decoder @@ -0,0 +1,15 @@ +"""t2t-decoder.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensor2tensor.bin import t2t_decoder + +import tensorflow as tf + +def main(argv): + t2t_decoder.main(argv) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs new file mode 100755 index 000000000..9e656239e --- /dev/null +++ b/tensor2tensor/bin/t2t-make-tf-configs @@ -0,0 +1,15 @@ +"""t2t-make-tf-configs.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensor2tensor.bin import make_tf_configs + +import tensorflow as tf + +def main(argv): + make_tf_configs.main(argv) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer new file mode 100755 index 000000000..5cbc8cf77 --- /dev/null +++ b/tensor2tensor/bin/t2t-trainer @@ -0,0 +1,15 @@ +"""t2t-trainer.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensor2tensor.bin import t2t_trainer + +import tensorflow as tf + +def main(argv): + t2t_trainer.main(argv) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 9e77de384..6ad0fd438 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -162,6 +162,43 @@ def log_registry(): sys.exit(0) +def is_chief(): + schedules = ["train", "train_and_evaluate", "continuous_train_and_eval"] + return FLAGS.worker_id == 0 and FLAGS.schedule in schedules + + +def save_metadata(hparams): + """Saves FLAGS and hparams to output_dir.""" + output_dir = os.path.expanduser(FLAGS.output_dir) + # Save FLAGS in txt file + if hasattr(FLAGS, "flags_into_string"): + flags_str = FLAGS.flags_into_string() + t2t_flags_str = "\n".join([ + "--%s=%s" % (f.name, f.value) + for f in FLAGS.flags_by_module_dict()[ + "tensor2tensor.utils.flags"] + ]) + else: + flags_dict = FLAGS.__dict__["__flags"] + flags_str = "\n".join( + ["--%s=%s" % (name, str(f)) for (name, f) in flags_dict.items()]) + t2t_flags_str = None + + flags_txt = os.path.join(output_dir, "flags.txt") + with tf.gfile.Open(flags_txt, "w") as f: + f.write(flags_str) + + if t2t_flags_str: + t2t_flags_txt = os.path.join(output_dir, "flags_t2t.txt") + with tf.gfile.Open(t2t_flags_txt, "w") as f: + f.write(t2t_flags_str) + + # Save hparams as hparams.json + hparams_fname = os.path.join(output_dir, "hparams.json") + with tf.gfile.Open(hparams_fname, "w") as f: + f.write(hparams.to_json()) + + def execute_schedule(exp): if not hasattr(exp, FLAGS.schedule): raise ValueError( @@ -182,6 +219,9 @@ def main(_): hparams = create_hparams() run_config = create_run_config(hparams) + if is_chief(): + save_metadata(hparams) + exp_fn = create_experiment_fn() exp = exp_fn(run_config, hparams) execute_schedule(exp) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 35bac33b0..b9593b00e 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -96,7 +96,7 @@ def basic_params1(): norm_type="layer", # "batch", layer", "noam", "none". # epsilon parameter to normalization function norm_epsilon=1e-6, - symbol_modality_num_shards=16, + symbol_modality_num_shards=1, # During training, we drop sequences whose inputs and targets are shorter # than min_length min_length=0, diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index de812b64b..f43ace037 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -750,6 +750,7 @@ def transformer_base_v1(): hparams.num_sampled_classes = 0 hparams.label_smoothing = 0.1 hparams.shared_embedding_and_softmax_weights = True + hparams.symbol_modality_num_shards = 16 # Add new ones like this. hparams.add_hparam("filter_size", 2048) # Layer-related flags. If zero, these fall back on hparams.num_hidden_layers. diff --git a/tensor2tensor/test_data/example_usr_dir/__init__.py b/tensor2tensor/test_data/example_usr_dir/__init__.py new file mode 100644 index 000000000..9bab20593 --- /dev/null +++ b/tensor2tensor/test_data/example_usr_dir/__init__.py @@ -0,0 +1,17 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example T2T user directory.""" +from . import my_submodule diff --git a/tensor2tensor/test_data/example_usr_dir/my_submodule.py b/tensor2tensor/test_data/example_usr_dir/my_submodule.py new file mode 100644 index 000000000..b6c3579ac --- /dev/null +++ b/tensor2tensor/test_data/example_usr_dir/my_submodule.py @@ -0,0 +1,32 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example registrations for T2T.""" +from tensor2tensor.layers import common_hparams +from tensor2tensor.utils import registry + + +@registry.register_hparams +def my_very_own_hparams(): + # Start with the base set + hp = common_hparams.basic_params1() + # Modify existing hparams + hp.num_hidden_layers = 2 + # Add new hparams + hp.add_hparam("filter_size", 2048) + return hp + +# Use register_model for a new T2TModel +# Use register_problem for a new Problem diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index 1125a6ed3..4f84752d1 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -51,6 +51,8 @@ class MyModel(T2TModel): import six +from tensorflow.python.eager import context + _MODELS = {} _HPARAMS = {} _RANGED_HPARAMS = {} @@ -120,7 +122,7 @@ def register_model(name=None): def decorator(model_cls, registration_name=None): """Registers & returns model_cls with registration_name or default name.""" model_name = registration_name or default_name(model_cls) - if model_name in _MODELS: + if model_name in _MODELS and not context.in_eager_mode(): raise LookupError("Model %s already registered." % model_name) model_cls.REGISTERED_NAME = model_name _MODELS[model_name] = model_cls @@ -150,7 +152,7 @@ def register_hparams(name=None): def decorator(hp_fn, registration_name=None): """Registers & returns hp_fn with registration_name or default name.""" hp_name = registration_name or default_name(hp_fn) - if hp_name in _HPARAMS: + if hp_name in _HPARAMS and not context.in_eager_mode(): raise LookupError("HParams set %s already registered." % hp_name) _HPARAMS[hp_name] = hp_fn return hp_fn @@ -217,7 +219,7 @@ def register_problem(name=None): def decorator(p_cls, registration_name=None): """Registers & returns p_cls with registration_name or default name.""" p_name = registration_name or default_name(p_cls) - if p_name in _PROBLEMS: + if p_name in _PROBLEMS and not context.in_eager_mode(): raise LookupError("Problem %s already registered." % p_name) _PROBLEMS[p_name] = p_cls @@ -317,7 +319,7 @@ def _internal_register_modality(name, mod_collection, collection_str): def decorator(mod_cls, registration_name=None): """Registers & returns mod_cls with registration_name or default name.""" mod_name = registration_name or default_name(mod_cls) - if mod_name in mod_collection: + if mod_name in mod_collection and not context.in_eager_mode(): raise LookupError("%s modality %s already registered." % (collection_str, mod_name)) mod_collection[mod_name] = mod_cls From 4361c19242056b19d3455e7d9d7d17bd9f67aa98 Mon Sep 17 00:00:00 2001 From: T2T Team Date: Tue, 2 Jan 2018 15:30:55 -0800 Subject: [PATCH 0696/4095] Adding bower dependencies and changes to the index html to properly PiperOrigin-RevId: 180601814 --- setup.py | 4 + .../attention-visualization.js | 13 ++- tensor2tensor/insights/polymer/bower.json | 80 +++++++++++++++++++ .../polymer/explore_view/explore-view.html | 4 +- .../polymer/explore_view/explore-view.js | 12 ++- .../graph-visualization.js | 12 ++- tensor2tensor/insights/polymer/index.html | 58 +++++++++++++- .../polymer/insights_app/insights-app.html | 6 +- .../polymer/insights_app/insights-app.js | 12 ++- .../language-selector-content.js | 12 ++- .../language_selector/language-selector.js | 9 ++- .../processing-visualization.js | 9 ++- .../polymer/query_card/query-card.html | 2 +- .../insights/polymer/query_card/query-card.js | 12 ++- .../tensor2tensor.html} | 4 +- .../translation-result.html | 6 +- .../translation_result/translation-result.js | 9 ++- tensor2tensor/insights/server.py | 19 ++++- tensor2tensor/insights/transformer_model.py | 4 +- 19 files changed, 239 insertions(+), 48 deletions(-) create mode 100644 tensor2tensor/insights/polymer/bower.json rename tensor2tensor/insights/{index.html => polymer/tensor2tensor.html} (91%) diff --git a/setup.py b/setup.py index aae7f6288..18f97d089 100644 --- a/setup.py +++ b/setup.py @@ -23,10 +23,14 @@ 'tensor2tensor/bin/t2t-datagen', 'tensor2tensor/bin/t2t-decoder', 'tensor2tensor/bin/t2t-make-tf-configs', + 'tensor2tensor/insights/server', ], install_requires=[ 'bz2file', + 'flask', 'future', + 'gevent', + 'gunicorn', 'gym', 'numpy', 'requests', diff --git a/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js b/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js index b58d90905..e738c2629 100644 --- a/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js +++ b/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js @@ -15,8 +15,6 @@ * limitations under the License. */ -goog.module('t2t.AttentionVisualization'); - /** * `` presents a heatmap of input-output associations. * @@ -62,10 +60,16 @@ class AttentionVisualization extends Polymer.Element { this.zoom_ = undefined; } + /** + * @return {string} The component name. + */ static get is() { return 'attention-visualization'; } + /** + * @return {!Object} The component properties. + */ static get properties() { return { /** @@ -84,6 +88,9 @@ class AttentionVisualization extends Polymer.Element { }; } + /** + * @return {!Array} The component observers. + */ static get observers() { return [ 'zoomDepthChanged_(zoomDepth_)', @@ -308,5 +315,3 @@ class AttentionVisualization extends Polymer.Element { } customElements.define(AttentionVisualization.is, AttentionVisualization); - -exports = {AttentionVisualization}; diff --git a/tensor2tensor/insights/polymer/bower.json b/tensor2tensor/insights/polymer/bower.json new file mode 100644 index 000000000..da1f4aaed --- /dev/null +++ b/tensor2tensor/insights/polymer/bower.json @@ -0,0 +1,80 @@ +{ + "name": "tensor2tensor-insights", + "homepage": "https://github.com/tensorflow/tensor2tensor", + "description": "Components for analyzing tensor2tensor neural machine translation models.", + "main": "index.html", + "keywords": [ + "neural", + "machine", + "translation" + ], + "authors": [ + "kstevens@google.com" + ], + "license": "Apache 2.0", + "private": true, + "ignore": [ + "**/.*", + "node_modules", + "bower_components", + "test", + "tests" + ], + "dependencies": { + "app-layout": "PolymerElements/app-layout#2.0.4", + "app-route": "PolymerElements/app-route#2.0.3", + "d3": "d3#4.12.2", + "iron-a11y-keys": "PolymerElements/iron-a11y-keys#2.0.0", + "iron-ajax": "PolymerElements/iron-ajax#2.0.0", + "iron-flex-layout": "PolymerElements/iron-flex-layout#2.0.0", + "iron-icon": "PolymerElements/iron-icon#2.0.0", + "iron-icons": "PolymerElements/iron-icons#2.0.0", + "iron-list": "PolymerElements/iron-list#2.0.0", + "iron-pages": "PolymerElements/iron-pages#2.0.0", + "iron-selector": "PolymerElements/iron-selector#2.0.0", + "neon-animation": "PolymerElements/neon-animation#2.0.0", + "paper-button": "PolymerElements/paper-button#2.0.0", + "paper-card": "PolymerElements/paper-card#2.0.0", + "paper-dialog": "PolymerElements/paper-dialog#2.0.0", + "paper-dropdown-menu": "PolymerElements/paper-dropdown-menu#2.0.0", + "paper-icon-button": "PolymerElements/paper-icon-button#2.0.0", + "paper-input": "PolymerElements/paper-input#2.0.0", + "paper-item": "PolymerElements/paper-item#2.0.0", + "paper-listbox": "PolymerElements/paper-listbox#2.0.0", + "paper-slider": "PolymerElements/paper-slider#2.0.0", + "paper-tabs": "PolymerElements/paper-tabs#2.0.0", + "paper-toggle-button": "PolymerElements/paper-toggle-button#2.0.0", + "paper-tooltip": "PolymerElements/paper-tooltip#2.0.0", + "paper-progress": "PolymerElements/paper-progress#2.0.0", + "polymer": "polymer/polymer#v2.3.1" + }, + "resolutions": { + "webcomponentsjs": "^v1.0.19", + "polymer": "^v2.3.1", + "app-route": "^2.0.3", + "app-layout": "^2.0.4", + "iron-location": "1 - 2", + "iron-selector": "^2.0.0", + "neon-animation": "^2.0.0", + "iron-icon": "^2.0.0", + "iron-pages": "^2.0.0", + "iron-icons": "^2.0.0", + "paper-icon-button": "^2.0.0", + "paper-item": "^2.0.0", + "iron-flex-layout": "^2.0.0", + "paper-listbox": "^2.0.0", + "iron-a11y-keys": "^2.0.0", + "paper-dialog": "^2.0.0", + "iron-ajax": "^2.0.0", + "paper-progress": "^2.0.0", + "paper-dropdown-menu": "^2.0.0", + "paper-tabs": "^2.0.0", + "paper-input": "^2.0.0", + "paper-toggle-button": "^2.0.0", + "paper-slider": "^2.0.0", + "iron-list": "^2.0.0", + "paper-card": "^2.0.0", + "paper-tooltip": "^2.0.0", + "iron-overlay-behavior": "^2.2.0" + } +} diff --git a/tensor2tensor/insights/polymer/explore_view/explore-view.html b/tensor2tensor/insights/polymer/explore_view/explore-view.html index d0456211f..97fce423c 100644 --- a/tensor2tensor/insights/polymer/explore_view/explore-view.html +++ b/tensor2tensor/insights/polymer/explore_view/explore-view.html @@ -31,8 +31,8 @@ - - + +